diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -5018,6 +5018,23 @@ return true; } + if (auto C = dyn_cast<ConstantSDNode>(RHS)) { + int64_t ImmOff = C->getSExtValue(); + unsigned Size = 1 << Scale; + + // If we can unscale the immediate we can use the reg+reg addressing mode. + if (ImmOff % Size) + return false; + + SDLoc DL(N); + Base = LHS; + Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64); + SDValue Ops[] = {Offset}; + SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops); + Offset = SDValue(MI, 0); + return true; + } + // Check if the RHS is a shift node with a constant. if (RHS.getOpcode() != ISD::SHL) return false; diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll --- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll +++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll @@ -652,12 +652,11 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl] ; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: mov x9, #-16 -; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9] +; CHECK-NEXT: mov x9, #-8 +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -672,12 +671,11 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl] ; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: mov x9, #-2 -; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9] +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1] ; CHECK-NEXT: 
addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -716,12 +714,11 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] ; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: mov x9, #-16 -; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9] +; CHECK-NEXT: mov x9, #-4 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -736,12 +733,11 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] ; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: mov x9, #-4 -; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9] +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -780,12 +776,11 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] ; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: mov x9, #-16 -; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9] +; CHECK-NEXT: mov x9, #-2 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -800,12 +795,11 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] ; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: mov x9, #-8 -; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9] +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: ld1d { z0.d }, p0/z, 
[x8, x9, lsl #3] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -844,12 +838,11 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl] ; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: mov x9, #-16 -; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9] +; CHECK-NEXT: mov x9, #-8 +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -864,12 +857,11 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl] ; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: mov x9, #-2 -; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9] +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -908,12 +900,11 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] ; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: mov x9, #-16 -; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9] +; CHECK-NEXT: mov x9, #-4 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -928,12 +919,11 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] ; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: mov x9, #-4 -; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9] +; CHECK-NEXT: mov x9, #-1 +; 
CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -972,12 +962,11 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] ; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: mov x9, #-16 -; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9] +; CHECK-NEXT: mov x9, #-2 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -992,12 +981,11 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] ; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: mov x9, #-8 -; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9] +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -1039,12 +1027,11 @@ ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z1.d, p1/z, #1 // =0x1 ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] ; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: mov x9, #-8 -; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9] +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3] ; CHECK-NEXT: and z0.d, z0.d, #0x1 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; CHECK-NEXT: addvl sp, sp, #2 @@ -1064,12 +1051,11 @@ ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov z1.s, p1/z, #1 // =0x1 ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] ; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: mov x9, #-4 -; CHECK-NEXT: ld1b { 
z0.b }, p1/z, [x8, x9] +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2] ; CHECK-NEXT: and z0.s, z0.s, #0x1 ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 ; CHECK-NEXT: addvl sp, sp, #2 @@ -1089,12 +1075,11 @@ ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: mov z1.h, p1/z, #1 // =0x1 ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl] ; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: mov x9, #-2 -; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9] +; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1] ; CHECK-NEXT: and z0.h, z0.h, #0x1 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: addvl sp, sp, #2 @@ -1136,12 +1121,11 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] ; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: mov x9, #-16 -; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9] +; CHECK-NEXT: mov x9, #-2 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -1157,14 +1141,13 @@ ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: mov x9, #-32 +; CHECK-NEXT: mov x9, #-8 ; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z3.s }, p0, [x8, #3, mul vl] ; CHECK-NEXT: st1w { z2.s }, p0, [x8, #2, mul vl] ; CHECK-NEXT: addvl x8, x8, #2 -; CHECK-NEXT: ld1b { z0.b }, p1/z, [x8, x9] +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2] ; CHECK-NEXT: sub x8, x8, #32 // =32 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x8, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #4 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll --- 
a/llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll @@ -65,13 +65,13 @@ ; ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32 +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 ; VBITS_EQ_256-DAG: ld1b { [[OP_LO:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[OP_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]] +; VBITS_EQ_256-DAG: ld1b { [[OP_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP_LO]].b ; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP_HI]].b ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[A]]] +; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]] ; VBITS_EQ_256-NEXT: ret %op = load <64 x i8>, <64 x i8>* %a %res = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %op) @@ -146,13 +146,13 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h ; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op = load <32 x i16>, <32 x i16>* %a %res = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %op) @@ -227,13 +227,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s ; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op = load <16 x i32>, <16 x i32>* %a %res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %op) @@ -308,13 +308,13 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d ; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op = load <8 x i64>, <8 x i64>* %a %res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %op) @@ -393,13 +393,13 @@ ; ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32 +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 ; VBITS_EQ_256-DAG: ld1b { [[OP_LO:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[OP_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]] +; VBITS_EQ_256-DAG: ld1b { [[OP_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: cnt [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP_LO]].b ; VBITS_EQ_256-DAG: cnt [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP_HI]].b ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[A]]] +; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]] ; VBITS_EQ_256-NEXT: ret %op = load <64 x i8>, <64 x i8>* %a %res = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %op) @@ -476,13 +476,13 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: cnt [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h ; VBITS_EQ_256-DAG: cnt [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op = load <32 x i16>, <32 x i16>* %a %res = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %op) @@ -561,13 +561,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: cnt [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s ; VBITS_EQ_256-DAG: cnt [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op = load <16 x i32>, <16 x i32>* %a %res = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %op) @@ -648,13 +648,13 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: cnt [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d ; VBITS_EQ_256-DAG: cnt [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op = load <8 x i64>, <8 x i64>* %a %res = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %op) @@ -737,15 +737,15 @@ ; ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32 +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 ; VBITS_EQ_256-DAG: ld1b { [[OP_LO:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[OP_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]] +; VBITS_EQ_256-DAG: ld1b { [[OP_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: rbit [[RBIT_LO:z[0-9]+]].b, [[PG]]/m, [[OP_LO]].b ; VBITS_EQ_256-DAG: rbit [[RBIT_HI:z[0-9]+]].b, [[PG]]/m, [[OP_HI]].b ; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[RBIT_LO]].b ; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[RBIT_HI]].b ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[A]]] +; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]] ; VBITS_EQ_256-NEXT: ret %op = load <64 x i8>, <64 x i8>* %a %res = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %op) @@ -826,15 +826,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: rbit [[RBIT_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h ; VBITS_EQ_256-DAG: rbit [[RBIT_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h ; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[RBIT_LO]].h ; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[RBIT_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op = load <32 x i16>, <32 x i16>* %a %res = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %op) @@ -917,15 +917,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: rbit [[RBIT_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s ; VBITS_EQ_256-DAG: rbit [[RBIT_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s ; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[RBIT_LO]].s ; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[RBIT_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op = load <16 x i32>, <16 x i32>* %a %res = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %op) @@ -1006,15 +1006,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: rbit [[RBIT_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d ; VBITS_EQ_256-DAG: rbit [[RBIT_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d ; VBITS_EQ_256-DAG: clz [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[RBIT_LO]].d ; VBITS_EQ_256-DAG: clz [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[RBIT_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op = load <8 x i64>, <8 x i64>* %a %res = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %op) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll @@ -73,19 +73,17 @@ ; Ensure sensible type legalisation ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 -; VBITS_EQ_256-DAG: add x[[C_HI:[0-9]+]], x2, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl 
#1] ; VBITS_EQ_256-DAG: fcmeq [[CMP_HI:p[0-9]+]].h, [[PG]]/z, [[OP1_HI]].h, [[OP2_HI]].h ; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].h, [[CMP_HI]]/z, #-1 ; VBITS_EQ_256-DAG: fcmeq [[CMP_LO:p[0-9]+]].h, [[PG]]/z, [[OP1_LO]].h, [[OP2_LO]].h ; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].h, [[CMP_LO]]/z, #-1 ; VBITS_EQ_256-DAG: st1h { [[SEXT_LO]].h }, [[PG]], [x2] -; VBITS_EQ_256-DAG: st1h { [[SEXT_HI]].h }, [[PG]], [x[[C_HI]]] +; VBITS_EQ_256-DAG: st1h { [[SEXT_HI]].h }, [[PG]], [x2, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op1 = load <32 x half>, <32 x half>* %a %op2 = load <32 x half>, <32 x half>* %b @@ -178,19 +176,17 @@ ; Ensure sensible type legalisation ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 -; VBITS_EQ_256-DAG: add x[[C_HI:[0-9]+]], x2, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: fcmeq [[CMP_HI:p[0-9]+]].s, [[PG]]/z, [[OP1_HI]].s, [[OP2_HI]].s ; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].s, [[CMP_HI]]/z, #-1 ; VBITS_EQ_256-DAG: fcmeq [[CMP_LO:p[0-9]+]].s, [[PG]]/z, [[OP1_LO]].s, [[OP2_LO]].s ; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].s, [[CMP_LO]]/z, #-1 ; VBITS_EQ_256-DAG: st1w { [[SEXT_LO]].s }, [[PG]], [x2] -; VBITS_EQ_256-DAG: st1w { [[SEXT_HI]].s }, [[PG]], [x[[C_HI]]] +; VBITS_EQ_256-DAG: st1w { [[SEXT_HI]].s }, [[PG]], [x2, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op1 = load <16 x float>, <16 x float>* %a %op2 = load <16 x float>, <16 x float>* %b @@ 
-283,19 +279,17 @@ ; Ensure sensible type legalisation ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 -; VBITS_EQ_256-DAG: add x[[C_HI:[0-9]+]], x2, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: fcmeq [[CMP_HI:p[0-9]+]].d, [[PG]]/z, [[OP1_HI]].d, [[OP2_HI]].d ; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].d, [[CMP_HI]]/z, #-1 ; VBITS_EQ_256-DAG: fcmeq [[CMP_LO:p[0-9]+]].d, [[PG]]/z, [[OP1_LO]].d, [[OP2_LO]].d ; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].d, [[CMP_LO]]/z, #-1 ; VBITS_EQ_256-DAG: st1d { [[SEXT_LO]].d }, [[PG]], [x2] -; VBITS_EQ_256-DAG: st1d { [[SEXT_HI]].d }, [[PG]], [x[[C_HI]]] +; VBITS_EQ_256-DAG: st1d { [[SEXT_HI]].d }, [[PG]], [x2, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x double>, <8 x double>* %a %op2 = load <8 x double>, <8 x double>* %b diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-arith.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-arith.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-arith.ll @@ -69,12 +69,11 @@ ; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] ; CHECK-DAG: fadd [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h ; CHECK-DAG: st1h { [[RES]].h }, [[PG]], [x0] -; VBITS_LE_256-DAG: add x[[A1:[0-9]+]], x0, #[[#VBYTES]] -; VBITS_LE_256-DAG: add x[[B1:[0-9]+]], x1, #[[#VBYTES]] -; VBITS_LE_256-DAG: ld1h { [[OP1_1:z[0-9]+]].h }, [[PG]]/z, [x[[A1]]] 
-; VBITS_LE_256-DAG: ld1h { [[OP2_1:z[0-9]+]].h }, [[PG]]/z, [x[[B1]]] +; VBITS_LE_256-DAG: mov x[[IDX_1:[0-9]+]], #[[#div(VBYTES,2)]] +; VBITS_LE_256-DAG: ld1h { [[OP1_1:z[0-9]+]].h }, [[PG]]/z, [x0, x[[IDX_1]], lsl #1] +; VBITS_LE_256-DAG: ld1h { [[OP2_1:z[0-9]+]].h }, [[PG]]/z, [x1, x[[IDX_1]], lsl #1] ; VBITS_LE_256-DAG: fadd [[RES_1:z[0-9]+]].h, [[PG]]/m, [[OP1_1]].h, [[OP2_1]].h -; VBITS_LE_256-DAG: st1h { [[RES_1]].h }, [[PG]], [x[[A1]]] +; VBITS_LE_256-DAG: st1h { [[RES_1]].h }, [[PG]], [x0, x[[IDX_1]], lsl #1] ; CHECK: ret %op1 = load <32 x half>, <32 x half>* %a %op2 = load <32 x half>, <32 x half>* %b @@ -90,24 +89,21 @@ ; CHECK-DAG: ld1h { [[OP2:z[0-9]+]].h }, [[PG]]/z, [x1] ; CHECK-DAG: fadd [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h, [[OP2]].h ; CHECK-DAG: st1h { [[RES]].h }, [[PG]], [x0] -; VBITS_LE_512-DAG: add x[[A1:[0-9]+]], x0, #[[#VBYTES]] -; VBITS_LE_512-DAG: add x[[B1:[0-9]+]], x1, #[[#VBYTES]] -; VBITS_LE_512-DAG: ld1h { [[OP1_1:z[0-9]+]].h }, [[PG]]/z, [x[[A1]]] -; VBITS_LE_512-DAG: ld1h { [[OP2_1:z[0-9]+]].h }, [[PG]]/z, [x[[B1]]] +; VBITS_LE_512-DAG: mov x[[IDX_1:[0-9]+]], #[[#div(VBYTES,2)]] +; VBITS_LE_512-DAG: ld1h { [[OP1_1:z[0-9]+]].h }, [[PG]]/z, [x0, x[[IDX_1]], lsl #1] +; VBITS_LE_512-DAG: ld1h { [[OP2_1:z[0-9]+]].h }, [[PG]]/z, [x1, x[[IDX_1]], lsl #1] ; VBITS_LE_512-DAG: fadd [[RES_1:z[0-9]+]].h, [[PG]]/m, [[OP1_1]].h, [[OP2_1]].h -; VBITS_LE_512-DAG: st1h { [[RES_1]].h }, [[PG]], [x[[A1]]] -; VBITS_LE_256-DAG: add x[[A2:[0-9]+]], x0, #[[#mul(VBYTES,2)]] -; VBITS_LE_256-DAG: add x[[B2:[0-9]+]], x1, #[[#mul(VBYTES,2)]] -; VBITS_LE_256-DAG: ld1h { [[OP1_2:z[0-9]+]].h }, [[PG]]/z, [x[[A2]]] -; VBITS_LE_256-DAG: ld1h { [[OP2_2:z[0-9]+]].h }, [[PG]]/z, [x[[B2]]] +; VBITS_LE_512-DAG: st1h { [[RES_1]].h }, [[PG]], [x0, x[[IDX_1]], lsl #1] +; VBITS_LE_256-DAG: mov x[[IDX_2:[0-9]+]], #[[#mul(div(VBYTES,2),2)]] +; VBITS_LE_256-DAG: ld1h { [[OP1_2:z[0-9]+]].h }, [[PG]]/z, [x0, x[[IDX_2]], lsl #1] +; VBITS_LE_256-DAG: ld1h { [[OP2_2:z[0-9]+]].h 
}, [[PG]]/z, [x1, x[[IDX_2]], lsl #1] ; VBITS_LE_256-DAG: fadd [[RES_2:z[0-9]+]].h, [[PG]]/m, [[OP1_2]].h, [[OP2_2]].h -; VBITS_LE_256-DAG: st1h { [[RES_2]].h }, [[PG]], [x[[A2]]] -; VBITS_LE_256-DAG: add x[[A3:[0-9]+]], x0, #[[#mul(VBYTES,3)]] -; VBITS_LE_256-DAG: add x[[B3:[0-9]+]], x1, #[[#mul(VBYTES,3)]] -; VBITS_LE_256-DAG: ld1h { [[OP1_3:z[0-9]+]].h }, [[PG]]/z, [x[[A3]]] -; VBITS_LE_256-DAG: ld1h { [[OP2_3:z[0-9]+]].h }, [[PG]]/z, [x[[B3]]] +; VBITS_LE_256-DAG: st1h { [[RES_2]].h }, [[PG]], [x0, x[[IDX_2]], lsl #1] +; VBITS_LE_256-DAG: mov x[[IDX_3:[0-9]+]], #[[#mul(div(VBYTES,2),3)]] +; VBITS_LE_256-DAG: ld1h { [[OP1_3:z[0-9]+]].h }, [[PG]]/z, [x0, x[[IDX_3]], lsl #1] +; VBITS_LE_256-DAG: ld1h { [[OP2_3:z[0-9]+]].h }, [[PG]]/z, [x1, x[[IDX_3]], lsl #1] ; VBITS_LE_256-DAG: fadd [[RES_3:z[0-9]+]].h, [[PG]]/m, [[OP1_3]].h, [[OP2_3]].h -; VBITS_LE_256-DAG: st1h { [[RES_3]].h }, [[PG]], [x[[A3]]] +; VBITS_LE_256-DAG: st1h { [[RES_3]].h }, [[PG]], [x0, x[[IDX_3]], lsl #1] ; CHECK: ret %op1 = load <64 x half>, <64 x half>* %a %op2 = load <64 x half>, <64 x half>* %b diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll @@ -72,13 +72,13 @@ ; VBITS_EQ_256-DAG: st1h { [[VEC:z[0-9]+]].h }, [[PG1]], [x8] ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: uunpklo [[UPK_LO:z[0-9]+]].s, z[[LO]].h ; VBITS_EQ_256-DAG: uunpklo [[UPK_HI:z[0-9]+]].s, z[[HI]].h ; VBITS_EQ_256-DAG: fcvt [[RES_LO:z[0-9]+]].s, [[PG2]]/m, [[UPK_LO]].h ; VBITS_EQ_256-DAG: fcvt [[RES_HI:z[0-9]+]].s, [[PG2]]/m, [[UPK_HI]].h ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG2]], [x1] -; VBITS_EQ_256-DAG: st1w { 
[[RES_HI]].s }, [[PG2]], [x[[B_HI]]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x1, x[[NUMELTS]], lsl #2] %op1 = load <16 x half>, <16 x half>* %a %res = fpext <16 x half> %op1 to <16 x float> store <16 x float> %res, <16 x float>* %b @@ -168,7 +168,7 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ldr q[[OP:[0-9]+]], [x0] ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ext v[[HI:[0-9]+]].16b, v[[OP]].16b, v[[OP]].16b, #8 ; VBITS_EQ_256-DAG: uunpklo [[UPK1_LO:z[0-9]+]].s, z[[OP]].h ; VBITS_EQ_256-DAG: uunpklo [[UPK1_HI:z[0-9]+]].s, z[[HI]].h @@ -177,7 +177,7 @@ ; VBITS_EQ_256-DAG: fcvt [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[UPK2_LO]].h ; VBITS_EQ_256-DAG: fcvt [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[UPK2_HI]].h ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x1] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[B_HI]]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x1, x[[NUMELTS]], lsl #3] %op1 = load <8 x half>, <8 x half>* %a %res = fpext <8 x half> %op1 to <8 x double> store <8 x double> %res, <8 x double>* %b @@ -268,13 +268,13 @@ ; VBITS_EQ_256-DAG: st1w { [[VEC:z[0-9]+]].s }, [[PG1]], [x8] ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: uunpklo [[UPK_LO:z[0-9]+]].d, z[[LO]].s ; VBITS_EQ_256-DAG: uunpklo [[UPK_HI:z[0-9]+]].d, z[[HI]].s ; VBITS_EQ_256-DAG: fcvt [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK_LO]].s ; VBITS_EQ_256-DAG: fcvt [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK_HI]].s ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x[[B_HI]]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3] %op1 = load <8 x float>, <8 x float>* %a %res = fpext <8 x 
float> %op1 to <8 x double> store <8 x double> %res, <8 x double>* %b @@ -359,9 +359,9 @@ ; Ensure sensible type legalisation ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG1]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG1]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].h, vl8 ; VBITS_EQ_256-DAG: fcvt [[CVT_LO:z[0-9]+]].h, [[PG2]]/m, [[LO]].s @@ -460,9 +460,9 @@ ; Ensure sensible type legalisation ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG1]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG1]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d ; VBITS_EQ_256-DAG: fcvt [[CVT_LO:z[0-9]+]].h, [[PG2]]/m, [[LO]].d ; VBITS_EQ_256-DAG: fcvt [[CVT_HI:z[0-9]+]].h, [[PG2]]/m, [[HI]].d @@ -558,9 +558,9 @@ ; Ensure sensible type legalisation ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG1]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG1]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].s, vl4 ; VBITS_EQ_256-DAG: fcvt [[CVT_LO:z[0-9]+]].s, [[PG2]]/m, [[LO]].d diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-minmax.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-minmax.ll --- 
a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-minmax.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-minmax.ll @@ -68,16 +68,15 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: fmaxnm [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h ; VBITS_EQ_256-DAG: fmaxnm [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op1 = load <32 x half>, <32 x half>* %a %op2 = load <32 x half>, <32 x half>* %b @@ -160,16 +159,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: fmaxnm [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s ; VBITS_EQ_256-DAG: fmaxnm [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op1 = load <16 x float>, <16 x float>* %a %op2 = load <16 x float>, <16 x float>* %b @@ -252,16 +250,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: fmaxnm [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d ; VBITS_EQ_256-DAG: fmaxnm [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x double>, <8 x double>* %a %op2 = load <8 x double>, <8 x double>* %b @@ -348,16 +345,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: fminnm [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h ; VBITS_EQ_256-DAG: fminnm [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op1 = load <32 x half>, <32 x half>* %a %op2 = load <32 x half>, <32 x half>* %b @@ -440,16 +436,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: fminnm [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s ; VBITS_EQ_256-DAG: fminnm [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op1 = load <16 x float>, <16 x float>* %a %op2 = load <16 x float>, <16 x float>* %b @@ -532,16 +527,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: fminnm [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d ; VBITS_EQ_256-DAG: fminnm [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x double>, <8 x double>* %a %op2 = load <8 x double>, <8 x double>* %b @@ -628,16 +622,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: fmax [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h ; VBITS_EQ_256-DAG: fmax [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op1 = load <32 x half>, <32 x half>* %a %op2 = load <32 x half>, <32 x half>* %b @@ -720,16 +713,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: fmax [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s ; VBITS_EQ_256-DAG: fmax [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op1 = load <16 x float>, <16 x float>* %a %op2 = load <16 x float>, <16 x float>* %b @@ -812,16 +804,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: fmax [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d ; VBITS_EQ_256-DAG: fmax [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x double>, <8 x double>* %a %op2 = load <8 x double>, <8 x double>* %b @@ -908,16 +899,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: fmin [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h ; VBITS_EQ_256-DAG: fmin [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op1 = load <32 x half>, <32 x half>* %a %op2 = load <32 x half>, <32 x half>* %b @@ -1000,16 +990,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: fmin [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s ; VBITS_EQ_256-DAG: fmin [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op1 = load <16 x float>, <16 x float>* %a %op2 = load <16 x float>, <16 x float>* %b @@ -1092,16 +1081,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: fmin [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d ; VBITS_EQ_256-DAG: fmin [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x double>, <8 x double>* %a %op2 = load <8 x double>, <8 x double>* %b diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll @@ -64,9 +64,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: fadda h0, [[PG]], h0, [[LO]].h ; VBITS_EQ_256-NEXT: fadda h0, [[PG]], h0, [[HI]].h ; VBITS_EQ_256-NEXT: ret @@ -137,9 +137,9 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: fadda s0, [[PG]], s0, [[LO]].s ; VBITS_EQ_256-NEXT: fadda s0, [[PG]], s0, [[HI]].s ; VBITS_EQ_256-NEXT: ret @@ -210,9 +210,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: fadda d0, [[PG]], d0, [[LO]].d ; VBITS_EQ_256-NEXT: fadda d0, [[PG]], d0, [[HI]].d ; VBITS_EQ_256-NEXT: ret @@ -291,10 +291,10 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: fadd [[ADD:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] +; VBITS_EQ_256-DAG: fadd [[ADD:z[0-9]+]].h, [[PG]]/m, [[HI]].h, [[LO]].h ; VBITS_EQ_256-DAG: faddv h1, [[PG]], [[ADD]].h ; VBITS_EQ_256-DAG: fadd h0, h0, [[RDX]] ; VBITS_EQ_256-NEXT: ret @@ -370,10 +370,10 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: fadd [[ADD:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] +; VBITS_EQ_256-DAG: fadd [[ADD:z[0-9]+]].s, [[PG]]/m, [[HI]].s, [[LO]].s ; VBITS_EQ_256-DAG: faddv [[RDX:s[0-9]+]], [[PG]], [[ADD]].s ; VBITS_EQ_256-DAG: fadd s0, s0, [[RDX]] ; VBITS_EQ_256-NEXT: ret @@ -447,10 +447,10 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]] -; VBITS_EQ_256-DAG: fadd [[ADD:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] +; VBITS_EQ_256-DAG: fadd [[ADD:z[0-9]+]].d, [[PG]]/m, [[HI]].d, [[LO]].d ; VBITS_EQ_256-DAG: faddv [[RDX:d[0-9]+]], [[PG]], [[ADD]].d ; VBITS_EQ_256-DAG: fadd d0, d0, [[RDX]] ; VBITS_EQ_256-NEXT: ret @@ -525,10 +525,10 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: fmaxnm [[MAX:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] +; VBITS_EQ_256-DAG: fmaxnm [[MAX:z[0-9]+]].h, [[PG]]/m, [[HI]].h, [[LO]].h ; VBITS_EQ_256-DAG: fmaxnmv h0, [[PG]], [[MAX]].h ; VBITS_EQ_256-NEXT: ret %op = load <32 x half>, <32 x half>* %a @@ -596,10 +596,10 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: fmaxnm [[MAX:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] +; VBITS_EQ_256-DAG: fmaxnm [[MAX:z[0-9]+]].s, [[PG]]/m, [[HI]].s, [[LO]].s ; VBITS_EQ_256-DAG: fmaxnmv s0, [[PG]], [[MAX]].s ; VBITS_EQ_256-NEXT: ret %op = load <16 x float>, <16 x float>* %a @@ -667,10 +667,10 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: fmaxnm [[MAX:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] +; VBITS_EQ_256-DAG: fmaxnm [[MAX:z[0-9]+]].d, [[PG]]/m, [[HI]].d, [[LO]].d ; VBITS_EQ_256-DAG: fmaxnmv d0, [[PG]], [[MAX]].d ; VBITS_EQ_256-NEXT: ret %op = load <8 x double>, <8 x double>* %a @@ -742,10 +742,10 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: fminnm [[MIN:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] +; VBITS_EQ_256-DAG: fminnm [[MIN:z[0-9]+]].h, [[PG]]/m, [[HI]].h, [[LO]].h ; VBITS_EQ_256-DAG: fminnmv h0, [[PG]], [[MIN]].h ; VBITS_EQ_256-NEXT: ret %op = load <32 x half>, <32 x half>* %a @@ -813,10 +813,10 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: fminnm [[MIN:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] +; VBITS_EQ_256-DAG: fminnm [[MIN:z[0-9]+]].s, [[PG]]/m, [[HI]].s, [[LO]].s ; VBITS_EQ_256-DAG: fminnmv s0, [[PG]], [[MIN]].s ; VBITS_EQ_256-NEXT: ret %op = load <16 x float>, <16 x float>* %a @@ -884,10 +884,10 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: fminnm [[MIN:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] +; VBITS_EQ_256-DAG: fminnm [[MIN:z[0-9]+]].d, [[PG]]/m, [[HI]].d, [[LO]].d ; VBITS_EQ_256-DAG: fminnmv d0, [[PG]], [[MIN]].d ; VBITS_EQ_256-NEXT: ret %op = load <8 x double>, <8 x double>* %a diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll @@ -65,13 +65,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: frintp [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h ; VBITS_EQ_256-DAG: frintp [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op = load <32 x half>, <32 x half>* %a %res = call <32 x half> @llvm.ceil.v32f16(<32 x half> %op) @@ -146,13 +146,13 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: frintp [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s ; VBITS_EQ_256-DAG: frintp [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op = load <16 x float>, <16 x float>* %a %res = call <16 x float> @llvm.ceil.v16f32(<16 x float> %op) @@ -227,13 +227,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: frintp [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d ; VBITS_EQ_256-DAG: frintp [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op = load <8 x double>, <8 x double>* %a %res = call <8 x double> @llvm.ceil.v8f64(<8 x double> %op) @@ -312,13 +312,13 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: frintm [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h ; VBITS_EQ_256-DAG: frintm [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op = load <32 x half>, <32 x half>* %a %res = call <32 x half> @llvm.floor.v32f16(<32 x half> %op) @@ -393,13 +393,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: frintm [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s ; VBITS_EQ_256-DAG: frintm [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op = load <16 x float>, <16 x float>* %a %res = call <16 x float> @llvm.floor.v16f32(<16 x float> %op) @@ -474,13 +474,13 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: frintm [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d ; VBITS_EQ_256-DAG: frintm [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op = load <8 x double>, <8 x double>* %a %res = call <8 x double> @llvm.floor.v8f64(<8 x double> %op) @@ -559,13 +559,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: frinti [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h ; VBITS_EQ_256-DAG: frinti [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op = load <32 x half>, <32 x half>* %a %res = call <32 x half> @llvm.nearbyint.v32f16(<32 x half> %op) @@ -640,13 +640,13 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: frinti [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s ; VBITS_EQ_256-DAG: frinti [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op = load <16 x float>, <16 x float>* %a %res = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %op) @@ -721,13 +721,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: frinti [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d ; VBITS_EQ_256-DAG: frinti [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op = load <8 x double>, <8 x double>* %a %res = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %op) @@ -806,13 +806,13 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: frintx [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h ; VBITS_EQ_256-DAG: frintx [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op = load <32 x half>, <32 x half>* %a %res = call <32 x half> @llvm.rint.v32f16(<32 x half> %op) @@ -887,13 +887,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: frintx [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s ; VBITS_EQ_256-DAG: frintx [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op = load <16 x float>, <16 x float>* %a %res = call <16 x float> @llvm.rint.v16f32(<16 x float> %op) @@ -968,13 +968,13 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: frintx [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d ; VBITS_EQ_256-DAG: frintx [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op = load <8 x double>, <8 x double>* %a %res = call <8 x double> @llvm.rint.v8f64(<8 x double> %op) @@ -1053,13 +1053,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: frinta [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h ; VBITS_EQ_256-DAG: frinta [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op = load <32 x half>, <32 x half>* %a %res = call <32 x half> @llvm.round.v32f16(<32 x half> %op) @@ -1134,13 +1134,13 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: frinta [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s ; VBITS_EQ_256-DAG: frinta [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op = load <16 x float>, <16 x float>* %a %res = call <16 x float> @llvm.round.v16f32(<16 x float> %op) @@ -1215,13 +1215,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: frinta [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d ; VBITS_EQ_256-DAG: frinta [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op = load <8 x double>, <8 x double>* %a %res = call <8 x double> @llvm.round.v8f64(<8 x double> %op) @@ -1300,13 +1300,13 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h ; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op = load <32 x half>, <32 x half>* %a %res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op) @@ -1381,13 +1381,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s ; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op = load <16 x float>, <16 x float>* %a %res = call <16 x float> @llvm.roundeven.v16f32(<16 x float> %op) @@ -1462,13 +1462,13 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d ; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op = load <8 x double>, <8 x double>* %a %res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %op) @@ -1547,13 +1547,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: frintz [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h ; VBITS_EQ_256-DAG: frintz [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op = load <32 x half>, <32 x half>* %a %res = call <32 x half> @llvm.trunc.v32f16(<32 x half> %op) @@ -1628,13 +1628,13 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: frintz [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s ; VBITS_EQ_256-DAG: frintz [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op = load <16 x float>, <16 x float>* %a %res = call <16 x float> @llvm.trunc.v16f32(<16 x float> %op) @@ -1709,13 +1709,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: frintz [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d ; VBITS_EQ_256-DAG: frintz [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op = load <8 x double>, <8 x double>* %a %res = call <8 x double> @llvm.trunc.v8f64(<8 x double> %op) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll @@ -69,13 +69,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: fcvtzu [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[LO]].h ; VBITS_EQ_256-DAG: fcvtzu [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x1] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x1, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op1 = load <32 x half>, <32 x half>* %a %res = fptoui <32 x half> %op1 to <32 x i16> @@ -163,13 +163,13 @@ ; VBITS_EQ_256-DAG: st1h { [[VEC:z[0-9]+]].h }, [[PG1]], [x8] ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: uunpklo [[UPK_LO:z[0-9]+]].s, z[[LO]].h ; VBITS_EQ_256-DAG: uunpklo [[UPK_HI:z[0-9]+]].s, z[[HI]].h ; VBITS_EQ_256-DAG: fcvtzu [[RES_LO:z[0-9]+]].s, [[PG2]]/m, [[UPK_LO]].h ; VBITS_EQ_256-DAG: fcvtzu [[RES_HI:z[0-9]+]].s, [[PG2]]/m, [[UPK_HI]].h ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG2]], [x1] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x8] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x1, x[[NUMELTS]], lsl #2] %op1 = load <16 x half>, <16 x half>* %a %res = fptoui <16 x half> %op1 to <16 x i32> store <16 x i32> %res, <16 x i32>* %b @@ -260,7 +260,7 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ldr q[[OP:[0-9]+]], [x0] ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ext v[[HI:[0-9]+]].16b, v[[LO:[0-9]+]].16b, v[[OP]].16b, #8 ; VBITS_EQ_256-DAG: uunpklo [[UPK1_LO:z[0-9]+]].s, z[[LO]].h ; VBITS_EQ_256-DAG: uunpklo [[UPK1_HI:z[0-9]+]].s, z[[HI]].h @@ -269,7 +269,7 @@ ; VBITS_EQ_256-DAG: fcvtzu [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK2_LO]].h ; VBITS_EQ_256-DAG: fcvtzu [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK2_HI]].h ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x8] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x half>, <8 x half>* %a %res = fptoui <8 x half> %op1 to <8 x i64> @@ -363,9 +363,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x8] +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].h, vl8 ; VBITS_EQ_256-DAG: fcvtzu [[CVT_HI:z[0-9]+]].s, [[PG2]]/m, [[HI]].s @@ -459,14 +459,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x8] -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: fcvtzu [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[LO]].s ; VBITS_EQ_256-DAG: fcvtzu [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x1] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x8] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x1, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op1 = load <16 x float>, <16 x float>* %a %res = fptoui <16 x float> %op1 to <16 x i32> @@ -555,13 +554,13 @@ ; VBITS_EQ_256-DAG: st1w { [[VEC:z[0-9]+]].s }, [[PG1]], [x8] ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: uunpklo [[UPK_LO:z[0-9]+]].d, z[[LO]].s ; VBITS_EQ_256-DAG: uunpklo [[UPK_HI:z[0-9]+]].d, z[[HI]].s ; VBITS_EQ_256-DAG: fcvtzu [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK_LO]].s ; VBITS_EQ_256-DAG: fcvtzu [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK_HI]].s ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x8] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3] %op1 = load <8 x float>, <8 x float>* %a %res = fptoui <8 x float> %op1 to <8 x i64> store <8 x i64> %res, <8 x i64>* %b @@ -651,9 +650,9 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d ; VBITS_EQ_256-DAG: fcvtzu [[CVT_HI:z[0-9]+]].d, [[PG2]]/m, [[HI]].d ; VBITS_EQ_256-DAG: fcvtzu [[CVT_LO:z[0-9]+]].d, [[PG2]]/m, [[LO]].d @@ -752,9 +751,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].s, vl4 ; VBITS_EQ_256-DAG: fcvtzu [[CVT_HI:z[0-9]+]].d, [[PG2]]/m, [[HI]].d @@ -849,14 +848,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8] -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: fcvtzu [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[LO]].d ; VBITS_EQ_256-DAG: fcvtzu [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x1] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x8] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x double>, <8 x double>* %a %res = fptoui <8 x double> %op1 to <8 x i64> @@ -939,14 +937,13 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x8] -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: fcvtzs [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[LO]].h ; VBITS_EQ_256-DAG: fcvtzs [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x1] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x8] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x1, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op1 = load <32 x half>, <32 x half>* %a %res = fptosi <32 x half> %op1 to <32 x i16> @@ -1034,13 +1031,13 @@ ; VBITS_EQ_256-DAG: st1h { [[VEC:z[0-9]+]].h }, [[PG1]], [x8] ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; 
VBITS_EQ_256-DAG: uunpklo [[UPK_LO:z[0-9]+]].s, z[[LO]].h ; VBITS_EQ_256-DAG: uunpklo [[UPK_HI:z[0-9]+]].s, z[[HI]].h ; VBITS_EQ_256-DAG: fcvtzs [[RES_LO:z[0-9]+]].s, [[PG2]]/m, [[UPK_LO]].h ; VBITS_EQ_256-DAG: fcvtzs [[RES_HI:z[0-9]+]].s, [[PG2]]/m, [[UPK_HI]].h ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG2]], [x1] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x8] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x1, x[[NUMELTS]], lsl #2] %op1 = load <16 x half>, <16 x half>* %a %res = fptosi <16 x half> %op1 to <16 x i32> store <16 x i32> %res, <16 x i32>* %b @@ -1131,7 +1128,7 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ldr q[[OP:[0-9]+]], [x0] ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ext v[[HI:[0-9]+]].16b, v[[LO:[0-9]+]].16b, v[[OP]].16b, #8 ; VBITS_EQ_256-DAG: uunpklo [[UPK1_LO:z[0-9]+]].s, z[[LO]].h ; VBITS_EQ_256-DAG: uunpklo [[UPK1_HI:z[0-9]+]].s, z[[HI]].h @@ -1140,7 +1137,7 @@ ; VBITS_EQ_256-DAG: fcvtzs [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK2_LO]].h ; VBITS_EQ_256-DAG: fcvtzs [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK2_HI]].h ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x8] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x half>, <8 x half>* %a %res = fptosi <8 x half> %op1 to <8 x i64> @@ -1234,9 +1231,9 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x8] +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].h, vl8 ; VBITS_EQ_256-DAG: fcvtzs [[CVT_HI:z[0-9]+]].s, [[PG2]]/m, [[HI]].s @@ -1330,14 +1327,13 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x8] -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: fcvtzs [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[LO]].s ; VBITS_EQ_256-DAG: fcvtzs [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x1] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x8] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x1, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op1 = load <16 x float>, <16 x float>* %a %res = fptosi <16 x float> %op1 to <16 x i32> @@ -1426,13 +1422,13 @@ ; VBITS_EQ_256-DAG: st1w { [[VEC:z[0-9]+]].s }, [[PG1]], [x8] ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: uunpklo [[UPK_LO:z[0-9]+]].d, z[[LO]].s ; VBITS_EQ_256-DAG: uunpklo [[UPK_HI:z[0-9]+]].d, z[[HI]].s ; VBITS_EQ_256-DAG: fcvtzs [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK_LO]].s ; VBITS_EQ_256-DAG: fcvtzs [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK_HI]].s ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1] -; 
VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x8] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3] %op1 = load <8 x float>, <8 x float>* %a %res = fptosi <8 x float> %op1 to <8 x i64> store <8 x i64> %res, <8 x i64>* %b @@ -1522,9 +1518,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d ; VBITS_EQ_256-DAG: fcvtzs [[CVT_HI:z[0-9]+]].d, [[PG2]]/m, [[HI]].d ; VBITS_EQ_256-DAG: fcvtzs [[CVT_LO:z[0-9]+]].d, [[PG2]]/m, [[LO]].d @@ -1623,9 +1619,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].s, vl4 ; VBITS_EQ_256-DAG: fcvtzs [[CVT_HI:z[0-9]+]].d, [[PG2]]/m, [[HI]].d @@ -1720,14 +1716,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8] -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: fcvtzs [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[LO]].d ; VBITS_EQ_256-DAG: fcvtzs [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x1] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x8] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x double>, <8 x double>* %a %res = fptosi <8 x double> %op1 to <8 x i64> diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-compares.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-compares.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-compares.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-compares.ll @@ -73,17 +73,17 @@ ; Ensure sensible type legalisation ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w[[OFF_HI:[0-9]+]], #32 +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 ; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[OFF_HI]]] +; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[OFF_HI]]] +; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: cmpeq [[CMP_LO:p[0-9]+]].b, [[PG]]/z, [[OP1_LO]].b, [[OP2_LO]].b ; VBITS_EQ_256-DAG: cmpeq [[CMP_HI:p[0-9]+]].b, [[PG]]/z, [[OP1_HI]].b, [[OP2_HI]].b ; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].b, [[CMP_LO]]/z, #-1 ; VBITS_EQ_256-DAG: 
mov [[SEXT_HI:z[0-9]+]].b, [[CMP_HI]]/z, #-1 ; VBITS_EQ_256-DAG: st1b { [[SEXT_LO]].b }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1b { [[SEXT_HI]].b }, [[PG]], [x0, x[[OFF_HI]]] +; VBITS_EQ_256-DAG: st1b { [[SEXT_HI]].b }, [[PG]], [x0, x[[NUMELTS]]] ; VBITS_EQ_256-NEXT: ret %op1 = load <64 x i8>, <64 x i8>* %a %op2 = load <64 x i8>, <64 x i8>* %b @@ -176,18 +176,17 @@ ; Ensure sensible type legalisation ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: cmpeq [[CMP_LO:p[0-9]+]].h, [[PG]]/z, [[OP1_LO]].h, [[OP2_LO]].h ; VBITS_EQ_256-DAG: cmpeq [[CMP_HI:p[0-9]+]].h, [[PG]]/z, [[OP1_HI]].h, [[OP2_HI]].h ; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].h, [[CMP_LO]]/z, #-1 ; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].h, [[CMP_HI]]/z, #-1 ; VBITS_EQ_256-DAG: st1h { [[SEXT_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[SEXT_HI]].h }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1h { [[SEXT_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op1 = load <32 x i16>, <32 x i16>* %a %op2 = load <32 x i16>, <32 x i16>* %b @@ -280,18 +279,17 @@ ; Ensure sensible type legalisation ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; 
VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: cmpeq [[CMP_LO:p[0-9]+]].s, [[PG]]/z, [[OP1_LO]].s, [[OP2_LO]].s ; VBITS_EQ_256-DAG: cmpeq [[CMP_HI:p[0-9]+]].s, [[PG]]/z, [[OP1_HI]].s, [[OP2_HI]].s ; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].s, [[CMP_LO]]/z, #-1 ; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].s, [[CMP_HI]]/z, #-1 ; VBITS_EQ_256-DAG: st1w { [[SEXT_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[SEXT_HI]].s }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1w { [[SEXT_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op1 = load <16 x i32>, <16 x i32>* %a %op2 = load <16 x i32>, <16 x i32>* %b @@ -384,18 +382,17 @@ ; Ensure sensible type legalisation ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: cmpeq [[CMP_LO:p[0-9]+]].d, [[PG]]/z, [[OP1_LO]].d, [[OP2_LO]].d ; VBITS_EQ_256-DAG: cmpeq [[CMP_HI:p[0-9]+]].d, [[PG]]/z, [[OP1_HI]].d, [[OP2_HI]].d ; VBITS_EQ_256-DAG: mov [[SEXT_LO:z[0-9]+]].d, [[CMP_LO]]/z, #-1 ; VBITS_EQ_256-DAG: mov [[SEXT_HI:z[0-9]+]].d, 
[[CMP_HI]]/z, #-1 ; VBITS_EQ_256-DAG: st1d { [[SEXT_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[SEXT_HI]].d }, [[PG]], [x[[A_HI]]] +; VBITS_EQ_256-DAG: st1d { [[SEXT_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x i64>, <8 x i64>* %a %op2 = load <8 x i64>, <8 x i64>* %b diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll @@ -151,9 +151,9 @@ ; VBITS_EQ_256-DAG: sunpklo [[A_WORDS_LO:z[0-9]+]].s, [[A_HALFS_LO]].h ; VBITS_EQ_256-DAG: sunpklo [[A_WORDS_HI:z[0-9]+]].s, [[A_HALFS_HI]].h ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[OUT_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: st1w { [[A_WORDS_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[A_WORDS_HI]].s }, [[PG]], [x[[OUT_HI]]] +; VBITS_EQ_256-DAG: st1w { [[A_WORDS_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %b = sext <16 x i8> %a to <16 x i32> store <16 x i32> %b, <16 x i32>* %out @@ -513,9 +513,9 @@ ; VBITS_EQ_256-DAG: uunpklo [[A_WORDS_LO:z[0-9]+]].s, [[A_HALFS_LO]].h ; VBITS_EQ_256-DAG: uunpklo [[A_WORDS_HI:z[0-9]+]].s, [[A_HALFS_HI]].h ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[OUT_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[OUT_HI:[0-9]+]], #8 ; VBITS_EQ_256-DAG: st1w { [[A_WORDS_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[A_WORDS_HI]].s }, [[PG]], [x[[OUT_HI]]] +; VBITS_EQ_256-DAG: st1w { [[A_WORDS_HI]].s }, [[PG]], [x0, x[[OUT_HI]], lsl #2] ; VBITS_EQ_256-NEXT: ret %b = zext <16 x i8> %a to <16 x i32> store <16 x i32> %b, <16 x i32>* %out diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll --- 
a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll @@ -68,15 +68,15 @@ ; ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32 +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 ; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]] +; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]] +; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: smax [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b ; VBITS_EQ_256-DAG: smax [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[A]]] +; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]] ; VBITS_EQ_256-NEXT: ret %op1 = load <64 x i8>, <64 x i8>* %a %op2 = load <64 x i8>, <64 x i8>* %b @@ -159,16 +159,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: smax [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h ; VBITS_EQ_256-DAG: smax [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op1 = load <32 x i16>, <32 x i16>* %a %op2 = load <32 x i16>, <32 x i16>* %b @@ -251,16 +250,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: smax [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s ; VBITS_EQ_256-DAG: smax [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op1 = load <16 x i32>, <16 x i32>* %a %op2 = load <16 x i32>, <16 x i32>* %b @@ -345,16 +343,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: smax [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d ; VBITS_EQ_256-DAG: smax [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x i64>, <8 x i64>* %a %op2 = load <8 x i64>, <8 x i64>* %b @@ -441,15 +438,15 @@ ; ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32 +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 ; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]] +; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]] +; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: smin [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b ; VBITS_EQ_256-DAG: smin [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[A]]] +; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]] %op1 = load <64 x i8>, <64 x i8>* %a %op2 = load <64 x i8>, <64 x i8>* %b %res = call <64 x i8> @llvm.smin.v64i8(<64 x i8> %op1, <64 x i8> %op2) @@ -531,16 +528,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: smin [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h ; VBITS_EQ_256-DAG: smin [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op1 = load <32 x i16>, <32 x i16>* %a %op2 = load <32 x i16>, <32 x i16>* %b @@ -623,16 +619,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: smin [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s ; VBITS_EQ_256-DAG: smin [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op1 = load <16 x i32>, <16 x i32>* %a %op2 = load <16 x i32>, <16 x i32>* %b @@ -717,16 +712,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: smin [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d ; VBITS_EQ_256-DAG: smin [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x i64>, <8 x i64>* %a %op2 = load <8 x i64>, <8 x i64>* %b @@ -813,15 +807,15 @@ ; ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32 +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 ; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]] +; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]] +; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: umax [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b ; VBITS_EQ_256-DAG: umax [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[A]]] +; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]] ; VBITS_EQ_256-NEXT: ret %op1 = load <64 x i8>, <64 x i8>* %a %op2 = load <64 x i8>, <64 x i8>* %b @@ -904,16 +898,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: umax [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h ; VBITS_EQ_256-DAG: umax [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op1 = load <32 x i16>, <32 x i16>* %a %op2 = load <32 x i16>, <32 x i16>* %b @@ -996,16 +989,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: umax [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s ; VBITS_EQ_256-DAG: umax [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op1 = load <16 x i32>, <16 x i32>* %a %op2 = load <16 x i32>, <16 x i32>* %b @@ -1090,16 +1082,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: umax [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d ; VBITS_EQ_256-DAG: umax [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x i64>, <8 x i64>* %a %op2 = load <8 x i64>, <8 x i64>* %b @@ -1186,15 +1177,15 @@ ; ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32 +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 ; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]] +; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[A]]] +; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: umin [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b ; VBITS_EQ_256-DAG: umin [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[A]]] +; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]] %op1 = load <64 x i8>, <64 x i8>* %a %op2 = load <64 x i8>, <64 x i8>* %b %res = call <64 x i8> @llvm.umin.v64i8(<64 x i8> %op1, <64 x i8> %op2) @@ -1276,16 +1267,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: umin [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h ; VBITS_EQ_256-DAG: umin [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op1 = load <32 x i16>, <32 x i16>* %a %op2 = load <32 x i16>, <32 x i16>* %b @@ -1368,16 +1358,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: umin [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s ; VBITS_EQ_256-DAG: umin [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op1 = load <16 x i32>, <16 x i32>* %a %op2 = load <16 x i32>, <16 x i32>* %b @@ -1462,16 +1451,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: umin [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d ; VBITS_EQ_256-DAG: umin [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x i64>, <8 x i64>* %a %op2 = load <8 x i64>, <8 x i64>* %b diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll @@ -64,9 +64,9 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32 +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 ; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].b, [[PG]]/m, [[HI]].b, [[LO]].b ; VBITS_EQ_256-DAG: addv [[REDUCE:d[0-9]+]], [[PG]], [[ADD]].b ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] @@ -140,10 +140,10 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] +; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].h, [[PG]]/m, [[HI]].h, [[LO]].h ; VBITS_EQ_256-DAG: addv [[REDUCE:d[0-9]+]], [[PG]], [[ADD]].h ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] ; VBITS_EQ_256-NEXT: ret @@ -216,10 +216,10 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] +; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].s, [[PG]]/m, [[HI]].s, [[LO]].s ; VBITS_EQ_256-DAG: addv [[REDUCE:d[0-9]+]], [[PG]], [[ADD]].s ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] ; VBITS_EQ_256-NEXT: ret @@ -292,10 +292,10 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] +; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].d, [[PG]]/m, [[HI]].d, [[LO]].d ; VBITS_EQ_256-DAG: addv [[REDUCE:d[0-9]+]], [[PG]], [[ADD]].d ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] ; VBITS_EQ_256-NEXT: ret @@ -372,9 +372,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32 +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 ; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].b, [[PG]]/m, [[HI]].b, [[LO]].b ; VBITS_EQ_256-DAG: smaxv b[[REDUCE:[0-9]+]], [[PG]], [[MAX]].b ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] @@ -448,10 +448,10 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] +; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].h, [[PG]]/m, [[HI]].h, [[LO]].h ; VBITS_EQ_256-DAG: smaxv h[[REDUCE:[0-9]+]], [[PG]], [[MAX]].h ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] ; VBITS_EQ_256-NEXT: ret @@ -524,10 +524,10 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] +; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].s, [[PG]]/m, [[HI]].s, [[LO]].s ; VBITS_EQ_256-DAG: smaxv [[REDUCE:s[0-9]+]], [[PG]], [[MAX]].s ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]] ; VBITS_EQ_256-NEXT: ret @@ -602,10 +602,10 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] +; VBITS_EQ_256-DAG: smax [[MAX:z[0-9]+]].d, [[PG]]/m, [[HI]].d, [[LO]].d ; VBITS_EQ_256-DAG: smaxv [[REDUCE:d[0-9]+]], [[PG]], [[MAX]].d ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] ; VBITS_EQ_256-NEXT: ret @@ -682,9 +682,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32 +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 ; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].b, [[PG]]/m, [[HI]].b, [[LO]].b ; VBITS_EQ_256-DAG: sminv b[[REDUCE:[0-9]+]], [[PG]], [[MIN]].b ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] @@ -758,10 +758,10 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] +; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].h, [[PG]]/m, [[HI]].h, [[LO]].h ; VBITS_EQ_256-DAG: sminv h[[REDUCE:[0-9]+]], [[PG]], [[MIN]].h ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] ; VBITS_EQ_256-NEXT: ret @@ -834,10 +834,10 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] +; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].s, [[PG]]/m, [[HI]].s, [[LO]].s ; VBITS_EQ_256-DAG: sminv [[REDUCE:s[0-9]+]], [[PG]], [[MIN]].s ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]] ; VBITS_EQ_256-NEXT: ret @@ -912,10 +912,10 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] +; VBITS_EQ_256-DAG: smin [[MIN:z[0-9]+]].d, [[PG]]/m, [[HI]].d, [[LO]].d ; VBITS_EQ_256-DAG: sminv [[REDUCE:d[0-9]+]], [[PG]], [[MIN]].d ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] ; VBITS_EQ_256-NEXT: ret @@ -992,9 +992,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32 +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 ; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].b, [[PG]]/m, [[HI]].b, [[LO]].b ; VBITS_EQ_256-DAG: umaxv b[[REDUCE:[0-9]+]], [[PG]], [[MAX]].b ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] @@ -1068,10 +1068,10 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] +; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].h, [[PG]]/m, [[HI]].h, [[LO]].h ; VBITS_EQ_256-DAG: umaxv h[[REDUCE:[0-9]+]], [[PG]], [[MAX]].h ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] ; VBITS_EQ_256-NEXT: ret @@ -1144,10 +1144,10 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] +; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].s, [[PG]]/m, [[HI]].s, [[LO]].s ; VBITS_EQ_256-DAG: umaxv [[REDUCE:s[0-9]+]], [[PG]], [[MAX]].s ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]] ; VBITS_EQ_256-NEXT: ret @@ -1222,10 +1222,10 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] +; VBITS_EQ_256-DAG: umax [[MAX:z[0-9]+]].d, [[PG]]/m, [[HI]].d, [[LO]].d ; VBITS_EQ_256-DAG: umaxv [[REDUCE:d[0-9]+]], [[PG]], [[MAX]].d ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] ; VBITS_EQ_256-NEXT: ret @@ -1302,9 +1302,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32 +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 ; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].b, [[PG]]/m, [[HI]].b, [[LO]].b ; VBITS_EQ_256-DAG: uminv b[[REDUCE:[0-9]+]], [[PG]], [[MIN]].b ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] @@ -1378,10 +1378,10 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].h, [[PG]]/m, [[LO]].h, [[HI]].h +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] +; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].h, [[PG]]/m, [[HI]].h, [[LO]].h ; VBITS_EQ_256-DAG: uminv h[[REDUCE:[0-9]+]], [[PG]], [[MIN]].h ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] ; VBITS_EQ_256-NEXT: ret @@ -1454,10 +1454,10 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].s, [[PG]]/m, [[LO]].s, [[HI]].s +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] +; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].s, [[PG]]/m, [[HI]].s, [[LO]].s ; VBITS_EQ_256-DAG: uminv [[REDUCE:s[0-9]+]], [[PG]], [[MIN]].s ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]] ; VBITS_EQ_256-NEXT: ret @@ -1532,10 +1532,10 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] -; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].d, [[PG]]/m, [[LO]].d, [[HI]].d +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] +; VBITS_EQ_256-DAG: umin [[MIN:z[0-9]+]].d, [[PG]]/m, [[HI]].d, [[LO]].d ; VBITS_EQ_256-DAG: uminv [[REDUCE:d[0-9]+]], [[PG]], [[MIN]].d ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] ; VBITS_EQ_256-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-shifts.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-shifts.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-shifts.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-shifts.ll @@ -70,15 +70,15 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w[[OFFSET_HI:[0-9]+]], #32 +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 ; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[OFFSET_HI]]] +; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[OFFSET_HI]]] +; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: asr [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b ; VBITS_EQ_256-DAG: asr [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[OFFSET_HI]] +; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]] ; VBITS_EQ_256-NEXT: ret %op1 = load <64 x i8>, <64 x i8>* %a %op2 = 
load <64 x i8>, <64 x i8>* %b @@ -163,16 +163,15 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: asr [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h ; VBITS_EQ_256-DAG: asr [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op1 = load <32 x i16>, <32 x i16>* %a %op2 = load <32 x i16>, <32 x i16>* %b @@ -257,16 +256,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: asr [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s ; VBITS_EQ_256-DAG: asr [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op1 = load <16 x i32>, <16 x i32>* %a %op2 = load <16 x i32>, <16 x i32>* %b @@ -351,16 +349,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: asr [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d ; VBITS_EQ_256-DAG: asr [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x i64>, <8 x i64>* %a %op2 = load <8 x i64>, <8 x i64>* %b @@ -449,15 +446,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w[[OFFSET_HI:[0-9]+]], #32 +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 ; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[OFFSET_HI]]] +; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[OFFSET_HI]]] +; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: lsr [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b ; VBITS_EQ_256-DAG: lsr [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[OFFSET_HI]] +; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]] ; VBITS_EQ_256-NEXT: ret %op1 = load <64 x i8>, <64 x i8>* %a %op2 = load <64 x i8>, <64 x i8>* %b @@ -542,16 +539,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: lsr [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h ; VBITS_EQ_256-DAG: lsr [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op1 = load <32 x i16>, <32 x i16>* %a %op2 = load <32 x i16>, <32 x i16>* %b @@ -636,16 +632,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: lsr [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s ; VBITS_EQ_256-DAG: lsr [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op1 = load <16 x i32>, <16 x i32>* %a %op2 = load <16 x i32>, <16 x i32>* %b @@ -730,16 +725,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: lsr [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d ; VBITS_EQ_256-DAG: lsr [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x i64>, <8 x i64>* %a %op2 = load <8 x i64>, <8 x i64>* %b @@ -826,15 +820,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w[[OFFSET_HI:[0-9]+]], #32 +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 ; VBITS_EQ_256-DAG: ld1b { [[OP1_LO:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[OFFSET_HI]]] +; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[OFFSET_HI]]] +; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: lsl [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP1_LO]].b, [[OP2_LO]].b ; VBITS_EQ_256-DAG: lsl [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP1_HI]].b, [[OP2_HI]].b ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[OFFSET_HI]] +; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]] ; VBITS_EQ_256-NEXT: ret %op1 = load <64 x i8>, <64 x i8>* %a %op2 = load <64 x i8>, <64 x i8>* %b @@ -917,16 +911,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP1_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: lsl [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP1_LO]].h, [[OP2_LO]].h ; VBITS_EQ_256-DAG: lsl [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP1_HI]].h, [[OP2_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op1 = load <32 x i16>, <32 x i16>* %a %op2 = load <32 x i16>, <32 x i16>* %b @@ -1009,16 +1002,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP1_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: lsl [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP1_LO]].s, [[OP2_LO]].s ; VBITS_EQ_256-DAG: lsl [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP1_HI]].s, [[OP2_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op1 = load <16 x i32>, <16 x i32>* %a %op2 = load <16 x i32>, <16 x i32>* %b @@ -1101,16 +1093,15 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP1_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: lsl [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP1_LO]].d, [[OP2_LO]].d ; VBITS_EQ_256-DAG: lsl [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP1_HI]].d, [[OP2_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x i64>, <8 x i64>* %a %op2 = load <8 x i64>, <8 x i64>* %b diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll @@ -69,14 +69,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x8] -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[LO]].h ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x1] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x8] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x1, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op1 = load <32 x i16>, <32 x i16>* %a %res = uitofp <32 x i16> %op1 to <32 x half> @@ -165,13 +164,13 @@ ; VBITS_EQ_256-DAG: st1h { [[VEC:z[0-9]+]].h }, [[PG1]], [x8] ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: uunpklo [[UPK_LO:z[0-9]+]].s, z[[LO]].h ; VBITS_EQ_256-DAG: uunpklo [[UPK_HI:z[0-9]+]].s, z[[HI]].h ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].s, [[PG2]]/m, [[UPK_LO]].s ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].s, [[PG2]]/m, [[UPK_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG2]], [x1] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x8] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x1, x[[NUMELTS]], lsl #2] %op1 = load <16 x i16>, <16 x i16>* %a %res = uitofp <16 x i16> %op1 to <16 x float> store <16 x float> %res, <16 x float>* %b @@ -264,7 +263,7 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ldr q[[OP:[0-9]+]], [x0] ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ext v[[HI:[0-9]+]].16b, v[[LO:[0-9]+]].16b, v[[OP]].16b, #8 ; VBITS_EQ_256-DAG: uunpklo [[UPK1_LO:z[0-9]+]].s, z[[LO]].h ; VBITS_EQ_256-DAG: uunpklo [[UPK1_HI:z[0-9]+]].s, z[[HI]].h @@ -273,7 +272,7 @@ ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK2_LO]].d ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK2_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x8] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x i16>, <8 x i16>* %a %res = uitofp <8 x i16> %op1 to <8 x double> @@ -363,9 +362,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x8] +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].h, vl8 ; VBITS_EQ_256-DAG: ucvtf [[CVT_LO:z[0-9]+]].h, [[PG2]]/m, [[LO]].s @@ -459,14 +458,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x8] -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[LO]].s ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x1] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x8] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x1, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op1 = load <16 x i32>, <16 x i32>* %a %res = uitofp <16 x i32> %op1 to <16 x float> @@ -555,13 +553,13 @@ ; VBITS_EQ_256-DAG: st1w { [[VEC:z[0-9]+]].s }, [[PG1]], [x8] ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: uunpklo [[UPK_LO:z[0-9]+]].d, z[[LO]].s ; VBITS_EQ_256-DAG: uunpklo [[UPK_HI:z[0-9]+]].d, z[[HI]].s ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK_LO]].d ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x8] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3] %op1 = load <8 x i32>, <8 x i32>* %a %res = uitofp <8 x i32> %op1 to <8 x double> store <8 x double> %res, <8 x double>* %b @@ -651,9 +649,9 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d ; VBITS_EQ_256-DAG: ucvtf [[CVT_LO:z[0-9]+]].h, [[PG2]]/m, [[LO]].d ; VBITS_EQ_256-DAG: ucvtf [[CVT_HI:z[0-9]+]].h, [[PG2]]/m, [[HI]].d @@ -752,9 +750,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].s, vl4 ; VBITS_EQ_256-DAG: ucvtf [[CVT_LO:z[0-9]+]].s, [[PG2]]/m, [[LO]].d @@ -849,14 +847,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8] -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ucvtf [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[LO]].d ; VBITS_EQ_256-DAG: ucvtf [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x1] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x8] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x i64>, <8 x i64>* %a %res = uitofp <8 x i64> %op1 to <8 x double> @@ -939,14 +936,13 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x8] -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[LO]].h ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x1] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x8] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x1, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op1 = load <32 x i16>, <32 x i16>* %a %res = sitofp <32 x i16> %op1 to <32 x half> @@ -1035,13 +1031,13 @@ ; VBITS_EQ_256-DAG: st1h { [[VEC:z[0-9]+]].h }, [[PG1]], [x8] ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: 
sunpklo [[UPK_LO:z[0-9]+]].s, z[[LO]].h ; VBITS_EQ_256-DAG: sunpklo [[UPK_HI:z[0-9]+]].s, z[[HI]].h ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].s, [[PG2]]/m, [[UPK_LO]].s ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].s, [[PG2]]/m, [[UPK_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG2]], [x1] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x8] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG2]], [x1, x[[NUMELTS]], lsl #2] %op1 = load <16 x i16>, <16 x i16>* %a %res = sitofp <16 x i16> %op1 to <16 x float> store <16 x float> %res, <16 x float>* %b @@ -1134,7 +1130,7 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ldr q[[OP:[0-9]+]], [x0] ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ext v[[HI:[0-9]+]].16b, v[[LO:[0-9]+]].16b, v[[OP]].16b, #8 ; VBITS_EQ_256-DAG: sunpklo [[UPK1_LO:z[0-9]+]].s, z[[LO]].h ; VBITS_EQ_256-DAG: sunpklo [[UPK1_HI:z[0-9]+]].s, z[[HI]].h @@ -1143,7 +1139,7 @@ ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK2_LO]].d ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK2_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x8] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x i16>, <8 x i16>* %a %res = sitofp <8 x i16> %op1 to <8 x double> @@ -1233,9 +1229,9 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x8] +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].h, vl8 ; VBITS_EQ_256-DAG: scvtf [[CVT_LO:z[0-9]+]].h, [[PG2]]/m, [[LO]].s @@ -1329,14 +1325,13 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x8] -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[LO]].s ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x1] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x8] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x1, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op1 = load <16 x i32>, <16 x i32>* %a %res = sitofp <16 x i32> %op1 to <16 x float> @@ -1425,13 +1420,13 @@ ; VBITS_EQ_256-DAG: st1w { [[VEC:z[0-9]+]].s }, [[PG1]], [x8] ; VBITS_EQ_256-DAG: ldp q[[LO:[0-9]+]], q[[HI:[0-9]+]], [sp] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: sunpklo [[UPK_LO:z[0-9]+]].d, z[[LO]].s ; VBITS_EQ_256-DAG: sunpklo [[UPK_HI:z[0-9]+]].d, z[[HI]].s ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].d, [[PG2]]/m, [[UPK_LO]].d ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].d, [[PG2]]/m, [[UPK_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG2]], [x1] -; 
VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x8] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG2]], [x1, x[[NUMELTS]], lsl #3] %op1 = load <8 x i32>, <8 x i32>* %a %res = sitofp <8 x i32> %op1 to <8 x double> store <8 x double> %res, <8 x double>* %b @@ -1521,9 +1516,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d ; VBITS_EQ_256-DAG: scvtf [[CVT_LO:z[0-9]+]].h, [[PG2]]/m, [[LO]].d ; VBITS_EQ_256-DAG: scvtf [[CVT_HI:z[0-9]+]].h, [[PG2]]/m, [[HI]].d @@ -1622,9 +1617,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].d ; VBITS_EQ_256-DAG: ptrue [[PG3:p[0-9]+]].s, vl4 ; VBITS_EQ_256-DAG: scvtf [[CVT_LO:z[0-9]+]].s, [[PG2]]/m, [[LO]].d @@ -1719,14 +1714,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x8] -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: scvtf [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[LO]].d ; VBITS_EQ_256-DAG: scvtf [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x1] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x8] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x i64>, <8 x i64>* %a %res = sitofp <8 x i64> %op1 to <8 x double> diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-loads.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-loads.ll @@ -56,8 +56,8 @@ ; CHECK-LABEL: load_v16f32: ; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),16)]] ; CHECK-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0] -; VBITS_LE_256-DAG: add x[[A1:[0-9]+]], x0, #[[#VBYTES]] -; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A1]]] +; VBITS_LE_256-DAG: mov x[[A1:[0-9]+]], #[[#div(VBYTES,4)]] +; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A1]], lsl #2] ; CHECK: ret %load = load <16 x float>, <16 x float>* %a ret <16 x float> %load @@ -67,12 +67,12 @@ ; CHECK-LABEL: load_v32f32: ; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),32)]] ; CHECK-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0] -; VBITS_LE_512-DAG: add x[[A1:[0-9]+]], x0, #[[#VBYTES]] -; VBITS_LE_512-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A1]]] -; VBITS_LE_256-DAG: add x[[A2:[0-9]+]], x0, #[[#mul(VBYTES,2)]] -; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A2]]] -; VBITS_LE_256-DAG: add 
x[[A3:[0-9]+]], x0, #[[#mul(VBYTES,3)]] -; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A3]]] +; VBITS_LE_512-DAG: mov x[[A1:[0-9]+]], #[[#div(VBYTES,4)]] +; VBITS_LE_512-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A1]], lsl #2] +; VBITS_LE_256-DAG: mov x[[A2:[0-9]+]], #[[#mul(div(VBYTES,4),2)]] +; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A2]], lsl #2] +; VBITS_LE_256-DAG: mov x[[A3:[0-9]+]], #[[#mul(div(VBYTES,4),3)]] +; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A3]], lsl #2] ; CHECK: ret %load = load <32 x float>, <32 x float>* %a ret <32 x float> %load @@ -82,20 +82,20 @@ ; CHECK-LABEL: load_v64f32: ; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),64)]] ; CHECK-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0] -; VBITS_LE_1024-DAG: add x[[A1:[0-9]+]], x0, #[[#VBYTES]] -; VBITS_LE_1024-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A1]]] -; VBITS_LE_512-DAG: add x[[A2:[0-9]+]], x0, #[[#mul(VBYTES,2)]] -; VBITS_LE_512-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A2]]] -; VBITS_LE_512-DAG: add x[[A3:[0-9]+]], x0, #[[#mul(VBYTES,3)]] -; VBITS_LE_512-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A3]]] -; VBITS_LE_256-DAG: add x[[A4:[0-9]+]], x0, #[[#mul(VBYTES,4)]] -; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A4]]] -; VBITS_LE_256-DAG: add x[[A5:[0-9]+]], x0, #[[#mul(VBYTES,5)]] -; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A5]]] -; VBITS_LE_256-DAG: add x[[A6:[0-9]+]], x0, #[[#mul(VBYTES,6)]] -; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A6]]] -; VBITS_LE_256-DAG: add x[[A7:[0-9]+]], x0, #[[#mul(VBYTES,7)]] -; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x[[A7]]] +; VBITS_LE_1024-DAG: mov x[[A1:[0-9]+]], #[[#div(VBYTES,4)]] +; VBITS_LE_1024-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A1]], lsl #2] +; VBITS_LE_512-DAG: mov x[[A2:[0-9]+]], #[[#mul(div(VBYTES,4),2)]] +; VBITS_LE_512-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A2]], lsl #2] +; VBITS_LE_512-DAG: mov 
x[[A3:[0-9]+]], #[[#mul(div(VBYTES,4),3)]] +; VBITS_LE_512-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A3]], lsl #2] +; VBITS_LE_256-DAG: mov x[[A4:[0-9]+]], #[[#mul(div(VBYTES,4),4)]] +; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A4]], lsl #2] +; VBITS_LE_256-DAG: mov x[[A5:[0-9]+]], #[[#mul(div(VBYTES,4),5)]] +; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A5]], lsl #2] +; VBITS_LE_256-DAG: mov x[[A6:[0-9]+]], #[[#mul(div(VBYTES,4),6)]] +; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A6]], lsl #2] +; VBITS_LE_256-DAG: mov x[[A7:[0-9]+]], #[[#mul(div(VBYTES,4),7)]] +; VBITS_LE_256-DAG: ld1w { z{{[0-9]+}}.s }, [[PG]]/z, [x0, x[[A7]], lsl #2] ; CHECK: ret %load = load <64 x float>, <64 x float>* %a ret <64 x float> %load diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll @@ -68,9 +68,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32 +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 ; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: and [[AND:z[0-9]+]].d, [[LO]].d, [[HI]].d ; VBITS_EQ_256-DAG: andv b[[REDUCE:[0-9]+]], [[PG]], [[AND]].b ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] @@ -149,9 +149,9 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: and [[AND:z[0-9]+]].d, [[LO]].d, [[HI]].d ; VBITS_EQ_256-DAG: andv h[[REDUCE:[0-9]+]], [[PG]], [[AND]].h ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] @@ -229,9 +229,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: and [[AND:z[0-9]+]].d, [[LO]].d, [[HI]].d ; VBITS_EQ_256-DAG: andv [[REDUCE:s[0-9]+]], [[PG]], [[AND]].s ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]] @@ -307,9 +307,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: and [[AND:z[0-9]+]].d, [[LO]].d, [[HI]].d ; VBITS_EQ_256-DAG: andv [[REDUCE:d[0-9]+]], [[PG]], [[AND]].d ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] @@ -391,9 +391,9 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32 +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 ; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: eor [[EOR:z[0-9]+]].d, [[LO]].d, [[HI]].d ; VBITS_EQ_256-DAG: eorv b[[REDUCE:[0-9]+]], [[PG]], [[EOR]].b ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] @@ -472,9 +472,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: eor [[EOR:z[0-9]+]].d, [[LO]].d, [[HI]].d ; VBITS_EQ_256-DAG: eorv h[[REDUCE:[0-9]+]], [[PG]], [[EOR]].h ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] @@ -552,9 +552,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: eor [[EOR:z[0-9]+]].d, [[LO]].d, [[HI]].d ; VBITS_EQ_256-DAG: eorv [[REDUCE:s[0-9]+]], [[PG]], [[EOR]].s ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]] @@ -630,9 +630,9 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: eor [[EOR:z[0-9]+]].d, [[LO]].d, [[HI]].d ; VBITS_EQ_256-DAG: eorv [[REDUCE:d[0-9]+]], [[PG]], [[EOR]].d ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] @@ -714,9 +714,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w[[A_HI:[0-9]+]], #32 +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 ; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: orr [[OR:z[0-9]+]].d, [[LO]].d, [[HI]].d ; VBITS_EQ_256-DAG: orv b[[REDUCE:[0-9]+]], [[PG]], [[OR]].b ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] @@ -795,9 +795,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: orr [[OR:z[0-9]+]].d, [[LO]].d, [[HI]].d ; VBITS_EQ_256-DAG: orv h[[REDUCE:[0-9]+]], [[PG]], [[OR]].h ; VBITS_EQ_256-NEXT: fmov w0, s[[REDUCE]] @@ -875,9 +875,9 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: orr [[OR:z[0-9]+]].d, [[LO]].d, [[HI]].d ; VBITS_EQ_256-DAG: orv [[REDUCE:s[0-9]+]], [[PG]], [[OR]].s ; VBITS_EQ_256-NEXT: fmov w0, [[REDUCE]] @@ -953,9 +953,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: orr [[OR:z[0-9]+]].d, [[LO]].d, [[HI]].d ; VBITS_EQ_256-DAG: orv [[REDUCE:d[0-9]+]], [[PG]], [[OR]].d ; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll @@ -86,9 +86,9 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ldr d[[VALS:[0-9]+]], [x0] ; VBITS_EQ_256-DAG: ptrue [[PG0:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG0]]/z, [x1] -; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x8] +; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: cmeq [[ZMSK:v[0-9]+]].8b, v[[VALS]].8b, #0 ; VBITS_EQ_256-DAG: zip1 [[VAL_LO:v[0-9]+]].8b, [[ZMSK]].8b, v[[VALS]].8b ; VBITS_EQ_256-DAG: zip2 [[VAL_HI:v[0-9]+]].8b, [[ZMSK]].8b, v[[VALS]].8b @@ -223,9 +223,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ldr q[[VALS:[0-9]+]], [x0] ; VBITS_EQ_256-DAG: ptrue [[PG0:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG0]]/z, [x1] -; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x8] +; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].h, vl4 ; VBITS_EQ_256-DAG: cmeq v[[ZMSK:[0-9]+]].8h, v[[VALS]].8h, #0 ; VBITS_EQ_256-DAG: cmpne [[MASK_LO:p[0-9]+]].h, [[PG1]]/z, z[[ZMSK]].h, #0 @@ -347,9 +347,9 @@ ; VBITS_EQ_256-DAG: ptrue [[PG0:p[0-9]+]].s, vl8 ; VBITS_EQ_256-DAG: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG1]]/z, [x1] -; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG1]]/z, [x8] +; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG1]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: cmpeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0 ; VBITS_EQ_256-DAG: mov x8, sp ; VBITS_EQ_256-DAG: mov [[MONE:z[0-9]+]].s, p1/z, #-1 @@ -474,18 +474,17 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG0:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x0, #32 -; VBITS_EQ_256-DAG: add x9, x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[VALS_LO:z[0-9]+]].d }, [[PG0]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[VALS_HI:z[0-9]+]].d }, [[PG0]]/z, [x8] -; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x9] +; VBITS_EQ_256-DAG: ld1d { [[VALS_HI:z[0-9]+]].d }, [[PG0]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG0]]/z, [x1] +; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: cmpeq [[MASK_LO:p[0-9]+]].d, [[PG0]]/z, [[VALS_LO]].d, #0 ; VBITS_EQ_256-DAG: cmpeq [[MASK_HI:p[0-9]+]].d, [[PG0]]/z, [[VALS_HI]].d, #0 ; VBITS_EQ_256-DAG: ld1d { [[RES_LO:z[0-9]+]].d }, [[MASK_LO]]/z, {{\[}}[[PTRS_LO]].d] ; VBITS_EQ_256-DAG: ld1d { [[RES_HI:z[0-9]+]].d }, [[MASK_HI]]/z, {{\[}}[[PTRS_HI]].d] ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG0]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG0]], [x8] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG0]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %cval = load <8 x i64>, <8 x i64>* %a %ptrs = load <8 x i64*>, <8 x i64*>* %b diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll @@ -82,14 +82,14 @@ ; VBITS_EQ_256-DAG: ldr d[[VALS:[0-9]+]], [x0] ; VBITS_EQ_256-DAG: ptrue [[PG0:p[0-9]+]].d, vl4 ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].h, vl4 -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: cmeq [[ZMSK:v[0-9]+]].8b, v[[VALS]].8b, #0 ; VBITS_EQ_256-DAG: zip1 [[VAL_LO:v[0-9]+]].8b, [[ZMSK]].8b, v[[VALS]].8b ; VBITS_EQ_256-DAG: zip2 [[VAL_HI:v[0-9]+]].8b, [[ZMSK]].8b, v[[VALS]].8b ; 
VBITS_EQ_256-DAG: shl [[SHL_LO:v[0-9]+]].4h, [[VAL_LO]].4h, #8 ; VBITS_EQ_256-DAG: shl [[SHL_HI:v[0-9]+]].4h, [[VAL_HI]].4h, #8 ; VBITS_EQ_256-DAG: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG0]]/z, [x1] -; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x8] +; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: sshr v[[SSHR_LO:[0-9]+]].4h, [[SHL_LO]].4h, #8 ; VBITS_EQ_256-DAG: sshr v[[SSHR_HI:[0-9]+]].4h, [[SHL_HI]].4h, #8 ; VBITS_EQ_256-DAG: cmpne [[MASK_LO:p[0-9]+]].h, [[PG1]]/z, z[[SSHR_LO]].h, #0 @@ -208,9 +208,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ldr q[[VALS:[0-9]+]], [x0] ; VBITS_EQ_256-DAG: ptrue [[PG0:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG0]]/z, [x1] -; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x[[B_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].h, vl4 ; VBITS_EQ_256-DAG: cmeq v[[ZMSK:[0-9]+]].8h, v[[VALS]].8h, #0 ; VBITS_EQ_256-DAG: ext v[[EXT:[0-9]+]].16b, v[[VALS]].16b, v[[VALS]].16b, #8 @@ -321,9 +321,9 @@ ; VBITS_EQ_256-DAG: ptrue [[PG0:p[0-9]+]].s, vl8 ; VBITS_EQ_256-DAG: ld1w { [[VALS:z[0-9]+]].s }, [[PG0]]/z, [x0] ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG1]]/z, [x1] -; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG1]]/z, [x8] +; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG1]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: cmpeq [[MASK:p[0-9]+]].s, [[PG0]]/z, [[VALS]].s, #0 ; VBITS_EQ_256-DAG: add x8, sp, #32 ; VBITS_EQ_256-DAG: mov x9, sp @@ -439,12 +439,11 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG0:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x0, #32 -; VBITS_EQ_256-DAG: add x9, x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[VALS_LO:z[0-9]+]].d }, [[PG0]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[VALS_HI:z[0-9]+]].d }, [[PG0]]/z, [x8] +; VBITS_EQ_256-DAG: ld1d { [[VALS_HI:z[0-9]+]].d }, [[PG0]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ld1d { [[PTRS_LO:z[0-9]+]].d }, [[PG0]]/z, [x1] -; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x9] +; VBITS_EQ_256-DAG: ld1d { [[PTRS_HI:z[0-9]+]].d }, [[PG0]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: cmpeq [[MASK_LO:p[0-9]+]].d, [[PG0]]/z, [[VALS_LO]].d, #0 ; VBITS_EQ_256-DAG: cmpeq [[MASK_HI:p[0-9]+]].d, [[PG0]]/z, [[VALS_HI]].d, #0 ; VBITS_EQ_256-DAG: st1d { [[VALS_LO]].d }, [[MASK_LO]], {{\[}}[[PTRS_LO]].d] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll @@ -65,13 +65,13 @@ ; ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w[[A:[0-9]+]], #32 +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 ; VBITS_EQ_256-DAG: ld1b { [[OP_LO:z[0-9]+]].b }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1b { [[OP_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[A]]] +; VBITS_EQ_256-DAG: ld1b { [[OP_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: rbit [[RES_LO:z[0-9]+]].b, [[PG]]/m, [[OP_LO]].b ; VBITS_EQ_256-DAG: rbit [[RES_HI:z[0-9]+]].b, [[PG]]/m, [[OP_HI]].b ; VBITS_EQ_256-DAG: st1b { [[RES_LO]].b }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[A]]] +; VBITS_EQ_256-DAG: st1b { [[RES_HI]].b }, [[PG]], [x0, x[[NUMELTS]]] ; VBITS_EQ_256-NEXT: ret %op = load <64 x i8>, <64 x i8>* %a %res = call <64 x i8> @llvm.bitreverse.v64i8(<64 x i8> %op) @@ -146,13 +146,13 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: rbit [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h ; VBITS_EQ_256-DAG: rbit [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op = load <32 x i16>, <32 x i16>* %a %res = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %op) @@ -227,13 +227,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: rbit [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s ; VBITS_EQ_256-DAG: rbit [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op = load <16 x i32>, <16 x i32>* %a %res = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %op) @@ -308,13 +308,13 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: rbit [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d ; VBITS_EQ_256-DAG: rbit [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op = load <8 x i64>, <8 x i64>* %a %res = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %op) @@ -393,13 +393,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: revb [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h ; VBITS_EQ_256-DAG: revb [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h ; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op = load <32 x i16>, <32 x i16>* %a %res = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %op) @@ -474,13 +474,13 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: revb [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s ; VBITS_EQ_256-DAG: revb [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s ; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op = load <16 x i32>, <16 x i32>* %a %res = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %op) @@ -555,13 +555,13 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: revb [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d ; VBITS_EQ_256-DAG: revb [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d ; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x[[A_HI]] +; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op = load <8 x i64>, <8 x i64>* %a %res = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %op) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll @@ -66,9 +66,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].b, w0 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w[[OFFSET_HI:[0-9]+]], #32 +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 ; VBITS_EQ_256-DAG: st1b { [[RES]].b }, [[PG]], [x1] -; VBITS_EQ_256-DAG: st1b { [[RES]].b }, [[PG]], [x1, x[[OFFSET_HI]] +; VBITS_EQ_256-DAG: st1b { [[RES]].b }, [[PG]], [x1, x[[NUMELTS]]] ; VBITS_EQ_256-NEXT: ret %insert = insertelement <64 x i8> undef, i8 %a, i64 0 %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer @@ -142,9 +142,9 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].h, w0 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x1] -; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x[[B_HI]] +; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x1, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %insert = insertelement <32 x i16> undef, i16 %a, i64 0 %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer @@ -218,9 +218,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].s, w0 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x1] -; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x[[B_HI]] +; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x1, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %insert = insertelement <16 x i32> undef, i32 %a, i64 0 %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer @@ -294,9 +294,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].d, x0 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x1, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x1] -; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x[[B_HI]] +; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %insert = insertelement <8 x i64> undef, i64 %a, i64 0 %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer @@ -374,9 +374,9 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].h, h0 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x[[B_HI]] +; VBITS_EQ_256-DAG: st1h { [[RES]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %insert = insertelement <32 x half> undef, half %a, i64 0 %splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer @@ -450,9 +450,9 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].s, s0 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x[[B_HI]] +; VBITS_EQ_256-DAG: st1w { [[RES]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %insert = insertelement <16 x float> undef, float %a, i64 0 %splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer @@ -526,9 +526,9 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: mov [[RES:z[0-9]+]].d, d0 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[B_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x[[B_HI]] +; VBITS_EQ_256-DAG: st1d { [[RES]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %insert = insertelement <8 x double> undef, double %a, i64 0 %splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-stores.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-stores.ll @@ -56,8 +56,8 @@ ; CHECK-LABEL: store_v16f32: ; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),16)]] ; CHECK-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0] -; VBITS_LE_256-DAG: add x[[A1:[0-9]+]], x0, #[[#VBYTES]] -; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A1]]] +; VBITS_LE_256-DAG: mov x[[A1:[0-9]+]], #[[#div(VBYTES,4)]] +; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A1]], lsl #2] ; CHECK: ret store <16 x float> zeroinitializer, <16 x float>* %a ret void @@ -67,12 +67,12 @@ ; CHECK-LABEL: store_v32f32: ; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),32)]] ; CHECK-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0] -; VBITS_LE_512-DAG: add x[[A1:[0-9]+]], x0, #[[#VBYTES]] -; VBITS_LE_512-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A1]]] -; VBITS_LE_256-DAG: add x[[A2:[0-9]+]], x0, #[[#mul(VBYTES,2)]] -; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A2]]] -; VBITS_LE_256-DAG: add x[[A3:[0-9]+]], x0, #[[#mul(VBYTES,3)]] -; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A3]]] +; VBITS_LE_512-DAG: mov x[[A1:[0-9]+]], #[[#div(VBYTES,4)]] +; VBITS_LE_512-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A1]], lsl #2] +; VBITS_LE_256-DAG: mov 
x[[A2:[0-9]+]], #[[#mul(div(VBYTES,4),2)]] +; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A2]], lsl #2] +; VBITS_LE_256-DAG: mov x[[A3:[0-9]+]], #[[#mul(div(VBYTES,4),3)]] +; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A3]], lsl #2] ; CHECK: ret store <32 x float> zeroinitializer, <32 x float>* %a ret void @@ -82,20 +82,20 @@ ; CHECK-LABEL: store_v64f32: ; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),64)]] ; CHECK-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0] -; VBITS_LE_1024-DAG: add x[[A1:[0-9]+]], x0, #[[#VBYTES]] -; VBITS_LE_1024-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A1]]] -; VBITS_LE_512-DAG: add x[[A2:[0-9]+]], x0, #[[#mul(VBYTES,2)]] -; VBITS_LE_512-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A2]]] -; VBITS_LE_512-DAG: add x[[A3:[0-9]+]], x0, #[[#mul(VBYTES,3)]] -; VBITS_LE_512-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A3]]] -; VBITS_LE_256-DAG: add x[[A4:[0-9]+]], x0, #[[#mul(VBYTES,4)]] -; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A4]]] -; VBITS_LE_256-DAG: add x[[A5:[0-9]+]], x0, #[[#mul(VBYTES,5)]] -; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A5]]] -; VBITS_LE_256-DAG: add x[[A6:[0-9]+]], x0, #[[#mul(VBYTES,6)]] -; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A6]]] -; VBITS_LE_256-DAG: add x[[A7:[0-9]+]], x0, #[[#mul(VBYTES,7)]] -; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A7]]] +; VBITS_LE_1024-DAG: mov x[[A1:[0-9]+]], #[[#div(VBYTES,4)]] +; VBITS_LE_1024-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A1]], lsl #2] +; VBITS_LE_512-DAG: mov x[[A2:[0-9]+]], #[[#mul(div(VBYTES,4),2)]] +; VBITS_LE_512-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A2]], lsl #2] +; VBITS_LE_512-DAG: mov x[[A3:[0-9]+]], #[[#mul(div(VBYTES,4),3)]] +; VBITS_LE_512-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A3]], lsl #2] +; VBITS_LE_256-DAG: mov x[[A4:[0-9]+]], #[[#mul(div(VBYTES,4),4)]] +; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A4]], lsl #2] +; VBITS_LE_256-DAG: mov x[[A5:[0-9]+]], 
#[[#mul(div(VBYTES,4),5)]] +; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A5]], lsl #2] +; VBITS_LE_256-DAG: mov x[[A6:[0-9]+]], #[[#mul(div(VBYTES,4),6)]] +; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A6]], lsl #2] +; VBITS_LE_256-DAG: mov x[[A7:[0-9]+]], #[[#mul(div(VBYTES,4),7)]] +; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0, x[[A7]], lsl #2] ; CHECK: ret store <64 x float> zeroinitializer, <64 x float>* %a ret void diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc-stores.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc-stores.ll @@ -53,9 +53,9 @@ ; Ensure sensible type legalisation ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[DWORDS_LO:z[0-9]+]].d }, [[PG1]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[DWORDS_HI:z[0-9]+]].d }, [[PG1]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[DWORDS_HI:z[0-9]+]].d }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s, vl4 ; VBITS_EQ_256-DAG: uzp1 [[WORDS_LO:z[0-9]+]].s, [[DWORDS_LO]].s, [[DWORDS_LO]].s ; VBITS_EQ_256-DAG: uzp1 [[WORDS_HI:z[0-9]+]].s, [[DWORDS_HI]].s, [[DWORDS_HI]].s @@ -103,9 +103,9 @@ ; Ensure sensible type legalisation. 
; Currently does not use the truncating store ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[DWORDS_LO:z[0-9]+]].d }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[DWORDS_HI:z[0-9]+]].d }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[DWORDS_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: uzp1 [[WORDS_LO:z[0-9]+]].s, [[DWORDS_LO]].s, [[DWORDS_LO]].s ; VBITS_EQ_256-DAG: uzp1 [[WORDS_HI:z[0-9]+]].s, [[DWORDS_HI]].s, [[DWORDS_HI]].s ; VBITS_EQ_256-DAG: uzp1 z[[HALFS_LO:[0-9]+]].h, [[WORDS_LO]].h, [[WORDS_LO]].h @@ -128,9 +128,9 @@ ; Ensure sensible type legalisation ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 ; VBITS_EQ_256-DAG: ld1d { [[DWORDS_LO:z[0-9]+]].d }, [[PG1]]/z, [x0] -; VBITS_EQ_256-DAG: ld1d { [[DWORDS_HI:z[0-9]+]].d }, [[PG1]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1d { [[DWORDS_HI:z[0-9]+]].d }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].s, vl4 ; VBITS_EQ_256-DAG: uzp1 [[WORDS_LO:z[0-9]+]].s, [[DWORDS_LO]].s, [[DWORDS_LO]].s ; VBITS_EQ_256-DAG: uzp1 [[WORDS_HI:z[0-9]+]].s, [[DWORDS_HI]].s, [[DWORDS_HI]].s @@ -154,9 +154,9 @@ ; Ensure sensible type legalisation. 
; Currently does not use the truncating store ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[WORDS_LO:z[0-9]+]].s }, [[PG]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[WORDS_HI:z[0-9]+]].s }, [[PG]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[WORDS_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: uzp1 [[HALFS_LO:z[0-9]+]].h, [[WORDS_LO]].h, [[WORDS_LO]].h ; VBITS_EQ_256-DAG: uzp1 [[HALFS_HI:z[0-9]+]].h, [[WORDS_HI]].h, [[WORDS_HI]].h ; VBITS_EQ_256-DAG: uzp1 z[[BYTES_LO:[0-9]+]].b, [[HALFS_LO]].b, [[HALFS_LO]].b @@ -179,9 +179,9 @@ ; Ensure sensible type legalisation ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 ; VBITS_EQ_256-DAG: ld1w { [[WORDS_LO:z[0-9]+]].s }, [[PG1]]/z, [x0] -; VBITS_EQ_256-DAG: ld1w { [[WORDS_HI:z[0-9]+]].s }, [[PG1]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1w { [[WORDS_HI:z[0-9]+]].s }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].h, vl8 ; VBITS_EQ_256-DAG: uzp1 [[HALFS_LO:z[0-9]+]].h, [[WORDS_LO]].h, [[WORDS_LO]].h ; VBITS_EQ_256-DAG: uzp1 [[HALFS_HI:z[0-9]+]].h, [[WORDS_HI]].h, [[WORDS_HI]].h @@ -204,9 +204,9 @@ ; Ensure sensible type legalisation ; VBITS_EQ_256-DAG: ptrue [[PG1:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x[[A_HI:[0-9]+]], x0, #32 +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 ; VBITS_EQ_256-DAG: ld1h { [[HALFS_LO:z[0-9]+]].h }, [[PG1]]/z, [x0] -; VBITS_EQ_256-DAG: ld1h { [[HALFS_HI:z[0-9]+]].h }, [[PG1]]/z, [x[[A_HI]]] +; VBITS_EQ_256-DAG: ld1h { [[HALFS_HI:z[0-9]+]].h }, [[PG1]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: ptrue [[PG2:p[0-9]+]].b, vl16 ; VBITS_EQ_256-DAG: uzp1 [[BYTES_LO:z[0-9]+]].b, [[HALFS_LO]].b, [[HALFS_LO]].b ; VBITS_EQ_256-DAG: uzp1 [[BYTES_HI:z[0-9]+]].b, [[HALFS_HI]].b, [[HALFS_HI]].b diff --git 
a/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll @@ -72,10 +72,10 @@ ; Ensure sensible type legalisation. ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].b, vl32 -; VBITS_EQ_256-DAG: mov w8, #32 -; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x8] +; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32 +; VBITS_EQ_256-DAG: ld1b { [[OP1_HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: ld1b { [[OP2_LO:z[0-9]+]].b }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x8] +; VBITS_EQ_256-DAG: ld1b { [[OP2_HI:z[0-9]+]].b }, [[PG]]/z, [x1, x[[NUMELTS]]] ; VBITS_EQ_256-DAG: mov z[[ELEM1:[0-9]+]].b, [[OP1_HI]].b[31] ; VBITS_EQ_256-DAG: fmov [[TMP1:w[0-9]+]], s[[ELEM1]] ; VBITS_EQ_256-DAG: mov z[[ELEM2:[0-9]+]].b, [[OP2_LO]].b[31] @@ -83,7 +83,7 @@ ; VBITS_EQ_256-DAG: fmov [[TMP2:w[0-9]+]], s[[ELEM2]] ; VBITS_EQ_256-DAG: insr [[OP2_HI]].b, [[TMP2]] ; VBITS_EQ_256-DAG: st1b { [[OP2_LO]].b }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1b { [[OP2_HI]].b }, [[PG]], [x0, x8] +; VBITS_EQ_256-DAG: st1b { [[OP2_HI]].b }, [[PG]], [x0, x[[NUMELTS]]] ; VBITS_EQ_256-NEXT: ret %op1 = load <64 x i8>, <64 x i8>* %a %op2 = load <64 x i8>, <64 x i8>* %b @@ -230,11 +230,10 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x8, x0, #32 -; VBITS_EQ_256-DAG: add x9, x1, #32 -; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x8] +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 +; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x9] +; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: mov z[[ELEM1:[0-9]+]].h, [[OP1_HI]].h[15] ; VBITS_EQ_256-DAG: fmov [[TMP1:w[0-9]+]], s[[ELEM1]] ; VBITS_EQ_256-DAG: mov z[[ELEM2:[0-9]+]].h, [[OP2_LO]].h[15] @@ -242,7 +241,7 @@ ; VBITS_EQ_256-DAG: fmov [[TMP2:w[0-9]+]], s[[ELEM2]] ; VBITS_EQ_256-DAG: insr [[OP2_HI]].h, [[TMP2]] ; VBITS_EQ_256-DAG: st1h { [[OP2_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[OP2_HI]].h }, [[PG]], [x8] +; VBITS_EQ_256-DAG: st1h { [[OP2_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op1 = load <32 x i16>, <32 x i16>* %a %op2 = load <32 x i16>, <32 x i16>* %b @@ -360,11 +359,10 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x8, x0, #32 -; VBITS_EQ_256-DAG: add x9, x1, #32 -; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x8] +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 +; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x9] +; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: mov z[[ELEM1:[0-9]+]].s, [[OP1_HI]].s[7] ; VBITS_EQ_256-DAG: fmov [[TMP1:w[0-9]+]], s[[ELEM1]] ; VBITS_EQ_256-DAG: mov z[[ELEM2:[0-9]+]].s, [[OP2_LO]].s[7] @@ -372,7 +370,7 @@ ; VBITS_EQ_256-DAG: fmov [[TMP2:w[0-9]+]], s[[ELEM2]] ; VBITS_EQ_256-DAG: insr [[OP2_HI]].s, [[TMP2]] ; VBITS_EQ_256-DAG: st1w { [[OP2_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[OP2_HI]].s }, [[PG]], [x8] +; VBITS_EQ_256-DAG: st1w { [[OP2_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ret %op1 = load <16 x i32>, <16 x i32>* %a @@ -468,11 +466,10 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x0, #32 -; VBITS_EQ_256-DAG: add x9, x1, #32 -; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x8] +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 +; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x9] +; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: mov z[[ELEM1:[0-9]+]].d, [[OP1_HI]].d[3] ; VBITS_EQ_256-DAG: fmov [[TMP1:x[0-9]+]], d[[ELEM1]] ; VBITS_EQ_256-DAG: mov z[[ELEM2:[0-9]+]].d, [[OP2_LO]].d[3] @@ -480,7 +477,7 @@ ; VBITS_EQ_256-DAG: fmov [[TMP2:x[0-9]+]], d[[ELEM2]] ; VBITS_EQ_256-DAG: insr [[OP2_HI]].d, [[TMP2]] ; VBITS_EQ_256-DAG: st1d { [[OP2_LO]].d }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1d { [[OP2_HI]].d }, [[PG]], [x8] +; VBITS_EQ_256-DAG: st1d { [[OP2_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x i64>, <8 x i64>* %a %op2 = load <8 x i64>, <8 x i64>* %b @@ -576,17 +573,16 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16 -; VBITS_EQ_256-DAG: add x8, x0, #32 -; VBITS_EQ_256-DAG: add x9, x1, #32 -; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x8] +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16 +; VBITS_EQ_256-DAG: ld1h { [[OP1_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: ld1h { [[OP2_LO:z[0-9]+]].h }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x9] +; VBITS_EQ_256-DAG: ld1h { [[OP2_HI:z[0-9]+]].h }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-DAG: mov z[[ELEM2:[0-9]+]].h, [[OP2_LO]].h[15] ; VBITS_EQ_256-DAG: mov z[[ELEM1:[0-9]+]].h, [[OP1_HI]].h[15] ; VBITS_EQ_256-DAG: insr [[OP2_LO]].h, h[[ELEM1]] ; VBITS_EQ_256-DAG: insr [[OP2_HI]].h, h[[ELEM2]] ; VBITS_EQ_256-DAG: st1h { [[OP2_LO]].h }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1h { [[OP2_HI]].h }, [[PG]], [x8] +; VBITS_EQ_256-DAG: st1h { [[OP2_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1] ; VBITS_EQ_256-NEXT: ret %op1 = load <32 x half>, <32 x half>* %a %op2 = load <32 x half>, <32 x half>* %b @@ -702,17 +698,16 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8 -; VBITS_EQ_256-DAG: add x8, x0, #32 -; VBITS_EQ_256-DAG: add x9, x1, #32 -; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x8] +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8 +; VBITS_EQ_256-DAG: ld1w { [[OP1_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: ld1w { [[OP2_LO:z[0-9]+]].s }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x9] +; VBITS_EQ_256-DAG: ld1w { [[OP2_HI:z[0-9]+]].s }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-DAG: mov z[[ELEM2:[0-9]+]].s, [[OP2_LO]].s[7] ; VBITS_EQ_256-DAG: mov z[[ELEM1:[0-9]+]].s, [[OP1_HI]].s[7] ; VBITS_EQ_256-DAG: insr [[OP2_LO]].s, s[[ELEM1]] ; VBITS_EQ_256-DAG: insr [[OP2_HI]].s, s[[ELEM2]] ; VBITS_EQ_256-DAG: st1w { [[OP2_LO]].s }, [[PG]], [x0] -; VBITS_EQ_256-DAG: st1w { [[OP2_HI]].s }, [[PG]], [x8] +; VBITS_EQ_256-DAG: st1w { [[OP2_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2] ; VBITS_EQ_256-NEXT: ret %op1 = load <16 x float>, <16 x float>* %a %op2 = load <16 x float>, <16 x float>* %b @@ -805,17 +800,16 @@ ; Ensure sensible type legalisation. 
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4 -; VBITS_EQ_256-DAG: add x8, x0, #32 -; VBITS_EQ_256-DAG: add x9, x1, #32 -; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x8] -; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x9] +; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4 +; VBITS_EQ_256-DAG: ld1d { [[OP1_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: ld1d { [[OP2_LO:z[0-9]+]].d }, [[PG]]/z, [x1] -; VBITS_EQ_256-DAG: mov z[[ELEM1:[0-9]+]].d, [[OP1_HI]].d[3] +; VBITS_EQ_256-DAG: ld1d { [[OP2_HI:z[0-9]+]].d }, [[PG]]/z, [x1, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-DAG: mov z[[ELEM2:[0-9]+]].d, [[OP2_LO]].d[3] +; VBITS_EQ_256-DAG: mov z[[ELEM1:[0-9]+]].d, [[OP1_HI]].d[3] ; VBITS_EQ_256-DAG: insr [[OP2_LO]].d, d[[ELEM1]] ; VBITS_EQ_256-DAG: insr [[OP2_HI]].d, d[[ELEM2]] -; VBITS_EQ_256-DAG: st1d { [[OP2_HI]].d }, [[PG]], [x8] ; VBITS_EQ_256-DAG: st1d { [[OP2_LO]].d }, [[PG]], [x0] +; VBITS_EQ_256-DAG: st1d { [[OP2_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3] ; VBITS_EQ_256-NEXT: ret %op1 = load <8 x double>, <8 x double>* %a %op2 = load <8 x double>, <8 x double>* %b diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-imm.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-imm.ll @@ -104,8 +104,8 @@ ; below lower bound define @ld1roh_i16_below_lower_bound( %pg, i16* %a) nounwind { ; CHECK-LABEL: ld1roh_i16_below_lower_bound: -; CHECK-NEXT: sub x[[BASE:[0-9]+]], x0, #258 -; CHECK-NEXT: ld1roh { z0.h }, p0/z, [x[[BASE]]] +; CHECK-NEXT: mov x[[IDX:[0-9]+]], #-129 +; CHECK-NEXT: ld1roh { z0.h }, p0/z, [x0, x[[IDX]], lsl #1] ; CHECK-NEXT: ret %base = getelementptr i16, i16* %a, i64 -129 %load = call @llvm.aarch64.sve.ld1ro.nxv8i16( %pg, i16* %base) @@ -125,8 +125,8 @@ ; not a multiple of 32 define 
@ld1row_i32_not_multiple( %pg, i32* %a) nounwind { ; CHECK-LABEL: ld1row_i32_not_multiple: -; CHECK-NEXT: add x[[BASE:[0-9]+]], x0, #12 -; CHECK-NEXT: ld1row { z0.s }, p0/z, [x[[BASE]]] +; CHECK-NEXT: mov x[[IDX:[0-9]+]], #3 +; CHECK-NEXT: ld1row { z0.s }, p0/z, [x0, x[[IDX]], lsl #2] ; CHECK-NEXT: ret %base = getelementptr i32, i32* %a, i64 3 %load = call @llvm.aarch64.sve.ld1ro.nxv4i32( %pg, i32* %base) diff --git a/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll b/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll --- a/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll +++ b/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll @@ -62,17 +62,16 @@ define void @func_vscale2_2(<16 x i32>* %a, <16 x i32>* %b) #2 { ; CHECK-LABEL: func_vscale2_2: ; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #8 ; CHECK-NEXT: ptrue p0.s, vl8 -; CHECK-NEXT: add x8, x0, #32 // =32 -; CHECK-NEXT: add x9, x1, #32 // =32 -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: ld1w { z1.s }, p0/z, [x8] -; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1] -; CHECK-NEXT: ld1w { z3.s }, p0/z, [x9] +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0] +; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] +; CHECK-NEXT: ld1w { z3.s }, p0/z, [x1] ; CHECK-NEXT: add z0.s, p0/m, z0.s, z2.s ; CHECK-NEXT: add z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: st1w { z0.s }, p0, [x0] -; CHECK-NEXT: st1w { z1.s }, p0, [x8] +; CHECK-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; CHECK-NEXT: st1w { z1.s }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i32>, <16 x i32>* %a %op2 = load <16 x i32>, <16 x i32>* %b @@ -86,17 +85,16 @@ define void @func_vscale2_4(<16 x i32>* %a, <16 x i32>* %b) #3 { ; CHECK-LABEL: func_vscale2_4: ; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #8 ; CHECK-NEXT: ptrue p0.s, vl8 -; CHECK-NEXT: add x8, x0, #32 // =32 -; CHECK-NEXT: add x9, x1, #32 // =32 -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: ld1w { z1.s }, p0/z, [x8] -; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1] -; CHECK-NEXT: ld1w { z3.s 
}, p0/z, [x9] +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0] +; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] +; CHECK-NEXT: ld1w { z3.s }, p0/z, [x1] ; CHECK-NEXT: add z0.s, p0/m, z0.s, z2.s ; CHECK-NEXT: add z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: st1w { z0.s }, p0, [x0] -; CHECK-NEXT: st1w { z1.s }, p0, [x8] +; CHECK-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; CHECK-NEXT: st1w { z1.s }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i32>, <16 x i32>* %a %op2 = load <16 x i32>, <16 x i32>* %b