Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1312,6 +1312,10 @@
             getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
   }
 
+  // Returns true if VT is a legal index type for masked gathers/scatters
+  // on this target.
+  virtual bool shouldRemoveExtendFromGSIndex(EVT VT) const { return false; }
+
   /// Return how the condition code should be treated: either it is legal, needs
   /// to be expanded to some other code sequence, or the target has a custom
   /// expander for it.
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9374,16 +9374,74 @@
                      TopHalf->isNullValue() ? RHS->getOperand(1)
                                             : LHS->getOperand(1));
 }
 
+bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
+  if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
+    return false;
+
+  // For now we check only the LHS of the add.
+  SDValue LHS = Index.getOperand(0);
+  SDValue SplatVal = DAG.getSplatValue(LHS);
+  if (!SplatVal)
+    return false;
+
+  BasePtr = SplatVal;
+  Index = Index.getOperand(1);
+  return true;
+}
+
+// Fold sext/zext of index into index type.
+bool refineIndexType(MaskedScatterSDNode *MSC, SDValue &Index, bool Scaled,
+                     SelectionDAG &DAG) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDValue Op = Index.getOperand(0);
+
+  if (Index.getOpcode() == ISD::ZERO_EXTEND) {
+    MSC->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
+    if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
+      Index = Op;
+      return true;
+    }
+  }
+
+  if (Index.getOpcode() == ISD::SIGN_EXTEND) {
+    MSC->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
+    if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
+      Index = Op;
+      return true;
+    }
+  }
+
+  return false;
+}
+
 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
   SDValue Mask = MSC->getMask();
   SDValue Chain = MSC->getChain();
+  SDValue Index = MSC->getIndex();
+  SDValue Scale = MSC->getScale();
+  SDValue StoreVal = MSC->getValue();
+  SDValue BasePtr = MSC->getBasePtr();
   SDLoc DL(N);
 
   // Zap scatters with a zero mask.
   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
     return Chain;
 
+  if (refineUniformBase(BasePtr, Index, DAG)) {
+    SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
+    return DAG.getMaskedScatter(
+        DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
+        MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
+  }
+
+  if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
+    SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
+    return DAG.getMaskedScatter(
+        DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
+        MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
+  }
+
   return SDValue();
 }
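
For context, a sketch of the pattern refineUniformBase matches (an illustrative example of mine, not part of the patch; the abbreviated intrinsic mangling is an assumption): a scatter to a uniform base plus a vector of offsets reaches the combiner as a masked_scatter whose base pointer is null and whose index has the form add(splat(%base), %offsets), and the combine moves the value recognised by getSplatValue back into the scalar base operand.

; Conceptually:
;   masked_scatter (base = null, index = add (splat %base), %offsets)
;     --refineUniformBase-->
;   masked_scatter (base = %base, index = %offsets)
define void @uniform_base_scatter(<vscale x 2 x i64> %data, i64* %base,
                                  <vscale x 2 x i64> %offsets,
                                  <vscale x 2 x i1> %mask) {
  %ptrs = getelementptr i64, i64* %base, <vscale x 2 x i64> %offsets
  call void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64> %data,
                                         <vscale x 2 x i64*> %ptrs,
                                         i32 8, <vscale x 2 x i1> %mask)
  ret void
}
declare void @llvm.masked.scatter.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64*>, i32, <vscale x 2 x i1>)
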
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -982,6 +982,7 @@
     return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
   }
 
+  bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
   bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
   bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
   bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3706,6 +3706,14 @@
   }
 }
 
+bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
+  if (VT.getVectorElementType() == MVT::i32 &&
+      VT.getVectorElementCount().getKnownMinValue() >= 4)
+    return true;
+
+  return false;
+}
+
 bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
   return ExtVal.getValueType().isScalableVector();
 }
@@ -3793,11 +3801,8 @@
     InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
   }
 
-  if (getScatterIndexIsExtended(Index)) {
-    if (Index.getOpcode() == ISD::AND)
-      IsSigned = false;
+  if (getScatterIndexIsExtended(Index))
     Index = Index.getOperand(0);
-  }
 
   SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
   return DAG.getNode(getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend), DL,
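
On AArch64 the hook accepts i32 indices in vectors of at least four elements, which is exactly where a packed .s-form scatter exists; two-element index vectors keep their extend, and the existing scatter lowering folds it into the sxtw/uxtw addressing mode of a .d-form scatter instead. A sketch of the case that now improves (adapted from the tests below; the abbreviated intrinsic mangling is an assumption):

define void @nxv4i32_sext_index(<vscale x 4 x i32> %data, i32* %base,
                                <vscale x 4 x i32> %indexes,
                                <vscale x 4 x i1> %masks) {
; expected codegen after this patch:
;   st1w { z0.s }, p0, [x0, z1.s, sxtw #2]
  %ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
  %ptrs = getelementptr i32, i32* %base, <vscale x 4 x i64> %ext
  call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> %data,
                                         <vscale x 4 x i32*> %ptrs,
                                         i32 4, <vscale x 4 x i1> %masks)
  ret void
}
declare void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32*>, i32, <vscale x 4 x i1>)
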
Index: llvm/test/CodeGen/AArch64/sve-masked-scatter-32b-scaled.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-masked-scatter-32b-scaled.ll
+++ llvm/test/CodeGen/AArch64/sve-masked-scatter-32b-scaled.ll
@@ -166,15 +166,7 @@
 define void @masked_scatter_nxv4i16_sext(<vscale x 4 x i16> %data, i16* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv4i16_sext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    uunpkhi z2.d, z1.s
-; CHECK-NEXT:    uunpklo z1.d, z1.s
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1h { z3.d }, p2, [x0, z1.d, sxtw #1]
-; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z2.d, sxtw #1]
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0, z1.s, sxtw #1]
 ; CHECK-NEXT:    ret
   %ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
   %ptrs = getelementptr i16, i16* %base, <vscale x 4 x i64> %ext
@@ -185,15 +177,7 @@
 define void @masked_scatter_nxv4i32_sext(<vscale x 4 x i32> %data, i32* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv4i32_sext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    uunpkhi z2.d, z1.s
-; CHECK-NEXT:    uunpklo z1.d, z1.s
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1w { z3.d }, p2, [x0, z1.d, sxtw #2]
-; CHECK-NEXT:    st1w { z0.d }, p0, [x0, z2.d, sxtw #2]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, z1.s, sxtw #2]
 ; CHECK-NEXT:    ret
   %ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
   %ptrs = getelementptr i32, i32* %base, <vscale x 4 x i64> %ext
@@ -204,15 +188,7 @@
 define void @masked_scatter_nxv4f16_sext(<vscale x 4 x half> %data, half* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv4f16_sext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    uunpkhi z2.d, z1.s
-; CHECK-NEXT:    uunpklo z1.d, z1.s
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1h { z3.d }, p2, [x0, z1.d, sxtw #1]
-; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z2.d, sxtw #1]
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0, z1.s, sxtw #1]
 ; CHECK-NEXT:    ret
   %ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
   %ptrs = getelementptr half, half* %base, <vscale x 4 x i64> %ext
@@ -223,15 +199,7 @@
 define void @masked_scatter_nxv4bf16_sext(<vscale x 4 x bfloat> %data, bfloat* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind #0 {
 ; CHECK-LABEL: masked_scatter_nxv4bf16_sext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    uunpkhi z2.d, z1.s
-; CHECK-NEXT:    uunpklo z1.d, z1.s
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1h { z3.d }, p2, [x0, z1.d, sxtw #1]
-; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z2.d, sxtw #1]
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0, z1.s, sxtw #1]
 ; CHECK-NEXT:    ret
   %ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
   %ptrs = getelementptr bfloat, bfloat* %base, <vscale x 4 x i64> %ext
@@ -242,15 +210,7 @@
 define void @masked_scatter_nxv4f32_sext(<vscale x 4 x float> %data, float* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind #0 {
 ; CHECK-LABEL: masked_scatter_nxv4f32_sext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    uunpkhi z2.d, z1.s
-; CHECK-NEXT:    uunpklo z1.d, z1.s
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1w { z3.d }, p2, [x0, z1.d, sxtw #2]
-; CHECK-NEXT:    st1w { z0.d }, p0, [x0, z2.d, sxtw #2]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, z1.s, sxtw #2]
 ; CHECK-NEXT:    ret
   %ext = sext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
   %ptrs = getelementptr float, float* %base, <vscale x 4 x i64> %ext
@@ -261,15 +221,7 @@
 define void @masked_scatter_nxv4i16_zext(<vscale x 4 x i16> %data, i16* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv4i16_zext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    uunpkhi z2.d, z1.s
-; CHECK-NEXT:    uunpklo z1.d, z1.s
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1h { z3.d }, p2, [x0, z1.d, uxtw #1]
-; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z2.d, uxtw #1]
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0, z1.s, uxtw #1]
 ; CHECK-NEXT:    ret
   %ext = zext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
   %ptrs = getelementptr i16, i16* %base, <vscale x 4 x i64> %ext
@@ -280,15 +232,7 @@
 define void @masked_scatter_nxv4i32_zext(<vscale x 4 x i32> %data, i32* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv4i32_zext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    uunpkhi z2.d, z1.s
-; CHECK-NEXT:    uunpklo z1.d, z1.s
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1w { z3.d }, p2, [x0, z1.d, uxtw #2]
-; CHECK-NEXT:    st1w { z0.d }, p0, [x0, z2.d, uxtw #2]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, z1.s, uxtw #2]
 ; CHECK-NEXT:    ret
   %ext = zext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
   %ptrs = getelementptr i32, i32* %base, <vscale x 4 x i64> %ext
@@ -299,15 +243,7 @@
 define void @masked_scatter_nxv4f16_zext(<vscale x 4 x half> %data, half* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv4f16_zext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    uunpkhi z2.d, z1.s
-; CHECK-NEXT:    uunpklo z1.d, z1.s
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1h { z3.d }, p2, [x0, z1.d, uxtw #1]
-; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z2.d, uxtw #1]
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0, z1.s, uxtw #1]
 ; CHECK-NEXT:    ret
   %ext = zext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
   %ptrs = getelementptr half, half* %base, <vscale x 4 x i64> %ext
@@ -318,15 +254,7 @@
 define void @masked_scatter_nxv4bf16_zext(<vscale x 4 x bfloat> %data, bfloat* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind #0 {
 ; CHECK-LABEL: masked_scatter_nxv4bf16_zext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    uunpkhi z2.d, z1.s
-; CHECK-NEXT:    uunpklo z1.d, z1.s
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1h { z3.d }, p2, [x0, z1.d, uxtw #1]
-; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z2.d, uxtw #1]
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0, z1.s, uxtw #1]
 ; CHECK-NEXT:    ret
   %ext = zext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
   %ptrs = getelementptr bfloat, bfloat* %base, <vscale x 4 x i64> %ext
@@ -337,15 +265,7 @@
 define void @masked_scatter_nxv4f32_zext(<vscale x 4 x float> %data, float* %base, <vscale x 4 x i32> %indexes, <vscale x 4 x i1> %masks) nounwind #0 {
 ; CHECK-LABEL: masked_scatter_nxv4f32_zext:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    uunpkhi z2.d, z1.s
-; CHECK-NEXT:    uunpklo z1.d, z1.s
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1w { z3.d }, p2, [x0, z1.d, uxtw #2]
-; CHECK-NEXT:    st1w { z0.d }, p0, [x0, z2.d, uxtw #2]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, z1.s, uxtw #2]
 ; CHECK-NEXT:    ret
   %ext = zext <vscale x 4 x i32> %indexes to <vscale x 4 x i64>
   %ptrs = getelementptr float, float* %base, <vscale x 4 x i64> %ext
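
In the scaled tests above the GEP is typed, so the #1/#2 immediates in the new single-instruction scatters scale the 32-bit index by the 2- or 4-byte element size. The next file exercises the unscaled forms, where the GEP is over i8 and the offsets are plain byte offsets; a representative input looks like this (my sketch, mirroring masked_scatter_nxv2i16_sext_offsets below; abbreviated intrinsic mangling assumed):

define void @unscaled_byte_offsets(<vscale x 2 x i16> %data, i8* %base,
                                   <vscale x 2 x i32> %i32offsets,
                                   <vscale x 2 x i1> %masks) {
; expected codegen after this patch (note: no scaling immediate):
;   st1h { z0.d }, p0, [x0, z1.d, sxtw]
  %offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
  %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
  %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
  call void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16> %data,
                                         <vscale x 2 x i16*> %ptrs,
                                         i32 2, <vscale x 2 x i1> %masks)
  ret void
}
declare void @llvm.masked.scatter.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16*>, i32, <vscale x 2 x i1>)
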
Index: llvm/test/CodeGen/AArch64/sve-masked-scatter-32b-unscaled.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-masked-scatter-32b-unscaled.ll
+++ llvm/test/CodeGen/AArch64/sve-masked-scatter-32b-unscaled.ll
@@ -8,12 +8,7 @@
 define void @masked_scatter_nxv2i8_sext_offsets(<vscale x 2 x i8> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv2i8_sext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    sxtw z1.d, p1/m, z1.d
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1b { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1b { z0.d }, p0, [x0, z1.d, sxtw]
 ; CHECK-NEXT:    ret
   %offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -25,12 +20,7 @@
 define void @masked_scatter_nxv2i16_sext_offsets(<vscale x 2 x i16> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv2i16_sext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    sxtw z1.d, p1/m, z1.d
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z1.d, sxtw]
 ; CHECK-NEXT:    ret
   %offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -42,12 +32,7 @@
 define void @masked_scatter_nxv2i32_sext_offsets(<vscale x 2 x i32> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv2i32_sext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    sxtw z1.d, p1/m, z1.d
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0, z1.d, sxtw]
 ; CHECK-NEXT:    ret
   %offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -59,12 +44,7 @@
 define void @masked_scatter_nxv2i64_sext_offsets(<vscale x 2 x i64> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv2i64_sext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    sxtw z1.d, p1/m, z1.d
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1d { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d, sxtw]
 ; CHECK-NEXT:    ret
   %offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -76,12 +56,7 @@
 define void @masked_scatter_nxv2f16_sext_offsets(<vscale x 2 x half> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv2f16_sext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    sxtw z1.d, p1/m, z1.d
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z1.d, sxtw]
 ; CHECK-NEXT:    ret
   %offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -93,12 +68,7 @@
 define void @masked_scatter_nxv2bf16_sext_offsets(<vscale x 2 x bfloat> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind #0 {
 ; CHECK-LABEL: masked_scatter_nxv2bf16_sext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    sxtw z1.d, p1/m, z1.d
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z1.d, sxtw]
 ; CHECK-NEXT:    ret
   %offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -110,12 +80,7 @@
 define void @masked_scatter_nxv2f32_sext_offsets(<vscale x 2 x float> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv2f32_sext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    sxtw z1.d, p1/m, z1.d
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0, z1.d, sxtw]
 ; CHECK-NEXT:    ret
   %offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -127,12 +92,7 @@
 define void @masked_scatter_nxv2f64_sext_offsets(<vscale x 2 x double> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv2f64_sext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    sxtw z1.d, p1/m, z1.d
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1d { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d, sxtw]
 ; CHECK-NEXT:    ret
   %offsets = sext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -144,11 +104,7 @@
 define void @masked_scatter_nxv2i8_zext_offsets(<vscale x 2 x i8> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv2i8_zext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1b { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1b { z0.d }, p0, [x0, z1.d, uxtw]
 ; CHECK-NEXT:    ret
   %offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -160,11 +116,7 @@
 define void @masked_scatter_nxv2i16_zext_offsets(<vscale x 2 x i16> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv2i16_zext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z1.d, uxtw]
 ; CHECK-NEXT:    ret
   %offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -176,11 +128,7 @@
 define void @masked_scatter_nxv2i32_zext_offsets(<vscale x 2 x i32> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv2i32_zext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0, z1.d, uxtw]
 ; CHECK-NEXT:    ret
   %offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -192,11 +140,7 @@
 define void @masked_scatter_nxv2i64_zext_offsets(<vscale x 2 x i64> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv2i64_zext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1d { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d, uxtw]
 ; CHECK-NEXT:    ret
   %offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -208,11 +152,7 @@
 define void @masked_scatter_nxv2f16_zext_offsets(<vscale x 2 x half> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv2f16_zext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z1.d, uxtw]
 ; CHECK-NEXT:    ret
   %offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -224,11 +164,7 @@
 define void @masked_scatter_nxv2bf16_zext_offsets(<vscale x 2 x bfloat> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind #0 {
 ; CHECK-LABEL: masked_scatter_nxv2bf16_zext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z1.d, uxtw]
 ; CHECK-NEXT:    ret
   %offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -240,11 +176,7 @@
 define void @masked_scatter_nxv2f32_zext_offsets(<vscale x 2 x float> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv2f32_zext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0, z1.d, uxtw]
 ; CHECK-NEXT:    ret
   %offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -256,11 +188,7 @@
 define void @masked_scatter_nxv2f64_zext_offsets(<vscale x 2 x double> %data, i8* %base, <vscale x 2 x i32> %i32offsets, <vscale x 2 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv2f64_zext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1d { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d, uxtw]
 ; CHECK-NEXT:    ret
   %offsets = zext <vscale x 2 x i32> %i32offsets to <vscale x 2 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
@@ -275,19 +203,7 @@
 define void @masked_scatter_nxv4i8_sext_offsets(<vscale x 4 x i8> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv4i8_sext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    sunpklo z3.d, z1.s
-; CHECK-NEXT:    sunpkhi z1.d, z1.s
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    add z2.d, z2.d, z3.d
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1b { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT:    st1b { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1b { z0.s }, p0, [x0, z1.s, sxtw]
 ; CHECK-NEXT:    ret
   %offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -299,19 +215,7 @@
 define void @masked_scatter_nxv4i16_sext_offsets(<vscale x 4 x i16> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv4i16_sext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    sunpklo z3.d, z1.s
-; CHECK-NEXT:    sunpkhi z1.d, z1.s
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    add z2.d, z2.d, z3.d
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1h { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT:    st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0, z1.s, sxtw]
 ; CHECK-NEXT:    ret
   %offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -323,19 +227,7 @@
 define void @masked_scatter_nxv4i32_sext_offsets(<vscale x 4 x i32> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv4i32_sext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    sunpklo z3.d, z1.s
-; CHECK-NEXT:    sunpkhi z1.d, z1.s
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    add z2.d, z2.d, z3.d
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1w { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT:    st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, z1.s, sxtw]
 ; CHECK-NEXT:    ret
   %offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -347,19 +239,7 @@
 define void @masked_scatter_nxv4f16_sext_offsets(<vscale x 4 x half> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv4f16_sext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    sunpklo z3.d, z1.s
-; CHECK-NEXT:    sunpkhi z1.d, z1.s
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    add z2.d, z2.d, z3.d
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1h { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT:    st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0, z1.s, sxtw]
 ; CHECK-NEXT:    ret
   %offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -371,19 +251,7 @@
 define void @masked_scatter_nxv4bf16_sext_offsets(<vscale x 4 x bfloat> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind #0 {
 ; CHECK-LABEL: masked_scatter_nxv4bf16_sext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    sunpklo z3.d, z1.s
-; CHECK-NEXT:    sunpkhi z1.d, z1.s
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    add z2.d, z2.d, z3.d
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1h { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT:    st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0, z1.s, sxtw]
 ; CHECK-NEXT:    ret
   %offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -395,19 +263,7 @@
 define void @masked_scatter_nxv4f32_sext_offsets(<vscale x 4 x float> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind #0 {
 ; CHECK-LABEL: masked_scatter_nxv4f32_sext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    sunpklo z3.d, z1.s
-; CHECK-NEXT:    sunpkhi z1.d, z1.s
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    add z2.d, z2.d, z3.d
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1w { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT:    st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, z1.s, sxtw]
 ; CHECK-NEXT:    ret
   %offsets = sext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -419,19 +275,7 @@
 define void @masked_scatter_nxv4i8_zext_offsets(<vscale x 4 x i8> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv4i8_zext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    uunpklo z3.d, z1.s
-; CHECK-NEXT:    uunpkhi z1.d, z1.s
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    add z2.d, z2.d, z3.d
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1b { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT:    st1b { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1b { z0.s }, p0, [x0, z1.s, uxtw]
 ; CHECK-NEXT:    ret
   %offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -443,19 +287,7 @@
 define void @masked_scatter_nxv4i16_zext_offsets(<vscale x 4 x i16> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv4i16_zext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    uunpklo z3.d, z1.s
-; CHECK-NEXT:    uunpkhi z1.d, z1.s
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    add z2.d, z2.d, z3.d
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1h { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT:    st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0, z1.s, uxtw]
 ; CHECK-NEXT:    ret
   %offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -467,19 +299,7 @@
 define void @masked_scatter_nxv4i32_zext_offsets(<vscale x 4 x i32> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv4i32_zext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    uunpklo z3.d, z1.s
-; CHECK-NEXT:    uunpkhi z1.d, z1.s
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    add z2.d, z2.d, z3.d
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1w { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT:    st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, z1.s, uxtw]
 ; CHECK-NEXT:    ret
   %offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -491,19 +311,7 @@
 define void @masked_scatter_nxv4f16_zext_offsets(<vscale x 4 x half> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv4f16_zext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    uunpklo z3.d, z1.s
-; CHECK-NEXT:    uunpkhi z1.d, z1.s
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    add z2.d, z2.d, z3.d
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1h { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT:    st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0, z1.s, uxtw]
 ; CHECK-NEXT:    ret
   %offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -515,19 +323,7 @@
 define void @masked_scatter_nxv4bf16_zext_offsets(<vscale x 4 x bfloat> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind #0 {
 ; CHECK-LABEL: masked_scatter_nxv4bf16_zext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    uunpklo z3.d, z1.s
-; CHECK-NEXT:    uunpkhi z1.d, z1.s
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    add z2.d, z2.d, z3.d
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1h { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT:    st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0, z1.s, uxtw]
 ; CHECK-NEXT:    ret
   %offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
@@ -539,19 +335,7 @@
 define void @masked_scatter_nxv4f32_zext_offsets(<vscale x 4 x float> %data, i8* %base, <vscale x 4 x i32> %i32offsets, <vscale x 4 x i1> %masks) nounwind #0 {
 ; CHECK-LABEL: masked_scatter_nxv4f32_zext_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    uunpklo z3.d, z1.s
-; CHECK-NEXT:    uunpkhi z1.d, z1.s
-; CHECK-NEXT:    pfalse p1.b
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    add z2.d, z2.d, z3.d
-; CHECK-NEXT:    uunpklo z3.d, z0.s
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    zip1 p2.s, p0.s, p1.s
-; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
-; CHECK-NEXT:    st1w { z3.d }, p2, [x8, z2.d]
-; CHECK-NEXT:    st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0, z1.s, uxtw]
 ; CHECK-NEXT:    ret
   %offsets = zext <vscale x 4 x i32> %i32offsets to <vscale x 4 x i64>
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 4 x i64> %offsets
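
The final file covers offsets that are already 64-bit, so no extend is involved and only the uniform-base refinement fires: the base stays in x0 and the mov/add sequence that previously materialised base-plus-offsets in a vector register disappears. A minimal sketch (mine; it mirrors masked_scatter_nxv2i8_unscaled_64bit_offsets below, with the no-op bitcast omitted and abbreviated intrinsic mangling assumed):

define void @unscaled_64bit_offsets(<vscale x 2 x i8> %data, i8* %base,
                                    <vscale x 2 x i64> %offsets,
                                    <vscale x 2 x i1> %masks) {
; previously: mov z2.d, x0 / mov x8, xzr / add z1.d, z2.d, z1.d
;             st1b { z0.d }, p0, [x8, z1.d]
; after this patch: st1b { z0.d }, p0, [x0, z1.d]
  %ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
  call void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8> %data,
                                        <vscale x 2 x i8*> %ptrs,
                                        i32 1, <vscale x 2 x i1> %masks)
  ret void
}
declare void @llvm.masked.scatter.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8*>, i32, <vscale x 2 x i1>)
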
Index: llvm/test/CodeGen/AArch64/sve-masked-scatter-64b-unscaled.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-masked-scatter-64b-unscaled.ll
+++ llvm/test/CodeGen/AArch64/sve-masked-scatter-64b-unscaled.ll
@@ -8,10 +8,7 @@
 define void @masked_scatter_nxv2i8_unscaled_64bit_offsets(<vscale x 2 x i8> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv2i8_unscaled_64bit_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1b { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1b { z0.d }, p0, [x0, z1.d]
 ; CHECK-NEXT:    ret
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
   %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i8*>
@@ -22,10 +19,7 @@
 define void @masked_scatter_nxv2i16_unscaled_64bit_offsets(<vscale x 2 x i16> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv2i16_unscaled_64bit_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z1.d]
 ; CHECK-NEXT:    ret
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
   %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i16*>
@@ -36,10 +30,7 @@
 define void @masked_scatter_nxv2i32_unscaled_64bit_offsets(<vscale x 2 x i32> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv2i32_unscaled_64bit_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0, z1.d]
 ; CHECK-NEXT:    ret
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
   %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i32*>
@@ -50,10 +41,7 @@
 define void @masked_scatter_nxv2i64_unscaled_64bit_offsets(<vscale x 2 x i64> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv2i64_unscaled_64bit_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1d { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d]
 ; CHECK-NEXT:    ret
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
   %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x i64*>
@@ -64,10 +52,7 @@
 define void @masked_scatter_nxv2f16_unscaled_64bit_offsets(<vscale x 2 x half> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind {
 ; CHECK-LABEL: masked_scatter_nxv2f16_unscaled_64bit_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z1.d]
 ; CHECK-NEXT:    ret
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
   %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x half*>
@@ -78,10 +63,7 @@
 define void @masked_scatter_nxv2bf16_unscaled_64bit_offsets(<vscale x 2 x bfloat> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind #0 {
 ; CHECK-LABEL: masked_scatter_nxv2bf16_unscaled_64bit_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1h { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0, z1.d]
 ; CHECK-NEXT:    ret
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
   %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x bfloat*>
@@ -92,10 +74,7 @@
 define void @masked_scatter_nxv2f32_unscaled_64bit_offsets(<vscale x 2 x float> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind #0 {
 ; CHECK-LABEL: masked_scatter_nxv2f32_unscaled_64bit_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1w { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0, z1.d]
 ; CHECK-NEXT:    ret
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
   %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x float*>
@@ -106,10 +85,7 @@
 define void @masked_scatter_nxv2f64_unscaled_64bit_offsets(<vscale x 2 x double> %data, i8* %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %masks) nounwind #0 {
 ; CHECK-LABEL: masked_scatter_nxv2f64_unscaled_64bit_offsets:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, x0
-; CHECK-NEXT:    mov x8, xzr
-; CHECK-NEXT:    add z1.d, z2.d, z1.d
-; CHECK-NEXT:    st1d { z0.d }, p0, [x8, z1.d]
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, z1.d]
 ; CHECK-NEXT:    ret
   %byte_ptrs = getelementptr i8, i8* %base, <vscale x 2 x i64> %offsets
   %ptrs = bitcast <vscale x 2 x i8*> %byte_ptrs to <vscale x 2 x double*>