Index: llvm/include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -777,6 +777,41 @@
 
 let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
 
+  // SVE ACLE vector tuples are modelled as a single wide scalable vector that
+  // is the concatenation of N equally-typed parts. The intrinsics below only
+  // move whole vectors in and out of that value and never access memory, so
+  // they are all IntrNoMem.
+
+  // Result (overloaded type 0): the tuple. Operands (overloaded type 1): parts.
+  class AdvSIMD_SVE_Create_2Vector_Tuple
+    : Intrinsic<[llvm_anyvector_ty],
+                [llvm_anyvector_ty, LLVMMatchType<1>],
+                [IntrNoMem]>;
+
+  // Same as the 2-vector form, with three parts.
+  class AdvSIMD_SVE_Create_3Vector_Tuple
+    : Intrinsic<[llvm_anyvector_ty],
+                [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>],
+                [IntrNoMem]>;
+
+  // Same as the 2-vector form, with four parts.
+  class AdvSIMD_SVE_Create_4Vector_Tuple
+    : Intrinsic<[llvm_anyvector_ty],
+                [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
+                 LLVMMatchType<1>],
+                [IntrNoMem]>;
+
+  // (tuple, immediate part index, new part) -> tuple with that part replaced.
+  class AdvSIMD_SVE_Set_Vector_Tuple
+    : Intrinsic<[llvm_anyvector_ty],
+                [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty],
+                [IntrNoMem, ImmArg<1>]>;
+
+  // (tuple, immediate part index) -> the selected part.
+  class AdvSIMD_SVE_Get_Vector_Tuple
+    : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty],
+                [IntrNoMem, ImmArg<1>]>;
+
   class AdvSIMD_1Vec_PredLoad_Intrinsic
     : Intrinsic<[llvm_anyvector_ty],
                 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
@@ -1255,6 +1290,21 @@
                 [IntrWriteMem, IntrArgMemOnly]>;
 
 //
+// Vector tuple creation intrinsics (ACLE)
+//
+
+def int_aarch64_sve_tuple_create2 : AdvSIMD_SVE_Create_2Vector_Tuple;
+def int_aarch64_sve_tuple_create3 : AdvSIMD_SVE_Create_3Vector_Tuple;
+def int_aarch64_sve_tuple_create4 : AdvSIMD_SVE_Create_4Vector_Tuple;
+
+//
+// Vector tuple insertion/extraction intrinsics (ACLE)
+//
+
+def int_aarch64_sve_tuple_get : AdvSIMD_SVE_Get_Vector_Tuple;
+def int_aarch64_sve_tuple_set : AdvSIMD_SVE_Set_Vector_Tuple;
+
+//
 // Loads
 //
 
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -731,8 +731,9
@@ // Convert the vector to the appropriate type if necessary. unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts; - EVT BuiltVectorTy = EVT::getVectorVT( - *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts); + EVT BuiltVectorTy = + EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(), + DestVectorNoElts, ValueVT.isScalableVector()); if (ValueVT != BuiltVectorTy) { if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) Val = Widened; @@ -7029,6 +7030,53 @@ DAG.getZExtOrTrunc(Const, getCurSDLoc(), DestVT))); return; } + case Intrinsic::aarch64_sve_tuple_get: { + SDValue Src1 = getValue(I.getOperand(0)); + SDValue Idx = getValue(I.getOperand(1)); + + assert(Src1.getOpcode() == ISD::CONCAT_VECTORS && + "Unexpected operand for sve_tuple_get"); + + uint64_t IdxConst = cast(Idx)->getZExtValue(); + if (IdxConst > Src1->getNumOperands() - 1) + report_fatal_error("index larger than expected"); + setValue(&I, SDValue(Src1.getOperand(IdxConst))); + return; + } + case Intrinsic::aarch64_sve_tuple_set: { + SDValue Tuple = getValue(I.getOperand(0)); + SDValue Idx = getValue(I.getOperand(1)); + SDValue Vec = getValue(I.getOperand(2)); + + assert(Tuple.getOpcode() == ISD::CONCAT_VECTORS && + "Unexpected operand for sve_tuple_set"); + + uint64_t IdxConst = cast(Idx)->getZExtValue(); + + SmallVector Opnds; + for (unsigned J = 0; J < Tuple->getNumOperands(); ++J) + Opnds.push_back(J == IdxConst ? 
Vec : Tuple->getOperand(J)); + SDValue Result = + DAG.getNode(ISD::CONCAT_VECTORS, sdl, Tuple.getValueType(), Opnds); + setValue(&I, Result); + return; + } + case Intrinsic::aarch64_sve_tuple_create2: + case Intrinsic::aarch64_sve_tuple_create3: + case Intrinsic::aarch64_sve_tuple_create4: { + unsigned N = I.getNumArgOperands(); + SmallVector Opnds; + for (const auto &Arg : I.args()) + Opnds.push_back(getValue(Arg.get())); + + EVT VT = Opnds[0].getValueType(); + EVT EltVT = VT.getVectorElementType(); + EVT DestVT = + EVT::getVectorVT(*Context, EltVT, VT.getVectorElementCount() * N); + SDValue Result = DAG.getNode(ISD::CONCAT_VECTORS, sdl, DestVT, Opnds); + setValue(&I, Result); + return; + } } } Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -425,6 +425,18 @@ MachineFunction &MF, unsigned Intrinsic) const override; + unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, + CallingConv::ID CC, EVT VT, + EVT &IntermediateVT, + unsigned &NumIntermediates, + MVT &RegisterVT) const override; + + MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, + EVT VT) const override; + unsigned getNumRegistersForCallingConv(LLVMContext &Context, + CallingConv::ID CC, + EVT VT) const override; + bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const override; Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9003,6 +9003,60 @@ return false; } +MVT AArch64TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, + CallingConv::ID CC, + EVT VT) const { + if (!VT.isScalableVector() || + VT.getSizeInBits().getKnownMinSize() <= AArch64::SVEBitsPerBlock) + return 
TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); + + switch (VT.getVectorElementType().getSimpleVT().SimpleTy) { + case MVT::i8: + return MVT::nxv16i8; + case MVT::i16: + return MVT::nxv8i16; + case MVT::i32: + return MVT::nxv4i32; + case MVT::i64: + return MVT::nxv2i64; + case MVT::f16: + return MVT::nxv8f16; + case MVT::f32: + return MVT::nxv4f32; + case MVT::f64: + return MVT::nxv2f64; + default: + llvm_unreachable("Unsupported type for SVE vectors"); + } +} + +unsigned AArch64TargetLowering::getVectorTypeBreakdownForCallingConv( + LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, + unsigned &NumIntermediates, MVT &RegisterVT) const { + if (!VT.isScalableVector() || + VT.getSizeInBits().getKnownMinSize() <= AArch64::SVEBitsPerBlock) + return TargetLowering::getVectorTypeBreakdownForCallingConv( + Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT); + + IntermediateVT = RegisterVT = getRegisterTypeForCallingConv(Context, CC, VT); + NumIntermediates = getNumRegistersForCallingConv(Context, CC, VT); + return NumIntermediates; +} + +unsigned AArch64TargetLowering::getNumRegistersForCallingConv( + LLVMContext &Context, CallingConv::ID CC, EVT VT) const { + if (!VT.isScalableVector() || + VT.getSizeInBits().getKnownMinSize() <= AArch64::SVEBitsPerBlock) + return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); + + unsigned NumVectors = + VT.getSizeInBits().getKnownMinSize() / AArch64::SVEBitsPerBlock; + assert(NumVectors * AArch64::SVEBitsPerBlock == + VT.getSizeInBits().getKnownMinSize() && + "Not a multiple of a full SVE vector"); + return NumVectors; +} + bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const { Index: llvm/test/CodeGen/AArch64/sve-calling-convention-tuple-types.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-calling-convention-tuple-types.ll @@ -0,0 +1,499 @@ +; RUN: 
llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s + +; +; svint8x2_t +; + +define @ret_svint8x2_t( %unused_z0, %z1, %z2) #0 { +; CHECK-LABEL: ret_svint8x2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8( %z1, %z2) + ret %tuple +} + +define void @call_svint8x2_t( %dummy_z0, %z1, %dummy_z2, %z3) #0 { +; CHECK-LABEL: call_svint8x2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z3.d +; CHECK-NEXT: bl callee_svint8x2_t + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8( %z1, %z3) + call void @callee_svint8x2_t( %tuple) + ret void +} + +; +; svint16x2_t +; + +define @ret_svint16x2_t( %unused_z0, %z1, %z2) #0 { +; CHECK-LABEL: ret_svint16x2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16( %z1, %z2) + ret %tuple +} + +define void @call_svint16x2_t( %dummy_z0, %z1, %dummy_z2, %z3) #0 { +; CHECK-LABEL: call_svint16x2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z3.d +; CHECK-NEXT: bl callee_svint16x2_t + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16( %z1, %z3) + call void @callee_svint16x2_t( %tuple) + ret void +} + +; +; svint32x2_t +; + +define @ret_svint32x2_t( %unused_z0, %z1, %z2) #0 { +; CHECK-LABEL: ret_svint32x2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %z1, %z2) + ret %tuple +} + +define void @call_svint32x2_t( %dummy_z0, %z1, %dummy_z2, %z3) #0 { +; CHECK-LABEL: call_svint32x2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z3.d +; CHECK-NEXT: bl callee_svint32x2_t + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %z1, %z3) + call void @callee_svint32x2_t( %tuple) + ret void +} + +; +; svint64x2_t +; + +define @ret_svint64x2_t( %unused_z0, %z1, %z2) #0 { +; CHECK-LABEL: 
ret_svint64x2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64( %z1, %z2) + ret %tuple +} + +define void @call_svint64x2_t( %dummy_z0, %z1, %dummy_z2, %z3) #0 { +; CHECK-LABEL: call_svint64x2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z3.d +; CHECK-NEXT: bl callee_svint64x2_t + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64( %z1, %z3) + call void @callee_svint64x2_t( %tuple) + ret void +} + +; +; svfloatx2_t +; + +define @ret_svfloatx2_t( %unused_z0, %z1, %z2) #0 { +; CHECK-LABEL: ret_svfloatx2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32( %z1, %z2) + ret %tuple +} + +define void @call_svfloatx2_t( %dummy_z0, %z1, %dummy_z2, %z3) #0 { +; CHECK-LABEL: call_svfloatx2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z3.d +; CHECK-NEXT: bl callee_svfloatx2_t + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32( %z1, %z3) + call void @callee_svfloatx2_t( %tuple) + ret void +} + +; +; svdoublex2_t +; + +define @ret_svdoublex2_t( %unused_z0, %z1, %z2) #0 { +; CHECK-LABEL: ret_svdoublex2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64( %z1, %z2) + ret %tuple +} + +define void @call_svdoublex2_t( %dummy_z0, %z1, %dummy_z2, %z3) #0 { +; CHECK-LABEL: call_svdoublex2_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z3.d +; CHECK-NEXT: bl callee_svdoublex2_t + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64( %z1, %z3) + call void @callee_svdoublex2_t( %tuple) + ret void +} + +; +; svint8x3_t +; + +define @ret_svint8x3_t( %unused_z0, %z1, %z2, %z3) #0 { +; CHECK-LABEL: ret_svint8x3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call 
@llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8( %z1, %z2, %z3) + ret %tuple +} + +define void @call_svint8x3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { +; CHECK-LABEL: call_svint8x3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svint8x3_t + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8( %z1, %z2, %z4) + call void @callee_svint8x3_t( %tuple) + ret void +} + +; +; svint16x3_t +; + +define @ret_svint16x3_t( %unused_z0, %z1, %z2, %z3) #0 { +; CHECK-LABEL: ret_svint16x3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16( %z1, %z2, %z3) + ret %tuple +} + +define void @call_svint16x3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { +; CHECK-LABEL: call_svint16x3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svint16x3_t + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16( %z1, %z2, %z4) + call void @callee_svint16x3_t( %tuple) + ret void +} + +; +; svint32x3_t +; + +define @ret_svint32x3_t( %unused_z0, %z1, %z2, %z3) #0 { +; CHECK-LABEL: ret_svint32x3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z1, %z2, %z3) + ret %tuple +} + +define void @call_svint32x3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { +; CHECK-LABEL: call_svint32x3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svint32x3_t + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z1, %z2, %z4) + call void @callee_svint32x3_t( %tuple) + ret void +} + +; +; svint64x3_t +; + +define @ret_svint64x3_t( %unused_z0, %z1, %z2, %z3) #0 { +; CHECK-LABEL: ret_svint64x3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, 
z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64( %z1, %z2, %z3) + ret %tuple +} + +define void @call_svint64x3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { +; CHECK-LABEL: call_svint64x3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svint64x3_t + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64( %z1, %z2, %z4) + call void @callee_svint64x3_t( %tuple) + ret void +} + +; +; svfloatx3_t +; + +define @ret_svfloatx3_t( %unused_z0, %z1, %z2, %z3) #0 { +; CHECK-LABEL: ret_svfloatx3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32( %z1, %z2, %z3) + ret %tuple +} + +define void @call_svfloatx3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { +; CHECK-LABEL: call_svfloatx3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svfloatx3_t + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32( %z1, %z2, %z4) + call void @callee_svfloatx3_t( %tuple) + ret void +} + +; +; svdoublex3_t +; + +define @ret_svdoublex3_t( %unused_z0, %z1, %z2, %z3) #0 { +; CHECK-LABEL: ret_svdoublex3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64( %z1, %z2, %z3) + ret %tuple +} + +define void @call_svdoublex3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { +; CHECK-LABEL: call_svdoublex3_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svdoublex3_t + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64( %z1, %z2, %z4) + call void @callee_svdoublex3_t( %tuple) + ret void +} + +; +; svint8x4_t +; + +define @ret_svint8x4_t( %unused_z0, %z1, %z2, %z3, %z4) 
#0 { +; CHECK-LABEL: ret_svint8x4_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: mov z3.d, z4.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8( %z1, %z2, %z3, %z4) + ret %tuple +} + +define void @call_svint8x4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { +; CHECK-LABEL: call_svint8x4_t +; CHECK: mov z3.d, z5.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svint8x4_t + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8( %z1, %z2, %z4, %z5) + call void @callee_svint8x4_t( %tuple) + ret void +} + +; +; svint16x4_t +; + +define @ret_svint16x4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { +; CHECK-LABEL: ret_svint16x4_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: mov z3.d, z4.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16( %z1, %z2, %z3, %z4) + ret %tuple +} + +define void @call_svint16x4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { +; CHECK-LABEL: call_svint16x4_t +; CHECK: mov z3.d, z5.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svint16x4_t + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16( %z1, %z2, %z4, %z5) + call void @callee_svint16x4_t( %tuple) + ret void +} + +; +; svint32x4_t +; + +define @ret_svint32x4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { +; CHECK-LABEL: ret_svint32x4_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: mov z3.d, z4.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z1, %z2, %z3, %z4) + ret %tuple +} + +define void @call_svint32x4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { +; CHECK-LABEL: call_svint32x4_t +; CHECK: mov z3.d, z5.d +; CHECK-NEXT: mov z0.d, z1.d +; 
CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svint32x4_t + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z1, %z2, %z4, %z5) + call void @callee_svint32x4_t( %tuple) + ret void +} + +; +; svint64x4_t +; + +define @ret_svint64x4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { +; CHECK-LABEL: ret_svint64x4_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: mov z3.d, z4.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64( %z1, %z2, %z3, %z4) + ret %tuple +} + +define void @call_svint64x4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { +; CHECK-LABEL: call_svint64x4_t +; CHECK: mov z3.d, z5.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svint64x4_t + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64( %z1, %z2, %z4, %z5) + call void @callee_svint64x4_t( %tuple) + ret void +} + +; +; svfloatx4_t +; + +define @ret_svfloatx4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { +; CHECK-LABEL: ret_svfloatx4_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: mov z3.d, z4.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32( %z1, %z2, %z3, %z4) + ret %tuple +} + +define void @call_svfloatx4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { +; CHECK-LABEL: call_svfloatx4_t +; CHECK: mov z3.d, z5.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svfloatx4_t + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32( %z1, %z2, %z4, %z5) + call void @callee_svfloatx4_t( %tuple) + ret void +} + +; +; svdoublex4_t +; + +define @ret_svdoublex4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { +; CHECK-LABEL: ret_svdoublex4_t +; CHECK: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; 
CHECK-NEXT: mov z3.d, z4.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64( %z1, %z2, %z3, %z4) + ret %tuple +} + +define void @call_svdoublex4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { +; CHECK-LABEL: call_svdoublex4_t +; CHECK: mov z3.d, z5.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z4.d +; CHECK-NEXT: bl callee_svdoublex4_t + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64( %z1, %z2, %z4, %z5) + call void @callee_svdoublex4_t( %tuple) + ret void +} + +attributes #0 = { nounwind "target-features"="+sve" } + +declare void @callee_svint8x2_t() +declare void @callee_svint16x2_t() +declare void @callee_svint32x2_t() +declare void @callee_svint64x2_t() +declare void @callee_svfloatx2_t() +declare void @callee_svdoublex2_t() + +declare void @callee_svint8x3_t() +declare void @callee_svint16x3_t() +declare void @callee_svint32x3_t() +declare void @callee_svint64x3_t() +declare void @callee_svfloatx3_t() +declare void @callee_svdoublex3_t() + +declare void @callee_svint8x4_t() +declare void @callee_svint16x4_t() +declare void @callee_svint32x4_t() +declare void @callee_svint64x4_t() +declare void @callee_svfloatx4_t() +declare void @callee_svdoublex4_t() + + +; x2 +declare @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(, ) +declare @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(, ) +declare @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(, ) +declare @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(, ) +declare @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(, ) +declare @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(, ) + +; x3 +declare @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(, , ) +declare @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(, , ) +declare @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(, , ) +declare @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(, , ) +declare @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(, , ) +declare 
@llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(, , ) + +; x4 +declare @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(, , , ) Index: llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll @@ -0,0 +1,496 @@ +; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s + +; +; SVCREATE2 (i8) +; + +define @test_svcreate2_s8_vec0( %x0, %x1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_s8_vec0: +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8( %x0, %x1) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8( %tuple, i32 0) + ret %extract +} + +define @test_svcreate2_s8_vec1( %x0, %x1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_s8_vec1: +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8( %x0, %x1) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8( %tuple, i32 1) + ret %extract +} + +; +; SVCREATE2 (i16) +; + +define @test_svcreate2_s16_vec0( %x0, %x1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_s16_vec0: +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16( %x0, %x1) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16( %tuple, i32 0) + ret %extract +} + +define @test_svcreate2_s16_vec1( %x0, %x1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_s16_vec1: +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %tuple = tail call 
@llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16( %x0, %x1) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16( %tuple, i32 1) + ret %extract +} + +; +; SVCREATE2 (half) +; + +define @test_svcreate2_f16_vec0( %x0, %x1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_f16_vec0: +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16( %x0, %x1) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16( %tuple, i32 0) + ret %extract +} + +define @test_svcreate2_f16_vec1( %x0, %x1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_f16_vec1: +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16( %x0, %x1) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16( %tuple, i32 1) + ret %extract +} + +; +; SVCREATE2 (i32) +; + +define @test_svcreate2_s32_vec0( %x0, %x1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_s32_vec0: +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %x0, %x1) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32( %tuple, i32 0) + ret %extract +} + +define @test_svcreate2_s32_vec1( %x0, %x1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_s32_vec1: +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %x0, %x1) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32( %tuple, i32 1) + ret %extract +} + +; +; SVCREATE2 (float) +; + +define @test_svcreate2_f32_vec0( %x0, %x1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_f32_vec0: +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32( %x0, %x1) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32( %tuple, i32 0) + ret %extract +} + +define @test_svcreate2_f32_vec1( %x0, %x1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_f32_vec1: +; CHECK-NEXT: mov 
z0.d, z1.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32( %x0, %x1) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32( %tuple, i32 1) + ret %extract +} + +; +; SVCREATE2 (i64) +; + +define @test_svcreate2_s64_vec0( %x0, %x1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_s64_vec0: +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64( %x0, %x1) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64( %tuple, i32 0) + ret %extract +} + +define @test_svcreate2_s64_vec1( %x0, %x1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_s64_vec1: +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64( %x0, %x1) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64( %tuple, i32 1) + ret %extract +} + +; +; SVCREATE2 (double) +; + +define @test_svcreate2_f64_vec0( %x0, %x1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_f64_vec0: +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64( %x0, %x1) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64( %tuple, i32 0) + ret %extract +} + +define @test_svcreate2_f64_vec1( %x0, %x1) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate2_f64_vec1: +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64( %x0, %x1) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64( %tuple, i32 1) + ret %extract +} + +; +; SVCREATE3 (i8) +; + +define @test_svcreate3_s8_vec0( %x0, %x1, %x2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_s8_vec0: +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8( %x0, %x1, %x2) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8( %tuple, i32 0) + ret %extract +} + +define @test_svcreate3_s8_vec2( %x0, %x1, %x2) local_unnamed_addr #0 { +; 
CHECK-LABEL: test_svcreate3_s8_vec2: +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8( %x0, %x1, %x2) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8( %tuple, i32 2) + ret %extract +} + +; +; SVCREATE3 (i16) +; + +define @test_svcreate3_s16_vec0( %x0, %x1, %x2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_s16_vec0: +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16( %x0, %x1, %x2) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16( %tuple, i32 0) + ret %extract +} + +define @test_svcreate3_s16_vec2( %x0, %x1, %x2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_s16_vec2: +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16( %x0, %x1, %x2) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16( %tuple, i32 2) + ret %extract +} +; +; SVCREATE3 (half) +; + +define @test_svcreate3_f16_vec0( %x0, %x1, %x2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_f16_vec0: +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16( %x0, %x1, %x2) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16( %tuple, i32 0) + ret %extract +} + +define @test_svcreate3_f16_vec2( %x0, %x1, %x2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_f16_vec2: +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16( %x0, %x1, %x2) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16( %tuple, i32 2) + ret %extract +} + + +; +; SVCREATE3 (i32) +; + +define @test_svcreate3_s32_vec0( %x0, %x1, %x2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_s32_vec0: +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %x0, %x1, %x2) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32( 
%tuple, i32 0) + ret %extract +} + +define @test_svcreate3_s32_vec2( %x0, %x1, %x2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_s32_vec2: +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %x0, %x1, %x2) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32( %tuple, i32 2) + ret %extract +} + +; +; SVCREATE3 (float) +; + +define @test_svcreate3_f32_vec0( %x0, %x1, %x2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_f32_vec0: +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32( %x0, %x1, %x2) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32( %tuple, i32 0) + ret %extract +} + +define @test_svcreate3_f32_vec2( %x0, %x1, %x2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_f32_vec2: +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32( %x0, %x1, %x2) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32( %tuple, i32 2) + ret %extract +} + +; +; SVCREATE3 (i64) +; + +define @test_svcreate3_s64_vec0( %x0, %x1, %x2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_s64_vec0: +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64( %x0, %x1, %x2) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64( %tuple, i32 0) + ret %extract +} + +define @test_svcreate3_s64_vec2( %x0, %x1, %x2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_s64_vec2: +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64( %x0, %x1, %x2) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64( %tuple, i32 2) + ret %extract +} + +; +; SVCREATE3 (double) +; + +define @test_svcreate3_f64_vec0( %x0, %x1, %x2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_f64_vec0: +; CHECK-NEXT: ret + %tuple = tail call 
@llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64( %x0, %x1, %x2) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64( %tuple, i32 0) + ret %extract +} + +define @test_svcreate3_f64_vec2( %x0, %x1, %x2) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate3_f64_vec2: +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64( %x0, %x1, %x2) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64( %tuple, i32 2) + ret %extract +} + +; +; SVCREATE4 (i8) +; + +define @test_svcreate4_s8_vec0( %x0, %x1, %x2, %x3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_s8_vec0: +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8( %x0, %x1, %x2, %x3) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %tuple, i32 0) + ret %extract +} + +define @test_svcreate4_s8_vec3( %x0, %x1, %x2, %x3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_s8_vec3: +; CHECK-NEXT: mov z0.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8( %x0, %x1, %x2, %x3) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %tuple, i32 3) + ret %extract +} + +; +; SVCREATE4 (i16) +; + +define @test_svcreate4_s16_vec0( %x0, %x1, %x2, %x3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_s16_vec0: +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16( %x0, %x1, %x2, %x3) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %tuple, i32 0) + ret %extract +} + +define @test_svcreate4_s16_vec3( %x0, %x1, %x2, %x3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_s16_vec3: +; CHECK-NEXT: mov z0.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16( %x0, %x1, %x2, %x3) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %tuple, i32 3) + ret %extract +} + +; +; SVCREATE4 (half) +; + +define 
@test_svcreate4_f16_vec0( %x0, %x1, %x2, %x3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_f16_vec0: +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16( %x0, %x1, %x2, %x3) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16( %tuple, i32 0) + ret %extract +} + +define @test_svcreate4_f16_vec3( %x0, %x1, %x2, %x3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_f16_vec3: +; CHECK-NEXT: mov z0.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16( %x0, %x1, %x2, %x3) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16( %tuple, i32 3) + ret %extract +} + +; +; SVCREATE4 (i32) +; + +define @test_svcreate4_s32_vec0( %x0, %x1, %x2, %x3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_s32_vec0: +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %x0, %x1, %x2, %x3) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %tuple, i32 0) + ret %extract +} + +define @test_svcreate4_s32_vec3( %x0, %x1, %x2, %x3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_s32_vec3: +; CHECK-NEXT: mov z0.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %x0, %x1, %x2, %x3) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %tuple, i32 3) + ret %extract +} + +; +; SVCREATE4 (float) +; + +define @test_svcreate4_f32_vec0( %x0, %x1, %x2, %x3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_f32_vec0: +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32( %x0, %x1, %x2, %x3) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32( %tuple, i32 0) + ret %extract +} + +define @test_svcreate4_f32_vec3( %x0, %x1, %x2, %x3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_f32_vec3: +; CHECK-NEXT: mov z0.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call 
@llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32( %x0, %x1, %x2, %x3) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32( %tuple, i32 3) + ret %extract +} + +; +; SVCREATE4 (i64) +; + +define @test_svcreate4_s64_vec0( %x0, %x1, %x2, %x3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_s64_vec0: +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64( %x0, %x1, %x2, %x3) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %tuple, i32 0) + ret %extract +} + +define @test_svcreate4_s64_vec3( %x0, %x1, %x2, %x3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_s64_vec3: +; CHECK-NEXT: mov z0.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64( %x0, %x1, %x2, %x3) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %tuple, i32 3) + ret %extract +} + +; +; SVCREATE4 (double) +; + +define @test_svcreate4_f64_vec0( %x0, %x1, %x2, %x3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_f64_vec0: +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64( %x0, %x1, %x2, %x3) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64( %tuple, i32 0) + ret %extract +} + +define @test_svcreate4_f64_vec3( %x0, %x1, %x2, %x3) local_unnamed_addr #0 { +; CHECK-LABEL: test_svcreate4_f64_vec3: +; CHECK-NEXT: mov z0.d, z3.d +; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64( %x0, %x1, %x2, %x3) + %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64( %tuple, i32 3) + ret %extract +} + +attributes #0 = { nounwind "target-features"="+sve" } + +declare @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(, ) +declare @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(, ) +declare @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16(, ) +declare @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(, ) +declare @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(, ) +declare 
@llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(, ) +declare @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(, ) + +declare @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(, , ) +declare @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(, , ) +declare @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16(, , ) +declare @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(, , ) +declare @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(, , ) +declare @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(, , ) +declare @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(, , ) + +declare @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64 (, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16(, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(, , , ) +declare @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(, , , ) + +declare @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(, i32 immarg) + +declare @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(, i32 immarg) + +declare @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(, i32 immarg) + +declare @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(, i32 immarg) + +declare @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16(, i32 immarg) +declare 
@llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(, i32 immarg) + +declare @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(, i32 immarg) + +declare @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64(, i32 immarg) +declare @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(, i32 immarg) Index: llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll @@ -0,0 +1,167 @@ +; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s + +; All these tests create a vector tuple, insert z5 into one of the elements, +; and finally extract that element from the wide vector to return it. These +; checks ensure that z5 is always the value that is returned. 
+ +; +; Insert into two element tuples +; + +; tuple: { tuple2.res0, tuple2.res1 } +; insert z5: { z5 , tuple2.res1 } +; extract z5: ^^ +define @set_tuple2_nxv8i32_elt0( %z0, %z1, + %z2, %z3, + %z4, %z5) #0 { + ; CHECK-LABEL: set_tuple2_nxv8i32_elt0: + ; CHECK-NEXT: mov z0.d, z5.d + ; CHECK-NEXT: ret + %tuple = call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %z0, %z1) + %ins = call @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32( %tuple, i32 0, %z5) + %ext = call @llvm.aarch64.sve.tuple.get.nxv8i32( %ins, i32 0) + ret %ext +} + +; tuple: { tuple2.res0, tuple2.res1 } +; insert z5: { tuple2.res0, z5 } +; extract z5: ^^ +define @set_tuple2_nxv8i32_elt1( %z0, %z1, + %z2, %z3, + %z4, %z5) #0 { + ; CHECK-LABEL: set_tuple2_nxv8i32_elt1: + ; CHECK-NEXT: mov z0.d, z5.d + ; CHECK-NEXT: ret + %tuple = call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %z0, %z1) + %ins = call @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32( %tuple, i32 1, %z5) + %ext = call @llvm.aarch64.sve.tuple.get.nxv8i32( %ins, i32 1) + ret %ext +} + + +; +; Insert into three element tuples +; + +; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 } +; insert z5: { z5 , tuple3.res1, tuple3.res2 } +; extract z5: ^^ +define @set_tuple3_nxv12i32_elt0( %z0, %z1, + %z2, %z3, + %z4, %z5) #0 { + ; CHECK-LABEL: set_tuple3_nxv12i32_elt0: + ; CHECK-NEXT: mov z0.d, z5.d + ; CHECK-NEXT: ret + %tuple = call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z0, %z1, %z2) + %ins = call @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32( %tuple, i32 0, %z5) + %ext = call @llvm.aarch64.sve.tuple.get.nxv12i32( %ins, i32 0) + ret %ext +} + +; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 } +; insert z5: { tuple3.res0, z5 , tuple3.res2 } +; extract z5: ^^ +define @set_tuple3_nxv12i32_elt1( %z0, %z1, + %z2, %z3, + %z4, %z5) #0 { + ; CHECK-LABEL: set_tuple3_nxv12i32_elt1: + ; CHECK-NEXT: mov z0.d, z5.d + ; CHECK-NEXT: ret + %tuple = call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z0, %z1, %z2) + %ins = call 
@llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32( %tuple, i32 1, %z5) + %ext = call @llvm.aarch64.sve.tuple.get.nxv12i32( %ins, i32 1) + ret %ext +} + +; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 } +; insert z5: { tuple3.res0, tuple3.res1, z5 } +; extract z5: ^^ +define @set_tuple3_nxv12i32_elt2( %z0, %z1, + %z2, %z3, + %z4, %z5) #0 { + ; CHECK-LABEL: set_tuple3_nxv12i32_elt2: + ; CHECK-NEXT: mov z0.d, z5.d + ; CHECK-NEXT: ret + %tuple = call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z0, %z1, %z2) + %ins = call @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32( %tuple, i32 2, %z5) + %ext = call @llvm.aarch64.sve.tuple.get.nxv12i32( %ins, i32 2) + ret %ext +} + +; +; Insert into four element tuples +; + +; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 } +; insert z5: { z5 , tuple4.res1, tuple4.res2, tuple4.res3 } +; extract z5: ^^ +define @set_tuple4_nxv16i32_elt0( %z0, %z1, + %z2, %z3, + %z4, %z5) #0 { + ; CHECK-LABEL: set_tuple4_nxv16i32_elt0: + ; CHECK-NEXT: mov z0.d, z5.d + ; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z0, %z1, %z2, %z3) + %ins = call @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32( %tuple, i32 0, %z5) + %ext = call @llvm.aarch64.sve.tuple.get.nxv16i32( %ins, i32 0) + ret %ext +} + +; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 } +; insert z5: { tuple4.res0, z5 , tuple4.res2, tuple4.res3 } +; extract z5: ^^ +define @set_tuple4_nxv16i32_elt1( %z0, %z1, + %z2, %z3, + %z4, %z5) #0 { + ; CHECK-LABEL: set_tuple4_nxv16i32_elt1: + ; CHECK-NEXT: mov z0.d, z5.d + ; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z0, %z1, %z2, %z3) + %ins = call @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32( %tuple, i32 1, %z5) + %ext = call @llvm.aarch64.sve.tuple.get.nxv16i32( %ins, i32 1) + ret %ext +} + +; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 } +; insert z5: { tuple4.res0, tuple4.res1, z5 , tuple4.res3 } +; extract z5: ^^ +define 
@set_tuple4_nxv16i32_elt2( %z0, %z1, + %z2, %z3, + %z4, %z5) #0 { + ; CHECK-LABEL: set_tuple4_nxv16i32_elt2: + ; CHECK-NEXT: mov z0.d, z5.d + ; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z0, %z1, %z2, %z3) + %ins = call @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32( %tuple, i32 2, %z5) + %ext = call @llvm.aarch64.sve.tuple.get.nxv16i32( %ins, i32 2) + ret %ext +} + +; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 } +; insert z5: { tuple4.res0, tuple4.res1, tuple4.res2, z5 } +; extract z5: ^^ +define @set_tuple4_nxv16i32_elt3( %z0, %z1, + %z2, %z3, + %z4, %z5) #0 { + ; CHECK-LABEL: set_tuple4_nxv16i32_elt3: + ; CHECK-NEXT: mov z0.d, z5.d + ; CHECK-NEXT: ret + %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z0, %z1, %z2, %z3) + %ins = call @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32( %tuple, i32 3, %z5) + %ext = call @llvm.aarch64.sve.tuple.get.nxv16i32( %ins, i32 3) + ret %ext +} + +attributes #0 = { nounwind "target-features"="+sve" } + +declare @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(, ) +declare @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(, i32, ) +declare @llvm.aarch64.sve.tuple.get.nxv8i32(, i32) + +declare @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(, , ) +declare @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(, i32, ) +declare @llvm.aarch64.sve.tuple.get.nxv12i32(, i32) + +declare @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(, , , ) +declare @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(, i32, ) +declare @llvm.aarch64.sve.tuple.get.nxv16i32(, i32)