diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -946,31 +946,6 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". - class AdvSIMD_SVE_Create_2Vector_Tuple - : DefaultAttrsIntrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty, LLVMMatchType<1>], - [IntrReadMem]>; - - class AdvSIMD_SVE_Create_3Vector_Tuple - : DefaultAttrsIntrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>], - [IntrReadMem]>; - - class AdvSIMD_SVE_Create_4Vector_Tuple - : DefaultAttrsIntrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, - LLVMMatchType<1>], - [IntrReadMem]>; - - class AdvSIMD_SVE_Set_Vector_Tuple - : DefaultAttrsIntrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty], - [IntrReadMem, ImmArg>]>; - - class AdvSIMD_SVE_Get_Vector_Tuple - : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, ImmArg>]>; - class AdvSIMD_ManyVec_PredLoad_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMPointerToElt<0>], [IntrReadMem, IntrArgMemOnly]>; @@ -1548,21 +1523,6 @@ [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty, llvm_i64_ty], [IntrNoMem, ImmArg>]>; -// -// Vector tuple creation intrinsics (ACLE) -// - -def int_aarch64_sve_tuple_create2 : AdvSIMD_SVE_Create_2Vector_Tuple; -def int_aarch64_sve_tuple_create3 : AdvSIMD_SVE_Create_3Vector_Tuple; -def int_aarch64_sve_tuple_create4 : AdvSIMD_SVE_Create_4Vector_Tuple; - -// -// Vector tuple insertion/extraction intrinsics (ACLE) -// - -def int_aarch64_sve_tuple_get : AdvSIMD_SVE_Get_Vector_Tuple; -def int_aarch64_sve_tuple_set : AdvSIMD_SVE_Set_Vector_Tuple; - // // Loads // diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -19852,71 +19852,6 @@ /*OnlyPackedOffsets=*/false); case Intrinsic::aarch64_sve_st1_scatter_scalar_offset: return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM_PRED); - case Intrinsic::aarch64_sve_tuple_get: { - SDLoc DL(N); - SDValue Chain = N->getOperand(0); - SDValue Src1 = N->getOperand(2); - SDValue Idx = N->getOperand(3); - - uint64_t IdxConst = cast(Idx)->getZExtValue(); - EVT ResVT = N->getValueType(0); - uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue(); - SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL); - SDValue Val = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx); - return DAG.getMergeValues({Val, Chain}, DL); - } - case Intrinsic::aarch64_sve_tuple_set: { - SDLoc DL(N); - SDValue Chain = N->getOperand(0); - SDValue Tuple = N->getOperand(2); - SDValue Idx = N->getOperand(3); - SDValue Vec = N->getOperand(4); - - EVT TupleVT = Tuple.getValueType(); - uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue(); - - uint64_t IdxConst = cast(Idx)->getZExtValue(); - uint64_t NumLanes = - Vec.getValueType().getVectorElementCount().getKnownMinValue(); - - if ((TupleLanes % NumLanes) != 0) - report_fatal_error("invalid tuple vector!"); - - uint64_t NumVecs = TupleLanes / NumLanes; - - SmallVector Opnds; - for (unsigned I = 0; I < NumVecs; ++I) { - if (I == IdxConst) - Opnds.push_back(Vec); - else { - SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL); - Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, - Vec.getValueType(), Tuple, ExtIdx)); - } - } - SDValue Concat = - DAG.getNode(ISD::CONCAT_VECTORS, DL, Tuple.getValueType(), Opnds); - return DAG.getMergeValues({Concat, Chain}, DL); - } - case Intrinsic::aarch64_sve_tuple_create2: - case Intrinsic::aarch64_sve_tuple_create3: - case Intrinsic::aarch64_sve_tuple_create4: { - SDLoc DL(N); - SDValue Chain = N->getOperand(0); - - SmallVector Opnds; - for (unsigned I = 2; I < N->getNumOperands(); ++I) - Opnds.push_back(N->getOperand(I)); - - EVT VT = Opnds[0].getValueType(); - EVT EltVT = VT.getVectorElementType(); - EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, - VT.getVectorElementCount() * - (N->getNumOperands() - 2)); - SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, DestVT, Opnds); - return DAG.getMergeValues({Concat, Chain}, DL); - } case Intrinsic::aarch64_sve_ld2: case Intrinsic::aarch64_sve_ld3: case Intrinsic::aarch64_sve_ld4: { diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1196,32 +1196,6 @@ return IC.replaceInstUsesWith(II, VectorSplat); } -static Optional instCombineSVETupleGet(InstCombiner &IC, - IntrinsicInst &II) { - // Try to remove sequences of tuple get/set. - Value *SetTuple, *SetIndex, *SetValue; - auto *GetTuple = II.getArgOperand(0); - auto *GetIndex = II.getArgOperand(1); - // Check that we have tuple_get(GetTuple, GetIndex) where GetTuple is a - // call to tuple_set i.e. tuple_set(SetTuple, SetIndex, SetValue). - // Make sure that the types of the current intrinsic and SetValue match - // in order to safely remove the sequence. - if (!match(GetTuple, - m_Intrinsic( - m_Value(SetTuple), m_Value(SetIndex), m_Value(SetValue))) || - SetValue->getType() != II.getType()) - return None; - // Case where we get the same index right after setting it. - // tuple_get(tuple_set(SetTuple, SetIndex, SetValue), GetIndex) --> SetValue - if (GetIndex == SetIndex) - return IC.replaceInstUsesWith(II, SetValue); - // If we are getting a different index than what was set in the tuple_set - // intrinsic. We can just set the input tuple to the one up in the chain. - // tuple_get(tuple_set(SetTuple, SetIndex, SetValue), GetIndex) - // --> tuple_get(SetTuple, GetIndex) - return IC.replaceOperand(II, 0, SetTuple); -} - static Optional instCombineSVEZip(InstCombiner &IC, IntrinsicInst &II) { // zip1(uzp1(A, B), uzp2(A, B)) --> A @@ -1436,8 +1410,6 @@ case Intrinsic::aarch64_sve_sunpkhi: case Intrinsic::aarch64_sve_sunpklo: return instCombineSVEUnpack(IC, II); - case Intrinsic::aarch64_sve_tuple_get: - return instCombineSVETupleGet(IC, II); case Intrinsic::aarch64_sve_zip1: case Intrinsic::aarch64_sve_zip2: return instCombineSVEZip(IC, II); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2409,28 +2409,50 @@ Value *Vec = II->getArgOperand(0); Value *Idx = II->getArgOperand(1); - auto *DstTy = dyn_cast(II->getType()); - auto *VecTy = dyn_cast(Vec->getType()); - + Type *ReturnType = II->getType(); + // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx), + // ExtractIdx) + unsigned ExtractIdx = cast(Idx)->getZExtValue(); + Value *InsertTuple, *InsertIdx, *InsertValue; + if (match(Vec, m_Intrinsic(m_Value(InsertTuple), + m_Value(InsertValue), + m_Value(InsertIdx))) && + InsertValue->getType() == ReturnType) { + unsigned Index = cast(InsertIdx)->getZExtValue(); + // Case where we get the same index right after setting it. + // extract.vector(insert.vector(InsertTuple, InsertIndex, Idx), Idx) --> + // InsertValue + if (ExtractIdx == Index) + return replaceInstUsesWith(CI, InsertValue); + // If we are getting a different index than what was set in the + // insert.vector intrinsic. We can just set the input tuple to the one up + // in the chain. extract.vector(insert.vector(InsertTuple, InsertIndex, + // InsertValue), ExtractIndex) + // --> extract.vector(InsertTuple, ExtractIndex) + else + return replaceOperand(CI, 0, InsertTuple); + } // Only canonicalize if the the destination vector and Vec are fixed // vectors. - if (DstTy && VecTy) { - unsigned DstNumElts = DstTy->getNumElements(); - unsigned VecNumElts = VecTy->getNumElements(); - unsigned IdxN = cast(Idx)->getZExtValue(); - - // Extracting the entirety of Vec is a nop. - if (VecNumElts == DstNumElts) { - replaceInstUsesWith(CI, Vec); - return eraseInstFromFunction(CI); - } + if (auto *FVTy = dyn_cast(Vec->getType())) { + if (auto *DstFVTy = dyn_cast(ReturnType)) { + unsigned DstNumElts = DstFVTy->getNumElements(); + unsigned VecNumElts = FVTy->getNumElements(); + unsigned IdxN = cast(Idx)->getZExtValue(); + + // Extracting the entirety of Vec is a nop. + if (VecNumElts == DstNumElts) { + replaceInstUsesWith(CI, Vec); + return eraseInstFromFunction(CI); + } - SmallVector Mask; - for (unsigned i = 0; i != DstNumElts; ++i) - Mask.push_back(IdxN + i); + SmallVector Mask; + for (unsigned i = 0; i != DstNumElts; ++i) + Mask.push_back(IdxN + i); - Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask); - return replaceInstUsesWith(CI, Shuffle); + Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask); + return replaceInstUsesWith(CI, Shuffle); + } } break; } diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-tuple-types.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-tuple-types.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AArch64/sve-calling-convention-tuple-types.ll +++ /dev/null @@ -1,499 +0,0 @@ -; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s - -; -; svint8x2_t -; - -define @ret_svint8x2_t( %unused_z0, %z1, %z2) #0 { -; CHECK-LABEL: ret_svint8x2_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8( %z1, %z2) - ret %tuple -} - -define void @call_svint8x2_t( %dummy_z0, %z1, %dummy_z2, %z3) #0 { -; CHECK-LABEL: call_svint8x2_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z3.d -; CHECK-NEXT: bl callee_svint8x2_t - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8( %z1, %z3) - call void @callee_svint8x2_t( %tuple) - ret void -} - -; -; svint16x2_t -; - -define @ret_svint16x2_t( %unused_z0, %z1, %z2) #0 { -; CHECK-LABEL: ret_svint16x2_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16( %z1, %z2) - ret %tuple -} - -define void @call_svint16x2_t( %dummy_z0, %z1, %dummy_z2, %z3) #0 { -; CHECK-LABEL: call_svint16x2_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z3.d -; CHECK-NEXT: bl callee_svint16x2_t - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16( %z1, %z3) - call void @callee_svint16x2_t( %tuple) - ret void -} - -; -; svint32x2_t -; - -define @ret_svint32x2_t( %unused_z0, %z1, %z2) #0 { -; CHECK-LABEL: ret_svint32x2_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %z1, %z2) - ret %tuple -} - -define void @call_svint32x2_t( %dummy_z0, %z1, %dummy_z2, %z3) #0 { -; CHECK-LABEL: call_svint32x2_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z3.d -; CHECK-NEXT: bl callee_svint32x2_t - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %z1, %z3) - call void @callee_svint32x2_t( %tuple) - ret void -} - -; -; svint64x2_t -; - -define @ret_svint64x2_t( %unused_z0, %z1, %z2) #0 { -; CHECK-LABEL: ret_svint64x2_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64( %z1, %z2) - ret %tuple -} - -define void @call_svint64x2_t( %dummy_z0, %z1, %dummy_z2, %z3) #0 { -; CHECK-LABEL: call_svint64x2_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z3.d -; CHECK-NEXT: bl callee_svint64x2_t - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64( %z1, %z3) - call void @callee_svint64x2_t( %tuple) - ret void -} - -; -; svfloatx2_t -; - -define @ret_svfloatx2_t( %unused_z0, %z1, %z2) #0 { -; CHECK-LABEL: ret_svfloatx2_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32( %z1, %z2) - ret %tuple -} - -define void @call_svfloatx2_t( %dummy_z0, %z1, %dummy_z2, %z3) #0 { -; CHECK-LABEL: call_svfloatx2_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z3.d -; CHECK-NEXT: bl callee_svfloatx2_t - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32( %z1, %z3) - call void @callee_svfloatx2_t( %tuple) - ret void -} - -; -; svdoublex2_t -; - -define @ret_svdoublex2_t( %unused_z0, %z1, %z2) #0 { -; CHECK-LABEL: ret_svdoublex2_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64( %z1, %z2) - ret %tuple -} - -define void @call_svdoublex2_t( %dummy_z0, %z1, %dummy_z2, %z3) #0 { -; CHECK-LABEL: call_svdoublex2_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z3.d -; CHECK-NEXT: bl callee_svdoublex2_t - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64( %z1, %z3) - call void @callee_svdoublex2_t( %tuple) - ret void -} - -; -; svint8x3_t -; - -define @ret_svint8x3_t( %unused_z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: ret_svint8x3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8( %z1, %z2, %z3) - ret %tuple -} - -define void @call_svint8x3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { -; CHECK-LABEL: call_svint8x3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svint8x3_t - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8( %z1, %z2, %z4) - call void @callee_svint8x3_t( %tuple) - ret void -} - -; -; svint16x3_t -; - -define @ret_svint16x3_t( %unused_z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: ret_svint16x3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16( %z1, %z2, %z3) - ret %tuple -} - -define void @call_svint16x3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { -; CHECK-LABEL: call_svint16x3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svint16x3_t - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16( %z1, %z2, %z4) - call void @callee_svint16x3_t( %tuple) - ret void -} - -; -; svint32x3_t -; - -define @ret_svint32x3_t( %unused_z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: ret_svint32x3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z1, %z2, %z3) - ret %tuple -} - -define void @call_svint32x3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { -; CHECK-LABEL: call_svint32x3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svint32x3_t - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z1, %z2, %z4) - call void @callee_svint32x3_t( %tuple) - ret void -} - -; -; svint64x3_t -; - -define @ret_svint64x3_t( %unused_z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: ret_svint64x3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64( %z1, %z2, %z3) - ret %tuple -} - -define void @call_svint64x3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { -; CHECK-LABEL: call_svint64x3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svint64x3_t - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64( %z1, %z2, %z4) - call void @callee_svint64x3_t( %tuple) - ret void -} - -; -; svfloatx3_t -; - -define @ret_svfloatx3_t( %unused_z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: ret_svfloatx3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32( %z1, %z2, %z3) - ret %tuple -} - -define void @call_svfloatx3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { -; CHECK-LABEL: call_svfloatx3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svfloatx3_t - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32( %z1, %z2, %z4) - call void @callee_svfloatx3_t( %tuple) - ret void -} - -; -; svdoublex3_t -; - -define @ret_svdoublex3_t( %unused_z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: ret_svdoublex3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64( %z1, %z2, %z3) - ret %tuple -} - -define void @call_svdoublex3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { -; CHECK-LABEL: call_svdoublex3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svdoublex3_t - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64( %z1, %z2, %z4) - call void @callee_svdoublex3_t( %tuple) - ret void -} - -; -; svint8x4_t -; - -define @ret_svint8x4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { -; CHECK-LABEL: ret_svint8x4_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: mov z3.d, z4.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8( %z1, %z2, %z3, %z4) - ret %tuple -} - -define void @call_svint8x4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { -; CHECK-LABEL: call_svint8x4_t -; CHECK: mov z3.d, z5.d -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svint8x4_t - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8( %z1, %z2, %z4, %z5) - call void @callee_svint8x4_t( %tuple) - ret void -} - -; -; svint16x4_t -; - -define @ret_svint16x4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { -; CHECK-LABEL: ret_svint16x4_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: mov z3.d, z4.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16( %z1, %z2, %z3, %z4) - ret %tuple -} - -define void @call_svint16x4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { -; CHECK-LABEL: call_svint16x4_t -; CHECK: mov z3.d, z5.d -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svint16x4_t - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16( %z1, %z2, %z4, %z5) - call void @callee_svint16x4_t( %tuple) - ret void -} - -; -; svint32x4_t -; - -define @ret_svint32x4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { -; CHECK-LABEL: ret_svint32x4_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: mov z3.d, z4.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z1, %z2, %z3, %z4) - ret %tuple -} - -define void @call_svint32x4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { -; CHECK-LABEL: call_svint32x4_t -; CHECK: mov z3.d, z5.d -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svint32x4_t - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z1, %z2, %z4, %z5) - call void @callee_svint32x4_t( %tuple) - ret void -} - -; -; svint64x4_t -; - -define @ret_svint64x4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { -; CHECK-LABEL: ret_svint64x4_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: mov z3.d, z4.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64( %z1, %z2, %z3, %z4) - ret %tuple -} - -define void @call_svint64x4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { -; CHECK-LABEL: call_svint64x4_t -; CHECK: mov z3.d, z5.d -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svint64x4_t - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64( %z1, %z2, %z4, %z5) - call void @callee_svint64x4_t( %tuple) - ret void -} - -; -; svfloatx4_t -; - -define @ret_svfloatx4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { -; CHECK-LABEL: ret_svfloatx4_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: mov z3.d, z4.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32( %z1, %z2, %z3, %z4) - ret %tuple -} - -define void @call_svfloatx4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { -; CHECK-LABEL: call_svfloatx4_t -; CHECK: mov z3.d, z5.d -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svfloatx4_t - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32( %z1, %z2, %z4, %z5) - call void @callee_svfloatx4_t( %tuple) - ret void -} - -; -; svdoublex4_t -; - -define @ret_svdoublex4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { -; CHECK-LABEL: ret_svdoublex4_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: mov z3.d, z4.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64( %z1, %z2, %z3, %z4) - ret %tuple -} - -define void @call_svdoublex4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { -; CHECK-LABEL: call_svdoublex4_t -; CHECK: mov z3.d, z5.d -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svdoublex4_t - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64( %z1, %z2, %z4, %z5) - call void @callee_svdoublex4_t( %tuple) - ret void -} - -attributes #0 = { nounwind "target-features"="+sve" } - -declare void @callee_svint8x2_t() -declare void @callee_svint16x2_t() -declare void @callee_svint32x2_t() -declare void @callee_svint64x2_t() -declare void @callee_svfloatx2_t() -declare void @callee_svdoublex2_t() - -declare void @callee_svint8x3_t() -declare void @callee_svint16x3_t() -declare void @callee_svint32x3_t() -declare void @callee_svint64x3_t() -declare void @callee_svfloatx3_t() -declare void @callee_svdoublex3_t() - -declare void @callee_svint8x4_t() -declare void @callee_svint16x4_t() -declare void @callee_svint32x4_t() -declare void @callee_svint64x4_t() -declare void @callee_svfloatx4_t() -declare void @callee_svdoublex4_t() - - -; x2 -declare @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(, ) -declare @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(, ) -declare @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(, ) -declare @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(, ) -declare @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(, ) -declare @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(, ) - -; x3 -declare @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(, , ) -declare @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(, , ) -declare @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(, , ) -declare @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(, , ) -declare @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(, , ) -declare @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(, , ) - -; x4 -declare @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(, , , ) -declare @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(, , , ) -declare @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(, , , ) -declare @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(, , , ) -declare @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(, , , ) -declare @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(, , , ) diff --git a/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll +++ /dev/null @@ -1,78 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s - -; Test that DAGCombiner doesn't drop the scalable flag when it tries to fold: -; extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index') -define @extract_nxv16i8_nxv4i64( %z0_z1) { -; CHECK-LABEL: extract_nxv16i8_nxv4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: ret - %z0_z1_bc = bitcast %z0_z1 to - %ext = call @llvm.aarch64.sve.tuple.get.nxv32i8( %z0_z1_bc, i32 1) - ret %ext -} - - -define @extract_nxv2i64_nxv32i8( %z0_z1) { -; CHECK-LABEL: extract_nxv2i64_nxv32i8: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: ret - %z0_z1_bc = bitcast %z0_z1 to - %ext = call @llvm.aarch64.sve.tuple.get.nxv4i64( %z0_z1_bc, i32 1) - ret %ext -} - -define @extract_lo_nxv4f16_nxv8f16( %z0) { -; CHECK-LABEL: extract_lo_nxv4f16_nxv8f16: -; CHECK: // %bb.0: -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: ret - %ext = call @llvm.aarch64.sve.tuple.get.nxv8f16( %z0, i32 0) - ret %ext -} - -define @extract_hi_nxv4f16_nxv8f16( %z0) { -; CHECK-LABEL: extract_hi_nxv4f16_nxv8f16: -; CHECK: // %bb.0: -; CHECK-NEXT: uunpkhi z0.s, z0.h -; CHECK-NEXT: ret - %ext = call @llvm.aarch64.sve.tuple.get.nxv8f16( %z0, i32 1) - ret %ext -} - -define @extract_lo_nxv2f32_nxv4f32( %z0) { -; CHECK-LABEL: extract_lo_nxv2f32_nxv4f32: -; CHECK: // %bb.0: -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: ret - %ext = call @llvm.aarch64.sve.tuple.get.nxv4f32( %z0, i32 0) - ret %ext -} - -define @extract_hi_nxv2f32_nxv4f32( %z0) { -; CHECK-LABEL: extract_hi_nxv2f32_nxv4f32: -; CHECK: // %bb.0: -; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: ret - %ext = call @llvm.aarch64.sve.tuple.get.nxv4f32( %z0, i32 1) - ret %ext -} - -define @load_extract_nxv4f32_nxv8f32(* %p) { -; CHECK-LABEL: load_extract_nxv4f32_nxv8f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, #1, mul vl] -; CHECK-NEXT: ret - %tmp1 = load , * %p, align 16 - %tmp2 = call @llvm.aarch64.sve.tuple.get.nxv8f32( %tmp1, i32 1) - ret %tmp2 -} - -declare @llvm.aarch64.sve.tuple.get.nxv4i64(, i32) -declare @llvm.aarch64.sve.tuple.get.nxv32i8(, i32) -declare @llvm.aarch64.sve.tuple.get.nxv4f32(, i32) -declare @llvm.aarch64.sve.tuple.get.nxv8f16(, i32) -declare @llvm.aarch64.sve.tuple.get.nxv8f32(, i32) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll +++ /dev/null @@ -1,1002 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple aarch64 -mattr=+sve < %s | FileCheck %s -; RUN: llc -mtriple aarch64 -mattr=+sme < %s | FileCheck %s - -; -; SVCREATE2 (i8) -; - -define @test_svcreate2_s8_vec0(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_s8_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB0_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB0_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8( %tuple, i32 0) - ret %extract -} - -define @test_svcreate2_s8_vec1(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_s8_vec1: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB1_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB1_2: // %L2 -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8( %tuple, i32 1) - ret %extract -} - -; -; SVCREATE2 (i16) -; - -define @test_svcreate2_s16_vec0(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_s16_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB2_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB2_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16( %tuple, i32 0) - ret %extract -} - -define @test_svcreate2_s16_vec1(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_s16_vec1: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB3_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB3_2: // %L2 -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16( %tuple, i32 1) - ret %extract -} - -; -; SVCREATE2 (half) -; - -define @test_svcreate2_f16_vec0(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_f16_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB4_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB4_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16( %tuple, i32 0) - ret %extract -} - -define @test_svcreate2_f16_vec1(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_f16_vec1: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB5_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB5_2: // %L2 -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16( %tuple, i32 1) - ret %extract -} - -; -; SVCREATE2 (bfloat) -; - -define @test_svcreate2_bf16_vec0(i1 %p, %z0, %z1) #1 { -; CHECK-LABEL: test_svcreate2_bf16_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB6_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB6_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16( %tuple, i32 0) - ret %extract -} - -define @test_svcreate2_bf16_vec1(i1 %p, %z0, %z1) #1 { -; CHECK-LABEL: test_svcreate2_bf16_vec1: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB7_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB7_2: // %L2 -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16( %tuple, i32 1) - ret %extract -} - -; -; SVCREATE2 (i32) -; - -define @test_svcreate2_s32_vec0(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_s32_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB8_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB8_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32( %tuple, i32 0) - ret %extract -} - -define @test_svcreate2_s32_vec1(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_s32_vec1: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB9_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB9_2: // %L2 -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32( %tuple, i32 1) - ret %extract -} - -; -; SVCREATE2 (float) -; - -define @test_svcreate2_f32_vec0(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_f32_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB10_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB10_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32( %tuple, i32 0) - ret %extract -} - -define @test_svcreate2_f32_vec1(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_f32_vec1: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB11_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB11_2: // %L2 -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32( %tuple, i32 1) - ret %extract -} - -; -; SVCREATE2 (i64) -; - -define @test_svcreate2_s64_vec0(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_s64_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB12_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB12_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64( %tuple, i32 0) - ret %extract -} - -define @test_svcreate2_s64_vec1(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_s64_vec1: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB13_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB13_2: // %L2 -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64( %tuple, i32 1) - ret %extract -} - -; -; SVCREATE2 (double) -; - -define @test_svcreate2_f64_vec0(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_f64_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB14_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB14_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64( %tuple, i32 0) - ret %extract -} - -define @test_svcreate2_f64_vec1(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_f64_vec1: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB15_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB15_2: // %L2 -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64( %tuple, i32 1) - ret %extract -} - -; -; SVCREATE3 (i8) -; - -define @test_svcreate3_s8_vec0(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_s8_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB16_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB16_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8( %tuple, i32 0) - ret %extract -} - -define @test_svcreate3_s8_vec2(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_s8_vec2: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB17_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB17_2: // %L2 -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8( %tuple, i32 2) - ret %extract -} - -; -; SVCREATE3 (i16) -; - -define @test_svcreate3_s16_vec0(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_s16_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB18_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB18_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16( %tuple, i32 0) - ret %extract -} - -define @test_svcreate3_s16_vec2(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_s16_vec2: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB19_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB19_2: // %L2 -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16( %tuple, i32 2) - ret %extract -} - -; -; SVCREATE3 (half) -; - -define @test_svcreate3_f16_vec0(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_f16_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB20_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB20_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16( %tuple, i32 0) - ret %extract -} - -define @test_svcreate3_f16_vec2(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_f16_vec2: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB21_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB21_2: // %L2 -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16( %tuple, i32 2) - ret %extract -} - -; -; SVCREATE3 (bfloat) -; - -define @test_svcreate3_bf16_vec0(i1 %p, %z0, %z1, %z2) #1 { -; CHECK-LABEL: test_svcreate3_bf16_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB22_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB22_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24bf16.nxv8bf16( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv24bf16( %tuple, i32 0) - ret %extract -} - -define @test_svcreate3_bf16_vec2(i1 %p, %z0, %z1, %z2) #1 { -; CHECK-LABEL: test_svcreate3_bf16_vec2: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB23_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB23_2: // %L2 -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24bf16.nxv8bf16( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv24bf16( %tuple, i32 2) - ret %extract -} - -; -; SVCREATE3 (i32) -; - -define @test_svcreate3_s32_vec0(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_s32_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB24_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB24_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32( %tuple, i32 0) - ret %extract -} - -define @test_svcreate3_s32_vec2(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_s32_vec2: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB25_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB25_2: // %L2 -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32( %tuple, i32 2) - ret %extract -} - -; -; SVCREATE3 (float) -; - -define @test_svcreate3_f32_vec0(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_f32_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB26_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB26_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32( %tuple, i32 0) - ret %extract -} - -define @test_svcreate3_f32_vec2(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_f32_vec2: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB27_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB27_2: // %L2 -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32( %tuple, i32 2) - ret %extract -} - -; -; SVCREATE3 (i64) -; - -define @test_svcreate3_s64_vec0(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_s64_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB28_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB28_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64( %tuple, i32 0) - ret %extract -} - -define @test_svcreate3_s64_vec2(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_s64_vec2: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB29_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB29_2: // %L2 -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64( %tuple, i32 2) - ret %extract -} - -; -; SVCREATE3 (double) -; - -define @test_svcreate3_f64_vec0(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_f64_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB30_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB30_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64( %tuple, i32 0) - ret %extract -} - -define @test_svcreate3_f64_vec2(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_f64_vec2: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB31_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB31_2: // %L2 -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64( %tuple, i32 2) - ret %extract -} - -; -; SVCREATE4 (i8) -; - -define @test_svcreate4_s8_vec0(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_s8_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB32_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB32_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %tuple, i32 0) - ret %extract -} - -define @test_svcreate4_s8_vec3(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_s8_vec3: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB33_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB33_2: // %L2 -; CHECK-NEXT: mov z0.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %tuple, i32 3) - ret %extract -} - -; -; SVCREATE4 (i16) -; - -define @test_svcreate4_s16_vec0(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_s16_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB34_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB34_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %tuple, i32 0) - ret %extract -} - -define @test_svcreate4_s16_vec3(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_s16_vec3: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB35_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB35_2: // %L2 -; CHECK-NEXT: mov z0.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %tuple, i32 3) - ret %extract -} - -; -; SVCREATE4 (half) -; - -define @test_svcreate4_f16_vec0(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_f16_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB36_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB36_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16( %tuple, i32 0) - ret %extract -} - -define @test_svcreate4_f16_vec3(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_f16_vec3: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB37_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB37_2: // %L2 -; CHECK-NEXT: mov z0.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16( %tuple, i32 3) - ret %extract -} - -; -; SVCREATE4 (bfloat) -; - -define @test_svcreate4_bf16_vec0(i1 %p, %z0, %z1, %z2, %z3) #1 { -; CHECK-LABEL: test_svcreate4_bf16_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB38_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB38_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32bf16.nxv8bf16( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv32bf16( %tuple, i32 0) - ret %extract -} - -define @test_svcreate4_bf16_vec3(i1 %p, %z0, %z1, %z2, %z3) #1 { -; CHECK-LABEL: test_svcreate4_bf16_vec3: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB39_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB39_2: // %L2 -; CHECK-NEXT: mov z0.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32bf16.nxv8bf16( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv32bf16( %tuple, i32 3) - ret %extract -} - -; -; SVCREATE4 (i32) -; - -define @test_svcreate4_s32_vec0(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_s32_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB40_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB40_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %tuple, i32 0) - ret %extract -} - -define @test_svcreate4_s32_vec3(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_s32_vec3: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB41_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB41_2: // %L2 -; CHECK-NEXT: mov z0.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %tuple, i32 3) - ret %extract -} - -; -; SVCREATE4 (float) -; - -define @test_svcreate4_f32_vec0(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_f32_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB42_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB42_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32( %tuple, i32 0) - ret %extract -} - -define @test_svcreate4_f32_vec3(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_f32_vec3: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB43_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB43_2: // %L2 -; CHECK-NEXT: mov z0.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32( %tuple, i32 3) - ret %extract -} - -; -; SVCREATE4 (i64) -; - -define @test_svcreate4_s64_vec0(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_s64_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB44_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB44_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %tuple, i32 0) - ret %extract -} - -define @test_svcreate4_s64_vec3(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_s64_vec3: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB45_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB45_2: // %L2 -; CHECK-NEXT: mov z0.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %tuple, i32 3) - ret %extract -} - -; -; SVCREATE4 (double) -; - -define @test_svcreate4_f64_vec0(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_f64_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB46_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB46_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64( %tuple, i32 0) - ret %extract -} - -define @test_svcreate4_f64_vec3(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_f64_vec3: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB47_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB47_2: // %L2 -; CHECK-NEXT: mov z0.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64( %tuple, i32 3) - ret %extract -} - -attributes #0 = { nounwind } -; +bf16 is required for the bfloat version. -attributes #1 = { nounwind "target-features"="+bf16" } - -declare @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(, ) -declare @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(, ) -declare @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16(, ) -declare @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16(, ) -declare @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(, ) -declare @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(, ) -declare @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(, ) -declare @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(, ) - -declare @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(, , ) -declare @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(, , ) -declare @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16(, , ) -declare @llvm.aarch64.sve.tuple.create3.nxv24bf16.nxv8bf16(, , ) -declare @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(, , ) -declare @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(, , ) -declare @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(, , ) -declare @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(, , ) - -declare @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64 (, , , ) -declare @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(, , , ) -declare @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16(, , , ) -declare @llvm.aarch64.sve.tuple.create4.nxv32bf16.nxv8bf16(, , , ) -declare @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(, , , ) -declare @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(, , , ) -declare @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(, , , ) -declare @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(, , , ) - -declare @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8(, i32 immarg) -declare @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(, i32 immarg) -declare @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(, i32 immarg) - -declare @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16(, i32 immarg) -declare @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(, i32 immarg) -declare @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(, i32 immarg) - -declare @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(, i32 immarg) -declare @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(, i32 immarg) -declare @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(, i32 immarg) - -declare @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64(, i32 immarg) -declare @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(, i32 immarg) -declare @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(, i32 immarg) - -declare @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(, i32 immarg) -declare @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv24bf16(, i32 immarg) -declare @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv32bf16(, i32 immarg) - -declare @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16(, i32 immarg) -declare @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16(, i32 immarg) -declare @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(, i32 immarg) - -declare @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32(, i32 immarg) -declare @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32(, i32 immarg) -declare @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(, i32 immarg) - -declare @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64(, i32 immarg) -declare @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64(, i32 immarg) -declare @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(, i32 immarg) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll +++ /dev/null @@ -1,234 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple aarch64 -mattr=+sve < %s | FileCheck %s -; RUN: llc -mtriple aarch64 -mattr=+sme < %s | FileCheck %s - -; All these tests create a vector tuple, insert z5 into one of the elements, -; and finally extracts that element from the wide vector to return it. These -; checks ensure that z5 is always the value that is returned. - -; -; Insert into two element tuples -; - -; tuple: { tuple2.res0, tuple2.res1 } -; insert z5: { z5 , tuple2.res1 } -; extract z5: ^^ -define @set_tuple2_nxv8i32_elt0( %z0, %z1, %z2, %z3, %z4, %z5) { -; CHECK-LABEL: set_tuple2_nxv8i32_elt0: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, z5.d -; CHECK-NEXT: ret - %tuple = call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %z0, %z1) - %ins = call @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32( %tuple, i32 0, %z5) - %ext = call @llvm.aarch64.sve.tuple.get.nxv8i32( %ins, i32 0) - ret %ext -} - -; tuple: { tuple2.res0, tuple2.res1 } -; insert z5: { tuple2.res0, z5 } -; extract z5: ^^ -define @set_tuple2_nxv8i32_elt1( %z0, %z1, %z2, %z3, %z4, %z5) { -; CHECK-LABEL: set_tuple2_nxv8i32_elt1: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, z5.d -; CHECK-NEXT: ret - %tuple = call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %z0, %z1) - %ins = call @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32( %tuple, i32 1, %z5) - %ext = call @llvm.aarch64.sve.tuple.get.nxv8i32( %ins, i32 1) - ret %ext -} - -; This test checks the elements _not_ being set aren't changed. - -; tuple: { tuple2.res0, tuple2.res1 } -; insert z5: { tuple2.res0, z5 } -; extract z0: ^^ -define @set_tuple2_nxv8i32_elt1_ret_elt0( %z0, %z1, %z2, %z3, %z4, %z5) { -; CHECK-LABEL: set_tuple2_nxv8i32_elt1_ret_elt0: -; CHECK: // %bb.0: -; CHECK-NEXT: ret - %tuple = call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %z0, %z1) - %ins = call @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32( %tuple, i32 1, %z5) - %ext = call @llvm.aarch64.sve.tuple.get.nxv8i32( %ins, i32 0) - ret %ext -} - -; Test extract of tuple passed into function -define @get_tuple2_nxv8i32_elt1( %tuple) { -; CHECK-LABEL: get_tuple2_nxv8i32_elt1: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: ret - %ext = call @llvm.aarch64.sve.tuple.get.nxv8i32( %tuple, i32 1) - ret %ext -} - -; -; Insert into three element tuples -; - -; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 } -; insert z5: { z5 , tuple3.res0, tuple3.res2 } -; extract z5: ^^ -define @set_tuple3_nxv12i32_elt0( %z0, %z1, %z2, %z3, %z4, %z5) { -; CHECK-LABEL: set_tuple3_nxv12i32_elt0: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, z5.d -; CHECK-NEXT: ret - %tuple = call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z0, %z1, %z2) - %ins = call @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32( %tuple, i32 0, %z5) - %ext = call @llvm.aarch64.sve.tuple.get.nxv12i32( %ins, i32 0) - ret %ext -} - -; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 } -; insert z5: { tuple3.res0, z5 , tuple3.res2 } -; extract z5: ^^ -define @set_tuple3_nxv12i32_elt1( %z0, %z1, %z2, %z3, %z4, %z5) { -; CHECK-LABEL: set_tuple3_nxv12i32_elt1: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, z5.d -; CHECK-NEXT: ret - %tuple = call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z0, %z1, %z2) - %ins = call @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32( %tuple, i32 1, %z5) - %ext = call @llvm.aarch64.sve.tuple.get.nxv12i32( %ins, i32 1) - ret %ext -} - -; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 } -; insert z5: { tuple3.res0, tuple3.res1, z5 } -; extract z5: ^^ -define @set_tuple3_nxv12i32_elt2( %z0, %z1, %z2, %z3, %z4, %z5) { -; CHECK-LABEL: set_tuple3_nxv12i32_elt2: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, z5.d -; CHECK-NEXT: ret - %tuple = call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z0, %z1, %z2) - %ins = call @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32( %tuple, i32 2, %z5) - %ext = call @llvm.aarch64.sve.tuple.get.nxv12i32( %ins, i32 2) - ret %ext -} - -; This test checks the elements _not_ being set aren't changed. - -; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 } -; insert z5: { tuple3.res0, z5 , tuple3.res2 } -; extract z2: ^^ -define @set_tuple3_nxv12i32_elt1_ret_elt2( %z0, %z1, %z2, %z3, %z4, %z5) { -; CHECK-LABEL: set_tuple3_nxv12i32_elt1_ret_elt2: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: ret - %tuple = call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z0, %z1, %z2) - %ins = call @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32( %tuple, i32 1, %z5) - %ext = call @llvm.aarch64.sve.tuple.get.nxv12i32( %ins, i32 2) - ret %ext -} - -; Test extract of tuple passed into function -define @get_tuple3_nxv12i32_elt2( %z0, %tuple) { -; CHECK-LABEL: get_tuple3_nxv12i32_elt2: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, z3.d -; CHECK-NEXT: ret - %ext = call @llvm.aarch64.sve.tuple.get.nxv12i32( %tuple, i32 2) - ret %ext -} - -; -; Insert into four element tuples -; - -; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 } -; insert z5: { z5 , tuple4.res1, tuple4.res2, tuple4.res3 } -; extract z5: ^^ -define @set_tuple4_nxv16i32_elt0( %z0, %z1, %z2, %z3, %z4, %z5) { -; CHECK-LABEL: set_tuple4_nxv16i32_elt0: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, z5.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z0, %z1, %z2, %z3) - %ins = call @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32( %tuple, i32 0, %z5) - %ext = call @llvm.aarch64.sve.tuple.get.nxv16i32( %ins, i32 0) - ret %ext -} - -; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 } -; insert z5: { tuple4.res0, z5 , tuple4.res2, tuple4.res3 } -; extract z5: ^^ -define @set_tuple4_nxv16i32_elt1( %z0, %z1, %z2, %z3, %z4, %z5) { -; CHECK-LABEL: set_tuple4_nxv16i32_elt1: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, z5.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z0, %z1, %z2, %z3) - %ins = call @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32( %tuple, i32 1, %z5) - %ext = call @llvm.aarch64.sve.tuple.get.nxv16i32( %ins, i32 1) - ret %ext -} - -; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 } -; insert z5: { tuple4.res0, tuple4.res1, z5 , tuple4.res3 } -; extract z5: ^^ -define @set_tuple4_nxv16i32_elt2( %z0, %z1, %z2, %z3, %z4, %z5) { -; CHECK-LABEL: set_tuple4_nxv16i32_elt2: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, z5.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z0, %z1, %z2, %z3) - %ins = call @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32( %tuple, i32 2, %z5) - %ext = call @llvm.aarch64.sve.tuple.get.nxv16i32( %ins, i32 2) - ret %ext -} - -; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 } -; insert z5: { tuple4.res0, tuple4.res1, tuple4.res2, z5 } -; extract z5: ^^ -define @set_tuple4_nxv16i32_elt3( %z0, %z1, %z2, %z3, %z4, %z5) { -; CHECK-LABEL: set_tuple4_nxv16i32_elt3: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, z5.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z0, %z1, %z2, %z3) - %ins = call @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32( %tuple, i32 3, %z5) - %ext = call @llvm.aarch64.sve.tuple.get.nxv16i32( %ins, i32 3) - ret %ext -} - -; This test checks the elements _not_ being set aren't changed. - -; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 } -; insert z5: { tuple4.res0, tuple4.res1, tuple4.res2, z5 } -; extract z2: ^^ -define @set_tuple4_nxv16i32_elt3_ret_elt2( %z0, %z1, %z2, %z3, %z4, %z5) { -; CHECK-LABEL: set_tuple4_nxv16i32_elt3_ret_elt2: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z0, %z1, %z2, %z3) - %ins = call @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32( %tuple, i32 3, %z5) - %ext = call @llvm.aarch64.sve.tuple.get.nxv16i32( %ins, i32 2) - ret %ext -} - -; Test extract of tuple passed into function -define @get_tuple4_nxv16i32_elt3( %tuple) { -; CHECK-LABEL: get_tuple4_nxv16i32_elt3: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, z3.d -; CHECK-NEXT: ret - %ext = call @llvm.aarch64.sve.tuple.get.nxv16i32( %tuple, i32 3) - ret %ext -} - -declare @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(, ) -declare @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(, i32, ) -declare @llvm.aarch64.sve.tuple.get.nxv8i32(, i32) - -declare @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(, , ) -declare @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(, i32, ) -declare @llvm.aarch64.sve.tuple.get.nxv12i32(, i32) - -declare @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(, , , ) -declare @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(, i32, ) -declare @llvm.aarch64.sve.tuple.get.nxv16i32(, i32) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll @@ -397,45 +397,6 @@ ret %out } -; ADD (tuples) - -define @add_i64_tuple2(* %out, %in1, %in2) { -; CHECK-LABEL: add_i64_tuple2: -; CHECK: // %bb.0: -; CHECK-NEXT: add z0.d, z0.d, z0.d -; CHECK-NEXT: add z1.d, z1.d, z1.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64( %in1, %in2) - %res = add %tuple, %tuple - ret %res -} - -define @add_i64_tuple3(* %out, %in1, %in2, %in3) { -; CHECK-LABEL: add_i64_tuple3: -; CHECK: // %bb.0: -; CHECK-NEXT: add z1.d, z1.d, z1.d -; CHECK-NEXT: add z0.d, z0.d, z0.d -; CHECK-NEXT: add z2.d, z2.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64( %in1, %in2, %in3) - %res = add %tuple, %tuple - ret %res -} - -define @add_i64_tuple4(* %out, %in1, %in2, %in3, %in4) { -; CHECK-LABEL: add_i64_tuple4: -; CHECK: // %bb.0: -; CHECK-NEXT: add z2.d, z2.d, z2.d -; CHECK-NEXT: add z0.d, z0.d, z0.d -; CHECK-NEXT: add z1.d, z1.d, z1.d -; CHECK-NEXT: add z3.d, z3.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64( %in1, %in2, %in3, %in4) - %res = add %tuple, %tuple - ret %res -} - - declare @llvm.aarch64.sve.abs.nxv16i8(, , ) declare @llvm.aarch64.sve.abs.nxv8i16(, , ) declare @llvm.aarch64.sve.abs.nxv4i32(, , ) @@ -478,6 +439,3 @@ declare @llvm.aarch64.sve.uqsub.x.nxv4i32(, ) declare @llvm.aarch64.sve.uqsub.x.nxv2i64(, ) -declare @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(, ) -declare @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(, , ) -declare @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(, , , ) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll @@ -586,87 +586,6 @@ } -; Stores (tuples) - -define void @store_i64_tuple3(* %out, %in1, %in2, %in3) { -; CHECK-LABEL: store_i64_tuple3: -; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: st1d { z2.d }, p0, [x0, #2, mul vl] -; CHECK-NEXT: st1d { z1.d }, p0, [x0, #1, mul vl] -; CHECK-NEXT: st1d { z0.d }, p0, [x0] -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64( %in1, %in2, %in3) - store %tuple, * %out - ret void -} - -define void @store_i64_tuple4(* %out, %in1, %in2, %in3, %in4) { -; CHECK-LABEL: store_i64_tuple4: -; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: st1d { z3.d }, p0, [x0, #3, mul vl] -; CHECK-NEXT: st1d { z2.d }, p0, [x0, #2, mul vl] -; CHECK-NEXT: st1d { z1.d }, p0, [x0, #1, mul vl] -; CHECK-NEXT: st1d { z0.d }, p0, [x0] -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64( %in1, %in2, %in3, %in4) - store %tuple, * %out - ret void -} - -define void @store_i16_tuple2(* %out, %in1, %in2) { -; CHECK-LABEL: store_i16_tuple2: -; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: st1h { z1.h }, p0, [x0, #1, mul vl] -; CHECK-NEXT: st1h { z0.h }, p0, [x0] -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16( %in1, %in2) - store %tuple, * %out - ret void -} - -define void @store_i16_tuple3(* %out, %in1, %in2, %in3) { -; CHECK-LABEL: store_i16_tuple3: -; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: st1h { z2.h }, p0, [x0, #2, mul vl] -; CHECK-NEXT: st1h { z1.h }, p0, [x0, #1, mul vl] -; CHECK-NEXT: st1h { z0.h }, p0, [x0] -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16( %in1, %in2, %in3) - store %tuple, * %out - ret void -} - -define void @store_f32_tuple3(* %out, %in1, %in2, %in3) { -; CHECK-LABEL: store_f32_tuple3: -; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: st1w { z2.s }, p0, [x0, #2, mul vl] -; CHECK-NEXT: st1w { z1.s }, p0, [x0, #1, mul vl] -; CHECK-NEXT: st1w { z0.s }, p0, [x0] -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32( %in1, %in2, %in3) - store %tuple, * %out - ret void -} - -define void @store_f32_tuple4(* %out, %in1, %in2, %in3, %in4) { -; CHECK-LABEL: store_f32_tuple4: -; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: st1w { z3.s }, p0, [x0, #3, mul vl] -; CHECK-NEXT: st1w { z2.s }, p0, [x0, #2, mul vl] -; CHECK-NEXT: st1w { z1.s }, p0, [x0, #1, mul vl] -; CHECK-NEXT: st1w { z0.s }, p0, [x0] -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32( %in1, %in2, %in3, %in4) - store %tuple, * %out - ret void -} - declare void @llvm.aarch64.sve.st2.nxv16i8(, , , i8*) declare void @llvm.aarch64.sve.st2.nxv8i16(, , , i16*) declare void @llvm.aarch64.sve.st2.nxv4i32(, , , i32*) @@ -706,14 +625,5 @@ declare void @llvm.aarch64.sve.stnt1.nxv4f32(, , float*) declare void @llvm.aarch64.sve.stnt1.nxv2f64(, , double*) -declare @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(, , ) -declare @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(, , , ) - -declare @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(, ) -declare @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(, , ) - -declare @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(, , ) -declare @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(, , , ) - ; +bf16 is required for the bfloat version. attributes #0 = { "target-features"="+bf16" } diff --git a/llvm/test/CodeGen/AArch64/sve-merging-stores.ll b/llvm/test/CodeGen/AArch64/sve-merging-stores.ll --- a/llvm/test/CodeGen/AArch64/sve-merging-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-merging-stores.ll @@ -3,7 +3,7 @@ %complex = type { { double, double } } ; Function Attrs: argmemonly nounwind readonly -declare @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64(, i32 immarg) #3 +declare @llvm.vector.extract.nxv2f64.nxv4f64(, i64 immarg) #3 ; Function Attrs: argmemonly nounwind readonly declare @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1(, double*) #3 @@ -21,9 +21,9 @@ %realp = getelementptr inbounds %complex, %complex* %outval, i64 0, i32 0, i32 0 %imagp = getelementptr inbounds %complex, %complex* %outval, i64 0, i32 0, i32 1 %1 = call @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1( %pred, double* nonnull %inptr) - %2 = call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64( %1, i32 0) + %2 = call @llvm.vector.extract.nxv2f64.nxv4f64( %1, i64 0) %3 = call double @llvm.aarch64.sve.faddv.nxv2f64( %pred, %2) - %4 = call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64( %1, i32 1) + %4 = call @llvm.vector.extract.nxv2f64.nxv4f64( %1, i64 2) %5 = call double @llvm.aarch64.sve.faddv.nxv2f64( %pred, %4) store double %3, double* %realp, align 8 store double %5, double* %imagp, align 8 diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-tuple-get.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-tuple-get.ll deleted file mode 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-tuple-get.ll +++ /dev/null @@ -1,37 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=instcombine < %s | FileCheck %s - -target triple = "aarch64-unknown-linux-gnu" - -; This stores %a using st4 after reversing the 4 tuples. Check that the -; redundant sequences of get/set are eliminated. -define void @redundant_tuple_get_set( %a, i8* %ptr) #0 { -; CHECK-LABEL: @redundant_tuple_get_set( -; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( [[A:%.*]], i32 3) -; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( [[A]], i32 0) -; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( [[A]], i32 2) -; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( [[A]], i32 1) -; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv16i8( [[TMP1]], [[TMP3]], [[TMP4]], [[TMP2]], shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer), i8* [[PTR:%.*]]) -; CHECK-NEXT: ret void -; - %1 = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %a, i32 3) - %2 = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %a, i32 0) - %3 = call @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8( %a, i32 3, %2) - %4 = call @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8( %3, i32 0, %1) - %5 = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %4, i32 2) - %6 = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %4, i32 1) - %7 = call @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8( %4, i32 2, %6) - %8 = call @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8( %7, i32 1, %5) - %9 = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %8, i32 0) - %10 = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %8, i32 1) - %11 = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %8, i32 2) - %12 = call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %8, i32 3) - call void @llvm.aarch64.sve.st4.nxv16i8( %9, %10, %11, %12, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer), i8* %ptr) - ret void -} - -declare @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8(, i32, ) -declare @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(, i32) -declare void @llvm.aarch64.sve.st4.nxv16i8(, , , , , i8*) - -attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/Transforms/InstCombine/opts-tuples-extract-intrinsic.ll b/llvm/test/Transforms/InstCombine/opts-tuples-extract-intrinsic.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/opts-tuples-extract-intrinsic.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=instcombine < %s | FileCheck %s + + +; Check that the redundant sequences of extract/insert are eliminated. +define @redundant_tuple_get_set( %a, i8* %ptr) #0 { +; CHECK-LABEL: @redundant_tuple_get_set( +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.vector.extract.nxv16i8.nxv64i8( [[A:%.*]], i64 48) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.vector.extract.nxv16i8.nxv64i8( [[A]], i64 0) +; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.vector.extract.nxv16i8.nxv64i8( [[A]], i64 32) +; CHECK-NEXT: [[TMP4:%.*]] = call @llvm.vector.extract.nxv16i8.nxv64i8( [[A]], i64 16) +; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP3]], i64 16) +; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP4]], i64 32) +; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP2]], i64 48) +; CHECK-NEXT: ret [[TMP8]] +; + %1 = call @llvm.vector.extract.nxv16i8.nxv64i8( %a, i64 48) + %2 = call @llvm.vector.extract.nxv16i8.nxv64i8( %a, i64 0) + %3 = call @llvm.vector.insert.nxv64i8.nxv16i8( %a, %2, i64 48) + %4 = call @llvm.vector.insert.nxv64i8.nxv16i8( %3, %1, i64 0) + %5 = call @llvm.vector.extract.nxv16i8.nxv64i8( %4, i64 32) + %6 = call @llvm.vector.extract.nxv16i8.nxv64i8( %4, i64 16) + %7 = call @llvm.vector.insert.nxv64i8.nxv16i8( %4, %6, i64 32) + %8 = call @llvm.vector.insert.nxv64i8.nxv16i8( %7, %5, i64 16) + %9 = call @llvm.vector.extract.nxv16i8.nxv64i8( %8, i64 0) + %10 = call @llvm.vector.extract.nxv16i8.nxv64i8( %8, i64 16) + %11 = call @llvm.vector.extract.nxv16i8.nxv64i8( %8, i64 32) + %12 = call @llvm.vector.extract.nxv16i8.nxv64i8( %8, i64 48) + %13 = call @llvm.vector.insert.nxv64i8.nxv16i8( poison, %9, i64 0) + %14 = call @llvm.vector.insert.nxv64i8.nxv16i8( %13, %10, i64 16) + %15 = call @llvm.vector.insert.nxv64i8.nxv16i8( %14, %11, i64 32) + %16 = call @llvm.vector.insert.nxv64i8.nxv16i8( %15, %12, i64 48) + ret %16 +} + +declare @llvm.vector.insert.nxv64i8.nxv16i8(, , i64) +declare @llvm.vector.extract.nxv16i8.nxv64i8(, i64) +