diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -946,31 +946,6 @@
 let TargetPrefix = "aarch64" in {
 // All intrinsics start with "llvm.aarch64.".
 
-  class AdvSIMD_SVE_Create_2Vector_Tuple
-    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
-                            [llvm_anyvector_ty, LLVMMatchType<1>],
-                            [IntrReadMem]>;
-
-  class AdvSIMD_SVE_Create_3Vector_Tuple
-    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
-                            [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>],
-                            [IntrReadMem]>;
-
-  class AdvSIMD_SVE_Create_4Vector_Tuple
-    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
-                            [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
-                             LLVMMatchType<1>],
-                            [IntrReadMem]>;
-
-  class AdvSIMD_SVE_Set_Vector_Tuple
-    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
-                            [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty],
-                            [IntrReadMem, ImmArg<ArgIndex<1>>]>;
-
-  class AdvSIMD_SVE_Get_Vector_Tuple
-    : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty],
-                            [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
-
   class AdvSIMD_1Vec_PredLoad_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                             [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
@@ -1544,21 +1519,6 @@
                             [llvm_nxv4f32_ty,
                              llvm_nxv8bf16_ty, llvm_nxv8bf16_ty, llvm_i64_ty],
                             [IntrNoMem, ImmArg<ArgIndex<3>>]>;
 
-//
-// Vector tuple creation intrinsics (ACLE)
-//
-
-def int_aarch64_sve_tuple_create2 : AdvSIMD_SVE_Create_2Vector_Tuple;
-def int_aarch64_sve_tuple_create3 : AdvSIMD_SVE_Create_3Vector_Tuple;
-def int_aarch64_sve_tuple_create4 : AdvSIMD_SVE_Create_4Vector_Tuple;
-
-//
-// Vector tuple insertion/extraction intrinsics (ACLE)
-//
-
-def int_aarch64_sve_tuple_get : AdvSIMD_SVE_Get_Vector_Tuple;
-def int_aarch64_sve_tuple_set : AdvSIMD_SVE_Set_Vector_Tuple;
-
 //
 // Loads
 //
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -574,6 +574,28 @@
       NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Ty);
       return true;
     }
+    if (Name.startswith("aarch64.sve.tuple.get")) {
+      Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
+      NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                        Intrinsic::vector_extract, Tys);
+      return true;
+    }
+    if (Name.startswith("aarch64.sve.tuple.set")) {
+      auto Args = F->getFunctionType()->params();
+      Type *Tys[] = {Args[0], Args[2], Args[1]};
+      NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                        Intrinsic::vector_insert, Tys);
+      return true;
+    }
+    static const Regex CreateTupleRegex(
+        "^aarch64\\.sve\\.tuple\\.create[234](.nxv[a-z0-9]+|$)");
+    if (CreateTupleRegex.match(Name)) {
+      auto Args = F->getFunctionType()->params();
+      Type *Tys[] = {F->getReturnType(), Args[1]};
+      NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                        Intrinsic::vector_insert, Tys);
+      return true;
+    }
     if (Name.startswith("arm.neon.vclz")) {
       Type* args[2] = {
           F->arg_begin()->getType(),
@@ -3898,6 +3920,61 @@
     NewCall = dyn_cast<CallInst>(Ret);
     break;
   }
+
+  case Intrinsic::vector_extract: {
+    StringRef Name = F->getName();
+    Name = Name.substr(5); // Strip llvm.
+    if (!Name.startswith("aarch64.sve.tuple.get")) {
+      DefaultCase();
+      return;
+    }
+    ScalableVectorType *RetTy =
+        cast<ScalableVectorType>(F->getReturnType());
+    unsigned MinElts = RetTy->getMinNumElements();
+    unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+    Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
+    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
+    break;
+  }
+
+  case Intrinsic::vector_insert: {
+    StringRef Name = F->getName();
+    Name = Name.substr(5); // Strip llvm.
+    if (!Name.startswith("aarch64.sve.tuple")) {
+      DefaultCase();
+      return;
+    }
+    if (Name.startswith("aarch64.sve.tuple.set")) {
+      unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+      ScalableVectorType *Ty =
+          cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
+      Value *NewIdx =
+          ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
+      NewCall = Builder.CreateCall(
+          NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
+      break;
+    }
+    if (Name.startswith("aarch64.sve.tuple.create")) {
+      unsigned N = StringSwitch<unsigned>(Name)
+                       .StartsWith("aarch64.sve.tuple.create2", 2)
+                       .StartsWith("aarch64.sve.tuple.create3", 3)
+                       .StartsWith("aarch64.sve.tuple.create4", 4)
+                       .Default(0);
+      assert(N > 1 && "Create is expected to be between 2-4");
+      ScalableVectorType *RetTy =
+          cast<ScalableVectorType>(F->getReturnType());
+      Value *Ret = llvm::PoisonValue::get(RetTy);
+      unsigned MinElts = RetTy->getMinNumElements() / N;
+      for (unsigned I = 0; I < N; I++) {
+        Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
+        Value *V = CI->getArgOperand(I);
+        Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
+      }
+      NewCall = cast<CallInst>(Ret);
+    }
+    break;
+  }
+
   case Intrinsic::arm_neon_bfdot:
   case Intrinsic::arm_neon_bfmmla:
   case Intrinsic::arm_neon_bfmlalb:
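Note: taken together, the AutoUpgrade hooks above rewrite any bitcode that still uses the retired tuple intrinsics in terms of the generic subvector intrinsics. As a rough sketch (hand-written IR that mirrors what the new tests below check, not copied from the patch), a two-part create such as

  %tuple = call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)

becomes a chain of inserts into poison, with each index scaled by the minimum element count of the part type (16 for nxv16i8):

  %t0 = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> %a, i64 0)
  %t1 = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> %t0, <vscale x 16 x i8> %b, i64 16)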
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -20339,71 +20339,6 @@
   case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
     return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM_PRED);
-  case Intrinsic::aarch64_sve_tuple_get: {
-    SDLoc DL(N);
-    SDValue Chain = N->getOperand(0);
-    SDValue Src1 = N->getOperand(2);
-    SDValue Idx = N->getOperand(3);
-
-    uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
-    EVT ResVT = N->getValueType(0);
-    uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue();
-    SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL);
-    SDValue Val =
-        DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx);
-    return DAG.getMergeValues({Val, Chain}, DL);
-  }
-  case Intrinsic::aarch64_sve_tuple_set: {
-    SDLoc DL(N);
-    SDValue Chain = N->getOperand(0);
-    SDValue Tuple = N->getOperand(2);
-    SDValue Idx = N->getOperand(3);
-    SDValue Vec = N->getOperand(4);
-
-    EVT TupleVT = Tuple.getValueType();
-    uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue();
-
-    uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
-    uint64_t NumLanes =
-        Vec.getValueType().getVectorElementCount().getKnownMinValue();
-
-    if ((TupleLanes % NumLanes) != 0)
-      report_fatal_error("invalid tuple vector!");
-
-    uint64_t NumVecs = TupleLanes / NumLanes;
-
-    SmallVector<SDValue, 4> Opnds;
-    for (unsigned I = 0; I < NumVecs; ++I) {
-      if (I == IdxConst)
-        Opnds.push_back(Vec);
-      else {
-        SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL);
-        Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
-                                    Vec.getValueType(), Tuple, ExtIdx));
-      }
-    }
-    SDValue Concat =
-        DAG.getNode(ISD::CONCAT_VECTORS, DL, Tuple.getValueType(), Opnds);
-    return DAG.getMergeValues({Concat, Chain}, DL);
-  }
-  case Intrinsic::aarch64_sve_tuple_create2:
-  case Intrinsic::aarch64_sve_tuple_create3:
-  case Intrinsic::aarch64_sve_tuple_create4: {
-    SDLoc DL(N);
-    SDValue Chain = N->getOperand(0);
-
-    SmallVector<SDValue, 4> Opnds;
-    for (unsigned I = 2; I < N->getNumOperands(); ++I)
-      Opnds.push_back(N->getOperand(I));
-
-    EVT VT = Opnds[0].getValueType();
-    EVT EltVT = VT.getVectorElementType();
-    EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
-                                  VT.getVectorElementCount() *
-                                      (N->getNumOperands() - 2));
-    SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, DestVT, Opnds);
-    return DAG.getMergeValues({Concat, Chain}, DL);
-  }
   case Intrinsic::aarch64_rndr:
   case Intrinsic::aarch64_rndrrs: {
     unsigned IntrinsicID =
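Note: with the intrinsics deleted, these DAG combines have no remaining producers. Upgraded IR reaches instruction selection as llvm.vector.insert/llvm.vector.extract, which already lower through the generic INSERT_SUBVECTOR/EXTRACT_SUBVECTOR paths. For example (illustrative IR, assuming the upgrade sketched above), the extract

  %ext = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %tuple, i64 4)

should select to the same register-to-register moves that the removed aarch64_sve_tuple_get combine used to produce.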
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1208,32 +1208,6 @@
   return IC.replaceInstUsesWith(II, VectorSplat);
 }
 
-static Optional<Instruction *> instCombineSVETupleGet(InstCombiner &IC,
-                                                      IntrinsicInst &II) {
-  // Try to remove sequences of tuple get/set.
-  Value *SetTuple, *SetIndex, *SetValue;
-  auto *GetTuple = II.getArgOperand(0);
-  auto *GetIndex = II.getArgOperand(1);
-  // Check that we have tuple_get(GetTuple, GetIndex) where GetTuple is a
-  // call to tuple_set i.e. tuple_set(SetTuple, SetIndex, SetValue).
-  // Make sure that the types of the current intrinsic and SetValue match
-  // in order to safely remove the sequence.
-  if (!match(GetTuple,
-             m_Intrinsic<Intrinsic::aarch64_sve_tuple_set>(
-                 m_Value(SetTuple), m_Value(SetIndex), m_Value(SetValue))) ||
-      SetValue->getType() != II.getType())
-    return None;
-  // Case where we get the same index right after setting it.
-  // tuple_get(tuple_set(SetTuple, SetIndex, SetValue), GetIndex) --> SetValue
-  if (GetIndex == SetIndex)
-    return IC.replaceInstUsesWith(II, SetValue);
-  // If we are getting a different index than what was set in the tuple_set
-  // intrinsic. We can just set the input tuple to the one up in the chain.
-  // tuple_get(tuple_set(SetTuple, SetIndex, SetValue), GetIndex)
-  // --> tuple_get(SetTuple, GetIndex)
-  return IC.replaceOperand(II, 0, SetTuple);
-}
-
 static Optional<Instruction *> instCombineSVEZip(InstCombiner &IC,
                                                  IntrinsicInst &II) {
   // zip1(uzp1(A, B), uzp2(A, B)) --> A
@@ -1448,8 +1422,6 @@
   case Intrinsic::aarch64_sve_sunpkhi:
   case Intrinsic::aarch64_sve_sunpklo:
     return instCombineSVEUnpack(IC, II);
-  case Intrinsic::aarch64_sve_tuple_get:
-    return instCombineSVETupleGet(IC, II);
   case Intrinsic::aarch64_sve_zip1:
   case Intrinsic::aarch64_sve_zip2:
     return instCombineSVEZip(IC, II);
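Note: the removed tuple_get/tuple_set folds are expected to be subsumed by the generic InstCombine rules for the portable intrinsics, which fold an extract that exactly reads back a just-inserted subvector. A minimal sketch of the equivalent pattern (hand-written IR, not part of this patch):

  %ins = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, <vscale x 4 x i32> %v, i64 4)
  %ext = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %ins, i64 4)
  ; %ext folds to %v; with a non-overlapping index it instead becomes an extract from %tuple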
diff --git a/llvm/test/Bitcode/upgrade-aarch64-sve-intrinsics.ll b/llvm/test/Bitcode/upgrade-aarch64-sve-intrinsics.ll
--- a/llvm/test/Bitcode/upgrade-aarch64-sve-intrinsics.ll
+++ b/llvm/test/Bitcode/upgrade-aarch64-sve-intrinsics.ll
@@ -72,3 +72,99 @@
 declare <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8(<vscale x 16 x i1>, i8*)
 declare <vscale x 32 x i8> @llvm.aarch64.sve.ld2(<vscale x 16 x i1>, i8*)
 declare <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8(<vscale x 16 x i1>, i8*)
+
+; aarch64.sve.tuple.create.N
+define <vscale x 32 x i8> @create2_nxv32i8_nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) {
+; CHECK-LABEL: @create2_nxv32i8_nxv16i8
+; CHECK: %1 = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> %z1, i64 0)
+; CHECK-NEXT: %tuple = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> %1, <vscale x 16 x i8> %z2, i64 16)
+; CHECK-NEXT: ret <vscale x 32 x i8> %tuple
+
+  %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2)
+  ret <vscale x 32 x i8> %tuple
+}
+
+define <vscale x 24 x i16> @create3_nxv24i16_nxv8i16(<vscale x 8 x i16> %unused_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) {
+; CHECK-LABEL: @create3_nxv24i16_nxv8i16
+; CHECK: %1 = call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> %z1, i64 0)
+; CHECK-NEXT: %2 = call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> %1, <vscale x 8 x i16> %z2, i64 8)
+; CHECK-NEXT: %tuple = call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> %2, <vscale x 8 x i16> %z3, i64 16)
+; CHECK-NEXT: ret <vscale x 24 x i16> %tuple
+
+  %tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3)
+  ret <vscale x 24 x i16> %tuple
+}
+
+define <vscale x 64 x i8> @create4_nxv64i8_nxv16i8(<vscale x 16 x i8> %unused_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3, <vscale x 16 x i8> %z4) {
+; CHECK-LABEL: @create4_nxv64i8_nxv16i8
+; CHECK: %1 = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> %z1, i64 0)
+; CHECK-NEXT: %2 = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %1, <vscale x 16 x i8> %z2, i64 16)
+; CHECK-NEXT: %3 = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %2, <vscale x 16 x i8> %z3, i64 32)
+; CHECK-NEXT: %tuple = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %3, <vscale x 16 x i8> %z4, i64 48)
+; CHECK-NEXT: ret <vscale x 64 x i8> %tuple
+
+  %tuple = tail call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3, <vscale x 16 x i8> %z4)
+  ret <vscale x 64 x i8> %tuple
+}
+
+; Accept short mangling name
+define <vscale x 32 x i8> @create2_nxv32i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) {
+; CHECK-LABEL: @create2_nxv32i8
+; CHECK: %1 = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> %z1, i64 0)
+; CHECK-NEXT: %tuple = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> %1, <vscale x 16 x i8> %z2, i64 16)
+; CHECK-NEXT: ret <vscale x 32 x i8> %tuple
+
+  %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2)
+  ret <vscale x 32 x i8> %tuple
+}
+
+define <vscale x 32 x i8> @create2(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) {
+; CHECK-LABEL: @create2
+; CHECK: %1 = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> %z1, i64 0)
+; CHECK-NEXT: %tuple = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> %1, <vscale x 16 x i8> %z2, i64 16)
+; CHECK-NEXT: ret <vscale x 32 x i8> %tuple
+
+  %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2)
+  ret <vscale x 32 x i8> %tuple
+}
+
+; Negative test for create
+; Should not upgrade when create is not 2, 3 or 4
+define <vscale x 16 x i8> @sve_tuple_create1(<vscale x 16 x i8> %z0) {
+; CHECK-LABEL: @sve_tuple_create1
+; CHECK: %tuple = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.create1.nxv16i8.nxv16i8(<vscale x 16 x i8> %z0)
+; CHECK-NEXT: ret <vscale x 16 x i8> %tuple
+
+  %tuple = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.create1.nxv16i8.nxv16i8(<vscale x 16 x i8> %z0)
+  ret <vscale x 16 x i8> %tuple
+}
+
+; aarch64.sve.tuple.set
+
+define void @set_tuple2_nxv8i32_elt1(<vscale x 8 x i32> %z0, <vscale x 4 x i32> %z1) {
+; CHECK-LABEL: @set_tuple2_nxv8i32_elt1
+; CHECK: %ins = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> %z0, <vscale x 4 x i32> %z1, i64 4)
+; CHECK-NEXT: ret void
+
+  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %z0, i32 1, <vscale x 4 x i32> %z1)
+  ret void
+}
+
+; aarch64.sve.tuple.get
+define <vscale x 4 x i32> @get_tuple2_nxv8i32_elt1(<vscale x 8 x i32> %tuple) {
+; CHECK-LABEL: @get_tuple2_nxv8i32_elt1
+; CHECK: %ext = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %tuple, i64 4)
+; CHECK-NEXT: ret <vscale x 4 x i32> %ext
+
+  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %tuple, i32 1)
+  ret <vscale x 4 x i32> %ext
+}
+
+declare <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.create1.nxv16i8.nxv16i8(<vscale x 16 x i8>)
+declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32>, i32, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32>, i32)
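Note on the negative test above: the create upgrade is gated on CreateTupleRegex in AutoUpgrade.cpp, ^aarch64\.sve\.tuple\.create[234](.nxv[a-z0-9]+|$), so the fully mangled, single-suffix, and unmangled names are all accepted, while create1 deliberately falls through unchanged. Illustrative matches (hand-written, not from the patch):

  aarch64.sve.tuple.create2.nxv32i8.nxv16i8  ; matches
  aarch64.sve.tuple.create2.nxv32i8          ; matches (short mangling)
  aarch64.sve.tuple.create2                  ; matches (the $ alternative)
  aarch64.sve.tuple.create1.nxv16i8.nxv16i8  ; no match, [234] excludes create1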
diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-tuple-types.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-tuple-types.ll
deleted file mode 100644
--- a/llvm/test/CodeGen/AArch64/sve-calling-convention-tuple-types.ll
+++ /dev/null
@@ -1,499 +0,0 @@
-; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
-
-;
-; svint8x2_t
-;
-
-define <vscale x 32 x i8> @ret_svint8x2_t(<vscale x 16 x i8> %unused_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) #0 {
-; CHECK-LABEL: ret_svint8x2_t
-; CHECK: mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2)
-  ret <vscale x 32 x i8> %tuple
-}
-
-define void @call_svint8x2_t(<vscale x 16 x i8> %dummy_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %dummy_z2, <vscale x 16 x i8> %z3) #0 {
-; CHECK-LABEL: call_svint8x2_t
-; CHECK: mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z3.d
-; CHECK-NEXT: bl callee_svint8x2_t
-  %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z3)
-  call void @callee_svint8x2_t(<vscale x 32 x i8> %tuple)
-  ret void
-}
-
-;
-; svint16x2_t
-;
-
-define <vscale x 16 x i16> @ret_svint16x2_t(<vscale x 8 x i16> %unused_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2) #0 {
-; CHECK-LABEL: ret_svint16x2_t
-; CHECK: mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2)
-  ret <vscale x 16 x i16> %tuple
-}
-
-define void @call_svint16x2_t(<vscale x 8 x i16> %dummy_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %dummy_z2, <vscale x 8 x i16> %z3) #0 {
-; CHECK-LABEL: call_svint16x2_t
-; CHECK: mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z3.d
-; CHECK-NEXT: bl callee_svint16x2_t
-  %tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z3)
-  call void @callee_svint16x2_t(<vscale x 16 x i16> %tuple)
-  ret void
-}
-
-;
-; svint32x2_t
-;
-
-define <vscale x 8 x i32> @ret_svint32x2_t(<vscale x 4 x i32> %unused_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) #0 {
-; CHECK-LABEL: ret_svint32x2_t
-; CHECK: mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
-  ret <vscale x 8 x i32> %tuple
-}
-
-define void @call_svint32x2_t(<vscale x 4 x i32> %dummy_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %dummy_z2, <vscale x 4 x i32> %z3) #0 {
-; CHECK-LABEL: call_svint32x2_t
-; CHECK: mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z3.d
-; CHECK-NEXT: bl callee_svint32x2_t
-  %tuple = tail call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z3)
-  call void @callee_svint32x2_t(<vscale x 8 x i32> %tuple)
-  ret void
-}
-
-;
-; svint64x2_t
-;
-
-define <vscale x 4 x i64> @ret_svint64x2_t(<vscale x 2 x i64> %unused_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2) #0 {
-; CHECK-LABEL: ret_svint64x2_t
-; CHECK: mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2)
-  ret <vscale x 4 x i64> %tuple
-}
-
-define void @call_svint64x2_t(<vscale x 2 x i64> %dummy_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %dummy_z2, <vscale x 2 x i64> %z3) #0 {
-; CHECK-LABEL: call_svint64x2_t
-; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z3.d -; CHECK-NEXT: bl callee_svint64x2_t - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64( %z1, %z3) - call void @callee_svint64x2_t( %tuple) - ret void -} - -; -; svfloatx2_t -; - -define @ret_svfloatx2_t( %unused_z0, %z1, %z2) #0 { -; CHECK-LABEL: ret_svfloatx2_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32( %z1, %z2) - ret %tuple -} - -define void @call_svfloatx2_t( %dummy_z0, %z1, %dummy_z2, %z3) #0 { -; CHECK-LABEL: call_svfloatx2_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z3.d -; CHECK-NEXT: bl callee_svfloatx2_t - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32( %z1, %z3) - call void @callee_svfloatx2_t( %tuple) - ret void -} - -; -; svdoublex2_t -; - -define @ret_svdoublex2_t( %unused_z0, %z1, %z2) #0 { -; CHECK-LABEL: ret_svdoublex2_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64( %z1, %z2) - ret %tuple -} - -define void @call_svdoublex2_t( %dummy_z0, %z1, %dummy_z2, %z3) #0 { -; CHECK-LABEL: call_svdoublex2_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z3.d -; CHECK-NEXT: bl callee_svdoublex2_t - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64( %z1, %z3) - call void @callee_svdoublex2_t( %tuple) - ret void -} - -; -; svint8x3_t -; - -define @ret_svint8x3_t( %unused_z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: ret_svint8x3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8( %z1, %z2, %z3) - ret %tuple -} - -define void @call_svint8x3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { -; CHECK-LABEL: call_svint8x3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svint8x3_t - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8( %z1, %z2, %z4) - call void @callee_svint8x3_t( %tuple) - ret void -} - -; -; svint16x3_t -; - -define @ret_svint16x3_t( %unused_z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: ret_svint16x3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16( %z1, %z2, %z3) - ret %tuple -} - -define void @call_svint16x3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { -; CHECK-LABEL: call_svint16x3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svint16x3_t - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16( %z1, %z2, %z4) - call void @callee_svint16x3_t( %tuple) - ret void -} - -; -; svint32x3_t -; - -define @ret_svint32x3_t( %unused_z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: ret_svint32x3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z1, %z2, %z3) - ret %tuple -} - -define void @call_svint32x3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { -; CHECK-LABEL: call_svint32x3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svint32x3_t - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z1, %z2, %z4) - call void @callee_svint32x3_t( %tuple) - ret void -} - -; -; svint64x3_t -; - 
-define @ret_svint64x3_t( %unused_z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: ret_svint64x3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64( %z1, %z2, %z3) - ret %tuple -} - -define void @call_svint64x3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { -; CHECK-LABEL: call_svint64x3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svint64x3_t - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64( %z1, %z2, %z4) - call void @callee_svint64x3_t( %tuple) - ret void -} - -; -; svfloatx3_t -; - -define @ret_svfloatx3_t( %unused_z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: ret_svfloatx3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32( %z1, %z2, %z3) - ret %tuple -} - -define void @call_svfloatx3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { -; CHECK-LABEL: call_svfloatx3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svfloatx3_t - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32( %z1, %z2, %z4) - call void @callee_svfloatx3_t( %tuple) - ret void -} - -; -; svdoublex3_t -; - -define @ret_svdoublex3_t( %unused_z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: ret_svdoublex3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64( %z1, %z2, %z3) - ret %tuple -} - -define void @call_svdoublex3_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4) #0 { -; CHECK-LABEL: call_svdoublex3_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svdoublex3_t - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64( %z1, %z2, %z4) - call void @callee_svdoublex3_t( %tuple) - ret void -} - -; -; svint8x4_t -; - -define @ret_svint8x4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { -; CHECK-LABEL: ret_svint8x4_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: mov z3.d, z4.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8( %z1, %z2, %z3, %z4) - ret %tuple -} - -define void @call_svint8x4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { -; CHECK-LABEL: call_svint8x4_t -; CHECK: mov z3.d, z5.d -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svint8x4_t - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8( %z1, %z2, %z4, %z5) - call void @callee_svint8x4_t( %tuple) - ret void -} - -; -; svint16x4_t -; - -define @ret_svint16x4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { -; CHECK-LABEL: ret_svint16x4_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: mov z3.d, z4.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16( %z1, %z2, %z3, %z4) - ret %tuple -} - -define void @call_svint16x4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { -; CHECK-LABEL: call_svint16x4_t -; CHECK: mov z3.d, z5.d -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svint16x4_t - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16( %z1, %z2, %z4, %z5) - call void 
@callee_svint16x4_t( %tuple) - ret void -} - -; -; svint32x4_t -; - -define @ret_svint32x4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { -; CHECK-LABEL: ret_svint32x4_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: mov z3.d, z4.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z1, %z2, %z3, %z4) - ret %tuple -} - -define void @call_svint32x4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { -; CHECK-LABEL: call_svint32x4_t -; CHECK: mov z3.d, z5.d -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svint32x4_t - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z1, %z2, %z4, %z5) - call void @callee_svint32x4_t( %tuple) - ret void -} - -; -; svint64x4_t -; - -define @ret_svint64x4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { -; CHECK-LABEL: ret_svint64x4_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: mov z3.d, z4.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64( %z1, %z2, %z3, %z4) - ret %tuple -} - -define void @call_svint64x4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { -; CHECK-LABEL: call_svint64x4_t -; CHECK: mov z3.d, z5.d -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svint64x4_t - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64( %z1, %z2, %z4, %z5) - call void @callee_svint64x4_t( %tuple) - ret void -} - -; -; svfloatx4_t -; - -define @ret_svfloatx4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { -; CHECK-LABEL: ret_svfloatx4_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: mov z3.d, z4.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32( %z1, %z2, %z3, %z4) - ret %tuple -} - -define void @call_svfloatx4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { -; CHECK-LABEL: call_svfloatx4_t -; CHECK: mov z3.d, z5.d -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svfloatx4_t - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32( %z1, %z2, %z4, %z5) - call void @callee_svfloatx4_t( %tuple) - ret void -} - -; -; svdoublex4_t -; - -define @ret_svdoublex4_t( %unused_z0, %z1, %z2, %z3, %z4) #0 { -; CHECK-LABEL: ret_svdoublex4_t -; CHECK: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z3.d -; CHECK-NEXT: mov z3.d, z4.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64( %z1, %z2, %z3, %z4) - ret %tuple -} - -define void @call_svdoublex4_t( %dummy_z0, %z1, %z2, %dummy_z3, %z4, %z5) #0 { -; CHECK-LABEL: call_svdoublex4_t -; CHECK: mov z3.d, z5.d -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: mov z2.d, z4.d -; CHECK-NEXT: bl callee_svdoublex4_t - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64( %z1, %z2, %z4, %z5) - call void @callee_svdoublex4_t( %tuple) - ret void -} - -attributes #0 = { nounwind "target-features"="+sve" } - -declare void @callee_svint8x2_t() -declare void @callee_svint16x2_t() -declare void @callee_svint32x2_t() -declare void @callee_svint64x2_t() -declare void @callee_svfloatx2_t() -declare void @callee_svdoublex2_t() - -declare void @callee_svint8x3_t() -declare void @callee_svint16x3_t() -declare void @callee_svint32x3_t() -declare void 
@callee_svint64x3_t() -declare void @callee_svfloatx3_t() -declare void @callee_svdoublex3_t() - -declare void @callee_svint8x4_t() -declare void @callee_svint16x4_t() -declare void @callee_svint32x4_t() -declare void @callee_svint64x4_t() -declare void @callee_svfloatx4_t() -declare void @callee_svdoublex4_t() - - -; x2 -declare @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(, ) -declare @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(, ) -declare @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(, ) -declare @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(, ) -declare @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(, ) -declare @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(, ) - -; x3 -declare @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(, , ) -declare @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(, , ) -declare @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(, , ) -declare @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(, , ) -declare @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(, , ) -declare @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(, , ) - -; x4 -declare @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(, , , ) -declare @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(, , , ) -declare @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(, , , ) -declare @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(, , , ) -declare @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(, , , ) -declare @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(, , , ) diff --git a/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll +++ /dev/null @@ -1,78 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s - -; Test that DAGCombiner doesn't drop the scalable flag when it tries to fold: -; extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index') -define @extract_nxv16i8_nxv4i64( %z0_z1) { -; CHECK-LABEL: extract_nxv16i8_nxv4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: ret - %z0_z1_bc = bitcast %z0_z1 to - %ext = call @llvm.aarch64.sve.tuple.get.nxv32i8( %z0_z1_bc, i32 1) - ret %ext -} - - -define @extract_nxv2i64_nxv32i8( %z0_z1) { -; CHECK-LABEL: extract_nxv2i64_nxv32i8: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: ret - %z0_z1_bc = bitcast %z0_z1 to - %ext = call @llvm.aarch64.sve.tuple.get.nxv4i64( %z0_z1_bc, i32 1) - ret %ext -} - -define @extract_lo_nxv4f16_nxv8f16( %z0) { -; CHECK-LABEL: extract_lo_nxv4f16_nxv8f16: -; CHECK: // %bb.0: -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: ret - %ext = call @llvm.aarch64.sve.tuple.get.nxv8f16( %z0, i32 0) - ret %ext -} - -define @extract_hi_nxv4f16_nxv8f16( %z0) { -; CHECK-LABEL: extract_hi_nxv4f16_nxv8f16: -; CHECK: // %bb.0: -; CHECK-NEXT: uunpkhi z0.s, z0.h -; CHECK-NEXT: ret - %ext = call @llvm.aarch64.sve.tuple.get.nxv8f16( %z0, i32 1) - ret %ext -} - -define @extract_lo_nxv2f32_nxv4f32( %z0) { -; CHECK-LABEL: extract_lo_nxv2f32_nxv4f32: -; CHECK: // %bb.0: -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: ret - %ext = call @llvm.aarch64.sve.tuple.get.nxv4f32( %z0, i32 0) - ret %ext -} - -define @extract_hi_nxv2f32_nxv4f32( %z0) { -; CHECK-LABEL: extract_hi_nxv2f32_nxv4f32: -; CHECK: // %bb.0: -; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: ret - %ext = call @llvm.aarch64.sve.tuple.get.nxv4f32( %z0, i32 1) - ret %ext -} - -define 
@load_extract_nxv4f32_nxv8f32(* %p) { -; CHECK-LABEL: load_extract_nxv4f32_nxv8f32: -; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, #1, mul vl] -; CHECK-NEXT: ret - %tmp1 = load , * %p, align 16 - %tmp2 = call @llvm.aarch64.sve.tuple.get.nxv8f32( %tmp1, i32 1) - ret %tmp2 -} - -declare @llvm.aarch64.sve.tuple.get.nxv4i64(, i32) -declare @llvm.aarch64.sve.tuple.get.nxv32i8(, i32) -declare @llvm.aarch64.sve.tuple.get.nxv4f32(, i32) -declare @llvm.aarch64.sve.tuple.get.nxv8f16(, i32) -declare @llvm.aarch64.sve.tuple.get.nxv8f32(, i32) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll +++ /dev/null @@ -1,1002 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple aarch64 -mattr=+sve < %s | FileCheck %s -; RUN: llc -mtriple aarch64 -mattr=+sme < %s | FileCheck %s - -; -; SVCREATE2 (i8) -; - -define @test_svcreate2_s8_vec0(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_s8_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB0_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB0_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8( %tuple, i32 0) - ret %extract -} - -define @test_svcreate2_s8_vec1(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_s8_vec1: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB1_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB1_2: // %L2 -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8( %tuple, i32 1) - ret %extract -} - -; -; SVCREATE2 (i16) -; - -define @test_svcreate2_s16_vec0(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_s16_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB2_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB2_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16( %tuple, i32 0) - ret %extract -} - -define @test_svcreate2_s16_vec1(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_s16_vec1: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB3_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB3_2: // %L2 -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16( %tuple, i32 1) - ret %extract -} - -; -; SVCREATE2 (half) -; - -define @test_svcreate2_f16_vec0(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_f16_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB4_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB4_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16( %z0, %z1) 
- br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16( %tuple, i32 0) - ret %extract -} - -define @test_svcreate2_f16_vec1(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_f16_vec1: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB5_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB5_2: // %L2 -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16( %tuple, i32 1) - ret %extract -} - -; -; SVCREATE2 (bfloat) -; - -define @test_svcreate2_bf16_vec0(i1 %p, %z0, %z1) #1 { -; CHECK-LABEL: test_svcreate2_bf16_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB6_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB6_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16( %tuple, i32 0) - ret %extract -} - -define @test_svcreate2_bf16_vec1(i1 %p, %z0, %z1) #1 { -; CHECK-LABEL: test_svcreate2_bf16_vec1: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB7_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB7_2: // %L2 -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16( %tuple, i32 1) - ret %extract -} - -; -; SVCREATE2 (i32) -; - -define @test_svcreate2_s32_vec0(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_s32_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB8_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB8_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32( %tuple, i32 0) - ret %extract -} - -define @test_svcreate2_s32_vec1(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_s32_vec1: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB9_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB9_2: // %L2 -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32( %tuple, i32 1) - ret %extract -} - -; -; SVCREATE2 (float) -; - -define @test_svcreate2_f32_vec0(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_f32_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB10_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB10_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32( %tuple, i32 0) - ret %extract -} - -define @test_svcreate2_f32_vec1(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_f32_vec1: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB11_2 -; 
CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB11_2: // %L2 -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32( %tuple, i32 1) - ret %extract -} - -; -; SVCREATE2 (i64) -; - -define @test_svcreate2_s64_vec0(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_s64_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB12_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB12_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64( %tuple, i32 0) - ret %extract -} - -define @test_svcreate2_s64_vec1(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_s64_vec1: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB13_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB13_2: // %L2 -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64( %tuple, i32 1) - ret %extract -} - -; -; SVCREATE2 (double) -; - -define @test_svcreate2_f64_vec0(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_f64_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB14_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB14_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64( %tuple, i32 0) - ret %extract -} - -define @test_svcreate2_f64_vec1(i1 %p, %z0, %z1) #0 { -; CHECK-LABEL: test_svcreate2_f64_vec1: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB15_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB15_2: // %L2 -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64( %z0, %z1) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64( %tuple, i32 1) - ret %extract -} - -; -; SVCREATE3 (i8) -; - -define @test_svcreate3_s8_vec0(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_s8_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB16_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB16_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8( %tuple, i32 0) - ret %extract -} - -define @test_svcreate3_s8_vec2(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_s8_vec2: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB17_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB17_2: // %L2 -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call 
@llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8( %tuple, i32 2) - ret %extract -} - -; -; SVCREATE3 (i16) -; - -define @test_svcreate3_s16_vec0(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_s16_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB18_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB18_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16( %tuple, i32 0) - ret %extract -} - -define @test_svcreate3_s16_vec2(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_s16_vec2: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB19_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB19_2: // %L2 -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16( %tuple, i32 2) - ret %extract -} - -; -; SVCREATE3 (half) -; - -define @test_svcreate3_f16_vec0(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_f16_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB20_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB20_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16( %tuple, i32 0) - ret %extract -} - -define @test_svcreate3_f16_vec2(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_f16_vec2: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB21_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB21_2: // %L2 -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16( %tuple, i32 2) - ret %extract -} - -; -; SVCREATE3 (bfloat) -; - -define @test_svcreate3_bf16_vec0(i1 %p, %z0, %z1, %z2) #1 { -; CHECK-LABEL: test_svcreate3_bf16_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB22_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB22_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24bf16.nxv8bf16( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv24bf16( %tuple, i32 0) - ret %extract -} - -define @test_svcreate3_bf16_vec2(i1 %p, %z0, %z1, %z2) #1 { -; CHECK-LABEL: test_svcreate3_bf16_vec2: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB23_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB23_2: // %L2 -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv24bf16.nxv8bf16( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv24bf16( %tuple, i32 2) - ret %extract -} - -; -; SVCREATE3 (i32) -; - -define @test_svcreate3_s32_vec0(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_s32_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: 
tbz w0, #0, .LBB24_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB24_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32( %tuple, i32 0) - ret %extract -} - -define @test_svcreate3_s32_vec2(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_s32_vec2: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB25_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB25_2: // %L2 -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32( %tuple, i32 2) - ret %extract -} - -; -; SVCREATE3 (float) -; - -define @test_svcreate3_f32_vec0(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_f32_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB26_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB26_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32( %tuple, i32 0) - ret %extract -} - -define @test_svcreate3_f32_vec2(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_f32_vec2: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB27_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB27_2: // %L2 -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32( %tuple, i32 2) - ret %extract -} - -; -; SVCREATE3 (i64) -; - -define @test_svcreate3_s64_vec0(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_s64_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB28_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB28_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64( %tuple, i32 0) - ret %extract -} - -define @test_svcreate3_s64_vec2(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_s64_vec2: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB29_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB29_2: // %L2 -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64( %tuple, i32 2) - ret %extract -} - -; -; SVCREATE3 (double) -; - -define @test_svcreate3_f64_vec0(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_f64_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB30_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB30_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64( %z0, %z1, %z2) - br i1 %p, label %L1, 
label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64( %tuple, i32 0) - ret %extract -} - -define @test_svcreate3_f64_vec2(i1 %p, %z0, %z1, %z2) #0 { -; CHECK-LABEL: test_svcreate3_f64_vec2: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB31_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB31_2: // %L2 -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64( %z0, %z1, %z2) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64( %tuple, i32 2) - ret %extract -} - -; -; SVCREATE4 (i8) -; - -define @test_svcreate4_s8_vec0(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_s8_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB32_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB32_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %tuple, i32 0) - ret %extract -} - -define @test_svcreate4_s8_vec3(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_s8_vec3: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB33_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB33_2: // %L2 -; CHECK-NEXT: mov z0.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8( %tuple, i32 3) - ret %extract -} - -; -; SVCREATE4 (i16) -; - -define @test_svcreate4_s16_vec0(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_s16_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB34_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB34_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %tuple, i32 0) - ret %extract -} - -define @test_svcreate4_s16_vec3(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_s16_vec3: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB35_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB35_2: // %L2 -; CHECK-NEXT: mov z0.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16( %tuple, i32 3) - ret %extract -} - -; -; SVCREATE4 (half) -; - -define @test_svcreate4_f16_vec0(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_f16_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB36_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB36_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16( %tuple, i32 0) - ret %extract -} - -define @test_svcreate4_f16_vec3(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: 
test_svcreate4_f16_vec3: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB37_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB37_2: // %L2 -; CHECK-NEXT: mov z0.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16( %tuple, i32 3) - ret %extract -} - -; -; SVCREATE4 (bfloat) -; - -define @test_svcreate4_bf16_vec0(i1 %p, %z0, %z1, %z2, %z3) #1 { -; CHECK-LABEL: test_svcreate4_bf16_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB38_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB38_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32bf16.nxv8bf16( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv32bf16( %tuple, i32 0) - ret %extract -} - -define @test_svcreate4_bf16_vec3(i1 %p, %z0, %z1, %z2, %z3) #1 { -; CHECK-LABEL: test_svcreate4_bf16_vec3: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB39_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB39_2: // %L2 -; CHECK-NEXT: mov z0.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv32bf16.nxv8bf16( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv32bf16( %tuple, i32 3) - ret %extract -} - -; -; SVCREATE4 (i32) -; - -define @test_svcreate4_s32_vec0(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_s32_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB40_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB40_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %tuple, i32 0) - ret %extract -} - -define @test_svcreate4_s32_vec3(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_s32_vec3: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB41_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB41_2: // %L2 -; CHECK-NEXT: mov z0.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32( %tuple, i32 3) - ret %extract -} - -; -; SVCREATE4 (float) -; - -define @test_svcreate4_f32_vec0(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_f32_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB42_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB42_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32( %tuple, i32 0) - ret %extract -} - -define @test_svcreate4_f32_vec3(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_f32_vec3: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB43_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB43_2: // %L2 -; 
CHECK-NEXT: mov z0.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32( %tuple, i32 3) - ret %extract -} - -; -; SVCREATE4 (i64) -; - -define @test_svcreate4_s64_vec0(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_s64_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB44_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB44_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %tuple, i32 0) - ret %extract -} - -define @test_svcreate4_s64_vec3(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_s64_vec3: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB45_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB45_2: // %L2 -; CHECK-NEXT: mov z0.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64( %tuple, i32 3) - ret %extract -} - -; -; SVCREATE4 (double) -; - -define @test_svcreate4_f64_vec0(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_f64_vec0: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB46_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: // implicit-def: $z0 -; CHECK-NEXT: .LBB46_2: // %L2 -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64( %tuple, i32 0) - ret %extract -} - -define @test_svcreate4_f64_vec3(i1 %p, %z0, %z1, %z2, %z3) #0 { -; CHECK-LABEL: test_svcreate4_f64_vec3: -; CHECK: // %bb.0: -; CHECK-NEXT: tbz w0, #0, .LBB47_2 -; CHECK-NEXT: // %bb.1: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB47_2: // %L2 -; CHECK-NEXT: mov z0.d, z3.d -; CHECK-NEXT: ret - %tuple = tail call @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64( %z0, %z1, %z2, %z3) - br i1 %p, label %L1, label %L2 -L1: - ret undef -L2: - %extract = tail call @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64( %tuple, i32 3) - ret %extract -} - -attributes #0 = { nounwind } -; +bf16 is required for the bfloat version. 
-attributes #1 = { nounwind "target-features"="+bf16" }
-
-declare <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
-declare <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 16 x half> @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
-declare <vscale x 16 x bfloat> @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
-declare <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
-
-declare <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
-declare <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 24 x half> @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
-declare <vscale x 24 x bfloat> @llvm.aarch64.sve.tuple.create3.nxv24bf16.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
-declare <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-
-declare <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64 (<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
-declare <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 32 x half> @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
-declare <vscale x 32 x bfloat> @llvm.aarch64.sve.tuple.create4.nxv32bf16.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
-declare <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8(<vscale x 32 x i8>, i32 immarg)
-declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(<vscale x 48 x i8>, i32 immarg)
-declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8>, i32 immarg)
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16(<vscale x 16 x i16>, i32 immarg)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(<vscale x 24 x i16>, i32 immarg)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16>, i32 immarg)
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(<vscale x 8 x i32>, i32 immarg)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(<vscale x 12 x i32>, i32 immarg)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32>, i32 immarg)
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64(<vscale x 4 x i64>, i32 immarg)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(<vscale x 6 x i64>, i32 immarg)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64>, i32 immarg)
-
-declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat>, i32 immarg)
-declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat>, i32 immarg)
-declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat>, i32 immarg)
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16(<vscale x 16 x half>, i32 immarg)
-declare <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16(<vscale x 24 x half>, i32 immarg)
-declare <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half>, i32 immarg)
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32(<vscale x 8 x float>, i32 immarg)
-declare <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32(<vscale x 12 x float>, i32 immarg)
-declare <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float>, i32 immarg)
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64(<vscale x 4 x double>, i32 immarg)
-declare <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64(<vscale x 6 x double>, i32 immarg)
-declare <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double>, i32 immarg)
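For reference: after this patch the calls in the deleted tests above are no longer lowered directly; the AutoUpgrade change earlier in this patch rewrites each tuple.get as a generic llvm.vector.extract whose i64 index is the tuple slot scaled by the subvector's minimum element count. A minimal sketch of the rewrite for one case above (the %tuple/%extract names are only illustrative):

; before upgrade: slot 3 of a 4-element f16 tuple
%extract = tail call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half> %tuple, i32 3)
; after upgrade: slot 3 * 8 minimum elements = element index 24
%extract = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> %tuple, i64 24)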
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll
deleted file mode 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll
+++ /dev/null
@@ -1,234 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple aarch64 -mattr=+sve < %s | FileCheck %s
-; RUN: llc -mtriple aarch64 -mattr=+sme < %s | FileCheck %s
-
-; All these tests create a vector tuple, insert z5 into one of the elements,
-; and finally extract that element from the wide vector to return it. These
-; checks ensure that z5 is always the value that is returned.
-
-;
-; Insert into two element tuples
-;
-
-; tuple:       { tuple2.res0, tuple2.res1 }
-; insert z5:   {     z5     , tuple2.res1 }
-; extract z5:        ^^
-define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple2_nxv8i32_elt0:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, z5.d
-; CHECK-NEXT: ret
-  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
-  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
-  ret <vscale x 4 x i32> %ext
-}
-
-; tuple:       { tuple2.res0, tuple2.res1 }
-; insert z5:   { tuple2.res0,     z5      }
-; extract z5:                     ^^
-define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple2_nxv8i32_elt1:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, z5.d
-; CHECK-NEXT: ret
-  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
-  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 1)
-  ret <vscale x 4 x i32> %ext
-}
-
-; This test checks the elements _not_ being set aren't changed.
-
-; tuple:       { tuple2.res0, tuple2.res1 }
-; insert z5:   { tuple2.res0,     z5      }
-; extract z0:       ^^
-define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1_ret_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple2_nxv8i32_elt1_ret_elt0:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ret
-  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
-  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
-  ret <vscale x 4 x i32> %ext
-}
-
-; Test extract of tuple passed into function
-define <vscale x 4 x i32> @get_tuple2_nxv8i32_elt1(<vscale x 8 x i32> %tuple) {
-; CHECK-LABEL: get_tuple2_nxv8i32_elt1:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: ret
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %tuple, i32 1)
-  ret <vscale x 4 x i32> %ext
-}
-
-;
-; Insert into three element tuples
-;
-
-; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
-; insert z5:   {     z5     , tuple3.res1, tuple3.res2 }
-; extract z5:        ^^
-define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple3_nxv12i32_elt0:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, z5.d
-; CHECK-NEXT: ret
-  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
-  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 0)
-  ret <vscale x 4 x i32> %ext
-}
-
-; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
-; insert z5:   { tuple3.res0,     z5     , tuple3.res2 }
-; extract z5:                     ^^
-define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple3_nxv12i32_elt1:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, z5.d
-; CHECK-NEXT: ret
-  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
-  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 1)
-  ret <vscale x 4 x i32> %ext
-}
-
-; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
-; insert z5:   { tuple3.res0, tuple3.res1,     z5      }
-; extract z5:                                  ^^
-define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple3_nxv12i32_elt2:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, z5.d
-; CHECK-NEXT: ret
-  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
-  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
-  ret <vscale x 4 x i32> %ext
-}
-
-; This test checks the elements _not_ being set aren't changed.
-
-; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
-; insert z5:   { tuple3.res0,     z5     , tuple3.res2 }
-; extract z2:                                  ^^
-define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple3_nxv12i32_elt1_ret_elt2:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, z2.d
-; CHECK-NEXT: ret
-  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
-  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
-  ret <vscale x 4 x i32> %ext
-}
-
-; Test extract of tuple passed into function
-define <vscale x 4 x i32> @get_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 12 x i32> %tuple) {
-; CHECK-LABEL: get_tuple3_nxv12i32_elt2:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, z3.d
-; CHECK-NEXT: ret
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %tuple, i32 2)
-  ret <vscale x 4 x i32> %ext
-}
-
-;
-; Insert into four element tuples
-;
-
-; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
-; insert z5:   {     z5     , tuple4.res1, tuple4.res2, tuple4.res3 }
-; extract z5:        ^^
-define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple4_nxv16i32_elt0:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, z5.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
-  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 0)
-  ret <vscale x 4 x i32> %ext
-}
-
-; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
-; insert z5:   { tuple4.res0,     z5     , tuple4.res2, tuple4.res3 }
-; extract z5:                     ^^
-define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple4_nxv16i32_elt1:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, z5.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
-  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 1)
-  ret <vscale x 4 x i32> %ext
-}
-
-; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
-; insert z5:   { tuple4.res0, tuple4.res1,     z5     , tuple4.res3 }
-; extract z5:                                  ^^
-define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple4_nxv16i32_elt2:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, z5.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
-  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 2)
-  ret <vscale x 4 x i32> %ext
-}
-
-; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
-; insert z5:   { tuple4.res0, tuple4.res1, tuple4.res2,     z5      }
-; extract z5:                                               ^^
-define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple4_nxv16i32_elt3:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, z5.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
-  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 3, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 3)
-  ret <vscale x 4 x i32> %ext
-}
-
-; This test checks the elements _not_ being set aren't changed.
-
-; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
-; insert z5:   { tuple4.res0, tuple4.res1, tuple4.res2,     z5      }
-; extract z2:                                  ^^
-define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple4_nxv16i32_elt3_ret_elt2:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, z2.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
-  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 3, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 2)
-  ret <vscale x 4 x i32> %ext
-}
-
-; Test extract of tuple passed into function
-define <vscale x 4 x i32> @get_tuple4_nxv16i32_elt3(<vscale x 16 x i32> %tuple) {
-; CHECK-LABEL: get_tuple4_nxv16i32_elt3:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, z3.d
-; CHECK-NEXT: ret
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %tuple, i32 3)
-  ret <vscale x 4 x i32> %ext
-}
-
-declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32>, i32, <vscale x 4 x i32>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32>, i32)
-
-declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32>, i32, <vscale x 4 x i32>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32>, i32)
-
-declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32>, i32, <vscale x 4 x i32>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32>, i32)
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll
@@ -397,45 +397,6 @@
   ret <vscale x 2 x i64> %out
 }
 
-; ADD (tuples)
-
-define <vscale x 4 x i64> @add_i64_tuple2(<vscale x 4 x i64>* %out, <vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2) {
-; CHECK-LABEL: add_i64_tuple2:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add z0.d, z0.d, z0.d
-; CHECK-NEXT: add z1.d, z1.d, z1.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2)
-  %res = add <vscale x 4 x i64> %tuple, %tuple
-  ret <vscale x 4 x i64> %res
-}
-
-define <vscale x 6 x i64> @add_i64_tuple3(<vscale x 6 x i64>* %out, <vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3) {
-; CHECK-LABEL: add_i64_tuple3:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add z1.d, z1.d, z1.d
-; CHECK-NEXT: add z0.d, z0.d, z0.d
-; CHECK-NEXT: add z2.d, z2.d, z2.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3)
-  %res = add <vscale x 6 x i64> %tuple, %tuple
-  ret <vscale x 6 x i64> %res
-}
-
-define <vscale x 8 x i64> @add_i64_tuple4(<vscale x 8 x i64>* %out, <vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3, <vscale x 2 x i64> %in4) {
-; CHECK-LABEL: add_i64_tuple4:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add z2.d, z2.d, z2.d
-; CHECK-NEXT: add z0.d, z0.d, z0.d
-; CHECK-NEXT: add z1.d, z1.d, z1.d
-; CHECK-NEXT: add z3.d, z3.d, z3.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3, <vscale x 2 x i64> %in4)
-  %res = add <vscale x 8 x i64> %tuple, %tuple
-  ret <vscale x 8 x i64> %res
-}
-
-
 declare <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
@@ -478,6 +439,3 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
 
-declare <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
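Likewise, each tuple.createN call is upgraded to a chain of llvm.vector.insert calls built up from poison, so the add-tuple tests deleted above reduce to generic IR with no SVE-specific lowering. A sketch for the two-element case, with %t0 as a hypothetical temporary:

%t0 = call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> %in1, i64 0)
%tuple = call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> %t0, <vscale x 2 x i64> %in2, i64 2)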
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -586,87 +586,6 @@
 }
 
-; Stores (tuples)
-
-define void @store_i64_tuple3(<vscale x 6 x i64>* %out, <vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3) {
-; CHECK-LABEL: store_i64_tuple3:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: st1d { z2.d }, p0, [x0, #2, mul vl]
-; CHECK-NEXT: st1d { z1.d }, p0, [x0, #1, mul vl]
-; CHECK-NEXT: st1d { z0.d }, p0, [x0]
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3)
-  store <vscale x 6 x i64> %tuple, <vscale x 6 x i64>* %out
-  ret void
-}
-
-define void @store_i64_tuple4(<vscale x 8 x i64>* %out, <vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3, <vscale x 2 x i64> %in4) {
-; CHECK-LABEL: store_i64_tuple4:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: st1d { z3.d }, p0, [x0, #3, mul vl]
-; CHECK-NEXT: st1d { z2.d }, p0, [x0, #2, mul vl]
-; CHECK-NEXT: st1d { z1.d }, p0, [x0, #1, mul vl]
-; CHECK-NEXT: st1d { z0.d }, p0, [x0]
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3, <vscale x 2 x i64> %in4)
-  store <vscale x 8 x i64> %tuple, <vscale x 8 x i64>* %out
-  ret void
-}
-
-define void @store_i16_tuple2(<vscale x 16 x i16>* %out, <vscale x 8 x i16> %in1, <vscale x 8 x i16> %in2) {
-; CHECK-LABEL: store_i16_tuple2:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: st1h { z1.h }, p0, [x0, #1, mul vl]
-; CHECK-NEXT: st1h { z0.h }, p0, [x0]
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %in1, <vscale x 8 x i16> %in2)
-  store <vscale x 16 x i16> %tuple, <vscale x 16 x i16>* %out
-  ret void
-}
-
-define void @store_i16_tuple3(<vscale x 24 x i16>* %out, <vscale x 8 x i16> %in1, <vscale x 8 x i16> %in2, <vscale x 8 x i16> %in3) {
-; CHECK-LABEL: store_i16_tuple3:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: st1h { z2.h }, p0, [x0, #2, mul vl]
-; CHECK-NEXT: st1h { z1.h }, p0, [x0, #1, mul vl]
-; CHECK-NEXT: st1h { z0.h }, p0, [x0]
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %in1, <vscale x 8 x i16> %in2, <vscale x 8 x i16> %in3)
-  store <vscale x 24 x i16> %tuple, <vscale x 24 x i16>* %out
-  ret void
-}
-
-define void @store_f32_tuple3(<vscale x 12 x float>* %out, <vscale x 4 x float> %in1, <vscale x 4 x float> %in2, <vscale x 4 x float> %in3) {
-; CHECK-LABEL: store_f32_tuple3:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: st1w { z2.s }, p0, [x0, #2, mul vl]
-; CHECK-NEXT: st1w { z1.s }, p0, [x0, #1, mul vl]
-; CHECK-NEXT: st1w { z0.s }, p0, [x0]
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float> %in1, <vscale x 4 x float> %in2, <vscale x 4 x float> %in3)
-  store <vscale x 12 x float> %tuple, <vscale x 12 x float>* %out
-  ret void
-}
-
-define void @store_f32_tuple4(<vscale x 16 x float>* %out, <vscale x 4 x float> %in1, <vscale x 4 x float> %in2, <vscale x 4 x float> %in3, <vscale x 4 x float> %in4) {
-; CHECK-LABEL: store_f32_tuple4:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: st1w { z3.s }, p0, [x0, #3, mul vl]
-; CHECK-NEXT: st1w { z2.s }, p0, [x0, #2, mul vl]
-; CHECK-NEXT: st1w { z1.s }, p0, [x0, #1, mul vl]
-; CHECK-NEXT: st1w { z0.s }, p0, [x0]
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float> %in1, <vscale x 4 x float> %in2, <vscale x 4 x float> %in3, <vscale x 4 x float> %in4)
-  store <vscale x 16 x float> %tuple, <vscale x 16 x float>* %out
-  ret void
-}
-
 declare void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
 declare void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
 declare void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
@@ -706,14 +625,5 @@
 declare void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float*)
 declare void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*)
 
-declare <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-
-declare <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-
-declare <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
-
 ; +bf16 is required for the bfloat version.
 attributes #0 = { "target-features"="+bf16" }
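tuple.set upgrades along the same lines: it becomes an llvm.vector.insert into the existing wide value, with the slot again scaled to an element index. A sketch for a two-element i32 tuple (value names illustrative):

; before upgrade: write slot 1
%ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
; after upgrade: slot 1 * 4 minimum elements = element index 4
%ins = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, <vscale x 4 x i32> %z5, i64 4)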
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-tuple-get.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-tuple-get.ll
deleted file mode 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-tuple-get.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=instcombine < %s | FileCheck %s
-
-target triple = "aarch64-unknown-linux-gnu"
-
-; This stores %a using st4 after reversing the 4 tuples. Check that the
-; redundant sequences of get/set are eliminated.
-define void @redundant_tuple_get_set(<vscale x 64 x i8> %a, i8* %ptr) #0 {
-; CHECK-LABEL: @redundant_tuple_get_set(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> [[A:%.*]], i32 3)
-; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> [[A]], i32 0)
-; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> [[A]], i32 2)
-; CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> [[A]], i32 1)
-; CHECK-NEXT:    call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i32 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i8* [[PTR:%.*]])
-; CHECK-NEXT:    ret void
-;
-  %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %a, i32 3)
-  %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %a, i32 0)
-  %3 = call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8(<vscale x 64 x i8> %a, i32 3, <vscale x 16 x i8> %2)
-  %4 = call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8(<vscale x 64 x i8> %3, i32 0, <vscale x 16 x i8> %1)
-  %5 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %4, i32 2)
-  %6 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %4, i32 1)
-  %7 = call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8(<vscale x 64 x i8> %4, i32 2, <vscale x 16 x i8> %6)
-  %8 = call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8(<vscale x 64 x i8> %7, i32 1, <vscale x 16 x i8> %5)
-  %9 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %8, i32 0)
-  %10 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %8, i32 1)
-  %11 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %8, i32 2)
-  %12 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %8, i32 3)
-  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %9, <vscale x 16 x i8> %10, <vscale x 16 x i8> %11, <vscale x 16 x i8> %12, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i32 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i8* %ptr)
-  ret void
-}
-
-declare <vscale x 64 x i8> @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8(<vscale x 64 x i8>, i32, <vscale x 16 x i8>)
-declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8>, i32)
-declare void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
-
-attributes #0 = { "target-features"="+sve" }
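Deleting this InstCombine test relies on the upgraded form being foldable by target-independent combines: once the get/set pairs become llvm.vector.extract/llvm.vector.insert, extracting a slot that was just inserted at the same index yields the inserted value directly. A sketch of the pattern, with %a, %x, %v, %e as illustrative values (slot 3 of the nxv64i8 tuple scales to element index 48):

%v = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %a, <vscale x 16 x i8> %x, i64 48)
%e = call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv64i8(<vscale x 64 x i8> %v, i64 48)
; %e should fold to %x, so no AArch64-specific combine is needed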