diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h
--- a/llvm/include/llvm/IR/Intrinsics.h
+++ b/llvm/include/llvm/IR/Intrinsics.h
@@ -142,6 +142,7 @@
       VecOfBitcastsToInt,
       AMX,
       PPCQuad,
+      AnyPtrToElt,
     } Kind;
 
     union {
@@ -180,14 +181,15 @@
       return (ArgKind)(Argument_Info & 7);
     }
 
-    // VecOfAnyPtrsToElt uses both an overloaded argument (for address space)
-    // and a reference argument (for matching vector width and element types)
+    // VecOfAnyPtrsToElt and AnyPtrToElt use both an overloaded argument (for
+    // address space) and a reference argument (for matching vector width and
+    // element types)
     unsigned getOverloadArgNumber() const {
-      assert(Kind == VecOfAnyPtrsToElt);
+      assert(Kind == VecOfAnyPtrsToElt || Kind == AnyPtrToElt);
       return Argument_Info >> 16;
     }
     unsigned getRefArgNumber() const {
-      assert(Kind == VecOfAnyPtrsToElt);
+      assert(Kind == VecOfAnyPtrsToElt || Kind == AnyPtrToElt);
       return Argument_Info & 0xFFFF;
     }
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -212,6 +212,7 @@
 class LLVMPointerTo<int num> : LLVMMatchType<num>;
 class LLVMPointerToElt<int num> : LLVMMatchType<num>;
+class LLVMAnyPointerToElt<int num> : LLVMMatchType<num>;
 class LLVMVectorOfAnyPointersToElt<int num> : LLVMMatchType<num>;
 class LLVMVectorElementType<int num> : LLVMMatchType<num>;
@@ -1412,14 +1413,14 @@
 // Experimental strided memory accesses
 def int_experimental_vp_strided_store : DefaultAttrsIntrinsic<[],
                              [ llvm_anyvector_ty,
-                               LLVMPointerToElt<0>,
+                               LLVMAnyPointerToElt<0>,
                                llvm_anyint_ty, // Stride in bytes
                                LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                                llvm_i32_ty],
                              [ NoCapture<ArgIndex<1>>, IntrNoSync, IntrWriteMem, IntrArgMemOnly, IntrWillReturn ]>;
 
 def int_experimental_vp_strided_load : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
-                             [ LLVMPointerToElt<0>,
+                             [ LLVMAnyPointerToElt<0>,
                                llvm_anyint_ty, // Stride in bytes
                                LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                                llvm_i32_ty],
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -983,7 +983,8 @@
   IIT_PPCF128 = 52,
   IIT_V3 = 53,
   IIT_EXTERNREF = 54,
-  IIT_FUNCREF = 55
+  IIT_FUNCREF = 55,
+  IIT_ANYPTR_TO_ELT = 56,
 };
 
 static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
@@ -1157,6 +1158,13 @@
     OutputTable.push_back(IITDescriptor::get(IITDescriptor::PtrToElt, ArgInfo));
     return;
   }
+  case IIT_ANYPTR_TO_ELT: {
+    unsigned short ArgNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+    unsigned short RefNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+    OutputTable.push_back(
+        IITDescriptor::get(IITDescriptor::AnyPtrToElt, ArgNo, RefNo));
+    return;
+  }
   case IIT_VEC_OF_ANYPTRS_TO_ELT: {
     unsigned short ArgNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
     unsigned short RefNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
@@ -1348,6 +1356,9 @@
   case IITDescriptor::VecOfAnyPtrsToElt:
     // Return the overloaded type (which determines the pointers address space)
     return Tys[D.getOverloadArgNumber()];
+  case IITDescriptor::AnyPtrToElt:
+    // Return the overloaded type (which determines the pointers address space)
+    return Tys[D.getOverloadArgNumber()];
   }
   llvm_unreachable("unhandled");
 }
@@ -1592,6 +1603,30 @@
     return !ThisArgType->isOpaqueOrPointeeTypeMatches(
         ReferenceType->getElementType());
   }
+  case IITDescriptor::AnyPtrToElt: {
+    unsigned RefArgNumber = D.getRefArgNumber();
+    if (RefArgNumber >= ArgTys.size()) {
+      if (IsDeferredCheck)
+        return true;
+      // If forward referencing, already add the pointer type and
+      // defer the checks for later.
+      ArgTys.push_back(Ty);
+      return DeferCheck(Ty);
+    }
+
+    if (!IsDeferredCheck) {
+      assert(D.getOverloadArgNumber() == ArgTys.size() &&
+             "Table consistency error");
+      ArgTys.push_back(Ty);
+    }
+
+    auto *ReferenceType = dyn_cast<VectorType>(ArgTys[RefArgNumber]);
+    auto *ThisArgType = dyn_cast<PointerType>(Ty);
+    if (!ThisArgType || !ReferenceType)
+      return true;
+    return !ThisArgType->isOpaqueOrPointeeTypeMatches(
+        ReferenceType->getElementType());
+  }
   case IITDescriptor::VecOfAnyPtrsToElt: {
     unsigned RefArgNumber = D.getRefArgNumber();
     if (RefArgNumber >= ArgTys.size()) {
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -511,8 +511,8 @@
         M, VPID, {ReturnType, Params[0]->getType()});
     break;
   case Intrinsic::experimental_vp_strided_load:
-    VPFunc =
-        Intrinsic::getDeclaration(M, VPID, {ReturnType, Params[1]->getType()});
+    VPFunc = Intrinsic::getDeclaration(
+        M, VPID, {ReturnType, Params[0]->getType(), Params[1]->getType()});
     break;
   case Intrinsic::vp_gather:
     VPFunc = Intrinsic::getDeclaration(
@@ -524,7 +524,8 @@
     break;
   case Intrinsic::experimental_vp_strided_store:
     VPFunc = Intrinsic::getDeclaration(
-        M, VPID, {Params[0]->getType(), Params[2]->getType()});
+        M, VPID,
+        {Params[0]->getType(), Params[1]->getType(), Params[2]->getType()});
     break;
   case Intrinsic::vp_scatter:
     VPFunc = Intrinsic::getDeclaration(
diff --git a/llvm/test/CodeGen/VE/Vector/vp_strided_load.ll b/llvm/test/CodeGen/VE/Vector/vp_strided_load.ll
--- a/llvm/test/CodeGen/VE/Vector/vp_strided_load.ll
+++ b/llvm/test/CodeGen/VE/Vector/vp_strided_load.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s
 
-declare <256 x float> @llvm.experimental.vp.strided.load.v256f32.i64(float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
+declare <256 x float> @llvm.experimental.vp.strided.load.v256f32.p0f32.i64(float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
 
 define fastcc <256 x float> @vp_strided_load_v256f32_rrm(float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
 ; CHECK-LABEL: vp_strided_load_v256f32_rrm:
@@ -13,7 +13,7 @@
 ; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
 ; CHECK-NEXT:    vgtu %v0, %v0, 0, 0, %vm1
 ; CHECK-NEXT:    b.l.t (, %s10)
-  %r = call <256 x float> @llvm.experimental.vp.strided.load.v256f32.i64(float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
+  %r = call <256 x float> @llvm.experimental.vp.strided.load.v256f32.p0f32.i64(float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
   ret <256 x float> %r
 }
 
@@ -26,7 +26,7 @@
 ; CHECK-NEXT:    b.l.t (, %s10)
   %one = insertelement <256 x i1> undef, i1 1, i32 0
   %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
-  %r = call <256 x float> @llvm.experimental.vp.strided.load.v256f32.i64(float* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
+  %r = call <256 x float> @llvm.experimental.vp.strided.load.v256f32.p0f32.i64(float* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
   ret <256 x float> %r
 }
 
@@ -39,11 +39,11 @@
 ; CHECK-NEXT:    b.l.t (, %s10)
   %one = insertelement <256 x i1> undef, i1 1, i32 0
   %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
-  %r = call <256 x float> @llvm.experimental.vp.strided.load.v256f32.i64(float* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
+  %r = call <256 x float> @llvm.experimental.vp.strided.load.v256f32.p0f32.i64(float* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
   ret <256 x float> %r
 }
 
-declare <256 x i32> @llvm.experimental.vp.strided.load.v256i32.i64(i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
+declare <256 x i32> @llvm.experimental.vp.strided.load.v256i32.p0i32.i64(i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
 
 define fastcc <256 x i32> @vp_strided_load_v256i32_rrm(i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
 ; CHECK-LABEL: vp_strided_load_v256i32_rrm:
@@ -55,7 +55,7 @@
 ; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
 ; CHECK-NEXT:    vgtl.zx %v0, %v0, 0, 0, %vm1
 ; CHECK-NEXT:    b.l.t (, %s10)
-  %r = call <256 x i32> @llvm.experimental.vp.strided.load.v256i32.i64(i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
+  %r = call <256 x i32> @llvm.experimental.vp.strided.load.v256i32.p0i32.i64(i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
   ret <256 x i32> %r
 }
 
@@ -68,7 +68,7 @@
 ; CHECK-NEXT:    b.l.t (, %s10)
   %one = insertelement <256 x i1> undef, i1 1, i32 0
   %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
-  %r = call <256 x i32> @llvm.experimental.vp.strided.load.v256i32.i64(i32* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
+  %r = call <256 x i32> @llvm.experimental.vp.strided.load.v256i32.p0i32.i64(i32* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
   ret <256 x i32> %r
 }
 
@@ -81,11 +81,11 @@
 ; CHECK-NEXT:    b.l.t (, %s10)
   %one = insertelement <256 x i1> undef, i1 1, i32 0
   %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
-  %r = call <256 x i32> @llvm.experimental.vp.strided.load.v256i32.i64(i32* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
+  %r = call <256 x i32> @llvm.experimental.vp.strided.load.v256i32.p0i32.i64(i32* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
   ret <256 x i32> %r
 }
 
-declare <256 x double> @llvm.experimental.vp.strided.load.v256f64.i64(double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
+declare <256 x double> @llvm.experimental.vp.strided.load.v256f64.p0f64.i64(double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
 
 define fastcc <256 x double> @vp_strided_load_v256f64_rrm(double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
 ; CHECK-LABEL: vp_strided_load_v256f64_rrm:
@@ -97,7 +97,7 @@
 ; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
 ; CHECK-NEXT:    vgt %v0, %v0, 0, 0, %vm1
 ; CHECK-NEXT:    b.l.t (, %s10)
-  %r = call <256 x double> @llvm.experimental.vp.strided.load.v256f64.i64(double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
+  %r = call <256 x double> @llvm.experimental.vp.strided.load.v256f64.p0f64.i64(double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
   ret <256 x double> %r
 }
 
@@ -110,7 +110,7 @@
 ; CHECK-NEXT:    b.l.t (, %s10)
   %one = insertelement <256 x i1> undef, i1 1, i32 0
   %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
-  %r = call <256 x double> @llvm.experimental.vp.strided.load.v256f64.i64(double* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
+  %r = call <256 x double> @llvm.experimental.vp.strided.load.v256f64.p0f64.i64(double* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
   ret <256 x double> %r
 }
 
@@ -123,11 +123,11 @@
 ; CHECK-NEXT:    b.l.t (, %s10)
   %one = insertelement <256 x i1> undef, i1 1, i32 0
   %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
-  %r = call <256 x double> @llvm.experimental.vp.strided.load.v256f64.i64(double* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
+  %r = call <256 x double> @llvm.experimental.vp.strided.load.v256f64.p0f64.i64(double* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
   ret <256 x double> %r
 }
 
-declare <256 x i64> @llvm.experimental.vp.strided.load.v256i64.i64(i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
+declare <256 x i64> @llvm.experimental.vp.strided.load.v256i64.p0i64.i64(i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
 
 define fastcc <256 x i64> @vp_strided_load_v256i64_rrm(i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
 ; CHECK-LABEL: vp_strided_load_v256i64_rrm:
@@ -139,7 +139,7 @@
 ; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
 ; CHECK-NEXT:    vgt %v0, %v0, 0, 0, %vm1
 ; CHECK-NEXT:    b.l.t (, %s10)
-  %r = call <256 x i64> @llvm.experimental.vp.strided.load.v256i64.i64(i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
+  %r = call <256 x i64> @llvm.experimental.vp.strided.load.v256i64.p0i64.i64(i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
   ret <256 x i64> %r
 }
 
@@ -152,7 +152,7 @@
 ; CHECK-NEXT:    b.l.t (, %s10)
   %one = insertelement <256 x i1> undef, i1 1, i32 0
   %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
-  %r = call <256 x i64> @llvm.experimental.vp.strided.load.v256i64.i64(i64* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
+  %r = call <256 x i64> @llvm.experimental.vp.strided.load.v256i64.p0i64.i64(i64* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
   ret <256 x i64> %r
 }
 
@@ -165,6 +165,6 @@
 ; CHECK-NEXT:    b.l.t (, %s10)
   %one = insertelement <256 x i1> undef, i1 1, i32 0
   %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
-  %r = call <256 x i64> @llvm.experimental.vp.strided.load.v256i64.i64(i64* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
+  %r = call <256 x i64> @llvm.experimental.vp.strided.load.v256i64.p0i64.i64(i64* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
   ret <256 x i64> %r
 }
diff --git a/llvm/test/CodeGen/VE/Vector/vp_strided_store.ll b/llvm/test/CodeGen/VE/Vector/vp_strided_store.ll
--- a/llvm/test/CodeGen/VE/Vector/vp_strided_store.ll
+++ b/llvm/test/CodeGen/VE/Vector/vp_strided_store.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s
 
-declare void @llvm.experimental.vp.strided.store.v256f32.i64(<256 x float> %val, float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
+declare void @llvm.experimental.vp.strided.store.v256f32.p0f32.i64(<256 x float> %val, float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
 
 define fastcc void @vp_strided_store_v256f32_rrm(<256 x float> %val, float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
 ; CHECK-LABEL: vp_strided_store_v256f32_rrm:
@@ -10,7 +10,7 @@
 ; CHECK-NEXT:    lvl %s2
 ; CHECK-NEXT:    vstu %v0, %s1, %s0, %vm1
 ; CHECK-NEXT:    b.l.t (, %s10)
-  call void @llvm.experimental.vp.strided.store.v256f32.i64(<256 x float> %val, float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
+  call void @llvm.experimental.vp.strided.store.v256f32.p0f32.i64(<256 x float> %val, float* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
   ret void
 }
 
@@ -23,7 +23,7 @@
 ; CHECK-NEXT:    b.l.t (, %s10)
   %one = insertelement <256 x i1> undef, i1 1, i32 0
   %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
-  call void @llvm.experimental.vp.strided.store.v256f32.i64(<256 x float> %val, float* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
+  call void @llvm.experimental.vp.strided.store.v256f32.p0f32.i64(<256 x float> %val, float* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
   ret void
 }
 
@@ -36,11 +36,11 @@
 ; CHECK-NEXT:    b.l.t (, %s10)
   %one = insertelement <256 x i1> undef, i1 1, i32 0
   %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
-  call void @llvm.experimental.vp.strided.store.v256f32.i64(<256 x float> %val, float* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
+  call void @llvm.experimental.vp.strided.store.v256f32.p0f32.i64(<256 x float> %val, float* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
   ret void
 }
 
-declare void @llvm.experimental.vp.strided.store.v256i32.i64(<256 x i32> %val, i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
+declare void @llvm.experimental.vp.strided.store.v256i32.p0i32.i64(<256 x i32> %val, i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
 
 define fastcc void @vp_strided_store_v256i32_rrm(<256 x i32> %val, i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
 ; CHECK-LABEL: vp_strided_store_v256i32_rrm:
@@ -49,7 +49,7 @@
 ; CHECK-NEXT:    lvl %s2
 ; CHECK-NEXT:    vstl %v0, %s1, %s0, %vm1
 ; CHECK-NEXT:    b.l.t (, %s10)
-  call void @llvm.experimental.vp.strided.store.v256i32.i64(<256 x i32> %val, i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
+  call void @llvm.experimental.vp.strided.store.v256i32.p0i32.i64(<256 x i32> %val, i32* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
   ret void
 }
 
@@ -62,7 +62,7 @@
 ; CHECK-NEXT:    b.l.t (, %s10)
   %one = insertelement <256 x i1> undef, i1 1, i32 0
   %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
-  call void @llvm.experimental.vp.strided.store.v256i32.i64(<256 x i32> %val, i32* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
+  call void @llvm.experimental.vp.strided.store.v256i32.p0i32.i64(<256 x i32> %val, i32* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
   ret void
 }
 
@@ -75,11 +75,11 @@
 ; CHECK-NEXT:    b.l.t (, %s10)
   %one = insertelement <256 x i1> undef, i1 1, i32 0
   %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
-  call void @llvm.experimental.vp.strided.store.v256i32.i64(<256 x i32> %val, i32* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
+  call void @llvm.experimental.vp.strided.store.v256i32.p0i32.i64(<256 x i32> %val, i32* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
   ret void
 }
 
-declare void @llvm.experimental.vp.strided.store.v256f64.i64(<256 x double> %val, double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
+declare void @llvm.experimental.vp.strided.store.v256f64.p0f64.i64(<256 x double> %val, double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
 
 define fastcc void @vp_strided_store_v256f64_rrm(<256 x double> %val, double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
 ; CHECK-LABEL: vp_strided_store_v256f64_rrm:
@@ -88,7 +88,7 @@
 ; CHECK-NEXT:    lvl %s2
 ; CHECK-NEXT:    vst %v0, %s1, %s0, %vm1
 ; CHECK-NEXT:    b.l.t (, %s10)
-  call void @llvm.experimental.vp.strided.store.v256f64.i64(<256 x double> %val, double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
+  call void @llvm.experimental.vp.strided.store.v256f64.p0f64.i64(<256 x double> %val, double* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
   ret void
 }
 
@@ -101,7 +101,7 @@
 ; CHECK-NEXT:    b.l.t (, %s10)
   %one = insertelement <256 x i1> undef, i1 1, i32 0
   %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
-  call void @llvm.experimental.vp.strided.store.v256f64.i64(<256 x double> %val, double* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
+  call void @llvm.experimental.vp.strided.store.v256f64.p0f64.i64(<256 x double> %val, double* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
   ret void
 }
 
@@ -114,11 +114,11 @@
 ; CHECK-NEXT:    b.l.t (, %s10)
   %one = insertelement <256 x i1> undef, i1 1, i32 0
   %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
-  call void @llvm.experimental.vp.strided.store.v256f64.i64(<256 x double> %val, double* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
+  call void @llvm.experimental.vp.strided.store.v256f64.p0f64.i64(<256 x double> %val, double* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
   ret void
 }
 
-declare void @llvm.experimental.vp.strided.store.v256i64.i64(<256 x i64> %val, i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
+declare void @llvm.experimental.vp.strided.store.v256i64.p0i64.i64(<256 x i64> %val, i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
 
 define fastcc void @vp_strided_store_v256i64_rrm(<256 x i64> %val, i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl) {
 ; CHECK-LABEL: vp_strided_store_v256i64_rrm:
@@ -127,7 +127,7 @@
 ; CHECK-NEXT:    lvl %s2
 ; CHECK-NEXT:    vst %v0, %s1, %s0, %vm1
 ; CHECK-NEXT:    b.l.t (, %s10)
-  call void @llvm.experimental.vp.strided.store.v256i64.i64(<256 x i64> %val, i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
+  call void @llvm.experimental.vp.strided.store.v256i64.p0i64.i64(<256 x i64> %val, i64* %ptr, i64 %stride, <256 x i1> %mask, i32 %evl)
   ret void
 }
 
@@ -140,7 +140,7 @@
 ; CHECK-NEXT:    b.l.t (, %s10)
   %one = insertelement <256 x i1> undef, i1 1, i32 0
   %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
-  call void @llvm.experimental.vp.strided.store.v256i64.i64(<256 x i64> %val, i64* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
+  call void @llvm.experimental.vp.strided.store.v256i64.p0i64.i64(<256 x i64> %val, i64* %ptr, i64 %stride, <256 x i1> %allones, i32 %evl)
   ret void
 }
 
@@ -153,6 +153,6 @@
 ; CHECK-NEXT:    b.l.t (, %s10)
   %one = insertelement <256 x i1> undef, i1 1, i32 0
   %allones = shufflevector <256 x i1> %one, <256 x i1> undef, <256 x i32> zeroinitializer
-  call void @llvm.experimental.vp.strided.store.v256i64.i64(<256 x i64> %val, i64* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
+  call void @llvm.experimental.vp.strided.store.v256i64.p0i64.i64(<256 x i64> %val, i64* %ptr, i64 24, <256 x i1> %allones, i32 %evl)
   ret void
 }
diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp
--- a/llvm/unittests/IR/VPIntrinsicTest.cpp
+++ b/llvm/unittests/IR/VPIntrinsicTest.cpp
@@ -61,6 +61,9 @@
   Str << "declare void "
          "@llvm.experimental.vp.strided.store.v8i32.i32(<8 x i32>, "
          "i32*, i32, <8 x i1>, i32) ";
+  Str << "declare void "
+         "@llvm.experimental.vp.strided.store.v8i32.p1i32.i32(<8 x i32>, "
+         "i32 addrspace(1)*, i32, <8 x i1>, i32) ";
   Str << " declare void @llvm.vp.scatter.v8i32.v8p0i32(<8 x i32>, <8 x "
          "i32*>, <8 x i1>, i32) ";
   Str << " declare <8 x i32> @llvm.vp.load.v8i32.p0v8i32(<8 x i32>*, <8 x "
@@ -68,6 +71,9 @@
   Str << "declare <8 x i32> "
          "@llvm.experimental.vp.strided.load.v8i32.i32(i32*, i32, <8 "
         "x i1>, i32) ";
+  Str << "declare <8 x i32> "
+         "@llvm.experimental.vp.strided.load.v8i32.p1i32.i32(i32 "
+         "addrspace(1)*, i32, <8 x i1>, i32) ";
   Str << " declare <8 x i32> @llvm.vp.gather.v8i32.v8p0i32(<8 x i32*>, <8 x "
          "i1>, i32) ";
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -252,7 +252,8 @@
   IIT_PPCF128 = 52,
   IIT_V3 = 53,
   IIT_EXTERNREF = 54,
-  IIT_FUNCREF = 55
+  IIT_FUNCREF = 55,
+  IIT_ANYPTR_TO_ELT = 56,
 };
 
 static void EncodeFixedValueType(MVT::SimpleValueType VT,
@@ -327,6 +328,13 @@
       // Encode LLVMMatchType<Number> ArgNo
       Sig.push_back(Number);
       return;
+    } else if (R->isSubClassOf("LLVMAnyPointerToElt")) {
+      Sig.push_back(IIT_ANYPTR_TO_ELT);
+      // Encode overloaded ArgNo
+      Sig.push_back(NextArgCode++);
+      // Encode LLVMMatchType<Number> ArgNo
+      Sig.push_back(Number);
+      return;
     } else if (R->isSubClassOf("LLVMPointerToElt"))
       Sig.push_back(IIT_PTR_TO_ELT);
     else if (R->isSubClassOf("LLVMVectorElementType"))
@@ -415,6 +423,9 @@
     if (R->isSubClassOf("LLVMVectorOfAnyPointersToElt")) {
       ArgCodes.push_back(3 /*vAny*/);
       ++NumInserted;
+    } else if (R->isSubClassOf("LLVMAnyPointerToElt")) {
+      ArgCodes.push_back(4 /*iPTRAny*/);
+      ++NumInserted;
     }
     return;
   }