diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp --- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp +++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp @@ -601,6 +601,18 @@ switch (VPI.getIntrinsicID()) { default: break; + case Intrinsic::vp_inttoptr: {/* Expand vp.inttoptr into a plain IRBuilder inttoptr cast. Only operand 0 (the integer vector in the tests below) is forwarded; the new value takes the call's result type and name, and the remaining call operands (the mask and evl in the tests below) are not passed to the cast. replaceOperation is defined outside this hunk -- presumably it redirects users of VPI to NewOp and retires VPI; confirm. The new value is returned to the caller. */ + Value *NewOp = + Builder.CreateIntToPtr(VPI.getOperand(0), VPI.getType(), VPI.getName()); + replaceOperation(*NewOp, VPI); + return NewOp; + } + case Intrinsic::vp_ptrtoint: {/* Expand vp.ptrtoint the same way: build a plain ptrtoint cast from operand 0 (the pointer vector in the tests below) to the call's result type, reusing the call's name; the mask and evl operands are not passed to the cast. The call is then handed to replaceOperation (defined outside this hunk -- semantics to be confirmed) and the new value is returned. */ + Value *NewOp = + Builder.CreatePtrToInt(VPI.getOperand(0), VPI.getType(), VPI.getName()); + replaceOperation(*NewOp, VPI); + return NewOp; + } case Intrinsic::vp_fneg: { Value *NewNegOp = Builder.CreateFNeg(VPI.getOperand(0), VPI.getName()); replaceOperation(*NewNegOp, VPI); diff --git a/llvm/test/CodeGen/X86/expand-vp-cast-intrinsics.ll b/llvm/test/CodeGen/X86/expand-vp-cast-intrinsics.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/expand-vp-cast-intrinsics.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512 + +declare <4 x ptr> @llvm.vp.inttoptr.v4p0.v4i32(<4 x i32>, <4 x i1>, i32) +define <4 x ptr> @inttoptr_v4p0_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { +; SSE-LABEL: inttoptr_v4p0_v4i32: +; SSE: # %bb.0: +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: xorps %xmm2, %xmm2 +; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; SSE-NEXT: retq +; +; AVX1-LABEL: inttoptr_v4p0_v4i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpxor %xmm1, %xmm1, 
%xmm1 +; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: inttoptr_v4p0_v4i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX2-NEXT: retq +; +; AVX512-LABEL: inttoptr_v4p0_v4i32: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX512-NEXT: retq + %v = call <4 x ptr> @llvm.vp.inttoptr.v4p0.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl) + ret <4 x ptr> %v +} + +declare <4 x ptr> @llvm.vp.inttoptr.v4p0.v4i64(<4 x i64>, <4 x i1>, i32) + +define <4 x ptr> @inttoptr_v4p0_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { +; SSE-LABEL: inttoptr_v4p0_v4i64: +; SSE: # %bb.0: +; SSE-NEXT: retq +; +; AVX-LABEL: inttoptr_v4p0_v4i64: +; AVX: # %bb.0: +; AVX-NEXT: retq + %v = call <4 x ptr> @llvm.vp.inttoptr.v4p0.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl) + ret <4 x ptr> %v +} + +declare <4 x i32> @llvm.vp.ptrtoint.v4i32.v4p0(<4 x ptr>, <4 x i1>, i32) + +define <4 x i32> @ptrtoint_v4i32_v4p0(<4 x ptr> %va, <4 x i1> %m, i32 zeroext %evl) { +; SSE-LABEL: ptrtoint_v4i32_v4p0: +; SSE: # %bb.0: +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE-NEXT: retq +; +; AVX1-LABEL: ptrtoint_v4i32_v4p0: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: ptrtoint_v4i32_v4p0: +; AVX2: # %bb.0: +; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: ptrtoint_v4i32_v4p0: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovqd %ymm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %v = call <4 x i32> @llvm.vp.ptrtoint.v4i32.v4p0(<4 x ptr> %va, <4 
x i1> %m, i32 %evl) + ret <4 x i32> %v +} + +declare <4 x i64> @llvm.vp.ptrtoint.v4i64.v4p0(<4 x ptr>, <4 x i1>, i32) + +define <4 x i64> @ptrtoint_v4i64_v4p0(<4 x ptr> %va, <4 x i1> %m, i32 zeroext %evl) { +; SSE-LABEL: ptrtoint_v4i64_v4p0: +; SSE: # %bb.0: +; SSE-NEXT: retq +; +; AVX-LABEL: ptrtoint_v4i64_v4p0: +; AVX: # %bb.0: +; AVX-NEXT: retq + %v = call <4 x i64> @llvm.vp.ptrtoint.v4i64.v4p0(<4 x ptr> %va, <4 x i1> %m, i32 %evl) + ret <4 x i64> %v +}