diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
--- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -179,6 +179,10 @@
   Value *expandPredicationInReduction(IRBuilder<> &Builder,
                                       VPReductionIntrinsic &PI);
 
+  /// Lower this VP cast operation to a non-VP cast instruction.
+  Value *expandPredicationToCastIntrinsic(IRBuilder<> &Builder,
+                                          VPIntrinsic &VPI);
+
   /// Lower this VP memory operation to a non-VP intrinsic.
   Value *expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
                                             VPIntrinsic &VPI);
@@ -436,6 +440,27 @@
   return Reduction;
 }
 
+Value *CachingVPExpander::expandPredicationToCastIntrinsic(IRBuilder<> &Builder,
+                                                           VPIntrinsic &VPI) {
+  // TODO: Handle other VP cast intrinsics, e.g. VP_TRUNC/VP_ZEXT.
+  switch (VPI.getIntrinsicID()) {
+  default:
+    llvm_unreachable("Not a VP cast intrinsic");
+  case Intrinsic::vp_inttoptr: {
+    Value *NewOp =
+        Builder.CreateIntToPtr(VPI.getOperand(0), VPI.getType(), VPI.getName());
+    replaceOperation(*NewOp, VPI);
+    return NewOp;
+  }
+  case Intrinsic::vp_ptrtoint: {
+    Value *NewOp =
+        Builder.CreatePtrToInt(VPI.getOperand(0), VPI.getType(), VPI.getName());
+    replaceOperation(*NewOp, VPI);
+    return NewOp;
+  }
+  }
+}
+
 Value *
 CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
                                                       VPIntrinsic &VPI) {
@@ -598,6 +623,10 @@
   if (auto *VPCmp = dyn_cast<VPCmpIntrinsic>(&VPI))
     return expandPredicationInComparison(Builder, *VPCmp);
 
+  if (VPCastIntrinsic::isVPCast(VPI.getIntrinsicID())) {
+    return expandPredicationToCastIntrinsic(Builder, VPI);
+  }
+
   switch (VPI.getIntrinsicID()) {
   default:
     break;
diff --git a/llvm/test/CodeGen/X86/expand-vp-cast-intrinsics.ll b/llvm/test/CodeGen/X86/expand-vp-cast-intrinsics.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/expand-vp-cast-intrinsics.ll
@@ -0,0 +1,119 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512
+
+declare <4 x ptr> @llvm.vp.inttoptr.v4p0.v4i32(<4 x i32>, <4 x i1>, i32)
+define <4 x ptr> @inttoptr_v4p0_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
+; X86-LABEL: inttoptr_v4p0_v4i32:
+; X86:       # %bb.0:
+; X86-NEXT:    retl
+;
+; SSE-LABEL: inttoptr_v4p0_v4i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps %xmm0, %xmm1
+; SSE-NEXT:    xorps %xmm2, %xmm2
+; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: inttoptr_v4p0_v4i32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: inttoptr_v4p0_v4i32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: inttoptr_v4p0_v4i32:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX512-NEXT:    retq
+  %v = call <4 x ptr> @llvm.vp.inttoptr.v4p0.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
+  ret <4 x ptr> %v
+}
+
+declare <4 x ptr> @llvm.vp.inttoptr.v4p0.v4i64(<4 x i64>, <4 x i1>, i32)
+
+define <4 x ptr> @inttoptr_v4p0_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
+; X86-LABEL: inttoptr_v4p0_v4i64:
+; X86:       # %bb.0:
+; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+;
+; SSE-LABEL: inttoptr_v4p0_v4i64:
+; SSE:       # %bb.0:
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: inttoptr_v4p0_v4i64:
+; AVX:       # %bb.0:
+; AVX-NEXT:    retq
+  %v = call <4 x ptr> @llvm.vp.inttoptr.v4p0.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl)
+  ret <4 x ptr> %v
+}
+
+declare <4 x i32> @llvm.vp.ptrtoint.v4i32.v4p0(<4 x ptr>, <4 x i1>, i32)
+
+define <4 x i32> @ptrtoint_v4i32_v4p0(<4 x ptr> %va, <4 x i1> %m, i32 zeroext %evl) {
+; X86-LABEL: ptrtoint_v4i32_v4p0:
+; X86:       # %bb.0:
+; X86-NEXT:    retl
+;
+; SSE-LABEL: ptrtoint_v4i32_v4p0:
+; SSE:       # %bb.0:
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: ptrtoint_v4i32_v4p0:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: ptrtoint_v4i32_v4p0:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: ptrtoint_v4i32_v4p0:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpmovqd %ymm0, %xmm0
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  %v = call <4 x i32> @llvm.vp.ptrtoint.v4i32.v4p0(<4 x ptr> %va, <4 x i1> %m, i32 %evl)
+  ret <4 x i32> %v
+}
+
+declare <4 x i64> @llvm.vp.ptrtoint.v4i64.v4p0(<4 x ptr>, <4 x i1>, i32)
+
+define <4 x i64> @ptrtoint_v4i64_v4p0(<4 x ptr> %va, <4 x i1> %m, i32 zeroext %evl) {
+; X86-LABEL: ptrtoint_v4i64_v4p0:
+; X86:       # %bb.0:
+; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; X86-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X86-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-NEXT:    retl
+;
+; SSE-LABEL: ptrtoint_v4i64_v4p0:
+; SSE:       # %bb.0:
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: ptrtoint_v4i64_v4p0:
+; AVX:       # %bb.0:
+; AVX-NEXT:    retq
+  %v = call <4 x i64> @llvm.vp.ptrtoint.v4i64.v4p0(<4 x ptr> %va, <4 x i1> %m, i32 %evl)
+  ret <4 x i64> %v
+}
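
For reference, the expansion exercised by these tests replaces a VP cast call with the corresponding unpredicated LLVM cast instruction. A minimal sketch of the before/after IR, reusing the value names from the test above (illustrative only, not part of the patch):

    ; before: predicated cast carrying a mask and an explicit vector length
    %v = call <4 x ptr> @llvm.vp.inttoptr.v4p0.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)

    ; after: plain cast; the mask %m and EVL %evl are discarded by the expansion
    %v = inttoptr <4 x i32> %va to <4 x ptr>

This is why the generated X86 assertions above contain no mask handling at all.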