Index: llvm/lib/CodeGen/ExpandVectorPredication.cpp
===================================================================
--- llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -163,6 +163,10 @@
   Value *expandPredicationInReduction(IRBuilder<> &Builder,
                                       VPReductionIntrinsic &PI);
 
+  /// \brief Lower this VP memory operation to a non-VP intrinsic.
+  Value *expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
+                                            VPIntrinsic &VPI);
+
   /// \brief Query TTI and expand the vector predication in \p P accordingly.
   Value *expandPredication(VPIntrinsic &PI);
 
@@ -383,6 +387,53 @@
   return Reduction;
 }
 
+Value *
+CachingVPExpander::expandPredicationInMemoryIntrinsic(IRBuilder<> &Builder,
+                                                      VPIntrinsic &VPI) {
+  assert(VPI.canIgnoreVectorLengthParam());
+
+  Value *MaskParam = VPI.getMaskParam();
+  Value *PtrParam = VPI.getMemoryPointerParam();
+  Value *DataParam = VPI.getMemoryDataParam();
+  bool IsUnmasked = isAllTrueMask(MaskParam);
+
+  MaybeAlign AlignOpt = VPI.getPointerAlignment();
+
+  Value *NewMemoryInst = nullptr;
+  switch (VPI.getIntrinsicID()) {
+  default:
+    llvm_unreachable("Not a VP memory intrinsic");
+  case Intrinsic::vp_store:
+    if (IsUnmasked) {
+      StoreInst *NewStore =
+          Builder.CreateStore(DataParam, PtrParam, /*IsVolatile*/ false);
+      if (AlignOpt.hasValue())
+        NewStore->setAlignment(AlignOpt.getValue());
+      NewMemoryInst = NewStore;
+    } else
+      NewMemoryInst = Builder.CreateMaskedStore(
+          DataParam, PtrParam, AlignOpt.valueOrOne(), MaskParam);
+
+    break;
+  case Intrinsic::vp_load:
+    if (IsUnmasked) {
+      LoadInst *NewLoad =
+          Builder.CreateLoad(VPI.getType(), PtrParam, /*IsVolatile*/ false);
+      if (AlignOpt.hasValue())
+        NewLoad->setAlignment(AlignOpt.getValue());
+      NewMemoryInst = NewLoad;
+    } else
+      NewMemoryInst = Builder.CreateMaskedLoad(
+          VPI.getType(), PtrParam, AlignOpt.valueOrOne(), MaskParam);
+
+    break;
+  }
+
+  assert(NewMemoryInst);
+  replaceOperation(*NewMemoryInst, VPI);
+  return NewMemoryInst;
+}
+
 void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
   LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");
 
@@ -459,6 +510,14 @@
   if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI))
     return expandPredicationInReduction(Builder, *VPRI);
 
+  switch (VPI.getIntrinsicID()) {
+  default:
+    break;
+  case Intrinsic::vp_load:
+  case Intrinsic::vp_store:
+    return expandPredicationInMemoryIntrinsic(Builder, VPI);
+  }
+
   return &VPI;
 }
 
Index: llvm/test/CodeGen/Generic/expand-vp-load-store.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Generic/expand-vp-load-store.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt --expandvp -S < %s | FileCheck %s
+; RUN: opt --expandvp --expandvp-override-evl-transform=Legal --expandvp-override-mask-transform=Convert -S < %s | FileCheck %s
+
+declare <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0nxv1i64(<vscale x 1 x i64>*, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i64> @vpload_nxv1i64(<vscale x 1 x i64>* %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: @vpload_nxv1i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 0, i32 [[EVL:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and <vscale x 1 x i1> [[TMP1]], [[M:%.*]]
+; CHECK-NEXT:    [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0nxv1i64(<vscale x 1 x i64>* [[PTR:%.*]], i32 1, <vscale x 1 x i1> [[TMP2]], <vscale x 1 x i64> undef)
+; CHECK-NEXT:    ret <vscale x 1 x i64> [[TMP3]]
+;
+  %load = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0nxv1i64(<vscale x 1 x i64>* %ptr, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i64> %load
+}
+
+define <vscale x 1 x i64> @vpload_nxv1i64_allones_mask(<vscale x 1 x i64>* %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: @vpload_nxv1i64_allones_mask(
+; CHECK-NEXT:    [[ONE:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+; CHECK-NEXT:    [[ALLONES:%.*]] = shufflevector <vscale x 1 x i1> [[ONE]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 0, i32 [[EVL:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and <vscale x 1 x i1> [[TMP1]], [[ALLONES]]
+; CHECK-NEXT:    [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0nxv1i64(<vscale x 1 x i64>* [[PTR:%.*]], i32 1, <vscale x 1 x i1> [[TMP2]], <vscale x 1 x i64> undef)
+; CHECK-NEXT:    ret <vscale x 1 x i64> [[TMP3]]
+;
+  %one = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %allones = shufflevector <vscale x 1 x i1> %one, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %load = call <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p0nxv1i64(<vscale x 1 x i64>* %ptr, <vscale x 1 x i1> %allones, i32 %evl)
+  ret <vscale x 1 x i64> %load
+}
+
+declare void @llvm.vp.store.nxv1i64.p0nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>*, <vscale x 1 x i1>, i32)
+
+define void @vpstore_nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x i64>* %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: @vpstore_nxv1i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 0, i32 [[EVL:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and <vscale x 1 x i1> [[TMP1]], [[M:%.*]]
+; CHECK-NEXT:    [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE]], 1
+; CHECK-NEXT:    call void @llvm.masked.store.nxv1i64.p0nxv1i64(<vscale x 1 x i64> [[VAL:%.*]], <vscale x 1 x i64>* [[PTR:%.*]], i32 1, <vscale x 1 x i1> [[TMP2]])
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.vp.store.nxv1i64.p0nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x i64>* %ptr, <vscale x 1 x i1> %m, i32 %evl)
+  ret void
+}
+
+define void @vpstore_nxv1i64_allones_mask(<vscale x 1 x i64> %val, <vscale x 1 x i64>* %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: @vpstore_nxv1i64_allones_mask(
+; CHECK-NEXT:    [[ONE:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+; CHECK-NEXT:    [[ALLONES:%.*]] = shufflevector <vscale x 1 x i1> [[ONE]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 0, i32 [[EVL:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and <vscale x 1 x i1> [[TMP1]], [[ALLONES]]
+; CHECK-NEXT:    [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[SCALABLE_SIZE:%.*]] = mul nuw i32 [[VSCALE]], 1
+; CHECK-NEXT:    call void @llvm.masked.store.nxv1i64.p0nxv1i64(<vscale x 1 x i64> [[VAL:%.*]], <vscale x 1 x i64>* [[PTR:%.*]], i32 1, <vscale x 1 x i1> [[TMP2]])
+; CHECK-NEXT:    ret void
+;
+  %one = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %allones = shufflevector <vscale x 1 x i1> %one, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  call void @llvm.vp.store.nxv1i64.p0nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x i64>* %ptr, <vscale x 1 x i1> %allones, i32 %evl)
+  ret void
+}