diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -725,6 +725,71 @@
   return IC.replaceInstUsesWith(II, FMLA);
 }
 
+static Optional<Instruction *>
+instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
+  Value *PtrOp = II.getOperand(1);
+  Type *VecTyPtr = II.getType()->getPointerTo();
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+
+  Value *VecPtr = Builder.CreateBitCast(PtrOp, VecTyPtr);
+  LoadInst *Load = Builder.CreateLoad(II.getType(), VecPtr);
+  return IC.replaceInstUsesWith(II, Load);
+}
+
+static Optional<Instruction *> instCombineSVEMaskedLD1(InstCombiner &IC,
+                                                       IntrinsicInst &II,
+                                                       const DataLayout &DL) {
+  Value *Pred = II.getOperand(0);
+  if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+                      m_ConstantInt<AArch64SVEPredPattern::all>())))
+    return instCombineSVELD1(IC, II, DL);
+
+  Type *VecTy = II.getType();
+  Value *PtrOp = II.getOperand(1);
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+
+  Value *VecPtr = Builder.CreateBitCast(PtrOp, VecTy->getPointerTo());
+  CallInst *MaskedLoad =
+      Builder.CreateMaskedLoad(VecTy, VecPtr, PtrOp->getPointerAlignment(DL),
+                               Pred, ConstantAggregateZero::get(VecTy));
+  return IC.replaceInstUsesWith(II, MaskedLoad);
+}
+
+static Optional<Instruction *>
+instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
+  Value *VecOp = II.getOperand(0);
+  Value *PtrOp = II.getOperand(2);
+  Type *VecTyPtr = VecOp->getType()->getPointerTo();
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+
+  Value *VecPtr = Builder.CreateBitCast(PtrOp, VecTyPtr);
+  (void)Builder.CreateStore(VecOp, VecPtr);
+  return IC.eraseInstFromFunction(II);
+}
+
+static Optional<Instruction *> instCombineSVEMaskedST1(InstCombiner &IC,
+                                                       IntrinsicInst &II,
+                                                       const DataLayout &DL) {
+  Value *Pred = II.getOperand(1);
+  if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+                      m_ConstantInt<AArch64SVEPredPattern::all>())))
+    return instCombineSVEST1(IC, II, DL);
+
+  Value *VecOp = II.getOperand(0);
+  Value *PtrOp = II.getOperand(2);
+  Type *VecTyPtr = VecOp->getType()->getPointerTo();
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+
+  auto VecPtr = Builder.CreateBitCast(PtrOp, VecTyPtr);
+  (void)Builder.CreateMaskedStore(VecOp, VecPtr, PtrOp->getPointerAlignment(DL),
+                                  Pred);
+  return IC.eraseInstFromFunction(II);
+}
+
 static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic) {
   switch (Intrinsic) {
   case Intrinsic::aarch64_sve_fmul:
@@ -1025,6 +1090,10 @@
     return instCombineLD1GatherIndex(IC, II);
   case Intrinsic::aarch64_sve_st1_scatter_index:
     return instCombineST1ScatterIndex(IC, II);
+  case Intrinsic::aarch64_sve_ld1:
+    return instCombineSVEMaskedLD1(IC, II, DL);
+  case Intrinsic::aarch64_sve_st1:
+    return instCombineSVEMaskedST1(IC, II, DL);
   }
 
   return None;
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 4 x i32> @combine_ld1(i32* %ptr) #0 {
+; CHECK-LABEL: @combine_ld1(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], align 16
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, i32* %ptr)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @combine_ld1_masked(i32* %ptr) #0 {
+; CHECK-LABEL: @combine_ld1_masked(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, i32* %ptr)
+  ret <vscale x 4 x i32> %2
+}
+
+define void @combine_st1(<vscale x 4 x i32> %vec, i32* %ptr) #0 {
+; CHECK-LABEL: @combine_st1(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP1]], align 16
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, i32* %ptr)
+  ret void
+}
+
+define void @combine_st1_masked(<vscale x 4 x i32> %vec, i32* %ptr) #0 {
+; CHECK-LABEL: @combine_st1_masked(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
+  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, i32* %ptr)
+  ret void
+}
+
+declare void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1>, i32*)
+
+attributes #0 = { "target-features"="+sve" }
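
For context only (not part of the patch): a minimal ACLE-level sketch of the kind of source code that produces the llvm.aarch64.sve.ld1/st1 intrinsics this combine targets, assuming clang lowers svld1/svst1 calls to those intrinsics. The intent exercised by the tests above is that an ld1/st1 whose governing predicate is a ptrue with the all pattern (31) becomes a plain unpredicated load/store, while any other predicate becomes llvm.masked.load/llvm.masked.store. The function names and element type below are invented for illustration, and whether the all-active fast path fires on compiler-generated IR also depends on how the predicate reaches the intrinsic (e.g. via convert.from.svbool), so treat this as a sketch rather than an end-to-end guarantee.

/* Illustrative only; build with: clang -O2 -march=armv8-a+sve -c sve_ld1st1_sketch.c */
#include <arm_sve.h>
#include <stdint.h>

void copy_all(const int32_t *src, int32_t *dst) {
  svbool_t pg = svptrue_b32();        /* all-active predicate (pattern SV_ALL)     */
  svint32_t v = svld1_s32(pg, src);   /* ld1 under an all-active predicate:        */
  svst1_s32(pg, dst, v);              /*   intended to fold to a plain load/store  */
}

void copy_partial(const int32_t *src, int32_t *dst) {
  svbool_t pg = svptrue_pat_b32(SV_VL16); /* not all-active                        */
  svint32_t v = svld1_s32(pg, src);       /* expected: llvm.masked.load            */
  svst1_s32(pg, dst, v);                  /* expected: llvm.masked.store           */
}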