diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -695,6 +695,72 @@
   return None;
 }
 
+static Optional<Instruction *> instCombineSVEMaskedLD1(InstCombiner &IC,
+                                                       IntrinsicInst &II,
+                                                       const DataLayout &DL) {
+  Type *VecOp = II.getType();
+  Value *PointerOp = II.getOperand(1);
+
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+  auto VScalePointer = Builder.CreateBitCast(PointerOp, VecOp->getPointerTo());
+  auto MaskedLoad = Builder.CreateMaskedLoad(VecOp, VScalePointer,
+                                             PointerOp->getPointerAlignment(DL),
+                                             II.getOperand(0));
+  return IC.replaceInstUsesWith(II, MaskedLoad);
+}
+
+static Optional<Instruction *>
+instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
+  Value *Predicate;
+  if (!match(II.getOperand(0),
+             m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(m_Value(Predicate))))
+    return None;
+  if (!match(Predicate, m_ConstantInt<AArch64SVEPredPattern::all>()))
+    return instCombineSVEMaskedLD1(IC, II, DL);
+
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+  auto VScalePointer =
+      Builder.CreateBitCast(II.getOperand(1), II.getType()->getPointerTo());
+  auto Load = Builder.CreateLoad(II.getType(), VScalePointer);
+  return IC.replaceInstUsesWith(II, Load);
+}
+
+static Optional<Instruction *> instCombineSVEST1Masked(InstCombiner &IC,
+                                                       IntrinsicInst &II,
+                                                       const DataLayout &DL) {
+  auto VecOp = II.getOperand(0);
+  auto PointerOp = II.getOperand(2);
+
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+  auto VScalePointer =
+      Builder.CreateBitCast(PointerOp, VecOp->getType()->getPointerTo());
+  (void)Builder.CreateMaskedStore(VecOp, VScalePointer,
+                                  PointerOp->getPointerAlignment(DL),
+                                  II.getOperand(1));
+  return IC.eraseInstFromFunction(II);
+}
+
+static Optional<Instruction *>
+instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
+  Value *Predicate;
+  if (!match(II.getOperand(1),
+             m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(m_Value(Predicate))))
+    return None;
+  if (!match(Predicate, m_ConstantInt<AArch64SVEPredPattern::all>()))
+    return instCombineSVEST1Masked(IC, II, DL);
+
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+  auto VecOp = II.getOperand(0);
+  auto VScalePointer =
+      Builder.CreateBitCast(II.getOperand(2), VecOp->getType()->getPointerTo());
+  (void)Builder.CreateStore(VecOp, VScalePointer);
+  return IC.eraseInstFromFunction(II);
+}
+
 static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic) {
   switch (Intrinsic) {
   case Intrinsic::aarch64_sve_fmul:
@@ -987,6 +1053,10 @@
     return instCombineLD1GatherIndex(IC, II);
   case Intrinsic::aarch64_sve_st1_scatter_index:
     return instCombineST1ScatterIndex(IC, II);
+  case Intrinsic::aarch64_sve_ld1:
+    return instCombineSVELD1(IC, II, DL);
+  case Intrinsic::aarch64_sve_st1:
+    return instCombineSVEST1(IC, II, DL);
   }
 
   return None;
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 4 x i32> @combine_ld1(i32* %ptr) #0 {
+; CHECK-LABEL: @combine_ld1(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], align 16
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, i32* %ptr)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @combine_ld1_masked(i32* %ptr) #0 {
+; CHECK-LABEL: @combine_ld1_masked(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> undef)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, i32* %ptr)
+  ret <vscale x 4 x i32> %2
+}
+
+define void @combine_st1(<vscale x 4 x i32> %vec, i32* %ptr) #0 {
+; CHECK-LABEL: @combine_st1(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP1]], align 16
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, i32* %ptr)
+  ret void
+}
+
+define void @combine_st1_masked(<vscale x 4 x i32> %vec, i32* %ptr) #0 {
+; CHECK-LABEL: @combine_st1_masked(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
+  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, i32* %ptr)
+  ret void
+}
+
+declare void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1>, i32*)
+
+attributes #0 = { "target-features"="+sve" }