Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2066,6 +2066,22 @@ Info.WriteMem = true; Info.PtrVal = Inst->getArgOperand(Inst->arg_size() - 1); break; + case Intrinsic::aarch64_sve_ld1: + case Intrinsic::aarch64_sve_ld2: + case Intrinsic::aarch64_sve_ld3: + case Intrinsic::aarch64_sve_ld4: + Info.ReadMem = true; + Info.WriteMem = false; + Info.PtrVal = Inst->getArgOperand(1); + break; + case Intrinsic::aarch64_sve_st1: + case Intrinsic::aarch64_sve_st2: + case Intrinsic::aarch64_sve_st3: + case Intrinsic::aarch64_sve_st4: + Info.ReadMem = false; + Info.WriteMem = true; + Info.PtrVal = Inst->getArgOperand(Inst->arg_size() - 1); + break; } switch (Inst->getIntrinsicID()) { @@ -2083,6 +2099,15 @@ case Intrinsic::aarch64_neon_st4: Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS; break; + case Intrinsic::aarch64_sve_ld1: + case Intrinsic::aarch64_sve_st1: + case Intrinsic::aarch64_sve_ld2: + case Intrinsic::aarch64_sve_st2: + case Intrinsic::aarch64_sve_ld3: + case Intrinsic::aarch64_sve_st3: + case Intrinsic::aarch64_sve_ld4: + case Intrinsic::aarch64_sve_st4: + break; } return true; } Index: llvm/test/Transforms/LoopStrengthReduce/AArch64/getTgtMemIntrinsic1.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopStrengthReduce/AArch64/getTgtMemIntrinsic1.ll @@ -0,0 +1,62 @@ +; RUN: opt < %s -loop-reduce +; ModuleID = 'test.tmp.bc' +target triple = "aarch64-none-linux-gnu" +; CHECK: %lsr.iv12 = bitcast i32* %lsr.iv1 to i1* + +define dso_local void @example01_sve(i32* noalias nocapture %a, i32* %b, i32* %c, i64 %N) local_unnamed_addr #0 { +entry: + %0 = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %1 = tail call @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64 0, i64 %N) + %2 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv4i1( %0, %1) + br i1 %2, label %for.body.lr.ph, label %for.cond.cleanup + +for.body.lr.ph: ; preds = %entry + %3 = tail call i64 @llvm.vscale.i64() + %4 = shl nuw nsw i64 %3, 2 + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %for.body.lr.ph, %for.body + %5 = phi [ %1, %for.body.lr.ph ], [ %9, %for.body ] + %i.06 = phi i64 [ 0, %for.body.lr.ph ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %i.06 + %6 = tail call @llvm.aarch64.sve.ld1.nxv4i32( %5, i32* %arrayidx) + %arrayidx1 = getelementptr inbounds i32, i32* %c, i64 %i.06 + %7 = tail call @llvm.aarch64.sve.ld1.nxv4i32( %5, i32* %arrayidx1) + %8 = tail call @llvm.aarch64.sve.add.nxv4i32( %5, %6, %7) + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %i.06 + tail call void @llvm.aarch64.sve.st1.nxv4i32( %8, %5, i32* %arrayidx2) + %add = add i64 %4, %i.06 + %9 = tail call @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64 %add, i64 %N) + %10 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv4i1( %0, %9) + br i1 %10, label %for.body, label %for.cond.cleanup +} + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +declare @llvm.aarch64.sve.ptrue.nxv4i1(i32 immarg) #1 + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +declare @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64, i64) #1 + +; Function Attrs: argmemonly mustprogress nofree nosync nounwind readonly willreturn +declare @llvm.aarch64.sve.ld1.nxv4i32(, i32*) #2 + +; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +declare @llvm.aarch64.sve.add.nxv4i32(, , ) #1 + +; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn +declare void @llvm.aarch64.sve.st1.nxv4i32(, , i32* nocapture) #3 + +; Function Attrs: nofree nosync nounwind readnone willreturn +declare i1 @llvm.aarch64.sve.ptest.first.nxv4i1(, ) #4 + +; Function Attrs: nofree nosync nounwind readnone willreturn +declare i64 @llvm.vscale.i64() #4 + +attributes #0 = { nofree nosync nounwind uwtable vscale_range(0,16) "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon,+outline-atomics,+sve2,+v8.2a" } +attributes #1 = { mustprogress nofree nosync nounwind readnone willreturn } +attributes #2 = { argmemonly mustprogress nofree nosync nounwind readonly willreturn } +attributes #3 = { argmemonly mustprogress nofree nosync nounwind willreturn } +attributes #4 = { nofree nosync nounwind readnone willreturn }