diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -247,6 +247,9 @@
                             Align Alignment, TTI::TargetCostKind CostKind,
                             const Instruction *I = nullptr);
 
+  int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                            TTI::TargetCostKind CostKind);
+
   bool maybeLoweredToCall(Instruction &I);
   bool isLoweredToCall(const Function *F);
   bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1409,6 +1409,21 @@
   return ScalarCost;
 }
 
+int ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                                      TTI::TargetCostKind CostKind) {
+  // Currently we make a somewhat optimistic assumption that active_lane_masks
+  // are always free. In reality one may be freely folded into a tail
+  // predicated loop, expanded into a VCTP or expanded into a lot of add/icmp
+  // code. We may need to improve this in the future, but being able to detect
+  // if one is free or not involves looking at a lot of other code. We
+  // currently assume that the vectorizer inserted these, and knew what it was
+  // doing in adding one.
+  if (ST->hasMVEIntegerOps() && ICA.getID() == Intrinsic::get_active_lane_mask)
+    return 0;
+
+  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+}
+
 bool ARMTTIImpl::isLoweredToCall(const Function *F) {
   if (!F->isIntrinsic())
     return BaseT::isLoweredToCall(F);
diff --git a/llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll b/llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/ARM/mve-active_lane_mask.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -S -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -cost-model -analyze | FileCheck %s
+
+; Note that instructions like this (not in a loop that could be tail
+; predicated) should not really be free. We currently assume that all active
+; lane masks are free.
+
+define void @v4i32(i32 %index, i32 %TC) {
+; CHECK-LABEL: 'v4i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
+  ret void
+}
+
+define void @v8i16(i32 %index, i32 %TC) {
+; CHECK-LABEL: 'v8i16'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
+  ret void
+}
+
+define void @v16i8(i32 %index, i32 %TC) {
+; CHECK-LABEL: 'v16i8'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
+  ret void
+}
+
+declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
+declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
+declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)
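
Note (illustrative, not part of the patch): the zero cost is aimed at the shape the vectorizer produces when it folds a loop's tail, where the lane mask feeds the masked memory operations and, on MVE, can usually be absorbed into tail predication (a VCTP-based predicated loop) rather than being expanded into explicit compare code. The standalone sketch below shows roughly where @llvm.get.active.lane.mask.v4i1.i32 appears in such a loop; the function name @sketch_tail_folded and the exact loop structure are assumptions for illustration only.

; Illustrative sketch only: a tail-folded loop where the lane mask guards a
; masked store and is a candidate for MVE tail predication.
define void @sketch_tail_folded(i32* %dst, i32 %val, i32 %n) {
entry:
  ; Broadcast the store value to all four lanes.
  %splatinsert = insertelement <4 x i32> undef, i32 %val, i32 0
  %splat = shufflevector <4 x i32> %splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:
  %index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
  ; Lanes with index + lane < n are active; the rest are masked off.
  %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n)
  %gep = getelementptr inbounds i32, i32* %dst, i32 %index
  %addr = bitcast i32* %gep to <4 x i32>*
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %splat, <4 x i32>* %addr, i32 4, <4 x i1> %active.lane.mask)
  %index.next = add i32 %index, 4
  %done = icmp uge i32 %index.next, %n
  br i1 %done, label %exit, label %vector.body

exit:
  ret void
}

declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)

In a loop like this the mask never needs to be materialised as explicit add/icmp code, which is the case the cost of 0 is modelling; the new test above instead exercises the intrinsic outside any loop, where, as its comment notes, the free-cost assumption is optimistic.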