Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -59,6 +59,14 @@ bool isWideningInstruction(Type *Ty, unsigned Opcode, ArrayRef Args); + // A helper function called by 'getVectorInstrCost'. + // + // 'Val' and 'Index' are forwarded from 'getVectorInstrCost'; 'HasRealUse' + // indicates whether the vector instruction is available in the input IR or + // just imaginary in vectorizer passes. + InstructionCost getVectorInstrCostHelper(Type *Val, unsigned Index, + bool HasRealUse); + public: explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), @@ -173,9 +181,10 @@ InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); + InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, + unsigned Index); InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1948,8 +1948,9 @@ return 0; } -InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) { +InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(Type *Val, + unsigned Index, + bool HasRealUse) { assert(Val->isVectorTy() && "This must be a vector type"); if (Index != -1U) { @@ -1968,7 +1969,18 @@ } // The element at index zero is already inside the vector. 
- if (Index == 0) + // - For a physical (HasRealUse==true) insert-element or extract-element + // instruction that extracts integers, an explicit FPR -> GPR move is + // needed. So it has non-zero cost. + // - For the rest of cases (virtual instruction or element type is float), + // consider the instruction free. + // + // FIXME: + // If the extract-element and insert-element instructions could be + // simplified away (e.g., could be combined into users by looking at use-def + // context), they have no cost. This is not done in the first place for + // compile-time considerations. + if (Index == 0 && (!HasRealUse || !Val->getScalarType()->isIntegerTy())) return 0; } @@ -1976,6 +1988,16 @@ return ST->getVectorInsertExtractBaseCost(); } +InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) { + return getVectorInstrCostHelper(Val, Index, false /* HasRealUse */); +} + +InstructionCost AArch64TTIImpl::getVectorInstrCost(const Instruction &I, + Type *Val, unsigned Index) { + return getVectorInstrCostHelper(Val, Index, true /* HasRealUse */); +} + InstructionCost AArch64TTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, Index: llvm/test/Analysis/CostModel/AArch64/kryo-inseltpoison.ll =================================================================== --- llvm/test/Analysis/CostModel/AArch64/kryo-inseltpoison.ll +++ llvm/test/Analysis/CostModel/AArch64/kryo-inseltpoison.ll @@ -9,7 +9,7 @@ ; Vector extracts - extracting the first element should have a zero cost; ; all other elements should have a cost of two. 
; - ; CHECK: cost of 0 {{.*}} extractelement <2 x i64> undef, i32 0 + ; CHECK: cost of 2 {{.*}} extractelement <2 x i64> undef, i32 0 ; CHECK: cost of 2 {{.*}} extractelement <2 x i64> undef, i32 1 %t1 = extractelement <2 x i64> undef, i32 0 %t2 = extractelement <2 x i64> undef, i32 1 @@ -17,7 +17,7 @@ ; Vector inserts - inserting the first element should have a zero cost; all ; other elements should have a cost of two. ; - ; CHECK: cost of 0 {{.*}} insertelement <2 x i64> poison, i64 undef, i32 0 + ; CHECK: cost of 2 {{.*}} insertelement <2 x i64> poison, i64 undef, i32 0 ; CHECK: cost of 2 {{.*}} insertelement <2 x i64> poison, i64 undef, i32 1 %t3 = insertelement <2 x i64> poison, i64 undef, i32 0 %t4 = insertelement <2 x i64> poison, i64 undef, i32 1 Index: llvm/test/Analysis/CostModel/AArch64/kryo.ll =================================================================== --- llvm/test/Analysis/CostModel/AArch64/kryo.ll +++ llvm/test/Analysis/CostModel/AArch64/kryo.ll @@ -9,7 +9,7 @@ ; Vector extracts - extracting the first element should have a zero cost; ; all other elements should have a cost of two. ; - ; CHECK: cost of 0 {{.*}} extractelement <2 x i64> undef, i32 0 + ; CHECK: cost of 2 {{.*}} extractelement <2 x i64> undef, i32 0 ; CHECK: cost of 2 {{.*}} extractelement <2 x i64> undef, i32 1 %t1 = extractelement <2 x i64> undef, i32 0 %t2 = extractelement <2 x i64> undef, i32 1 @@ -17,34 +17,9 @@ ; Vector inserts - inserting the first element should have a zero cost; all ; other elements should have a cost of two. 
; - ; CHECK: cost of 0 {{.*}} insertelement <2 x i64> undef, i64 undef, i32 0 + ; CHECK: cost of 2 {{.*}} insertelement <2 x i64> undef, i64 undef, i32 0 ; CHECK: cost of 2 {{.*}} insertelement <2 x i64> undef, i64 undef, i32 1 %t3 = insertelement <2 x i64> undef, i64 undef, i32 0 %t4 = insertelement <2 x i64> undef, i64 undef, i32 1 - ret void -} - -; CHECK-LABEL: vectorInstrExtractCost -define i64 @vectorInstrExtractCost(<4 x i64> %vecreg) { - - ; Vector extracts - extracting each element at index 0 is considered - ; free in the current implementation. When extracting element at index - ; 2, 2 is rounded to 0, so extracting element at index 2 has cost 0 as - ; well. - ; - ; CHECK: cost of 2 {{.*}} extractelement <4 x i64> %vecreg, i32 1 - ; CHECK: cost of 0 {{.*}} extractelement <4 x i64> %vecreg, i32 2 - %t1 = extractelement <4 x i64> %vecreg, i32 1 - %t2 = extractelement <4 x i64> %vecreg, i32 2 - %ele = add i64 %t2, 1 - %cond = icmp eq i64 %t1, %ele - - ; CHECK: cost of 0 {{.*}} extractelement <4 x i64> %vecreg, i32 0 - ; CHECK: cost of 2 {{.*}} extractelement <4 x i64> %vecreg, i32 3 - %t0 = extractelement <4 x i64> %vecreg, i32 0 - %t3 = extractelement <4 x i64> %vecreg, i32 3 - %val = select i1 %cond, i64 %t0 , i64 %t3 - - ret i64 %val -} +} \ No newline at end of file Index: llvm/test/Analysis/CostModel/AArch64/sve-insert-extract.ll =================================================================== --- llvm/test/Analysis/CostModel/AArch64/sve-insert-extract.ll +++ llvm/test/Analysis/CostModel/AArch64/sve-insert-extract.ll @@ -9,10 +9,10 @@ define void @ins_el0() #0 { ; CHECK-DEFAULT-LABEL: 'ins_el0' -; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v0 = insertelement zeroinitializer, i8 0, i64 0 -; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v1 = insertelement zeroinitializer, i16 0, i64 0 -; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2 = insertelement 
zeroinitializer, i32 0, i64 0 -; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3 = insertelement zeroinitializer, i64 0, i64 0 +; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v0 = insertelement zeroinitializer, i8 0, i64 0 +; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v1 = insertelement zeroinitializer, i16 0, i64 0 +; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement zeroinitializer, i32 0, i64 0 +; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3 = insertelement zeroinitializer, i64 0, i64 0 ; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4 = insertelement zeroinitializer, float 0.000000e+00, i64 0 ; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5 = insertelement zeroinitializer, double 0.000000e+00, i64 0 ; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -27,10 +27,10 @@ ; CHECK-LOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-HIGH-LABEL: 'ins_el0' -; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v0 = insertelement zeroinitializer, i8 0, i64 0 -; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v1 = insertelement zeroinitializer, i16 0, i64 0 -; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2 = insertelement zeroinitializer, i32 0, i64 0 -; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3 = insertelement zeroinitializer, i64 0, i64 0 +; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 100000 for instruction: %v0 = insertelement zeroinitializer, i8 0, i64 0 +; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 100000 for instruction: %v1 = insertelement zeroinitializer, i16 0, i64 0 +; CHECK-HIGH-NEXT: Cost 
Model: Found an estimated cost of 100000 for instruction: %v2 = insertelement zeroinitializer, i32 0, i64 0 +; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 100000 for instruction: %v3 = insertelement zeroinitializer, i64 0, i64 0 ; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4 = insertelement zeroinitializer, float 0.000000e+00, i64 0 ; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5 = insertelement zeroinitializer, double 0.000000e+00, i64 0 ; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -84,10 +84,10 @@ define void @ext_el0() #0 { ; CHECK-DEFAULT-LABEL: 'ext_el0' -; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v0 = extractelement zeroinitializer, i64 0 -; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v1 = extractelement zeroinitializer, i64 0 -; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2 = extractelement zeroinitializer, i64 0 -; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3 = extractelement zeroinitializer, i64 0 +; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v0 = extractelement zeroinitializer, i64 0 +; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v1 = extractelement zeroinitializer, i64 0 +; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = extractelement zeroinitializer, i64 0 +; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3 = extractelement zeroinitializer, i64 0 ; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4 = extractelement zeroinitializer, i64 0 ; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5 = extractelement zeroinitializer, i64 0 ; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: ret void @@ -102,10 +102,10 @@ ; CHECK-LOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-HIGH-LABEL: 'ext_el0' -; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v0 = extractelement zeroinitializer, i64 0 -; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v1 = extractelement zeroinitializer, i64 0 -; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2 = extractelement zeroinitializer, i64 0 -; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3 = extractelement zeroinitializer, i64 0 +; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 100000 for instruction: %v0 = extractelement zeroinitializer, i64 0 +; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 100000 for instruction: %v1 = extractelement zeroinitializer, i64 0 +; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 100000 for instruction: %v2 = extractelement zeroinitializer, i64 0 +; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 100000 for instruction: %v3 = extractelement zeroinitializer, i64 0 ; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4 = extractelement zeroinitializer, i64 0 ; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5 = extractelement zeroinitializer, i64 0 ; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void Index: llvm/test/Transforms/LICM/AArch64/extract-element.ll =================================================================== --- llvm/test/Transforms/LICM/AArch64/extract-element.ll +++ llvm/test/Transforms/LICM/AArch64/extract-element.ll @@ -18,24 +18,23 @@ ; CHECK-NEXT: [[TMP12]] = add i64 [[TMP4]], 1 ; CHECK-NEXT: br label [[TMP3]] ; CHECK: .split.loop.exit: -; CHECK-NEXT: [[DOTLCSSA7:%.*]] = phi <1 x i64> [ [[TMP8]], [[TMP6]] ] +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[TMP9]], [[TMP6]] ] ; 
CHECK-NEXT: [[DOTLCSSA6:%.*]] = phi i64 [ [[TMP4]], [[TMP6]] ] ; CHECK-NEXT: [[DOTPH:%.*]] = phi i1 [ [[TMP5]], [[TMP6]] ] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[DOTLCSSA7]], i64 0 -; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], -1 -; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[DOTLCSSA6]] -; CHECK-NEXT: [[TMP16:%.*]] = icmp uge i64 [[TMP15]], [[TMP1]] -; CHECK-NEXT: br label [[TMP17:%.*]] +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[DOTLCSSA]], -1 +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP13]], [[DOTLCSSA6]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp uge i64 [[TMP14]], [[TMP1]] +; CHECK-NEXT: br label [[TMP16:%.*]] ; CHECK: .split.loop.exit2: ; CHECK-NEXT: [[DOTPH3:%.*]] = phi i1 [ [[TMP5]], [[TMP3]] ] ; CHECK-NEXT: [[DOTPH4:%.*]] = phi i1 [ undef, [[TMP3]] ] -; CHECK-NEXT: br label [[TMP17]] -; CHECK: 17: -; CHECK-NEXT: [[TMP18:%.*]] = phi i1 [ [[DOTPH]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH3]], [[DOTSPLIT_LOOP_EXIT2]] ] -; CHECK-NEXT: [[TMP19:%.*]] = phi i1 [ [[TMP16]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH4]], [[DOTSPLIT_LOOP_EXIT2]] ] -; CHECK-NEXT: [[TMP20:%.*]] = xor i1 [[TMP18]], true -; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i1 true, i1 [[TMP19]] -; CHECK-NEXT: ret i1 [[TMP21]] +; CHECK-NEXT: br label [[TMP16]] +; CHECK: 16: +; CHECK-NEXT: [[TMP17:%.*]] = phi i1 [ [[DOTPH]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH3]], [[DOTSPLIT_LOOP_EXIT2]] ] +; CHECK-NEXT: [[TMP18:%.*]] = phi i1 [ [[TMP15]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH4]], [[DOTSPLIT_LOOP_EXIT2]] ] +; CHECK-NEXT: [[TMP19:%.*]] = xor i1 [[TMP17]], true +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i1 true, i1 [[TMP18]] +; CHECK-NEXT: ret i1 [[TMP20]] ; br label %3