diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -942,10 +942,6 @@ MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I) { - // TODO: Handle other cost kinds. - if (CostKind != TTI::TCK_RecipThroughput) - return 1; - // Type legalization can't handle structs if (TLI->getValueType(DL, Ty, true) == MVT::Other) return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace, @@ -953,6 +949,13 @@ auto LT = TLI->getTypeLegalizationCost(DL, Ty); + // TODO: consider latency as well for TCK_SizeAndLatency. + if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) + return LT.first; + + if (CostKind != TTI::TCK_RecipThroughput) + return 1; + if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store && LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) { // Unaligned stores are extremely inefficient. We don't split all diff --git a/llvm/test/Analysis/CostModel/AArch64/store.ll b/llvm/test/Analysis/CostModel/AArch64/store.ll --- a/llvm/test/Analysis/CostModel/AArch64/store.ll +++ b/llvm/test/Analysis/CostModel/AArch64/store.ll @@ -30,13 +30,13 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'getMemoryOpCost' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4 -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4 -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 4 -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 4 -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, <4 x double>* undef, align 4 -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x float> undef, <8 x float>* undef, align 4 -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x half> undef, <16 x half>* undef, align 4 +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4 +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4 +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 4 +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 4 +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, <4 x double>* undef, align 4 +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, <8 x float>* undef, align 4 +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x half> undef, <16 x half>* undef, align 4 ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4 ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4 ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 4 diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/large-vector-ops.ll b/llvm/test/Transforms/LoopUnroll/AArch64/large-vector-ops.ll --- a/llvm/test/Transforms/LoopUnroll/AArch64/large-vector-ops.ll +++ b/llvm/test/Transforms/LoopUnroll/AArch64/large-vector-ops.ll @@ -6,70 +6,22 @@ ; The loop in the function only contains a few instructions, but they will get ; lowered to a very large amount of target instructions. -; FIXME: Currently the cost-model assigns a cost of 1 to those large vector ops. define void @loop_with_large_vector_ops(i32 %i, <225 x double>* %A, <225 x double>* %B) { ; CHECK-LABEL: @loop_with_large_vector_ops( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[LV_1:%.*]] = load <225 x double>, <225 x double>* [[A:%.*]], align 8 -; CHECK-NEXT: [[LV_2:%.*]] = load <225 x double>, <225 x double>* [[A]], align 8 +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr <225 x double>, <225 x double>* [[A:%.*]], i32 [[IV]] +; CHECK-NEXT: [[LV_1:%.*]] = load <225 x double>, <225 x double>* [[A_GEP]], align 8 +; CHECK-NEXT: [[B_GEP:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 [[IV]] +; CHECK-NEXT: [[LV_2:%.*]] = load <225 x double>, <225 x double>* [[B_GEP]], align 8 ; CHECK-NEXT: [[MUL:%.*]] = fmul <225 x double> [[LV_1]], [[LV_2]] -; CHECK-NEXT: store <225 x double> [[MUL]], <225 x double>* [[A]], align 8 -; CHECK-NEXT: [[A_GEP_1:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 1 -; CHECK-NEXT: [[LV_1_1:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_1]], align 8 -; CHECK-NEXT: [[B_GEP_1:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 1 -; CHECK-NEXT: [[LV_2_1:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_1]], align 8 -; CHECK-NEXT: [[MUL_1:%.*]] = fmul <225 x double> [[LV_1_1]], [[LV_2_1]] -; CHECK-NEXT: store <225 x double> [[MUL_1]], <225 x double>* [[B_GEP_1]], align 8 -; CHECK-NEXT: [[A_GEP_2:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 2 -; CHECK-NEXT: [[LV_1_2:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_2]], align 8 -; CHECK-NEXT: [[B_GEP_2:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 2 -; CHECK-NEXT: [[LV_2_2:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_2]], align 8 -; CHECK-NEXT: [[MUL_2:%.*]] = fmul <225 x double> [[LV_1_2]], [[LV_2_2]] -; CHECK-NEXT: store <225 x double> [[MUL_2]], <225 x double>* [[B_GEP_2]], align 8 -; CHECK-NEXT: [[A_GEP_3:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 3 -; CHECK-NEXT: [[LV_1_3:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_3]], align 8 -; CHECK-NEXT: [[B_GEP_3:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 3 -; CHECK-NEXT: [[LV_2_3:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_3]], align 8 -; CHECK-NEXT: [[MUL_3:%.*]] = fmul <225 x double> [[LV_1_3]], [[LV_2_3]] -; CHECK-NEXT: store <225 x double> [[MUL_3]], <225 x double>* [[B_GEP_3]], align 8 -; CHECK-NEXT: [[A_GEP_4:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 4 -; CHECK-NEXT: [[LV_1_4:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_4]], align 8 -; CHECK-NEXT: [[B_GEP_4:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 4 -; CHECK-NEXT: [[LV_2_4:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_4]], align 8 -; CHECK-NEXT: [[MUL_4:%.*]] = fmul <225 x double> [[LV_1_4]], [[LV_2_4]] -; CHECK-NEXT: store <225 x double> [[MUL_4]], <225 x double>* [[B_GEP_4]], align 8 -; CHECK-NEXT: [[A_GEP_5:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 5 -; CHECK-NEXT: [[LV_1_5:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_5]], align 8 -; CHECK-NEXT: [[B_GEP_5:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 5 -; CHECK-NEXT: [[LV_2_5:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_5]], align 8 -; CHECK-NEXT: [[MUL_5:%.*]] = fmul <225 x double> [[LV_1_5]], [[LV_2_5]] -; CHECK-NEXT: store <225 x double> [[MUL_5]], <225 x double>* [[B_GEP_5]], align 8 -; CHECK-NEXT: [[A_GEP_6:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 6 -; CHECK-NEXT: [[LV_1_6:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_6]], align 8 -; CHECK-NEXT: [[B_GEP_6:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 6 -; CHECK-NEXT: [[LV_2_6:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_6]], align 8 -; CHECK-NEXT: [[MUL_6:%.*]] = fmul <225 x double> [[LV_1_6]], [[LV_2_6]] -; CHECK-NEXT: store <225 x double> [[MUL_6]], <225 x double>* [[B_GEP_6]], align 8 -; CHECK-NEXT: [[A_GEP_7:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 7 -; CHECK-NEXT: [[LV_1_7:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_7]], align 8 -; CHECK-NEXT: [[B_GEP_7:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 7 -; CHECK-NEXT: [[LV_2_7:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_7]], align 8 -; CHECK-NEXT: [[MUL_7:%.*]] = fmul <225 x double> [[LV_1_7]], [[LV_2_7]] -; CHECK-NEXT: store <225 x double> [[MUL_7]], <225 x double>* [[B_GEP_7]], align 8 -; CHECK-NEXT: [[A_GEP_8:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 8 -; CHECK-NEXT: [[LV_1_8:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_8]], align 8 -; CHECK-NEXT: [[B_GEP_8:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 8 -; CHECK-NEXT: [[LV_2_8:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_8]], align 8 -; CHECK-NEXT: [[MUL_8:%.*]] = fmul <225 x double> [[LV_1_8]], [[LV_2_8]] -; CHECK-NEXT: store <225 x double> [[MUL_8]], <225 x double>* [[B_GEP_8]], align 8 -; CHECK-NEXT: [[A_GEP_9:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 9 -; CHECK-NEXT: [[LV_1_9:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_9]], align 8 -; CHECK-NEXT: [[B_GEP_9:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 9 -; CHECK-NEXT: [[LV_2_9:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_9]], align 8 -; CHECK-NEXT: [[MUL_9:%.*]] = fmul <225 x double> [[LV_1_9]], [[LV_2_9]] -; CHECK-NEXT: store <225 x double> [[MUL_9]], <225 x double>* [[B_GEP_9]], align 8 +; CHECK-NEXT: store <225 x double> [[MUL]], <225 x double>* [[B_GEP]], align 8 +; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], 10 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: ; CHECK-NEXT: ret void ; entry: