diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -944,10 +944,6 @@
                                                 unsigned AddressSpace,
                                                 TTI::TargetCostKind CostKind,
                                                 const Instruction *I) {
-  // TODO: Handle other cost kinds.
-  if (CostKind != TTI::TCK_RecipThroughput)
-    return 1;
-
   // Type legalization can't handle structs
   if (TLI->getValueType(DL, Ty,  true) == MVT::Other)
     return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
@@ -955,6 +951,13 @@
 
   auto LT = TLI->getTypeLegalizationCost(DL, Ty);
 
+  // TODO: Also account for latency when CostKind is TCK_SizeAndLatency.
+  if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
+    return LT.first;
+
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return 1;
+
   if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
       LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
     // Unaligned stores are extremely inefficient. We don't split all
diff --git a/llvm/test/Analysis/CostModel/AArch64/store.ll b/llvm/test/Analysis/CostModel/AArch64/store.ll
--- a/llvm/test/Analysis/CostModel/AArch64/store.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/store.ll
@@ -30,13 +30,13 @@
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'getMemoryOpCost'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 4
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 4
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x double> undef, <4 x double>* undef, align 4
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x float> undef, <8 x float>* undef, align 4
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <16 x half> undef, <16 x half>* undef, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> undef, <4 x i64>* undef, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> undef, <8 x i32>* undef, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> undef, <16 x i16>* undef, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> undef, <32 x i8>* undef, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x double> undef, <4 x double>* undef, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <8 x float> undef, <8 x float>* undef, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <16 x half> undef, <16 x half>* undef, align 4
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 4
diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/large-vector-ops.ll b/llvm/test/Transforms/LoopUnroll/AArch64/large-vector-ops.ll
--- a/llvm/test/Transforms/LoopUnroll/AArch64/large-vector-ops.ll
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/large-vector-ops.ll
@@ -6,70 +6,22 @@
 
 ; The loop in the function only contains a few instructions, but they will get
 ; lowered to a very large amount of target instructions.
-; FIXME: Currently the cost-model assigns a cost of 1 to those large vector ops.
 define void @loop_with_large_vector_ops(i32 %i, <225 x double>* %A, <225 x double>* %B) {
 ; CHECK-LABEL: @loop_with_large_vector_ops(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[LV_1:%.*]] = load <225 x double>, <225 x double>* [[A:%.*]], align 8
-; CHECK-NEXT:    [[LV_2:%.*]] = load <225 x double>, <225 x double>* [[A]], align 8
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[A_GEP:%.*]] = getelementptr <225 x double>, <225 x double>* [[A:%.*]], i32 [[IV]]
+; CHECK-NEXT:    [[LV_1:%.*]] = load <225 x double>, <225 x double>* [[A_GEP]], align 8
+; CHECK-NEXT:    [[B_GEP:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 [[IV]]
+; CHECK-NEXT:    [[LV_2:%.*]] = load <225 x double>, <225 x double>* [[B_GEP]], align 8
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul <225 x double> [[LV_1]], [[LV_2]]
-; CHECK-NEXT:    store <225 x double> [[MUL]], <225 x double>* [[A]], align 8
-; CHECK-NEXT:    [[A_GEP_1:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 1
-; CHECK-NEXT:    [[LV_1_1:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_1]], align 8
-; CHECK-NEXT:    [[B_GEP_1:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 1
-; CHECK-NEXT:    [[LV_2_1:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_1]], align 8
-; CHECK-NEXT:    [[MUL_1:%.*]] = fmul <225 x double> [[LV_1_1]], [[LV_2_1]]
-; CHECK-NEXT:    store <225 x double> [[MUL_1]], <225 x double>* [[B_GEP_1]], align 8
-; CHECK-NEXT:    [[A_GEP_2:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 2
-; CHECK-NEXT:    [[LV_1_2:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_2]], align 8
-; CHECK-NEXT:    [[B_GEP_2:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 2
-; CHECK-NEXT:    [[LV_2_2:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_2]], align 8
-; CHECK-NEXT:    [[MUL_2:%.*]] = fmul <225 x double> [[LV_1_2]], [[LV_2_2]]
-; CHECK-NEXT:    store <225 x double> [[MUL_2]], <225 x double>* [[B_GEP_2]], align 8
-; CHECK-NEXT:    [[A_GEP_3:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 3
-; CHECK-NEXT:    [[LV_1_3:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_3]], align 8
-; CHECK-NEXT:    [[B_GEP_3:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 3
-; CHECK-NEXT:    [[LV_2_3:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_3]], align 8
-; CHECK-NEXT:    [[MUL_3:%.*]] = fmul <225 x double> [[LV_1_3]], [[LV_2_3]]
-; CHECK-NEXT:    store <225 x double> [[MUL_3]], <225 x double>* [[B_GEP_3]], align 8
-; CHECK-NEXT:    [[A_GEP_4:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 4
-; CHECK-NEXT:    [[LV_1_4:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_4]], align 8
-; CHECK-NEXT:    [[B_GEP_4:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 4
-; CHECK-NEXT:    [[LV_2_4:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_4]], align 8
-; CHECK-NEXT:    [[MUL_4:%.*]] = fmul <225 x double> [[LV_1_4]], [[LV_2_4]]
-; CHECK-NEXT:    store <225 x double> [[MUL_4]], <225 x double>* [[B_GEP_4]], align 8
-; CHECK-NEXT:    [[A_GEP_5:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 5
-; CHECK-NEXT:    [[LV_1_5:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_5]], align 8
-; CHECK-NEXT:    [[B_GEP_5:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 5
-; CHECK-NEXT:    [[LV_2_5:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_5]], align 8
-; CHECK-NEXT:    [[MUL_5:%.*]] = fmul <225 x double> [[LV_1_5]], [[LV_2_5]]
-; CHECK-NEXT:    store <225 x double> [[MUL_5]], <225 x double>* [[B_GEP_5]], align 8
-; CHECK-NEXT:    [[A_GEP_6:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 6
-; CHECK-NEXT:    [[LV_1_6:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_6]], align 8
-; CHECK-NEXT:    [[B_GEP_6:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 6
-; CHECK-NEXT:    [[LV_2_6:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_6]], align 8
-; CHECK-NEXT:    [[MUL_6:%.*]] = fmul <225 x double> [[LV_1_6]], [[LV_2_6]]
-; CHECK-NEXT:    store <225 x double> [[MUL_6]], <225 x double>* [[B_GEP_6]], align 8
-; CHECK-NEXT:    [[A_GEP_7:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 7
-; CHECK-NEXT:    [[LV_1_7:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_7]], align 8
-; CHECK-NEXT:    [[B_GEP_7:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 7
-; CHECK-NEXT:    [[LV_2_7:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_7]], align 8
-; CHECK-NEXT:    [[MUL_7:%.*]] = fmul <225 x double> [[LV_1_7]], [[LV_2_7]]
-; CHECK-NEXT:    store <225 x double> [[MUL_7]], <225 x double>* [[B_GEP_7]], align 8
-; CHECK-NEXT:    [[A_GEP_8:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 8
-; CHECK-NEXT:    [[LV_1_8:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_8]], align 8
-; CHECK-NEXT:    [[B_GEP_8:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 8
-; CHECK-NEXT:    [[LV_2_8:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_8]], align 8
-; CHECK-NEXT:    [[MUL_8:%.*]] = fmul <225 x double> [[LV_1_8]], [[LV_2_8]]
-; CHECK-NEXT:    store <225 x double> [[MUL_8]], <225 x double>* [[B_GEP_8]], align 8
-; CHECK-NEXT:    [[A_GEP_9:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 9
-; CHECK-NEXT:    [[LV_1_9:%.*]] = load <225 x double>, <225 x double>* [[A_GEP_9]], align 8
-; CHECK-NEXT:    [[B_GEP_9:%.*]] = getelementptr <225 x double>, <225 x double>* [[A]], i32 9
-; CHECK-NEXT:    [[LV_2_9:%.*]] = load <225 x double>, <225 x double>* [[B_GEP_9]], align 8
-; CHECK-NEXT:    [[MUL_9:%.*]] = fmul <225 x double> [[LV_1_9]], [[LV_2_9]]
-; CHECK-NEXT:    store <225 x double> [[MUL_9]], <225 x double>* [[B_GEP_9]], align 8
+; CHECK-NEXT:    store <225 x double> [[MUL]], <225 x double>* [[B_GEP]], align 8
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw i32 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], 10
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
 entry: