Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -493,32 +493,49 @@
 
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
 
-  if (ISD == ISD::SDIV &&
-      Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
-      Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
-    // On AArch64, scalar signed division by constants power-of-two are
-    // normally expanded to the sequence ADD + CMP + SELECT + SRA.
-    // The OperandValue properties many not be same as that of previous
-    // operation; conservatively assume OP_None.
-    Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
-                                   TargetTransformInfo::OP_None,
-                                   TargetTransformInfo::OP_None);
-    Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
-                                   TargetTransformInfo::OP_None,
-                                   TargetTransformInfo::OP_None);
-    Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
-                                   TargetTransformInfo::OP_None,
-                                   TargetTransformInfo::OP_None);
-    Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
-                                   TargetTransformInfo::OP_None,
-                                   TargetTransformInfo::OP_None);
-    return Cost;
-  }
-
   switch (ISD) {
   default:
     return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                                 Opd1PropInfo, Opd2PropInfo);
+  case ISD::SDIV:
+    if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
+        Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
+      // On AArch64, scalar signed division by a power-of-two constant is
+      // normally expanded to the sequence ADD + CMP + SELECT + SRA.
+      // The OperandValue properties may not be the same as those of the
+      // previous operation; conservatively assume OP_None.
+      Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
+                                     TargetTransformInfo::OP_None,
+                                     TargetTransformInfo::OP_None);
+      Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
+                                     TargetTransformInfo::OP_None,
+                                     TargetTransformInfo::OP_None);
+      Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
+                                     TargetTransformInfo::OP_None,
+                                     TargetTransformInfo::OP_None);
+      Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
+                                     TargetTransformInfo::OP_None,
+                                     TargetTransformInfo::OP_None);
+      return Cost;
+    }
+    LLVM_FALLTHROUGH;
+  case ISD::UDIV:
+    Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+                                          Opd1PropInfo, Opd2PropInfo);
+    if (Ty->isVectorTy()) {
+      // On AArch64, vector divisions are not supported natively and are
+      // expanded into scalar divisions of each pair of elements.
+      // NOTE(review): the calls below pass Opd*PropInfo through, not OP_None.
+      Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, Opd1Info,
+                                     Opd2Info, Opd1PropInfo, Opd2PropInfo);
+      Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, Opd1Info,
+                                     Opd2Info, Opd1PropInfo, Opd2PropInfo);
+      // TODO: if one of the arguments is a scalar, then it's not necessary to
+      // double the cost of handling the vector elements.
+      Cost += Cost;
+    }
+    return Cost;
+
   case ISD::ADD:
   case ISD::MUL:
   case ISD::XOR:
Index: llvm/test/Analysis/CostModel/AArch64/vdiv.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/CostModel/AArch64/vdiv.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s -O3 -debug-only=loop-vectorize 2> %t; FileCheck %s --input-file %t 
+
+; CHECK: LV: Scalar loop            costs: {{[0-9]}}.
+; CHECK: LV: Vector loop of width 2 costs: {{[0-9]+}}.
+; CHECK: LV: Vector loop of width 4 costs: {{[0-9]+}}.
+; CHECK: LV: Selecting VF: 1.
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+define void @scale(i32* noalias nocapture %p, i32* noalias nocapture readonly %scale, i32 %width) #0 {
+entry:
+  %cmp = icmp ugt i32 %width, 3
+  %rem = and i32 %width, 3
+  %cmp1 = icmp eq i32 %rem, 0
+  %or.cond17 = and i1 %cmp, %cmp1
+  br i1 %or.cond17, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %width to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.preheader
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %p, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !2
+  %arrayidx4 = getelementptr inbounds i32, i32* %scale, i64 %indvars.iv
+  %1 = load i32, i32* %arrayidx4, align 4, !tbaa !2
+  %div = sdiv i32 %0, %1
+  store i32 %div, i32* %arrayidx, align 4, !tbaa !2
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+attributes #0 = { norecurse nounwind }
+
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}