Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -639,15 +639,16 @@
 
   /// \brief The various kinds of shuffle patterns for vector queries.
   enum ShuffleKind {
-    SK_Broadcast,       ///< Broadcast element 0 to all other elements.
-    SK_Reverse,         ///< Reverse the order of the vector.
-    SK_Alternate,       ///< Choose alternate elements from vector.
-    SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
-    SK_ExtractSubvector,///< ExtractSubvector Index indicates start offset.
-    SK_PermuteTwoSrc,   ///< Merge elements from two source vectors into one
-                        ///< with any shuffle mask.
-    SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
-                        ///< shuffle mask.
+    SK_Broadcast,        ///< Broadcast element 0 to all other elements.
+    SK_Reverse,          ///< Reverse the order of the vector.
+    SK_Alternate,        ///< Choose alternate elements from vector.
+    SK_Transpose,        ///< Transpose two vectors.
+    SK_InsertSubvector,  ///< InsertSubvector. Index indicates start offset.
+    SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
+    SK_PermuteTwoSrc,    ///< Merge elements from two source vectors into one
+                         ///< with any shuffle mask.
+    SK_PermuteSingleSrc  ///< Shuffle elements of single source vector with any
+                         ///< shuffle mask.
   };
 
   /// \brief Additional information about an operand's possible values.
Index: include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- include/llvm/CodeGen/BasicTTIImpl.h
+++ include/llvm/CodeGen/BasicTTIImpl.h
@@ -553,11 +553,15 @@
 
   unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                           Type *SubTp) {
-    if (Kind == TTI::SK_Alternate || Kind == TTI::SK_PermuteTwoSrc ||
-        Kind == TTI::SK_PermuteSingleSrc) {
+    switch (Kind) {
+    case TTI::SK_Alternate:
+    case TTI::SK_Transpose:
+    case TTI::SK_PermuteSingleSrc:
+    case TTI::SK_PermuteTwoSrc:
       return getPermuteShuffleOverhead(Tp);
+    default:
+      return 1;
     }
-    return 1;
   }
 
   unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -683,6 +683,72 @@
   return isAlternate;
 }
 
+static bool isTransposeVectorMask(ArrayRef<int> Mask) {
+  // Transpose vector masks transpose a 2xn matrix. They read corresponding
+  // even- or odd-numbered vector elements from two n-dimentional source
+  // vectors and write each result into consectuive elements of an
+  // n-dimensional destination vector. Two shuffles are necessary to complete
+  // the transpose, one for the even elements and another for the odd elements.
+  // This description closely follows how the TRN1 and TRN2 AArch64
+  // instructions operate.
+  //
+  // For example, a simple 2x2 matrix can be transposed with:
+  //
+  //   ; Original matrix
+  //   m0 = <a, b>
+  //   m1 = <c, d>
+  //
+  //   ; Transposed matrix
+  //   t0 = <a, c> = shufflevector m0, m1, <0, 2>
+  //   t1 = <b, d> = shufflevector m0, m1, <1, 3>
+  //
+  // For matrices having greater than n columns, the resulting nx2 transposed
+  // matrix is stored in two result vectors such that one vector contains
+  // elements from all the even-numbered rows and the other vector contains
+  // elements from all the odd-numbered rows. For example, a 2x4 matrix can be
+  // transposed with:
+  //
+  //   ; Original matrix
+  //   m0 = <a, b, c, d>
+  //   m1 = <e, f, g, h>
+  //
+  //   ; Transposed matrix
+  //   t0 = <a, e, c, g> = shufflevector m0, m1 <0, 4, 2, 6>
+  //   t1 = <b, f, d, h> = shufflevector m0, m1 <1, 5, 3, 7>
+  //
+  // The above explanation places limations on what valid transpose masks can
+  // look like. These limitations are defined by the checks below.
+  //
+  // 1. The number of elements in the mask must be a power of two.
+  if (!isPowerOf2_32(Mask.size()))
+    return false;
+
+  // 2. The first element of the mask must be either a zero (for the
+  // even-numbered vector elements) or a one (for the odd-numbered vector
+  // elements).
+  if (Mask[0] != 0 && Mask[0] != 1)
+    return false;
+
+  // 3. The difference between the first two elements must be equal to the
+  // number of elements in the mask.
+  if (Mask[1] - Mask[0] != (int)Mask.size())
+    return false;
+
+  // 4. The difference between consecutive even-numbered elements must be equal
+  // to two.
+  for (int I = 2; I < (int)Mask.size(); I += 2)
+    if (Mask[I] - Mask[I - 2] != 2)
+      return false;
+
+  // 5. The difference between consecutive odd-numbered elements must also be
+  // equal to two.
+  for (int I = 3; I < (int)Mask.size(); I += 2)
+    if (Mask[I] - Mask[I - 2] != 2)
+      return false;
+
+  return true;
+}
+
 static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
   TargetTransformInfo::OperandValueKind OpInfo =
       TargetTransformInfo::OK_AnyValue;
@@ -1139,22 +1205,26 @@
 
     if (NumVecElems == Mask.size()) {
       if (isReverseVectorMask(Mask))
-        return getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0,
-                                   0, nullptr);
+        return TTIImpl->getShuffleCost(TargetTransformInfo::SK_Reverse,
+                                       VecTypOp0, 0, nullptr);
       if (isAlternateVectorMask(Mask))
-        return getShuffleCost(TargetTransformInfo::SK_Alternate,
-                                   VecTypOp0, 0, nullptr);
+        return TTIImpl->getShuffleCost(TargetTransformInfo::SK_Alternate,
+                                       VecTypOp0, 0, nullptr);
+
+      if (isTransposeVectorMask(Mask))
+        return TTIImpl->getShuffleCost(TargetTransformInfo::SK_Transpose,
+                                       VecTypOp0, 0, nullptr);
 
       if (isZeroEltBroadcastVectorMask(Mask))
-        return getShuffleCost(TargetTransformInfo::SK_Broadcast,
-                                   VecTypOp0, 0, nullptr);
+        return TTIImpl->getShuffleCost(TargetTransformInfo::SK_Broadcast,
+                                       VecTypOp0, 0, nullptr);
 
       if (isSingleSourceVectorMask(Mask))
-        return getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
-                                   VecTypOp0, 0, nullptr);
+        return TTIImpl->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
+                                       VecTypOp0, 0, nullptr);
 
-      return getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
-                                 VecTypOp0, 0, nullptr);
+      return TTIImpl->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
+                                     VecTypOp0, 0, nullptr);
     }
 
     return -1;
Index: lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -169,6 +169,8 @@
 
   int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                  bool IsPairwiseForm);
+
+  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
   /// @}
 };
 
Index: lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -912,3 +912,30 @@
 
   return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
 }
+
+int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+                                   Type *SubTp) {
+
+  // Transpose shuffle kinds can be performed with 'trn1/trn2' and 'zip1/zip2'
+  // instructions.
+  if (Kind == TTI::SK_Transpose) {
+    static const CostTblEntry TransposeTbl[] = {
+        {ISD::VECTOR_SHUFFLE, MVT::v8i8,  1},
+        {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1},
+        {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
+        {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
+        {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
+        {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
+        {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+        {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
+        {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
+        {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
+    };
+    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+    if (const auto *Entry =
+            CostTableLookup(TransposeTbl, ISD::VECTOR_SHUFFLE, LT.second))
+      return LT.first * Entry->Cost;
+  }
+
+  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+}
Index: test/Analysis/CostModel/AArch64/shuffle-transpose.ll
===================================================================
--- test/Analysis/CostModel/AArch64/shuffle-transpose.ll
+++ test/Analysis/CostModel/AArch64/shuffle-transpose.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE
 
 ; COST-LABEL: trn1.v8i8
-; COST:       Found an estimated cost of 42 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 ; CODE-LABEL: trn1.v8i8
 ; CODE:       trn1 v0.8b, v0.8b, v1.8b
 define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
@@ -11,7 +11,7 @@
 }
 
 ; COST-LABEL: trn2.v8i8
-; COST:       Found an estimated cost of 42 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 ; CODE-LABEL: trn2.v8i8
 ; CODE:       trn2 v0.8b, v0.8b, v1.8b
 define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
@@ -20,7 +20,7 @@
 }
 
 ; COST-LABEL: trn1.v16i8
-; COST:       Found an estimated cost of 90 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
 ; CODE-LABEL: trn1.v16i8
 ; CODE:       trn1 v0.16b, v0.16b, v1.16b
 define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
@@ -29,7 +29,7 @@
 }
 
 ; COST-LABEL: trn2.v16i8
-; COST:       Found an estimated cost of 90 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
 ; CODE-LABEL: trn2.v16i8
 ; CODE:       trn2 v0.16b, v0.16b, v1.16b
 define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
@@ -38,7 +38,7 @@
 }
 
 ; COST-LABEL: trn1.v4i16
-; COST:       Found an estimated cost of 18 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 ; CODE-LABEL: trn1.v4i16
 ; CODE:       trn1 v0.4h, v0.4h, v1.4h
 define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
@@ -47,7 +47,7 @@
 }
 
 ; COST-LABEL: trn2.v4i16
-; COST:       Found an estimated cost of 18 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 ; CODE-LABEL: trn2.v4i16
 ; CODE:       trn2 v0.4h, v0.4h, v1.4h
 define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
@@ -56,7 +56,7 @@
 }
 
 ; COST-LABEL: trn1.v8i16
-; COST:       Found an estimated cost of 42 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 ; CODE-LABEL: trn1.v8i16
 ; CODE:       trn1 v0.8h, v0.8h, v1.8h
 define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
@@ -65,7 +65,7 @@
 }
 
 ; COST-LABEL: trn2.v8i16
-; COST:       Found an estimated cost of 42 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
 ; CODE-LABEL: trn2.v8i16
 ; CODE:       trn2 v0.8h, v0.8h, v1.8h
 define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
@@ -74,7 +74,7 @@
 }
 
 ; COST-LABEL: trn1.v2i32
-; COST:       Found an estimated cost of 6 for instruction: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 2>
+; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 2>
 ; CODE-LABEL: trn1.v2i32
 ; CODE:       zip1 v0.2s, v0.2s, v1.2s
 define <2 x i32> @trn1.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
@@ -83,7 +83,7 @@
 }
 
 ; COST-LABEL: trn2.v2i32
-; COST:       Found an estimated cost of 6 for instruction: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 1, i32 3>
+; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 1, i32 3>
 ; CODE-LABEL: trn2.v2i32
 ; CODE:       zip2 v0.2s, v0.2s, v1.2s
 define <2 x i32> @trn2.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
@@ -92,7 +92,7 @@
 }
 
 ; COST-LABEL: trn1.v4i32
-; COST:       Found an estimated cost of 18 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 ; CODE-LABEL: trn1.v4i32
 ; CODE:       trn1 v0.4s, v0.4s, v1.4s
 define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
@@ -101,7 +101,7 @@
 }
 
 ; COST-LABEL: trn2.v4i32
-; COST:       Found an estimated cost of 18 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 ; CODE-LABEL: trn2.v4i32
 ; CODE:       trn2 v0.4s, v0.4s, v1.4s
 define <4 x i32> @trn2.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
@@ -110,7 +110,7 @@
 }
 
 ; COST-LABEL: trn1.v2i64
-; COST:       Found an estimated cost of 6 for instruction: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 2>
+; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 2>
 ; CODE-LABEL: trn1.v2i64
 ; CODE:       zip1 v0.2d, v0.2d, v1.2d
 define <2 x i64> @trn1.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
@@ -119,7 +119,7 @@
 }
 
 ; COST-LABEL: trn2.v2i64
-; COST:       Found an estimated cost of 6 for instruction: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 1, i32 3>
+; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 1, i32 3>
 ; CODE-LABEL: trn2.v2i64
 ; CODE:       zip2 v0.2d, v0.2d, v1.2d
 define <2 x i64> @trn2.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
@@ -128,7 +128,7 @@
 }
 
 ; COST-LABEL: trn1.v2f32
-; COST:       Found an estimated cost of 6 for instruction: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 2>
+; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 2>
 ; CODE-LABEL: trn1.v2f32
 ; CODE:       zip1 v0.2s, v0.2s, v1.2s
 define <2 x float> @trn1.v2f32(<2 x float> %v0, <2 x float> %v1) {
@@ -137,7 +137,7 @@
 }
 
 ; COST-LABEL: trn2.v2f32
-; COST:       Found an estimated cost of 6 for instruction: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 1, i32 3>
+; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 1, i32 3>
 ; CODE-LABEL: trn2.v2f32
 ; CODE:       zip2 v0.2s, v0.2s, v1.2s
 define <2 x float> @trn2.v2f32(<2 x float> %v0, <2 x float> %v1) {
@@ -146,7 +146,7 @@
 }
 
 ; COST-LABEL: trn1.v4f32
-; COST:       Found an estimated cost of 18 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 ; CODE-LABEL: trn1.v4f32
 ; CODE:       trn1 v0.4s, v0.4s, v1.4s
 define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) {
@@ -155,7 +155,7 @@
 }
 
 ; COST-LABEL: trn2.v4f32
-; COST:       Found an estimated cost of 18 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
 ; CODE-LABEL: trn2.v4f32
 ; CODE:       trn2 v0.4s, v0.4s, v1.4s
 define <4 x float> @trn2.v4f32(<4 x float> %v0, <4 x float> %v1) {
@@ -164,7 +164,7 @@
 }
 
 ; COST-LABEL: trn1.v2f64
-; COST:       Found an estimated cost of 6 for instruction: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 2>
+; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 2>
 ; CODE-LABEL: trn1.v2f64
 ; CODE:       zip1 v0.2d, v0.2d, v1.2d
 define <2 x double> @trn1.v2f64(<2 x double> %v0, <2 x double> %v1) {
@@ -173,7 +173,7 @@
 }
 
 ; COST-LABEL: trn2.v2f64
-; COST:       Found an estimated cost of 6 for instruction: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 1, i32 3>
+; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 1, i32 3>
 ; CODE-LABEL: trn2.v2f64
 ; CODE:       zip2 v0.2d, v0.2d, v1.2d
 define <2 x double> @trn2.v2f64(<2 x double> %v0, <2 x double> %v1) {