diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -281,6 +281,91 @@
   return BaseT::getIntrinsicInstrCost(ICA, CostKind);
 }
 
+/// The function will remove redundant reinterprets casting in the presence
+/// of the control flow
+static Optional<Instruction *> processPhiNode(InstCombiner &IC,
+                                              IntrinsicInst &II) {
+  SmallVector<Instruction *, 32> Worklist;
+  auto RequiredType = II.getType();
+
+  auto *PN = dyn_cast<PHINode>(II.getArgOperand(0));
+  assert(PN && "Expected Phi Node!");
+
+  // Don't create a new Phi unless we can remove the old one.
+  if (!PN->hasOneUse())
+    return None;
+
+  for (Value *IncValPhi : PN->incoming_values()) {
+    auto *Reinterpret = dyn_cast<IntrinsicInst>(IncValPhi);
+    if (!Reinterpret ||
+        Reinterpret->getIntrinsicID() !=
+            Intrinsic::aarch64_sve_convert_to_svbool ||
+        RequiredType != Reinterpret->getArgOperand(0)->getType())
+      return None;
+  }
+
+  // Create the new Phi
+  LLVMContext &Ctx = PN->getContext();
+  IRBuilder<> Builder(Ctx);
+  Builder.SetInsertPoint(PN);
+  PHINode *NPN = Builder.CreatePHI(RequiredType, PN->getNumIncomingValues());
+  Worklist.push_back(PN);
+
+  for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
+    auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(I));
+    NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
+    Worklist.push_back(Reinterpret);
+  }
+
+  // Cleanup Phi Node and reinterprets
+  return IC.replaceInstUsesWith(II, NPN);
+}
+
+static Optional<Instruction *> instCombineConvertFromSVBool(InstCombiner &IC,
+                                                            IntrinsicInst &II) {
+  // If the reinterpret instruction operand is a PHI Node
+  if (isa<PHINode>(II.getArgOperand(0)))
+    return processPhiNode(IC, II);
+
+  SmallVector<Instruction *, 32> CandidatesForRemoval;
+  Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;
+
+  const auto *IVTy = cast<VectorType>(II.getType());
+
+  // Walk the chain of conversions.
+  while (Cursor) {
+    // If the type of the cursor has fewer lanes than the final result, zeroing
+    // must take place, which breaks the equivalence chain.
+    const auto *CursorVTy = cast<VectorType>(Cursor->getType());
+    if (CursorVTy->getElementCount().getKnownMinValue() <
+        IVTy->getElementCount().getKnownMinValue())
+      break;
+
+    // If the cursor has the same type as I, it is a viable replacement.
+    if (Cursor->getType() == IVTy)
+      EarliestReplacement = Cursor;
+
+    auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor);
+
+    // If this is not an SVE conversion intrinsic, this is the end of the chain.
+    if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
+                                  Intrinsic::aarch64_sve_convert_to_svbool ||
+                              IntrinsicCursor->getIntrinsicID() ==
+                                  Intrinsic::aarch64_sve_convert_from_svbool))
+      break;
+
+    CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
+    Cursor = IntrinsicCursor->getOperand(0);
+  }
+
+  // If no viable replacement in the conversion chain was found, there is
+  // nothing to do.
+  if (!EarliestReplacement)
+    return None;
+
+  return IC.replaceInstUsesWith(II, EarliestReplacement);
+}
+
 static Optional<Instruction *> instCombineSVELast(InstCombiner &IC,
                                                   IntrinsicInst &II) {
   Value *Pg = II.getArgOperand(0);
@@ -368,6 +453,8 @@
   switch (IID) {
   default:
     break;
+  case Intrinsic::aarch64_sve_convert_from_svbool:
+    return instCombineConvertFromSVBool(IC, II);
   case Intrinsic::aarch64_sve_lasta:
   case Intrinsic::aarch64_sve_lastb:
     return instCombineSVELast(IC, II);
diff --git a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
--- a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
+++ b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
@@ -11,18 +11,13 @@
 //
 // This pass performs the following optimizations:
 //
-// - removes unnecessary reinterpret intrinsics
-//   (llvm.aarch64.sve.convert.[to|from].svbool), e.g:
-//     %1 = @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
-//     %2 = @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
-//
 // - removes unnecessary ptrue intrinsics (llvm.aarch64.sve.ptrue), e.g:
 //     %1 = @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
 //     %2 = @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
 //     ; (%1 can be replaced with a reinterpret of %2)
 //
-// - optimizes ptest intrinsics and phi instructions where the operands are
-//   being needlessly converted to and from svbool_t.
+// - optimizes ptest intrinsics where the operands are being needlessly
+//   converted to and from svbool_t.
 //
 //===----------------------------------------------------------------------===//
 
@@ -75,12 +70,9 @@
   /// the functions themselves.
   bool optimizeFunctions(SmallSetVector<Function *, 4> &Functions);
 
-  static bool optimizeConvertFromSVBool(IntrinsicInst *I);
   static bool optimizePTest(IntrinsicInst *I);
   static bool optimizeVectorMul(IntrinsicInst *I);
   static bool optimizeTBL(IntrinsicInst *I);
-
-  static bool processPhiNode(IntrinsicInst *I);
 };
 } // end anonymous namespace
 
@@ -197,17 +189,30 @@
       Intrinsic::aarch64_sve_convert_to_svbool, {MostEncompassingPTrueVTy},
       {MostEncompassingPTrue});
 
+  bool ConvertFromCreated = false;
   for (auto *PTrue : PTrues) {
     auto *PTrueVTy = cast<VectorType>(PTrue->getType());
 
-    Builder.SetInsertPoint(&BB, ++ConvertToSVBool->getIterator());
-    auto *ConvertFromSVBool =
-        Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
-                                {PTrueVTy}, {ConvertToSVBool});
-    PTrue->replaceAllUsesWith(ConvertFromSVBool);
+    // Only create the converts if the types are not already the same, otherwise
+    // just use the most encompassing ptrue.
+    if (MostEncompassingPTrueVTy != PTrueVTy) {
+      ConvertFromCreated = true;
+
+      Builder.SetInsertPoint(&BB, ++ConvertToSVBool->getIterator());
+      auto *ConvertFromSVBool =
+          Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
+                                  {PTrueVTy}, {ConvertToSVBool});
+      PTrue->replaceAllUsesWith(ConvertFromSVBool);
+    } else
+      PTrue->replaceAllUsesWith(MostEncompassingPTrue);
+
     PTrue->eraseFromParent();
   }
 
+  // We never used the ConvertTo so remove it
+  if (!ConvertFromCreated)
+    ConvertToSVBool->eraseFromParent();
+
   return true;
 }
 
@@ -294,51 +299,6 @@
   return Changed;
 }
 
-/// The function will remove redundant reinterprets casting in the presence
-/// of the control flow
-bool SVEIntrinsicOpts::processPhiNode(IntrinsicInst *X) {
-
-  SmallVector<Instruction *, 32> Worklist;
-  auto RequiredType = X->getType();
-
-  auto *PN = dyn_cast<PHINode>(X->getArgOperand(0));
-  assert(PN && "Expected Phi Node!");
-
-  // Don't create a new Phi unless we can remove the old one.
-  if (!PN->hasOneUse())
-    return false;
-
-  for (Value *IncValPhi : PN->incoming_values()) {
-    auto *Reinterpret = isReinterpretToSVBool(IncValPhi);
-    if (!Reinterpret ||
-        RequiredType != Reinterpret->getArgOperand(0)->getType())
-      return false;
-  }
-
-  // Create the new Phi
-  LLVMContext &Ctx = PN->getContext();
-  IRBuilder<> Builder(Ctx);
-  Builder.SetInsertPoint(PN);
-  PHINode *NPN = Builder.CreatePHI(RequiredType, PN->getNumIncomingValues());
-  Worklist.push_back(PN);
-
-  for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
-    auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(I));
-    NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
-    Worklist.push_back(Reinterpret);
-  }
-
-  // Cleanup Phi Node and reinterprets
-  X->replaceAllUsesWith(NPN);
-  X->eraseFromParent();
-
-  for (auto &I : Worklist)
-    if (I->use_empty())
-      I->eraseFromParent();
-
-  return true;
-}
-
 bool SVEIntrinsicOpts::optimizePTest(IntrinsicInst *I) {
   IntrinsicInst *Op1 = dyn_cast<IntrinsicInst>(I->getArgOperand(0));
   IntrinsicInst *Op2 = dyn_cast<IntrinsicInst>(I->getArgOperand(1));
@@ -473,69 +433,12 @@
   return true;
 }
 
-bool SVEIntrinsicOpts::optimizeConvertFromSVBool(IntrinsicInst *I) {
-  assert(I->getIntrinsicID() == Intrinsic::aarch64_sve_convert_from_svbool &&
-         "Unexpected opcode");
-
-  // If the reinterpret instruction operand is a PHI Node
-  if (isa<PHINode>(I->getArgOperand(0)))
-    return processPhiNode(I);
-
-  SmallVector<Instruction *, 32> CandidatesForRemoval;
-  Value *Cursor = I->getOperand(0), *EarliestReplacement = nullptr;
-
-  const auto *IVTy = cast<VectorType>(I->getType());
-
-  // Walk the chain of conversions.
-  while (Cursor) {
-    // If the type of the cursor has fewer lanes than the final result, zeroing
-    // must take place, which breaks the equivalence chain.
-    const auto *CursorVTy = cast<VectorType>(Cursor->getType());
-    if (CursorVTy->getElementCount().getKnownMinValue() <
-        IVTy->getElementCount().getKnownMinValue())
-      break;
-
-    // If the cursor has the same type as I, it is a viable replacement.
-    if (Cursor->getType() == IVTy)
-      EarliestReplacement = Cursor;
-
-    auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor);
-
-    // If this is not an SVE conversion intrinsic, this is the end of the chain.
-    if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
-                                  Intrinsic::aarch64_sve_convert_to_svbool ||
-                              IntrinsicCursor->getIntrinsicID() ==
-                                  Intrinsic::aarch64_sve_convert_from_svbool))
-      break;
-
-    CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
-    Cursor = IntrinsicCursor->getOperand(0);
-  }
-
-  // If no viable replacement in the conversion chain was found, there is
-  // nothing to do.
-  if (!EarliestReplacement)
-    return false;
-
-  I->replaceAllUsesWith(EarliestReplacement);
-  I->eraseFromParent();
-
-  while (!CandidatesForRemoval.empty()) {
-    Instruction *Candidate = CandidatesForRemoval.pop_back_val();
-    if (Candidate->use_empty())
-      Candidate->eraseFromParent();
-  }
-  return true;
-}
-
 bool SVEIntrinsicOpts::optimizeIntrinsic(Instruction *I) {
   IntrinsicInst *IntrI = dyn_cast<IntrinsicInst>(I);
   if (!IntrI)
     return false;
 
   switch (IntrI->getIntrinsicID()) {
-  case Intrinsic::aarch64_sve_convert_from_svbool:
-    return optimizeConvertFromSVBool(IntrI);
   case Intrinsic::aarch64_sve_fmul:
   case Intrinsic::aarch64_sve_mul:
     return optimizeVectorMul(IntrI);
@@ -591,7 +494,6 @@
       continue;
 
     switch (F.getIntrinsicID()) {
-    case Intrinsic::aarch64_sve_convert_from_svbool:
     case Intrinsic::aarch64_sve_ptest_any:
     case Intrinsic::aarch64_sve_ptest_first:
     case Intrinsic::aarch64_sve_ptest_last:
diff --git a/llvm/test/CodeGen/AArch64/sve-coalesce-ptrue-intrinsics.ll b/llvm/test/CodeGen/AArch64/sve-coalesce-ptrue-intrinsics.ll
--- a/llvm/test/CodeGen/AArch64/sve-coalesce-ptrue-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/sve-coalesce-ptrue-intrinsics.ll
@@ -163,14 +163,13 @@
 define <vscale x 8 x i16> @coalesce_test_promoted_ptrue(i32* %addr1, i16* %addr2) {
 ; CHECK-LABEL: @coalesce_test_promoted_ptrue(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP3]])
-; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP4]])
-; CHECK-NEXT:    [[TMP6:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> [[TMP3]], i32* [[ADDR1:%.*]])
-; CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> [[TMP5]], i16* [[ADDR2:%.*]])
-; CHECK-NEXT:    [[TMP8:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> [[TMP1]], i16* [[ADDR2]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP8]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> [[TMP2]], i32* [[ADDR1:%.*]])
+; CHECK-NEXT:    [[TMP6:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> [[TMP4]], i16* [[ADDR2:%.*]])
+; CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> [[TMP1]], i16* [[ADDR2]])
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP7]]
 ;
   %1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %2 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-reinterpret.ll
rename from llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll
rename to llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-reinterpret.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-reinterpret.ll
@@ -1,9 +1,11 @@
-; RUN: opt -S -aarch64-sve-intrinsic-opts -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck --check-prefix OPT %s
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target triple = "aarch64"
 
 define <vscale x 8 x i1> @reinterpret_test_h(<vscale x 8 x i1> %a) {
-; OPT-LABEL: @reinterpret_test_h(
-; OPT-NOT: convert
-; OPT: ret <vscale x 8 x i1> %a
+; CHECK-LABEL: @reinterpret_test_h(
+; CHECK-NOT: convert
+; CHECK: ret <vscale x 8 x i1> %a
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
   %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
   ret <vscale x 8 x i1> %2
@@ -12,19 +14,19 @@
 ; Reinterprets are not redundant because the second reinterpret zeros the
 ; lanes that don't exist within its input.
 define <vscale x 16 x i1> @reinterpret_test_h_rev(<vscale x 16 x i1> %a) {
-; OPT-LABEL: @reinterpret_test_h_rev(
-; OPT: %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %a)
-; OPT-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
-; OPT-NEXT: ret <vscale x 16 x i1> %2
+; CHECK-LABEL: @reinterpret_test_h_rev(
+; CHECK: %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %a)
+; CHECK-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
+; CHECK-NEXT: ret <vscale x 16 x i1> %2
   %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %a)
   %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
   ret <vscale x 16 x i1> %2
 }
 
 define <vscale x 4 x i1> @reinterpret_test_w(<vscale x 4 x i1> %a) {
-; OPT-LABEL: @reinterpret_test_w(
-; OPT-NOT: convert
-; OPT: ret <vscale x 4 x i1> %a
+; CHECK-LABEL: @reinterpret_test_w(
+; CHECK-NOT: convert
+; CHECK: ret <vscale x 4 x i1> %a
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
   %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
   ret <vscale x 4 x i1> %2
@@ -33,19 +35,19 @@
 ; Reinterprets are not redundant because the second reinterpret zeros the
 ; lanes that don't exist within its input.
 define <vscale x 16 x i1> @reinterpret_test_w_rev(<vscale x 16 x i1> %a) {
-; OPT-LABEL: @reinterpret_test_w_rev(
-; OPT: %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %a)
-; OPT-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
-; OPT-NEXT: ret <vscale x 16 x i1> %2
+; CHECK-LABEL: @reinterpret_test_w_rev(
+; CHECK: %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %a)
+; CHECK-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
+; CHECK-NEXT: ret <vscale x 16 x i1> %2
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %a)
   %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
   ret <vscale x 16 x i1> %2
 }
 
 define <vscale x 2 x i1> @reinterpret_test_d(<vscale x 2 x i1> %a) {
-; OPT-LABEL: @reinterpret_test_d(
-; OPT-NOT: convert
-; OPT: ret <vscale x 2 x i1> %a
+; CHECK-LABEL: @reinterpret_test_d(
+; CHECK-NOT: convert
+; CHECK: ret <vscale x 2 x i1> %a
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
   %2 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %1)
   ret <vscale x 2 x i1> %2
@@ -54,18 +56,18 @@
 ; Reinterprets are not redundant because the second reinterpret zeros the
 ; lanes that don't exist within its input.
 define <vscale x 16 x i1> @reinterpret_test_d_rev(<vscale x 16 x i1> %a) {
-; OPT-LABEL: @reinterpret_test_d_rev(
-; OPT: %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %a)
-; OPT-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %1)
-; OPT-NEXT: ret <vscale x 16 x i1> %2
+; CHECK-LABEL: @reinterpret_test_d_rev(
+; CHECK: %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %a)
+; CHECK-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %1)
+; CHECK-NEXT: ret <vscale x 16 x i1> %2
   %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %a)
   %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %1)
   ret <vscale x 16 x i1> %2
 }
 
 define <vscale x 2 x i1> @reinterpret_test_full_chain(<vscale x 2 x i1> %a) {
-; OPT-LABEL: @reinterpret_test_full_chain(
-; OPT: ret <vscale x 2 x i1> %a
+; CHECK-LABEL: @reinterpret_test_full_chain(
+; CHECK: ret <vscale x 2 x i1> %a
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
   %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
   %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
@@ -78,10 +80,10 @@
 ; The last two reinterprets are not necessary, since they are doing the same
 ; work as the first two.
 define <vscale x 4 x i1> @reinterpret_test_partial_chain(<vscale x 2 x i1> %a) {
-; OPT-LABEL: @reinterpret_test_partial_chain(
-; OPT: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
-; OPT-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
-; OPT-NEXT: ret <vscale x 4 x i1> %2
+; CHECK-LABEL: @reinterpret_test_partial_chain(
+; CHECK: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
+; CHECK-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
+; CHECK-NEXT: ret <vscale x 4 x i1> %2
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
   %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
   %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
@@ -92,12 +94,12 @@
 ; The chain cannot be reduced because of the second reinterpret, which causes
 ; zeroing.
 define <vscale x 8 x i1> @reinterpret_test_irreducible_chain(<vscale x 8 x i1> %a) {
-; OPT-LABEL: @reinterpret_test_irreducible_chain(
-; OPT: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
-; OPT-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
-; OPT-NEXT: %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
-; OPT-NEXT: %4 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %3)
-; OPT-NEXT: ret <vscale x 8 x i1> %4
+; CHECK-LABEL: @reinterpret_test_irreducible_chain(
+; CHECK: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
+; CHECK-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
+; CHECK-NEXT: %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+; CHECK-NEXT: %4 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %3)
+; CHECK-NEXT: ret <vscale x 8 x i1> %4
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
   %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
   %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
@@ -108,10 +110,10 @@
 ; Here, the candidate list is larger than the number of instructions that we
 ; end up removing.
 define <vscale x 4 x i1> @reinterpret_test_keep_some_candidates(<vscale x 8 x i1> %a) {
-; OPT-LABEL: @reinterpret_test_keep_some_candidates(
-; OPT: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
-; OPT-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
-; OPT-NEXT: ret <vscale x 4 x i1> %2
+; CHECK-LABEL: @reinterpret_test_keep_some_candidates(
+; CHECK: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
+; CHECK-NEXT: %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
+; CHECK-NEXT: ret <vscale x 4 x i1> %2
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
   %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
   %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
@@ -120,12 +122,12 @@
 }
 
 define <vscale x 2 x i1> @reinterpret_reductions(i32 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c) {
-; OPT-LABEL: reinterpret_reductions
-; OPT-NOT: convert
-; OPT-NOT: phi <vscale x 16 x i1>
-; OPT: phi <vscale x 2 x i1> [ %a, %br_phi_a ], [ %b, %br_phi_b ], [ %c, %br_phi_c ]
-; OPT-NOT: convert
-; OPT: ret
+; CHECK-LABEL: reinterpret_reductions
+; CHECK-NOT: convert
+; CHECK-NOT: phi <vscale x 16 x i1>
+; CHECK: phi <vscale x 2 x i1> [ %a, %br_phi_a ], [ %b, %br_phi_b ], [ %c, %br_phi_c ]
+; CHECK-NOT: convert
+; CHECK: ret
 
 entry:
   switch i32 %cond, label %br_phi_c [
@@ -154,12 +156,12 @@
 ; No transform as the reinterprets are converting from different types (nxv2i1 & nxv4i1)
 ; As the incoming values to the phi must all be the same type, we cannot remove the reinterprets.
 define <vscale x 2 x i1> @reinterpret_reductions_1(i32 %cond, <vscale x 2 x i1> %a, <vscale x 4 x i1> %b, <vscale x 2 x i1> %c) {
-; OPT-LABEL: reinterpret_reductions_1
-; OPT: convert
-; OPT: phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
-; OPT-NOT: phi <vscale x 2 x i1>
-; OPT: tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
-; OPT: ret
+; CHECK-LABEL: reinterpret_reductions_1
+; CHECK: convert
+; CHECK: phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
+; CHECK-NOT: phi <vscale x 2 x i1>
+; CHECK: tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+; CHECK: ret
 
 entry:
   switch i32 %cond, label %br_phi_c [
@@ -188,12 +190,12 @@
 ; No transform. Similar to the the test above, but here only two of the arguments need to
 ; be converted to svbool.
 define <vscale x 2 x i1> @reinterpret_reductions_2(i32 %cond, <vscale x 2 x i1> %a, <vscale x 16 x i1> %b, <vscale x 2 x i1> %c) {
-; OPT-LABEL: reinterpret_reductions_2
-; OPT: convert
-; OPT: phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b, %br_phi_b ], [ %c1, %br_phi_c ]
-; OPT-NOT: phi <vscale x 2 x i1>
-; OPT: tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
-; OPT: ret
+; CHECK-LABEL: reinterpret_reductions_2
+; CHECK: convert
+; CHECK: phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b, %br_phi_b ], [ %c1, %br_phi_c ]
+; CHECK-NOT: phi <vscale x 2 x i1>
+; CHECK: tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
+; CHECK: ret
 
 entry:
   switch i32 %cond, label %br_phi_c [
@@ -221,11 +223,10 @@
 ; Similar to reinterpret_reductions but the reinterprets remain because the
 ; original phi cannot be removed (i.e. prefer reinterprets over multiple phis).
 define <vscale x 16 x i1> @reinterpret_reductions3(i32 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c) {
-; OPT-LABEL: reinterpret_reductions3
-; OPT: phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
-; OPT-NOT: phi <vscale x 2 x i1>
-; OPT: tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
-; OPT-NEXT: ret <vscale x 16 x i1> %pg
+; CHECK-LABEL: reinterpret_reductions3
+; CHECK: phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
+; CHECK-NOT: phi <vscale x 2 x i1>
+; CHECK: ret <vscale x 16 x i1> %pg
 
 entry:
   switch i32 %cond, label %br_phi_c [
@@ -257,3 +258,5 @@
 declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
 declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)
+
+attributes #0 = { "target-features"="+sve" }