diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -14405,6 +14405,8 @@
 When specified with the fast-math-flag 'afn', the result may be approximated
 using a less accurate calculation.
 
+.. _int_powi:
+
 '``llvm.powi.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -20055,6 +20057,55 @@
 
       %also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> poison
 
+.. _int_vp_powi:
+
+'``llvm.vp.powi.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <16 x float>  @llvm.vp.powi.v16f32.i32 (<16 x float> <op>, i32 <power>, <16 x i1> <mask>, i32 <vector_length>)
+      declare <vscale x 4 x float>  @llvm.vp.powi.nxv4f32.i32 (<vscale x 4 x float> <op>, i32 <power>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+      declare <256 x double>  @llvm.vp.powi.v256f64.i64 (<256 x double> <op>, i64 <power>, <256 x i1> <mask>, i32 <vector_length>)
+
+Overview:
+"""""""""
+
+Predicated version of raising a vector of floating-point values to an integer
+power.
+
+
+Arguments:
+""""""""""
+
+The first operand and the result have the same vector of floating-point type.
+The second operand is the integer power. The third operand is the vector mask
+and has the same number of elements as the result vector type. The fourth
+operand is the explicit vector length of the operation.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.powi``' intrinsic performs floating-point powi
+(:ref:`powi <int_powi>`) of the first vector operand on each enabled lane, with
+the second operand as the exponent. The result on disabled lanes is a
+:ref:`poison value <poisonvalues>`. The operation is performed in the default
+floating-point environment.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r = call <4 x float> @llvm.vp.powi.v4f32.i32(<4 x float> %a, i32 %b, <4 x i1> %mask, i32 %evl)
+      ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+      %t = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> %a, i32 %b)
+      %also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> poison
+
+
 .. _int_vp_fma:
 
 '``llvm.vp.fma.*``' Intrinsics
diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
--- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def
+++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
@@ -45,6 +45,7 @@
 FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true))
 FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass, ())
 FUNCTION_PASS("expand-large-fp-convert", ExpandLargeFpConvertPass, ())
+FUNCTION_PASS("expand-powi", ExpandPowiPass, ())
 FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ())
 FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ())
 FUNCTION_PASS("lowerinvoke", LowerInvokePass, ())
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -518,6 +518,9 @@
   // Expands large div/rem instructions.
   FunctionPass *createExpandLargeFpConvertPass();
 
+  // Expands powi intrinsics.
+  FunctionPass *createExpandPowiPass();
+
   // This pass expands memcmp() to load/stores.
  FunctionPass *createExpandMemCmpPass();
 
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1682,6 +1682,11 @@
                                 [ LLVMMatchType<0>,
                                   LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                                   llvm_i32_ty]>;
+  def int_vp_powi : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+                                          [ LLVMMatchType<0>,
+                                            llvm_anyint_ty,
+                                            LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                            llvm_i32_ty]>;
 
   // Casts
   def int_vp_trunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -364,6 +364,9 @@
 BEGIN_REGISTER_VP(vp_nearbyint, 1, 2, VP_FNEARBYINT, -1)
 END_REGISTER_VP(vp_nearbyint, VP_FNEARBYINT)
 
+// llvm.vp.powi(x, y, mask, vlen)
+BEGIN_REGISTER_VP_INTRINSIC(vp_powi, 2, 3)
+END_REGISTER_VP_INTRINSIC(vp_powi)
 ///// } Floating-Point Arithmetic
 
 ///// Type Casts {
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -126,6 +126,7 @@
 void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&);
 void initializeExpandMemCmpPassPass(PassRegistry&);
 void initializeExpandPostRAPass(PassRegistry&);
+void initializeExpandPowiLegacyPassPass(PassRegistry &);
 void initializeExpandReductionsPass(PassRegistry&);
 void initializeExpandVectorPredicationPass(PassRegistry &);
 void initializeMakeGuardsExplicitLegacyPassPass(PassRegistry&);
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -59,6 +59,7 @@
   ExpandLargeFpConvert.cpp
   ExpandMemCmp.cpp
   ExpandPostRAPseudos.cpp
+  ExpandPowi.cpp
   ExpandReductions.cpp
   ExpandVectorPredication.cpp
   FaultMaps.cpp
diff --git a/llvm/lib/CodeGen/ExpandPowi.cpp b/llvm/lib/CodeGen/ExpandPowi.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/CodeGen/ExpandPowi.cpp
@@ -0,0 +1,160 @@
+//===--- ExpandPowi.cpp - Expand powi intrinsics ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements IR expansion for powi/vp.powi. The expansion is based
+// on compiler-rt's __powidf2.c.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+#define DEBUG_TYPE "expand-powi"
+
+using namespace llvm;
+
+// The expansion is based on the C code of compiler-rt's __powidf2.c:
+//   const int recip = b < 0;
+//   double r = 1;
+//   while (1) {
+//     if (b & 1)
+//       r *= a;
+//     b /= 2;
+//     if (b == 0)
+//       break;
+//     a *= a;
+//   }
+//   return recip ? 1 / r : r;
+//
+// The expansion of llvm.powi also uses VP intrinsics here: llvm.powi is
+// treated as llvm.vp.powi with an all-true mask and the maximum vector length.
+static void expandPowi(IntrinsicInst *II) {
+  LLVMContext &C = II->getContext();
+  Value *OrigBase = II->getOperand(0);
+  Value *OrigExp = II->getOperand(1);
+  VectorType *BaseTy = cast<VectorType>(OrigBase->getType());
+  Type *ExpTy = OrigExp->getType();
+  Type *CondTy = BaseTy->getWithNewType(Type::getInt1Ty(C));
+  Value *True = ConstantInt::get(CondTy, 1);
+  Value *Mask, *EVL;
+  if (II->getIntrinsicID() == Intrinsic::vp_powi) {
+    Mask = II->getOperand(2);
+    EVL = II->getOperand(3);
+  } else {
+    assert(II->getIntrinsicID() == Intrinsic::powi);
+    Mask = True;
+    IRBuilder<> Builder(II);
+    EVL = Builder.CreateElementCount(Type::getInt32Ty(C),
+                                     BaseTy->getElementCount());
+  }
+
+  BasicBlock *PreLoopBB = II->getParent();
+  BasicBlock *PostLoopBB = PreLoopBB->splitBasicBlock(II, "powi-post-loop");
+  BasicBlock *LoopBody =
+      BasicBlock::Create(PreLoopBB->getContext(), "powi-expansion-loop",
+                         PreLoopBB->getParent(), PostLoopBB);
+
+  IRBuilder<> Builder(PreLoopBB->getTerminator());
+  Builder.CreateBr(LoopBody);
+  PreLoopBB->getTerminator()->eraseFromParent();
+
+  Builder.SetInsertPoint(LoopBody);
+  // Create phi of base.
+  PHINode *Base = Builder.CreatePHI(BaseTy, 2, "base");
+  Base->addIncoming(OrigBase, PreLoopBB);
+  // Create phi of exponent.
+  PHINode *Exp = Builder.CreatePHI(ExpTy, 2, "exp");
+  Exp->addIncoming(OrigExp, PreLoopBB);
+  // Create phi of res.
+  PHINode *Res = Builder.CreatePHI(BaseTy, 2, "res");
+  Res->addIncoming(ConstantFP::get(BaseTy, 1.), PreLoopBB);
+  // Res *= Base if Exp is odd.
+  Value *Tmp = Builder.CreateIntrinsic(BaseTy, Intrinsic::vp_fmul,
+                                       {Res, Base, True, EVL});
+  Value *And1 = Builder.CreateAnd(Exp, ConstantInt::get(ExpTy, 1));
+  Value *IsOdd = Builder.CreateICmpNE(And1, ConstantInt::get(ExpTy, 0));
+  Value *IsOddVec = Builder.CreateVectorSplat(BaseTy->getElementCount(), IsOdd);
+  Value *NewRes = Builder.CreateIntrinsic(BaseTy, Intrinsic::vp_select,
+                                          {IsOddVec, Tmp, Res, EVL});
+  Res->addIncoming(NewRes, LoopBody);
+  // Update Exp.
+  Value *NewExp = Builder.CreateLShr(Exp, ConstantInt::get(ExpTy, 1));
+  Exp->addIncoming(NewExp, LoopBody);
+  // Update Base.
+  Value *NewBase = Builder.CreateIntrinsic(BaseTy, Intrinsic::vp_fmul,
+                                           {Base, Base, True, EVL});
+  Base->addIncoming(NewBase, LoopBody);
+  // Check whether NewExp is zero.
+  Builder.CreateCondBr(Builder.CreateICmpEQ(NewExp, ConstantInt::get(ExpTy, 0)),
+                       PostLoopBB, LoopBody);
+
+  Builder.SetInsertPoint(&PostLoopBB->front());
+  // Use reciprocal if power is negative.
+  Value *Recip =
+      Builder.CreateIntrinsic(BaseTy, Intrinsic::vp_fdiv,
+                              {ConstantFP::get(BaseTy, 1.), NewRes, Mask, EVL});
+  Value *IsNegative =
+      Builder.CreateICmpSLT(OrigExp, ConstantInt::get(ExpTy, 0));
+  Value *IsNegativeVec =
+      Builder.CreateVectorSplat(BaseTy->getElementCount(), IsNegative);
+  Value *Powi = Builder.CreateIntrinsic(BaseTy, Intrinsic::vp_select,
+                                        {IsNegativeVec, Recip, NewRes, EVL});
+  II->replaceAllUsesWith(Powi);
+  II->eraseFromParent();
+}
+
+static bool runImpl(Function &F) {
+  SmallVector<IntrinsicInst *> Replace;
+  for (auto &I : instructions(F)) {
+    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+      // TODO: Add a cost model to decide whether llvm.powi on small
+      // fixed-length vectors should also be expanded.
+      if (II->getIntrinsicID() == Intrinsic::vp_powi ||
+          (II->getIntrinsicID() == Intrinsic::powi &&
+           isa<ScalableVectorType>(II->getType())))
+        Replace.push_back(II);
+    }
+  }
+
+  if (Replace.empty())
+    return false;
+
+  for (IntrinsicInst *II : Replace)
+    expandPowi(II);
+
+  return true;
+}
+
+namespace {
+class ExpandPowiLegacyPass : public FunctionPass {
+public:
+  static char ID;
+
+  ExpandPowiLegacyPass() : FunctionPass(ID) {
+    initializeExpandPowiLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override { return runImpl(F); }
+};
+} // namespace
+
+char ExpandPowiLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ExpandPowiLegacyPass, "expand-powi",
+                      "Expand powi functions", false, false)
+INITIALIZE_PASS_END(ExpandPowiLegacyPass, "expand-powi",
+                    "Expand powi functions", false, false)
+
+FunctionPass *llvm::createExpandPowiPass() {
+  return new ExpandPowiLegacyPass();
+}
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1088,6 +1088,7 @@
   PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
   addPass(createExpandLargeDivRemPass());
   addPass(createExpandLargeFpConvertPass());
+  addPass(createExpandPowiPass());
   addIRPasses();
   addCodeGenPrepare();
   addPassesToHandleExceptions();
diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
--- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll
@@ -17,6 +17,7 @@
 ; CHECK-NEXT: FunctionPass Manager
 ; CHECK-NEXT: Expand large div/rem
 ; CHECK-NEXT: Expand large fp convert
+; CHECK-NEXT: Expand powi functions
 ; CHECK-NEXT: Expand Atomic instructions
 ; CHECK-NEXT: Module Verifier
 ; CHECK-NEXT: Lower Garbage Collection Instructions
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -21,6 +21,7 @@
 ; CHECK-NEXT: FunctionPass Manager
 ; CHECK-NEXT: Expand large div/rem
 ; CHECK-NEXT: Expand large fp convert
+; CHECK-NEXT: Expand powi functions
 ; CHECK-NEXT: Expand Atomic instructions
 ; CHECK-NEXT: SVE intrinsics optimizations
 ; CHECK-NEXT: FunctionPass Manager
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -28,6 +28,7 @@
 ; GCN-O0-NEXT: FunctionPass Manager
 ; GCN-O0-NEXT: Expand large div/rem
 ; GCN-O0-NEXT: Expand large fp convert
+; GCN-O0-NEXT: Expand powi functions
 ; GCN-O0-NEXT: AMDGPU Printf lowering
 ; GCN-O0-NEXT: FunctionPass Manager
 ; GCN-O0-NEXT: Dominator Tree Construction
@@ -178,6 +179,7 @@
 ; GCN-O1-NEXT: FunctionPass Manager
 ; GCN-O1-NEXT: Expand large div/rem
 ; GCN-O1-NEXT: Expand large fp convert
+; GCN-O1-NEXT: Expand powi functions
 ; GCN-O1-NEXT: AMDGPU Printf lowering
 ; GCN-O1-NEXT: FunctionPass Manager
 ; GCN-O1-NEXT: Dominator Tree Construction
@@ -453,6 +455,7 @@
 ; GCN-O1-OPTS-NEXT: FunctionPass Manager
 ; GCN-O1-OPTS-NEXT: Expand large div/rem
 ; GCN-O1-OPTS-NEXT: Expand large fp convert
+; GCN-O1-OPTS-NEXT: Expand powi functions
 ; GCN-O1-OPTS-NEXT: AMDGPU Printf lowering
 ; GCN-O1-OPTS-NEXT: FunctionPass Manager
 ; GCN-O1-OPTS-NEXT: Dominator Tree Construction
@@ -760,6 +763,7 @@
 ; GCN-O2-NEXT: FunctionPass Manager
 ; GCN-O2-NEXT: Expand large div/rem
 ; GCN-O2-NEXT: Expand large fp convert
+; GCN-O2-NEXT: Expand powi functions
 ; GCN-O2-NEXT: AMDGPU Printf lowering
 ; GCN-O2-NEXT: FunctionPass Manager
 ; GCN-O2-NEXT: Dominator Tree Construction
@@ -1070,6 +1074,7 @@
 ; GCN-O3-NEXT: FunctionPass Manager
 ; GCN-O3-NEXT: Expand large div/rem
 ; GCN-O3-NEXT: Expand large fp convert
+; GCN-O3-NEXT: Expand powi functions
 ; GCN-O3-NEXT: AMDGPU Printf lowering
 ; GCN-O3-NEXT: FunctionPass Manager
 ; GCN-O3-NEXT: Dominator Tree Construction
diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll
--- a/llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -7,6 +7,7 @@
 ; CHECK-NEXT: FunctionPass Manager
 ; CHECK-NEXT: Expand large div/rem
 ; CHECK-NEXT: Expand large fp convert
+; CHECK-NEXT: Expand powi functions
 ; CHECK-NEXT: Expand Atomic instructions
 ; CHECK-NEXT: Simplify the CFG
 ; CHECK-NEXT: Dominator Tree Construction
diff --git a/llvm/test/CodeGen/Generic/expand-powi.ll b/llvm/test/CodeGen/Generic/expand-powi.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/expand-powi.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -expand-powi -S < %s | FileCheck %s
+declare <vscale x 1 x float> @llvm.vp.powi.nxv1f32.i32(<vscale x 1 x float>, i32, <vscale x 1 x i1>, i32)
+define <vscale x 1 x float> @foo(<vscale x 1 x float> %a, i32 %b, <vscale x 1 x i1> %m, i32 %evl) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[POWI_EXPANSION_LOOP:%.*]]
+; CHECK: powi-expansion-loop:
+; CHECK-NEXT: [[BASE:%.*]] = phi <vscale x 1 x float> [ [[A:%.*]], [[ENTRY:%.*]] ], [ [[TMP5:%.*]], [[POWI_EXPANSION_LOOP]] ]
+; CHECK-NEXT: [[EXP:%.*]] = phi i32 [ [[B:%.*]], [[ENTRY]] ], [ [[TMP4:%.*]], [[POWI_EXPANSION_LOOP]] ]
+; CHECK-NEXT: [[RES:%.*]] = phi <vscale x 1 x float> [ shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 1.000000e+00, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), [[ENTRY]] ], [ [[TMP3:%.*]], [[POWI_EXPANSION_LOOP]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.vp.fmul.nxv1f32(<vscale x 1 x float> [[RES]], <vscale x 1 x float> [[BASE]], <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[EVL:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[EXP]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 [[TMP2]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i1> [[DOTSPLATINSERT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3]] = call <vscale x 1 x float> @llvm.vp.select.nxv1f32(<vscale x 1 x i1> [[DOTSPLAT]], <vscale x 1 x float> [[TMP0]], <vscale x 1 x float> [[RES]], i32 [[EVL]])
+; CHECK-NEXT: [[TMP4]] = lshr i32 [[EXP]], 1
+; CHECK-NEXT: [[TMP5]] = call <vscale x 1 x float> @llvm.vp.fmul.nxv1f32(<vscale x 1 x float> [[BASE]], <vscale x 1 x float> [[BASE]], <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[EVL]])
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[TMP6]], label [[POWI_POST_LOOP:%.*]], label [[POWI_EXPANSION_LOOP]]
+; CHECK: powi-post-loop:
+; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 1.000000e+00, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[M:%.*]], i32 [[EVL]])
+; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i32 [[B]], 0
+; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 1 x i1> poison, i1 [[TMP8]], i64 0
+; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 1 x i1> [[DOTSPLATINSERT1]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 1 x float> @llvm.vp.select.nxv1f32(<vscale x 1 x i1> [[DOTSPLAT2]], <vscale x 1 x float> [[TMP7]], <vscale x 1 x float> [[TMP3]], i32 [[EVL]])
+; CHECK-NEXT: ret <vscale x 1 x float> [[TMP9]]
+;
+entry:
+  %0 = call <vscale x 1 x float> @llvm.vp.powi.nxv1f32.i32(<vscale x 1 x float> %a, i32 %b, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x float> %0
+}
+
+declare <vscale x 1 x float> @llvm.powi.nxv1f32.i32(<vscale x 1 x float>, i32)
+define <vscale x 1 x float> @foo2(<vscale x 1 x float> %a, i32 %b) {
+; CHECK-LABEL: @foo2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: br label [[POWI_EXPANSION_LOOP:%.*]]
+; CHECK: powi-expansion-loop:
+; CHECK-NEXT: [[BASE:%.*]] = phi <vscale x 1 x float> [ [[A:%.*]], [[ENTRY:%.*]] ], [ [[TMP6:%.*]], [[POWI_EXPANSION_LOOP]] ]
+; CHECK-NEXT: [[EXP:%.*]] = phi i32 [ [[B:%.*]], [[ENTRY]] ], [ [[TMP5:%.*]], [[POWI_EXPANSION_LOOP]] ]
+; CHECK-NEXT: [[RES:%.*]] = phi <vscale x 1 x float> [ shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 1.000000e+00, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), [[ENTRY]] ], [ [[TMP4:%.*]], [[POWI_EXPANSION_LOOP]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.vp.fmul.nxv1f32(<vscale x 1 x float> [[RES]], <vscale x 1 x float> [[BASE]], <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[EXP]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 [[TMP3]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i1> [[DOTSPLATINSERT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4]] = call <vscale x 1 x float> @llvm.vp.select.nxv1f32(<vscale x 1 x i1> [[DOTSPLAT]], <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> [[RES]], i32 [[TMP0]])
+; CHECK-NEXT: [[TMP5]] = lshr i32 [[EXP]], 1
+; CHECK-NEXT: [[TMP6]] = call <vscale x 1 x float> @llvm.vp.fmul.nxv1f32(<vscale x 1 x float> [[BASE]], <vscale x 1 x float> [[BASE]], <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP0]])
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[TMP7]], label [[POWI_POST_LOOP:%.*]], label [[POWI_EXPANSION_LOOP]]
+; CHECK: powi-post-loop:
+; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 1.000000e+00, i64 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x float> [[TMP4]], <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP0]])
+; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i32 [[B]], 0
+; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 1 x i1> poison, i1 [[TMP9]], i64 0
+; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 1 x i1> [[DOTSPLATINSERT1]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 1 x float> @llvm.vp.select.nxv1f32(<vscale x 1 x i1> [[DOTSPLAT2]], <vscale x 1 x float> [[TMP8]], <vscale x 1 x float> [[TMP4]], i32 [[TMP0]])
+; CHECK-NEXT: ret <vscale x 1 x float> [[TMP10]]
+;
+entry:
+  %0 = call <vscale x 1 x float> @llvm.powi.nxv1f32.i32(<vscale x 1 x float> %a, i32 %b)
+  ret <vscale x 1 x float> %0
+}
diff --git a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
--- a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
@@ -21,6 +21,7 @@
 ; CHECK-NEXT: FunctionPass Manager
 ; CHECK-NEXT: Expand large div/rem
 ; CHECK-NEXT: Expand large fp convert
+; CHECK-NEXT: Expand powi functions
 ; CHECK-NEXT: Expand Atomic instructions
 ; CHECK-NEXT: Module Verifier
 ; CHECK-NEXT: Lower Garbage Collection Instructions
diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
--- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
@@ -33,6 +33,7 @@
 ; CHECK-NEXT: FunctionPass Manager
 ; CHECK-NEXT: Expand large div/rem
 ; CHECK-NEXT: Expand large fp convert
+; CHECK-NEXT: Expand powi functions
 ; CHECK-NEXT: Expand Atomic instructions
 ; CHECK-NEXT: Module Verifier
 ; CHECK-NEXT: Dominator Tree Construction
diff --git a/llvm/test/CodeGen/M68k/pipeline.ll b/llvm/test/CodeGen/M68k/pipeline.ll
--- a/llvm/test/CodeGen/M68k/pipeline.ll
+++ b/llvm/test/CodeGen/M68k/pipeline.ll
@@ -4,6 +4,7 @@
 ; CHECK-NEXT: FunctionPass Manager
 ; CHECK-NEXT: Expand large div/rem
 ; CHECK-NEXT: Expand large fp convert
+; CHECK-NEXT: Expand powi functions
 ; CHECK-NEXT: Expand Atomic instructions
 ; CHECK-NEXT: Module Verifier
 ; CHECK-NEXT: Dominator Tree Construction
diff --git
a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll --- a/llvm/test/CodeGen/PowerPC/O0-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O0-pipeline.ll @@ -18,6 +18,7 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand large div/rem ; CHECK-NEXT: Expand large fp convert +; CHECK-NEXT: Expand powi functions ; CHECK-NEXT: Expand Atomic instructions ; CHECK-NEXT: PPC Lower MASS Entries ; CHECK-NEXT: FunctionPass Manager diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll --- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -21,6 +21,7 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand large div/rem ; CHECK-NEXT: Expand large fp convert +; CHECK-NEXT: Expand powi functions ; CHECK-NEXT: Convert i1 constants to i32/i64 if they are returned ; CHECK-NEXT: Expand Atomic instructions ; CHECK-NEXT: PPC Lower MASS Entries diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -21,6 +21,7 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand large div/rem ; CHECK-NEXT: Expand large fp convert +; CHECK-NEXT: Expand powi functions ; CHECK-NEXT: Expand Atomic instructions ; CHECK-NEXT: Module Verifier ; CHECK-NEXT: Lower Garbage Collection Instructions diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -25,6 +25,7 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand large div/rem ; CHECK-NEXT: Expand large fp convert +; CHECK-NEXT: Expand powi functions ; CHECK-NEXT: Expand Atomic instructions ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information diff --git a/llvm/test/CodeGen/RISCV/rvv/expand-powi.ll b/llvm/test/CodeGen/RISCV/rvv/expand-powi.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/expand-powi.ll @@ -0,0 +1,151 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64 + +declare @llvm.vp.powi.nxv1f32.i32(, i32, , i32) +define @foo( %a, i32 %b, %m, i32 %evl) { +; RV32-LABEL: foo: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vmv1r.v v9, v0 +; RV32-NEXT: lui a2, 260096 +; RV32-NEXT: vsetvli a3, zero, e32, mf2, ta, ma +; RV32-NEXT: vmv.v.x v10, a2 +; RV32-NEXT: li a2, 1 +; RV32-NEXT: mv a3, a0 +; RV32-NEXT: .LBB0_1: # %powi-expansion-loop +; RV32-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32-NEXT: andi a4, a3, 1 +; RV32-NEXT: vsetvli a5, zero, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.x v11, a4 +; RV32-NEXT: vmsne.vi v0, v11, 0 +; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; RV32-NEXT: vfmul.vv v10, v10, v8, v0.t +; RV32-NEXT: srli a3, a3, 1 +; RV32-NEXT: vfmul.vv v8, v8, v8 +; RV32-NEXT: bne a3, a2, .LBB0_1 +; RV32-NEXT: # %bb.2: # %powi-post-loop +; RV32-NEXT: lui a2, 260096 +; RV32-NEXT: fmv.w.x ft0, a2 +; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32-NEXT: vmv1r.v v0, v9 +; RV32-NEXT: vfrdiv.vf v8, v10, ft0, v0.t +; RV32-NEXT: slti a0, a0, 0 +; RV32-NEXT: vsetvli a2, zero, e8, mf8, ta, ma +; RV32-NEXT: 
vmv.v.x v9, a0 +; RV32-NEXT: vmsne.vi v0, v9, 0 +; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; RV32-NEXT: vmerge.vvm v8, v10, v8, v0 +; RV32-NEXT: ret +; +; RV64-LABEL: foo: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vmv1r.v v9, v0 +; RV64-NEXT: lui a2, 260096 +; RV64-NEXT: vsetvli a3, zero, e32, mf2, ta, ma +; RV64-NEXT: vmv.v.x v10, a2 +; RV64-NEXT: slli a1, a1, 32 +; RV64-NEXT: srli a1, a1, 32 +; RV64-NEXT: li a2, 1 +; RV64-NEXT: mv a3, a0 +; RV64-NEXT: .LBB0_1: # %powi-expansion-loop +; RV64-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64-NEXT: andi a4, a3, 1 +; RV64-NEXT: vsetvli a5, zero, e8, mf8, ta, ma +; RV64-NEXT: vmv.v.x v11, a4 +; RV64-NEXT: vmsne.vi v0, v11, 0 +; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; RV64-NEXT: vfmul.vv v10, v10, v8, v0.t +; RV64-NEXT: srliw a3, a3, 1 +; RV64-NEXT: vfmul.vv v8, v8, v8 +; RV64-NEXT: bne a3, a2, .LBB0_1 +; RV64-NEXT: # %bb.2: # %powi-post-loop +; RV64-NEXT: lui a2, 260096 +; RV64-NEXT: fmv.w.x ft0, a2 +; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64-NEXT: vmv1r.v v0, v9 +; RV64-NEXT: vfrdiv.vf v8, v10, ft0, v0.t +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: slti a0, a0, 0 +; RV64-NEXT: vsetvli a2, zero, e8, mf8, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vmsne.vi v0, v9, 0 +; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; RV64-NEXT: vmerge.vvm v8, v10, v8, v0 +; RV64-NEXT: ret +entry: + %0 = call @llvm.vp.powi.nxv1f32.i32( %a, i32 %b, %m, i32 %evl) + ret %0 +} + +declare @llvm.powi.nxv1f32.i32(, i32) +define @foo2( %a, i32 %b) { +; RV32-LABEL: foo2: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vmv1r.v v9, v8 +; RV32-NEXT: lui a1, 260096 +; RV32-NEXT: vsetvli a2, zero, e32, mf2, ta, ma +; RV32-NEXT: vmv.v.x v8, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: srli a1, a1, 3 +; RV32-NEXT: li a2, 1 +; RV32-NEXT: mv a3, a0 +; RV32-NEXT: .LBB1_1: # %powi-expansion-loop +; RV32-NEXT: # =>This Inner Loop Header: Depth=1 +; RV32-NEXT: andi a4, a3, 1 +; RV32-NEXT: vsetvli a5, zero, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.x v10, a4 +; RV32-NEXT: vmsne.vi v0, v10, 0 +; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; RV32-NEXT: vfmul.vv v8, v8, v9, v0.t +; RV32-NEXT: srli a3, a3, 1 +; RV32-NEXT: vfmul.vv v9, v9, v9 +; RV32-NEXT: bne a3, a2, .LBB1_1 +; RV32-NEXT: # %bb.2: # %powi-post-loop +; RV32-NEXT: slti a0, a0, 0 +; RV32-NEXT: vsetvli a2, zero, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.x v9, a0 +; RV32-NEXT: vmsne.vi v0, v9, 0 +; RV32-NEXT: lui a0, 260096 +; RV32-NEXT: fmv.w.x ft0, a0 +; RV32-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; RV32-NEXT: vfrdiv.vf v8, v8, ft0, v0.t +; RV32-NEXT: ret +; +; RV64-LABEL: foo2: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vmv1r.v v9, v8 +; RV64-NEXT: lui a1, 260096 +; RV64-NEXT: vsetvli a2, zero, e32, mf2, ta, ma +; RV64-NEXT: vmv.v.x v8, a1 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: srli a1, a1, 3 +; RV64-NEXT: li a2, 1 +; RV64-NEXT: mv a3, a0 +; RV64-NEXT: .LBB1_1: # %powi-expansion-loop +; RV64-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64-NEXT: andi a4, a3, 1 +; RV64-NEXT: vsetvli a5, zero, e8, mf8, ta, ma +; RV64-NEXT: vmv.v.x v10, a4 +; RV64-NEXT: vmsne.vi v0, v10, 0 +; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; RV64-NEXT: vfmul.vv v8, v8, v9, v0.t +; RV64-NEXT: srliw a3, a3, 1 +; RV64-NEXT: vfmul.vv v9, v9, v9 +; RV64-NEXT: bne a3, a2, .LBB1_1 +; RV64-NEXT: # %bb.2: # %powi-post-loop +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: slti a0, a0, 0 +; RV64-NEXT: vsetvli a2, zero, e8, mf8, ta, ma +; RV64-NEXT: vmv.v.x v9, a0 +; RV64-NEXT: vmsne.vi v0, v9, 0 +; RV64-NEXT: lui a0, 260096 +; 
RV64-NEXT: fmv.w.x ft0, a0 +; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; RV64-NEXT: vfrdiv.vf v8, v8, ft0, v0.t +; RV64-NEXT: ret +entry: + %0 = call @llvm.powi.nxv1f32.i32( %a, i32 %b) + ret %0 +} diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll --- a/llvm/test/CodeGen/X86/O0-pipeline.ll +++ b/llvm/test/CodeGen/X86/O0-pipeline.ll @@ -19,6 +19,7 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand large div/rem ; CHECK-NEXT: Expand large fp convert +; CHECK-NEXT: Expand powi functions ; CHECK-NEXT: Expand Atomic instructions ; CHECK-NEXT: Lower AMX intrinsics ; CHECK-NEXT: Lower AMX type for load/store diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -29,6 +29,7 @@ ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Expand large div/rem ; CHECK-NEXT: Expand large fp convert +; CHECK-NEXT: Expand powi functions ; CHECK-NEXT: Expand Atomic instructions ; CHECK-NEXT: Lower AMX intrinsics ; CHECK-NEXT: Lower AMX type for load/store diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -363,6 +363,7 @@ initializeConstantHoistingLegacyPassPass(*Registry); initializeScalarOpts(*Registry); initializeVectorization(*Registry); + initializeExpandPowiLegacyPassPass(*Registry); initializeScalarizeMaskedMemIntrinLegacyPassPass(*Registry); initializeExpandReductionsPass(*Registry); initializeExpandVectorPredicationPass(*Registry); diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -394,6 +394,7 @@ "fix-irreducible", "expand-large-fp-convert", "callbrprepare", + "expand-powi", }; for (const auto &P : PassNamePrefix) if (Pass.startswith(P)) @@ -443,6 +444,7 @@ initializeExpandLargeDivRemLegacyPassPass(Registry); initializeExpandLargeFpConvertLegacyPassPass(Registry); initializeExpandMemCmpPassPass(Registry); + initializeExpandPowiLegacyPassPass(Registry); initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry); initializeSelectOptimizePass(Registry); initializeCallBrPreparePass(Registry); diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp --- a/llvm/unittests/IR/VPIntrinsicTest.cpp +++ b/llvm/unittests/IR/VPIntrinsicTest.cpp @@ -77,6 +77,8 @@ "i32)"; Str << " declare <8 x float> @llvm.vp.sqrt.v8f32(<8 x float>, <8 x i1>, " "i32)"; + Str << " declare <8 x float> @llvm.vp.powi.v8f32.i32(<8 x float>, i32, " + "<8 x i1>, i32)"; Str << " declare <8 x float> @llvm.vp.fma.v8f32(<8 x float>, <8 x float>, " "<8 x float>, <8 x i1>, i32) "; Str << " declare <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float>, "