diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1682,6 +1682,35 @@
     return new ShuffleVectorInst(NewBO0, NewBO1, Mask);
   }
 
+  auto createBinOpReverse = [&](Value *X, Value *Y) {
+    Value *V = Builder.CreateBinOp(Opcode, X, Y, Inst.getName());
+    if (auto *BO = dyn_cast<BinaryOperator>(V))
+      BO->copyIRFlags(&Inst);
+    Module *M = Inst.getModule();
+    Function *F = Intrinsic::getDeclaration(
+        M, Intrinsic::experimental_vector_reverse, V->getType());
+    return CallInst::Create(F, V);
+  };
+
+  // NOTE: Reverse shuffles don't require the speculative execution protection
+  // below because they don't affect which lanes take part in the computation.
+
+  Value *V1, *V2;
+  if (match(LHS, m_VecReverse(m_Value(V1)))) {
+    // Op(rev(V1), rev(V2)) -> rev(Op(V1, V2))
+    if (match(RHS, m_VecReverse(m_Value(V2))) &&
+        (LHS->hasOneUse() || RHS->hasOneUse() ||
+         (LHS == RHS && LHS->hasNUses(2))))
+      return createBinOpReverse(V1, V2);
+
+    // Op(rev(V1), RHSSplat) -> rev(Op(V1, RHSSplat))
+    if (LHS->hasOneUse() && isSplatValue(RHS))
+      return createBinOpReverse(V1, RHS);
+  }
+  // Op(LHSSplat, rev(V2)) -> rev(Op(LHSSplat, V2))
+  else if (isSplatValue(LHS) && match(RHS, m_OneUse(m_VecReverse(m_Value(V2)))))
+    return createBinOpReverse(LHS, V2);
+
   // It may not be safe to reorder shuffles and things like div, urem, etc.
   // because we may trap when executing those ops on unknown vector elements.
   // See PR20059.
@@ -1697,7 +1726,6 @@
 
   // If both arguments of the binary operation are shuffles that use the same
   // mask and shuffle within a single vector, move the shuffle after the binop.
-  Value *V1, *V2;
   if (match(LHS, m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))) &&
       match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(Mask))) &&
       V1->getType() == V2->getType() &&
diff --git a/llvm/test/Transforms/InstCombine/vector-reverse.ll b/llvm/test/Transforms/InstCombine/vector-reverse.ll
--- a/llvm/test/Transforms/InstCombine/vector-reverse.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reverse.ll
@@ -7,14 +7,13 @@
 define <vscale x 4 x i32> @binop_reverse(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @binop_reverse(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
-; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
-; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[A_REV]], [[B_REV]]
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]])
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
   %b.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %b)
-  %add = add <vscale x 4 x i32> %a.rev, %b.rev
+  %add = add nsw <vscale x 4 x i32> %a.rev, %b.rev
   ret <vscale x 4 x i32> %add
 }
 
@@ -22,9 +21,9 @@
 define <vscale x 4 x i32> @binop_reverse_1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @binop_reverse_1(
 ; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
-; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
 ; CHECK-NEXT:    call void @use_nxv4i32(<vscale x 4 x i32> [[A_REV]])
-; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[A_REV]], [[B_REV]]
+; CHECK-NEXT:    [[ADD1:%.*]] = add <vscale x 4 x i32> [[A]], [[B:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]])
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -37,10 +36,10 @@
 ; %b.rev has multiple uses
 define <vscale x 4 x i32> @binop_reverse_2(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: @binop_reverse_2(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
 ; CHECK-NEXT:    [[B_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[B:%.*]])
 ; CHECK-NEXT:    call void @use_nxv4i32(<vscale x 4 x i32> [[B_REV]])
-; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[A_REV]], [[B_REV]]
+; CHECK-NEXT:    [[ADD1:%.*]] = add <vscale x 4 x i32> [[A:%.*]], [[B]]
+; CHECK-NEXT:    [[ADD:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[ADD1]])
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -71,21 +70,35 @@
 ; %a.rev used as both operands
 define <vscale x 4 x i32> @binop_reverse_4(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: @binop_reverse_4(
+; CHECK-NEXT:    [[MUL1:%.*]] = mul <vscale x 4 x i32> [[A:%.*]], [[A]]
+; CHECK-NEXT:    [[MUL:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[MUL1]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[MUL]]
+;
+  %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+  %mul = mul <vscale x 4 x i32> %a.rev, %a.rev
+  ret <vscale x 4 x i32> %mul
+}
+
+; %a.rev used as both operands along with a third use
+define <vscale x 4 x i32> @binop_reverse_5(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: @binop_reverse_5(
 ; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
+; CHECK-NEXT:    call void @use_nxv4i32(<vscale x 4 x i32> [[A_REV]])
 ; CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 4 x i32> [[A_REV]], [[A_REV]]
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[MUL]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+  call void @use_nxv4i32(<vscale x 4 x i32> %a.rev)
   %mul = mul <vscale x 4 x i32> %a.rev, %a.rev
   ret <vscale x 4 x i32> %mul
 }
 
 define <vscale x 4 x i32> @binop_reverse_splat_RHS(<vscale x 4 x i32> %a, i32 %b) {
 ; CHECK-LABEL: @binop_reverse_splat_RHS(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
 ; CHECK-NEXT:    [[B_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[B_INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 4 x i32> [[A_REV]], [[B_SPLAT]]
+; CHECK-NEXT:    [[DIV1:%.*]] = udiv <vscale x 4 x i32> [[A:%.*]], [[B_SPLAT]]
+; CHECK-NEXT:    [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[DIV1]])
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
@@ -115,10 +128,10 @@
 define <vscale x 4 x i32> @binop_reverse_splat_LHS(<vscale x 4 x i32> %a, i32 %b) {
 ; CHECK-LABEL: @binop_reverse_splat_LHS(
-; CHECK-NEXT:    [[A_REV:%.*]] = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[A:%.*]])
 ; CHECK-NEXT:    [[B_INSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[B_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[B_INSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 4 x i32> [[B_SPLAT]], [[A_REV]]
+; CHECK-NEXT:    [[DIV1:%.*]] = udiv <vscale x 4 x i32> [[B_SPLAT]], [[A:%.*]]
+; CHECK-NEXT:    [[DIV:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> [[DIV1]])
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
 ;
   %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
@@ -36,15 +36,15 @@
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = bitcast double* [[TMP11]] to <vscale x 8 x double>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, <vscale x 8 x double>* [[TMP12]], align 8
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP14:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i32 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP13:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i32 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP5]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP16:%.*]] = shl i32 [[TMP15]], 3
 ; CHECK-NEXT:    [[TMP17:%.*]] = sub i32 1, [[TMP16]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = sext i32 [[TMP17]] to i64
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds double, double* [[TMP13]], i64 [[TMP18]]
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds double, double* [[TMP14]], i64 [[TMP18]]
 ; CHECK-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP19]] to <vscale x 8 x double>*
-; CHECK-NEXT:    store <vscale x 8 x double> [[TMP14]], <vscale x 8 x double>* [[TMP20]], align 8
+; CHECK-NEXT:    store <vscale x 8 x double> [[TMP13]], <vscale x 8 x double>* [[TMP20]], align 8
 ; CHECK-NEXT:    [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP22:%.*]] = shl i64 [[TMP21]], 3
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]]
@@ -130,15 +130,15 @@
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, i64* [[TMP12]], i64 [[TMP16]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i64* [[TMP17]] to <vscale x 8 x i64>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i64>, <vscale x 8 x i64>* [[TMP18]], align 8
-; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP20:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i32 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP19:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i32 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP11]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP22:%.*]] = shl i32 [[TMP21]], 3
 ; CHECK-NEXT:    [[TMP23:%.*]] = sub i32 1, [[TMP22]]
 ; CHECK-NEXT:    [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, i64* [[TMP19]], i64 [[TMP24]]
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, i64* [[TMP20]], i64 [[TMP24]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = bitcast i64* [[TMP25]] to <vscale x 8 x i64>*
-; CHECK-NEXT:    store <vscale x 8 x i64> [[TMP20]], <vscale x 8 x i64>* [[TMP26]], align 8
+; CHECK-NEXT:    store <vscale x 8 x i64> [[TMP19]], <vscale x 8 x i64>* [[TMP26]], align 8
 ; CHECK-NEXT:    [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP28:%.*]] = shl i64 [[TMP27]], 3
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP28]]
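
The new fold can be exercised on a standalone module. The IR below is a hand-written sketch in the style of the vector-reverse.ll tests above (the function name @example and the result names are illustrative, not taken from the patch); feeding it to `opt -passes=instcombine -S` should produce one binop on the unreversed operands followed by a single reverse, with copyIRFlags in createBinOpReverse carrying the nsw flag onto the new add:

; Input: reverse both operands, then add them.
define <vscale x 4 x i32> @example(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  %a.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
  %b.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %b)
  %add = add nsw <vscale x 4 x i32> %a.rev, %b.rev
  ret <vscale x 4 x i32> %add
}

declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>)

; Expected output: Op(rev(V1), rev(V2)) -> rev(Op(V1, V2)), two reverses become one.
;   %add1 = add nsw <vscale x 4 x i32> %a, %b
;   %add = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %add1)
;   ret <vscale x 4 x i32> %add

The use-count guards are what make this profitable: with each reverse single-use (or LHS == RHS with exactly two uses, as in binop_reverse_4), the rewrite nets fewer reverses, whereas if both input reverses had other users the transform would merely add a third reverse, so it is skipped.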