Index: llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1415,6 +1415,30 @@
     return createBinOpShuffle(V1, V2, Mask);
   }
 
+  // If both arguments of a commutative binop are select-shuffles that use the
+  // same mask with commuted operands, the shuffles are unnecessary.
+  if (Inst.isCommutative() &&
+      match(LHS, m_ShuffleVector(m_Value(V1), m_Value(V2), m_Constant(Mask))) &&
+      match(RHS, m_ShuffleVector(m_Specific(V2), m_Specific(V1),
+                                 m_Specific(Mask)))) {
+    auto *LShuf = cast<ShuffleVectorInst>(LHS);
+    auto *RShuf = cast<ShuffleVectorInst>(RHS);
+    // TODO: Allow shuffles that contain undefs in the mask?
+    //       That is legal, but it reduces undef knowledge.
+    // TODO: Allow arbitrary shuffles by shuffling after binop?
+    //       That might be legal, but we have to deal with poison.
+    if (LShuf->isSelect() && !LShuf->getMask()->containsUndefElement() &&
+        RShuf->isSelect() && !RShuf->getMask()->containsUndefElement()) {
+      // Example:
+      // LHS = shuffle V1, V2, <0, 5, 6, 3>
+      // RHS = shuffle V2, V1, <0, 5, 6, 3>
+      // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
+      Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
+      NewBO->copyIRFlags(&Inst);
+      return NewBO;
+    }
+  }
+
   // If one argument is a shuffle within one vector and the other is a constant,
   // try moving the shuffle after the binary operation. This canonicalization
   // intends to move shuffles closer to other shuffles and binops closer to
Index: llvm/trunk/test/Transforms/InstCombine/vec-binop-select.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/vec-binop-select.ll
+++ llvm/trunk/test/Transforms/InstCombine/vec-binop-select.ll
@@ -5,9 +5,7 @@
 
 define <4 x i32> @and(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @and(
-; CHECK-NEXT:    [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
-; CHECK-NEXT:    [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
-; CHECK-NEXT:    [[R:%.*]] = and <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT:    [[R:%.*]] = and <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
   %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
@@ -18,9 +16,7 @@
 
 define <4 x i32> @or(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @or(
-; CHECK-NEXT:    [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
-; CHECK-NEXT:    [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
-; CHECK-NEXT:    [[R:%.*]] = or <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT:    [[R:%.*]] = or <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
   %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
@@ -33,9 +29,7 @@
 
 define <4 x i32> @xor(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @xor(
-; CHECK-NEXT:    [[SEL1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    [[SEL2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Y]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    [[R:%.*]] = xor <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT:    [[R:%.*]] = xor <4 x i32> [[Y:%.*]], [[X:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
   %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
@@ -48,13 +42,56 @@
 
 define <4 x i32> @add(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @add(
+; CHECK-NEXT:    [[R:%.*]] = add nsw <4 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret <4 x i32> [[R]]
+;
+  %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  %r = add nsw <4 x i32> %sel1, %sel2
+  ret <4 x i32> %r
+}
+
+; Negative test - wrong operand
+
+define <4 x i32> @add_wrong_op(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: @add_wrong_op(
 ; CHECK-NEXT:    [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[Z:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
 ; CHECK-NEXT:    [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]]
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
   %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  %sel2 = shufflevector <4 x i32> %y, <4 x i32> %z, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  %r = add nsw <4 x i32> %sel1, %sel2
+  ret <4 x i32> %r
+}
+
+; Negative test - wrong mask (but we could handle this...)
+
+define <4 x i32> @add_non_select_mask(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @add_non_select_mask(
+; CHECK-NEXT:    [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 1, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 1, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT:    ret <4 x i32> [[R]]
+;
+  %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 5, i32 2, i32 7>
+  %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 1, i32 5, i32 2, i32 7>
+  %r = add nsw <4 x i32> %sel1, %sel2
+  ret <4 x i32> %r
+}
+
+; Negative test - wrong mask (but we could handle this...)
+
+define <4 x i32> @add_masks_with_undefs(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @add_masks_with_undefs(
+; CHECK-NEXT:    [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT:    ret <4 x i32> [[R]]
+;
+  %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
+  %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
   %r = add nsw <4 x i32> %sel1, %sel2
   ret <4 x i32> %r
 }
@@ -76,9 +113,7 @@
 
 define <4 x i32> @mul(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @mul(
-; CHECK-NEXT:    [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT:    [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT:    [[R:%.*]] = mul nuw <4 x i32> [[SEL1]], [[SEL2]]
+; CHECK-NEXT:    [[R:%.*]] = mul nuw <4 x i32> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
   %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
@@ -180,9 +215,7 @@
 
 define <4 x float> @fadd(<4 x float> %x, <4 x float> %y) {
 ; CHECK-LABEL: @fadd(
-; CHECK-NEXT:    [[SEL1:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    [[SEL2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    [[R:%.*]] = fadd <4 x float> [[SEL1]], [[SEL2]]
+; CHECK-NEXT:    [[R:%.*]] = fadd <4 x float> [[Y:%.*]], [[X:%.*]]
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %sel1 = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
@@ -206,9 +239,7 @@
 
 define <4 x double> @fmul(<4 x double> %x, <4 x double> %y) {
 ; CHECK-LABEL: @fmul(
-; CHECK-NEXT:    [[SEL1:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    [[SEL2:%.*]] = shufflevector <4 x double> [[X]], <4 x double> [[Y]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    [[R:%.*]] = fmul nnan <4 x double> [[SEL1]], [[SEL2]]
+; CHECK-NEXT:    [[R:%.*]] = fmul nnan <4 x double> [[Y:%.*]], [[X:%.*]]
 ; CHECK-NEXT:    ret <4 x double> [[R]]
 ;
   %sel1 = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 4, i32 1, i32 6, i32 3>