Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1415,6 +1415,30 @@ return createBinOpShuffle(V1, V2, Mask); } + // If both arguments of a commutative binop are select-shuffles that use the + // same mask with commuted operands, the shuffles are unnecessary. + if (Inst.isCommutative() && + match(LHS, m_ShuffleVector(m_Value(V1), m_Value(V2), m_Constant(Mask))) && + match(RHS, m_ShuffleVector(m_Specific(V2), m_Specific(V1), + m_Specific(Mask)))) { + auto *LShuf = cast(LHS); + auto *RShuf = cast(RHS); + // TODO: Allow shuffles that contain undefs in the mask? + // That is legal, but it reduces undef knowledge. + // TODO: Allow arbitrary shuffles by shuffling after binop? + // That might be legal, but we have to deal with poison. + if (LShuf->isSelect() && !LShuf->getMask()->containsUndefElement() && + RShuf->isSelect() && !RShuf->getMask()->containsUndefElement()) { + // Example: + // LHS = shuffle V1, V2, <0, 5, 6, 3> + // RHS = shuffle V2, V1, <0, 5, 6, 3> + // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2 + Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2); + NewBO->copyIRFlags(&Inst); + return NewBO; + } + } + // If one argument is a shuffle within one vector and the other is a constant, // try moving the shuffle after the binary operation. 
This canonicalization // intends to move shuffles closer to other shuffles and binops closer to Index: llvm/test/Transforms/InstCombine/vec-binop-select.ll =================================================================== --- llvm/test/Transforms/InstCombine/vec-binop-select.ll +++ llvm/test/Transforms/InstCombine/vec-binop-select.ll @@ -5,9 +5,7 @@ define <4 x i32> @and(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @and( -; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = and <4 x i32> [[SEL1]], [[SEL2]] +; CHECK-NEXT: [[R:%.*]] = and <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> @@ -18,9 +16,7 @@ define <4 x i32> @or(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @or( -; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = or <4 x i32> [[SEL1]], [[SEL2]] +; CHECK-NEXT: [[R:%.*]] = or <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> @@ -33,9 +29,7 @@ define <4 x i32> @xor(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @xor( -; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Y]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = xor <4 x i32> [[SEL1]], [[SEL2]] +; CHECK-NEXT: [[R:%.*]] = xor <4 x i32> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> @@ -48,9 +42,7 @@ define <4 x i32> @add(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @add( -; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> 
-; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]] +; CHECK-NEXT: [[R:%.*]] = add nsw <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> @@ -76,9 +68,7 @@ define <4 x i32> @mul(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @mul( -; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = mul nuw <4 x i32> [[SEL1]], [[SEL2]] +; CHECK-NEXT: [[R:%.*]] = mul nuw <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> @@ -180,9 +170,7 @@ define <4 x float> @fadd(<4 x float> %x, <4 x float> %y) { ; CHECK-LABEL: @fadd( -; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[X:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[SEL1]], [[SEL2]] +; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: ret <4 x float> [[R]] ; %sel1 = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> @@ -206,9 +194,7 @@ define <4 x double> @fmul(<4 x double> %x, <4 x double> %y) { ; CHECK-LABEL: @fmul( -; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[X:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x double> [[X]], <4 x double> [[Y]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = fmul nnan <4 x double> [[SEL1]], [[SEL2]] +; CHECK-NEXT: [[R:%.*]] = fmul nnan <4 x double> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: ret <4 x double> [[R]] ; %sel1 = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32>