Index: llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1415,6 +1415,30 @@ return createBinOpShuffle(V1, V2, Mask); } + // If both arguments of a commutative binop are select-shuffles that use the + // same mask with commuted operands, the shuffles are unnecessary. + if (Inst.isCommutative() && + match(LHS, m_ShuffleVector(m_Value(V1), m_Value(V2), m_Constant(Mask))) && + match(RHS, m_ShuffleVector(m_Specific(V2), m_Specific(V1), + m_Specific(Mask)))) { + auto *LShuf = cast<ShuffleVectorInst>(LHS); + auto *RShuf = cast<ShuffleVectorInst>(RHS); + // TODO: Allow shuffles that contain undefs in the mask? + // That is legal, but it reduces undef knowledge. + // TODO: Allow arbitrary shuffles by shuffling after binop? + // That might be legal, but we have to deal with poison. + if (LShuf->isSelect() && !LShuf->getMask()->containsUndefElement() && + RShuf->isSelect() && !RShuf->getMask()->containsUndefElement()) { + // Example: + // LHS = shuffle V1, V2, <0, 5, 6, 3> + // RHS = shuffle V2, V1, <0, 5, 6, 3> + // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2 + Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2); + NewBO->copyIRFlags(&Inst); + return NewBO; + } + } + // If one argument is a shuffle within one vector and the other is a constant, // try moving the shuffle after the binary operation. 
This canonicalization // intends to move shuffles closer to other shuffles and binops closer to Index: llvm/trunk/test/Transforms/InstCombine/vec-binop-select.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/vec-binop-select.ll +++ llvm/trunk/test/Transforms/InstCombine/vec-binop-select.ll @@ -5,9 +5,7 @@ define <4 x i32> @and(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @and( -; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = and <4 x i32> [[SEL1]], [[SEL2]] +; CHECK-NEXT: [[R:%.*]] = and <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> @@ -18,9 +16,7 @@ define <4 x i32> @or(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @or( -; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = or <4 x i32> [[SEL1]], [[SEL2]] +; CHECK-NEXT: [[R:%.*]] = or <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> @@ -33,9 +29,7 @@ define <4 x i32> @xor(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @xor( -; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> [[Y]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = xor <4 x i32> [[SEL1]], [[SEL2]] +; CHECK-NEXT: [[R:%.*]] = xor <4 x i32> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> @@ -48,13 +42,56 @@ define <4 x i32> @add(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @add( +; CHECK-NEXT: [[R:%.*]] = add nsw <4 x i32> [[X:%.*]], [[Y:%.*]] +; 
CHECK-NEXT: ret <4 x i32> [[R]] +; + %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> + %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> + %r = add nsw <4 x i32> %sel1, %sel2 + ret <4 x i32> %r +} + +; Negative test - wrong operand + +define <4 x i32> @add_wrong_op(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: @add_wrong_op( ; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> +; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[Z:%.*]], <4 x i32> ; CHECK-NEXT: [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> - %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> + %sel2 = shufflevector <4 x i32> %y, <4 x i32> %z, <4 x i32> + %r = add nsw <4 x i32> %sel1, %sel2 + ret <4 x i32> %r +} + +; Negative test - wrong mask (but we could handle this...) + +define <4 x i32> @add_non_select_mask(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @add_non_select_mask( +; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]] +; CHECK-NEXT: ret <4 x i32> [[R]] +; + %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> + %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> + %r = add nsw <4 x i32> %sel1, %sel2 + ret <4 x i32> %r +} + +; Negative test - wrong mask (but we could handle this...) 
+ +define <4 x i32> @add_masks_with_undefs(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @add_masks_with_undefs( +; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> +; CHECK-NEXT: [[R:%.*]] = add nsw <4 x i32> [[SEL1]], [[SEL2]] +; CHECK-NEXT: ret <4 x i32> [[R]] +; + %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> + %sel2 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> %r = add nsw <4 x i32> %sel1, %sel2 ret <4 x i32> %r } @@ -76,9 +113,7 @@ define <4 x i32> @mul(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @mul( -; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = mul nuw <4 x i32> [[SEL1]], [[SEL2]] +; CHECK-NEXT: [[R:%.*]] = mul nuw <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[R]] ; %sel1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> @@ -180,9 +215,7 @@ define <4 x float> @fadd(<4 x float> %x, <4 x float> %y) { ; CHECK-LABEL: @fadd( -; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[X:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[SEL1]], [[SEL2]] +; CHECK-NEXT: [[R:%.*]] = fadd <4 x float> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: ret <4 x float> [[R]] ; %sel1 = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> @@ -206,9 +239,7 @@ define <4 x double> @fmul(<4 x double> %x, <4 x double> %y) { ; CHECK-LABEL: @fmul( -; CHECK-NEXT: [[SEL1:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[X:%.*]], <4 x i32> -; CHECK-NEXT: [[SEL2:%.*]] = shufflevector <4 x double> [[X]], <4 x double> [[Y]], <4 x i32> -; CHECK-NEXT: [[R:%.*]] = fmul nnan <4 x double> [[SEL1]], [[SEL2]] +; CHECK-NEXT: [[R:%.*]] = 
fmul nnan <4 x double> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: ret <4 x double> [[R]] ; %sel1 = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32>