Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1164,8 +1164,28 @@ else return nullptr; - // TODO: There are potential folds where the opcodes do not match (mul+shl). - if (B0->getOpcode() != B1->getOpcode()) + // We need matching binops to fold the lanes together. + BinaryOperator::BinaryOps Opcode0 = B0->getOpcode(); + BinaryOperator::BinaryOps Opcode1 = B1->getOpcode(); + bool DropNSW = false; + if (ConstantsAreOp1 && Opcode0 != Opcode1) { + // If we have multiply and shift-left-by-constant, convert the shift: + // shl X, C --> mul X, 1 << C + // TODO: We drop "nsw" if shift is converted into multiply because it may + // not be correct when the shift amount is BitWidth - 1. We could examine + // each vector element to determine if it is safe to keep that flag. + if (Opcode0 == Instruction::Mul && Opcode1 == Instruction::Shl) { + C1 = ConstantExpr::getShl(ConstantInt::get(C1->getType(), 1), C1); + Opcode1 = Instruction::Mul; + DropNSW = true; + } else if (Opcode0 == Instruction::Shl && Opcode1 == Instruction::Mul) { + C0 = ConstantExpr::getShl(ConstantInt::get(C0->getType(), 1), C0); + Opcode0 = Instruction::Mul; + DropNSW = true; + } + } + + if (Opcode0 != Opcode1) return nullptr; // Remove a binop and the shuffle by rearranging the constant: @@ -1186,6 +1206,8 @@ // Flags are intersected from the 2 source binops. NewBO->copyIRFlags(B0); NewBO->andIRFlags(B1); + if (DropNSW) + NewBO->setHasNoSignedWrap(false); return NewBO; } Index: test/Transforms/InstCombine/shuffle_select.ll =================================================================== --- test/Transforms/InstCombine/shuffle_select.ll +++ test/Transforms/InstCombine/shuffle_select.ll @@ -239,14 +239,11 @@ ret <4 x double> %t3 } -; FIXME: ; Shift-left with constant shift amount can be converted to mul to enable the fold. define <4 x i32> @mul_shl(<4 x i32> %v0) { ; CHECK-LABEL: @mul_shl( -; CHECK-NEXT: [[T1:%.*]] = mul nuw <4 x i32> [[V0:%.*]], -; CHECK-NEXT: [[T2:%.*]] = shl nuw <4 x i32> [[V0]], -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = mul nuw <4 x i32> [[V0:%.*]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = mul nuw <4 x i32> %v0, @@ -255,11 +252,11 @@ ret <4 x i32> %t3 } +; Try with shift as operand 0 of the shuffle; 'nsw' is dropped for safety, but that could be improved. + define <4 x i32> @shl_mul(<4 x i32> %v0) { ; CHECK-LABEL: @shl_mul( -; CHECK-NEXT: [[T1:%.*]] = shl nsw <4 x i32> [[V0:%.*]], -; CHECK-NEXT: [[T2:%.*]] = mul nsw <4 x i32> [[V0]], -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = shl nsw <4 x i32> %v0, @@ -273,8 +270,7 @@ define <4 x i32> @mul_is_nop_shl(<4 x i32> %v0) { ; CHECK-LABEL: @mul_is_nop_shl( -; CHECK-NEXT: [[T2:%.*]] = shl <4 x i32> [[V0:%.*]], -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = mul <4 x i32> %v0, @@ -283,6 +279,8 @@ ret <4 x i32> %t3 } +; Negative test: shift amount (operand 1) must be constant. + define <4 x i32> @shl_mul_not_constant_shift_amount(<4 x i32> %v0) { ; CHECK-LABEL: @shl_mul_not_constant_shift_amount( ; CHECK-NEXT: [[T1:%.*]] = shl <4 x i32> , [[V0:%.*]]