Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1164,10 +1164,33 @@ else return nullptr; - // TODO: There are potential folds where the opcodes do not match (mul+shl). - if (B0->getOpcode() != B1->getOpcode()) + // We need matching binops to fold the lanes together. + BinaryOperator::BinaryOps Opc0 = B0->getOpcode(); + BinaryOperator::BinaryOps Opc1 = B1->getOpcode(); + bool DropNSW = false; + if (ConstantsAreOp1 && Opc0 != Opc1) { + // If we have multiply and shift-left-by-constant, convert the shift: + // shl X, C --> mul X, 1 << C + // TODO: We drop "nsw" if shift is converted into multiply because it may + // not be correct when the shift amount is BitWidth - 1. We could examine + // each vector element to determine if it is safe to keep that flag. + if (Opc0 == Instruction::Mul && Opc1 == Instruction::Shl) { + C1 = ConstantExpr::getShl(ConstantInt::get(C1->getType(), 1), C1); + Opc1 = Instruction::Mul; + DropNSW = true; + } else if (Opc0 == Instruction::Shl && Opc1 == Instruction::Mul) { + C0 = ConstantExpr::getShl(ConstantInt::get(C0->getType(), 1), C0); + Opc0 = Instruction::Mul; + DropNSW = true; + } + } + + if (Opc0 != Opc1) return nullptr; + // The opcodes must be the same. Use a new name to make that clear. + BinaryOperator::BinaryOps BOpc = Opc0; + // Remove a binop and the shuffle by rearranging the constant: // shuffle (op X, C0), (op X, C1), M --> op X, C' // shuffle (op C0, X), (op C1, X), M --> op C', X @@ -1179,13 +1202,14 @@ if (B0->isIntDivRem()) NewC = getSafeVectorConstantForIntDivRem(NewC); - BinaryOperator::BinaryOps Opc = B0->getOpcode(); - Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(Opc, X, NewC) : - BinaryOperator::Create(Opc, NewC, X); + Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, X, NewC) : + BinaryOperator::Create(BOpc, NewC, X); // Flags are intersected from the 2 source binops. NewBO->copyIRFlags(B0); NewBO->andIRFlags(B1); + if (DropNSW) + NewBO->setHasNoSignedWrap(false); return NewBO; } Index: llvm/trunk/test/Transforms/InstCombine/shuffle_select.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/shuffle_select.ll +++ llvm/trunk/test/Transforms/InstCombine/shuffle_select.ll @@ -502,14 +502,11 @@ ret <4 x double> %t3 } -; FIXME: ; Shift-left with constant shift amount can be converted to mul to enable the fold. define <4 x i32> @mul_shl(<4 x i32> %v0) { ; CHECK-LABEL: @mul_shl( -; CHECK-NEXT: [[T1:%.*]] = mul nuw <4 x i32> [[V0:%.*]], -; CHECK-NEXT: [[T2:%.*]] = shl nuw <4 x i32> [[V0]], -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = mul nuw <4 x i32> [[V0:%.*]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = mul nuw <4 x i32> %v0, @@ -518,11 +515,11 @@ ret <4 x i32> %t3 } +; Try with shift as operand 0 of the shuffle; 'nsw' is dropped for safety, but that could be improved. + define <4 x i32> @shl_mul(<4 x i32> %v0) { ; CHECK-LABEL: @shl_mul( -; CHECK-NEXT: [[T1:%.*]] = shl nsw <4 x i32> [[V0:%.*]], -; CHECK-NEXT: [[T2:%.*]] = mul nsw <4 x i32> [[V0]], -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[V0:%.*]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = shl nsw <4 x i32> %v0, @@ -536,8 +533,7 @@ define <4 x i32> @mul_is_nop_shl(<4 x i32> %v0) { ; CHECK-LABEL: @mul_is_nop_shl( -; CHECK-NEXT: [[T2:%.*]] = shl <4 x i32> [[V0:%.*]], -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[T2]], <4 x i32> +; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = mul <4 x i32> %v0, @@ -546,6 +542,8 @@ ret <4 x i32> %t3 } +; Negative test: shift amount (operand 1) must be constant. + define <4 x i32> @shl_mul_not_constant_shift_amount(<4 x i32> %v0) { ; CHECK-LABEL: @shl_mul_not_constant_shift_amount( ; CHECK-NEXT: [[T1:%.*]] = shl <4 x i32> , [[V0:%.*]]