Index: llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -223,9 +223,17 @@
   // Simplify mul instructions with a constant RHS.
   if (isa<Constant>(Op1)) {
     // Canonicalize (X+C1)*CI -> X*CI+C1*CI.
+    // Canonicalize (X|C1)*CI -> X*CI+C1*CI.
     Value *X;
     Constant *C1;
-    if (match(Op0, m_OneUse(m_Add(m_Value(X), m_Constant(C1))))) {
+    const APInt *C1Val, *C2;
+
+    if ((match(Op0, m_OneUse(m_Add(m_Value(X), m_Constant(C1))))) ||
+        (match(Op0, m_OneUse(m_Or(m_Value(X), m_Constant(C1)))) &&
+         match(C1, m_APInt(C1Val)) &&
+         ((match(X, m_Shl(m_Value(), m_APInt(C2))) &&
+           (C1Val->getZExtValue() < (1 << C2->getZExtValue()))) ||
+          match(X, m_Mul(m_Value(), m_APInt(C2)))))) {
       Value *Mul = Builder.CreateMul(C1, Op1);
       // Only go forward with the transform if C1*CI simplifies to a tidier
       // constant.
Index: llvm/test/Transforms/InstCombine/distributive_const.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/distributive_const.ll
@@ -0,0 +1,208 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define void @foo_1(float* noalias noundef %fi, float* noalias noundef %f, i32 noundef %ci) {
+; CHECK-LABEL: @foo_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, float* [[FI:%.*]], align 4
+; CHECK-NEXT:    [[MUL1:%.*]] = mul nsw i32 [[CI:%.*]], 12
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[MUL1]] to i64
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[F:%.*]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    store float [[ADD4]], float* [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[FI]], i64 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, float* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ADD9:%.*]] = or i32 [[MUL1]], 1
+; CHECK-NEXT:    [[IDXPROM10:%.*]] = sext i32 [[ADD9]] to i64
+; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds float, float* [[F]], i64 [[IDXPROM10]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load float, float* [[ARRAYIDX11]], align 4
+; CHECK-NEXT:    [[ADD12:%.*]] = fadd fast float [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    store float [[ADD12]], float* [[ARRAYIDX11]], align 4
+; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[FI]], i64 2
+; CHECK-NEXT:    [[TMP4:%.*]] = load float, float* [[ARRAYIDX13]], align 4
+; CHECK-NEXT:    [[ADD17:%.*]] = or i32 [[MUL1]], 2
+; CHECK-NEXT:    [[IDXPROM18:%.*]] = sext i32 [[ADD17]] to i64
+; CHECK-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[F]], i64 [[IDXPROM18]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load float, float* [[ARRAYIDX19]], align 4
+; CHECK-NEXT:    [[ADD20:%.*]] = fadd fast float [[TMP5]], [[TMP4]]
+; CHECK-NEXT:    store float [[ADD20]], float* [[ARRAYIDX19]], align 4
+; CHECK-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds float, float* [[FI]], i64 3
+; CHECK-NEXT:    [[TMP6:%.*]] = load float, float* [[ARRAYIDX21]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i32 [[CI]], 12
+; CHECK-NEXT:    [[MUL24:%.*]] = or i32 [[TMP7]], 3
+; CHECK-NEXT:    [[IDXPROM26:%.*]] = sext i32 [[MUL24]] to i64
+; CHECK-NEXT:    [[ARRAYIDX27:%.*]] = getelementptr inbounds float, float* [[F]], i64 [[IDXPROM26]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load float, float* [[ARRAYIDX27]], align 4
+; CHECK-NEXT:    [[ADD28:%.*]] = fadd fast float [[TMP8]], [[TMP6]]
+; CHECK-NEXT:    store float [[ADD28]], float* [[ARRAYIDX27]], align 4
+; CHECK-NEXT:    [[ARRAYIDX29:%.*]] = getelementptr inbounds float, float* [[FI]], i64 4
+; CHECK-NEXT:    [[TMP9:%.*]] = load float, float* [[ARRAYIDX29]], align 4
+; CHECK-NEXT:    [[ADD33:%.*]] = add i32 [[TMP7]], 4
+; CHECK-NEXT:    [[IDXPROM34:%.*]] = sext i32 [[ADD33]] to i64
+; CHECK-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds float, float* [[F]], i64 [[IDXPROM34]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load float, float* [[ARRAYIDX35]], align 4
+; CHECK-NEXT:    [[ADD36:%.*]] = fadd fast float [[TMP10]], [[TMP9]]
+; CHECK-NEXT:    store float [[ADD36]], float* [[ARRAYIDX35]], align 4
+; CHECK-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds float, float* [[FI]], i64 5
+; CHECK-NEXT:    [[TMP11:%.*]] = load float, float* [[ARRAYIDX37]], align 4
+; CHECK-NEXT:    [[ADD41:%.*]] = add i32 [[TMP7]], 5
+; CHECK-NEXT:    [[IDXPROM42:%.*]] = sext i32 [[ADD41]] to i64
+; CHECK-NEXT:    [[ARRAYIDX43:%.*]] = getelementptr inbounds float, float* [[F]], i64 [[IDXPROM42]]
+; CHECK-NEXT:    [[TMP12:%.*]] = load float, float* [[ARRAYIDX43]], align 4
+; CHECK-NEXT:    [[ADD44:%.*]] = fadd fast float [[TMP12]], [[TMP11]]
+; CHECK-NEXT:    store float [[ADD44]], float* [[ARRAYIDX43]], align 4
+; CHECK-NEXT:    [[ARRAYIDX45:%.*]] = getelementptr inbounds float, float* [[FI]], i64 6
+; CHECK-NEXT:    [[TMP13:%.*]] = load float, float* [[ARRAYIDX45]], align 4
+; CHECK-NEXT:    [[TMP14:%.*]] = mul i32 [[CI]], 12
+; CHECK-NEXT:    [[MUL48:%.*]] = add i32 [[TMP14]], 6
+; CHECK-NEXT:    [[IDXPROM50:%.*]] = sext i32 [[MUL48]] to i64
+; CHECK-NEXT:    [[ARRAYIDX51:%.*]] = getelementptr inbounds float, float* [[F]], i64 [[IDXPROM50]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load float, float* [[ARRAYIDX51]], align 4
+; CHECK-NEXT:    [[ADD52:%.*]] = fadd fast float [[TMP15]], [[TMP13]]
+; CHECK-NEXT:    store float [[ADD52]], float* [[ARRAYIDX51]], align 4
+; CHECK-NEXT:    [[ARRAYIDX53:%.*]] = getelementptr inbounds float, float* [[FI]], i64 7
+; CHECK-NEXT:    [[TMP16:%.*]] = load float, float* [[ARRAYIDX53]], align 4
+; CHECK-NEXT:    [[ADD57:%.*]] = add i32 [[TMP14]], 7
+; CHECK-NEXT:    [[IDXPROM58:%.*]] = sext i32 [[ADD57]] to i64
+; CHECK-NEXT:    [[ARRAYIDX59:%.*]] = getelementptr inbounds float, float* [[F]], i64 [[IDXPROM58]]
+; CHECK-NEXT:    [[TMP17:%.*]] = load float, float* [[ARRAYIDX59]], align 4
+; CHECK-NEXT:    [[ADD60:%.*]] = fadd fast float [[TMP17]], [[TMP16]]
+; CHECK-NEXT:    store float [[ADD60]], float* [[ARRAYIDX59]], align 4
+; CHECK-NEXT:    [[ARRAYIDX61:%.*]] = getelementptr inbounds float, float* [[FI]], i64 8
+; CHECK-NEXT:    [[TMP18:%.*]] = load float, float* [[ARRAYIDX61]], align 4
+; CHECK-NEXT:    [[ADD65:%.*]] = add i32 [[TMP14]], 8
+; CHECK-NEXT:    [[IDXPROM66:%.*]] = sext i32 [[ADD65]] to i64
+; CHECK-NEXT:    [[ARRAYIDX67:%.*]] = getelementptr inbounds float, float* [[F]], i64 [[IDXPROM66]]
+; CHECK-NEXT:    [[TMP19:%.*]] = load float, float* [[ARRAYIDX67]], align 4
+; CHECK-NEXT:    [[ADD68:%.*]] = fadd fast float [[TMP19]], [[TMP18]]
+; CHECK-NEXT:    store float [[ADD68]], float* [[ARRAYIDX67]], align 4
+; CHECK-NEXT:    [[ARRAYIDX69:%.*]] = getelementptr inbounds float, float* [[FI]], i64 9
+; CHECK-NEXT:    [[TMP20:%.*]] = load float, float* [[ARRAYIDX69]], align 4
+; CHECK-NEXT:    [[TMP21:%.*]] = mul i32 [[CI]], 12
+; CHECK-NEXT:    [[MUL72:%.*]] = add i32 [[TMP21]], 9
+; CHECK-NEXT:    [[IDXPROM74:%.*]] = sext i32 [[MUL72]] to i64
+; CHECK-NEXT:    [[ARRAYIDX75:%.*]] = getelementptr inbounds float, float* [[F]], i64 [[IDXPROM74]]
+; CHECK-NEXT:    [[TMP22:%.*]] = load float, float* [[ARRAYIDX75]], align 4
+; CHECK-NEXT:    [[ADD76:%.*]] = fadd fast float [[TMP22]], [[TMP20]]
+; CHECK-NEXT:    store float [[ADD76]], float* [[ARRAYIDX75]], align 4
+; CHECK-NEXT:    [[ARRAYIDX77:%.*]] = getelementptr inbounds float, float* [[FI]], i64 10
+; CHECK-NEXT:    [[TMP23:%.*]] = load float, float* [[ARRAYIDX77]], align 4
+; CHECK-NEXT:    [[ADD81:%.*]] = add i32 [[TMP21]], 10
+; CHECK-NEXT:    [[IDXPROM82:%.*]] = sext i32 [[ADD81]] to i64
+; CHECK-NEXT:    [[ARRAYIDX83:%.*]] = getelementptr inbounds float, float* [[F]], i64 [[IDXPROM82]]
+; CHECK-NEXT:    [[TMP24:%.*]] = load float, float* [[ARRAYIDX83]], align 4
+; CHECK-NEXT:    [[ADD84:%.*]] = fadd fast float [[TMP24]], [[TMP23]]
+; CHECK-NEXT:    store float [[ADD84]], float* [[ARRAYIDX83]], align 4
+; CHECK-NEXT:    [[ARRAYIDX85:%.*]] = getelementptr inbounds float, float* [[FI]], i64 11
+; CHECK-NEXT:    [[TMP25:%.*]] = load float, float* [[ARRAYIDX85]], align 4
+; CHECK-NEXT:    [[ADD89:%.*]] = add i32 [[TMP21]], 11
+; CHECK-NEXT:    [[IDXPROM90:%.*]] = sext i32 [[ADD89]] to i64
+; CHECK-NEXT:    [[ARRAYIDX91:%.*]] = getelementptr inbounds float, float* [[F]], i64 [[IDXPROM90]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load float, float* [[ARRAYIDX91]], align 4
+; CHECK-NEXT:    [[ADD92:%.*]] = fadd fast float [[TMP26]], [[TMP25]]
+; CHECK-NEXT:    store float [[ADD92]], float* [[ARRAYIDX91]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load float, float* %fi, align 4
+  %mul = mul nsw i32 %ci, 4
+  %mul1 = mul nsw i32 %mul, 3
+  %idxprom = sext i32 %mul1 to i64
+  %arrayidx3 = getelementptr inbounds float, float* %f, i64 %idxprom
+  %1 = load float, float* %arrayidx3, align 4
+  %add4 = fadd fast float %1, %0
+  store float %add4, float* %arrayidx3, align 4
+  %arrayidx5 = getelementptr inbounds float, float* %fi, i64 1
+  %2 = load float, float* %arrayidx5, align 4
+  %add9 = add nsw i32 %mul1, 1
+  %idxprom10 = sext i32 %add9 to i64
+  %arrayidx11 = getelementptr inbounds float, float* %f, i64 %idxprom10
+  %3 = load float, float* %arrayidx11, align 4
+  %add12 = fadd fast float %3, %2
+  store float %add12, float* %arrayidx11, align 4
+  %arrayidx13 = getelementptr inbounds float, float* %fi, i64 2
+  %4 = load float, float* %arrayidx13, align 4
+  %add17 = add nsw i32 %mul1, 2
+  %idxprom18 = sext i32 %add17 to i64
+  %arrayidx19 = getelementptr inbounds float, float* %f, i64 %idxprom18
+  %5 = load float, float* %arrayidx19, align 4
+  %add20 = fadd fast float %5, %4
+  store float %add20, float* %arrayidx19, align 4
+  %arrayidx21 = getelementptr inbounds float, float* %fi, i64 3
+  %6 = load float, float* %arrayidx21, align 4
+  %add23 = add nsw i32 %mul, 1
+  %mul24 = mul nsw i32 %add23, 3
+  %idxprom26 = sext i32 %mul24 to i64
+  %arrayidx27 = getelementptr inbounds float, float* %f, i64 %idxprom26
+  %7 = load float, float* %arrayidx27, align 4
+  %add28 = fadd fast float %7, %6
+  store float %add28, float* %arrayidx27, align 4
+  %arrayidx29 = getelementptr inbounds float, float* %fi, i64 4
+  %8 = load float, float* %arrayidx29, align 4
+  %add33 = add nsw i32 %mul24, 1
+  %idxprom34 = sext i32 %add33 to i64
+  %arrayidx35 = getelementptr inbounds float, float* %f, i64 %idxprom34
+  %9 = load float, float* %arrayidx35, align 4
+  %add36 = fadd fast float %9, %8
+  store float %add36, float* %arrayidx35, align 4
+  %arrayidx37 = getelementptr inbounds float, float* %fi, i64 5
+  %10 = load float, float* %arrayidx37, align 4
+  %add41 = add nsw i32 %mul24, 2
+  %idxprom42 = sext i32 %add41 to i64
+  %arrayidx43 = getelementptr inbounds float, float* %f, i64 %idxprom42
+  %11 = load float, float* %arrayidx43, align 4
+  %add44 = fadd fast float %11, %10
+  store float %add44, float* %arrayidx43, align 4
+  %arrayidx45 = getelementptr inbounds float, float* %fi, i64 6
+  %12 = load float, float* %arrayidx45, align 4
+  %add47 = add nsw i32 %mul, 2
+  %mul48 = mul nsw i32 %add47, 3
+  %idxprom50 = sext i32 %mul48 to i64
+  %arrayidx51 = getelementptr inbounds float, float* %f, i64 %idxprom50
+  %13 = load float, float* %arrayidx51, align 4
+  %add52 = fadd fast float %13, %12
+  store float %add52, float* %arrayidx51, align 4
+  %arrayidx53 = getelementptr inbounds float, float* %fi, i64 7
+  %14 = load float, float* %arrayidx53, align 4
+  %add57 = add nsw i32 %mul48, 1
+  %idxprom58 = sext i32 %add57 to i64
+  %arrayidx59 = getelementptr inbounds float, float* %f, i64 %idxprom58
+  %15 = load float, float* %arrayidx59, align 4
+  %add60 = fadd fast float %15, %14
+  store float %add60, float* %arrayidx59, align 4
+  %arrayidx61 = getelementptr inbounds float, float* %fi, i64 8
+  %16 = load float, float* %arrayidx61, align 4
+  %add65 = add nsw i32 %mul48, 2
+  %idxprom66 = sext i32 %add65 to i64
+  %arrayidx67 = getelementptr inbounds float, float* %f, i64 %idxprom66
+  %17 = load float, float* %arrayidx67, align 4
+  %add68 = fadd fast float %17, %16
+  store float %add68, float* %arrayidx67, align 4
+  %arrayidx69 = getelementptr inbounds float, float* %fi, i64 9
+  %18 = load float, float* %arrayidx69, align 4
+  %add71 = add nsw i32 %mul, 3
+  %mul72 = mul nsw i32 %add71, 3
+  %idxprom74 = sext i32 %mul72 to i64
+  %arrayidx75 = getelementptr inbounds float, float* %f, i64 %idxprom74
+  %19 = load float, float* %arrayidx75, align 4
+  %add76 = fadd fast float %19, %18
+  store float %add76, float* %arrayidx75, align 4
+  %arrayidx77 = getelementptr inbounds float, float* %fi, i64 10
+  %20 = load float, float* %arrayidx77, align 4
+  %add81 = add nsw i32 %mul72, 1
+  %idxprom82 = sext i32 %add81 to i64
+  %arrayidx83 = getelementptr inbounds float, float* %f, i64 %idxprom82
+  %21 = load float, float* %arrayidx83, align 4
+  %add84 = fadd fast float %21, %20
+  store float %add84, float* %arrayidx83, align 4
+  %arrayidx85 = getelementptr inbounds float, float* %fi, i64 11
+  %22 = load float, float* %arrayidx85, align 4
+  %add89 = add nsw i32 %mul72, 2
+  %idxprom90 = sext i32 %add89 to i64
+  %arrayidx91 = getelementptr inbounds float, float* %f, i64 %idxprom90
+  %23 = load float, float* %arrayidx91, align 4
+  %add92 = fadd fast float %23, %22
+  store float %add92, float* %arrayidx91, align 4
+  ret void
+}
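
Note on the fold (not part of the patch): the new `m_Or` arm distributes `(X | C1) * CI` the same way the existing `m_Add` arm distributes `(X + C1) * CI`, which is only sound when the `or` acts as an `add`, i.e. when C1's set bits land in bit positions where X is known to be zero. That is what the `m_Shl` guard checks: for X = Y << C2 with C1 < (1 << C2), the low C2 bits of X are zero. The test exercises the `m_Mul` arm instead: `(%ci * 4 + 1) * 3` is first canonicalized by InstCombine's add-to-or fold into `(%ci * 4 | 1) * 3`, which the new fold rewrites to `%ci * 12 + 3`, appearing in the checks as `or i32 [[TMP7]], 3`. A minimal standalone C++ sketch (illustrative only; the loop bounds and the multiplier CI are arbitrary choices, not from the patch) that exhaustively verifies the disjoint-bits identity behind the shl guard for small values:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t CI = 3; // the multiplier in (X | C1) * CI
  for (uint32_t Y = 0; Y < 1024; ++Y)
    for (uint32_t C2 = 1; C2 < 8; ++C2) {
      uint32_t X = Y << C2; // low C2 bits of X are known zero
      for (uint32_t C1 = 0; C1 < (1u << C2); ++C1) {
        assert((X | C1) == (X + C1));              // or of disjoint bits is an add
        assert((X | C1) * CI == X * CI + C1 * CI); // so the mul distributes exactly
      }
    }
  return 0;
}

Under these preconditions the identity holds in the wrapping arithmetic of any fixed bit width, so the rewrite itself needs no nsw/nuw reasoning; only the disjointness of C1 and X matters.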