Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -765,6 +765,18 @@ tryFactorization(I, SQ, Builder, RHSOpcode, LHS, Ident, C, D)) return V; + // The instruction has the form "(A * B) op (C op D)". Try to factorize + // common term for "(A * B) op C op D". + if (Op0 && Op1 && LHSOpcode == Instruction::Mul && isa(D) && + LHS->hasOneUse() && RHS->hasOneUse() && TopLevelOpcode == RHSOpcode && + Instruction::isCommutative(RHSOpcode)) + if (Value *Ident = getIdentityValue(LHSOpcode, C)) + if (Value *V = + tryFactorization(I, SQ, Builder, LHSOpcode, A, B, C, Ident)) { + Value *New = Builder.CreateBinOp(RHSOpcode, V, D); + return New; + } + return nullptr; } @@ -778,11 +790,33 @@ BinaryOperator *Op0 = dyn_cast(LHS); BinaryOperator *Op1 = dyn_cast(RHS); Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); + Value *A, *B, *C, *D; + Instruction::BinaryOps LHSOpcode, RHSOpcode; // Factorization. if (Value *R = tryFactorizationFolds(I)) return R; + if (Op0) + LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B); + if (Op1) + RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D); + + // The instruction has the form "(A op' B) op' (C * D)". See if expanding it + // out to "(C * D) op' (A op' B)" results in simplifications. + if (Op0 && Op1 && RHSOpcode == Instruction::Mul && A == C && + LHSOpcode == TopLevelOpcode && + rightDistributesOverLeft(TopLevelOpcode, RHSOpcode)) { + bool InnerCommutative = Instruction::isCommutative(TopLevelOpcode); + if (isa(B) && isa(D) && InnerCommutative) { + // They do! Return "RHS op' LHS". + ++NumExpand; + Value *New = Builder.CreateBinOp(TopLevelOpcode, RHS, LHS); + New->takeName(&I); + return New; + } + } + // Expansion. if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) { // The instruction has the form "(A op' B) op C". See if expanding it out Index: llvm/test/Transforms/InstCombine/add4.ll =================================================================== --- llvm/test/Transforms/InstCombine/add4.ll +++ llvm/test/Transforms/InstCombine/add4.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=instcombine -S | FileCheck %s +declare i32 @use32(i32) + define i64 @match_unsigned(i64 %x) { ; CHECK-LABEL: @match_unsigned( ; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[X:%.*]], 19136 @@ -127,3 +129,184 @@ %t4 = add i32 %t, %t3 ret i32 %t4 } + +; (x + (-1)) + (x * 5) --> (x * 6) + (-1) +define i8 @mul_add_common_factor_0(i8 %x) { +; CHECK-LABEL: @mul_add_common_factor_0( +; CHECK-NEXT: [[A1:%.*]] = mul i8 [[X:%.*]], 6 +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[A1]], -1 +; CHECK-NEXT: ret i8 [[TMP1]] +; + %a0 = add i8 %x, -1 + %m = mul i8 %x, 5 + %a1 = add i8 %a0, %m ; the mul operand is the right operand, should swap the operand + ret i8 %a1 +} + +; (x * 4) + (x - 1) --> (x * 5) + (-1) +define i16 @mul_add_common_factor_1(i16 %x) { +; CHECK-LABEL: @mul_add_common_factor_1( +; CHECK-NEXT: [[A1:%.*]] = mul i16 [[X:%.*]], 5 +; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[A1]], -1 +; CHECK-NEXT: ret i16 [[TMP1]] +; + %a0 = add i16 %x, -1 + %m = mul i16 %x, 4 + %a1 = add i16 %m, %a0 ; the mul operand is the left operand + ret i16 %a1 +} + +; Negative test: y is not a const for (x + y) + (x * 4) --> (x * 5) + y +define i32 @mul_add_common_factor_2(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_add_common_factor_2( +; CHECK-NEXT: [[A0:%.*]] = add i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[M:%.*]] = shl i32 [[X]], 2 +; CHECK-NEXT: [[A1:%.*]] = add i32 [[A0]], [[M]] +; CHECK-NEXT: ret i32 [[A1]] +; + %a0 = add i32 %x, %y + %m = mul i32 %x, 4 + %a1 = add i32 %a0, %m + ret i32 %a1 +} + +; Negative test: y is not a const for (y + x) + (x * 4) --> (x * 5) + y +define i32 @mul_add_common_factor_2_commute(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_add_common_factor_2_commute( +; CHECK-NEXT: [[A0:%.*]] = add i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[M:%.*]] = shl i32 [[X]], 2 +; CHECK-NEXT: [[A1:%.*]] = add i32 [[A0]], [[M]] +; CHECK-NEXT: ret i32 [[A1]] +; + %a0 = add i32 %y, %x + %m = mul i32 %x, 4 + %a1 = add i32 %a0, %m + ret i32 %a1 +} + +; Negative test: t is not a const for (x + 2) + (x * t) --> (t + 1) * x + 2 +define i128 @mul_add_common_factor_3(i128 %x, i128 %t) { +; CHECK-LABEL: @mul_add_common_factor_3( +; CHECK-NEXT: [[A0:%.*]] = add i128 [[X:%.*]], 2 +; CHECK-NEXT: [[M:%.*]] = mul i128 [[X]], [[T:%.*]] +; CHECK-NEXT: [[A1:%.*]] = add i128 [[A0]], [[M]] +; CHECK-NEXT: ret i128 [[A1]] +; + %a0 = add i128 %x, 2 + %m = mul i128 %x, %t + %a1 = add i128 %a0, %m + ret i128 %a1 +} + +; Negative test: t is not a const for (x + 2) + (t * x) --> (t + 1) * x + 2 +define i128 @mul_add_common_factor_3_commute(i128 %x, i128 %t) { +; CHECK-LABEL: @mul_add_common_factor_3_commute( +; CHECK-NEXT: [[A0:%.*]] = add i128 [[X:%.*]], 2 +; CHECK-NEXT: [[M:%.*]] = mul i128 [[T:%.*]], [[X]] +; CHECK-NEXT: [[A1:%.*]] = add i128 [[A0]], [[M]] +; CHECK-NEXT: ret i128 [[A1]] +; + %a0 = add i128 %x, 2 + %m = mul i128 %t, %x + %a1 = add i128 %a0, %m + ret i128 %a1 +} + +; Negative test: The transformation should not create more instructions +define i32 @mul_add_common_factor_4(i32 %x) { +; CHECK-LABEL: @mul_add_common_factor_4( +; CHECK-NEXT: [[A0:%.*]] = add i32 [[X:%.*]], -1 +; CHECK-NEXT: [[M:%.*]] = shl i32 [[X]], 2 +; CHECK-NEXT: call void @use32(i32 [[A0]]) +; CHECK-NEXT: [[A1:%.*]] = add i32 [[M]], [[A0]] +; CHECK-NEXT: ret i32 [[A1]] +; + %a0 = add i32 %x, -1 + %m = mul i32 %x, 4 + call void @use32(i32 %a0) ; an extra use + %a1 = add i32 %m, %a0 + ret i32 %a1 +} + +; (x * 4) + (x + 3) --> (x * 5) + 3 +define <2 x i8> @mul_add_common_factor_5(<2 x i8> %x) { +; CHECK-LABEL: @mul_add_common_factor_5( +; CHECK-NEXT: [[A1:%.*]] = mul <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[A1]], +; CHECK-NEXT: ret <2 x i8> [[TMP1]] +; + %a0 = add <2 x i8> %x, + %m = mul <2 x i8> %x, ; vector type + %a1 = add <2 x i8> %m, %a0 + ret <2 x i8> %a1 +} + +; (x << 2) + (x - 1) --> (x * 5) + (-1) +define i16 @shl_add_common_factor_1(i16 %x) { +; CHECK-LABEL: @shl_add_common_factor_1( +; CHECK-NEXT: [[A1:%.*]] = mul i16 [[X:%.*]], 5 +; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[A1]], -1 +; CHECK-NEXT: ret i16 [[TMP1]] +; + %a0 = add i16 %x, -1 + %s = shl i16 %x, 2 + %a1 = add i16 %s, %a0 ; the shl operand is the left operand + ret i16 %a1 +} + +; Negative test: y is not a const for (y + x) + (x << 2) +define i32 @shl_add_common_factor_2(i32 %x, i32 %y) { +; CHECK-LABEL: @shl_add_common_factor_2( +; CHECK-NEXT: [[A0:%.*]] = add i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[S:%.*]] = shl i32 [[X]], 2 +; CHECK-NEXT: [[A1:%.*]] = add i32 [[A0]], [[S]] +; CHECK-NEXT: ret i32 [[A1]] +; + %a0 = add i32 %x, %y + %s = shl i32 %x, 2 + %a1 = add i32 %a0, %s ; the shl operand is the right operand + ret i32 %a1 +} + +; Negative test: y is not a const for (y + x) + (x << 2) +define i32 @shl_add_common_factor_2_commute(i32 %x, i32 %y) { +; CHECK-LABEL: @shl_add_common_factor_2_commute( +; CHECK-NEXT: [[A0:%.*]] = add i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[S:%.*]] = shl i32 [[X]], 2 +; CHECK-NEXT: [[A1:%.*]] = add i32 [[A0]], [[S]] +; CHECK-NEXT: ret i32 [[A1]] +; + %a0 = add i32 %y, %x ; swap the operand + %s = shl i32 %x, 2 + %a1 = add i32 %a0, %s + ret i32 %a1 +} + +; Negative test: The transformation should not create more instructions +define i32 @shl_add_common_factor_4(i32 %x) { +; CHECK-LABEL: @shl_add_common_factor_4( +; CHECK-NEXT: [[A0:%.*]] = add i32 [[X:%.*]], -1 +; CHECK-NEXT: [[S:%.*]] = shl i32 [[X]], 2 +; CHECK-NEXT: call void @use32(i32 [[S]]) +; CHECK-NEXT: [[A1:%.*]] = add i32 [[S]], [[A0]] +; CHECK-NEXT: ret i32 [[A1]] +; + %a0 = add i32 %x, -1 + %s = shl i32 %x, 2 + call void @use32(i32 %s) ; an extra use + %a1 = add i32 %s, %a0 + ret i32 %a1 +} + +; (x << 2) + (x + 3) --> (x * 5) + 3 +define <2 x i64> @shl_add_common_factor_5(<2 x i64> %x) { +; CHECK-LABEL: @shl_add_common_factor_5( +; CHECK-NEXT: [[A1:%.*]] = mul <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[A1]], +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; + %a0 = add <2 x i64> %x, + %s = shl <2 x i64> %x, ; vector type + %a1 = add <2 x i64> %s, %a0 + ret <2 x i64> %a1 +} Index: llvm/test/Transforms/PGOProfile/chr.ll =================================================================== --- llvm/test/Transforms/PGOProfile/chr.ll +++ llvm/test/Transforms/PGOProfile/chr.ll @@ -1050,10 +1050,9 @@ ; CHECK-NEXT: br label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[BB0]] ], [ [[TMP5]], [[BB2_NONCHR]] ], [ [[TMP5]], [[BB1_NONCHR]] ] -; CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], 42 -; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], -99 -; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP9]], [[TMP10]] -; CHECK-NEXT: ret i32 [[TMP11]] +; CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], 43 +; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], -99 +; CHECK-NEXT: ret i32 [[TMP10]] ; entry: %0 = load i32, ptr %i