Index: include/llvm/Transforms/Scalar/Reassociate.h
===================================================================
--- include/llvm/Transforms/Scalar/Reassociate.h
+++ include/llvm/Transforms/Scalar/Reassociate.h
@@ -113,6 +113,7 @@
   void OptimizeInst(Instruction *I);
   Instruction *canonicalizeNegConstExpr(Instruction *I);
   void BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT);
+  void swapOperandsToMatchBinops(BinaryOperator &B);
 };
 
 } // end namespace llvm
Index: lib/Transforms/Scalar/Reassociate.cpp
===================================================================
--- lib/Transforms/Scalar/Reassociate.cpp
+++ lib/Transforms/Scalar/Reassociate.cpp
@@ -63,6 +63,7 @@
 
 using namespace llvm;
 using namespace reassociate;
+using namespace PatternMatch;
 
 #define DEBUG_TYPE "reassociate"
 
@@ -620,6 +621,17 @@
   return Changed;
 }
 
+/// Discard any debug info related to an expression that has changed.
+static void discardDebugInfo(BinaryOperator *BO) {
+  SmallVector<DbgInfoIntrinsic *, 1> DbgUsers;
+  findDbgUsers(DbgUsers, BO);
+  for (auto *DII : DbgUsers) {
+    Value *Undef = UndefValue::get(BO->getType());
+    DII->setOperand(0, MetadataAsValue::get(DII->getContext(),
+                                            ValueAsMetadata::get(Undef)));
+  }
+}
+
 /// Now that the operands for this expression tree are
 /// linearized and optimized, emit them in-order.
 void ReassociatePass::RewriteExprTree(BinaryOperator *I,
@@ -782,17 +794,7 @@
     if (ExpressionChanged == I)
       break;
 
-    // Discard any debug info related to the expressions that has changed (we
-    // can leave debug infor related to the root, since the result of the
-    // expression tree should be the same even after reassociation).
-    SmallVector<DbgInfoIntrinsic *, 1> DbgUsers;
-    findDbgUsers(DbgUsers, ExpressionChanged);
-    for (auto *DII : DbgUsers) {
-      Value *Undef = UndefValue::get(ExpressionChanged->getType());
-      DII->setOperand(0, MetadataAsValue::get(DII->getContext(),
-                                              ValueAsMetadata::get(Undef)));
-    }
-
+    discardDebugInfo(ExpressionChanged);
     ExpressionChanged->moveBefore(I);
     ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->user_begin());
   } while (true);
@@ -2119,6 +2121,68 @@
     ReassociateExpression(BO);
 }
 
+/// If we have an associative pair of binops with the same opcode and 2 of the
+/// 3 operands to that pair of binops are some other matching binop, rearrange
+/// the operands of the associative binops so the matching ops are paired
+/// together. This transform creates factoring opportunities by pairing
+/// opcodes.
+/// TODO: Should those factoring optimizations be handled here or in
+/// InstCombine?
+/// Example:
+///   ((X << S) & Y) & (Z << S) --> ((X << S) & (Z << S)) & Y (reassociation)
+///                             --> ((X & Z) << S) & Y (factorize shift from
+///                                 'and' ops optimization)
+void ReassociatePass::swapOperandsToMatchBinops(BinaryOperator &B) {
+  BinaryOperator *B0, *B1;
+  if (!B.isAssociative() || !B.isCommutative() ||
+      !match(&B, m_BinOp(m_BinOp(B0), m_BinOp(B1))))
+    return;
+
+  // Canonicalize a matching binop to B0 and a non-matching binop to B1.
+  Instruction::BinaryOps TopOpc = B.getOpcode();
+  if (B0->getOpcode() != TopOpc) {
+    if (B1->getOpcode() != TopOpc || !B1->hasOneUse())
+      return;
+    std::swap(B0, B1);
+    B.swapOperands();
+  }
+
+  // If both operands already have the same opcode, there is nothing to do.
+  Instruction::BinaryOps OtherOpc = B1->getOpcode();
+  if (B0->getOpcode() != TopOpc || !B0->hasOneUse() || OtherOpc == TopOpc)
+    return;
+
+  // Canonicalize a binop that matches B1 to B00 (operand 0 of B0) and a value
+  // that does not match B1 to V01.
+  bool SwappedB0 = false;
+  BinaryOperator *B00;
+  if (!match(B0->getOperand(0), m_BinOp(B00)) ||
+      B00->getOpcode() != OtherOpc) {
+    B0->swapOperands();
+    SwappedB0 = true;
+  }
+
+  // B00 must match B1 and not match V01.
+  if (match(B0->getOperand(0), m_BinOp(B00)) && B00->getOpcode() == OtherOpc) {
+    Value *V01 = B0->getOperand(1);
+    BinaryOperator *B01;
+    if (!match(V01, m_BinOp(B01)) || B01->getOpcode() != OtherOpc) {
+      // B00 and B1 are displaced matching binops, so pull them together:
+      // (B00 & V01) & B1 --> (B00 & B1) & V01
+      B.setOperand(1, V01);
+      B0->setOperand(1, B1);
+      B0->moveBefore(&B);
+      // Conservatively clear integer flags; FP fast-math-flags propagate.
+      if (!isa<FPMathOperator>(&B))
+        B.clearSubclassOptionalData();
+      // The intermediate result is not the same, so invalidate debug info for
+      // that value.
+      discardDebugInfo(B0);
+      MadeChange = true;
+      return;
+    }
+  }
+
+  // Restore unnecessary swap to reduce churn.
+  if (SwappedB0)
+    B0->swapOperands();
+}
+
 void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
   // First, walk the expression tree, linearizing the tree, collecting the
   // operand information.
@@ -2238,6 +2302,9 @@
   // Now that we ordered and optimized the expressions, splat them back into
   // the expression tree, removing any unneeded nodes.
   RewriteExprTree(I, Ops);
+
+  // Try a final reassociation of the root of the tree.
+  swapOperandsToMatchBinops(*I);
 }
 
 void
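For reference, a minimal sketch in LLVM IR of the operand swap that
swapOperandsToMatchBinops performs, mirroring the @and_shl test updated below
(value names are illustrative; the factoring fold itself is left to a later
pass, per the TODO above):

  ; Before: the two matching 'shl' ops are separated by %z.
  %sx = shl i8 %x, %shamt
  %sy = shl i8 %y, %shamt
  %a = and i8 %sx, %z
  %r = and i8 %a, %sy

  ; After: the shifts are paired under one 'and', exposing the
  ; (x << s) & (y << s) --> (x & y) << s factoring opportunity.
  %a = and i8 %sy, %sx
  %r = and i8 %a, %z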
Index: test/Transforms/Reassociate/matching-binops.ll
===================================================================
--- test/Transforms/Reassociate/matching-binops.ll
+++ test/Transforms/Reassociate/matching-binops.ll
@@ -16,8 +16,8 @@
 ; CHECK-LABEL: @and_shl(
 ; CHECK-NEXT:    [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = and i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = and i8 [[SY]], [[SX]]
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = shl i8 %x, %shamt
@@ -31,8 +31,8 @@
 ; CHECK-LABEL: @or_shl(
 ; CHECK-NEXT:    [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = or i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = or i8 [[SY]], [[SX]]
+; CHECK-NEXT:    [[R:%.*]] = or i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = shl i8 %x, %shamt
@@ -46,8 +46,8 @@
 ; CHECK-LABEL: @xor_shl(
 ; CHECK-NEXT:    [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = xor i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = xor i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = xor i8 [[SY]], [[SX]]
+; CHECK-NEXT:    [[R:%.*]] = xor i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = shl i8 %x, %shamt
@@ -61,8 +61,8 @@
 ; CHECK-LABEL: @and_lshr(
 ; CHECK-NEXT:    [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = and i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = and i8 [[SY]], [[SX]]
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = lshr i8 %x, %shamt
@@ -76,8 +76,8 @@
 ; CHECK-LABEL: @or_lshr(
 ; CHECK-NEXT:    [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = or i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = or i8 [[SY]], [[SX]]
+; CHECK-NEXT:    [[R:%.*]] = or i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = lshr i8 %x, %shamt
@@ -91,8 +91,8 @@
 ; CHECK-LABEL: @xor_lshr(
 ; CHECK-NEXT:    [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = xor i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = xor i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = xor i8 [[SY]], [[SX]]
+; CHECK-NEXT:    [[R:%.*]] = xor i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = lshr i8 %x, %shamt
@@ -106,8 +106,8 @@
 ; CHECK-LABEL: @and_ashr(
 ; CHECK-NEXT:    [[SX:%.*]] = ashr i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = ashr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = and i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = and i8 [[SY]], [[SX]]
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = ashr i8 %x, %shamt
@@ -121,8 +121,8 @@
 ; CHECK-LABEL: @or_ashr(
 ; CHECK-NEXT:    [[SX:%.*]] = ashr i8 [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = ashr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = or i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = or i8 [[SY]], [[SX]]
+; CHECK-NEXT:    [[R:%.*]] = or i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = ashr i8 %x, %shamt
@@ -138,8 +138,8 @@
 ; CHECK-LABEL: @xor_ashr(
 ; CHECK-NEXT:    [[SX:%.*]] = ashr <2 x i8> [[X:%.*]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = ashr <2 x i8> [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT:    [[A:%.*]] = xor <2 x i8> [[SX]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = xor <2 x i8> [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = xor <2 x i8> [[SY]], [[SX]]
+; CHECK-NEXT:    [[R:%.*]] = xor <2 x i8> [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %sx = ashr <2 x i8> %x, %shamt
@@ -207,9 +207,9 @@
 define i8 @add_lshr(i8 %x, i8 %y, i8 %z, i8 %shamt) {
 ; CHECK-LABEL: @add_lshr(
 ; CHECK-NEXT:    [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = add i8 [[SX]], [[Z:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
-; CHECK-NEXT:    [[R:%.*]] = add i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = add i8 [[SY]], [[SX]]
+; CHECK-NEXT:    [[R:%.*]] = add i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = lshr i8 %x, %shamt
@@ -225,8 +225,8 @@
 ; CHECK-LABEL: @mul_sub(
 ; CHECK-NEXT:    [[SX:%.*]] = sub i8 [[X:%.*]], [[M:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = sub i8 [[Y:%.*]], [[M]]
-; CHECK-NEXT:    [[A:%.*]] = mul nsw i8 [[SX]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = mul nuw i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = mul i8 [[SY]], [[SX]]
+; CHECK-NEXT:    [[R:%.*]] = mul i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = sub i8 %x, %m
@@ -239,9 +239,9 @@
 define i8 @add_mul(i8 %x, i8 %y, i8 %z, i8 %m) {
 ; CHECK-LABEL: @add_mul(
 ; CHECK-NEXT:    [[SX:%.*]] = mul nuw i8 [[X:%.*]], 42
-; CHECK-NEXT:    [[A:%.*]] = add nuw i8 [[Z:%.*]], [[SX]]
 ; CHECK-NEXT:    [[SY:%.*]] = mul nsw i8 [[M:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = add nsw i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = add i8 [[SX]], [[SY]]
+; CHECK-NEXT:    [[R:%.*]] = add i8 [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %sx = mul nuw i8 %x, 42
@@ -257,9 +257,9 @@
 define float @fadd_fmul(float %x, float %y, float %z, float %m) {
 ; CHECK-LABEL: @fadd_fmul(
 ; CHECK-NEXT:    [[SX:%.*]] = fmul float [[X:%.*]], [[M:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = fadd fast float [[SX]], [[Z:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = fmul float [[Y:%.*]], [[M]]
-; CHECK-NEXT:    [[R:%.*]] = fadd fast float [[A]], [[SY]]
+; CHECK-NEXT:    [[A:%.*]] = fadd fast float [[SY]], [[SX]]
+; CHECK-NEXT:    [[R:%.*]] = fadd fast float [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret float [[R]]
 ;
   %sx = fmul float %x, %m
@@ -273,8 +273,8 @@
 ; CHECK-LABEL: @fmul_fdiv(
 ; CHECK-NEXT:    [[SX:%.*]] = fdiv float [[X:%.*]], [[M:%.*]]
 ; CHECK-NEXT:    [[SY:%.*]] = fdiv float [[Y:%.*]], 4.200000e+01
-; CHECK-NEXT:    [[A:%.*]] = fmul fast float [[SY]], [[Z:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = fmul fast float [[A]], [[SX]]
+; CHECK-NEXT:    [[A:%.*]] = fmul fast float [[SY]], [[SX]]
+; CHECK-NEXT:    [[R:%.*]] = fmul fast float [[A]], [[Z:%.*]]
 ; CHECK-NEXT:    ret float [[R]]
 ;
   %sx = fdiv float %x, %m
@@ -296,9 +296,9 @@
 ; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[SHL]], metadata !16, metadata !DIExpression()), !dbg !25
 ; CHECK-NEXT:    [[SHL1:%.*]] = shl i32 [[Y]], [[SHAMT]], !dbg !26
 ; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[SHL1]], metadata !17, metadata !DIExpression()), !dbg !27
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SHL]], [[Z]], !dbg !28
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[AND]], metadata !18, metadata !DIExpression()), !dbg !29
-; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[AND]], [[SHL1]], !dbg !30
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 undef, metadata !18, metadata !DIExpression()), !dbg !28
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SHL1]], [[SHL]], !dbg !29
+; CHECK-NEXT:    [[AND2:%.*]] = and i32 [[AND]], [[Z]], !dbg !30
 ; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[AND2]], metadata !19, metadata !DIExpression()), !dbg !31
 ; CHECK-NEXT:    ret i32 [[AND2]], !dbg !32
 ;
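To make the debug-info behavior concrete, here is a minimal sketch in LLVM IR
of what the last test checks (metadata operands elided as "..."): the
intermediate 'and' now computes a different value, so discardDebugInfo resets
the dbg.value that tracked it to undef rather than letting it describe the
wrong value, while the dbg.value for the root result is preserved because the
final result is unchanged.

  ; Before: the dbg.value tracks the intermediate result.
  %and = and i32 %shl, %z, !dbg !28
  call void @llvm.dbg.value(metadata i32 %and, ...)

  ; After: the intermediate is rewritten, so its debug use becomes undef.
  call void @llvm.dbg.value(metadata i32 undef, ...)
  %and = and i32 %shl1, %shl, !dbg !29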