Index: llvm/trunk/lib/Transforms/Scalar/Reassociate.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/Reassociate.cpp +++ llvm/trunk/lib/Transforms/Scalar/Reassociate.cpp @@ -321,10 +321,8 @@ // If this is a not or neg instruction, do not count it for rank. This // assures us that X and ~X will have the same rank. - Type *Ty = V->getType(); - if ((!Ty->isIntegerTy() && !Ty->isFloatingPointTy()) || - (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) && - !BinaryOperator::isFNeg(I))) + if (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) && + !BinaryOperator::isFNeg(I)) ++Rank; DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank << "\n"); @@ -351,7 +349,7 @@ static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp) { - if (S1->getType()->isIntegerTy()) + if (S1->getType()->isIntOrIntVectorTy()) return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore); else { BinaryOperator *Res = @@ -363,7 +361,7 @@ static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp) { - if (S1->getType()->isIntegerTy()) + if (S1->getType()->isIntOrIntVectorTy()) return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore); else { BinaryOperator *Res = @@ -375,7 +373,7 @@ static BinaryOperator *CreateNeg(Value *S1, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp) { - if (S1->getType()->isIntegerTy()) + if (S1->getType()->isIntOrIntVectorTy()) return BinaryOperator::CreateNeg(S1, Name, InsertBefore); else { BinaryOperator *Res = BinaryOperator::CreateFNeg(S1, Name, InsertBefore); @@ -388,8 +386,8 @@ /// static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) { Type *Ty = Neg->getType(); - Constant *NegOne = Ty->isIntegerTy() ? ConstantInt::getAllOnesValue(Ty) - : ConstantFP::get(Ty, -1.0); + Constant *NegOne = Ty->isIntOrIntVectorTy() ? + ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0); BinaryOperator *Res = CreateMul(Neg->getOperand(1), NegOne, "", Neg, Neg); Neg->setOperand(1, Constant::getNullValue(Ty)); // Drop use of op. @@ -872,7 +870,7 @@ Constant *Undef = UndefValue::get(I->getType()); NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode), Undef, Undef, "", I); - if (NewOp->getType()->isFloatingPointTy()) + if (NewOp->getType()->isFPOrFPVectorTy()) NewOp->setFastMathFlags(I->getFastMathFlags()); } else { NewOp = NodesToRewrite.pop_back_val(); @@ -1520,8 +1518,8 @@ // Insert a new multiply. Type *Ty = TheOp->getType(); - Constant *C = Ty->isIntegerTy() ? ConstantInt::get(Ty, NumFound) - : ConstantFP::get(Ty, NumFound); + Constant *C = Ty->isIntOrIntVectorTy() ? + ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound); Instruction *Mul = CreateMul(TheOp, C, "factor", I, I); // Now that we have inserted a multiply, optimize it. This allows us to @@ -1661,7 +1659,7 @@ // from an expression will drop a use of maxocc, and this can cause // RemoveFactorFromExpression on successive values to behave differently. Instruction *DummyInst = - I->getType()->isIntegerTy() + I->getType()->isIntOrIntVectorTy() ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal) : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal); @@ -1792,7 +1790,7 @@ Value *LHS = Ops.pop_back_val(); do { - if (LHS->getType()->isIntegerTy()) + if (LHS->getType()->isIntOrIntVectorTy()) LHS = Builder.CreateMul(LHS, Ops.pop_back_val()); else LHS = Builder.CreateFMul(LHS, Ops.pop_back_val()); @@ -2090,8 +2088,9 @@ if (I->isCommutative()) canonicalizeOperands(I); - // Don't optimize vector instructions. - if (I->getType()->isVectorTy()) + // TODO: We should optimize vector Xor instructions, but they are + // currently unsupported. + if (I->getType()->isVectorTy() && I->getOpcode() == Instruction::Xor) return; // Don't optimize floating point instructions that don't have unsafe algebra. @@ -2170,9 +2169,6 @@ } void Reassociate::ReassociateExpression(BinaryOperator *I) { - assert(!I->getType()->isVectorTy() && - "Reassociation of vector instructions is not supported."); - // First, walk the expression tree, linearizing the tree, collecting the // operand information. SmallVector Tree; Index: llvm/trunk/test/Transforms/Reassociate/fast-ReassociateVector.ll =================================================================== --- llvm/trunk/test/Transforms/Reassociate/fast-ReassociateVector.ll +++ llvm/trunk/test/Transforms/Reassociate/fast-ReassociateVector.ll @@ -1,46 +1,192 @@ ; RUN: opt < %s -reassociate -S | FileCheck %s -; Canonicalize operands, but don't optimize floating point vector operations. -define <4 x float> @test1() { -; CHECK-LABEL: test1 -; CHECK-NEXT: %tmp1 = fsub fast <4 x float> zeroinitializer, zeroinitializer -; CHECK-NEXT: %tmp2 = fmul fast <4 x float> %tmp1, zeroinitializer - - %tmp1 = fsub fast <4 x float> zeroinitializer, zeroinitializer - %tmp2 = fmul fast <4 x float> zeroinitializer, %tmp1 - ret <4 x float> %tmp2 -} - -; Commute integer vector operations. -define <2 x i32> @test2(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: test2 -; CHECK-NEXT: %tmp1 = add <2 x i32> %x, %y -; CHECK-NEXT: %tmp2 = add <2 x i32> %x, %y -; CHECK-NEXT: %tmp3 = add <2 x i32> %tmp1, %tmp2 - - %tmp1 = add <2 x i32> %x, %y - %tmp2 = add <2 x i32> %y, %x - %tmp3 = add <2 x i32> %tmp1, %tmp2 - ret <2 x i32> %tmp3 -} - -define <2 x i32> @test3(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: test3 -; CHECK-NEXT: %tmp1 = mul <2 x i32> %x, %y -; CHECK-NEXT: %tmp2 = mul <2 x i32> %x, %y -; CHECK-NEXT: %tmp3 = mul <2 x i32> %tmp1, %tmp2 - - %tmp1 = mul <2 x i32> %x, %y - %tmp2 = mul <2 x i32> %y, %x - %tmp3 = mul <2 x i32> %tmp1, %tmp2 - ret <2 x i32> %tmp3 -} - -define <2 x i32> @test4(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: test4 -; CHECK-NEXT: %tmp1 = and <2 x i32> %x, %y -; CHECK-NEXT: %tmp2 = and <2 x i32> %x, %y -; CHECK-NEXT: %tmp3 = and <2 x i32> %tmp1, %tmp2 +; Check that a*c+b*c is turned into (a+b)*c +define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-LABEL: @test1 +; CHECK-NEXT: %tmp = fadd fast <4 x float> %b, %a +; CHECK-NEXT: %tmp1 = fmul fast <4 x float> %tmp, %c +; CHECK-NEXT: ret <4 x float> %tmp1 + + %mul = fmul fast <4 x float> %a, %c + %mul1 = fmul fast <4 x float> %b, %c + %add = fadd fast <4 x float> %mul, %mul1 + ret <4 x float> %add +} + +; Check that a*a*b+a*a*c is turned into a*(a*(b+c)). +define <2 x float> @test2(<2 x float> %a, <2 x float> %b, <2 x float> %c) { +; CHECK-LABEL: @test2 +; CHECK-NEXT: fadd fast <2 x float> %c, %b +; CHECK-NEXT: fmul fast <2 x float> %a, %tmp2 +; CHECK-NEXT: fmul fast <2 x float> %tmp3, %a +; CHECK-NEXT: ret <2 x float> + + %t0 = fmul fast <2 x float> %a, %b + %t1 = fmul fast <2 x float> %a, %t0 + %t2 = fmul fast <2 x float> %a, %c + %t3 = fmul fast <2 x float> %a, %t2 + %t4 = fadd fast <2 x float> %t1, %t3 + ret <2 x float> %t4 +} + +; Check that a*b+a*c+d is turned into a*(b+c)+d. +define <2 x double> @test3(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %d) { +; CHECK-LABEL: @test3 +; CHECK-NEXT: fadd fast <2 x double> %c, %b +; CHECK-NEXT: fmul fast <2 x double> %tmp, %a +; CHECK-NEXT: fadd fast <2 x double> %tmp1, %d +; CHECK-NEXT: ret <2 x double> + + %t0 = fmul fast <2 x double> %a, %b + %t1 = fmul fast <2 x double> %a, %c + %t2 = fadd fast <2 x double> %t1, %d + %t3 = fadd fast <2 x double> %t0, %t2 + ret <2 x double> %t3 +} + +; No fast-math. +define <2 x float> @test4(<2 x float> %A) { +; CHECK-LABEL: @test4 +; CHECK-NEXT: %X = fadd <2 x float> %A, +; CHECK-NEXT: %Y = fadd <2 x float> %A, +; CHECK-NEXT: %R = fsub <2 x float> %X, %Y +; CHECK-NEXT: ret <2 x float> %R + + %X = fadd <2 x float> %A, < float 1.000000e+00, float 1.000000e+00 > + %Y = fadd <2 x float> %A, < float 1.000000e+00, float 1.000000e+00 > + %R = fsub <2 x float> %X, %Y + ret <2 x float> %R +} + +; Check 47*X + 47*X -> 94*X. +define <2 x float> @test5(<2 x float> %X) { +; CHECK-LABEL: @test5 +; CHECK-NEXT: fmul fast <2 x float> %X, +; CHECK-NEXT: ret <2 x float> + + %Y = fmul fast <2 x float> %X, + %Z = fadd fast <2 x float> %Y, %Y + ret <2 x float> %Z +} + +; Check X+X+X -> 3*X. +define <2 x float> @test6(<2 x float> %X) { +; CHECK-LABEL: @test6 +; CHECK-NEXT: fmul fast <2 x float> %X, +; CHECK-NEXT: ret <2 x float> + + %Y = fadd fast <2 x float> %X ,%X + %Z = fadd fast <2 x float> %Y, %X + ret <2 x float> %Z +} + +; Check 127*W+50*W -> 177*W. +define <2 x double> @test7(<2 x double> %W) { +; CHECK-LABEL: @test7 +; CHECK-NEXT: fmul fast <2 x double> %W, +; CHECK-NEXT: ret <2 x double> + + %X = fmul fast <2 x double> %W, + %Y = fmul fast <2 x double> %W, + %Z = fadd fast <2 x double> %Y, %X + ret <2 x double> %Z +} + +; Check X*12*12 -> X*144. +define <2 x float> @test8(<2 x float> %arg) { +; CHECK-LABEL: @test8 +; CHECK: fmul fast <2 x float> %arg, +; CHECK-NEXT: ret <2 x float> %tmp2 + + %tmp1 = fmul fast <2 x float> , %arg + %tmp2 = fmul fast <2 x float> %tmp1, + ret <2 x float> %tmp2 +} + +; Check (b+(a+1234))+-a -> b+1234. +define <2 x double> @test9(<2 x double> %b, <2 x double> %a) { +; CHECK-LABEL: @test9 +; CHECK: fadd fast <2 x double> %b, +; CHECK-NEXT: ret <2 x double> + + %1 = fadd fast <2 x double> %a, + %2 = fadd fast <2 x double> %b, %1 + %3 = fsub fast <2 x double> , %a + %4 = fadd fast <2 x double> %2, %3 + ret <2 x double> %4 +} + +; Check -(-(z*40)*a) -> a*40*z. +define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) { +; CHECK-LABEL: @test10 +; CHECK: fmul fast <2 x float> %a, +; CHECK-NEXT: fmul fast <2 x float> %e, %z +; CHECK-NEXT: ret <2 x float> + + %d = fmul fast <2 x float> %z, + %c = fsub fast <2 x float> , %d + %e = fmul fast <2 x float> %a, %c + %f = fsub fast <2 x float> , %e + ret <2 x float> %f +} + +; Check x*y+y*x -> x*y*2. +define <2 x double> @test11(<2 x double> %x, <2 x double> %y) { +; CHECK-LABEL: @test11 +; CHECK-NEXT: %factor = fmul fast <2 x double> %y, +; CHECK-NEXT: %tmp1 = fmul fast <2 x double> %factor, %x +; CHECK-NEXT: ret <2 x double> %tmp1 + + %1 = fmul fast <2 x double> %x, %y + %2 = fmul fast <2 x double> %y, %x + %3 = fadd fast <2 x double> %1, %2 + ret <2 x double> %3 +} + +; FIXME: shifts should be converted to mul to assist further reassociation. +define <2 x i64> @test12(<2 x i64> %b, <2 x i64> %c) { +; CHECK-LABEL: @test12 +; CHECK-NEXT: %mul = mul <2 x i64> %c, %b +; CHECK-NEXT: %shl = shl <2 x i64> %mul, +; CHECK-NEXT: ret <2 x i64> %shl + + %mul = mul <2 x i64> %c, %b + %shl = shl <2 x i64> %mul, + ret <2 x i64> %shl +} + +; FIXME: expressions with a negative const should be canonicalized to assist +; further reassociation. +; We would expect (-5*b)+a -> a-(5*b) but only the constant operand is commuted. +define <4 x float> @test13(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: @test13 +; CHECK-NEXT: %mul = fmul fast <4 x float> %b, +; CHECK-NEXT: %add = fadd fast <4 x float> %mul, %a +; CHECK-NEXT: ret <4 x float> %add + + %mul = fmul fast <4 x float> , %b + %add = fadd fast <4 x float> %mul, %a + ret <4 x float> %add +} + +; Break up subtract to assist further reassociation. +; Check a+b-c -> a+b+-c. +define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { +; CHECK-LABEL: @test14 +; CHECK-NEXT: %add = add <2 x i64> %b, %a +; CHECK-NEXT: %c.neg = sub <2 x i64> zeroinitializer, %c +; CHECK-NEXT: %sub = add <2 x i64> %add, %c.neg +; CHECK-NEXT: ret <2 x i64> %sub + + %add = add <2 x i64> %b, %a + %sub = sub <2 x i64> %add, %c + ret <2 x i64> %sub +} + +define <2 x i32> @test15(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: test15 +; CHECK-NEXT: %tmp3 = and <2 x i32> %y, %x +; CHECK-NEXT: ret <2 x i32> %tmp3 %tmp1 = and <2 x i32> %x, %y %tmp2 = and <2 x i32> %y, %x @@ -48,11 +194,10 @@ ret <2 x i32> %tmp3 } -define <2 x i32> @test5(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: test5 -; CHECK-NEXT: %tmp1 = or <2 x i32> %x, %y -; CHECK-NEXT: %tmp2 = or <2 x i32> %x, %y -; CHECK-NEXT: %tmp3 = or <2 x i32> %tmp1, %tmp2 +define <2 x i32> @test16(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: test16 +; CHECK-NEXT: %tmp3 = or <2 x i32> %y, %x +; CHECK-NEXT: ret <2 x i32> %tmp3 %tmp1 = or <2 x i32> %x, %y %tmp2 = or <2 x i32> %y, %x @@ -60,8 +205,9 @@ ret <2 x i32> %tmp3 } -define <2 x i32> @test6(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: test6 +; FIXME: Optimize vector xor. Currently only commute operands. +define <2 x i32> @test17(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: test17 ; CHECK-NEXT: %tmp1 = xor <2 x i32> %x, %y ; CHECK-NEXT: %tmp2 = xor <2 x i32> %x, %y ; CHECK-NEXT: %tmp3 = xor <2 x i32> %tmp1, %tmp2