Index: lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- lib/Transforms/InstCombine/InstCombineInternal.h +++ lib/Transforms/InstCombine/InstCombineInternal.h @@ -678,6 +678,7 @@ Value *SimplifyVectorOp(BinaryOperator &Inst); + Value *SimplifyTrigOp(Instruction &Inst); /// Given a binary operator, cast instruction, or select which has a PHI node /// as operand #0, see if we can fold the instruction into the PHI (which is Index: lib/Transforms/InstCombine/InstCombineMulDivRem.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -193,6 +193,9 @@ bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyTrigOp(I)) + return replaceInstUsesWith(I, V); + if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); @@ -613,6 +616,9 @@ bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyTrigOp(I)) + return replaceInstUsesWith(I, V); + if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); @@ -1167,6 +1173,9 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyTrigOp(I)) + return replaceInstUsesWith(I, V); + if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); @@ -1236,6 +1245,9 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyTrigOp(I)) + return replaceInstUsesWith(I, V); + if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); @@ -1347,6 +1359,9 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = 
SimplifyTrigOp(I)) + return replaceInstUsesWith(I, V); + if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); Index: lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- lib/Transforms/InstCombine/InstructionCombining.cpp +++ lib/Transforms/InstCombine/InstructionCombining.cpp @@ -107,23 +107,23 @@ #define DEBUG_TYPE "instcombine" -STATISTIC(NumCombined , "Number of insts combined"); +STATISTIC(NumCombined, "Number of insts combined"); STATISTIC(NumConstProp, "Number of constant folds"); -STATISTIC(NumDeadInst , "Number of dead inst eliminated"); -STATISTIC(NumSunkInst , "Number of instructions sunk"); -STATISTIC(NumExpand, "Number of expansions"); -STATISTIC(NumFactor , "Number of factorizations"); -STATISTIC(NumReassoc , "Number of reassociations"); +STATISTIC(NumDeadInst, "Number of dead inst eliminated"); +STATISTIC(NumSunkInst, "Number of instructions sunk"); +STATISTIC(NumExpand, "Number of expansions"); +STATISTIC(NumFactor, "Number of factorizations"); +STATISTIC(NumReassoc, "Number of reassociations"); DEBUG_COUNTER(VisitCounter, "instcombine-visit", "Controls which instructions are visited"); static cl::opt -EnableExpensiveCombines("expensive-combines", - cl::desc("Enable expensive instruction combines")); + EnableExpensiveCombines("expensive-combines", + cl::desc("Enable expensive instruction combines")); -static cl::opt -MaxArraySize("instcombine-maxarray-size", cl::init(1024), - cl::desc("Maximum array size considered when doing a combine")); +static cl::opt MaxArraySize( + "instcombine-maxarray-size", cl::init(1024), + cl::desc("Maximum array size considered when doing a combine")); // FIXME: Remove this flag when it is no longer necessary to convert // llvm.dbg.declare to avoid inaccurate debug info. Setting this to false @@ -289,8 +289,8 @@ // Order operands such that they are listed from right (least complex) to // left (most complex). 
This puts constants before unary operators before // binary operators. - if (I.isCommutative() && getComplexity(I.getOperand(0)) < - getComplexity(I.getOperand(1))) + if (I.isCommutative() && + getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) Changed = !I.swapOperands(); BinaryOperator *Op0 = dyn_cast(I.getOperand(0)); @@ -396,11 +396,10 @@ // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" // if C1 and C2 are constants. - if (Op0 && Op1 && - Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode && - isa(Op0->getOperand(1)) && - isa(Op1->getOperand(1)) && - Op0->hasOneUse() && Op1->hasOneUse()) { + if (Op0 && Op1 && Op0->getOpcode() == Opcode && + Op1->getOpcode() == Opcode && isa(Op0->getOperand(1)) && + isa(Op1->getOperand(1)) && Op0->hasOneUse() && + Op1->hasOneUse()) { Value *A = Op0->getOperand(0); Constant *C1 = cast(Op0->getOperand(1)); Value *B = Op1->getOperand(0); @@ -664,16 +663,14 @@ // term. if (Op0) if (Value *Ident = getIdentityValue(LHSOpcode, RHS)) - if (Value *V = - tryFactorization(I, LHSOpcode, A, B, RHS, Ident)) + if (Value *V = tryFactorization(I, LHSOpcode, A, B, RHS, Ident)) return V; // The instruction has the form "(B) op (C op' D)". Try to factorize common // term. if (Op1) if (Value *Ident = getIdentityValue(RHSOpcode, LHS)) - if (Value *V = - tryFactorization(I, RHSOpcode, LHS, Ident, C, D)) + if (Value *V = tryFactorization(I, RHSOpcode, LHS, Ident, C, D)) return V; } @@ -856,8 +853,8 @@ std::swap(Op0, Op1); auto *BO = cast(&I); - Value *RI = Builder.CreateBinOp(BO->getOpcode(), Op0, Op1, - SO->getName() + ".op"); + Value *RI = + Builder.CreateBinOp(BO->getOpcode(), Op0, Op1, SO->getName() + ".op"); auto *FPInst = dyn_cast(RI); if (FPInst && isa(FPInst)) FPInst->copyFastMathFlags(BO); @@ -965,8 +962,10 @@ if (isa(InVal) && !isa(InVal)) continue; - if (isa(InVal)) return nullptr; // Itself a phi. - if (NonConstBB) return nullptr; // More than one non-const value. 
+ if (isa(InVal)) + return nullptr; // Itself a phi. + if (NonConstBB) + return nullptr; // More than one non-const value. NonConstBB = PN->getIncomingBlock(i); @@ -989,7 +988,8 @@ // do this if the pred block is unconditionally branching into the phi block. if (NonConstBB != nullptr) { BranchInst *BI = dyn_cast(NonConstBB->getTerminator()); - if (!BI || !BI->isUnconditional()) return nullptr; + if (!BI || !BI->isUnconditional()) + return nullptr; } // Okay, we can do the transformation: create the new PHI node. @@ -1045,17 +1045,17 @@ if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); else if (isa(CI)) - InV = Builder.CreateICmp(CI->getPredicate(), PN->getIncomingValue(i), - C, "phitmp"); + InV = Builder.CreateICmp(CI->getPredicate(), PN->getIncomingValue(i), C, + "phitmp"); else - InV = Builder.CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i), - C, "phitmp"); + InV = Builder.CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i), C, + "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } else if (auto *BO = dyn_cast(&I)) { for (unsigned i = 0; i != NumPHIValues; ++i) { - Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i), - Builder); + Value *InV = + foldOperationIntoPhiValue(BO, PN->getIncomingValue(i), Builder); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } else { @@ -1074,7 +1074,8 @@ for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) { Instruction *User = cast(*UI++); - if (User == &I) continue; + if (User == &I) + continue; replaceInstUsesWith(*User, NewPN); eraseInstFromFunction(*User); } @@ -1110,8 +1111,8 @@ Type *IntPtrTy = DL.getIntPtrType(PtrTy); int64_t FirstIdx = 0; if (int64_t TySize = DL.getTypeAllocSize(Ty)) { - FirstIdx = Offset/TySize; - Offset -= FirstIdx*TySize; + FirstIdx = Offset / TySize; + Offset -= FirstIdx * TySize; // Handle hosts where % returns negative instead of values [0..TySize). 
if (Offset < 0) { @@ -1136,15 +1137,15 @@ "Offset must stay within the indexed type"); unsigned Elt = SL->getElementContainingOffset(Offset); - NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()), - Elt)); + NewIndices.push_back( + ConstantInt::get(Type::getInt32Ty(Ty->getContext()), Elt)); Offset -= SL->getElementOffset(Elt); Ty = STy->getElementType(Elt); } else if (ArrayType *AT = dyn_cast(Ty)) { uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType()); assert(EltSize && "Cannot index into a zero-sized array"); - NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize)); + NewIndices.push_back(ConstantInt::get(IntPtrTy, Offset / EltSize)); Offset %= EltSize; Ty = AT->getElementType(); } else { @@ -1160,8 +1161,7 @@ // If this GEP has only 0 indices, it is the same pointer as // Src. If Src is not a trivial GEP too, don't combine // the indices. - if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() && - !Src.hasOneUse()) + if (GEP.hasAllZeroIndices() && !Src.hasAllZeroIndices() && !Src.hasOneUse()) return false; return true; } @@ -1171,7 +1171,8 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { assert(isa(Val->getType()) && "Can only descale integers!"); assert(cast(Val->getType())->getBitWidth() == - Scale.getBitWidth() && "Scale not compatible with value!"); + Scale.getBitWidth() && + "Scale not compatible with value!"); // If Val is zero or Scale is one then Val = Val * Scale. if (match(Val, m_Zero()) || Scale == 1) { @@ -1277,8 +1278,8 @@ return nullptr; Value *LHS = BO->getOperand(0); - int32_t Amt = cast(BO->getOperand(1))-> - getLimitedValue(Scale.getBitWidth()); + int32_t Amt = cast(BO->getOperand(1)) + ->getLimitedValue(Scale.getBitWidth()); // Op = LHS << Amt. if (Amt == logScale) { @@ -1419,12 +1420,144 @@ return BO; } +static bool isTrigLibCall(CallInst *CI) { + // We can only hope to do anything useful if we can ignore things like errno + // and floating-point exceptions. 
+  // We already checked the prototype.
+  return CI->hasFnAttr(Attribute::NoUnwind) &&
+         CI->hasFnAttr(Attribute::ReadNone);
+}
+
+/// \brief Replaces division or multiplication of trigonometric functions
+/// with an equivalent trigonometric operation.
+
+Value *InstCombiner::SimplifyTrigOp(Instruction &I) {
+
+  if (((I.getOpcode() == Instruction::FMul) ||
+       (I.getOpcode() == Instruction::Mul))) {
+    if (isa<CallInst>(I.getOperand(0)) && isa<CallInst>(I.getOperand(1))) {
+      CallInst *call1 = dyn_cast<CallInst>(I.getOperand(0));
+      CallInst *call2 = dyn_cast<CallInst>(I.getOperand(1));
+
+      // Return nullptr if the arguments to the calls are not the same.
+      if (!(call1->getOperand(0) == call2->getOperand(0)))
+        return nullptr;
+
+      // tan(x)*cos(x)=sin(x);
+
+      // Operand 1 of a one-argument CallInst is the callee; compare its name.
+      Value *Op1 = call1->getOperand(1);
+      Value *Op2 = call2->getOperand(1);
+
+      if ((isTrigLibCall(call1) &&
+           ((Op1->getName() == "tan") || (Op1->getName() == "tanf") ||
+            (Op1->getName() == "tanl")) &&
+           (Op2->getName().startswith("llvm.cos"))) ||
+          (isTrigLibCall(call2) && (Op1->getName().startswith("llvm.cos")) &&
+           ((Op2->getName() == "tan") || (Op2->getName() == "tanf") ||
+            (Op2->getName() == "tanl")))) {
+        IRBuilder<>::FastMathFlagGuard Guard(Builder);
+        Builder.setFastMathFlags(I.getFastMathFlags());
+        Function *fun = Intrinsic::getDeclaration(I.getModule(), Intrinsic::sin,
+                                                  I.getType());
+        CallInst *SinCall = Builder.CreateCall(fun, call2->getOperand(0));
+
+        SinCall->takeName(&I);
+        SinCall->copyFastMathFlags(&I);
+
+        return SinCall;
+      }
+
+      // sin(x)*cos(x)=sin(2x)*0.5
+
+      else if (((call1->getOperand(1)->getName().startswith("llvm.sin")) &&
+                (call2->getOperand(1)->getName().startswith("llvm.cos"))) ||
+               ((call1->getOperand(1)->getName().startswith("llvm.cos")) &&
+                (call2->getOperand(1)->getName().startswith("llvm.sin")))) {
+        IRBuilder<>::FastMathFlagGuard Guard(Builder);
+        Builder.setFastMathFlags(I.getFastMathFlags());
+        Function *fun = Intrinsic::getDeclaration(I.getModule(), Intrinsic::sin,
+                                                  I.getType());
+
+        Constant *ConstFloatTwo =
+            ConstantFP::get(I.getType(), 2.0);
+        // Hold as Value: the multiply may constant-fold, in which case a
+        // dyn_cast<Instruction> would yield null.
+        Value *MulVal =
+            Builder.CreateFMul(ConstFloatTwo, call1->getOperand(0));
+        CallInst *SinCall = Builder.CreateCall(fun, MulVal);
+        Instruction *DivIns =
+            dyn_cast<Instruction>(Builder.CreateFDiv(SinCall, ConstFloatTwo));
+        DivIns->takeName(&I);
+        DivIns->copyFastMathFlags(&I);
+
+        return DivIns;
+      }
+    }
+  }
+
+  else if ((I.getOpcode() == Instruction::FDiv) ||
+           (I.getOpcode() == Instruction::SDiv) ||
+           (I.getOpcode() == Instruction::UDiv)) {
+    if (isa<CallInst>(I.getOperand(0)) && isa<CallInst>(I.getOperand(1))) {
+      CallInst *call1 = dyn_cast<CallInst>(I.getOperand(0));
+      CallInst *call2 = dyn_cast<CallInst>(I.getOperand(1));
+
+      // Return nullptr if the arguments to the calls are not the same.
+
+      if (!(call1->getOperand(0) == call2->getOperand(0)))
+        return nullptr;
+
+      // Operand 1 of a one-argument CallInst is the callee; compare its name.
+      Value *Op1 = call1->getOperand(1);
+      Value *Op2 = call2->getOperand(1);
+
+      // sin/tan=cos;
+      if ((Op1->getName().startswith("llvm.sin")) &&
+
+          (isTrigLibCall(call2) &&
+           ((Op2->getName() == "tan") || (Op2->getName() == "tanf") ||
+            (Op2->getName() == "tanl")))) {
+        IRBuilder<>::FastMathFlagGuard Guard(Builder);
+        Builder.setFastMathFlags(I.getFastMathFlags());
+        Function *fun = Intrinsic::getDeclaration(I.getModule(), Intrinsic::cos,
+                                                  I.getType());
+        CallInst *CosCall = Builder.CreateCall(fun, call1->getOperand(0));
+        CosCall->takeName(&I);
+        CosCall->copyFastMathFlags(&I);
+        return CosCall;
+      }
+
+      // tan/sin=1/cos;
+      else if ((Op2->getName().startswith("llvm.sin")) &&
+
+               (isTrigLibCall(call1) &&
+                ((Op1->getName() == "tan") || (Op1->getName() == "tanf") ||
+                 (Op1->getName() == "tanl")))) {
+        IRBuilder<>::FastMathFlagGuard Guard(Builder);
+        Builder.setFastMathFlags(I.getFastMathFlags());
+        Function *fun = Intrinsic::getDeclaration(I.getModule(), Intrinsic::cos,
+                                                  I.getType());
+        CallInst *CosCall = Builder.CreateCall(fun, call1->getOperand(0));
+        Constant *ConstFloatOne = ConstantFP::get(I.getType(), 1.0);
+
+        Instruction *DivIns =
dyn_cast(Builder.CreateFDiv(ConstFloatOne, CosCall)); + DivIns->takeName(&I); + DivIns->copyFastMathFlags(&I); + + return DivIns; + } + } + } + + return nullptr; +} + /// \brief Makes transformation of binary operation specific for vector types. /// \param Inst Binary operator to transform. /// \return Pointer to node that must replace the original binary operator, or /// null pointer if no transformation was made. Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { - if (!Inst.getType()->isVectorTy()) return nullptr; + if (!Inst.getType()->isVectorTy()) + return nullptr; // It may not be safe to reorder shuffles and things like div, urem, etc. // because we may trap when executing those ops on unknown vector elements. @@ -1448,18 +1581,22 @@ LShuf->getOperand(0)->getType() == RShuf->getOperand(0)->getType()) { Value *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0), RShuf->getOperand(0), Builder); - return Builder.CreateShuffleVector( - NewBO, UndefValue::get(NewBO->getType()), LShuf->getMask()); + return Builder.CreateShuffleVector(NewBO, UndefValue::get(NewBO->getType()), + LShuf->getMask()); } // If one argument is a shuffle within one vector, the other is a constant, // try moving the shuffle after the binary operation. ShuffleVectorInst *Shuffle = nullptr; Constant *C1 = nullptr; - if (isa(LHS)) Shuffle = cast(LHS); - if (isa(RHS)) Shuffle = cast(RHS); - if (isa(LHS)) C1 = cast(LHS); - if (isa(RHS)) C1 = cast(RHS); + if (isa(LHS)) + Shuffle = cast(LHS); + if (isa(RHS)) + Shuffle = cast(RHS); + if (isa(LHS)) + C1 = cast(LHS); + if (isa(RHS)) + C1 = cast(RHS); if (Shuffle && C1 && (isa(C1) || isa(C1)) && isa(Shuffle->getOperand(1)) && @@ -1469,8 +1606,8 @@ // shuffle(C2, ShMask) = C1 // If such constant does not exist (example: ShMask=<0,0> and C1=<1,2>) // reorder is not possible. 
- SmallVector C2M(VWidth, - UndefValue::get(C1->getType()->getScalarType())); + SmallVector C2M( + VWidth, UndefValue::get(C1->getType()->getScalarType())); bool MayChange = true; for (unsigned I = 0; I < VWidth; ++I) { if (ShMask[I] >= 0) { @@ -1487,8 +1624,8 @@ Value *NewLHS = isa(LHS) ? C2 : Shuffle->getOperand(0); Value *NewRHS = isa(LHS) ? Shuffle->getOperand(0) : C2; Value *NewBO = CreateBinOpAsGiven(Inst, NewLHS, NewRHS, Builder); - return Builder.CreateShuffleVector(NewBO, - UndefValue::get(Inst.getType()), Shuffle->getMask()); + return Builder.CreateShuffleVector(NewBO, UndefValue::get(Inst.getType()), + Shuffle->getMask()); } } @@ -1496,7 +1633,7 @@ } Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { - SmallVector Ops(GEP.op_begin(), GEP.op_end()); + SmallVector Ops(GEP.op_begin(), GEP.op_end()); if (Value *V = SimplifyGEPInst(GEP.getSourceElementType(), Ops, SQ.getWithInstruction(&GEP))) @@ -1508,7 +1645,7 @@ // by multiples of a zero size type with zero. bool MadeChange = false; Type *IntPtrTy = - DL.getIntPtrType(GEP.getPointerOperandType()->getScalarType()); + DL.getIntPtrType(GEP.getPointerOperandType()->getScalarType()); gep_type_iterator GTI = gep_type_begin(GEP); for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E; @@ -1519,8 +1656,10 @@ // Index type should have the same width as IntPtr Type *IndexTy = (*I)->getType(); - Type *NewIndexType = IndexTy->isVectorTy() ? - VectorType::get(IntPtrTy, IndexTy->getVectorNumElements()) : IntPtrTy; + Type *NewIndexType = + IndexTy->isVectorTy() + ? VectorType::get(IntPtrTy, IndexTy->getVectorNumElements()) + : IntPtrTy; // If the element type has zero size then any index over it is equivalent // to an index of zero, so replace it with zero if it is not zero already. 
@@ -1559,7 +1698,7 @@ int DI = -1; - for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) { + for (auto I = PN->op_begin() + 1, E = PN->op_end(); I != E; ++I) { GetElementPtrInst *Op2 = dyn_cast(*I); if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands()) return nullptr; @@ -1659,12 +1798,11 @@ // Note that if our source is a gep chain itself then we wait for that // chain to be resolved before we perform this transformation. This // avoids us creating a TON of code in some cases. - if (GEPOperator *SrcGEP = - dyn_cast(Src->getOperand(0))) + if (GEPOperator *SrcGEP = dyn_cast(Src->getOperand(0))) if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP)) - return nullptr; // Wait until our source is folded to completion. + return nullptr; // Wait until our source is folded to completion. - SmallVector Indices; + SmallVector Indices; // Find out whether the last index in the source GEP is a sequential idx. bool EndsWithSequential = false; @@ -1676,7 +1814,7 @@ if (EndsWithSequential) { // Replace: gep (gep %P, long B), long A, ... // With: T = long A+B; gep %P, T, ... 
- Value *SO1 = Src->getOperand(Src->getNumOperands()-1); + Value *SO1 = Src->getOperand(Src->getNumOperands() - 1); Value *GO1 = GEP.getOperand(1); // If they aren't the same type, then the input hasn't been processed @@ -1699,15 +1837,15 @@ GEP.setOperand(1, Sum); return &GEP; } - Indices.append(Src->op_begin()+1, Src->op_end()-1); + Indices.append(Src->op_begin() + 1, Src->op_end() - 1); Indices.push_back(Sum); - Indices.append(GEP.op_begin()+2, GEP.op_end()); + Indices.append(GEP.op_begin() + 2, GEP.op_end()); } else if (isa(*GEP.idx_begin()) && cast(*GEP.idx_begin())->isNullValue() && Src->getNumOperands() != 1) { // Otherwise we can do the fold if the first index of the GEP is a zero - Indices.append(Src->op_begin()+1, Src->op_end()); - Indices.append(GEP.idx_begin()+1, GEP.idx_end()); + Indices.append(Src->op_begin() + 1, Src->op_end()); + Indices.append(GEP.idx_begin() + 1, GEP.idx_end()); } if (!Indices.empty()) @@ -1787,12 +1925,11 @@ // // This occurs when the program declares an array extern like "int X[];" if (HasZeroPointerIndex) { - if (ArrayType *CATy = - dyn_cast(GEP.getSourceElementType())) { + if (ArrayType *CATy = dyn_cast(GEP.getSourceElementType())) { // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ? if (CATy->getElementType() == StrippedPtrTy->getElementType()) { // -> GEP i8* X, ... - SmallVector Idx(GEP.idx_begin()+1, GEP.idx_end()); + SmallVector Idx(GEP.idx_begin() + 1, GEP.idx_end()); GetElementPtrInst *Res = GetElementPtrInst::Create( StrippedPtrTy->getElementType(), StrippedPtr, Idx, GEP.getName()); Res->setIsInBounds(GEP.isInBounds()); @@ -1808,7 +1945,7 @@ } if (ArrayType *XATy = - dyn_cast(StrippedPtrTy->getElementType())){ + dyn_cast(StrippedPtrTy->getElementType())) { // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ? if (CATy->getElementType() == XATy->getElementType()) { // -> GEP [10 x i8]* X, i32 0, ... @@ -1830,7 +1967,7 @@ // -> // %0 = GEP [10 x i8] addrspace(1)* X, ... 
// addrspacecast i8 addrspace(1)* %0 to i8* - SmallVector Idx(GEP.idx_begin(), GEP.idx_end()); + SmallVector Idx(GEP.idx_begin(), GEP.idx_end()); Value *NewGEP = GEP.isInBounds() ? Builder.CreateInBoundsGEP( nullptr, StrippedPtr, Idx, GEP.getName()) @@ -1850,7 +1987,7 @@ DL.getTypeAllocSize(SrcElTy->getArrayElementType()) == DL.getTypeAllocSize(ResElTy)) { Type *IdxType = DL.getIntPtrType(GEP.getType()); - Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) }; + Value *Idx[2] = {Constant::getNullValue(IdxType), GEP.getOperand(1)}; Value *NewGEP = GEP.isInBounds() ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, Idx, @@ -1985,7 +2122,8 @@ } } - if (Operand->getType()->getPointerAddressSpace() != GEP.getAddressSpace()) + if (Operand->getType()->getPointerAddressSpace() != + GEP.getAddressSpace()) return new AddrSpaceCastInst(Operand, GEP.getType()); return new BitCastInst(Operand, GEP.getType()); } @@ -1993,7 +2131,7 @@ // Otherwise, if the offset is non-zero, we need to find out if there is a // field at Offset in 'A's type. If so, we can pull the cast through the // GEP. 
- SmallVector NewIndices; + SmallVector NewIndices; if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) { Value *NGEP = GEP.isInBounds() @@ -2016,8 +2154,7 @@ DL.getPointerSizeInBits(PtrOp->getType()->getPointerAddressSpace()); APInt BasePtrOffset(PtrWidth, 0); Value *UnderlyingPtrOp = - PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, - BasePtrOffset); + PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, BasePtrOffset); if (auto *AI = dyn_cast(UnderlyingPtrOp)) { if (GEP.accumulateConstantOffset(DL, BasePtrOffset) && BasePtrOffset.isNonNegative()) { @@ -2050,7 +2187,7 @@ static bool isAllocSiteRemovable(Instruction *AI, SmallVectorImpl &Users, const TargetLibraryInfo *TLI) { - SmallVector Worklist; + SmallVector Worklist; Worklist.push_back(AI); do { @@ -2148,7 +2285,7 @@ // Lowering all @llvm.objectsize calls first because they may // use a bitcast/GEP of the alloca we are removing. if (!Users[i]) - continue; + continue; Instruction *I = cast(&*Users[i]); @@ -2186,8 +2323,8 @@ // Replace invoke with a NOP intrinsic to maintain the original CFG Module *M = II->getModule(); Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing); - InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(), - None, "", II->getParent()); + InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(), None, "", + II->getParent()); } for (auto *DII : DIIs) @@ -2213,8 +2350,7 @@ /// The profitability is out-of concern here and this function should /// be called only if the caller knows this transformation would be /// profitable (e.g., for code size). 
-static Instruction * -tryToMoveFreeBeforeNullTest(CallInst &FI) { +static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI) { Value *Op = FI.getArgOperand(0); BasicBlock *FreeInstrBB = FI.getParent(); BasicBlock *PredBB = FreeInstrBB->getSinglePredecessor(); @@ -2368,7 +2504,8 @@ LeadingKnownOnes, C.getCaseValue()->getValue().countLeadingOnes()); } - unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes); + unsigned NewWidth = + Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes); // Shrink the condition operand if the new type is smaller than the old type. // This may produce a non-standard type for the switch, but that's ok because @@ -2402,10 +2539,9 @@ if (InsertValueInst *IV = dyn_cast(Agg)) { // We're extracting from an insertvalue instruction, compare the indices const unsigned *exti, *exte, *insi, *inse; - for (exti = EV.idx_begin(), insi = IV->idx_begin(), - exte = EV.idx_end(), inse = IV->idx_end(); - exti != exte && insi != inse; - ++exti, ++insi) { + for (exti = EV.idx_begin(), insi = IV->idx_begin(), exte = EV.idx_end(), + inse = IV->idx_end(); + exti != exte && insi != inse; ++exti, ++insi) { if (*insi != *exti) // The insert and extract both reference distinctly different elements. // This means the extract is not influenced by the insert, and we can @@ -2461,7 +2597,7 @@ switch (II->getIntrinsicID()) { case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: - if (*EV.idx_begin() == 0) { // Normal result. + if (*EV.idx_begin() == 0) { // Normal result. Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); replaceInstUsesWith(*II, UndefValue::get(II->getType())); eraseInstFromFunction(*II); @@ -2478,7 +2614,7 @@ break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: - if (*EV.idx_begin() == 0) { // Normal result. + if (*EV.idx_begin() == 0) { // Normal result. 
Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); replaceInstUsesWith(*II, UndefValue::get(II->getType())); eraseInstFromFunction(*II); @@ -2487,7 +2623,7 @@ break; case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: - if (*EV.idx_begin() == 0) { // Normal result. + if (*EV.idx_begin() == 0) { // Normal result. Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); replaceInstUsesWith(*II, UndefValue::get(II->getType())); eraseInstFromFunction(*II); @@ -2507,11 +2643,11 @@ // don't want to do the transformation as it loses padding knowledge. if (L->isSimple() && L->hasOneUse()) { // extractvalue has integer indices, getelementptr has Value*s. Convert. - SmallVector Indices; + SmallVector Indices; // Prefix an i32 0 since we need the first element. Indices.push_back(Builder.getInt32(0)); for (ExtractValueInst::idx_iterator I = EV.idx_begin(), E = EV.idx_end(); - I != E; ++I) + I != E; ++I) Indices.push_back(Builder.getInt32(*I)); // We need to insert these at the location of the old load, not at that of @@ -2568,10 +2704,8 @@ } static bool shorter_filter(const Value *LHS, const Value *RHS) { - return - cast(LHS->getType())->getNumElements() - < - cast(RHS->getType())->getNumElements(); + return cast(LHS->getType())->getNumElements() < + cast(RHS->getType())->getNumElements(); } Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { @@ -2585,7 +2719,7 @@ // (these are often created by inlining). bool MakeNewInstruction = false; // If true, recreate using the following: SmallVector NewClauses; // - Clauses for the new instruction; - bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup. + bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup. SmallPtrSet AlreadyCaught; // Typeinfos known caught already. for (unsigned i = 0, e = LI.getNumClauses(); i != e; ++i) { @@ -2637,13 +2771,13 @@ break; } - bool MakeNewFilter = false; // If true, make a new filter. 
+ bool MakeNewFilter = false; // If true, make a new filter. SmallVector NewFilterElts; // New elements. if (isa(FilterClause)) { // Not an empty filter - it contains at least one null typeinfo. assert(NumTypeInfos > 0 && "Should have handled empty filter already!"); Constant *TypeInfo = - Constant::getNullValue(FilterType->getElementType()); + Constant::getNullValue(FilterType->getElementType()); // If this typeinfo is a catch-all then the filter can never match. if (isCatchAll(Personality, TypeInfo)) { // Throw the filter away. @@ -2708,8 +2842,8 @@ MakeNewFilter = true; } if (MakeNewFilter) { - FilterType = ArrayType::get(FilterType->getElementType(), - NewFilterElts.size()); + FilterType = + ArrayType::get(FilterType->getElementType(), NewFilterElts.size()); FilterClause = ConstantArray::get(FilterType, NewFilterElts); MakeNewInstruction = true; } @@ -2733,7 +2867,7 @@ // advantageous because shorter filters are more likely to match, speeding up // unwinding, but mostly because it increases the effectiveness of the other // filter optimizations below. - for (unsigned i = 0, e = NewClauses.size(); i + 1 < e; ) { + for (unsigned i = 0, e = NewClauses.size(); i + 1 < e;) { unsigned j; // Find the maximal 'j' s.t. the range [i, j) consists entirely of filters. for (j = i; j != e; ++j) @@ -2744,7 +2878,7 @@ // if sorting them is actually going to do anything so that we only make a // new landingpad instruction if it does. for (unsigned k = i; k + 1 < j; ++k) - if (shorter_filter(NewClauses[k+1], NewClauses[k])) { + if (shorter_filter(NewClauses[k + 1], NewClauses[k])) { // Not sorted, so sort the filters now. Doing an unstable sort would be // correct too but reordering filters pointlessly might confuse users. std::stable_sort(NewClauses.begin() + i, NewClauses.begin() + j, @@ -2859,8 +2993,8 @@ // If we changed any of the clauses, replace the old landingpad instruction // with a new one. 
if (MakeNewInstruction) { - LandingPadInst *NLI = LandingPadInst::Create(LI.getType(), - NewClauses.size()); + LandingPadInst *NLI = + LandingPadInst::Create(LI.getType(), NewClauses.size()); for (unsigned i = 0, e = NewClauses.size(); i != e; ++i) NLI->addClause(NewClauses[i]); // A landing pad with no clauses must have the cleanup flag set. It is @@ -2896,8 +3030,8 @@ return false; // Do not sink alloca instructions out of the entry block. - if (isa(I) && I->getParent() == - &DestBlock->getParent()->getEntryBlock()) + if (isa(I) && + I->getParent() == &DestBlock->getParent()->getEntryBlock()) return false; // Do not sink into catchswitch blocks. @@ -2928,7 +3062,8 @@ bool InstCombiner::run() { while (!Worklist.isEmpty()) { Instruction *I = Worklist.RemoveOne(); - if (I == nullptr) continue; // skip null values. + if (I == nullptr) + continue; // skip null values. // Check to see if we can DCE the instruction. if (isInstructionTriviallyDead(I, &TLI)) { @@ -2962,11 +3097,11 @@ // a value even when the operands are not all constants. Type *Ty = I->getType(); if (ExpensiveCombines && !I->use_empty() && Ty->isIntOrIntVectorTy()) { - KnownBits Known = computeKnownBits(I, /*Depth*/0, I); + KnownBits Known = computeKnownBits(I, /*Depth*/ 0, I); if (Known.isConstant()) { Constant *C = ConstantInt::get(Ty, Known.getConstant()); - DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C << - " from: " << *I << '\n'); + DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C + << " from: " << *I << '\n'); // Add operands to the worklist. 
replaceInstUsesWith(*I, C); @@ -3093,10 +3228,10 @@ InstCombineWorklist &ICWorklist, const TargetLibraryInfo *TLI) { bool MadeIRChange = false; - SmallVector Worklist; + SmallVector Worklist; Worklist.push_back(BB); - SmallVector InstrsForInstCombineWorklist; + SmallVector InstrsForInstCombineWorklist; DenseMap FoldedConstants; do { @@ -3106,7 +3241,7 @@ if (!Visited.insert(BB).second) continue; - for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) { Instruction *Inst = &*BBI++; // DCE instruction if trivially dead. @@ -3123,8 +3258,8 @@ if (!Inst->use_empty() && (Inst->getNumOperands() == 0 || isa(Inst->getOperand(0)))) if (Constant *C = ConstantFoldInstruction(Inst, DL, TLI)) { - DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " - << *Inst << '\n'); + DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *Inst + << '\n'); Inst->replaceAllUsesWith(C); ++NumConstProp; if (isInstructionTriviallyDead(Inst, TLI)) @@ -3147,15 +3282,16 @@ if (FoldRes != C) { DEBUG(dbgs() << "IC: ConstFold operand of: " << *Inst - << "\n Old = " << *C - << "\n New = " << *FoldRes << '\n'); + << "\n Old = " << *C << "\n New = " << *FoldRes + << '\n'); U = FoldRes; MadeIRChange = true; } } - // Skip processing debug intrinsics in InstCombine. Processing these call instructions - // consumes non-trivial amount of time and provides no value for the optimization. + // Skip processing debug intrinsics in InstCombine. Processing these call + // instructions consumes non-trivial amount of time and provides no value + // for the optimization. 
if (!isa(Inst)) InstrsForInstCombineWorklist.push_back(Inst); } Index: test/Transforms/InstCombine/trigonometric-optimizations.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/trigonometric-optimizations.ll @@ -0,0 +1,89 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +declare double @tan(double) +declare double @llvm.cos.f64(double) +declare double @llvm.sin.f64(double) + +define double @_Z11tan_mul_cosd(double) { + %2 = call fast double @tan(double %0) #8 + %3 = call fast double @llvm.cos.f64(double %0) + %4 = fmul fast double %2, %3 + ret double %4 + +; CHECK-LABEL:@_Z11tan_mul_cosd( +; CHECK-NEXT: %2 = call fast double @llvm.sin.f64(double %0) +; CHECK-NEXT: ret double %2 + +} + +define double @_Z11sin_mul_cosd(double) { + %2 = call fast double @llvm.sin.f64(double %0) + %3 = call fast double @llvm.cos.f64(double %0) + %4 = fmul fast double %2, %3 + ret double %4 + +; CHECK-LABEL: @_Z11sin_mul_cosd( +; CHECK-NEXT: %2 = fmul fast double %0, 2.000000e+00 +; CHECK-NEXT: %3 = call fast double @llvm.sin.f64(double %2) +; CHECK-NEXT: %4 = fmul fast double %3, 5.000000e-01 +; CHECK-NEXT: ret double %4 +} + +define double @_Z11sin_div_tand(double) { + %2 = call fast double @llvm.sin.f64(double %0) + %3 = call fast double @tan(double %0) #8 + %4 = fdiv fast double %2, %3 + ret double %4 + +; CHECK-LABEL: @_Z11sin_div_tand( +; CHECK-NEXT: %2 = call fast double @llvm.cos.f64(double %0) +; CHECK-NEXT: ret double %2 +} + +define double @_Z11tan_div_sind(double) { + %2 = call fast double @tan(double %0) #8 + %3 = call fast double @llvm.sin.f64(double %0) + %4 = fdiv fast double %2, %3 + ret double %4 + +; CHECK-LABEL: @_Z11tan_div_sind( +; CHECK-NEXT: %2 = call fast double @llvm.cos.f64(double %0) +; CHECK-NEXT: %3 = fdiv fast double 1.000000e+00, %2 +; CHECK-NEXT: ret double %3 +} + +define double @_Z8combinedd(double) { + %2 = call fast double @llvm.sin.f64(double %0) + %3 = call fast double 
@tan(double %0) #8 + %4 = fdiv fast double %2, %3 + %5 = call fast double @tan(double %0) #8 + %6 = call fast double @llvm.cos.f64(double %0) + %7 = fmul fast double %5, %6 + %8 = fmul fast double %4, %7 + ret double %8 + +; CHECK-LABEL: @_Z8combinedd( +; CHECK-NEXT: %2 = fmul fast double %0, 2.000000e+00 +; CHECK-NEXT: %3 = call fast double @llvm.sin.f64(double %2) +; CHECK-NEXT: %4 = fmul fast double %3, 5.000000e-01 +; CHECK-NEXT: ret double %4 +} + +define double @_Z19different_argumentsdd(double, double) { + %3 = call fast double @llvm.sin.f64(double %0) + %4 = call fast double @tan(double %0) #8 + %5 = fdiv fast double %3, %4 + %6 = call fast double @tan(double %1) #8 + %7 = call fast double @llvm.cos.f64(double %1) + %8 = fmul fast double %6, %7 + %9 = fmul fast double %5, %8 + ret double %9 + +; CHECK-LABEL: @_Z19different_argumentsdd( +; CHECK-NEXT: %3 = call fast double @llvm.cos.f64(double %0) +; CHECK-NEXT: %4 = call fast double @llvm.sin.f64(double %1) +; CHECK-NEXT: %5 = fmul fast double %3, %4 +; CHECK-NEXT: ret double %5 +} + +attributes #8 = { nounwind readnone }