Index: lib/Transforms/InstCombine/InstCombineCasts.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCasts.cpp +++ lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -155,8 +155,8 @@ } /// EvaluateInDifferentType - Given an expression that -/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually -/// insert the code to evaluate the expression. +/// IsProfitableToEvaluateTruncated or CanEvaluateSExtd returns true for, +/// actually insert the code to evaluate the expression. Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned) { if (Constant *C = dyn_cast(V)) { @@ -221,9 +221,26 @@ } default: // TODO: Can handle more cases here. - llvm_unreachable("Unreachable!"); + // This is not a recognized instruction. We need an explicit cast here. + // Currently we can reach this only when truncating expression type. + assert(CastInst::castIsValid(Instruction::Trunc, I, Ty) && + "can not insert explicit cast"); + // hasOneUse property is guaranteed by "EstimateCostForTruncatedEvaluation" + assert(I->hasOneUse() && + "can not truncate branching expression trees"); + + // Create truncate from "I" type to the Ty + Instruction *Trunc = CastInst::CreateTruncOrBitCast(I, Ty); + Trunc->setName(I->getName() + ".truncated"); + + // Insert new truncate after "I" + Instruction *NextI = I->getNextNode(); + assert(NextI != I->getParent()->end() && + "can not insert explicit truncate"); + return InsertNewInstBefore(Trunc, *NextI); } - + + assert(Res && "failed to evaluate instruction in different type"); Res->takeName(I); return InsertNewInstWith(Res, *I); } @@ -318,19 +335,16 @@ return nullptr; } -/// CanEvaluateTruncated - Return true if we can evaluate the specified -/// expression tree as type Ty instead of its larger type, and arrive with the -/// same value. This is used by code that tries to eliminate truncates. -/// -/// Ty will always be a type smaller than V. We should return true if trunc(V) -/// can be computed by computing V in the smaller type. If V is an instruction, -/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only -/// makes sense if x and y can be efficiently truncated. -/// -/// This function works on both vectors and scalars. -/// -static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, - Instruction *CxtI) { +/// EstimateCostForTruncatedEvaluation - Return true if we can evaluate the +/// specified expression tree as type Ty instead of its larger type, and +/// arrive at the same value. Also calculate cost of this transformation +/// and store it into the "Cost" argument. +/// Currently cost model is simple - increase cost by one for each additional +/// instruction required, decrease cost by one for each instruction that +/// will be removed after truncating expression tree. +static bool EstimateCostForTruncatedEvaluation(int &Cost, Value *V, + Type *Ty, InstCombiner &IC, + Instruction *CxtI) { // We can always evaluate constants in another type. if (isa(V)) return true; @@ -340,11 +354,17 @@ Type *OrigTy = V->getType(); - // If this is an extension from the dest type, we can eliminate it, even if it - // has multiple uses. + // If this is an extension from the dest type, we will not need any additional + // instructions to truncate it, even if it has multiple uses. if ((isa(I) || isa(I)) && - I->getOperand(0)->getType() == Ty) + I->getOperand(0)->getType() == Ty) { + + // Add a cost bonus if we can eliminate this cast. + if (I->hasOneUse()) + Cost -= 1; + return true; + } // We can't extend or shrink something that has multiple uses: doing so would // require duplicating the instruction in general, which isn't profitable. @@ -359,8 +379,10 @@ case Instruction::Or: case Instruction::Xor: // These operators can all arbitrarily be extended or truncated. - return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) && - CanEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI); + return EstimateCostForTruncatedEvaluation(Cost, I->getOperand(0), Ty, IC, + CxtI) && + EstimateCostForTruncatedEvaluation(Cost, I->getOperand(1), Ty, IC, + CxtI); case Instruction::UDiv: case Instruction::URem: { @@ -371,8 +393,10 @@ APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth); if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) && IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) { - return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) && - CanEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI); + return EstimateCostForTruncatedEvaluation(Cost, I->getOperand(0), Ty, + IC, CxtI) && + EstimateCostForTruncatedEvaluation(Cost, I->getOperand(1), Ty, + IC, CxtI); } } break; @@ -383,7 +407,8 @@ if (ConstantInt *CI = dyn_cast(I->getOperand(1))) { uint32_t BitWidth = Ty->getScalarSizeInBits(); if (CI->getLimitedValue(BitWidth) < BitWidth) - return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI); + return EstimateCostForTruncatedEvaluation(Cost, I->getOperand(0), Ty, + IC, CxtI); } break; case Instruction::LShr: @@ -396,7 +421,8 @@ if (IC.MaskedValueIsZero(I->getOperand(0), APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth), 0, CxtI) && CI->getLimitedValue(BitWidth) < BitWidth) { - return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI); + return EstimateCostForTruncatedEvaluation(Cost, I->getOperand(0), Ty, + IC, CxtI); } } break; @@ -410,8 +436,10 @@ return true; case Instruction::Select: { SelectInst *SI = cast(I); - return CanEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) && - CanEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI); + return EstimateCostForTruncatedEvaluation(Cost, SI->getTrueValue(), Ty, IC, + CxtI) && + EstimateCostForTruncatedEvaluation(Cost, SI->getFalseValue(), Ty, IC, + CxtI); } case Instruction::PHI: { // We can change a phi if we can change all operands. Note that we never @@ -419,16 +447,46 @@ // instructions with a single use. PHINode *PN = cast(I); for (Value *IncValue : PN->incoming_values()) - if (!CanEvaluateTruncated(IncValue, Ty, IC, CxtI)) + if (!EstimateCostForTruncatedEvaluation(Cost, IncValue, Ty, IC, CxtI)) return false; return true; } - default: - // TODO: Can handle more cases here. - break; + + // TODO: Can handle more cases here. } - return false; + // This is not a recognized instruction, but we can always truncate it by + // adding additional explicit trunc. + Cost += 1; + return true; +} + +/// IsProfitableToEvaluateTruncated - Given the expression tree starting with +/// truncate instruction return true if it would be profitable to remove +/// this truncate and evaluate expression in a narrower type. This is used by +/// code that tries to eliminate truncates. +/// +/// This function works on both vectors and scalars. +/// +static bool IsProfitableToEvaluateTruncated(TruncInst *Trunc, + InstCombiner &IC) { + Type *DestTy = Trunc->getType(), *SrcTy = Trunc->getOperand(0)->getType(); + + // Only truncate if the dest type is a simple type, don't convert the + // expression tree to something weird like i93 unless the source is also + // strange. + if (!DestTy->isVectorTy() && !IC.ShouldChangeType(SrcTy, DestTy)) + return false; + + int Cost = 0; + bool CanTruncate = EstimateCostForTruncatedEvaluation( + Cost, Trunc->getOperand(0), DestTy, IC, Trunc); + + // Threshold picked in a way that it will allow only cases when number of + // removed instructions is strictly greater than number of the additional + // instructions required. + const int CostThreshold = 1; + return CanTruncate && Cost < CostThreshold; } Instruction *InstCombiner::visitTrunc(TruncInst &CI) { @@ -450,15 +508,11 @@ return &CI; Value *Src = CI.getOperand(0); - Type *DestTy = CI.getType(), *SrcTy = Src->getType(); + Type *DestTy = CI.getType(); // Attempt to truncate the entire input expression tree to the destination - // type. Only do this if the dest type is a simple type, don't convert the - // expression tree to something weird like i93 unless the source is also - // strange. - if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && - CanEvaluateTruncated(Src, DestTy, *this, &CI)) { - + // type. + if (IsProfitableToEvaluateTruncated(&CI, *this)) { // If this cast is a truncate, evaluting in a different type always // eliminates the cast, so it is always a win. DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" Index: lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- lib/Transforms/InstCombine/InstCombineInternal.h +++ lib/Transforms/InstCombine/InstCombineInternal.h @@ -340,8 +340,8 @@ bool replacedSelectWithOperand(SelectInst *SI, const ICmpInst *Icmp, const unsigned SIOpd); -private: bool ShouldChangeType(Type *From, Type *To) const; +private: Value *dyn_castNegVal(Value *V) const; Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const; Type *FindElementAtOffset(PointerType *PtrTy, int64_t Offset, Index: test/Transforms/InstCombine/trunc.ll =================================================================== --- test/Transforms/InstCombine/trunc.ll +++ test/Transforms/InstCombine/trunc.ll @@ -118,3 +118,70 @@ ; CHECK: and ; CHECK: ret } + +; Here by eliminating trunc we can get rid of a redundant zext. However +; additional trunc should also be inserted. Check that we consider this +; transform to be profitable. +define void @test11(i8 *%res, i32* %t, i8* %s) { + entry: + %m = load i32, i32* %t + %n = load i8, i8* %s + + %n.i32 = zext i8 %n to i32 + %v = sub i32 %n.i32, %m + %v.i8 = trunc i32 %v to i8 + + store i8 %v.i8, i8* %res + ret void +; CHECK-LABEL: @test11( +; CHECK: %m = load i32, i32* %t +; CHECK: %m.truncated = trunc i32 %m to i8 +; CHECK: %n = load i8, i8* %s +; CHECK: %v = sub i8 %n, %m.truncated +; CHECK: store i8 %v, i8* %res +} + +; Here by eliminating trunc we will not remove any additional instructions. +; Check that this case considered to be not profitable. +define void @test12(i8 *%res, i32* %t, i32* %s) { + entry: + %m = load i32, i32* %t + %n = load i32, i32* %s + + %v = sub i32 %n, %m + %v.i8 = trunc i32 %v to i8 + + store i8 %v.i8, i8* %res + ret void +; CHECK-LABEL: @test12( +; CHECK: %m = load i32, i32* %t +; CHECK: %n = load i32, i32* %s +; CHECK: %v = sub i32 %n, %m +; CHECK: %v.i8 = trunc i32 %v to i8 +; CHECK: store i8 %v.i8, i8* %res +} + +; Check that casts with multiple uses are considered not possible to remove. +declare void @use.test13(i32) +define void @test13(i8 *%res, i32* %t, i8* %s) { + entry: + %m = load i32, i32* %t + %n = load i8, i8* %s + + %n.i32 = zext i8 %n to i32 + %v = sub i32 %n.i32, %m + %v.i8 = trunc i32 %v to i8 + + store i8 %v.i8, i8* %res + call void @use.test13(i32 %n.i32) + ret void +; CHECK-LABEL: @test13( +; CHECK: %m = load i32, i32* %t +; CHECK: %n = load i8, i8* %s +; CHECK: %n.i32 = zext i8 %n to i32 +; CHECK: %v = sub i32 %n.i32, %m +; CHECK: %v.i8 = trunc i32 %v to i8 +; CHECK: store i8 %v.i8, i8* %res +; CHECK: call void @use.test13(i32 %n.i32) +; CHECK: ret void +}