Index: lib/Transforms/InstCombine/InstCombineCasts.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCasts.cpp +++ lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -156,10 +156,18 @@ return replaceInstUsesWith(CI, New); } -/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns -/// true for, actually insert the code to evaluate the expression. +/// Given an expression that CanEvaluate{Truncated, SExtd, ZExtd}, returns a new +/// Value representing the inserted code to evaluate the expression. Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, - bool isSigned) { + bool isSigned) { + EvaluatedInstMap.clear(); + return EvaluateInDifferentTypeImpl(V, Ty, isSigned); +} + +/// Recursive helper function for EvaluateInDifferentType. +/// This function should not be called directly! +Value *InstCombiner::EvaluateInDifferentTypeImpl(Value *V, Type *Ty, + bool isSigned) { if (Constant *C = dyn_cast(V)) { C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); // If we got a constantexpr back, try to simplify it with DL info. @@ -170,6 +178,11 @@ // Otherwise, it must be an instruction. Instruction *I = cast(V); + + // Check if this instruction have already been evaluated. + if (EvaluatedInstMap.count(I)) + return EvaluatedInstMap[I]; + Instruction *Res = nullptr; unsigned Opc = I->getOpcode(); switch (Opc) { @@ -184,8 +197,8 @@ case Instruction::Shl: case Instruction::UDiv: case Instruction::URem: { - Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned); - Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); + Value *LHS = EvaluateInDifferentTypeImpl(I->getOperand(0), Ty, isSigned); + Value *RHS = EvaluateInDifferentTypeImpl(I->getOperand(1), Ty, isSigned); Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); break; } @@ -204,17 +217,19 @@ Opc == Instruction::SExt); break; case Instruction::Select: { - Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); - Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned); + Value *True = EvaluateInDifferentTypeImpl(I->getOperand(1), Ty, isSigned); + Value *False = EvaluateInDifferentTypeImpl(I->getOperand(2), Ty, isSigned); Res = SelectInst::Create(I->getOperand(0), True, False); break; } case Instruction::PHI: { PHINode *OPN = cast(I); PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues()); + // Must update the evaluated instruction map here to prevent infinite loop. + EvaluatedInstMap[I] = NPN; for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) { Value *V = - EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned); + EvaluateInDifferentTypeImpl(OPN->getIncomingValue(i), Ty, isSigned); NPN->addIncoming(V, OPN->getIncomingBlock(i)); } Res = NPN; @@ -225,6 +240,7 @@ llvm_unreachable("Unreachable!"); } + EvaluatedInstMap[I] = Res; Res->takeName(I); return InsertNewInstWith(Res, *I); } @@ -298,8 +314,25 @@ /// /// This function works on both vectors and scalars. /// -static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, - Instruction *CxtI) { +bool InstCombiner::canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, + Instruction *CxtI) { + VisitedInsts.clear(); + ToVisitInsts.clear(); + if (!canEvaluateTruncatedImpl(V, Ty, IC, CxtI)) + return false; + // We can truncate the given expression tree only if we had visited all + // instructions that are expected to be visited during expression evaluation. + for (auto *I : ToVisitInsts) + if (!VisitedInsts.count(I)) + return false; + return true; +} + +/// Recursive helper function for canEvaluateTruncated. +/// This function should not be called directly! +bool InstCombiner::canEvaluateTruncatedImpl(Value *V, Type *Ty, + InstCombiner &IC, + Instruction *CxtI) { // We can always evaluate constants in another type. if (isa(V)) return true; @@ -307,6 +340,11 @@ Instruction *I = dyn_cast(V); if (!I) return false; + if (VisitedInsts.count(I)) + return true; + + VisitedInsts.insert(I); + Type *OrigTy = V->getType(); // If this is an extension from the dest type, we can eliminate it, even if it @@ -315,9 +353,18 @@ I->getOperand(0)->getType() == Ty) return true; - // We can't extend or shrink something that has multiple uses: doing so would - // require duplicating the instruction in general, which isn't profitable. - if (!I->hasOneUse()) return false; + // We don't want to duplicate instructiosn, which isn't profitable. Thus, we + // can't extend or shrink something that has multiple uses, unless all users + // are dominated by the trunc instruction and will be visited during the + // expresion evaluation. Mark all users that were not visited yet to be + // checked later against visited list. + if (!I->hasOneUse()) { + for (auto *U : I->users()) { + auto *UI = dyn_cast(U); + if (UI && !VisitedInsts.count(UI)) + ToVisitInsts.insert(UI); + } + } unsigned Opc = I->getOpcode(); switch (Opc) { @@ -328,8 +375,8 @@ case Instruction::Or: case Instruction::Xor: // These operators can all arbitrarily be extended or truncated. - return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) && - canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI); + return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) && + canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI); case Instruction::UDiv: case Instruction::URem: { @@ -340,8 +387,8 @@ APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth); if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) && IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) { - return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) && - canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI); + return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) && + canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI); } } break; @@ -353,7 +400,7 @@ if (match(I->getOperand(1), m_APInt(Amt))) { uint32_t BitWidth = Ty->getScalarSizeInBits(); if (Amt->getLimitedValue(BitWidth) < BitWidth) - return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI); + return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI); } break; } @@ -368,7 +415,7 @@ if (IC.MaskedValueIsZero(I->getOperand(0), APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth), 0, CxtI) && Amt->getLimitedValue(BitWidth) < BitWidth) { - return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI); + return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI); } } break; @@ -386,7 +433,7 @@ if (Amt->getLimitedValue(BitWidth) < BitWidth && OrigBitWidth - BitWidth < IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI)) - return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI); + return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI); } break; } @@ -400,8 +447,8 @@ return true; case Instruction::Select: { SelectInst *SI = cast(I); - return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) && - canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI); + return canEvaluateTruncatedImpl(SI->getTrueValue(), Ty, IC, CxtI) && + canEvaluateTruncatedImpl(SI->getFalseValue(), Ty, IC, CxtI); } case Instruction::PHI: { // We can change a phi if we can change all operands. Note that we never @@ -409,7 +456,7 @@ // instructions with a single use. PHINode *PN = cast(I); for (Value *IncValue : PN->incoming_values()) - if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI)) + if (!canEvaluateTruncatedImpl(IncValue, Ty, IC, CxtI)) return false; return true; } Index: lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- lib/Transforms/InstCombine/InstCombineInternal.h +++ lib/Transforms/InstCombine/InstCombineInternal.h @@ -233,6 +233,13 @@ bool MadeIRChange; + // Map evaluated instruction to its new instruction. + DenseMap EvaluatedInstMap; + // Set of instructions that have been visited during expression evaluation. + SmallPtrSet VisitedInsts; + // Set of instruction expected to be visited during expression evaluation. + SmallPtrSet ToVisitInsts; + public: InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder, bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA, @@ -749,6 +756,11 @@ Instruction *SimplifyMemSet(MemSetInst *MI); Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned); + Value *EvaluateInDifferentTypeImpl(Value *V, Type *Ty, bool isSigned); + bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, + Instruction *CxtI); + bool canEvaluateTruncatedImpl(Value *V, Type *Ty, InstCombiner &IC, + Instruction *CxtI); /// \brief Returns a value X such that Val = X * Scale, or null if none. /// Index: test/Transforms/InstCombine/cast.ll =================================================================== --- test/Transforms/InstCombine/cast.ll +++ test/Transforms/InstCombine/cast.ll @@ -1366,6 +1366,78 @@ ret i16 %t } +; Cases with truncate expression tree where not all nodes have a single usage. +; However, all the nodes are dominated by the trunc instruction. +; Case 1: same basic block +define i16 @test_trunc_multi_use_case1(i16 %v) { +; CHECK-LABEL: @test_trunc_multi_use_case1( +; CHECK-NEXT: [[TMP1:%.*]] = ashr i16 [[V:%.*]], 1 +; CHECK-NEXT: [[S:%.*]] = mul i16 [[TMP1]], [[TMP1]] +; CHECK-NEXT: ret i16 [[S]] +; + %a = sext i16 %v to i32 + %b = ashr i32 %a, 1 + %s = mul i32 %b, %b + %t = trunc i32 %s to i16 + ret i16 %t +} + +; Case 2: cross basic block +define i16 @test_trunc_multi_use_case2(i16 %v, i1 %cond) { +; CHECK-LABEL: @test_trunc_multi_use_case2( +; CHECK-NEXT: BB0: +; CHECK-NEXT: [[B:%.*]] = shl i16 [[V:%.*]], 1 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: BB1: +; CHECK-NEXT: [[D:%.*]] = mul i16 [[B]], [[B]] +; CHECK-NEXT: br label [[BB2]] +; CHECK: BB2: +; CHECK-NEXT: [[S:%.*]] = phi i16 [ [[B]], [[BB0:%.*]] ], [ [[D]], [[BB1]] ] +; CHECK-NEXT: ret i16 [[S]] +; +BB0: + %a = sext i16 %v to i32 + %b = shl i32 %a, 1 + br i1 %cond, label %BB1, label %BB2 +BB1: + %d = mul i32 %b, %b + br label %BB2 +BB2: + %s = phi i32 [ %b, %BB0 ], [ %d, %BB1 ] + %t = trunc i32 %s to i16 + ret i16 %t +} + +; Case 3: cross basic block - not all nodes are dominated by trunc instruction. +define i16 @test_trunc_multi_use_case3(i16 %v) { +; CHECK-LABEL: @test_trunc_multi_use_case3( +; CHECK-NEXT: BB0: +; CHECK-NEXT: [[A:%.*]] = sext i16 [[V:%.*]] to i32 +; CHECK-NEXT: [[B:%.*]] = shl nsw i32 [[A]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[B]], 777 +; CHECK-NEXT: br i1 [[COND]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: BB1: +; CHECK-NEXT: [[D:%.*]] = mul nsw i32 [[A]], 1554 +; CHECK-NEXT: br label [[BB2]] +; CHECK: BB2: +; CHECK-NEXT: [[S:%.*]] = phi i32 [ [[B]], [[BB0:%.*]] ], [ [[D]], [[BB1]] ] +; CHECK-NEXT: [[T:%.*]] = trunc i32 [[S]] to i16 +; CHECK-NEXT: ret i16 [[T]] +; +BB0: + %a = sext i16 %v to i32 + %b = shl i32 %a, 1 + %cond = icmp ult i32 %b, 777 + br i1 %cond, label %BB1, label %BB2 +BB1: + %d = mul i32 %b, 777 + br label %BB2 +BB2: + %s = phi i32 [ %b, %BB0 ], [ %d, %BB1 ] + %t = trunc i32 %s to i16 + ret i16 %t +} + ; Overflow on a float to int or int to float conversion is undefined (PR21130). define i8 @overflow_fptosi() { Index: test/Transforms/InstCombine/pr33765.ll =================================================================== --- test/Transforms/InstCombine/pr33765.ll +++ test/Transforms/InstCombine/pr33765.ll @@ -5,15 +5,13 @@ define void @patatino(i8 %beth) { ; CHECK-LABEL: @patatino( -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[BETH:%.*]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[BETH:%.*]] to i16 ; CHECK-NEXT: br i1 false, label [[IF_THEN9:%.*]], label [[IF_THEN9]] ; CHECK: if.then9: -; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[CONV]], [[CONV]] +; CHECK-NEXT: [[MUL:%.*]] = mul nuw i16 [[CONV]], [[CONV]] ; CHECK-NEXT: [[TINKY:%.*]] = load i16, i16* @glob, align 2 -; CHECK-NEXT: [[CONV131:%.*]] = zext i16 [[TINKY]] to i32 -; CHECK-NEXT: [[AND:%.*]] = and i32 [[MUL]], [[CONV131]] -; CHECK-NEXT: [[CONV14:%.*]] = trunc i32 [[AND]] to i16 -; CHECK-NEXT: store i16 [[CONV14]], i16* @glob, align 2 +; CHECK-NEXT: [[AND:%.*]] = and i16 [[MUL]], [[TINKY]] +; CHECK-NEXT: store i16 [[AND]], i16* @glob, align 2 ; CHECK-NEXT: ret void ; %conv = zext i8 %beth to i32