Index: lib/Transforms/InstCombine/InstCombineCasts.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCasts.cpp +++ lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -156,8 +156,16 @@ return replaceInstUsesWith(CI, New); } -/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns -/// true for, actually insert the code to evaluate the expression. +/// Given an expression that CanEvaluate{Truncated, SExtd, ZExtd}, returns a new +/// Value representing the inserted code to evaluate the expression. +Value *InstCombiner::EvaluateInDifferentTypeWrapper(Value *V, Type *Ty, + bool isSigned) { + EvaluatedInstMap.clear(); + return EvaluateInDifferentType(V, Ty, isSigned); +} + +/// Recursive helper function for EvaluateInDifferentTypeWrapper. +/// This function should not be called directly! Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned) { if (Constant *C = dyn_cast(V)) { @@ -170,6 +178,11 @@ // Otherwise, it must be an instruction. Instruction *I = cast(V); + + // Check if this instruction have already been evaluated. + if (EvaluatedInstMap.count(I)) + return EvaluatedInstMap[I]; + Instruction *Res = nullptr; unsigned Opc = I->getOpcode(); switch (Opc) { @@ -212,6 +225,8 @@ case Instruction::PHI: { PHINode *OPN = cast(I); PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues()); + // Must update the evaluated instruction map here to prevent infinite loop. + EvaluatedInstMap[I] = NPN; for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) { Value *V = EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned); @@ -225,6 +240,7 @@ llvm_unreachable("Unreachable!"); } + EvaluatedInstMap[I] = Res; Res->takeName(I); return InsertNewInstWith(Res, *I); } @@ -298,8 +314,26 @@ /// /// This function works on both vectors and scalars. /// -static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, - Instruction *CxtI) { +bool InstCombiner::canEvaluateTruncatedWrapper(Value *V, Type *Ty, + InstCombiner &IC, + Instruction *CxtI) { + VisitedInsts.clear(); + ToVisitInsts.clear(); + if (!canEvaluateTruncated(V, Ty, IC, CxtI)) + return false; + // We can truncate the given expression tree only if we had visited all + // instructions that are expected to be visited during expression evaluation. + for (auto *I : ToVisitInsts) + if (!VisitedInsts.count(I)) + return false; + return true; +} + +/// Recursive helper function for canEvaluateTruncatedWrapper. +/// This function should not be called directly! +bool InstCombiner::canEvaluateTruncated(Value *V, Type *Ty, + InstCombiner &IC, + Instruction *CxtI) { // We can always evaluate constants in another type. if (isa(V)) return true; @@ -307,6 +341,11 @@ Instruction *I = dyn_cast(V); if (!I) return false; + if (VisitedInsts.count(I)) + return true; + + VisitedInsts.insert(I); + Type *OrigTy = V->getType(); // If this is an extension from the dest type, we can eliminate it, even if it @@ -315,9 +354,18 @@ I->getOperand(0)->getType() == Ty) return true; - // We can't extend or shrink something that has multiple uses: doing so would - // require duplicating the instruction in general, which isn't profitable. - if (!I->hasOneUse()) return false; + // We don't want to duplicate instructiosn, which isn't profitable. Thus, we + // can't extend or shrink something that has multiple uses, unless all users + // are dominated by the trunc instruction and will be visited during the + // expresion evaluation. Mark all users that were not visited yet to be + // checked later against visited list. + if (!I->hasOneUse()) { + for (auto *U : I->users()) { + auto *UI = dyn_cast(U); + if (UI && !VisitedInsts.count(UI)) + ToVisitInsts.insert(UI); + } + } unsigned Opc = I->getOpcode(); switch (Opc) { @@ -656,13 +704,13 @@ // expression tree to something weird like i93 unless the source is also // strange. if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) && - canEvaluateTruncated(Src, DestTy, *this, &CI)) { + canEvaluateTruncatedWrapper(Src, DestTy, *this, &CI)) { // If this cast is a truncate, evaluting in a different type always // eliminates the cast, so it is always a win. DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" " to avoid cast: " << CI << '\n'); - Value *Res = EvaluateInDifferentType(Src, DestTy, false); + Value *Res = EvaluateInDifferentTypeWrapper(Src, DestTy, false); assert(Res->getType() == DestTy); return replaceInstUsesWith(CI, Res); } @@ -1046,7 +1094,7 @@ // Okay, we can transform this! Insert the new expression now. DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" " to avoid zero extend: " << CI << '\n'); - Value *Res = EvaluateInDifferentType(Src, DestTy, false); + Value *Res = EvaluateInDifferentTypeWrapper(Src, DestTy, false); assert(Res->getType() == DestTy); uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear; @@ -1329,7 +1377,7 @@ // Okay, we can transform this! Insert the new expression now. DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" " to avoid sign extend: " << CI << '\n'); - Value *Res = EvaluateInDifferentType(Src, DestTy, true); + Value *Res = EvaluateInDifferentTypeWrapper(Src, DestTy, true); assert(Res->getType() == DestTy); uint32_t SrcBitSize = SrcTy->getScalarSizeInBits(); Index: lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- lib/Transforms/InstCombine/InstCombineInternal.h +++ lib/Transforms/InstCombine/InstCombineInternal.h @@ -233,6 +233,13 @@ bool MadeIRChange; + // Map evaluated instruction to its new instruction. + DenseMap EvaluatedInstMap; + // Set of instructions that have been visited during expression evaluation. + SmallPtrSet VisitedInsts; + // Set of instruction expected to be visited during expression evaluation. + SmallPtrSet ToVisitInsts; + public: InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder, bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA, @@ -748,7 +755,12 @@ Instruction *SimplifyMemTransfer(MemIntrinsic *MI); Instruction *SimplifyMemSet(MemSetInst *MI); + Value *EvaluateInDifferentTypeWrapper(Value *V, Type *Ty, bool isSigned); Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned); + bool canEvaluateTruncatedWrapper(Value *V, Type *Ty, InstCombiner &IC, + Instruction *CxtI); + bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, + Instruction *CxtI); /// \brief Returns a value X such that Val = X * Scale, or null if none. /// Index: test/Transforms/InstCombine/cast.ll =================================================================== --- test/Transforms/InstCombine/cast.ll +++ test/Transforms/InstCombine/cast.ll @@ -1366,6 +1366,78 @@ ret i16 %t } +; Cases with truncate expression tree where not all nodes have a single usage. +; However, all the nodes are dominated by the tranc instruction. +; Case 1: same basic block +define i16 @test_trunc_multi_use_case1(i16 %v) { +; CHECK-LABEL: @test_trunc_multi_use_case1( +; CHECK-NEXT: [[TMP1:%.*]] = ashr i16 [[V:%.*]], 1 +; CHECK-NEXT: [[S:%.*]] = mul i16 [[TMP1]], [[TMP1]] +; CHECK-NEXT: ret i16 [[S]] +; + %a = sext i16 %v to i32 + %b = ashr i32 %a, 1 + %s = mul i32 %b, %b + %t = trunc i32 %s to i16 + ret i16 %t +} + +; Case 2: cross basic block +define i16 @test_trunc_multi_use_case2(i16 %v, i1 %cond) { +; CHECK-LABEL: @test_trunc_multi_use_case2( +; CHECK-NEXT: BB0: +; CHECK-NEXT: [[B:%.*]] = shl i16 [[V:%.*]], 1 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: BB1: +; CHECK-NEXT: [[D:%.*]] = mul i16 [[B]], [[B]] +; CHECK-NEXT: br label [[BB2]] +; CHECK: BB2: +; CHECK-NEXT: [[S:%.*]] = phi i16 [ [[B]], [[BB0:%.*]] ], [ [[D]], [[BB1]] ] +; CHECK-NEXT: ret i16 [[S]] +; +BB0: + %a = sext i16 %v to i32 + %b = shl i32 %a, 1 + br i1 %cond, label %BB1, label %BB2 +BB1: + %d = mul i32 %b, %b + br label %BB2 +BB2: + %s = phi i32 [ %b, %BB0 ], [ %d, %BB1 ] + %t = trunc i32 %s to i16 + ret i16 %t +} + +; Case 3: cross basic block - not all nodes are dominated by trunc instruction. +define i16 @test_trunc_multi_use_case3(i16 %v) { +; CHECK-LABEL: @test_trunc_multi_use_case3( +; CHECK-NEXT: BB0: +; CHECK-NEXT: [[A:%.*]] = sext i16 [[V:%.*]] to i32 +; CHECK-NEXT: [[B:%.*]] = shl nsw i32 [[A]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[B]], 777 +; CHECK-NEXT: br i1 [[COND]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: BB1: +; CHECK-NEXT: [[D:%.*]] = mul nsw i32 [[A]], 1554 +; CHECK-NEXT: br label [[BB2]] +; CHECK: BB2: +; CHECK-NEXT: [[S:%.*]] = phi i32 [ [[B]], [[BB0:%.*]] ], [ [[D]], [[BB1]] ] +; CHECK-NEXT: [[T:%.*]] = trunc i32 [[S]] to i16 +; CHECK-NEXT: ret i16 [[T]] +; +BB0: + %a = sext i16 %v to i32 + %b = shl i32 %a, 1 + %cond = icmp ult i32 %b, 777 + br i1 %cond, label %BB1, label %BB2 +BB1: + %d = mul i32 %b, 777 + br label %BB2 +BB2: + %s = phi i32 [ %b, %BB0 ], [ %d, %BB1 ] + %t = trunc i32 %s to i16 + ret i16 %t +} + ; Overflow on a float to int or int to float conversion is undefined (PR21130). define i8 @overflow_fptosi() { Index: test/Transforms/InstCombine/pr33765.ll =================================================================== --- test/Transforms/InstCombine/pr33765.ll +++ test/Transforms/InstCombine/pr33765.ll @@ -5,15 +5,13 @@ define void @patatino(i8 %beth) { ; CHECK-LABEL: @patatino( -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[BETH:%.*]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[BETH:%.*]] to i16 ; CHECK-NEXT: br i1 false, label [[IF_THEN9:%.*]], label [[IF_THEN9]] ; CHECK: if.then9: -; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[CONV]], [[CONV]] +; CHECK-NEXT: [[MUL:%.*]] = mul nuw i16 [[CONV]], [[CONV]] ; CHECK-NEXT: [[TINKY:%.*]] = load i16, i16* @glob, align 2 -; CHECK-NEXT: [[CONV131:%.*]] = zext i16 [[TINKY]] to i32 -; CHECK-NEXT: [[AND:%.*]] = and i32 [[MUL]], [[CONV131]] -; CHECK-NEXT: [[CONV14:%.*]] = trunc i32 [[AND]] to i16 -; CHECK-NEXT: store i16 [[CONV14]], i16* @glob, align 2 +; CHECK-NEXT: [[AND:%.*]] = and i16 [[MUL]], [[TINKY]] +; CHECK-NEXT: store i16 [[AND]], i16* @glob, align 2 ; CHECK-NEXT: ret void ; %conv = zext i8 %beth to i32