Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -149,10 +149,10 @@ "enable memory promotion.")); static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); -static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, - const LoopSafetyInfo *SafetyInfo, - TargetTransformInfo *TTI, bool &FreeInLoop, - bool LoopNestMode); +static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop, + const LoopSafetyInfo *SafetyInfo, + TargetTransformInfo *TTI, + bool &FoldableInLoop, bool LoopNestMode); static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU, ScalarEvolution *SE, @@ -582,14 +582,15 @@ // outside of the loop. In this case, it doesn't even matter if the // operands of the instruction are loop invariant. // - bool FreeInLoop = false; + bool FoldableInLoop = false; bool LoopNestMode = OutermostLoop != nullptr; if (!I.mayHaveSideEffects() && - isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop, - SafetyInfo, TTI, FreeInLoop, LoopNestMode) && + isNotUsedOrFoldableInLoop(I, LoopNestMode ? OutermostLoop : CurLoop, + SafetyInfo, TTI, FoldableInLoop, + LoopNestMode) && canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE)) { if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) { - if (!FreeInLoop) { + if (!FoldableInLoop) { ++II; salvageDebugInfo(I); eraseInstruction(I, *SafetyInfo, MSSAU); @@ -1338,13 +1339,12 @@ return true; } -/// Return true if the instruction is free in the loop. -static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, +/// Return true if the instruction is foldable in the loop. +static bool isFoldableInLoop(const Instruction &I, const Loop *CurLoop, const TargetTransformInfo *TTI) { - InstructionCost CostI = - TTI->getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency); - if (auto *GEP = dyn_cast(&I)) { + InstructionCost CostI = + TTI->getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency); if (CostI != TargetTransformInfo::TCC_Free) return false; // For a GEP, we cannot simply use getInstructionCost because currently @@ -1361,7 +1361,7 @@ return true; } - return CostI == TargetTransformInfo::TCC_Free; + return false; } /// Return true if the only users of this instruction are outside of @@ -1370,12 +1370,12 @@ /// /// We also return true if the instruction could be folded away in lowering. /// (e.g., a GEP can be folded into a load as an addressing mode in the loop). -static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, - const LoopSafetyInfo *SafetyInfo, - TargetTransformInfo *TTI, bool &FreeInLoop, - bool LoopNestMode) { +static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop, + const LoopSafetyInfo *SafetyInfo, + TargetTransformInfo *TTI, + bool &FoldableInLoop, bool LoopNestMode) { const auto &BlockColors = SafetyInfo->getBlockColors(); - bool IsFree = isFreeInLoop(I, CurLoop, TTI); + bool IsFoldable = isFoldableInLoop(I, CurLoop, TTI); for (const User *U : I.users()) { const Instruction *UI = cast(U); if (const PHINode *PN = dyn_cast(UI)) { @@ -1402,8 +1402,8 @@ } if (CurLoop->contains(UI)) { - if (IsFree) { - FreeInLoop = true; + if (IsFoldable) { + FoldableInLoop = true; continue; } return false; Index: llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll =================================================================== --- llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll +++ llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll @@ -126,17 +126,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: sync ; CHECK-NEXT: li 6, 0 -; CHECK-NEXT: lwz 7, 0(3) +; CHECK-NEXT: lwz 5, 0(3) ; CHECK-NEXT: b .LBB2_2 ; CHECK-NEXT: .LBB2_1: # %atomicrmw.start ; CHECK-NEXT: # ; CHECK-NEXT: cmplw 5, 7 -; CHECK-NEXT: mr 7, 5 ; CHECK-NEXT: beq 0, .LBB2_7 ; CHECK-NEXT: .LBB2_2: # %atomicrmw.start ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB2_5 Depth 2 -; CHECK-NEXT: addi 5, 7, 1 +; CHECK-NEXT: mr 7, 5 +; CHECK-NEXT: addi 5, 5, 1 ; CHECK-NEXT: cmplw 7, 4 ; CHECK-NEXT: bc 12, 0, .LBB2_4 ; CHECK-NEXT: # %bb.3: # %atomicrmw.start @@ -169,18 +169,18 @@ ; CHECK-LABEL: atomicrmw_uinc_wrap_i64: ; CHECK: # %bb.0: ; CHECK-NEXT: sync -; CHECK-NEXT: ld 7, 0(3) +; CHECK-NEXT: ld 5, 0(3) ; CHECK-NEXT: li 6, 0 ; CHECK-NEXT: b .LBB3_2 ; CHECK-NEXT: .LBB3_1: # %atomicrmw.start ; CHECK-NEXT: # ; CHECK-NEXT: cmpld 5, 7 -; CHECK-NEXT: mr 7, 5 ; CHECK-NEXT: beq 0, .LBB3_7 ; CHECK-NEXT: .LBB3_2: # %atomicrmw.start ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB3_5 Depth 2 -; CHECK-NEXT: addi 5, 7, 1 +; CHECK-NEXT: mr 7, 5 +; CHECK-NEXT: addi 5, 5, 1 ; CHECK-NEXT: cmpld 7, 4 ; CHECK-NEXT: bc 12, 0, .LBB3_4 ; CHECK-NEXT: # %bb.3: # %atomicrmw.start @@ -334,19 +334,19 @@ ; CHECK-LABEL: atomicrmw_udec_wrap_i32: ; CHECK: # %bb.0: ; CHECK-NEXT: sync -; CHECK-NEXT: lwz 6, 0(3) +; CHECK-NEXT: lwz 5, 0(3) ; CHECK-NEXT: b .LBB6_2 ; CHECK-NEXT: .LBB6_1: # %atomicrmw.start ; CHECK-NEXT: # ; CHECK-NEXT: cmplw 5, 6 -; CHECK-NEXT: mr 6, 5 ; CHECK-NEXT: beq 0, .LBB6_7 ; CHECK-NEXT: .LBB6_2: # %atomicrmw.start ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB6_5 Depth 2 +; CHECK-NEXT: mr 6, 5 ; CHECK-NEXT: cmpwi 6, 0 ; CHECK-NEXT: cmplw 1, 6, 4 -; CHECK-NEXT: addi 5, 6, -1 +; CHECK-NEXT: addi 5, 5, -1 ; CHECK-NEXT: cror 20, 2, 5 ; CHECK-NEXT: bc 12, 20, .LBB6_4 ; CHECK-NEXT: # %bb.3: # %atomicrmw.start @@ -379,19 +379,18 @@ ; CHECK-LABEL: atomicrmw_udec_wrap_i64: ; CHECK: # %bb.0: ; CHECK-NEXT: sync -; CHECK-NEXT: ld 6, 0(3) +; CHECK-NEXT: ld 5, 0(3) ; CHECK-NEXT: b .LBB7_2 ; CHECK-NEXT: .LBB7_1: # %atomicrmw.start ; CHECK-NEXT: # ; CHECK-NEXT: cmpld 5, 6 -; CHECK-NEXT: mr 6, 5 ; CHECK-NEXT: beq 0, .LBB7_7 ; CHECK-NEXT: .LBB7_2: # %atomicrmw.start ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB7_5 Depth 2 -; CHECK-NEXT: cmpdi 6, 0 +; CHECK-NEXT: mr. 6, 5 ; CHECK-NEXT: cmpld 1, 6, 4 -; CHECK-NEXT: addi 5, 6, -1 +; CHECK-NEXT: addi 5, 5, -1 ; CHECK-NEXT: cror 20, 2, 5 ; CHECK-NEXT: bc 12, 20, .LBB7_4 ; CHECK-NEXT: # %bb.3: # %atomicrmw.start Index: llvm/test/Transforms/LICM/pr23608.ll =================================================================== --- llvm/test/Transforms/LICM/pr23608.ll +++ llvm/test/Transforms/LICM/pr23608.ll @@ -25,9 +25,8 @@ ; NO_ASSUME-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[TMP4]], 0 ; NO_ASSUME-NEXT: br i1 [[TOBOOL]], label [[BB13:%.*]], label [[BB15:%.*]] ; NO_ASSUME: bb13: -; NO_ASSUME-NEXT: [[F_IBLOCK_LCSSA:%.*]] = phi ptr [ [[TMP]], [[BB2]] ] -; NO_ASSUME-NEXT: [[TMP4_LE:%.*]] = ptrtoint ptr [[F_IBLOCK_LCSSA]] to i64 -; NO_ASSUME-NEXT: [[TMP8_LE:%.*]] = inttoptr i64 [[TMP4_LE]] to ptr +; NO_ASSUME-NEXT: [[TMP4_LCSSA:%.*]] = phi i64 [ [[TMP4]], [[BB2]] ] +; NO_ASSUME-NEXT: [[TMP8_LE:%.*]] = inttoptr i64 [[TMP4_LCSSA]] to ptr ; NO_ASSUME-NEXT: call void @__msan_warning_noreturn() ; NO_ASSUME-NEXT: unreachable ; NO_ASSUME: bb15: @@ -54,9 +53,8 @@ ; USE_ASSUME-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[TMP4]], 0 ; USE_ASSUME-NEXT: br i1 [[TOBOOL]], label [[BB13:%.*]], label [[BB15:%.*]] ; USE_ASSUME: bb13: -; USE_ASSUME-NEXT: [[F_IBLOCK_LCSSA:%.*]] = phi ptr [ [[TMP]], [[BB2]] ] -; USE_ASSUME-NEXT: [[TMP4_LE:%.*]] = ptrtoint ptr [[F_IBLOCK_LCSSA]] to i64 -; USE_ASSUME-NEXT: [[TMP8_LE:%.*]] = inttoptr i64 [[TMP4_LE]] to ptr +; USE_ASSUME-NEXT: [[TMP4_LCSSA:%.*]] = phi i64 [ [[TMP4]], [[BB2]] ] +; USE_ASSUME-NEXT: [[TMP8_LE:%.*]] = inttoptr i64 [[TMP4_LCSSA]] to ptr ; USE_ASSUME-NEXT: call void @__msan_warning_noreturn() ; USE_ASSUME-NEXT: unreachable ; USE_ASSUME: bb15: Index: llvm/test/Transforms/LICM/sink-foldable.ll =================================================================== --- llvm/test/Transforms/LICM/sink-foldable.ll +++ llvm/test/Transforms/LICM/sink-foldable.ll @@ -188,11 +188,11 @@ ; CHECK-NEXT: [[P1:%.*]] = phi ptr [ [[ARRAYIDX0]], [[FOR_BODY]] ] ; CHECK-NEXT: br label [[RETURN]] ; CHECK: loopexit1: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[IF_END]] ] +; CHECK-NEXT: [[TRUNC_LCSSA1:%.*]] = phi i32 [ [[TRUNC]], [[IF_END]] ] ; CHECK-NEXT: [[P_ADDR_LCSSA:%.*]] = phi ptr [ [[P_ADDR]], [[IF_END]] ] -; CHECK-NEXT: [[TRUNC_LE:%.*]] = trunc i64 [[ADD_LCSSA]] to i32 -; CHECK-NEXT: [[ARRAYIDX1_LE:%.*]] = getelementptr inbounds ptr, ptr [[P_ADDR_LCSSA]], i32 [[TRUNC_LE]] -; CHECK-NEXT: call void @dummy(i32 [[TRUNC_LE]]) +; CHECK-NEXT: [[TRUNC_LCSSA:%.*]] = phi i32 [ [[TRUNC]], [[IF_END]] ] +; CHECK-NEXT: [[ARRAYIDX1_LE:%.*]] = getelementptr inbounds ptr, ptr [[P_ADDR_LCSSA]], i32 [[TRUNC_LCSSA1]] +; CHECK-NEXT: call void @dummy(i32 [[TRUNC_LCSSA]]) ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: ; CHECK-NEXT: [[RETVAL_0:%.*]] = phi ptr [ [[P1]], [[LOOPEXIT0]] ], [ [[ARRAYIDX1_LE]], [[LOOPEXIT1]] ], [ null, [[ENTRY:%.*]] ] Index: llvm/test/Transforms/LICM/sinking.ll =================================================================== --- llvm/test/Transforms/LICM/sinking.ll +++ llvm/test/Transforms/LICM/sinking.ll @@ -1001,6 +1001,7 @@ declare void @use.i32(i32) declare void @use.i64(i64) +; Don't duplicate freeze just because it's free. define i32 @duplicate_freeze(i1 %c, i32 %x) { ; CHECK-LABEL: @duplicate_freeze( ; CHECK-NEXT: entry: @@ -1010,8 +1011,8 @@ ; CHECK-NEXT: call void @use.i32(i32 [[FR]]) ; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[FR_LE:%.*]] = freeze i32 [[X]] -; CHECK-NEXT: ret i32 [[FR_LE]] +; CHECK-NEXT: [[FR_LCSSA:%.*]] = phi i32 [ [[FR]], [[LOOP]] ] +; CHECK-NEXT: ret i32 [[FR_LCSSA]] ; entry: br label %loop @@ -1025,6 +1026,7 @@ ret i32 %fr } +; Don't duplicate ptrtoint just because it's free. define i64 @duplicate_ptrtoint(i1 %c, ptr %p) { ; CHECK-LABEL: @duplicate_ptrtoint( ; CHECK-NEXT: entry: @@ -1034,8 +1036,8 @@ ; CHECK-NEXT: call void @use.i64(i64 [[PI]]) ; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[PI_LE:%.*]] = ptrtoint ptr [[P]] to i64 -; CHECK-NEXT: ret i64 [[PI_LE]] +; CHECK-NEXT: [[PI_LCSSA:%.*]] = phi i64 [ [[PI]], [[LOOP]] ] +; CHECK-NEXT: ret i64 [[PI_LCSSA]] ; entry: br label %loop