Index: llvm/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -4975,7 +4975,7 @@
     Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
     SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
     const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
-    BlockFrequencyInfo *BFI, int SeenInsts = 0) {
+    BlockFrequencyInfo *BFI, int &SeenInsts) {
   // If we already considered this instruction, we're done.
   if (!ConsideredInsts.insert(I).second)
     return false;
@@ -5046,6 +5046,20 @@
   return false;
 }
 
+/// Convenience overload of the recursive FindAllMemoryUses above: starts the
+/// scan at \p I with a zeroed instruction counter and an empty considered-set,
+/// both of which the recursion shares by reference. The visible caller treats
+/// a true result as "has a non-memory, non-foldable use".
+static bool FindAllMemoryUses(
+    Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
+    const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
+    ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
+  int SeenInsts = 0;
+  SmallPtrSet<Instruction *, 16> ConsideredInsts;
+  return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
+                           PSI, BFI, SeenInsts);
+}
+
 /// Return true if Val is already known to be live at the use site that we're
 /// folding it into. If so, there is no cost to include it in the addressing
 /// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
@@ -5128,9 +5142,7 @@
   // for another (at worst.) In this context, folding an addressing mode into
   // the use is just a particularly nice way of sinking it.
   SmallVector<std::pair<Use *, Type *>, 16> MemoryUses;
-  SmallPtrSet<Instruction *, 16> ConsideredInsts;
-  if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, PSI,
-                        BFI))
+  if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
     return false; // Has a non-memory, non-foldable use!
 
   // Now that we know that all uses of this instruction are part of a chain of
Index: llvm/test/CodeGen/Generic/addr-use-count.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Generic/addr-use-count.ll
@@ -0,0 +1,284 @@
+; RUN: llc --stop-after=codegenprepare < %s | FileCheck %s
+
+; `f` and `g` contain the same number of uses of `%addr`. Before the fix, `%addr`
+; was left unfolded in `f` (CodeGenPrepare hit its instruction scan limit) but
+; was folded in `g`, whose last use is reached through a second GEP.
+; With the fix, `%addr` must not be folded in either function.
+; CHECK-NOT: sunkaddr
+define i32 @f(i32 %x, ptr %p) {
+entry:
+  %addr = getelementptr i32 *, ptr %p, i32 4 ; NOTE(review): source element type is spelled 'i32 *' (a pointer type) - confirm intended
+  switch i32 %x, label %exit [
+    i32 0, label %case0
+    i32 1, label %case1
+    i32 2, label %case2
+    i32 3, label %case3
+    i32 4, label %case4
+    i32 5, label %case5
+    i32 6, label %case6
+    i32 7, label %case7
+    i32 8, label %case8
+    i32 9, label %case9
+    i32 10, label %case10
+    i32 11, label %case11
+    i32 12, label %case12
+    i32 13, label %case13
+    i32 14, label %case14
+    i32 15, label %case15
+    i32 16, label %case16
+    i32 17, label %case17
+    i32 18, label %case18
+    i32 19, label %case19
+    i32 20, label %case20]
+
+case0: ; each of the 21 case blocks in @f loads directly from %addr
+  %t0 = load i32, ptr %addr
+  %v0 = add i32 %t0, 0
+  br label %exit
+
+case1:
+  %t1 = load i32, ptr %addr
+  %v1 = add i32 %t1, 1
+  br label %exit
+
+case2:
+  %t2 = load i32, ptr %addr
+  %v2 = add i32 %t2, 2
+  br label %exit
+
+case3:
+  %t3 = load i32, ptr %addr
+  %v3 = add i32 %t3, 3
+  br label %exit
+
+case4:
+  %t4 = load i32, ptr %addr
+  %v4 = add i32 %t4, 4
+  br label %exit
+
+case5:
+  %t5 = load i32, ptr %addr
+  %v5 = add i32 %t5, 5
+  br label %exit
+
+case6:
+  %t6 = load i32, ptr %addr
+  %v6 = add i32 %t6, 6
+  br label %exit
+
+case7:
+  %t7 = load i32, ptr %addr
+  %v7 = add i32 %t7, 7
+  br label %exit
+
+case8:
+  %t8 = load i32, ptr %addr
+  %v8 = add i32 %t8, 8
+  br label %exit
+
+case9:
+  %t9 = load i32, ptr %addr
+  %v9 = add i32 %t9, 9
+  br label %exit
+
+case10:
+  %t10 = load i32, ptr %addr
+  %v10 = add i32 %t10, 10
+  br label %exit
+
+case11:
+  %t11 = load i32, ptr %addr
+  %v11 = add i32 %t11, 11
+  br label %exit
+
+case12:
+  %t12 = load i32, ptr %addr
+  %v12 = add i32 %t12, 12
+  br label %exit
+
+case13:
+  %t13 = load i32, ptr %addr
+  %v13 = add i32 %t13, 13
+  br label %exit
+
+case14:
+  %t14 = load i32, ptr %addr
+  %v14 = add i32 %t14, 14
+  br label %exit
+
+case15:
+  %t15 = load i32, ptr %addr
+  %v15 = add i32 %t15, 15
+  br label %exit
+
+case16:
+  %t16 = load i32, ptr %addr
+  %v16 = add i32 %t16, 16
+  br label %exit
+
+case17:
+  %t17 = load i32, ptr %addr
+  %v17 = add i32 %t17, 17
+  br label %exit
+
+case18:
+  %t18 = load i32, ptr %addr
+  %v18 = add i32 %t18, 18
+  br label %exit
+
+case19:
+  %t19 = load i32, ptr %addr
+  %v19 = add i32 %t19, 19
+  br label %exit
+
+case20:
+  %t20 = load i32, ptr %addr
+  %v20 = add i32 %t20, 20
+  br label %exit
+
+exit: ; merges the default edge from entry with every case block
+  %v = phi i32 [0, %entry],
+         [ %v0, %case0], [%v1, %case1], [%v2, %case2], [%v3, %case3],
+         [%v4, %case4], [%v5, %case5], [%v6, %case6], [%v7, %case7],
+         [%v8, %case8], [%v9, %case9], [%v10, %case10], [%v11, %case11],
+         [%v12, %case12], [%v13, %case13], [%v14, %case14], [%v15, %case15],
+         [%v16, %case16], [%v17, %case17], [%v18, %case18], [%v19, %case19],
+         [%v20, %case20]
+  ret i32 %v
+}
+
+define i32 @g(i32 %x, ptr %p) {
+entry:
+  %addr = getelementptr i32 *, ptr %p, i32 4
+  switch i32 %x, label %exit [
+    i32 0, label %case0
+    i32 1, label %case1
+    i32 2, label %case2
+    i32 3, label %case3
+    i32 4, label %case4
+    i32 5, label %case5
+    i32 6, label %case6
+    i32 7, label %case7
+    i32 8, label %case8
+    i32 9, label %case9
+    i32 10, label %case10
+    i32 11, label %case11
+    i32 12, label %case12
+    i32 13, label %case13
+    i32 14, label %case14
+    i32 15, label %case15
+    i32 16, label %case16
+    i32 17, label %case17
+    i32 18, label %case18
+    i32 19, label %case19]
+
+case0: ; cases 0-18 in @g load directly from %addr, as in @f
+  %t0 = load i32, ptr %addr
+  %v0 = add i32 %t0, 0
+  br label %exit
+
+case1:
+  %t1 = load i32, ptr %addr
+  %v1 = add i32 %t1, 1
+  br label %exit
+
+case2:
+  %t2 = load i32, ptr %addr
+  %v2 = add i32 %t2, 2
+  br label %exit
+
+case3:
+  %t3 = load i32, ptr %addr
+  %v3 = add i32 %t3, 3
+  br label %exit
+
+case4:
+  %t4 = load i32, ptr %addr
+  %v4 = add i32 %t4, 4
+  br label %exit
+
+case5:
+  %t5 = load i32, ptr %addr
+  %v5 = add i32 %t5, 5
+  br label %exit
+
+case6:
+  %t6 = load i32, ptr %addr
+  %v6 = add i32 %t6, 6
+  br label %exit
+
+case7:
+  %t7 = load i32, ptr %addr
+  %v7 = add i32 %t7, 7
+  br label %exit
+
+case8:
+  %t8 = load i32, ptr %addr
+  %v8 = add i32 %t8, 8
+  br label %exit
+
+case9:
+  %t9 = load i32, ptr %addr
+  %v9 = add i32 %t9, 9
+  br label %exit
+
+case10:
+  %t10 = load i32, ptr %addr
+  %v10 = add i32 %t10, 10
+  br label %exit
+
+case11:
+  %t11 = load i32, ptr %addr
+  %v11 = add i32 %t11, 11
+  br label %exit
+
+case12:
+  %t12 = load i32, ptr %addr
+  %v12 = add i32 %t12, 12
+  br label %exit
+
+case13:
+  %t13 = load i32, ptr %addr
+  %v13 = add i32 %t13, 13
+  br label %exit
+
+case14:
+  %t14 = load i32, ptr %addr
+  %v14 = add i32 %t14, 14
+  br label %exit
+
+case15:
+  %t15 = load i32, ptr %addr
+  %v15 = add i32 %t15, 15
+  br label %exit
+
+case16:
+  %t16 = load i32, ptr %addr
+  %v16 = add i32 %t16, 16
+  br label %exit
+
+case17:
+  %t17 = load i32, ptr %addr
+  %v17 = add i32 %t17, 17
+  br label %exit
+
+case18:
+  %t18 = load i32, ptr %addr
+  %v18 = add i32 %t18, 18
+  br label %exit
+
+case19: ; the one block that differs from @f
+  %t19 = getelementptr i32 *, ptr %addr, i32 1 ; %addr is used by another GEP here, not by a load
+  %t20 = load i32, ptr %t19 ; the load goes through the derived pointer
+  %v19 = add i32 %t20, 19
+  br label %exit
+
+exit: ; merges the default edge from entry with every case block
+  %v = phi i32 [0, %entry],
+         [ %v0, %case0], [%v1, %case1], [%v2, %case2], [%v3, %case3],
+         [%v4, %case4], [%v5, %case5], [%v6, %case6], [%v7, %case7],
+         [%v8, %case8], [%v9, %case9], [%v10, %case10], [%v11, %case11],
+         [%v12, %case12], [%v13, %case13], [%v14, %case14], [%v15, %case15],
+         [%v16, %case16], [%v17, %case17], [%v18, %case18], [%v19, %case19]
+  ret i32 %v
+}