Index: lib/Transforms/Scalar/LoopStrengthReduce.cpp =================================================================== --- lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -3094,6 +3094,7 @@ // IVOper will replace the current IV User's operand. IVSrc is the IV // value currently held in a register. Value *IVOper = IVSrc; + DebugLoc Loc = InsertPt->getDebugLoc(); if (!Inc.IncExpr->isZero()) { // IncExpr was the result of subtraction of two narrow values, so must // be signed. @@ -3104,10 +3105,10 @@ if (LeftOverExpr && !LeftOverExpr->isZero()) { // Expand the IV increment. Rewriter.clearPostInc(); - Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt); + Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, Loc, IntTy, InsertPt); const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc), SE.getUnknown(IncV)); - IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt); + IVOper = Rewriter.expandCodeFor(IVOperExpr, Loc, IVTy, InsertPt); // If an IV increment can't be folded, use it as the next IV value. if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) { @@ -3121,6 +3122,7 @@ assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) && "cannot extend a chained IV"); IRBuilder<> Builder(InsertPt); + Builder.SetCurrentDebugLocation(Loc); IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain"); } Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper); @@ -4900,6 +4902,14 @@ return IP; } +/// Return the DebugLoc attached to \p V if one is available. Otherwise, return +/// a special unknown location. +static DebugLoc getDebugLocFromValue(SCEVExpander &Rewriter, Value *V) { + if (auto *I = dyn_cast<Instruction>(V)) + return I->getDebugLoc(); + return Rewriter.getUnknownDebugLocation(); +} + /// Emit instructions for the leading candidate expression for this LSRUse (this /// is called "expanding"). 
Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, @@ -4934,13 +4944,16 @@ // Build up a list of operands to add together to form the full base. SmallVector<const SCEV *, 8> Ops; + // Find a debug location for the expanded instructions. + DebugLoc Loc = getDebugLocFromValue(Rewriter, LF.OperandValToReplace); + // Expand the BaseRegs portion. for (const SCEV *Reg : F.BaseRegs) { assert(!Reg->isZero() && "Zero allocated in a base register!"); // If we're expanding for a post-inc user, make the post-inc adjustment. Reg = denormalizeForPostIncUse(Reg, LF.PostIncLoops, SE); - Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr))); + Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, Loc, nullptr))); } // Expand the ScaledReg portion. @@ -4956,14 +4969,14 @@ // Expand ScaleReg as if it was part of the base regs. if (F.Scale == 1) Ops.push_back( - SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr))); + SE.getUnknown(Rewriter.expandCodeFor(ScaledS, Loc, nullptr))); else { // An interesting way of "folding" with an icmp is to use a negated // scale, which we'll implement by inserting it into the other operand // of the icmp. assert(F.Scale == -1 && "The only scale supported by ICmpZero uses is -1!"); - ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr); + ICmpScaledV = Rewriter.expandCodeFor(ScaledS, Loc, nullptr); } } else { // Otherwise just expand the scaled register and an explicit scale, @@ -4973,11 +4986,11 @@ // Unless the addressing mode will not be folded. 
if (!Ops.empty() && LU.Kind == LSRUse::Address && isAMCompletelyFolded(TTI, LU, F)) { - Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty); + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Loc, Ty); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } - ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)); + ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, Loc, nullptr)); if (F.Scale != 1) ScaledS = SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale)); @@ -4989,7 +5002,7 @@ if (F.BaseGV) { // Flush the operand list to suppress SCEVExpander hoisting. if (!Ops.empty()) { - Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty); + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Loc, Ty); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } @@ -4999,7 +5012,7 @@ // Flush the operand list to suppress SCEVExpander hoisting of both folded and // unfolded offsets. LSR assumes they both live next to their uses. if (!Ops.empty()) { - Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty); + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Loc, Ty); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } @@ -5035,7 +5048,7 @@ const SCEV *FullS = Ops.empty() ? SE.getConstant(IntTy, 0) : SE.getAddExpr(Ops); - Value *FullV = Rewriter.expandCodeFor(FullS, Ty); + Value *FullV = Rewriter.expandCodeFor(FullS, Loc, Ty); // We're done expanding now, so reset the rewriter. Rewriter.clearPostInc(); @@ -5054,6 +5067,7 @@ CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false, OpTy, false), ICmpScaledV, OpTy, "tmp", CI); + Cast->setDebugLoc(getDebugLocFromValue(Rewriter, ICmpScaledV)); ICmpScaledV = Cast; } CI->setOperand(1, ICmpScaledV); @@ -5138,12 +5152,14 @@ // If this is reuse-by-noop-cast, insert the noop cast. 
Type *OpTy = LF.OperandValToReplace->getType(); - if (FullV->getType() != OpTy) - FullV = - CastInst::Create(CastInst::getCastOpcode(FullV, false, + if (FullV->getType() != OpTy) { + auto *Cast = CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false), FullV, LF.OperandValToReplace->getType(), "tmp", BB->getTerminator()); + Cast->setDebugLoc(getDebugLocFromValue(Rewriter, FullV)); + FullV = Cast; + } PN->setIncomingValue(i, FullV); Pair.first->second = FullV; @@ -5171,6 +5187,7 @@ Instruction *Cast = CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false), FullV, OpTy, "tmp", LF.UserInst); + Cast->setDebugLoc(getDebugLocFromValue(Rewriter, FullV)); FullV = Cast; } Index: test/Transforms/LoopStrengthReduce/expand-with-debug-loc.ll =================================================================== --- /dev/null +++ test/Transforms/LoopStrengthReduce/expand-with-debug-loc.ll @@ -0,0 +1,161 @@ +; RUN: opt -loop-reduce < %s -S -o - | FileCheck %s + +; This test case comes from PR25630: +; int sum(int x, int y) { return x + y; } +; +; int main() { +; int s, i = 0; +; while (i++ < 400) +; if (i != 100) s = sum(i, s); else s = 30; +; return s; +; } + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.12.0" + +define i32 @sum(i32 %x, i32 %y) local_unnamed_addr !dbg !8 { +entry: + %add = add nsw i32 %y, %x, !dbg !17 + ret i32 %add, !dbg !18 +} + +; CHECK-LABEL: define i32 @main +; "@main" contains the test case from PR25630. 
+define i32 @main() local_unnamed_addr !dbg !19 { +entry: + br label %while.body, !dbg !27 + +while.body: ; preds = %while.body, %entry + ; CHECK: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], {{.*}} ], [ 5, {{.*}} ], !dbg ![[IV_LOC:[0-9]+]] + + %inc8 = phi i32 [ 1, %entry ], [ %inc.4, %while.body ] + %s.07 = phi i32 [ undef, %entry ], [ %spec.select.4, %while.body ] + %cmp1 = icmp eq i32 %inc8, 100, !dbg !28 + %add.i = add nsw i32 %inc8, %s.07, !dbg !30 + %spec.select = select i1 %cmp1, i32 30, i32 %add.i, !dbg !32 + %inc = add nuw nsw i32 %inc8, 1, !dbg !33 + %cmp1.1 = icmp eq i32 %inc, 100, !dbg !28 + %add.i.1 = add nsw i32 %inc, %spec.select, !dbg !30 + %spec.select.1 = select i1 %cmp1.1, i32 30, i32 %add.i.1, !dbg !32 + %inc.1 = add nuw nsw i32 %inc8, 2, !dbg !33 + %cmp1.2 = icmp eq i32 %inc.1, 100, !dbg !28 + %add.i.2 = add nsw i32 %inc.1, %spec.select.1, !dbg !30 + %spec.select.2 = select i1 %cmp1.2, i32 30, i32 %add.i.2, !dbg !32 + %inc.2 = add nuw nsw i32 %inc8, 3, !dbg !33 + %cmp1.3 = icmp eq i32 %inc.2, 100, !dbg !28 + %add.i.3 = add nsw i32 %inc.2, %spec.select.2, !dbg !30 + %spec.select.3 = select i1 %cmp1.3, i32 30, i32 %add.i.3, !dbg !32 + %inc.3 = add nuw nsw i32 %inc8, 4, !dbg !33 + %cmp1.4 = icmp eq i32 %inc.3, 100, !dbg !28 + %add.i.4 = add nsw i32 %inc.3, %spec.select.3, !dbg !30 + %spec.select.4 = select i1 %cmp1.4, i32 30, i32 %add.i.4, !dbg !32 + %inc.4 = add nuw nsw i32 %inc8, 5, !dbg !33 + %exitcond.4 = icmp eq i32 %inc.4, 401, !dbg !34 + br i1 %exitcond.4, label %while.end, label %while.body, !dbg !27, !llvm.loop !35 + +; CHECK: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 5, !dbg ![[IV_LOC]] + +while.end: ; preds = %while.body + ret i32 %spec.select.4, !dbg !37 +} + +; CHECK-LABEL: define i32 @simple +; "@simple" contains hand-rolled IR to test GenerateIVChain. 
+define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind !dbg !38 { +entry: + br label %loop +loop: + %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ] + %s = phi i32 [ 0, %entry ], [ %s4, %loop ] + ; CHECK: load i32, i32* {{.*}}, !dbg ![[V1_LOC:[0-9]+]] + %v = load i32, i32* %iv, !dbg !45 + ; CHECK: getelementptr i8, i8* {{.*}}, i64 {{.*}}, !dbg ![[V2_LOC:[0-9]+]] + ; CHECK: load i32, i32* {{.*}}, !dbg ![[V2_LOC]] + %iv1 = getelementptr inbounds i32, i32* %iv, i32 %x + %v1 = load i32, i32* %iv1, !dbg !46 + ; CHECK: getelementptr i8, i8* {{.*}}, i64 {{.*}}, !dbg ![[V3_LOC:[0-9]+]] + ; CHECK: load i32, i32* {{.*}}, !dbg ![[V3_LOC]] + %iv2 = getelementptr inbounds i32, i32* %iv1, i32 %x + %v2 = load i32, i32* %iv2, !dbg !47 + ; CHECK: getelementptr i8, i8* {{.*}}, i64 {{.*}}, !dbg ![[V4_LOC:[0-9]+]] + ; CHECK: load i32, i32* {{.*}}, !dbg ![[V4_LOC]] + %iv3 = getelementptr inbounds i32, i32* %iv2, i32 %x + %v3 = load i32, i32* %iv3, !dbg !48 + %s1 = add i32 %s, %v + %s2 = add i32 %s1, %v1 + %s3 = add i32 %s2, %v2 + %s4 = add i32 %s3, %v3 + ; CHECK: getelementptr i8, i8* {{.*}}, i64 {{.*}}, !dbg ![[V5_LOC:[0-9]+]] + ; CHECK: icmp eq i32* {{.*}}, {{.*}}, !dbg ![[V5_LOC]] + %iv4 = getelementptr inbounds i32, i32* %iv3, i32 %x + %cmp = icmp eq i32* %iv4, %b, !dbg !49 + br i1 %cmp, label %exit, label %loop +exit: + ret i32 %s4 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6} +!llvm.ident = !{!7} + +; CHECK-DAG: ![[INLINE_SCOPE:[0-9]+]] = distinct !DILocation(line: 6, column: 23 +; CHECK-DAG: ![[IV_LOC]] = !DILocation(line: 1, column: 34, scope: !8, inlinedAt: ![[INLINE_SCOPE]]) + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 6.0.0 (trunk 317279) (llvm/trunk 317288)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "t.c", directory: "/Users/vk/src/llvm.org-dbginf/PR29600836-missing-inlined-at-tag") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug 
Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{i32 7, !"PIC Level", i32 2} +!7 = !{!"clang version 6.0.0 (trunk 317279) (llvm/trunk 317288)"} +!8 = distinct !DISubprogram(name: "sum", scope: !1, file: !1, line: 1, type: !9, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !12) +!9 = !DISubroutineType(types: !10) +!10 = !{!11, !11, !11} +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!12 = !{!13, !14} +!13 = !DILocalVariable(name: "x", arg: 1, scope: !8, file: !1, line: 1, type: !11) +!14 = !DILocalVariable(name: "y", arg: 2, scope: !8, file: !1, line: 1, type: !11) +!15 = !DILocation(line: 1, column: 13, scope: !8) +!16 = !DILocation(line: 1, column: 20, scope: !8) +!17 = !DILocation(line: 1, column: 34, scope: !8) +!18 = !DILocation(line: 1, column: 25, scope: !8) +!19 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 3, type: !20, isLocal: false, isDefinition: true, scopeLine: 3, isOptimized: true, unit: !0, variables: !22) +!20 = !DISubroutineType(types: !21) +!21 = !{!11} +!22 = !{!23, !24} +!23 = !DILocalVariable(name: "s", scope: !19, file: !1, line: 4, type: !11) +!24 = !DILocalVariable(name: "i", scope: !19, file: !1, line: 4, type: !11) +!25 = !DILocation(line: 4, column: 10, scope: !19) +!26 = !DILocation(line: 4, column: 7, scope: !19) +!27 = !DILocation(line: 5, column: 3, scope: !19) +!28 = !DILocation(line: 6, column: 11, scope: !29) +!29 = distinct !DILexicalBlock(scope: !19, file: !1, line: 6, column: 9) +!30 = !DILocation(line: 1, column: 34, scope: !8, inlinedAt: !31) +!31 = distinct !DILocation(line: 6, column: 23, scope: !29) +!32 = !DILocation(line: 6, column: 9, scope: !19) +!33 = !DILocation(line: 5, column: 11, scope: !19) +!34 = !DILocation(line: 5, column: 14, scope: !19) +!35 = distinct !{!35, !27, !36} +!36 = !DILocation(line: 6, column: 43, scope: !19) +!37 = !DILocation(line: 7, column: 3, scope: !19) + +; Custom 
debug info for "@simple". +!38 = distinct !DISubprogram(name: "simple", scope: !1, file: !1, line: 100, type: !39, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !41) +!39 = !DISubroutineType(types: !40) +!40 = !{!11, !11, !11, !11} +!41 = !{!42, !43, !44} +!42 = !DILocalVariable(name: "a", arg: 1, scope: !8, file: !1, line: 100, type: !11) +!43 = !DILocalVariable(name: "b", arg: 2, scope: !8, file: !1, line: 100, type: !11) +!44 = !DILocalVariable(name: "x", arg: 3, scope: !8, file: !1, line: 100, type: !11) +!45 = !DILocation(line: 100, column: 13, scope: !38) +!46 = !DILocation(line: 100, column: 14, scope: !38) +!47 = !DILocation(line: 100, column: 15, scope: !38) +!48 = !DILocation(line: 100, column: 16, scope: !38) +!49 = !DILocation(line: 100, column: 17, scope: !38) + +; CHECK-DAG: ![[V1_LOC]] = !DILocation(line: 100, column: 13 +; CHECK-DAG: ![[V2_LOC]] = !DILocation(line: 100, column: 14 +; CHECK-DAG: ![[V3_LOC]] = !DILocation(line: 100, column: 15 +; CHECK-DAG: ![[V4_LOC]] = !DILocation(line: 100, column: 16 +; CHECK-DAG: ![[V5_LOC]] = !DILocation(line: 100, column: 17