Index: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1302,9 +1302,14 @@
     return false;
 
   Value *TmpCast = MDep->getSource();
-  if (MDep->getSource()->getType() != ByValArg->getType())
-    TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
-                              "tmpcast", CS.getInstruction());
+  if (MDep->getSource()->getType() != ByValArg->getType()) {
+    auto *TmpBitCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
+                                       "tmpcast", CS.getInstruction());
+    // The cast is created while forwarding MDep's source to the byval
+    // argument, so give it MDep's DebugLoc instead of leaving it empty.
+    TmpBitCast->setDebugLoc(MDep->getDebugLoc());
+    TmpCast = TmpBitCast;
+  }
 
   LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
                     << " " << *MDep << "\n"
Index: llvm/test/Transforms/MemCpyOpt/pr37967.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/MemCpyOpt/pr37967.ll
@@ -0,0 +1,65 @@
+; RUN: opt -debugify-each -disable-output -instcombine -memcpyopt < %s 2>&1 | FileCheck %s
+
+; A negative check only covers the text between its neighbouring positive
+; matches, so it is placed ahead of the pass summary line it guards.
+; CHECK-NOT: ERROR: Instruction with empty DebugLoc in function _Z3bar3FooS_RiS_ -- %tmpcast = bitcast i8* %1 to %struct.Foo*
+; CHECK: CheckFunctionDebugify [MemCpy Optimization]: PASS
+
+%struct.Foo = type { i64, i64 }
+
+@a = dso_local global %struct.Foo* null, align 8
+
+define dso_local i32 @_Z3bar3FooS_RiS_(i64 %.coerce0, i64 %.coerce1, i64 %.coerce01, i64 %.coerce12, i32* dereferenceable(4) %c, %struct.Foo* byval(%struct.Foo) align 8 %0) #0 {
+entry:
+  %1 = alloca %struct.Foo, align 8
+  %2 = alloca %struct.Foo, align 8
+  %c.addr = alloca i32*, align 8
+  %agg.tmp = alloca %struct.Foo, align 8
+  %agg.tmp3 = alloca %struct.Foo, align 8
+  %agg.tmp5 = alloca %struct.Foo, align 8
+  %3 = bitcast %struct.Foo* %1 to { i64, i64 }*
+  %4 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %3, i32 0, i32 0
+  store i64 %.coerce0, i64* %4, align 8
+  %5 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %3, i32 0, i32 1
+  store i64 %.coerce1, i64* %5, align 8
+  %6 = bitcast %struct.Foo* %2 to { i64, i64 }*
+  %7 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %6, i32 0, i32 0
+  store i64 %.coerce01, i64* %7, align 8
+  %8 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %6, i32 0, i32 1
+  store i64 %.coerce12, i64* %8, align 8
+  store i32* %c, i32** %c.addr, align 8
+  %9 = load %struct.Foo*, %struct.Foo** @a, align 8
+  %arrayidx = getelementptr inbounds %struct.Foo, %struct.Foo* %9, i64 0
+  %10 = bitcast %struct.Foo* %agg.tmp to i8*
+  %11 = bitcast %struct.Foo* %arrayidx to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %10, i8* align 8 %11, i64 16, i1 false)
+  %12 = load %struct.Foo*, %struct.Foo** @a, align 8
+  %arrayidx4 = getelementptr inbounds %struct.Foo, %struct.Foo* %12, i64 0
+  %13 = bitcast %struct.Foo* %agg.tmp3 to i8*
+  %14 = bitcast %struct.Foo* %arrayidx4 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %13, i8* align 8 %14, i64 16, i1 false)
+  %15 = load i32*, i32** %c.addr, align 8
+  %16 = load %struct.Foo*, %struct.Foo** @a, align 8
+  %arrayidx6 = getelementptr inbounds %struct.Foo, %struct.Foo* %16, i64 0
+  %17 = bitcast %struct.Foo* %agg.tmp5 to i8*
+  %18 = bitcast %struct.Foo* %arrayidx6 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %17, i8* align 8 %18, i64 16, i1 false)
+  %19 = bitcast %struct.Foo* %agg.tmp to { i64, i64 }*
+  %20 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %19, i32 0, i32 0
+  %21 = load i64, i64* %20, align 8
+  %22 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %19, i32 0, i32 1
+  %23 = load i64, i64* %22, align 8
+  %24 = bitcast %struct.Foo* %agg.tmp3 to { i64, i64 }*
+  %25 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %24, i32 0, i32 0
+  %26 = load i64, i64* %25, align 8
+  %27 = getelementptr inbounds { i64, i64 }, { i64, i64 }* %24, i32 0, i32 1
+  %28 = load i64, i64* %27, align 8
+  %call = call i32 @_Z3bar3FooS_RiS_(i64 %21, i64 %23, i64 %26, i64 %28, i32* dereferenceable(4) %15, %struct.Foo* byval(%struct.Foo) align 8 %agg.tmp5)
+  %29 = load i32*, i32** %c.addr, align 8
+  store i32 %call, i32* %29, align 4
+  %30 = load i32*, i32** %c.addr, align 8
+  %31 = load i32, i32* %30, align 4
+  ret i32 %31
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1