diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1515,6 +1515,20 @@ return selectXRayCustomEvent(II); case Intrinsic::xray_typedevent: return selectXRayTypedEvent(II); + + case Intrinsic::memcpy: + case Intrinsic::memcpy_element_unordered_atomic: + case Intrinsic::memcpy_inline: + case Intrinsic::memmove: + case Intrinsic::memmove_element_unordered_atomic: + case Intrinsic::memset: + case Intrinsic::memset_element_unordered_atomic: + // Flush the local value map just like we do for regular calls, + // to avoid excessive spills and reloads. + // These intrinsics mostly turn into library calls at O0; and + // even memcpy_inline should be treated like one for this purpose. + flushLocalValueMap(); + break; } return fastLowerIntrinsicCall(II); diff --git a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll --- a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll +++ b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll @@ -290,13 +290,13 @@ ; Space for s2 is allocated at sp ; FAST-LABEL: caller42 -; FAST: sub sp, sp, #96 -; Space for s1 is allocated at fp-24 = sp+56 -; FAST: sub x[[A:[0-9]+]], x29, #24 +; FAST: sub sp, sp, #64 +; Space for s1 is allocated at fp-24 = sp+24 +; FAST: add x[[A:[0-9]+]], sp, #24 ; Call memcpy with size = 24 (0x18) ; FAST: mov {{x[0-9]+}}, #24 -; Space for s2 is allocated at sp+32 -; FAST: add x[[A:[0-9]+]], sp, #32 +; Space for s2 is allocated at sp +; FAST: mov x[[A:[0-9]+]], sp ; FAST: bl _memcpy %tmp = alloca %struct.s42, align 4 %tmp1 = alloca %struct.s42, align 4 @@ -339,8 +339,8 @@ ; Call memcpy with size = 24 (0x18) ; FAST: mov {{x[0-9]+}}, #24 ; FAST: bl _memcpy -; Space for s2 is allocated at fp-48 -; FAST: sub x[[B:[0-9]+]], x29, #48 +; Space for s2 is allocated at sp+32 +; FAST: add x[[B:[0-9]+]], sp, #32 ; Call memcpy again ; FAST: bl _memcpy ; Address of s1 is passed on stack at sp+8 diff --git a/llvm/test/CodeGen/X86/fastisel-memset-flush.ll b/llvm/test/CodeGen/X86/fastisel-memset-flush.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/fastisel-memset-flush.ll @@ -0,0 +1,40 @@ +; RUN: %llc_dwarf -O0 < %s | FileCheck %s + +define dso_local void @foo() !dbg !7 { +entry: + %a = alloca i32, align 4 + store i32 0, i32* %a, align 4, !dbg !9 + %0 = bitcast i32* %a to i8*, !dbg !10 + call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 -86, i64 4, i1 false), !dbg !10 + %1 = bitcast i32* %a to i8*, !dbg !11 + call void @other(i8* %1), !dbg !12 + ret void, !dbg !13 +} +; CHECK: callq memset +; CHECK-NEXT: .loc 1 9 9 +; CHECK-NEXT: leaq +; CHECK-NEXT: .loc 1 9 3 +; CHECK-NEXT: callq other + +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) + +declare dso_local void @other(i8*) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0 (https://github.com/llvm/llvm-project eaae6fdf67e1f61599331d69a41a7dafe6199667)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "memset-test.c", directory: "/home/probinson/projects/scratch") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 12.0.0 (https://github.com/llvm/llvm-project eaae6fdf67e1f61599331d69a41a7dafe6199667)"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 4, type: !8, scopeLine: 5, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !2) +!9 = !DILocation(line: 6, column: 7, scope: !7) +!10 = !DILocation(line: 8, column: 3, scope: !7) +!11 = !DILocation(line: 9, column: 9, scope: !7) +!12 = !DILocation(line: 9, column: 3, scope: !7) +!13 = !DILocation(line: 10, column: 1, scope: !7)