diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1515,6 +1515,20 @@
     return selectXRayCustomEvent(II);
   case Intrinsic::xray_typedevent:
     return selectXRayTypedEvent(II);
+
+  case Intrinsic::memcpy:
+  case Intrinsic::memcpy_element_unordered_atomic:
+  case Intrinsic::memcpy_inline:
+  case Intrinsic::memmove:
+  case Intrinsic::memmove_element_unordered_atomic:
+  case Intrinsic::memset:
+  case Intrinsic::memset_element_unordered_atomic:
+    // Flush the local value map, just as we do for regular calls, to
+    // avoid excessive spills and reloads. These intrinsics mostly turn
+    // into library calls at O0, and even memcpy_inline should be treated
+    // like one for this purpose.
+    flushLocalValueMap();
+    break;
   }
 
   return fastLowerIntrinsicCall(II);
diff --git a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll
--- a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll
@@ -290,13 +290,13 @@
 ; Space for s2 is allocated at sp
 
 ; FAST-LABEL: caller42
-; FAST: sub sp, sp, #96
-; Space for s1 is allocated at fp-24 = sp+56
-; FAST: sub x[[A:[0-9]+]], x29, #24
+; FAST: sub sp, sp, #64
+; Space for s1 is allocated at fp-24 = sp+24
+; FAST: add x[[A:[0-9]+]], sp, #24
 ; Call memcpy with size = 24 (0x18)
 ; FAST: mov {{x[0-9]+}}, #24
-; Space for s2 is allocated at sp+32
-; FAST: add x[[A:[0-9]+]], sp, #32
+; Space for s2 is allocated at sp
+; FAST: mov x[[A:[0-9]+]], sp
 ; FAST: bl _memcpy
   %tmp = alloca %struct.s42, align 4
   %tmp1 = alloca %struct.s42, align 4
@@ -339,8 +339,8 @@
 ; Call memcpy with size = 24 (0x18)
 ; FAST: mov {{x[0-9]+}}, #24
 ; FAST: bl _memcpy
-; Space for s2 is allocated at fp-48
-; FAST: sub x[[B:[0-9]+]], x29, #48
+; Space for s2 is allocated at sp+32
+; FAST: add x[[B:[0-9]+]], sp, #32
 ; Call memcpy again
 ; FAST: bl _memcpy
 ; Address of s1 is passed on stack at sp+8
diff --git a/llvm/test/CodeGen/X86/fastisel-memset-flush.ll b/llvm/test/CodeGen/X86/fastisel-memset-flush.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fastisel-memset-flush.ll
@@ -0,0 +1,45 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -O0 < %s | FileCheck %s
+
+; Check that FastISel flushes its local value map when it selects a memset
+; intrinsic: the leaq feeding the call to @other must be emitted after the
+; memset call, under the line-9 source locations, not hoisted before it.
+; The triple is pinned because the CHECK lines match x86-64 ELF assembly.
+
+define dso_local void @foo() !dbg !7 {
+entry:
+  %a = alloca i32, align 4
+  store i32 0, i32* %a, align 4, !dbg !9
+  %0 = bitcast i32* %a to i8*, !dbg !10
+  call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 -86, i64 4, i1 false), !dbg !10
+  %1 = bitcast i32* %a to i8*, !dbg !11
+  call void @other(i8* %1), !dbg !12
+  ret void, !dbg !13
+}
+; CHECK:      callq memset
+; CHECK-NEXT: .loc 1 9 9
+; CHECK-NEXT: leaq
+; CHECK-NEXT: .loc 1 9 3
+; CHECK-NEXT: callq other
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
+
+declare dso_local void @other(i8*)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0 (https://github.com/llvm/llvm-project eaae6fdf67e1f61599331d69a41a7dafe6199667)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "memset-test.c", directory: "/home/probinson/projects/scratch")
+!2 = !{}
+!3 = !{i32 7, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{!"clang version 12.0.0 (https://github.com/llvm/llvm-project eaae6fdf67e1f61599331d69a41a7dafe6199667)"}
+!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 4, type: !8, scopeLine: 5, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!8 = !DISubroutineType(types: !2)
+!9 = !DILocation(line: 6, column: 7, scope: !7)
+!10 = !DILocation(line: 8, column: 3, scope: !7)
+!11 = !DILocation(line: 9, column: 9, scope: !7)
+!12 = !DILocation(line: 9, column: 3, scope: !7)
+!13 = !DILocation(line: 10, column: 1, scope: !7)