diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
--- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -300,11 +300,13 @@
   { X86::MOV32rr, X86::MOV32mr, TB_FOLDED_STORE },
   { X86::MOV64ri32, X86::MOV64mi32, TB_FOLDED_STORE },
   { X86::MOV64rr, X86::MOV64mr, TB_FOLDED_STORE },
+  { X86::MOV64toSDrr, X86::MOV64mr, TB_FOLDED_STORE | TB_NO_REVERSE },
   { X86::MOV8ri, X86::MOV8mi, TB_FOLDED_STORE },
   { X86::MOV8rr, X86::MOV8mr, TB_FOLDED_STORE },
   { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE },
   { X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
   { X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+  { X86::MOVDI2SSrr, X86::MOV32mr, TB_FOLDED_STORE | TB_NO_REVERSE },
   { X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
   { X86::MOVDQUrr, X86::MOVDQUmr, TB_FOLDED_STORE },
   { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, TB_FOLDED_STORE },
@@ -357,6 +359,8 @@
   { X86::VEXTRACTI64x4Zrr, X86::VEXTRACTI64x4Zmr, TB_FOLDED_STORE },
   { X86::VEXTRACTPSZrr, X86::VEXTRACTPSZmr, TB_FOLDED_STORE },
   { X86::VEXTRACTPSrr, X86::VEXTRACTPSmr, TB_FOLDED_STORE },
+  { X86::VMOV64toSDZrr, X86::MOV64mr, TB_FOLDED_STORE | TB_NO_REVERSE },
+  { X86::VMOV64toSDrr, X86::MOV64mr, TB_FOLDED_STORE | TB_NO_REVERSE },
   { X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
   { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
   { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
@@ -367,6 +371,8 @@
   { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
   { X86::VMOVAPSZrr, X86::VMOVAPSZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
   { X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+  { X86::VMOVDI2SSZrr, X86::MOV32mr, TB_FOLDED_STORE | TB_NO_REVERSE },
+  { X86::VMOVDI2SSrr, X86::MOV32mr, TB_FOLDED_STORE | TB_NO_REVERSE },
   { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
   { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
   { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -5526,6 +5526,10 @@
   if (I != nullptr) {
     unsigned Opcode = I->DstOp;
+    bool FoldedLoad =
+        isTwoAddrFold || (OpNum == 0 && I->Flags & TB_FOLDED_LOAD) || OpNum > 0;
+    bool FoldedStore =
+        isTwoAddrFold || (OpNum == 0 && I->Flags & TB_FOLDED_STORE);
     MaybeAlign MinAlign =
         decodeMaybeAlign((I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT);
     if (MinAlign && Alignment < *MinAlign)
@@ -5536,20 +5540,25 @@
       const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
       unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
-      if (Size < RCSize) {
-        // FIXME: Allow scalar intrinsic instructions like ADDSSrm_Int.
-        // Check if it's safe to fold the load. If the size of the object is
-        // narrower than the load width, then it's not.
-        if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
-          return nullptr;
+      // Check if it's safe to fold the load. If the size of the object is
+      // narrower than the load width, then it's not.
+      // FIXME: Allow scalar intrinsic instructions like ADDSSrm_Int.
+      if (FoldedLoad && Size < RCSize) {
         // If this is a 64-bit load, but the spill slot is 32, then we can do
         // a 32-bit load which is implicitly zero-extended. This likely is
         // due to live interval analysis remat'ing a load from stack slot.
+        if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
+          return nullptr;
         if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
           return nullptr;
         Opcode = X86::MOV32rm;
         NarrowToMOV32rm = true;
       }
+      // For stores, make sure the size of the object is equal to the size of
+      // the store. If the object is larger, the extra bits would be garbage. If
+      // the object is smaller we might overwrite another object or fault.
+      if (FoldedStore && Size != RCSize)
+        return nullptr;
     }
 
     if (isTwoAddrFold)
diff --git a/llvm/test/CodeGen/X86/pr47874.ll b/llvm/test/CodeGen/X86/pr47874.ll
--- a/llvm/test/CodeGen/X86/pr47874.ll
+++ b/llvm/test/CodeGen/X86/pr47874.ll
@@ -9,8 +9,7 @@
 ; SSE2-NEXT:    testl %esi, %esi
 ; SSE2-NEXT:    jle LBB0_3
 ; SSE2-NEXT:  ## %bb.1: ## %bb2
-; SSE2-NEXT:    movd %esi, %xmm0
-; SSE2-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
+; SSE2-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
 ; SSE2-NEXT:    movl %esi, %eax
 ; SSE2-NEXT:    .p2align 4, 0x90
 ; SSE2-NEXT:  LBB0_2: ## %bb6
@@ -31,8 +30,7 @@
 ; AVX-NEXT:    testl %esi, %esi
 ; AVX-NEXT:    jle LBB0_3
 ; AVX-NEXT:  ## %bb.1: ## %bb2
-; AVX-NEXT:    vmovd %esi, %xmm0
-; AVX-NEXT:    vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
+; AVX-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
 ; AVX-NEXT:    movl %esi, %eax
 ; AVX-NEXT:    .p2align 4, 0x90
 ; AVX-NEXT:  LBB0_2: ## %bb6
@@ -78,8 +76,7 @@
 ; SSE2-NEXT:    testq %rsi, %rsi
 ; SSE2-NEXT:    jle LBB1_3
 ; SSE2-NEXT:  ## %bb.1: ## %bb2
-; SSE2-NEXT:    movq %rsi, %xmm0
-; SSE2-NEXT:    movq %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Folded Spill
+; SSE2-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
 ; SSE2-NEXT:    .p2align 4, 0x90
 ; SSE2-NEXT:  LBB1_2: ## %bb6
 ; SSE2-NEXT:    ## =>This Inner Loop Header: Depth=1
@@ -99,8 +96,7 @@
 ; AVX-NEXT:    testq %rsi, %rsi
 ; AVX-NEXT:    jle LBB1_3
 ; AVX-NEXT:  ## %bb.1: ## %bb2
-; AVX-NEXT:    vmovq %rsi, %xmm0
-; AVX-NEXT:    vmovq %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Folded Spill
+; AVX-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
 ; AVX-NEXT:    .p2align 4, 0x90
 ; AVX-NEXT:  LBB1_2: ## %bb6
 ; AVX-NEXT:    ## =>This Inner Loop Header: Depth=1
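Note (not part of the patch): the sketch below is a standalone, hypothetical restatement of the size checks the X86InstrInfo.cpp hunk introduces. The names (checkFoldSizes, FoldDecision, slotSize, regSize, isMov64Load) are illustrative only and do not exist in LLVM; slotSize stands for the stack-slot size in bytes and regSize for the byte width of the register class of the operand being folded.

// Standalone sketch (hypothetical names, not LLVM API) of the spill-slot
// size checks added above.
struct FoldDecision {
  bool Legal;          // may the memory operand be folded at all?
  bool NarrowToMov32;  // fold as an implicitly zero-extending 32-bit load
};

static FoldDecision checkFoldSizes(bool foldsLoad, bool foldsStore,
                                   bool isMov64Load, unsigned slotSize,
                                   unsigned regSize) {
  FoldDecision D = {true, false};
  if (foldsLoad && slotSize < regSize) {
    // Reading more bytes than the slot holds is only tolerated for the
    // special case of a 64-bit GPR load from a 4-byte slot, which can be
    // shrunk to a 32-bit load that zero-extends to 64 bits.
    if (!isMov64Load || regSize != 8 || slotSize != 4)
      return {false, false};
    D.NarrowToMov32 = true;
  }
  // Stores must match the slot exactly: a larger slot would keep garbage in
  // its upper bytes, a smaller one could be overwritten past its end or
  // fault.
  if (foldsStore && slotSize != regSize)
    D.Legal = false;
  return D;
}

Presumably the new fold-table entries are what let the spill of a MOVDI2SSrr/MOV64toSDrr result be folded into a plain GPR store once the store-size check is in place, which is what the updated pr47874.ll checks show: movl/movq of %esi/%rsi straight to the stack slot instead of bouncing through %xmm0.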