Index: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h
@@ -2058,6 +2058,14 @@
     return true;
   }
 
+  /// Return true if \p Value is a legal immediate for the value input of a
+  /// store instruction. Targets may override this; the default accepts only 0.
+  virtual bool isLegalStoreImmediate(int64_t Value) const {
+    // Conservative default: accept only 0, on the assumption that the target
+    // likely has a zero register or permits a zero immediate in a store.
+    return Value == 0;
+  }
+
   /// Return true if it's significantly cheaper to shift a vector by a uniform
   /// scalar than by an amount which will vary across each lane. On x86, for
   /// example, there is a "psllw" instruction for the former case, but no simple
Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15029,7 +15029,9 @@
 
   // FIXME: is there such a thing as a truncating indexed store?
   if (ST->isTruncatingStore() && ST->isUnindexed() &&
-      Value.getValueType().isInteger()) {
+      Value.getValueType().isInteger() &&
+      (!isa<ConstantSDNode>(Value) ||
+       !cast<ConstantSDNode>(Value)->isOpaque())) {
     // See if we can simplify the input to this truncstore with knowledge that
     // only the low bits are being used.  For example:
     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3889,9 +3889,12 @@
     case ISD::SIGN_EXTEND:
       return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
                          C->isTargetOpcode(), C->isOpaque());
+    case ISD::TRUNCATE:
+      if (C->isOpaque())
+        break;
+      LLVM_FALLTHROUGH;
     case ISD::ANY_EXTEND:
     case ISD::ZERO_EXTEND:
-    case ISD::TRUNCATE:
       return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
                          C->isTargetOpcode(), C->isOpaque());
     case ISD::UINT_TO_FP:
@@ -5158,8 +5161,11 @@
   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
     assert(C->getAPIntValue().getBitWidth() == 8);
     APInt Val = APInt::getSplat(NumBits, C->getAPIntValue());
-    if (VT.isInteger())
-      return DAG.getConstant(Val, dl, VT);
+    if (VT.isInteger()) {
+      bool IsOpaque = VT.getSizeInBits() > 64 ||
+          !DAG.getTargetLoweringInfo().isLegalStoreImmediate(C->getSExtValue());
+      return DAG.getConstant(Val, dl, VT, false, IsOpaque);
+    }
     return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), dl,
                              VT);
   }
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h
@@ -940,6 +940,8 @@
     /// the immediate into a register.
     bool isLegalAddImmediate(int64_t Imm) const override;
 
+    bool isLegalStoreImmediate(int64_t Imm) const override;
+
     /// Return the cost of the scaling factor used in the addressing
     /// mode represented by AM for this target, for a load/store
     /// of the specified type.
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -26890,6 +26890,10 @@
   return isInt<32>(Imm);
 }
 
+bool X86TargetLowering::isLegalStoreImmediate(int64_t Imm) const {
+  return isInt<32>(Imm); // Legal only if Imm fits in a signed 32-bit value.
+}
+
 bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
   if (!VT1.isInteger() || !VT2.isInteger())
     return false;
Index: llvm/trunk/test/CodeGen/AArch64/arm64-memset-inline.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-memset-inline.ll
+++ llvm/trunk/test/CodeGen/AArch64/arm64-memset-inline.ll
@@ -242,14 +242,12 @@
   ret void
 }
 
-; FIXME This could be better: x9 is a superset of w8's bit-pattern.
 define void @memset_12_stack() {
 ; CHECK-LABEL: memset_12_stack:
-; CHECK:       mov w8, #-1431655766
-; CHECK-NEXT:  mov x9, #-6148914691236517206
+; CHECK:       mov x8, #-6148914691236517206
 ; CHECK-NEXT:  mov x0, sp
+; CHECK-NEXT:  str x8, [sp]
 ; CHECK-NEXT:  str w8, [sp, #8]
-; CHECK-NEXT:  str x9, [sp]
 ; CHECK-NEXT:  bl something
   %buf = alloca [12 x i8], align 1
   %cast = bitcast [12 x i8]* %buf to i8*
@@ -272,14 +270,12 @@
   ret void
 }
 
-; FIXME This could be better: x9 is a superset of w8's bit-pattern.
 define void @memset_20_stack() {
 ; CHECK-LABEL: memset_20_stack:
-; CHECK:       mov w8, #-1431655766
-; CHECK-NEXT:  mov x9, #-6148914691236517206
+; CHECK:       mov x8, #-6148914691236517206
 ; CHECK-NEXT:  add x0, sp, #8
+; CHECK-NEXT:  stp x8, x8, [sp, #8]
 ; CHECK-NEXT:  str w8, [sp, #24]
-; CHECK-NEXT:  stp x9, x9, [sp, #8]
 ; CHECK-NEXT:  bl something
   %buf = alloca [20 x i8], align 1
   %cast = bitcast [20 x i8]* %buf to i8*
@@ -288,15 +284,13 @@
   ret void
 }
 
-; FIXME This could be better: x9 is a superset of w8's bit-pattern.
 define void @memset_26_stack() {
 ; CHECK-LABEL: memset_26_stack:
-; CHECK:       mov w8, #43690
-; CHECK-NEXT:  mov x9, #-6148914691236517206
+; CHECK:       mov x8, #-6148914691236517206
 ; CHECK-NEXT:  mov x0, sp
+; CHECK-NEXT:  stp x8, x8, [sp, #8]
+; CHECK-NEXT:  str x8, [sp]
 ; CHECK-NEXT:  strh w8, [sp, #24]
-; CHECK-NEXT:  stp x9, x9, [sp, #8]
-; CHECK-NEXT:  str x9, [sp]
 ; CHECK-NEXT:  bl something
   %buf = alloca [26 x i8], align 1
   %cast = bitcast [26 x i8]* %buf to i8*
Index: llvm/trunk/test/CodeGen/X86/pr38771.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/pr38771.ll
+++ llvm/trunk/test/CodeGen/X86/pr38771.ll
@@ -1,24 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
-
-define void @function() nounwind {
-; CHECK-LABEL: function:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movabsq $281474976710656, %rax # imm = 0x1000000000000
-; CHECK-NEXT:    notq %rax
-; CHECK-NEXT:    movl $2147483647, %ecx # imm = 0x7FFFFFFF
-; CHECK-NEXT:    shldq $65, %rax, %rcx
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    movb $64, %dl
-; CHECK-NEXT:    testb %dl, %dl
-; CHECK-NEXT:    cmoveq %rcx, %rax
-; CHECK-NEXT:    movq %rax, (%rax)
-; CHECK-NEXT:    movl $0, (%rax)
-; CHECK-NEXT:    retq
-entry:
-  %B68 = sub i96 39614081257132168796771975167, 281474976710656
-  %B49 = or i96 39614081257132168796771975167, 39614081257132168796771975167
-  %B33 = lshr i96 %B68, %B68
-  store i96 %B33, i96* undef
-  ret void
-}