Index: llvm/trunk/lib/Target/Mips/Mips64InstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/Mips/Mips64InstrInfo.td
+++ llvm/trunk/lib/Target/Mips/Mips64InstrInfo.td
@@ -419,6 +419,10 @@
 defm : SetgeImmPats<GPR64, SLTi64, SLTiu64>;
 
 // truncate
+def : MipsPat<(trunc (assertsext GPR64:$src)),
+              (EXTRACT_SUBREG GPR64:$src, sub_32)>;
+def : MipsPat<(trunc (assertzext GPR64:$src)),
+              (EXTRACT_SUBREG GPR64:$src, sub_32)>;
 def : MipsPat<(i32 (trunc GPR64:$src)),
               (SLL (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>;
 
Index: llvm/trunk/lib/Target/Mips/MipsCCState.h
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsCCState.h
+++ llvm/trunk/lib/Target/Mips/MipsCCState.h
@@ -52,6 +52,9 @@
   /// Records whether the value has been lowered from an f128.
   SmallVector<bool, 4> OriginalArgWasF128;
 
+  /// Records whether the value has been lowered from float.
+  SmallVector<bool, 4> OriginalArgWasFloat;
+
   /// Records whether the value was a fixed argument.
   /// See ISD::OutputArg::IsFixed,
   SmallVector<bool, 4> CallOperandIsFixed;
@@ -74,6 +77,7 @@
     PreAnalyzeCallOperands(Outs, FuncArgs, CallNode);
     CCState::AnalyzeCallOperands(Outs, Fn);
     OriginalArgWasF128.clear();
+    OriginalArgWasFloat.clear();
     CallOperandIsFixed.clear();
   }
 
@@ -90,6 +94,7 @@
                               CCAssignFn Fn) {
     PreAnalyzeFormalArgumentsForF128(Ins);
     CCState::AnalyzeFormalArguments(Ins, Fn);
+    OriginalArgWasFloat.clear();
     OriginalArgWasF128.clear();
   }
 
@@ -98,6 +103,7 @@
                          const TargetLowering::CallLoweringInfo &CLI) {
     PreAnalyzeCallResultForF128(Ins, CLI);
     CCState::AnalyzeCallResult(Ins, Fn);
+    OriginalArgWasFloat.clear();
     OriginalArgWasF128.clear();
   }
 
@@ -105,6 +111,7 @@
                      CCAssignFn Fn) {
     PreAnalyzeReturnForF128(Outs);
     CCState::AnalyzeReturn(Outs, Fn);
+    OriginalArgWasFloat.clear();
     OriginalArgWasF128.clear();
   }
 
@@ -112,11 +119,15 @@
                    CCAssignFn Fn) {
     PreAnalyzeReturnForF128(ArgsFlags);
     bool Return = CCState::CheckReturn(ArgsFlags, Fn);
+    OriginalArgWasFloat.clear();
     OriginalArgWasF128.clear();
     return Return;
   }
 
   bool WasOriginalArgF128(unsigned ValNo) { return OriginalArgWasF128[ValNo]; }
+  bool WasOriginalArgFloat(unsigned ValNo) {
+      return OriginalArgWasFloat[ValNo];
+  }
   bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; }
   SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; }
 };
Index: llvm/trunk/lib/Target/Mips/MipsCCState.cpp
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsCCState.cpp
+++ llvm/trunk/lib/Target/Mips/MipsCCState.cpp
@@ -80,9 +80,11 @@
 void MipsCCState::PreAnalyzeCallResultForF128(
     const SmallVectorImpl<ISD::InputArg> &Ins,
     const TargetLowering::CallLoweringInfo &CLI) {
-  for (unsigned i = 0; i < Ins.size(); ++i)
+  for (unsigned i = 0; i < Ins.size(); ++i) {
     OriginalArgWasF128.push_back(
         originalTypeIsF128(CLI.RetTy, CLI.Callee.getNode()));
+    OriginalArgWasFloat.push_back(CLI.RetTy->isFloatingPointTy());
+  }
 }
 
 /// Identify lowered values that originated from f128 arguments and record
@@ -90,9 +92,12 @@
 void MipsCCState::PreAnalyzeReturnForF128(
     const SmallVectorImpl<ISD::OutputArg> &Outs) {
   const MachineFunction &MF = getMachineFunction();
-  for (unsigned i = 0; i < Outs.size(); ++i)
+  for (unsigned i = 0; i < Outs.size(); ++i) {
     OriginalArgWasF128.push_back(
         originalTypeIsF128(MF.getFunction()->getReturnType(), nullptr));
+    OriginalArgWasFloat.push_back(
+        MF.getFunction()->getReturnType()->isFloatingPointTy());
+  }
 }
 
 /// Identify lowered values that originated from f128 arguments and record
@@ -104,6 +109,8 @@
   for (unsigned i = 0; i < Outs.size(); ++i) {
     OriginalArgWasF128.push_back(
         originalTypeIsF128(FuncArgs[Outs[i].OrigArgIndex].Ty, CallNode));
+    OriginalArgWasFloat.push_back(
+        FuncArgs[Outs[i].OrigArgIndex].Ty->isFloatingPointTy());
     CallOperandIsFixed.push_back(Outs[i].IsFixed);
   }
 }
@@ -129,5 +136,6 @@
 
     OriginalArgWasF128.push_back(
         originalTypeIsF128(FuncArg->getType(), nullptr));
+    OriginalArgWasFloat.push_back(FuncArg->getType()->isFloatingPointTy());
   }
 }
Index: llvm/trunk/lib/Target/Mips/MipsCallingConv.td
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsCallingConv.td
+++ llvm/trunk/lib/Target/Mips/MipsCallingConv.td
@@ -78,16 +78,28 @@
 // Mips N32/64 Calling Convention
 //===----------------------------------------------------------------------===//
 
+def CC_MipsN_SoftFloat : CallingConv<[
+  CCAssignToRegWithShadow<[A0, A1, A2, A3,
+                           T0, T1, T2, T3],
+                          [D12_64, D13_64, D14_64, D15_64,
+                           D16_64, D17_64, D18_64, D19_64]>,
+  CCAssignToStack<4, 8>
+]>;
+
 def CC_MipsN : CallingConv<[
-  // Promote i8/i16 arguments to i32.
-  CCIfType<[i8, i16], CCPromoteToType<i32>>,
+  CCIfType<[i8, i16, i32],
+      CCIfSubtargetNot<"isLittle()",
+          CCIfInReg<CCPromoteToUpperBitsInType<i64>>>>,
 
-  // Integer arguments are passed in integer registers.
-  CCIfType<[i32], CCAssignToRegWithShadow<[A0, A1, A2, A3,
-                                           T0, T1, T2, T3],
-                                          [F12, F13, F14, F15,
-                                           F16, F17, F18, F19]>>,
+  // All integers (except soft-float integers) are promoted to 64-bit.
+  CCIfType<[i8, i16, i32],
+     CCIf<"!static_cast<MipsCCState *>(&State)->WasOriginalArgFloat(ValNo)",
+          CCPromoteToType<i64>>>,
+
+  // The only i32's we have left are soft-float arguments.
+  CCIfSubtarget<"abiUsesSoftFloat()", CCIfType<[i32], CCDelegateTo<CC_MipsN_SoftFloat>>>,
 
+  // Integer arguments are passed in integer registers.
   CCIfType<[i64], CCAssignToRegWithShadow<[A0_64, A1_64, A2_64, A3_64,
                                            T0_64, T1_64, T2_64, T3_64],
                                           [D12_64, D13_64, D14_64, D15_64,
@@ -106,23 +118,23 @@
                                            T0_64, T1_64, T2_64, T3_64]>>,
 
   // All stack parameter slots become 64-bit doublewords and are 8-byte aligned.
-  CCIfType<[i32, f32], CCAssignToStack<4, 8>>,
+  CCIfType<[f32], CCAssignToStack<4, 8>>,
   CCIfType<[i64, f64], CCAssignToStack<8, 8>>
 ]>;
 
 // N32/64 variable arguments.
 // All arguments are passed in integer registers.
 def CC_MipsN_VarArg : CallingConv<[
-  // Promote i8/i16 arguments to i32.
-  CCIfType<[i8, i16], CCPromoteToType<i32>>,
+  // All integers are promoted to 64-bit.
+  CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
 
-  CCIfType<[i32, f32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>,
+  CCIfType<[f32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>,
 
   CCIfType<[i64, f64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64,
                                       T0_64, T1_64, T2_64, T3_64]>>,
 
   // All stack parameter slots become 64-bit doublewords and are 8-byte aligned.
-  CCIfType<[i32, f32], CCAssignToStack<4, 8>>,
+  CCIfType<[f32], CCAssignToStack<4, 8>>,
   CCIfType<[i64, f64], CCAssignToStack<8, 8>>
 ]>;
 
Index: llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp
+++ llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp
@@ -2522,6 +2522,7 @@
     CCValAssign &VA = ArgLocs[i];
     MVT ValVT = VA.getValVT(), LocVT = VA.getLocVT();
     ISD::ArgFlagsTy Flags = Outs[i].Flags;
+    bool UseUpperBits = false;
 
     // ByVal Arg.
     if (Flags.isByVal()) {
@@ -2543,7 +2544,8 @@
 
     // Promote the value if needed.
     switch (VA.getLocInfo()) {
-    default: llvm_unreachable("Unknown loc info!");
+    default:
+      llvm_unreachable("Unknown loc info!");
     case CCValAssign::Full:
       if (VA.isRegLoc()) {
         if ((ValVT == MVT::f32 && LocVT == MVT::i32) ||
@@ -2568,17 +2570,34 @@
     case CCValAssign::BCvt:
       Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg);
       break;
+    case CCValAssign::SExtUpper:
+      UseUpperBits = true;
+      // Fallthrough
     case CCValAssign::SExt:
       Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, LocVT, Arg);
       break;
+    case CCValAssign::ZExtUpper:
+      UseUpperBits = true;
+      // Fallthrough
     case CCValAssign::ZExt:
       Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, LocVT, Arg);
       break;
+    case CCValAssign::AExtUpper:
+      UseUpperBits = true;
+      // Fallthrough
     case CCValAssign::AExt:
       Arg = DAG.getNode(ISD::ANY_EXTEND, DL, LocVT, Arg);
       break;
     }
 
+    if (UseUpperBits) {
+      unsigned ValSizeInBits = Outs[i].ArgVT.getSizeInBits();
+      unsigned LocSizeInBits = VA.getLocVT().getSizeInBits();
+      Arg = DAG.getNode(
+          ISD::SHL, DL, VA.getLocVT(), Arg,
+          DAG.getConstant(LocSizeInBits - ValSizeInBits, VA.getLocVT()));
+    }
+
     // Arguments that can be passed on register must be kept at
     // RegsToPass vector
     if (VA.isRegLoc()) {
@@ -2741,6 +2760,60 @@
   return Chain;
 }
 
+static SDValue UnpackFromArgumentSlot(SDValue Val, const CCValAssign &VA,
+                                      EVT ArgVT, SDLoc DL, SelectionDAG &DAG) {
+  MVT LocVT = VA.getLocVT();
+  EVT ValVT = VA.getValVT();
+
+  // Shift into the upper bits if necessary.
+  switch (VA.getLocInfo()) {
+  default:
+    break;
+  case CCValAssign::AExtUpper:
+  case CCValAssign::SExtUpper:
+  case CCValAssign::ZExtUpper: {
+    unsigned ValSizeInBits = ArgVT.getSizeInBits();
+    unsigned LocSizeInBits = VA.getLocVT().getSizeInBits();
+    unsigned Opcode =
+        VA.getLocInfo() == CCValAssign::ZExtUpper ? ISD::SRL : ISD::SRA;
+    Val = DAG.getNode(
+        Opcode, DL, VA.getLocVT(), Val,
+        DAG.getConstant(LocSizeInBits - ValSizeInBits, VA.getLocVT()));
+    break;
+  }
+  }
+
+  // If this is an value smaller than the argument slot size (32-bit for O32,
+  // 64-bit for N32/N64), it has been promoted in some way to the argument slot
+  // size. Extract the value and insert any appropriate assertions regarding
+  // sign/zero extension.
+  switch (VA.getLocInfo()) {
+  default:
+    llvm_unreachable("Unknown loc info!");
+  case CCValAssign::Full:
+    break;
+  case CCValAssign::AExtUpper:
+  case CCValAssign::AExt:
+    Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
+    break;
+  case CCValAssign::SExtUpper:
+  case CCValAssign::SExt:
+    Val = DAG.getNode(ISD::AssertSext, DL, LocVT, Val, DAG.getValueType(ValVT));
+    Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
+    break;
+  case CCValAssign::ZExtUpper:
+  case CCValAssign::ZExt:
+    Val = DAG.getNode(ISD::AssertZext, DL, LocVT, Val, DAG.getValueType(ValVT));
+    Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
+    break;
+  case CCValAssign::BCvt:
+    Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val);
+    break;
+  }
+
+  return Val;
+}
+
 //===----------------------------------------------------------------------===//
 //             Formal Arguments Calling Convention Implementation
 //===----------------------------------------------------------------------===//
@@ -2812,28 +2885,7 @@
       unsigned Reg = addLiveIn(DAG.getMachineFunction(), ArgReg, RC);
       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
 
-      // If this is an 8 or 16-bit value, it has been passed promoted
-      // to 32 bits.  Insert an assert[sz]ext to capture this, then
-      // truncate to the right size.
-      switch (VA.getLocInfo()) {
-      default:
-        llvm_unreachable("Unknown loc info!");
-      case CCValAssign::Full:
-        break;
-      case CCValAssign::SExt:
-        ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue,
-                               DAG.getValueType(ValVT));
-        ArgValue = DAG.getNode(ISD::TRUNCATE, DL, ValVT, ArgValue);
-        break;
-      case CCValAssign::ZExt:
-        ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue,
-                               DAG.getValueType(ValVT));
-        ArgValue = DAG.getNode(ISD::TRUNCATE, DL, ValVT, ArgValue);
-        break;
-      case CCValAssign::BCvt:
-        ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue);
-        break;
-      }
+      ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG);
 
       // Handle floating point arguments passed in integer registers and
       // long double arguments passed in floating point registers.
@@ -2854,21 +2906,34 @@
 
       InVals.push_back(ArgValue);
     } else { // VA.isRegLoc()
+      MVT LocVT = VA.getLocVT();
+
+      if (Subtarget.isABI_O32()) {
+        // We ought to be able to use LocVT directly but O32 sets it to i32
+        // when allocating floating point values to integer registers.
+        // This shouldn't influence how we load the value into registers unless
+        // we are targetting softfloat.
+        if (VA.getValVT().isFloatingPoint() && !Subtarget.abiUsesSoftFloat())
+          LocVT = VA.getValVT();
+      }
 
       // sanity check
       assert(VA.isMemLoc());
 
       // The stack pointer offset is relative to the caller stack frame.
-      int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
+      int FI = MFI->CreateFixedObject(LocVT.getSizeInBits() / 8,
                                       VA.getLocMemOffset(), true);
 
       // Create load nodes to retrieve arguments from the stack
       SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
-      SDValue Load = DAG.getLoad(ValVT, DL, Chain, FIN,
-                                 MachinePointerInfo::getFixedStack(FI),
-                                 false, false, false, 0);
-      InVals.push_back(Load);
-      OutChains.push_back(Load.getValue(1));
+      SDValue ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
+                                     MachinePointerInfo::getFixedStack(FI),
+                                     false, false, false, 0);
+      OutChains.push_back(ArgValue.getValue(1));
+
+      ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG);
+
+      InVals.push_back(ArgValue);
     }
   }
 
Index: llvm/trunk/test/CodeGen/Mips/atomic.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/atomic.ll
+++ llvm/trunk/test/CodeGen/Mips/atomic.ll
@@ -12,7 +12,7 @@
 
 @x = common global i32 0, align 4
 
-define i32 @AtomicLoadAdd32(i32 %incr) nounwind {
+define i32 @AtomicLoadAdd32(i32 signext %incr) nounwind {
 entry:
   %0 = atomicrmw add i32* @x, i32 %incr monotonic
   ret i32 %0
@@ -29,7 +29,7 @@
 ; ALL:           beqz    $[[R2]], $[[BB0]]
 }
 
-define i32 @AtomicLoadNand32(i32 %incr) nounwind {
+define i32 @AtomicLoadNand32(i32 signext %incr) nounwind {
 entry:
   %0 = atomicrmw nand i32* @x, i32 %incr monotonic
   ret i32 %0
@@ -47,7 +47,7 @@
 ; ALL:           beqz    $[[R2]], $[[BB0]]
 }
 
-define i32 @AtomicSwap32(i32 %newval) nounwind {
+define i32 @AtomicSwap32(i32 signext %newval) nounwind {
 entry:
   %newval.addr = alloca i32, align 4
   store i32 %newval, i32* %newval.addr, align 4
@@ -66,7 +66,7 @@
 ; ALL:           beqz    $[[R2]], $[[BB0]]
 }
 
-define i32 @AtomicCmpSwap32(i32 %oldval, i32 %newval) nounwind {
+define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind {
 entry:
   %newval.addr = alloca i32, align 4
   store i32 %newval, i32* %newval.addr, align 4
@@ -293,7 +293,7 @@
 ; HAS-SEB-SEH:   seb     $2, $[[R17]]
 }
 
-define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 %oldval, i8 signext %newval) nounwind {
+define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) nounwind {
 entry:
   %0 = cmpxchg i8* %ptr, i8 %oldval, i8 %newval monotonic monotonic
   %1 = extractvalue { i8, i1 } %0, 1
@@ -381,7 +381,7 @@
 
 @countsint = common global i32 0, align 4
 
-define i32 @CheckSync(i32 %v) nounwind noinline {
+define i32 @CheckSync(i32 signext %v) nounwind noinline {
 entry:
   %0 = atomicrmw add i32* @countsint, i32 %v seq_cst
   ret i32 %0 
@@ -415,7 +415,7 @@
 
 ; Check that MIPS32R6 has the correct offset range.
 ; FIXME: At the moment, we don't seem to do addr+offset for any atomic load/store.
-define i32 @AtomicLoadAdd32_OffGt9Bit(i32 %incr) nounwind {
+define i32 @AtomicLoadAdd32_OffGt9Bit(i32 signext %incr) nounwind {
 entry:
   %0 = atomicrmw add i32* getelementptr(i32* @x, i32 256), i32 %incr monotonic
   ret i32 %0
Index: llvm/trunk/test/CodeGen/Mips/bswap.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/bswap.ll
+++ llvm/trunk/test/CodeGen/Mips/bswap.ll
@@ -2,7 +2,7 @@
 ; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=MIPS64
 ; RUN: llc  < %s -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32r2 -mattr=+mips16 | FileCheck %s -check-prefix=MIPS16
 
-define i32 @bswap32(i32 %x) nounwind readnone {
+define i32 @bswap32(i32 signext %x) nounwind readnone {
 entry:
 ; MIPS32-LABEL: bswap32:
 ; MIPS32: wsbh $[[R0:[0-9]+]]
@@ -29,7 +29,7 @@
   ret i32 %or.3
 }
 
-define i64 @bswap64(i64 %x) nounwind readnone {
+define i64 @bswap64(i64 signext %x) nounwind readnone {
 entry:
 ; MIPS32-LABEL: bswap64:
 ; MIPS32: wsbh $[[R0:[0-9]+]]
@@ -72,24 +72,24 @@
 define <4 x i32> @bswapv4i32(<4 x i32> %x) nounwind readnone {
 entry:
 ; MIPS32-LABEL: bswapv4i32:
-; MIPS32: wsbh $[[R0:[0-9]+]]
-; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS32: wsbh $[[R0:[0-9]+]]
-; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS32: wsbh $[[R0:[0-9]+]]
-; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS32: wsbh $[[R0:[0-9]+]]
-; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS32-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS32-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS32-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS32-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS32-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS32-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS32-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS32-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
 
 ; MIPS64-LABEL: bswapv4i32:
-; MIPS64: wsbh $[[R0:[0-9]+]]
-; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS64: wsbh $[[R0:[0-9]+]]
-; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS64: wsbh $[[R0:[0-9]+]]
-; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16
-; MIPS64: wsbh $[[R0:[0-9]+]]
-; MIPS64: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS64-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS64-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS64-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS64-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS64-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS64-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
+; MIPS64-DAG: wsbh $[[R0:[0-9]+]]
+; MIPS64-DAG: rotr ${{[0-9]+}}, $[[R0]], 16
 
 ; Don't bother with a MIPS16 version. It's just bswap32 repeated four times and
 ; would be very long
Index: llvm/trunk/test/CodeGen/Mips/cconv/arguments-float.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/cconv/arguments-float.ll
+++ llvm/trunk/test/CodeGen/Mips/cconv/arguments-float.ll
@@ -135,8 +135,8 @@
 ; SYM64-DAG:           ld [[R2:\$[0-9]]], %got_disp(floats)(
 
 ; The first four arguments are the same in O32/N32/N64.
-; The first argument isn't floating point so floating point registers are not
-; used.
+; The first argument is floating point but soft-float is enabled so floating
+; point registers are not used.
 ; MD00305 and GCC disagree on this one. MD00305 says that floats are treated
 ; as 8-byte aligned and occupy two slots on O32. GCC is treating them as 4-byte
 ; aligned and occupying one slot. We'll use GCC's definition.
@@ -195,7 +195,7 @@
 ; O32-DAG:           sw $7, 12([[R2]])
 ; NEW-DAG:           sd $5, 8([[R2]])
 
-define void @float_arg2(i8 %a, float %b) nounwind {
+define void @float_arg2(i8 signext %a, float %b) nounwind {
 entry:
         %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
         store volatile i8 %a, i8* %0
Index: llvm/trunk/test/CodeGen/Mips/cconv/arguments.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/cconv/arguments.ll
+++ llvm/trunk/test/CodeGen/Mips/cconv/arguments.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=mips -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32BE %s
-; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32LE %s
+; RUN: llc -march=mips -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
 
 ; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
 ; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
@@ -23,8 +23,10 @@
 @floats = global [11 x float] zeroinitializer
 @doubles = global [11 x double] zeroinitializer
 
-define void @align_to_arg_slots(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g,
-                                i8 %h, i8 %i, i8 %j) nounwind {
+define void @align_to_arg_slots(i8 signext %a, i8 signext %b, i8 signext %c,
+                                i8 signext %d, i8 signext %e, i8 signext %f,
+                                i8 signext %g, i8 signext %h, i8 signext %i,
+                                i8 signext %j) nounwind {
 entry:
         %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
         store volatile i8 %a, i8* %0
@@ -82,15 +84,16 @@
 ; increase by 4 for O32 and 8 for N32/N64.
 ; O32-DAG:           lw [[R3:\$[0-9]+]], 32($sp)
 ; O32-DAG:           sb [[R3]], 9([[R1]])
-; NEW-DAG:           lw [[R3:\$[0-9]+]], 0($sp)
+; NEW-DAG:           ld [[R3:\$[0-9]+]], 0($sp)
 ; NEW-DAG:           sb [[R3]], 9([[R1]])
 ; O32-DAG:           lw [[R3:\$[0-9]+]], 36($sp)
 ; O32-DAG:           sb [[R3]], 10([[R1]])
-; NEW-DAG:           lw [[R3:\$[0-9]+]], 8($sp)
+; NEW-DAG:           ld [[R3:\$[0-9]+]], 8($sp)
 ; NEW-DAG:           sb [[R3]], 10([[R1]])
 
-define void @slot_skipping(i8 %a, i64 %b, i8 %c, i8 %d,
-                           i8 %e, i8 %f, i8 %g, i64 %i, i8 %j) nounwind {
+define void @slot_skipping(i8 signext %a, i64 signext %b, i8 signext %c,
+                           i8 signext %d, i8 signext %e, i8 signext %f,
+                           i8 signext %g, i64 signext %i, i8 signext %j) nounwind {
 entry:
         %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
         store volatile i8 %a, i8* %0
@@ -137,8 +140,7 @@
 ; It's not clear why O32 uses lbu for this argument, but it's not wrong so we'll
 ; accept it for now. The only IR difference is that this argument has
 ; anyext from i8 and align 8 on it.
-; O32LE-DAG:           lbu [[R3:\$[0-9]+]], 16($sp)
-; O32BE-DAG:           lbu [[R3:\$[0-9]+]], 19($sp)
+; O32-DAG:           lw [[R3:\$[0-9]+]], 16($sp)
 ; O32-DAG:           sb [[R3]], 2([[R1]])
 ; NEW-DAG:           sb $6, 2([[R1]])
 ; O32-DAG:           lw [[R3:\$[0-9]+]], 20($sp)
@@ -166,5 +168,5 @@
 ; increase by 4 for O32 and 8 for N32/N64.
 ; O32-DAG:           lw [[R3:\$[0-9]+]], 48($sp)
 ; O32-DAG:           sb [[R3]], 7([[R1]])
-; NEW-DAG:           lw [[R3:\$[0-9]+]], 0($sp)
+; NEW-DAG:           ld [[R3:\$[0-9]+]], 0($sp)
 ; NEW-DAG:           sb [[R3]], 7([[R1]])
Index: llvm/trunk/test/CodeGen/Mips/cmov.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/cmov.ll
+++ llvm/trunk/test/CodeGen/Mips/cmov.ll
@@ -38,7 +38,7 @@
 ; 64-CMP-DAG:   or $[[T2:[0-9]+]], $[[T0]], $[[T1]]
 ; 64-CMP-DAG:   ld $2, 0($[[T2]])
 
-define i32* @cmov1(i32 %s) nounwind readonly {
+define i32* @cmov1(i32 signext %s) nounwind readonly {
 entry:
   %tobool = icmp ne i32 %s, 0
   %tmp1 = load i32** @i3, align 4
@@ -78,7 +78,7 @@
 ; 64-CMP-DAG:   or $[[T2:[0-9]+]], $[[T0]], $[[T1]]
 ; 64-CMP-DAG:   lw $2, 0($[[T2]])
 
-define i32 @cmov2(i32 %s) nounwind readonly {
+define i32 @cmov2(i32 signext %s) nounwind readonly {
 entry:
   %tobool = icmp ne i32 %s, 0
   %tmp1 = load i32* @c, align 4
@@ -109,7 +109,7 @@
 ; 64-CMP-DAG:   selnez $[[T1:[0-9]+]], $6, $[[CC]]
 ; 64-CMP-DAG:   or $2, $[[T0]], $[[T1]]
 
-define i32 @cmov3(i32 %a, i32 %b, i32 %c) nounwind readnone {
+define i32 @cmov3(i32 signext %a, i32 signext %b, i32 signext %c) nounwind readnone {
 entry:
   %cmp = icmp eq i32 %a, 234
   %cond = select i1 %cmp, i32 %b, i32 %c
@@ -142,7 +142,7 @@
 ; 64-CMP-DAG:   seleqz $[[T1:[0-9]+]], $6, $[[CC]]
 ; 64-CMP-DAG:   or $2, $[[T0]], $[[T1]]
 
-define i32 @cmov3_ne(i32 %a, i32 %b, i32 %c) nounwind readnone {
+define i32 @cmov3_ne(i32 signext %a, i32 signext %b, i32 signext %c) nounwind readnone {
 entry:
   %cmp = icmp ne i32 %a, 234
   %cond = select i1 %cmp, i32 %b, i32 %c
@@ -179,7 +179,7 @@
 ; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $6, $[[R0]]
 ; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
-define i64 @cmov4(i32 %a, i64 %b, i64 %c) nounwind readnone {
+define i64 @cmov4(i32 signext %a, i64 %b, i64 %c) nounwind readnone {
 entry:
   %cmp = icmp eq i32 %a, 234
   %cond = select i1 %cmp, i64 %b, i64 %c
@@ -220,7 +220,7 @@
 ; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $6, $[[R0]]
 ; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
-define i64 @cmov4_ne(i32 %a, i64 %b, i64 %c) nounwind readnone {
+define i64 @cmov4_ne(i32 signext %a, i64 %b, i64 %c) nounwind readnone {
 entry:
   %cmp = icmp ne i32 %a, 234
   %cond = select i1 %cmp, i64 %b, i64 %c
@@ -263,7 +263,7 @@
 ; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
 ; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
-define i32 @slti0(i32 %a) {
+define i32 @slti0(i32 signext %a) {
 entry:
   %cmp = icmp sgt i32 %a, 32766
   %cond = select i1 %cmp, i32 3, i32 5
@@ -302,7 +302,7 @@
 ; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
 ; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
-define i32 @slti1(i32 %a) {
+define i32 @slti1(i32 signext %a) {
 entry:
   %cmp = icmp sgt i32 %a, 32767
   %cond = select i1 %cmp, i32 7, i32 5
@@ -337,7 +337,7 @@
 ; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
 ; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
-define i32 @slti2(i32 %a) {
+define i32 @slti2(i32 signext %a) {
 entry:
   %cmp = icmp sgt i32 %a, -32769
   %cond = select i1 %cmp, i32 3, i32 5
@@ -380,7 +380,7 @@
 ; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
 ; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
-define i32 @slti3(i32 %a) {
+define i32 @slti3(i32 signext %a) {
 entry:
   %cmp = icmp sgt i32 %a, -32770
   %cond = select i1 %cmp, i32 3, i32 5
@@ -567,7 +567,7 @@
 ; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
 ; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
-define i32 @sltiu0(i32 %a) {
+define i32 @sltiu0(i32 signext %a) {
 entry:
   %cmp = icmp ugt i32 %a, 32766
   %cond = select i1 %cmp, i32 3, i32 5
@@ -606,7 +606,7 @@
 ; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
 ; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
-define i32 @sltiu1(i32 %a) {
+define i32 @sltiu1(i32 signext %a) {
 entry:
   %cmp = icmp ugt i32 %a, 32767
   %cond = select i1 %cmp, i32 7, i32 5
@@ -641,7 +641,7 @@
 ; 64-CMP-DAG:  selnez $[[T1:[0-9]+]], $[[I5]], $[[R0]]
 ; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
-define i32 @sltiu2(i32 %a) {
+define i32 @sltiu2(i32 signext %a) {
 entry:
   %cmp = icmp ugt i32 %a, -32769
   %cond = select i1 %cmp, i32 3, i32 5
@@ -684,7 +684,7 @@
 ; 64-CMP-DAG:  seleqz $[[T1:[0-9]+]], $[[I5]], $[[R0]]
 ; 64-CMP-DAG:  or $2, $[[T0]], $[[T1]]
 
-define i32 @sltiu3(i32 %a) {
+define i32 @sltiu3(i32 signext %a) {
 entry:
   %cmp = icmp ugt i32 %a, -32770
   %cond = select i1 %cmp, i32 3, i32 5
@@ -697,7 +697,7 @@
 ; doesn't generate conditional moves
 ; for constant operands whose difference is |1|
 
-define i32 @slti4(i32 %a) nounwind readnone {
+define i32 @slti4(i32 signext %a) nounwind readnone {
   %1 = icmp slt i32 %a, 7
   %2 = select i1 %1, i32 4, i32 3
   ret i32 %2
@@ -723,7 +723,7 @@
 ; 64-CMP-NOT:  seleqz
 ; 64-CMP-NOT:  selnez
 
-define i32 @slti5(i32 %a) nounwind readnone {
+define i32 @slti5(i32 signext %a) nounwind readnone {
   %1 = icmp slt i32 %a, 7
   %2 = select i1 %1, i32 -3, i32 -4
   ret i32 %2
@@ -749,7 +749,7 @@
 ; 64-CMP-NOT:  seleqz
 ; 64-CMP-NOT:  selnez
 
-define i32 @slti6(i32 %a) nounwind readnone {
+define i32 @slti6(i32 signext %a) nounwind readnone {
   %1 = icmp slt i32 %a, 7
   %2 = select i1 %1, i32 3, i32 4
   ret i32 %2
Index: llvm/trunk/test/CodeGen/Mips/const-mult.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/const-mult.ll
+++ llvm/trunk/test/CodeGen/Mips/const-mult.ll
@@ -5,7 +5,7 @@
 ; CHECK: sll $[[R0:[0-9]+]], $4, 2
 ; CHECK: addu ${{[0-9]+}}, $[[R0]], $4
 
-define i32 @mul5_32(i32 %a) {
+define i32 @mul5_32(i32 signext %a) {
 entry:
   %mul = mul nsw i32 %a, 5
   ret i32 %mul
@@ -17,7 +17,7 @@
 ; CHECK-DAG: sll $[[R2:[0-9]+]], $4, 5
 ; CHECK:     subu ${{[0-9]+}}, $[[R2]], $[[R1]]
 
-define i32 @mul27_32(i32 %a) {
+define i32 @mul27_32(i32 signext %a) {
 entry:
   %mul = mul nsw i32 %a, 27
   ret i32 %mul
@@ -29,7 +29,7 @@
 ; CHECK-DAG: sll $[[R2:[0-9]+]], $4, 31
 ; CHECK:     addu ${{[0-9]+}}, $[[R2]], $[[R1]]
 
-define i32 @muln2147483643_32(i32 %a) {
+define i32 @muln2147483643_32(i32 signext %a) {
 entry:
   %mul = mul nsw i32 %a, -2147483643
   ret i32 %mul
@@ -41,7 +41,7 @@
 ; CHECK64-DAG: dsll $[[R2:[0-9]+]], $4, 63
 ; CHECK64:     daddu ${{[0-9]+}}, $[[R2]], $[[R1]]
 
-define i64 @muln9223372036854775805_64(i64 %a) {
+define i64 @muln9223372036854775805_64(i64 signext %a) {
 entry:
   %mul = mul nsw i64 %a, -9223372036854775805
   ret i64 %mul
Index: llvm/trunk/test/CodeGen/Mips/countleading.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/countleading.ll
+++ llvm/trunk/test/CodeGen/Mips/countleading.ll
@@ -11,7 +11,7 @@
 ;   MIPS32-GT-R1 - MIPS64r1 and above (does not include MIPS64's)
 ;   MIPS64-GT-R1 - MIPS64r1 and above
 
-define i32 @ctlz_i32(i32 %X) nounwind readnone {
+define i32 @ctlz_i32(i32 signext %X) nounwind readnone {
 entry:
 ; ALL-LABEL: ctlz_i32:
 
@@ -27,7 +27,7 @@
 
 declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
 
-define i32 @ctlo_i32(i32 %X) nounwind readnone {
+define i32 @ctlo_i32(i32 signext %X) nounwind readnone {
 entry:
 ; ALL-LABEL: ctlo_i32:
 
Index: llvm/trunk/test/CodeGen/Mips/ctlz-v.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/ctlz-v.ll
+++ llvm/trunk/test/CodeGen/Mips/ctlz-v.ll
@@ -6,12 +6,12 @@
 define <2 x i32> @ctlzv2i32(<2 x i32> %x) {
 entry:
 ; MIPS32: clz     $2, $4
-; MIPS32: jr      $ra
 ; MIPS32: clz     $3, $5
 
-; MIPS64: clz     $2, $4
-; MIPS64: jr      $ra
-; MIPS64: clz     $3, $5
+; MIPS64-DAG: sll $[[A0:[0-9]+]], $4, 0
+; MIPS64-DAG: clz $2, $[[A0]]
+; MIPS64-DAG: sll $[[A1:[0-9]+]], $5, 0
+; MIPS64-DAG: clz $3, $[[A1]]
 
   %ret = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 true)
   ret <2 x i32> %ret
Index: llvm/trunk/test/CodeGen/Mips/cttz-v.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/cttz-v.ll
+++ llvm/trunk/test/CodeGen/Mips/cttz-v.ll
@@ -18,14 +18,16 @@
 ; MIPS32-DAG: jr      $ra
 ; MIPS32-DAG: subu    $3, $[[R4]], $[[R8]]
 
-; MIPS64-DAG: addiu   $[[R0:[0-9]+]], $4, -1
-; MIPS64-DAG: not     $[[R1:[0-9]+]], $4
+; MIPS64-DAG: sll     $[[A0:[0-9]+]], $4, 0
+; MIPS64-DAG: addiu   $[[R0:[0-9]+]], $[[A0]], -1
+; MIPS64-DAG: not     $[[R1:[0-9]+]], $[[A0]]
 ; MIPS64-DAG: and     $[[R2:[0-9]+]], $[[R1]], $[[R0]]
 ; MIPS64-DAG: clz     $[[R3:[0-9]+]], $[[R2]]
 ; MIPS64-DAG: addiu   $[[R4:[0-9]+]], $zero, 32
 ; MIPS64-DAG: subu    $2, $[[R4]], $[[R3]]
-; MIPS64-DAG: addiu   $[[R5:[0-9]+]], $5, -1
-; MIPS64-DAG: not     $[[R6:[0-9]+]], $5
+; MIPS64-DAG: sll     $[[A1:[0-9]+]], $5, 0
+; MIPS64-DAG: addiu   $[[R5:[0-9]+]], $[[A1]], -1
+; MIPS64-DAG: not     $[[R6:[0-9]+]], $[[A1]]
 ; MIPS64-DAG: and     $[[R7:[0-9]+]], $[[R6]], $[[R5]]
 ; MIPS64-DAG: clz     $[[R8:[0-9]+]], $[[R7]]
 ; MIPS64-DAG: jr      $ra
Index: llvm/trunk/test/CodeGen/Mips/divrem.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/divrem.ll
+++ llvm/trunk/test/CodeGen/Mips/divrem.ll
@@ -27,7 +27,7 @@
 @g0 = common global i32 0, align 4
 @g1 = common global i32 0, align 4
 
-define i32 @sdiv1(i32 %a0, i32 %a1) nounwind readnone {
+define i32 @sdiv1(i32 signext %a0, i32 signext %a1) nounwind readnone {
 entry:
 ; ALL-LABEL: sdiv1:
 
@@ -54,7 +54,7 @@
   ret i32 %div
 }
 
-define i32 @srem1(i32 %a0, i32 %a1) nounwind readnone {
+define i32 @srem1(i32 signext %a0, i32 signext %a1) nounwind readnone {
 entry:
 ; ALL-LABEL: srem1:
 
@@ -81,7 +81,7 @@
   ret i32 %rem
 }
 
-define i32 @udiv1(i32 %a0, i32 %a1) nounwind readnone {
+define i32 @udiv1(i32 zeroext %a0, i32 zeroext %a1) nounwind readnone {
 entry:
 ; ALL-LABEL: udiv1:
 
@@ -107,7 +107,7 @@
   ret i32 %div
 }
 
-define i32 @urem1(i32 %a0, i32 %a1) nounwind readnone {
+define i32 @urem1(i32 zeroext %a0, i32 zeroext %a1) nounwind readnone {
 entry:
 ; ALL-LABEL: urem1:
 
@@ -134,7 +134,7 @@
   ret i32 %rem
 }
 
-define i32 @sdivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind {
+define i32 @sdivrem1(i32 signext %a0, i32 signext %a1, i32* nocapture %r) nounwind {
 entry:
 ; ALL-LABEL: sdivrem1:
 
@@ -175,7 +175,7 @@
   ret i32 %div
 }
 
-define i32 @udivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind {
+define i32 @udivrem1(i32 zeroext %a0, i32 zeroext %a1, i32* nocapture %r) nounwind {
 entry:
 ; ALL-LABEL: udivrem1:
 
Index: llvm/trunk/test/CodeGen/Mips/fastcc.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/fastcc.ll
+++ llvm/trunk/test/CodeGen/Mips/fastcc.ll
@@ -272,7 +272,7 @@
 define void @caller2() {
 entry:
 
-; NOODDSPREG-LABEL:  caller2
+; NOODDSPREG-LABEL:  caller2:
 
 ; Check that first 10 arguments are passed in even float registers
 ; f0, f2, ... , f18. Check that 11th argument is passed on stack.
@@ -314,7 +314,7 @@
                             float %a8, float %a9, float %a10) {
 entry:
 
-; NOODDSPREG-LABEL:  callee2
+; NOODDSPREG-LABEL:  callee2:
 
 ; NOODDSPREG:        addiu   $sp, $sp, -[[OFFSET:[0-9]+]]
 
@@ -353,7 +353,7 @@
 define void @caller3() {
 entry:
 
-; FP64-NOODDSPREG-LABEL:  caller3
+; FP64-NOODDSPREG-LABEL:  caller3:
 
 ; Check that first 10 arguments are passed in even float registers
 ; f0, f2, ... , f18. Check that 11th argument is passed on stack.
@@ -395,7 +395,7 @@
                             double %a8, double %a9, double %a10) {
 entry:
 
-; FP64-NOODDSPREG-LABEL:  callee3
+; FP64-NOODDSPREG-LABEL:  callee3:
 
 ; FP64-NOODDSPREG:        addiu   $sp, $sp, -[[OFFSET:[0-9]+]]
 
Index: llvm/trunk/test/CodeGen/Mips/load-store-left-right.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/load-store-left-right.ll
+++ llvm/trunk/test/CodeGen/Mips/load-store-left-right.ll
@@ -47,7 +47,7 @@
   ret i32 %0
 }
 
-define void @store_SI(i32 %a) nounwind {
+define void @store_SI(i32 signext %a) nounwind {
 entry:
 ; ALL-LABEL: store_SI:
 
@@ -201,7 +201,7 @@
   ret void
 }
 
-define void @store_SI_trunc_from_i64(i32 %a) nounwind {
+define void @store_SI_trunc_from_i64(i32 signext %a) nounwind {
 entry:
 ; ALL-LABEL: store_SI_trunc_from_i64:
 
Index: llvm/trunk/test/CodeGen/Mips/longbranch.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/longbranch.ll
+++ llvm/trunk/test/CodeGen/Mips/longbranch.ll
@@ -13,7 +13,7 @@
 
 @x = external global i32
 
-define void @test1(i32 %s) {
+define void @test1(i32 signext %s) {
 entry:
   %cmp = icmp eq i32 %s, 0
   br i1 %cmp, label %end, label %then
Index: llvm/trunk/test/CodeGen/Mips/madd-msub.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/madd-msub.ll
+++ llvm/trunk/test/CodeGen/Mips/madd-msub.ll
@@ -76,26 +76,14 @@
 ; 32R6-DAG:      muhu $[[T3:[0-9]+]], ${{[45]}}, ${{[45]}}
 ; 32R6-DAG:      addu $2, $[[T3]], $[[T2]]
 
-; 64-DAG:        dsll $[[T0:[0-9]+]], $4, 32
-; 64-DAG:        dsrl $[[T1:[0-9]+]], $[[T0]], 32
-; 64-DAG:        dsll $[[T2:[0-9]+]], $5, 32
-; 64-DAG:        dsrl $[[T3:[0-9]+]], $[[T2]], 32
-; 64-DAG:        d[[m:m]]ult $[[T3]], $[[T1]]
-; 64-DAG:        [[m]]flo $[[T4:[0-9]+]]
-; 64-DAG:        dsll $[[T5:[0-9]+]], $6, 32
-; 64-DAG:        dsrl $[[T6:[0-9]+]], $[[T5]], 32
-; 64-DAG:        daddu $2, $[[T4]], $[[T6]]
-
-; 64R6-DAG:      dsll $[[T0:[0-9]+]], $4, 32
-; 64R6-DAG:      dsrl $[[T1:[0-9]+]], $[[T0]], 32
-; 64R6-DAG:      dsll $[[T2:[0-9]+]], $5, 32
-; 64R6-DAG:      dsrl $[[T3:[0-9]+]], $[[T2]], 32
-; 64R6-DAG:      dmul $[[T4:[0-9]+]], $[[T3]], $[[T1]]
-; 64R6-DAG:      dsll $[[T5:[0-9]+]], $6, 32
-; 64R6-DAG:      dsrl $[[T6:[0-9]+]], $[[T5]], 32
-; 64R6-DAG:      daddu $2, $[[T4]], $[[T6]]
+; 64-DAG:        d[[m:m]]ult $5, $4
+; 64-DAG:        [[m]]flo $[[T0:[0-9]+]]
+; 64-DAG:        daddu $2, $[[T0]], $6
 
-define i64 @madd2(i32 %a, i32 %b, i32 %c) nounwind readnone {
+; 64R6-DAG:      dmul $[[T0:[0-9]+]], $5, $4
+; 64R6-DAG:      daddu $2, $[[T0]], $6
+
+define i64 @madd2(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c) nounwind readnone {
 entry:
   %conv = zext i32 %a to i64
   %conv2 = zext i32 %b to i64
@@ -214,26 +202,14 @@
 ; 32R6-DAG:      negu $2, $[[T3]]
 ; 32R6-DAG:      subu $3, $6, $[[T1]]
 
-; 64-DAG:        dsll $[[T0:[0-9]+]], $4, 32
-; 64-DAG:        dsrl $[[T1:[0-9]+]], $[[T0]], 32
-; 64-DAG:        dsll $[[T2:[0-9]+]], $5, 32
-; 64-DAG:        dsrl $[[T3:[0-9]+]], $[[T2]], 32
-; 64-DAG:        d[[m:m]]ult $[[T3]], $[[T1]]
-; 64-DAG:        [[m]]flo $[[T4:[0-9]+]]
-; 64-DAG:        dsll $[[T5:[0-9]+]], $6, 32
-; 64-DAG:        dsrl $[[T6:[0-9]+]], $[[T5]], 32
-; 64-DAG:        dsubu $2, $[[T6]], $[[T4]]
-
-; 64R6-DAG:      dsll $[[T0:[0-9]+]], $4, 32
-; 64R6-DAG:      dsrl $[[T1:[0-9]+]], $[[T0]], 32
-; 64R6-DAG:      dsll $[[T2:[0-9]+]], $5, 32
-; 64R6-DAG:      dsrl $[[T3:[0-9]+]], $[[T2]], 32
-; 64R6-DAG:      dmul $[[T4:[0-9]+]], $[[T3]], $[[T1]]
-; 64R6-DAG:      dsll $[[T5:[0-9]+]], $6, 32
-; 64R6-DAG:      dsrl $[[T6:[0-9]+]], $[[T5]], 32
-; 64R6-DAG:      dsubu $2, $[[T6]], $[[T4]]
+; 64-DAG:        d[[m:m]]ult $5, $4
+; 64-DAG:        [[m]]flo $[[T0:[0-9]+]]
+; 64-DAG:        dsubu $2, $6, $[[T0]]
+
+; 64R6-DAG:      dmul $[[T0:[0-9]+]], $5, $4
+; 64R6-DAG:      dsubu $2, $6, $[[T0]]
 
-define i64 @msub2(i32 %a, i32 %b, i32 %c) nounwind readnone {
+define i64 @msub2(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c) nounwind readnone {
 entry:
   %conv = zext i32 %c to i64
   %conv2 = zext i32 %a to i64
Index: llvm/trunk/test/CodeGen/Mips/mips64-f128.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/mips64-f128.ll
+++ llvm/trunk/test/CodeGen/Mips/mips64-f128.ll
@@ -114,7 +114,7 @@
 ; ALL-LABEL: conv_LD_UInt:
 ; ALL: ld $25, %call16(__floatunsitf)
 
-define fp128 @conv_LD_UInt(i32 %a) {
+define fp128 @conv_LD_UInt(i32 signext %a) {
 entry:
   %conv = uitofp i32 %a to fp128
   ret fp128 %conv
@@ -545,7 +545,7 @@
 
 ; ALL-LABEL: load_LD_float:
 ; ALL: ld   $[[R0:[0-9]+]], %got_disp(gf1)
-; ALL: lw   $4, 0($[[R0]])
+; ALL: lwu  $4, 0($[[R0]])
 ; ALL: ld   $25, %call16(__extendsftf2)
 ; ALL: jalr $25
 
@@ -635,7 +635,7 @@
 ; CMP_CC_FMT-DAG: selnez $[[NE2:[0-9]+]], $7, $[[CC]]
 ; CMP_CC_FMT-DAG: or $4, $[[NE2]], $[[EQ2]]
 
-define fp128 @select_LD(i32 %a, i64, fp128 %b, fp128 %c) {
+define fp128 @select_LD(i32 signext %a, i64, fp128 %b, fp128 %c) {
 entry:
   %tobool = icmp ne i32 %a, 0
   %cond = select i1 %tobool, fp128 %b, fp128 %c
Index: llvm/trunk/test/CodeGen/Mips/mips64-sret.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/mips64-sret.ll
+++ llvm/trunk/test/CodeGen/Mips/mips64-sret.ll
@@ -11,7 +11,7 @@
   ret void
 }
 
-define void @bar(i32 %v, i32* noalias sret %agg.result) nounwind {
+define void @bar(i32 signext %v, i32* noalias sret %agg.result) nounwind {
 entry:
 ; CHECK-LABEL: bar:
 ; CHECK: sw $4, 0($5)
Index: llvm/trunk/test/CodeGen/Mips/octeon_popcnt.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/octeon_popcnt.ll
+++ llvm/trunk/test/CodeGen/Mips/octeon_popcnt.ll
@@ -21,7 +21,7 @@
 ; MIPS64-NOT: pop
 }
 
-define i32 @cnt32(i32 %x) nounwind readnone {
+define i32 @cnt32(i32 zeroext %x) nounwind readnone {
   %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
   ret i32 %cnt
 ; OCTEON-LABEL: cnt32:
Index: llvm/trunk/test/CodeGen/Mips/select.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/select.ll
+++ llvm/trunk/test/CodeGen/Mips/select.ll
@@ -8,7 +8,7 @@
 @d2 = external global double
 @d3 = external global double
 
-define i32 @i32_icmp_ne_i32_val(i32 %s, i32 %f0, i32 %f1) nounwind readnone {
+define i32 @i32_icmp_ne_i32_val(i32 signext %s, i32 signext %f0, i32 signext %f1) nounwind readnone {
 entry:
 ; ALL-LABEL: i32_icmp_ne_i32_val:
 
@@ -37,7 +37,7 @@
   ret i32 %cond
 }
 
-define i64 @i32_icmp_ne_i64_val(i32 %s, i64 %f0, i64 %f1) nounwind readnone {
+define i64 @i32_icmp_ne_i64_val(i32 signext %s, i64 %f0, i64 %f1) nounwind readnone {
 entry:
 ; ALL-LABEL: i32_icmp_ne_i64_val:
 
@@ -128,7 +128,7 @@
   ret i64 %cond
 }
 
-define float @i32_icmp_ne_f32_val(i32 %s, float %f0, float %f1) nounwind readnone {
+define float @i32_icmp_ne_f32_val(i32 signext %s, float %f0, float %f1) nounwind readnone {
 entry:
 ; ALL-LABEL: i32_icmp_ne_f32_val:
 
@@ -161,7 +161,7 @@
   ret float %cond
 }
 
-define double @i32_icmp_ne_f64_val(i32 %s, double %f0, double %f1) nounwind readnone {
+define double @i32_icmp_ne_f64_val(i32 signext %s, double %f0, double %f1) nounwind readnone {
 entry:
 ; ALL-LABEL: i32_icmp_ne_f64_val:
 
@@ -496,7 +496,7 @@
   ret float %cond
 }
 
-define i32 @f32_fcmp_oeq_i32_val(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
+define i32 @f32_fcmp_oeq_i32_val(i32 signext %f0, i32 signext %f1, float %f2, float %f3) nounwind readnone {
 entry:
 ; ALL-LABEL: f32_fcmp_oeq_i32_val:
 
@@ -541,7 +541,7 @@
   ret i32 %cond
 }
 
-define i32 @f32_fcmp_olt_i32_val(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
+define i32 @f32_fcmp_olt_i32_val(i32 signext %f0, i32 signext %f1, float %f2, float %f3) nounwind readnone {
 entry:
 ; ALL-LABEL: f32_fcmp_olt_i32_val:
 
@@ -585,7 +585,7 @@
   ret i32 %cond
 }
 
-define i32 @f32_fcmp_ogt_i32_val(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone {
+define i32 @f32_fcmp_ogt_i32_val(i32 signext %f0, i32 signext %f1, float %f2, float %f3) nounwind readnone {
 entry:
 ; ALL-LABEL: f32_fcmp_ogt_i32_val:
 
@@ -630,7 +630,7 @@
   ret i32 %cond
 }
 
-define i32 @f64_fcmp_oeq_i32_val(i32 %f0, i32 %f1) nounwind readonly {
+define i32 @f64_fcmp_oeq_i32_val(i32 signext %f0, i32 signext %f1) nounwind readonly {
 entry:
 ; ALL-LABEL: f64_fcmp_oeq_i32_val:
 
@@ -707,7 +707,7 @@
   ret i32 %cond
 }
 
-define i32 @f64_fcmp_olt_i32_val(i32 %f0, i32 %f1) nounwind readonly {
+define i32 @f64_fcmp_olt_i32_val(i32 signext %f0, i32 signext %f1) nounwind readonly {
 entry:
 ; ALL-LABEL: f64_fcmp_olt_i32_val:
 
@@ -784,7 +784,7 @@
   ret i32 %cond
 }
 
-define i32 @f64_fcmp_ogt_i32_val(i32 %f0, i32 %f1) nounwind readonly {
+define i32 @f64_fcmp_ogt_i32_val(i32 signext %f0, i32 signext %f1) nounwind readonly {
 entry:
 ; ALL-LABEL: f64_fcmp_ogt_i32_val:
 
Index: llvm/trunk/test/CodeGen/Mips/zeroreg.ll
===================================================================
--- llvm/trunk/test/CodeGen/Mips/zeroreg.ll
+++ llvm/trunk/test/CodeGen/Mips/zeroreg.ll
@@ -8,7 +8,7 @@
 
 @g1 = external global i32
 
-define i32 @sel_icmp_nez_i32_z0(i32 %s) nounwind readonly {
+define i32 @sel_icmp_nez_i32_z0(i32 signext %s) nounwind readonly {
 entry:
 ; ALL-LABEL: sel_icmp_nez_i32_z0:
 
@@ -30,7 +30,7 @@
   ret i32 %cond
 }
 
-define i32 @sel_icmp_nez_i32_z1(i32 %s) nounwind readonly {
+define i32 @sel_icmp_nez_i32_z1(i32 signext %s) nounwind readonly {
 entry:
 ; ALL-LABEL: sel_icmp_nez_i32_z1: