Index: include/llvm/CodeGen/TargetLowering.h
===================================================================
--- include/llvm/CodeGen/TargetLowering.h
+++ include/llvm/CodeGen/TargetLowering.h
@@ -3538,6 +3538,15 @@
     return false;
   }
 
+  /// For some targets, an LLVM type must be broken down into multiple
+  /// smaller types. Usually the halves are ordered according to the endianness
+  /// but for some platform that would break. So this method will default to
+  /// matching the endianness but can be overridden.
+  virtual bool
+  shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const {
+    return DL.isLittleEndian();
+  }
+
   /// Returns a 0 terminated array of registers that can be safely used as
   /// scratch registers.
   virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const {
Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5846,7 +5846,7 @@
     // being a legal type for the architecture and thus has to be split to
     // two arguments.
     SDValue Ret;
-    if (DAG.getDataLayout().isLittleEndian()) {
+    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
       // Halves of WideVT are packed into registers in different order
       // depending on platform endianness. This is usually handled by
       // the C calling convention, but we can't defer to it in
Index: lib/Target/AVR/AVRISelLowering.h
===================================================================
--- lib/Target/AVR/AVRISelLowering.h
+++ lib/Target/AVR/AVRISelLowering.h
@@ -128,6 +128,11 @@
   unsigned getRegisterByName(const char* RegName, EVT VT,
                              SelectionDAG &DAG) const override;
 
+  bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL)
+    const override {
+    return false;
+  }
+
 private:
   SDValue getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AVRcc,
                     SelectionDAG &DAG, SDLoc dl) const;
Index: test/CodeGen/AVR/umul.with.overflow.i16-bug.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AVR/umul.with.overflow.i16-bug.ll
@@ -0,0 +1,49 @@
+; RUN: llc -O1 < %s -march=avr | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9"
+
+%Vs6UInt16 = type <{ i16 }>
+%Sb = type <{ i1 }>
+
+define hidden void @setServoAngle(i16) {
+  ; CHECK-LABEL: entry
+entry:
+  %adjustedAngle = alloca %Vs6UInt16, align 2
+  %1 = bitcast %Vs6UInt16* %adjustedAngle to i8*
+  %adjustedAngle._value = getelementptr inbounds %Vs6UInt16, %Vs6UInt16* %adjustedAngle, i32 0, i32 0
+  store i16 %0, i16* %adjustedAngle._value, align 2
+
+;print(unsignedInt: adjustedAngle &* UInt16(11))
+; breaks here
+  %adjustedAngle._value2 = getelementptr inbounds %Vs6UInt16, %Vs6UInt16* %adjustedAngle, i32 0, i32 0
+  %2 = load i16, i16* %adjustedAngle._value2, align 2
+
+; CHECK: mov r22, r24
+; CHECK: mov r23, r25
+
+; CHECK-DAG: ldi r20, 0
+; CHECK-DAG: ldi r21, 0
+; CHECK-DAG: ldi r18, 11
+; CHECK-DAG: ldi r19, 0
+
+; CHECK: mov r24, r20
+; CHECK: mov r25, r21
+; CHECK: call  __mulsi3
+  %3 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 %2, i16 11)
+  %4 = extractvalue { i16, i1 } %3, 0
+  %5 = extractvalue { i16, i1 } %3, 1
+
+  ; above code looks fine, how is it lowered?
+  %6 = call i1 @printDefaultParam()
+  call void @print(i16 %4, i1 %6)
+
+; CHECK: ret
+  ret void
+}
+
+declare void @print(i16, i1)
+declare i1 @printDefaultParam()
+
+; Function Attrs: nounwind readnone speculatable
+declare { i16, i1 } @llvm.umul.with.overflow.i16(i16, i16)