diff --git a/clang/include/clang/CodeGen/CGFunctionInfo.h b/clang/include/clang/CodeGen/CGFunctionInfo.h
--- a/clang/include/clang/CodeGen/CGFunctionInfo.h
+++ b/clang/include/clang/CodeGen/CGFunctionInfo.h
@@ -93,6 +93,7 @@
   bool SRetAfterThis : 1;   // isIndirect()
   bool InReg : 1;           // isDirect() || isExtend() || isIndirect()
   bool CanBeFlattened: 1;   // isDirect()
+  bool EndianAlign: 1;      // isDirect()
   bool SignExt : 1;         // isExtend()
 
   bool canHavePaddingType() const {
@@ -110,19 +111,21 @@
 
 public:
   ABIArgInfo(Kind K = Direct)
-      : TypeData(nullptr), PaddingType(nullptr), DirectOffset(0),
-        TheKind(K), PaddingInReg(false), InAllocaSRet(false),
-        IndirectByVal(false), IndirectRealign(false), SRetAfterThis(false),
-        InReg(false), CanBeFlattened(false), SignExt(false) {}
+      : TypeData(nullptr), PaddingType(nullptr), DirectOffset(0), TheKind(K),
+        PaddingInReg(false), InAllocaSRet(false), IndirectByVal(false),
+        IndirectRealign(false), SRetAfterThis(false), InReg(false),
+        CanBeFlattened(false), EndianAlign(false), SignExt(false) {}
 
   static ABIArgInfo getDirect(llvm::Type *T = nullptr, unsigned Offset = 0,
                               llvm::Type *Padding = nullptr,
-                              bool CanBeFlattened = true) {
+                              bool CanBeFlattened = true,
+                              bool EndianAlign = false) {
     auto AI = ABIArgInfo(Direct);
     AI.setCoerceToType(T);
     AI.setPaddingType(Padding);
     AI.setDirectOffset(Offset);
     AI.setCanBeFlattened(CanBeFlattened);
+    AI.setEndianAlign(EndianAlign);
     return AI;
   }
   static ABIArgInfo getDirectInReg(llvm::Type *T = nullptr) {
@@ -402,6 +405,20 @@
     CanBeFlattened = Flatten;
   }
 
+  /// Returns whether a value coerced through memory must occupy the
+  /// least-significant bytes of the wider type. CGCall.cpp applies the
+  /// resulting byte offset only on big-endian targets. Direct kind only.
+  bool getEndianAlign() const {
+    assert(isDirect() && "Invalid kind!");
+    return EndianAlign;
+  }
+
+  /// Sets the flag returned by getEndianAlign(). Direct kind only.
+  void setEndianAlign(bool Align) {
+    assert(isDirect() && "Invalid kind!");
+    EndianAlign = Align;
+  }
+
   void dump() const;
 };
 
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1221,6 +1221,7 @@
 /// destination type; in this situation the values of bits which not
 /// present in the src are undefined.
 static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
+                                      bool EndianAlign,
                                       CodeGenFunction &CGF) {
   llvm::Type *SrcTy = Src.getElementType();
 
@@ -1261,6 +1262,11 @@
   // Otherwise do coercion through memory. This is stupid, but simple.
   Address Tmp = CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment());
   Address Casted = CGF.Builder.CreateElementBitCast(Tmp,CGF.Int8Ty);
+  if (EndianAlign && CGF.CGM.getDataLayout().isBigEndian()) {
+    // Big-endian: place the source in the low-order (highest-address) bytes.
+    auto Offset = CharUnits::fromQuantity(DstSize - SrcSize);
+    Casted = CGF.Builder.CreateConstInBoundsByteGEP(Casted, Offset);
+  }
   Address SrcCasted = CGF.Builder.CreateElementBitCast(Src,CGF.Int8Ty);
   CGF.Builder.CreateMemCpy(Casted, SrcCasted,
       llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize),
@@ -1296,6 +1302,7 @@
 static void CreateCoercedStore(llvm::Value *Src,
                                Address Dst,
                                bool DstIsVolatile,
+                               bool EndianAlign,
                                CodeGenFunction &CGF) {
   llvm::Type *SrcTy = Src->getType();
   llvm::Type *DstTy = Dst.getType()->getElementType();
@@ -1348,6 +1355,11 @@
     Address Tmp = CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment());
     CGF.Builder.CreateStore(Src, Tmp);
     Address Casted = CGF.Builder.CreateElementBitCast(Tmp,CGF.Int8Ty);
+    if (EndianAlign && CGF.CGM.getDataLayout().isBigEndian()) {
+      // Big-endian: read the value from the low-order (highest-address) bytes.
+      auto Offset = CharUnits::fromQuantity(SrcSize - DstSize);
+      Casted = CGF.Builder.CreateConstInBoundsByteGEP(Casted, Offset);
+    }
     Address DstCasted = CGF.Builder.CreateElementBitCast(Dst,CGF.Int8Ty);
     CGF.Builder.CreateMemCpy(DstCasted, Casted,
         llvm::ConstantInt::get(CGF.IntPtrTy, DstSize),
@@ -2557,7 +2569,8 @@
         assert(NumIRArgs == 1);
         auto AI = FnArgs[FirstIRArg];
         AI->setName(Arg->getName() + ".coerce");
-        CreateCoercedStore(AI, Ptr, /*DstIsVolatile=*/false, *this);
+        CreateCoercedStore(AI, Ptr, /*DstIsVolatile=*/false,
+                           ArgI.getEndianAlign(), *this);
       }
 
       // Match to what EmitParmDecl is expecting for this type.
@@ -2947,7 +2960,8 @@
       // If the value is offset in memory, apply the offset now.
       Address V = emitAddressAtOffset(*this, ReturnValue, RetAI);
 
-      RV = CreateCoercedLoad(V, RetAI.getCoerceToType(), *this);
+      RV = CreateCoercedLoad(V, RetAI.getCoerceToType(), RetAI.getEndianAlign(),
+                             *this);
     }
 
     // In ARC, end functions that return a retainable type with a call
@@ -4307,8 +4321,8 @@
       } else {
         // In the simple case, just pass the coerced loaded value.
         assert(NumIRArgs == 1);
-        IRCallArgs[FirstIRArg] =
-          CreateCoercedLoad(Src, ArgInfo.getCoerceToType(), *this);
+        IRCallArgs[FirstIRArg] = CreateCoercedLoad(
+            Src, ArgInfo.getCoerceToType(), ArgInfo.getEndianAlign(), *this);
       }
 
       break;
@@ -4763,7 +4777,8 @@
 
       // If the value is offset in memory, apply the offset now.
       Address StorePtr = emitAddressAtOffset(*this, DestPtr, RetAI);
-      CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this);
+      CreateCoercedStore(CI, StorePtr, DestIsVolatile, RetAI.getEndianAlign(),
+                         *this);
 
       return convertTempToRValue(DestPtr, RetTy, SourceLocation());
     }
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -5934,7 +5934,8 @@
     llvm::Type *ResType = IsAAPCS_VFP ?
       llvm::Type::getFloatTy(getVMContext()) :
       llvm::Type::getInt32Ty(getVMContext());
-    return ABIArgInfo::getDirect(ResType);
+    return ABIArgInfo::getDirect(ResType, /*Offset=*/0, /*Padding=*/nullptr,
+                                 /*CanBeFlattened=*/true, /*EndianAlign=*/true);
   }
 
   if (!isAggregateTypeForABI(Ty)) {
@@ -6141,7 +6142,8 @@
     llvm::Type *ResType = IsAAPCS_VFP ?
       llvm::Type::getFloatTy(getVMContext()) :
       llvm::Type::getInt32Ty(getVMContext());
-    return ABIArgInfo::getDirect(ResType);
+    return ABIArgInfo::getDirect(ResType, /*Offset=*/0, /*Padding=*/nullptr,
+                                 /*CanBeFlattened=*/true, /*EndianAlign=*/true);
   }
 
   if (!isAggregateTypeForABI(RetTy)) {
diff --git a/clang/test/CodeGen/arm-fp16-arguments.c b/clang/test/CodeGen/arm-fp16-arguments.c
--- a/clang/test/CodeGen/arm-fp16-arguments.c
+++ b/clang/test/CodeGen/arm-fp16-arguments.c
@@ -1,6 +1,9 @@
 // RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fallow-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT
 // RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi hard -fallow-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD
 // RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fnative-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=NATIVE
+// RUN: %clang_cc1 -triple armebv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fallow-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=SOFT
+// RUN: %clang_cc1 -triple armebv7a--none-eabi -target-abi aapcs -mfloat-abi hard -fallow-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=CHECK --check-prefix=HARD
+// RUN: %clang_cc1 -triple armebv7a--none-eabi -target-abi aapcs -mfloat-abi soft -fnative-half-arguments-and-returns -emit-llvm -o - -O2 %s | FileCheck %s --check-prefix=NATIVE
 
 __fp16 g;