This is an archive of the discontinued LLVM Phabricator instance.

[AArch64] Implement allowsUnalignedMemoryAccesses()
ClosedPublic

Authored by zzheng on Apr 8 2014, 10:46 AM.

Details

Summary

This patch enables unaligned memory accesses of vector types in the AArch64 backend, which should improve the performance of vectorized code. It mirrors the ARMv7 backend's behavior for the same API.
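For illustration, a minimal example of the kind of access this enables, adapted from the test case below (the function and its expected lowering are illustrative, assuming a little-endian Linux target with NEON):

; An under-aligned 128-bit vector copy. With this patch it can be selected
; as ld1/st1 on subtargets that allow unaligned accesses, instead of being
; expanded into scalar loads and stores.
define <4 x i32> @unaligned_copy(<4 x i32>* %src, <4 x i32>* %dst) {
entry:
  %val = load <4 x i32>* %src, align 1            ; expected: ld1 {v0.4s}, [x0]
  store <4 x i32> %val, <4 x i32>* %dst, align 1  ; expected: st1 {v0.4s}, [x1]
  ret <4 x i32> %val
}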

Diff Detail

Event Timeline

This looks fine to me.

Tim.

http://reviews.llvm.org/D3319

See below for the rebased patch and updated test cases.

Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, hosted by
The Linux Foundation

From e1c992f12bfe168a38d5885c91984a9003f07eb7 Mon Sep 17 00:00:00 2001
From: Zhaoshi Zheng <zhaoshiz@codeaurora.org>
Date: Wed, 19 Mar 2014 18:32:22 -0700
Subject: [PATCH 1/3] [AArch64] Implement allowsUnalignedMemoryAccesses()


 lib/Target/AArch64/AArch64ISelLowering.cpp       |  44 ++++++
 lib/Target/AArch64/AArch64ISelLowering.h         |   6 +
 lib/Target/AArch64/AArch64Subtarget.cpp          |  34 +++++
 lib/Target/AArch64/AArch64Subtarget.h            |   7 +
 test/CodeGen/AArch64/unaligned-vector-ld1-st1.ll | 172 +++++++++++++++++++++++
 5 files changed, 263 insertions(+)
 create mode 100644 test/CodeGen/AArch64/unaligned-vector-ld1-st1.ll

diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 236d5ec..1e79894 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4412,6 +4412,50 @@ AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
   return false;
 }
+
+bool AArch64TargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+                                                          unsigned AddrSpace,
+                                                          bool *Fast) const {
+  const AArch64Subtarget *Subtarget = getSubtarget();
+  // The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus
+  bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
+
+  switch (VT.getSimpleVT().SimpleTy) {
+  default:
+    return false;
+  // Scalar types
+  case MVT::i8:  case MVT::i16:
+  case MVT::i32: case MVT::i64:
+  case MVT::f32: case MVT::f64: {
+    // Unaligned access can use (for example) LDRB, LDRH, LDR
+    if (AllowsUnaligned) {
+      if (Fast)
+        *Fast = true;
+      return true;
+    }
+    return false;
+  }
+  // 64-bit vector types
+  case MVT::v8i8:  case MVT::v4i16:
+  case MVT::v2i32: case MVT::v1i64:
+  case MVT::v2f32: case MVT::v1f64:
+  // 128-bit vector types
+  case MVT::v16i8: case MVT::v8i16:
+  case MVT::v4i32: case MVT::v2i64:
+  case MVT::v4f32: case MVT::v2f64: {
+    // For any little-endian targets with NEON, we can support unaligned
+    // load/store of V registers using ld1/st1.
+    // A big-endian target may also explicitly support unaligned accesses
+    if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) {
+      if (Fast)
+        *Fast = true;
+      return true;
+    }
+    return false;
+  }
+  }
+}
+
 // Check whether a shuffle_vector could be presented as concat_vector.
 bool AArch64TargetLowering::isConcatVector(SDValue Op, SelectionDAG &DAG,
                                            SDValue V0, SDValue V1,

diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index f83c1ab..154c1d7 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -355,6 +355,12 @@ public:
   /// expanded to fmul + fadd.
   virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
 
+  /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+  /// unaligned memory accesses of the specified type. Returns whether it
+  /// is "fast" by reference in the second argument.
+  virtual bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
+                                             bool *Fast) const;
+
   ConstraintType getConstraintType(const std::string &Constraint) const;
 
   ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &Info,

diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index 9140bbd..53cdf30 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -25,6 +25,25 @@
 
 using namespace llvm;
 
+enum AlignMode {
+  DefaultAlign,
+  StrictAlign,
+  NoStrictAlign
+};
+
+static cl::opt<AlignMode>
+Align(cl::desc("Load/store alignment support"),
+      cl::Hidden, cl::init(DefaultAlign),
+      cl::values(
+          clEnumValN(DefaultAlign,  "aarch64-default-align",
+                     "Generate unaligned accesses only on hardware/OS "
+                     "combinations that are known to support them"),
+          clEnumValN(StrictAlign,   "aarch64-strict-align",
+                     "Disallow all unaligned memory accesses"),
+          clEnumValN(NoStrictAlign, "aarch64-no-strict-align",
+                     "Allow unaligned memory accesses"),
+          clEnumValEnd));
+
 // Pin the vtable to this file.
 void AArch64Subtarget::anchor() {}
 
@@ -39,6 +58,8 @@ AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS,
 
 void AArch64Subtarget::initializeSubtargetFeatures(StringRef CPU,
                                                    StringRef FS) {
+  AllowsUnalignedMem = false;
+
   if (CPU.empty())
     CPUString = "generic";
 
@@ -52,6 +73,19 @@ void AArch64Subtarget::initializeSubtargetFeatures(StringRef CPU,
   }
 
   ParseSubtargetFeatures(CPU, FullFS);
+
+  switch (Align) {
+  case DefaultAlign:
+    // Linux targets support unaligned accesses on AArch64
+    AllowsUnalignedMem = isTargetLinux();
+    break;
+  case StrictAlign:
+    AllowsUnalignedMem = false;
+    break;
+  case NoStrictAlign:
+    AllowsUnalignedMem = true;
+    break;
+  }
 }
 
 bool AArch64Subtarget::GVIsIndirectSymbol(const GlobalValue *GV,
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 68c6c4b..45e5a5e 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -38,6 +38,11 @@ protected:
   bool HasNEON;
   bool HasCrypto;
 
+  /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
+  /// accesses for some types. For details, see
+  /// AArch64TargetLowering::allowsUnalignedMemoryAccesses().
+  bool AllowsUnalignedMem;
+
   /// TargetTriple - What processor and OS we're targeting.
   Triple TargetTriple;
 
@@ -74,6 +79,8 @@ public:
   bool hasNEON() const { return HasNEON; }
   bool hasCrypto() const { return HasCrypto; }
 
+  bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
+
   bool isLittle() const { return IsLittleEndian; }
 
   const std::string & getCPUString() const { return CPUString; }

diff --git a/test/CodeGen/AArch64/unaligned-vector-ld1-st1.ll b/test/CodeGen/AArch64/unaligned-vector-ld1-st1.ll
new file mode 100644
index 0000000..2e3f7bf
--- /dev/null
+++ b/test/CodeGen/AArch64/unaligned-vector-ld1-st1.ll
@@ -0,0 +1,172 @@
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon -o - | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64_be-none-linux-gnu -mattr=+neon -o - | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -aarch64-no-strict-align -mattr=+neon -o - | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64_be-none-linux-gnu -aarch64-no-strict-align -mattr=+neon -o - | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -aarch64-strict-align -mattr=+neon -o - | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64_be-none-linux-gnu -aarch64-strict-align -mattr=+neon -o - | FileCheck %s --check-prefix=BE-STRICT-ALIGN
+
+;; Check element-aligned 128-bit vector load/store - integer
+define <16 x i8> @qwordint (<16 x i8>* %head.v16i8, <8 x i16>* %head.v8i16, <4 x i32>* %head.v4i32, <2 x i64>* %head.v2i64,
+                            <16 x i8>* %tail.v16i8, <8 x i16>* %tail.v8i16, <4 x i32>* %tail.v4i32, <2 x i64>* %tail.v2i64) {
+; CHECK-LABEL: qwordint
+; CHECK: ld1 {v0.16b}, [x0]
+; CHECK: ld1 {v1.8h}, [x1]
+; CHECK: ld1 {v2.4s}, [x2]
+; CHECK: ld1 {v3.2d}, [x3]
+; CHECK: st1 {v0.16b}, [x4]
+; CHECK: st1 {v1.8h}, [x5]
+; CHECK: st1 {v2.4s}, [x6]
+; CHECK: st1 {v3.2d}, [x7]
+; BE-STRICT-ALIGN-LABEL: qwordint
+; BE-STRICT-ALIGN: ldrb
+; BE-STRICT-ALIGN: ldrh
+; BE-STRICT-ALIGN: ldr
+; BE-STRICT-ALIGN: ldr
+; BE-STRICT-ALIGN: strb
+; BE-STRICT-ALIGN: strh
+; BE-STRICT-ALIGN: str
+; BE-STRICT-ALIGN: str
+entry:
+ %val.v16i8 = load <16 x i8>* %head.v16i8, align 1
+ %val.v8i16 = load <8 x i16>* %head.v8i16, align 2
+ %val.v4i32 = load <4 x i32>* %head.v4i32, align 4
+ %val.v2i64 = load <2 x i64>* %head.v2i64, align 8
+ store <16 x i8> %val.v16i8, <16 x i8>* %tail.v16i8, align 1
+ store <8 x i16> %val.v8i16, <8 x i16>* %tail.v8i16, align 2
+ store <4 x i32> %val.v4i32, <4 x i32>* %tail.v4i32, align 4
+ store <2 x i64> %val.v2i64, <2 x i64>* %tail.v2i64, align 8
+ ret <16 x i8> %val.v16i8
+}
+
+;; Check element-aligned 128-bit vector load/store - floating point
+define <4 x float> @qwordfloat (<4 x float>* %head.v4f32, <2 x double>* %head.v2f64,
+                                <4 x float>* %tail.v4f32, <2 x double>* %tail.v2f64) {
+; CHECK-LABEL: qwordfloat
+; CHECK: ld1 {v0.4s}, [x0]
+; CHECK: ld1 {v1.2d}, [x1]
+; CHECK: st1 {v0.4s}, [x2]
+; CHECK: st1 {v1.2d}, [x3]
+; BE-STRICT-ALIGN-LABEL: qwordfloat
+; BE-STRICT-ALIGN: ldr
+; BE-STRICT-ALIGN: ldr
+; BE-STRICT-ALIGN: str
+; BE-STRICT-ALIGN: str
+entry:
+ %val.v4f32 = load <4 x float>* %head.v4f32, align 4
+ %val.v2f64 = load <2 x double>* %head.v2f64, align 8
+ store <4 x float> %val.v4f32, <4 x float>* %tail.v4f32, align 4
+ store <2 x double> %val.v2f64, <2 x double>* %tail.v2f64, align 8
+ ret <4 x float> %val.v4f32
+}
+
+;; Check element-aligned 64-bit vector load/store - integer
+define <8 x i8> @dwordint (<8 x i8>* %head.v8i8, <4 x i16>* %head.v4i16, <2 x i32>* %head.v2i32, <1 x i64>* %head.v1i64,
+                           <8 x i8>* %tail.v8i8, <4 x i16>* %tail.v4i16, <2 x i32>* %tail.v2i32, <1 x i64>* %tail.v1i64) {
+; CHECK-LABEL: dwordint
+; CHECK: ld1 {v0.8b}, [x0]
+; CHECK: ld1 {v1.4h}, [x1]
+; CHECK: ld1 {v2.2s}, [x2]
+; CHECK: ld1 {v3.1d}, [x3]
+; CHECK: st1 {v0.8b}, [x4]
+; CHECK: st1 {v1.4h}, [x5]
+; CHECK: st1 {v2.2s}, [x6]
+; CHECK: st1 {v3.1d}, [x7]
+; BE-STRICT-ALIGN-LABEL: dwordint
+; BE-STRICT-ALIGN: ldrb
+; BE-STRICT-ALIGN: ldrh
+; BE-STRICT-ALIGN: ldr
+; BE-STRICT-ALIGN: ld1 {v1.1d}, [x3]
+; BE-STRICT-ALIGN: strb
+; BE-STRICT-ALIGN: strh
+; BE-STRICT-ALIGN: str
+; BE-STRICT-ALIGN: st1 {v1.1d}, [x7]
+entry:
+ %val.v8i8 = load <8 x i8>* %head.v8i8, align 1
+ %val.v4i16 = load <4 x i16>* %head.v4i16, align 2
+ %val.v2i32 = load <2 x i32>* %head.v2i32, align 4
+ %val.v1i64 = load <1 x i64>* %head.v1i64, align 8
+ store <8 x i8> %val.v8i8, <8 x i8>* %tail.v8i8 , align 1
+ store <4 x i16> %val.v4i16, <4 x i16>* %tail.v4i16, align 2
+ store <2 x i32> %val.v2i32, <2 x i32>* %tail.v2i32, align 4
+ store <1 x i64> %val.v1i64, <1 x i64>* %tail.v1i64, align 8
+ ret <8 x i8> %val.v8i8
+}
+
+;; Check element-aligned 64-bit vector load/store - floating point
+define <2 x float> @dwordfloat (<2 x float>* %head.v2f32, <1 x double>* %head.v1f64,
+                                <2 x float>* %tail.v2f32, <1 x double>* %tail.v1f64) {
+; CHECK-LABEL: dwordfloat
+; CHECK: ld1 {v0.2s}, [x0]
+; CHECK: ld1 {v1.1d}, [x1]
+; CHECK: st1 {v0.2s}, [x2]
+; CHECK: st1 {v1.1d}, [x3]
+; BE-STRICT-ALIGN-LABEL: dwordfloat
+; BE-STRICT-ALIGN: ldr
+; BE-STRICT-ALIGN: ld1 {v1.1d}, [x1]
+; BE-STRICT-ALIGN: str
+; BE-STRICT-ALIGN: st1 {v1.1d}, [x3]
+entry:
+ %val.v2f32 = load <2 x float>* %head.v2f32, align 4
+ %val.v1f64 = load <1 x double>* %head.v1f64, align 8
+ store <2 x float> %val.v2f32, <2 x float>* %tail.v2f32, align 4
+ store <1 x double> %val.v1f64, <1 x double>* %tail.v1f64, align 8
+ ret <2 x float> %val.v2f32
+}
+
+;; Check load/store of 128-bit vectors with less-than 16-byte alignment
+define <2 x i64> @align2vi64 (<2 x i64>* %head.byte, <2 x i64>* %head.half, <2 x i64>* %head.word, <2 x i64>* %head.dword,
+                              <2 x i64>* %tail.byte, <2 x i64>* %tail.half, <2 x i64>* %tail.word, <2 x i64>* %tail.dword) {
+; CHECK-LABEL: align2vi64
+; CHECK: ld1 {v0.2d}, [x0]
+; CHECK: ld1 {v1.2d}, [x1]
+; CHECK: ld1 {v2.2d}, [x2]
+; CHECK: ld1 {v3.2d}, [x3]
+; CHECK: st1 {v0.2d}, [x4]
+; CHECK: st1 {v1.2d}, [x5]
+; CHECK: st1 {v2.2d}, [x6]
+; CHECK: st1 {v3.2d}, [x7]
+; BE-STRICT-ALIGN-LABEL: align2vi64
+; BE-STRICT-ALIGN: ldrb
+; BE-STRICT-ALIGN: ldrh
+; BE-STRICT-ALIGN: ldr
+; BE-STRICT-ALIGN: strb
+; BE-STRICT-ALIGN: strh
+; BE-STRICT-ALIGN: str
+entry:
+ %val.byte = load <2 x i64>* %head.byte, align 1
+ %val.half = load <2 x i64>* %head.half, align 2
+ %val.word = load <2 x i64>* %head.word, align 4
+ %val.dword = load <2 x i64>* %head.dword, align 8
+ store <2 x i64> %val.byte, <2 x i64>* %tail.byte, align 1
+ store <2 x i64> %val.half, <2 x i64>* %tail.half, align 2
+ store <2 x i64> %val.word, <2 x i64>* %tail.word, align 4
+ store <2 x i64> %val.dword, <2 x i64>* %tail.dword, align 8
+ ret <2 x i64> %val.byte
+}
+
+;; Check load/store of 64-bit vectors with less-than 8-byte alignment
+define <2 x float> @align2vf32 (<2 x float>* %head.byte, <2 x float>* %head.half, <2 x float>* %head.word, <2 x float>* %head.dword,
+                                <2 x float>* %tail.byte, <2 x float>* %tail.half, <2 x float>* %tail.word, <2 x float>* %tail.dword) {
+; CHECK-LABEL: align2vf32
+; CHECK: ld1 {v0.2s}, [x0]
+; CHECK: ld1 {v1.2s}, [x1]
+; CHECK: ld1 {v2.2s}, [x2]
+; CHECK: st1 {v0.2s}, [x4]
+; CHECK: st1 {v1.2s}, [x5]
+; CHECK: st1 {v2.2s}, [x6]
+; BE-STRICT-ALIGN-LABEL: align2vf32
+; BE-STRICT-ALIGN: ldrb
+; BE-STRICT-ALIGN: ldrh
+; BE-STRICT-ALIGN: ldr
+; BE-STRICT-ALIGN: strb
+; BE-STRICT-ALIGN: strh
+; BE-STRICT-ALIGN: str
+entry:
+ %val.byte = load <2 x float>* %head.byte, align 1
+ %val.half = load <2 x float>* %head.half, align 2
+ %val.word = load <2 x float>* %head.word, align 4
+ store <2 x float> %val.byte, <2 x float>* %tail.byte, align 1
+ store <2 x float> %val.half, <2 x float>* %tail.half, align 2
+ store <2 x float> %val.word, <2 x float>* %tail.word, align 4
+ ret <2 x float> %val.byte
+}

{F54319, layout=link}
zzheng updated this revision to Diff 8616. Apr 17 2014, 5:43 PM

This revision includes additional tests that exercise the new flags -aarch64-no-strict-align and -aarch64-strict-align on both big-endian and little-endian targets.
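A minimal sketch of how the two flags are exercised, derived from the RUN lines in the test above (the function here is illustrative, not part of the patch):

; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -aarch64-no-strict-align -mattr=+neon -o -
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -aarch64-strict-align -mattr=+neon -o -
; Under -aarch64-no-strict-align the byte-aligned load below can stay a
; single ld1 {v0.8b}; under -aarch64-strict-align it must be expanded into
; scalar loads (ldrb and friends).
define <8 x i8> @flag_demo(<8 x i8>* %p) {
  %v = load <8 x i8>* %p, align 1
  ret <8 x i8> %v
}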

BTW, I don't have commit access to llvm trunk. If this revision looks good to you, please merge it. Thank you.

Committed as r206557.

I think the test case relates to big-endian and needs to be ported to ARM64 later on.

Thanks,
-Jiangning

2014-04-18 8:43 GMT+08:00 Z. Zheng <zhaoshiz@codeaurora.org>:

> This revision includes additional tests exercise new flag
> -aarch64-no-strict-align and -aarch64-strict-align on both BE and LE.
>
> BTW, I don't have commit access to llvm trunk. If this revision looks
> good to you, please merge it. Thank you.

Hi Jiangning, t.p.northover,

http://reviews.llvm.org/D3319

CHANGE SINCE LAST DIFF

http://reviews.llvm.org/D3319?vs=8427&id=8616#toc

Files:

lib/Target/AArch64/AArch64ISelLowering.cpp
lib/Target/AArch64/AArch64ISelLowering.h
lib/Target/AArch64/AArch64Subtarget.cpp
lib/Target/AArch64/AArch64Subtarget.h
test/CodeGen/AArch64/unaligned-vector-ld1-st1.ll
zzheng accepted this revision. Oct 31 2014, 10:44 AM
zzheng added a reviewer: zzheng.
This revision is now accepted and ready to land. Oct 31 2014, 10:44 AM
zzheng closed this revision. Oct 31 2014, 10:45 AM