Index: lib/Target/ARM/ARMInstrNEON.td
===================================================================
--- lib/Target/ARM/ARMInstrNEON.td
+++ lib/Target/ARM/ARMInstrNEON.td
@@ -610,14 +610,14 @@
 def VLDMQIA
   : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                    IIC_fpLoad_m, "",
-                   [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;
+                   [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>;
 
 // Use VSTM to store a Q register as a D register pair.
 // This is a pseudo instruction that is expanded to VSTMD after reg alloc.
 def VSTMQIA
   : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                    IIC_fpStore_m, "",
-                   [(store (v2f64 DPair:$src), GPR:$Rn)]>;
+                   [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;
 
 // Classes for VLD* pseudo-instructions with multi-register operands.
 // These are expanded to real instructions after register allocation.
@@ -6849,6 +6849,16 @@
 def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
 }
 
+// Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
+def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
+          (VREV64q8 (VLD1q8 addrmode6:$addr))>, Requires<[IsBE]>;
+def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+          (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>, Requires<[IsBE]>;
+def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
+          (VREV64q16 (VLD1q16 addrmode6:$addr))>, Requires<[IsBE]>;
+def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+          (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>, Requires<[IsBE]>;
+
 // Fold extracting an element out of a v2i32 into a vfp register.
 def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
           (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
Index: test/CodeGen/ARM/load_store_multiple.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/load_store_multiple.ll
@@ -0,0 +1,52 @@
+; RUN: llc -mtriple=armv7-eabi -mattr=+neon %s -o - | FileCheck %s --check-prefix=CHECK-LE
+; RUN: llc -mtriple=armv7eb-eabi -mattr=+neon %s -o - | FileCheck %s --check-prefix=CHECK-BE
+
+define void @ld_st_vec_i8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK-LE-LABEL: ld_st_vec_i8:
+;CHECK-LE: vld1.8 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [{{r[0-9]+}}]
+;CHECK-LE-NOT: vrev
+;CHECK-LE: vst1.8 {[[D1]], [[D2]]}, [{{r[0-9]+}}]
+
+;CHECK-BE-LABEL: ld_st_vec_i8:
+;CHECK-BE: vld1.8 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [{{r[0-9]+}}]
+;CHECK-BE: vrev64.8 [[Q1:q[0-9]+]], [[Q2:q[0-9]+]]
+;CHECK-BE: vrev64.8 [[Q1]], [[Q2]]
+;CHECK-BE: vst1.8 {[[D1]], [[D2]]}, [{{r[0-9]+}}]
+
+%load = load <16 x i8>, <16 x i8>* %A, align 1
+store <16 x i8> %load, <16 x i8>* %B, align 1
+ret void
+}
+
+define void @ld_st_vec_i16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LE-LABEL: ld_st_vec_i16:
+;CHECK-LE: vld1.16 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [{{r[0-9]+}}]
+;CHECK-LE-NOT: vrev
+;CHECK-LE: vst1.16 {[[D1]], [[D2]]}, [{{r[0-9]+}}]
+
+;CHECK-BE-LABEL: ld_st_vec_i16:
+;CHECK-BE: vld1.16 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [{{r[0-9]+}}]
+;CHECK-BE: vrev64.16 [[Q1:q[0-9]+]], [[Q2:q[0-9]+]]
+;CHECK-BE: vrev64.16 [[Q1]], [[Q2]]
+;CHECK-BE: vst1.16 {[[D1]], [[D2]]}, [{{r[0-9]+}}]
+
+%load = load <8 x i16>, <8 x i16>* %A, align 2
+store <8 x i16> %load, <8 x i16>* %B, align 2
+ret void
+}
+
+define void @ld_st_vec_i32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LE-LABEL: ld_st_vec_i32:
+;CHECK-LE: vld1.32 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [{{r[0-9]+}}]
+;CHECK-LE-NOT: vrev
+;CHECK-LE: vst1.32 {[[D1]], [[D2]]}, [{{r[0-9]+}}]
+
+;CHECK-BE-LABEL: ld_st_vec_i32:
+;CHECK-BE: vldmia {{r[0-9]+}}, {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}
+;CHECK-BE-NOT: vrev
+;CHECK-BE: vstmia {{r[0-9]+}}, {[[D1]], [[D2]]}
+
+%load = load <4 x i32>, <4 x i32>* %A, align 4
+store <4 x i32> %load, <4 x i32>* %B, align 4
+ret void
+}
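
Note: the new v2f64 patterns in ARMInstrNEON.td cover under-aligned (align 1 and align 2) accesses, while the test above only exercises integer vector types. A minimal sketch of an additional case that would hit the byte-aligned v2f64 path is given below; the function name and the exact CHECK lines are assumptions for illustration, not part of the patch:

  define void @ld_st_vec_f64(<2 x double>* %A, <2 x double>* %B) nounwind {
  ; Assumed big-endian output: an align-1 v2f64 access should be selected by
  ; the new byte_alignedload/byte_alignedstore patterns, i.e. VLD1q8/VST1q8
  ; wrapped in VREV64q8 (exact register operands omitted here).
  ;CHECK-BE: vld1.8
  ;CHECK-BE: vrev64.8
  ;CHECK-BE: vst1.8
  %load = load <2 x double>, <2 x double>* %A, align 1
  store <2 x double> %load, <2 x double>* %B, align 1
  ret void
  }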