Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -21,6 +21,9 @@
 def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
                 AssemblerPredicate<"FeatureCrypto","crypto">;
+def IsLE : Predicate<"Subtarget->isLittle()">;
+def IsBE : Predicate<"!Subtarget->isLittle()">;
+
 // Use fused MAC if more precision in FP computation is allowed.
 def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
                             " FPOpFusion::Fast)">;
@@ -4849,6 +4852,85 @@
   : ls_neutral_pats, ls_atomic_pats;
+
+// Wrappers to instantiate all allowed same-size fp/vector loads and stores
+
+// NEON-BE: allow all NEON vectors as well, since ld1/st1 must be disabled.
+// LD1 & ST1 are not ABI-conforming in big endian: wrong argument memory layout.
+// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf
+// section 4.1.2, 2nd paragraph: LDR/STR layout
+// "on a big-endian system element 0 will contain the highest-addressed
+// element of a short vector."
+// FIXME: eventually also enable for LE
+// (desired by ARM - smaller code due to more powerful addressing modes)
+
+// NEON 8-bit types
+multiclass ls_FPR8_pats {
+  let Predicates = [HasNEON] in {
+    defm : ls_neutral_pats;
+  }
+}
+
+// NEON 16-bit types
+multiclass ls_FPR16_pats {
+  let Predicates = [HasFPARMv8] in {
+    defm : ls_neutral_pats;
+  }
+
+  let Predicates = [HasNEON] in {
+    defm : ls_neutral_pats;
+  }
+}
+
+// NEON 32-bit types
+multiclass ls_FPR32_pats {
+  let Predicates = [HasFPARMv8] in {
+    defm : ls_neutral_pats;
+  }
+
+  let Predicates = [HasNEON] in {
+    defm : ls_neutral_pats;
+//  defm : ls_neutral_pats; does not exist - v1f64 does. Why?
+  }
+}
+
+// NEON 64-bit types
+multiclass ls_FPR64_pats {
+  let Predicates = [HasFPARMv8] in {
+    defm : ls_neutral_pats;
+  }
+
+  let Predicates = [HasNEON] in {
+    defm : ls_neutral_pats;
+    defm : ls_neutral_pats;
+    defm : ls_neutral_pats;
+    defm : ls_neutral_pats;
+    defm : ls_neutral_pats;
+    defm : ls_neutral_pats;
+  }
+}
+
+// NEON 128-bit types FPR128
+multiclass ls_FPR128_pats {
+  let Predicates = [HasFPARMv8] in {
+    defm : ls_neutral_pats;
+  }
+
+  let Predicates = [HasNEON] in {
+    defm : ls_neutral_pats;
+    defm : ls_neutral_pats;
+    defm : ls_neutral_pats;
+    defm : ls_neutral_pats;
+    defm : ls_neutral_pats;
+    defm : ls_neutral_pats;
+  }
+}
+
 //===------------------------------
 // 2.2. Addressing-mode instantiations
 //===------------------------------
@@ -4892,37 +4974,40 @@
                                 !subst(ALIGN, min_align8, decls.pattern))), i64>;
-  defm : ls_neutral_pats;
+
+  defm : ls_FPR16_pats< LSFP16_LDR, LSFP16_STR, Base,
+                        !foreach(decls.pattern, Offset,
+                                 !subst(OFFSET, hword_uimm12, decls.pattern)),
                         !foreach(decls.pattern, address,
                                  !subst(OFFSET, hword_uimm12,
-                                        !subst(ALIGN, min_align2, decls.pattern))),
-                        f16>;
+                                        !subst(ALIGN, min_align2, decls.pattern)))>;
-  defm : ls_neutral_pats;
+                                        !subst(ALIGN, min_align4, decls.pattern)))>;
-  defm : ls_neutral_pats;
+                                        !subst(ALIGN, min_align8, decls.pattern)))>;
-  defm : ls_neutral_pats;
+                                        !subst(ALIGN, min_align16, decls.pattern)))>;
   defm : load_signed_pats<"B", "", Base,
                           !foreach(decls.pattern, Offset,
@@ -4992,11 +5077,10 @@
   defm : ls_int_neutral_pats;
   defm : ls_int_neutral_pats;
-  defm : ls_neutral_pats;
-  defm : ls_neutral_pats;
-  defm : ls_neutral_pats;
-  defm : ls_neutral_pats;
+  defm : ls_FPR16_pats;
+  defm : ls_FPR32_pats;
+  defm : ls_FPR64_pats;
+  defm : ls_FPR128_pats;
   def : Pat<(i64 (zextloadi32 address)),
             (SUBREG_TO_REG (i64 0), (LS32_LDUR Base, Offset), sub_32)>;
Index: lib/Target/AArch64/AArch64InstrNEON.td
===================================================================
--- lib/Target/AArch64/AArch64InstrNEON.td
+++ lib/Target/AArch64/AArch64InstrNEON.td
@@ -104,6 +104,14 @@
   defm : ls_128_pats;
 }
+// LDR is only valid for little endian.
+// In BE, LDR needs correctly byte-swapped 128-bit literals, so simple array
+// initializers won't work right now.
+// Big-endian must - for now - do the element swaps using vector intrinsics.
+// That costs an additional "add offset12" instruction there.
+// According to ARM, BE & LE should use intrinsics for initialization.
+// That's also the only portable code.
+// FIXME: BE could use vector-literal swapping before the emit pass.
 defm : uimm12_neon_pats<(A64WrapperSmall tconstpool:$Hi, tconstpool:$Lo12, ALIGN),
                         (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>;
@@ -3341,9 +3349,31 @@
 // the three 64-bit vectors list {BA, DC, FE}.
 // E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three
 // 64-bit vectors list {DA, EB, FC}.
-// Store instructions store multiple structure to N registers like load.
+// Store instructions store multiple structures from N registers, like loads.
+//
+// Problem for Big Endian (BE):
+// LD1/ST1 do "array" loads/stores - reading elements from ascending addresses
+// into ascending indexes in the register; in big-endian mode, byte-swapping is
+// done per element. (Hence LD1 & Co are sometimes referred to as "array loads".)
+//
+// LDR/STR read the whole register, doing byte-swapping on the whole register
+// in big-endian mode.
+//
+// Obviously the two layouts differ by reversing the elements, so they can't be
+// mixed without explicit element-swap operations in BE.
+//
+// The only overlap is reading single elements to registers:
+// LDR i128/f128 - doing byte-swapping for the whole register.
+// LD1/ST1 i128/f128 - also doing byte-swapping within the 128-bit element.
+// Analogously for stores.
+// For this reason there are IsLE guards around the respective patterns, or -
+// when no patterns are defined yet - around the instruction definition.
+// In a PBQP matcher, one would add a separate set of "reversed" nonterminals
+// with the element swap operations as chain rules - and let the matcher find
+// the optimal coverage. FIXME: How to do that here?
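To make the layout conflict above concrete, here is a small editorial C sketch (not part of the patch; the helper names `be32`, `ld1_4s`, and `ldr_q_be` are invented for illustration). It models the two load semantics on a big-endian core: LD1 fills ascending lanes from ascending addresses with per-element byte swaps, while LDR byte-reverses the whole 128-bit register, so lane 0 ends up holding the highest-addressed element, exactly as the AAPCS64 quote says:

```c
#include <stdint.h>
#include <stdio.h>

/* Read 4 bytes at p as a big-endian 32-bit value. */
static uint32_t be32(const uint8_t *p) {
    return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
           ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
}

/* LD1 {v.4s} on BE: "array load" - ascending addresses fill ascending
 * lanes; byte-swapping happens per 32-bit element.                    */
static void ld1_4s(uint32_t lane[4], const uint8_t mem[16]) {
    for (int i = 0; i < 4; i++)
        lane[i] = be32(mem + 4 * i);
}

/* LDR q on BE: the whole 128-bit register is byte-reversed; lane 0 is
 * the least significant word, i.e. the highest-addressed element.     */
static void ldr_q_be(uint32_t lane[4], const uint8_t mem[16]) {
    for (int i = 0; i < 4; i++)
        lane[i] = be32(mem + 4 * (3 - i));
}

int main(void) {
    uint8_t mem[16];                   /* memory image 00 01 02 ... 0f */
    for (int i = 0; i < 16; i++)
        mem[i] = (uint8_t)i;

    uint32_t a[4], b[4];
    ld1_4s(a, mem);
    ldr_q_be(b, mem);
    for (int i = 0; i < 4; i++)
        printf("lane %d: ld1 %08x  ldr %08x\n", i, a[i], b[i]);
    return 0;
}
```

Lane 0 comes out as 00010203 under the LD1 model but 0c0d0e0f under the LDR model: the same element values in opposite lane order. Mixing the two forms on the same memory would therefore reshuffle a vector silently, which is why the patterns below stay LDR/STR-only in BE and LD1/ST1 is reserved for explicit intrinsics that account for the array layout.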
+
 class NeonI_LDVList opcode, bits<2> size, RegisterOperand VecList, string asmop> : NeonI_LdStMult;
+
+// LD1 is disallowed in BE, where LDR and STR are used exclusively as per the ABI.
+// Reason: LDR/STR use a different memory/register layout (no element swaps).
+// If different types of loads were used from the same memory address, the
+// results would be inconsistent.
+// The only allowed use of LD1 is in initializations, using explicit intrinsics
+// to do the element swaps.
+
+// A single element has no swapping problem in BE.
 def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">;
-defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">;
+// Multiple elements would be reversed in BE.
+let Predicates = [IsLE] in {
+  defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">;
-defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">;
+  defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">;
-defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">;
+  defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">;
+  defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">;
+}
+
 // Load multiple 1-element structure to N consecutive registers (N = 2,3,4)
 defm LD1x2 : LDVList_BHSD<0b1010, "VPair", "ld1">;
 def LD1x2_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">;
@@ -3433,73 +3476,79 @@
 }
 // Store multiple N-element structures from N registers (N = 1,2,3,4)
-defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">;
+// ARM ABI: the default memory layout in BE is the LDR/STR layout.
+// A single element has no swapping problem in BE.
 def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">;
-defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">;
+// Multiple elements would be reversed in BE.
+let Predicates = [IsLE] in {
+  defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">;
-defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">;
+  defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">;
-defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">;
+  defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">;
-// Store multiple 1-element structures from N consecutive registers (N = 2,3,4)
-defm ST1x2 : STVList_BHSD<0b1010, "VPair", "st1">;
-def ST1x2_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">;
+  defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">;
-defm ST1x3 : STVList_BHSD<0b0110, "VTriple", "st1">;
-def ST1x3_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;
+  // Store multiple 1-element structures from N consecutive registers (N = 2,3,4)
+  defm ST1x2 : STVList_BHSD<0b1010, "VPair", "st1">;
+  def ST1x2_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">;
-defm ST1x4 : STVList_BHSD<0b0010, "VQuad", "st1">;
-def ST1x4_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
+  defm ST1x3 : STVList_BHSD<0b0110, "VTriple", "st1">;
+  def ST1x3_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;
-def : Pat<(v2f64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
-def : Pat<(v2i64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
+  defm ST1x4 : STVList_BHSD<0b0010, "VQuad", "st1">;
+  def ST1x4_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
-def : Pat<(v4f32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;
-def : Pat<(v4i32 (load
GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>; -def : Pat<(v1f64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>; -def : Pat<(v1i64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>; + def : Pat<(v8i16 (load GPR64xsp:$addr)), (LD1_8H GPR64xsp:$addr)>; + def : Pat<(v16i8 (load GPR64xsp:$addr)), (LD1_16B GPR64xsp:$addr)>; -def : Pat<(v2f32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>; -def : Pat<(v2i32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>; + def : Pat<(v1f64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>; + def : Pat<(v1i64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>; -def : Pat<(v4i16 (load GPR64xsp:$addr)), (LD1_4H GPR64xsp:$addr)>; -def : Pat<(v8i8 (load GPR64xsp:$addr)), (LD1_8B GPR64xsp:$addr)>; + def : Pat<(v2f32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>; + def : Pat<(v2i32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>; -def : Pat<(store (v2i64 VPR128:$value), GPR64xsp:$addr), - (ST1_2D GPR64xsp:$addr, VPR128:$value)>; -def : Pat<(store (v2f64 VPR128:$value), GPR64xsp:$addr), - (ST1_2D GPR64xsp:$addr, VPR128:$value)>; + def : Pat<(v4i16 (load GPR64xsp:$addr)), (LD1_4H GPR64xsp:$addr)>; + def : Pat<(v8i8 (load GPR64xsp:$addr)), (LD1_8B GPR64xsp:$addr)>; -def : Pat<(store (v4i32 VPR128:$value), GPR64xsp:$addr), - (ST1_4S GPR64xsp:$addr, VPR128:$value)>; -def : Pat<(store (v4f32 VPR128:$value), GPR64xsp:$addr), - (ST1_4S GPR64xsp:$addr, VPR128:$value)>; + def : Pat<(store (v2i64 VPR128:$value), GPR64xsp:$addr), + (ST1_2D GPR64xsp:$addr, VPR128:$value)>; + def : Pat<(store (v2f64 VPR128:$value), GPR64xsp:$addr), + (ST1_2D GPR64xsp:$addr, VPR128:$value)>; -def : Pat<(store (v8i16 VPR128:$value), GPR64xsp:$addr), - (ST1_8H GPR64xsp:$addr, VPR128:$value)>; -def : Pat<(store (v16i8 VPR128:$value), GPR64xsp:$addr), - (ST1_16B GPR64xsp:$addr, VPR128:$value)>; + def : Pat<(store (v4i32 VPR128:$value), GPR64xsp:$addr), + (ST1_4S GPR64xsp:$addr, VPR128:$value)>; + def : Pat<(store (v4f32 VPR128:$value), GPR64xsp:$addr), + (ST1_4S GPR64xsp:$addr, VPR128:$value)>; -def : Pat<(store (v1i64 VPR64:$value), GPR64xsp:$addr), - (ST1_1D GPR64xsp:$addr, VPR64:$value)>; -def : Pat<(store (v1f64 VPR64:$value), GPR64xsp:$addr), - (ST1_1D GPR64xsp:$addr, VPR64:$value)>; + def : Pat<(store (v8i16 VPR128:$value), GPR64xsp:$addr), + (ST1_8H GPR64xsp:$addr, VPR128:$value)>; + def : Pat<(store (v16i8 VPR128:$value), GPR64xsp:$addr), + (ST1_16B GPR64xsp:$addr, VPR128:$value)>; -def : Pat<(store (v2i32 VPR64:$value), GPR64xsp:$addr), - (ST1_2S GPR64xsp:$addr, VPR64:$value)>; -def : Pat<(store (v2f32 VPR64:$value), GPR64xsp:$addr), - (ST1_2S GPR64xsp:$addr, VPR64:$value)>; + def : Pat<(store (v1i64 VPR64:$value), GPR64xsp:$addr), + (ST1_1D GPR64xsp:$addr, VPR64:$value)>; + def : Pat<(store (v1f64 VPR64:$value), GPR64xsp:$addr), + (ST1_1D GPR64xsp:$addr, VPR64:$value)>; -def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr), - (ST1_4H GPR64xsp:$addr, VPR64:$value)>; -def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr), - (ST1_8B GPR64xsp:$addr, VPR64:$value)>; + def : Pat<(store (v2i32 VPR64:$value), GPR64xsp:$addr), + (ST1_2S GPR64xsp:$addr, VPR64:$value)>; + def : Pat<(store (v2f32 VPR64:$value), GPR64xsp:$addr), + (ST1_2S GPR64xsp:$addr, VPR64:$value)>; + def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr), + (ST1_4H GPR64xsp:$addr, VPR64:$value)>; + def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr), + (ST1_8B GPR64xsp:$addr, VPR64:$value)>; +} + // Match load/store of v1i8/v1i16/v1i32 type to FPR8/FPR16/FPR32 load/store. 
// FIXME: for now we have v1i8, v1i16, v1i32 legal types, if they are illegal,
// these patterns are not needed any more.
@@ -3681,35 +3730,40 @@
                        ImmTy2, asmop>;
 }
-// Post-index load multiple N-element structures from N registers (N = 1,2,3,4)
-defm LD1WB : LDWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "ld1">;
+// Single element loads are OK for BE.
 defm LD1WB_1D : NeonI_LDWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
-                                 "ld1">;
+                                 "ld1">;
-defm LD2WB : LDWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "ld2">;
+// Multiple elements would be reversed in BE.
+let Predicates = [IsLE] in {
+  // Post-index load multiple N-element structures from N registers (N = 1,2,3,4)
+  defm LD1WB : LDWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "ld1">;
-defm LD3WB : LDWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
-             "ld3">;
+  defm LD2WB : LDWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "ld2">;
-defm LD4WB : LDWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "ld4">;
+  defm LD3WB : LDWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
+               "ld3">;
-// Post-index load multiple 1-element structures from N consecutive registers
-// (N = 2,3,4)
-defm LD1x2WB : LDWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
-               "ld1">;
-defm LD1x2WB_1D : NeonI_LDWB_VList<0, 0b1010, 0b11, VPair1D_operand,
-                  uimm_exact16, "ld1">;
+  defm LD4WB : LDWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "ld4">;
-defm LD1x3WB : LDWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
-               "ld1">;
-defm LD1x3WB_1D : NeonI_LDWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
-                  uimm_exact24, "ld1">;
+  // Post-index load multiple 1-element structures to N consecutive registers
+  // (N = 2,3,4)
+  defm LD1x2WB : LDWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
+                 "ld1">;
+  defm LD1x2WB_1D : NeonI_LDWB_VList<0, 0b1010, 0b11, VPair1D_operand,
+                    uimm_exact16, "ld1">;
-defm LD1x4WB : LDWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
-               "ld1">;
-defm LD1x4WB_1D : NeonI_LDWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
-                  uimm_exact32, "ld1">;
+  defm LD1x3WB : LDWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
+                 "ld1">;
+  defm LD1x3WB_1D : NeonI_LDWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
+                    uimm_exact24, "ld1">;
+  defm LD1x4WB : LDWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
+                 "ld1">;
+  defm LD1x4WB_1D : NeonI_LDWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
+                    uimm_exact32, "ld1">;
+}
+
 multiclass NeonI_STWB_VList opcode, bits<2> size,
                             RegisterOperand VecList, Operand ImmTy,
                             string asmop> {
@@ -3764,33 +3818,36 @@
 }
 // Post-index load multiple N-element structures from N registers (N = 1,2,3,4)
-defm ST1WB : STWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "st1">;
-defm ST1WB_1D : NeonI_STWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
-                "st1">;
+// Storing multiple elements in BE mode suffers from element reversal.
+let Predicates = [IsLE] in {
+  defm ST1WB_1D : NeonI_STWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
+                  "st1">;
+  defm ST1WB : STWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "st1">;
-defm ST2WB : STWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "st2">;
+  defm ST2WB : STWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "st2">;
-defm ST3WB : STWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
-             "st3">;
+  defm ST3WB : STWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
+               "st3">;
-defm ST4WB : STWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "st4">;
+  defm ST4WB : STWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "st4">;
-// Post-index load multiple 1-element structures from N consecutive registers
-// (N = 2,3,4)
-defm ST1x2WB : STWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
-               "st1">;
-defm ST1x2WB_1D : NeonI_STWB_VList<0, 0b1010, 0b11, VPair1D_operand,
-                  uimm_exact16, "st1">;
+  // Post-index store multiple 1-element structures from N consecutive registers
+  // (N = 2,3,4)
+  defm ST1x2WB : STWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
+                 "st1">;
+  defm ST1x2WB_1D : NeonI_STWB_VList<0, 0b1010, 0b11, VPair1D_operand,
+                    uimm_exact16, "st1">;
-defm ST1x3WB : STWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
-               "st1">;
-defm ST1x3WB_1D : NeonI_STWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
-                  uimm_exact24, "st1">;
+  defm ST1x3WB : STWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
+                 "st1">;
+  defm ST1x3WB_1D : NeonI_STWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
+                    uimm_exact24, "st1">;
-defm ST1x4WB : STWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
-               "st1">;
-defm ST1x4WB_1D : NeonI_STWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
-                  uimm_exact32, "st1">;
+  defm ST1x4WB : STWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
+                 "st1">;
+  defm ST1x4WB_1D : NeonI_STWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
+                    uimm_exact32, "st1">;
+}
 // End of post-index vector load/store multiple N-element structure
 // (class SIMD lselem-post)
@@ -3865,13 +3922,17 @@
 }
 // Load single 1-element structure to all lanes of 1 register
+// Single element loads are fine in BE.
 defm LD1R : LDN_Dup_BHSD<0b0, 0b110, "VOne", "ld1r">;
 // Load single N-element structure to all lanes of N consecutive
 // registers (N = 2,3,4)
-defm LD2R : LDN_Dup_BHSD<0b1, 0b110, "VPair", "ld2r">;
-defm LD3R : LDN_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r">;
-defm LD4R : LDN_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r">;
+// Multi-element loads suffer from element reversal in BE.
+let Predicates = [IsLE] in {
+  defm LD2R : LDN_Dup_BHSD<0b1, 0b110, "VPair", "ld2r">;
+  defm LD3R : LDN_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r">;
+  defm LD4R : LDN_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r">;
+}
 class LD1R_pattern ;
 // Match all LD1R instructions
-def : LD1R_pattern;
+// This won't work as intended in BE mode, as STR q0 stores the elements swapped.
+let Predicates = [IsLE] in { + def : LD1R_pattern; + def : LD1R_pattern; -def : LD1R_pattern; + def : LD1R_pattern; -def : LD1R_pattern; + def : LD1R_pattern; -def : LD1R_pattern; + def : LD1R_pattern; + def : LD1R_pattern; -def : LD1R_pattern; -def : LD1R_pattern; + def : LD1R_pattern; + def : LD1R_pattern; -def : LD1R_pattern; -def : LD1R_pattern; + def : LD1R_pattern; + def : LD1R_pattern; +} -def : LD1R_pattern; -def : LD1R_pattern; - class LD1R_pattern_v1 : Pat<(VTy (scalar_to_vector (DTy (LoadOp GPR64xsp:$Rn)))), (VTy (INST GPR64xsp:$Rn))>; +// Single element operations are swap-safe in BE. def : LD1R_pattern_v1; def : LD1R_pattern_v1; + multiclass VectorList_Bare_BHSD { defm B : VectorList_operands; @@ -3965,46 +4030,64 @@ } // Load single 1-element structure to one lane of 1 register. +// No dangerous element swaps in BE. :-) defm LD1LN : LDN_Lane_BHSD<0b0, 0b0, "VOne", "ld1">; // Load single N-element structure to one lane of N consecutive registers // (N = 2,3,4) -defm LD2LN : LDN_Lane_BHSD<0b1, 0b0, "VPair", "ld2">; -defm LD3LN : LDN_Lane_BHSD<0b0, 0b1, "VTriple", "ld3">; -defm LD4LN : LDN_Lane_BHSD<0b1, 0b1, "VQuad", "ld4">; +// +// This will not work as intended in BE mode, if the matcher generates it to +// load a vector to a lane. (STR q0 stored the vector's elements swapped) +// Must always use an intrinsic, so the user knows it's loading from an array +// layout. +let Predicates = [IsLE] in { + defm LD2LN : LDN_Lane_BHSD<0b1, 0b0, "VPair", "ld2">; + defm LD3LN : LDN_Lane_BHSD<0b0, 0b1, "VTriple", "ld3">; + defm LD4LN : LDN_Lane_BHSD<0b1, 0b1, "VQuad", "ld4">; +} -multiclass LD1LN_patterns { - def : Pat<(VTy (vector_insert (VTy VPR64:$src), - (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp:$lane))), - (VTy (EXTRACT_SUBREG - (INST GPR64xsp:$Rn, - (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), - ImmOp:$lane), - sub_64))>; - - def : Pat<(VTy2 (vector_insert (VTy2 VPR128:$src), - (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp2:$lane))), - (VTy2 (INST GPR64xsp:$Rn, VPR128:$src, ImmOp2:$lane))>; +// Multiple elements would be reversed in BE. +let Predicates = [IsLE] in { + multiclass LD1LN_patterns { + def : Pat<(VTy (vector_insert (VTy VPR64:$src), + (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp:$lane))), + (VTy (EXTRACT_SUBREG + (INST GPR64xsp:$Rn, + (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), + ImmOp:$lane), + sub_64))>; + + def : Pat<(VTy2 (vector_insert (VTy2 VPR128:$src), + (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp2:$lane))), + (VTy2 (INST GPR64xsp:$Rn, VPR128:$src, ImmOp2:$lane))>; + } } // Match all LD1LN instructions -defm : LD1LN_patterns; +// +// This will not work as intended in BE mode, if the matcher generates it to +// load a vector to a lane. (STR q0 stored the elements swapped in BE) +// Must always use an intrinsic, so the user knows it's loading from an array +// layout. +let Predicates = [IsLE] in { + defm : LD1LN_patterns; -defm : LD1LN_patterns; + defm : LD1LN_patterns; -defm : LD1LN_patterns; -defm : LD1LN_patterns; + defm : LD1LN_patterns; + defm : LD1LN_patterns; -defm : LD1LN_patterns; -defm : LD1LN_patterns; + defm : LD1LN_patterns; + defm : LD1LN_patterns; +} class NeonI_STN_Lane op2_1, bit op0, RegisterOperand VList, Operand ImmOp, string asmop> @@ -4049,13 +4132,17 @@ } // Store single 1-element structure from one lane of 1 register. +// single element should be fine in BE - no swapping of elements. 
defm ST1LN : STN_Lane_BHSD<0b0, 0b0, "VOne", "st1">; // Store single N-element structure from one lane of N consecutive registers // (N = 2,3,4) -defm ST2LN : STN_Lane_BHSD<0b1, 0b0, "VPair", "st2">; -defm ST3LN : STN_Lane_BHSD<0b0, 0b1, "VTriple", "st3">; -defm ST4LN : STN_Lane_BHSD<0b1, 0b1, "VQuad", "st4">; +// Multiple elements would be reversed in BE. +let Predicates = [IsLE] in { + defm ST2LN : STN_Lane_BHSD<0b1, 0b0, "VPair", "st2">; + defm ST3LN : STN_Lane_BHSD<0b0, 0b1, "VTriple", "st3">; + defm ST4LN : STN_Lane_BHSD<0b1, 0b1, "VQuad", "st4">; +} multiclass ST1LN_patterns; +// +// Multiple elements would be reversed in BE. +let Predicates = [IsLE] in { + defm : ST1LN_patterns; -defm : ST1LN_patterns; + defm : ST1LN_patterns; -defm : ST1LN_patterns; -defm : ST1LN_patterns; + defm : ST1LN_patterns; + defm : ST1LN_patterns; -defm : ST1LN_patterns; -defm : ST1LN_patterns; - + defm : ST1LN_patterns; + defm : ST1LN_patterns; +} // End of vector load/store single N-element structure (class SIMD lsone). @@ -4154,17 +4244,21 @@ } // Post-index load single 1-element structure to all lanes of 1 register +// one element duplication should be fine in BE - no swapping of elements. defm LD1R_WB : LDWB_Dup_BHSD<0b0, 0b110, "VOne", "ld1r", uimm_exact1, uimm_exact2, uimm_exact4, uimm_exact8>; // Post-index load single N-element structure to all lanes of N consecutive // registers (N = 2,3,4) -defm LD2R_WB : LDWB_Dup_BHSD<0b1, 0b110, "VPair", "ld2r", uimm_exact2, - uimm_exact4, uimm_exact8, uimm_exact16>; -defm LD3R_WB : LDWB_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r", uimm_exact3, - uimm_exact6, uimm_exact12, uimm_exact24>; -defm LD4R_WB : LDWB_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r", uimm_exact4, - uimm_exact8, uimm_exact16, uimm_exact32>; +// Multiple elements would be reversed in BE. +let Predicates = [IsLE] in { + defm LD2R_WB : LDWB_Dup_BHSD<0b1, 0b110, "VPair", "ld2r", uimm_exact2, + uimm_exact4, uimm_exact8, uimm_exact16>; + defm LD3R_WB : LDWB_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r", uimm_exact3, + uimm_exact6, uimm_exact12, uimm_exact24>; + defm LD4R_WB : LDWB_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r", uimm_exact4, + uimm_exact8, uimm_exact16, uimm_exact32>; +} let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1, Constraints = "$Rn = $wb, $Rt = $src", @@ -4253,18 +4347,22 @@ } // Post-index load single 1-element structure to one lane of 1 register. +// One element from 1 lane is fine in BE - no swapping of elements. defm LD1LN_WB : LD_Lane_WB_BHSD<0b0, 0b0, "VOne", "ld1", uimm_exact1, uimm_exact2, uimm_exact4, uimm_exact8>; // Post-index load single N-element structure to one lane of N consecutive // registers // (N = 2,3,4) -defm LD2LN_WB : LD_Lane_WB_BHSD<0b1, 0b0, "VPair", "ld2", uimm_exact2, - uimm_exact4, uimm_exact8, uimm_exact16>; -defm LD3LN_WB : LD_Lane_WB_BHSD<0b0, 0b1, "VTriple", "ld3", uimm_exact3, - uimm_exact6, uimm_exact12, uimm_exact24>; -defm LD4LN_WB : LD_Lane_WB_BHSD<0b1, 0b1, "VQuad", "ld4", uimm_exact4, - uimm_exact8, uimm_exact16, uimm_exact32>; +// Multiple elements would be reversed in BE. 
+let Predicates = [IsLE] in { + defm LD2LN_WB : LD_Lane_WB_BHSD<0b1, 0b0, "VPair", "ld2", uimm_exact2, + uimm_exact4, uimm_exact8, uimm_exact16>; + defm LD3LN_WB : LD_Lane_WB_BHSD<0b0, 0b1, "VTriple", "ld3", uimm_exact3, + uimm_exact6, uimm_exact12, uimm_exact24>; + defm LD4LN_WB : LD_Lane_WB_BHSD<0b1, 0b1, "VQuad", "ld4", uimm_exact4, + uimm_exact8, uimm_exact16, uimm_exact32>; +} let mayStore = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1, Constraints = "$Rn = $wb", @@ -4353,17 +4451,21 @@ } // Post-index store single 1-element structure from one lane of 1 register. +// one element from 1 lane should be fine in BE - no swapping of elements. defm ST1LN_WB : ST_Lane_WB_BHSD<0b0, 0b0, "VOne", "st1", uimm_exact1, uimm_exact2, uimm_exact4, uimm_exact8>; // Post-index store single N-element structure from one lane of N consecutive // registers (N = 2,3,4) -defm ST2LN_WB : ST_Lane_WB_BHSD<0b1, 0b0, "VPair", "st2", uimm_exact2, - uimm_exact4, uimm_exact8, uimm_exact16>; -defm ST3LN_WB : ST_Lane_WB_BHSD<0b0, 0b1, "VTriple", "st3", uimm_exact3, - uimm_exact6, uimm_exact12, uimm_exact24>; -defm ST4LN_WB : ST_Lane_WB_BHSD<0b1, 0b1, "VQuad", "st4", uimm_exact4, - uimm_exact8, uimm_exact16, uimm_exact32>; +// Multiple elements would be reversed in BE. +let Predicates = [IsLE] in { + defm ST2LN_WB : ST_Lane_WB_BHSD<0b1, 0b0, "VPair", "st2", uimm_exact2, + uimm_exact4, uimm_exact8, uimm_exact16>; + defm ST3LN_WB : ST_Lane_WB_BHSD<0b0, 0b1, "VTriple", "st3", uimm_exact3, + uimm_exact6, uimm_exact12, uimm_exact24>; + defm ST4LN_WB : ST_Lane_WB_BHSD<0b1, 0b1, "VQuad", "st4", uimm_exact4, + uimm_exact8, uimm_exact16, uimm_exact32>; +} // End of post-index load/store single N-element instructions // (class SIMD lsone-post) Index: test/CodeGen/AArch64/128bit_load_store.ll =================================================================== --- test/CodeGen/AArch64/128bit_load_store.ll +++ test/CodeGen/AArch64/128bit_load_store.ll @@ -1,5 +1,118 @@ +; R UN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=neon | FileCheck %s ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=neon | FileCheck %s +define void @test_store_v1i8(<1 x i8>* %ptr, <1 x i8> %val) #0 { +; CHECK: test_store_v1i8 +; CHECK: str {{b[0-9]+}}, [{{x[0-9]+}}] +entry: + store <1 x i8> %val, <1 x i8>* %ptr, align 8 + ret void +} + + + +define void @test_store_f16(half* %ptr, half %val) #0 { +; CHECK: test_store_f16 +; CHECK: str {{h[0-9]+}}, [{{x[0-9]+}}] +entry: + store half %val, half* %ptr, align 8 + ret void +} + +define void @test_store_v1i16(<1 x i16>* %ptr, <1 x i16> %val) #0 { +; CHECK: test_store_v1i16 +; CHECK: str {{h[0-9]+}}, [{{x[0-9]+}}] +entry: + store <1 x i16> %val, <1 x i16>* %ptr, align 8 + ret void +} + + + +define void @test_store_f32(float* %ptr, float %val) #0 { +; CHECK: test_store_f32 +; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}] +entry: + store float %val, float* %ptr, align 8 + ret void +} + +define void @test_store_v1f32(<1 x float>* %ptr, <1 x float> %val) #0 { +; CHECK: test_store_v1f32 +; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}] +entry: + store <1 x float> %val, <1 x float>* %ptr, align 8 + ret void +} + +define void @test_store_v1i32(<1 x i32>* %ptr, <1 x i32> %val) #0 { +; CHECK: test_store_v1i32 +; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}] +entry: + store <1 x i32> %val, <1 x i32>* %ptr, align 8 + ret void +} + + +define void @test_store_f64(double *%ptr, double %val) #0 { +; CHECK: test_store_f64 +; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}] +entry: + store double %val, 
double* %ptr, align 8 + ret void +} + +define void @test_store_v1f64(<1 x double>* %ptr, <1 x double> %val) #0 { +; CHECK: test_store_v1f64 +; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}] +entry: + store <1 x double> %val, <1 x double>* %ptr, align 8 + ret void +} + +define void @test_store_v2f32(<2 x float>* %ptr, <2 x float> %val) #0 { +; CHECK: test_store_v2f32 +; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}] +entry: + store <2 x float> %val, <2 x float>* %ptr, align 8 + ret void +} + +define void @test_store_v1i64(<1 x i64>* %ptr, <1 x i64> %val) #0 { +; CHECK: test_store_v1i64 +; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}] +entry: + store <1 x i64> %val, <1 x i64>* %ptr, align 8 + ret void +} + +define void @test_store_v2i32(<2 x i32>* %ptr, <2 x i32> %val) #0 { +; CHECK: test_store_v2i32 +; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}] +entry: + store <2 x i32> %val, <2 x i32>* %ptr, align 8 + ret void +} + +define void @test_store_v4i16(<4 x i16>* %ptr, <4 x i16> %val) #0 { +; CHECK: test_store_v4i16 +; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}] +entry: + store <4 x i16> %val, <4 x i16>* %ptr, align 8 + ret void +} + +define void @test_store_v8i8(<8 x i8>* %ptr, <8 x i8> %val) #0 { +; CHECK: test_store_v8i8 +; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}] +entry: + store <8 x i8> %val, <8 x i8>* %ptr, align 8 + ret void +} + + + + define void @test_store_f128(fp128* %ptr, fp128 %val) #0 { ; CHECK: test_store_f128 ; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}] @@ -8,6 +121,54 @@ ret void } +define void @test_store_v2f64(<2 x double>* %ptr, <2 x double> %val) #0 { +; CHECK: test_store_v2f64 +; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}] +entry: + store <2 x double> %val, <2 x double>* %ptr, align 16 + ret void +} + +define void @test_store_v4f32(<4 x float>* %ptr, <4 x float> %val) #0 { +; CHECK: test_store_v4f32 +; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}] +entry: + store <4 x float> %val, <4 x float>* %ptr, align 16 + ret void +} + +define void @test_store_v2i64(<2 x i64>* %ptr, <2 x i64> %val) #0 { +; CHECK: test_store_v2i64 +; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}] +entry: + store <2 x i64> %val, <2 x i64>* %ptr, align 16 + ret void +} + +define void @test_store_v4i32(<4 x i32>* %ptr, <4 x i32> %val) #0 { +; CHECK: test_store_v4i32 +; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}] +entry: + store <4 x i32> %val, <4 x i32>* %ptr, align 16 + ret void +} + +define void @test_store_v8i16(<8 x i16>* %ptr, <8 x i16> %val) #0 { +; CHECK: test_store_v8i16 +; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}] +entry: + store <8 x i16> %val, <8 x i16>* %ptr, align 16 + ret void +} + +define void @test_store_v16i8(<16 x i8>* %ptr, <16 x i8> %val) #0 { +; CHECK: test_store_v16i8 +; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}] +entry: + store <16 x i8> %val, <16 x i8>* %ptr, align 16 + ret void +} + define fp128 @test_load_f128(fp128* readonly %ptr) #2 { ; CHECK: test_load_f128 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}] Index: test/CodeGen/AArch64/addsub-shifted.ll =================================================================== --- test/CodeGen/AArch64/addsub-shifted.ll +++ test/CodeGen/AArch64/addsub-shifted.ll @@ -1,3 +1,4 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s @var32 = global i32 0 Index: test/CodeGen/AArch64/addsub.ll =================================================================== --- test/CodeGen/AArch64/addsub.ll +++ test/CodeGen/AArch64/addsub.ll @@ -1,3 +1,4 @@ +; RUN: llc -verify-machineinstrs < %s 
-mtriple=aarch64_be-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; Note that this should be refactored (for efficiency if nothing else) Index: test/CodeGen/AArch64/addsub_ext.ll =================================================================== --- test/CodeGen/AArch64/addsub_ext.ll +++ test/CodeGen/AArch64/addsub_ext.ll @@ -1,3 +1,4 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s @var8 = global i8 0 Index: test/CodeGen/AArch64/alloca.ll =================================================================== --- test/CodeGen/AArch64/alloca.ll +++ test/CodeGen/AArch64/alloca.ll @@ -1,3 +1,5 @@ +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP %s Index: test/CodeGen/AArch64/analyze-branch.ll =================================================================== --- test/CodeGen/AArch64/analyze-branch.ll +++ test/CodeGen/AArch64/analyze-branch.ll @@ -1,3 +1,4 @@ +; RUN: llc -mtriple=aarch64_be-none-linux-gnu < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s ; This test checks that LLVM can do basic stripping and reapplying of branches Index: test/CodeGen/AArch64/assertion-rc-mismatch.ll =================================================================== --- test/CodeGen/AArch64/assertion-rc-mismatch.ll +++ test/CodeGen/AArch64/assertion-rc-mismatch.ll @@ -1,3 +1,4 @@ +; RUN: llc < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; Test case related to . 
Index: test/CodeGen/AArch64/atomic-ops-not-barriers.ll =================================================================== --- test/CodeGen/AArch64/atomic-ops-not-barriers.ll +++ test/CodeGen/AArch64/atomic-ops-not-barriers.ll @@ -1,3 +1,4 @@ +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s define i32 @foo(i32* %var, i1 %cond) { Index: test/CodeGen/AArch64/atomic-ops.ll =================================================================== --- test/CodeGen/AArch64/atomic-ops.ll +++ test/CodeGen/AArch64/atomic-ops.ll @@ -1,3 +1,5 @@ +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-REG %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-REG %s Index: test/CodeGen/AArch64/basic-pic.ll =================================================================== --- test/CodeGen/AArch64/basic-pic.ll +++ test/CodeGen/AArch64/basic-pic.ll @@ -1,3 +1,4 @@ +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s @var = global i32 0 Index: test/CodeGen/AArch64/bitfield-insert-0.ll =================================================================== --- test/CodeGen/AArch64/bitfield-insert-0.ll +++ test/CodeGen/AArch64/bitfield-insert-0.ll @@ -1,3 +1,4 @@ +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -filetype=obj < %s | llvm-objdump -disassemble - | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -disassemble - | FileCheck %s ; The encoding of lsb -> immr in the CGed bitfield instructions was wrong at one Index: test/CodeGen/AArch64/bitfield-insert.ll =================================================================== --- test/CodeGen/AArch64/bitfield-insert.ll +++ test/CodeGen/AArch64/bitfield-insert.ll @@ -1,3 +1,4 @@ +; RUN: llc -mtriple=aarch64_be-none-linux-gnu < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s ; First, a simple example from Clang. 
The registers could plausibly be Index: test/CodeGen/AArch64/bitfield.ll =================================================================== --- test/CodeGen/AArch64/bitfield.ll +++ test/CodeGen/AArch64/bitfield.ll @@ -1,3 +1,4 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s Index: test/CodeGen/AArch64/blockaddress.ll =================================================================== --- test/CodeGen/AArch64/blockaddress.ll +++ test/CodeGen/AArch64/blockaddress.ll @@ -1,3 +1,5 @@ +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -code-model=large -mtriple=aarch64_be-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -code-model=large -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s Index: test/CodeGen/AArch64/bool-loads.ll =================================================================== --- test/CodeGen/AArch64/bool-loads.ll +++ test/CodeGen/AArch64/bool-loads.ll @@ -1,3 +1,4 @@ +; RUN: llc -mtriple=aarch64_be-none-linux-gnu < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s @var = global i1 0 Index: test/CodeGen/AArch64/breg.ll =================================================================== --- test/CodeGen/AArch64/breg.ll +++ test/CodeGen/AArch64/breg.ll @@ -1,3 +1,4 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s @stored_label = global i8* null Index: test/CodeGen/AArch64/callee-save.ll =================================================================== --- test/CodeGen/AArch64/callee-save.ll +++ test/CodeGen/AArch64/callee-save.ll @@ -1,3 +1,4 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s @var = global float 0.0 Index: test/CodeGen/AArch64/code-model-large-abs.ll =================================================================== --- test/CodeGen/AArch64/code-model-large-abs.ll +++ test/CodeGen/AArch64/code-model-large-abs.ll @@ -1,3 +1,4 @@ +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -code-model=large < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -code-model=large < %s | FileCheck %s @var8 = global i8 0 Index: test/CodeGen/AArch64/compare-branch.ll =================================================================== --- test/CodeGen/AArch64/compare-branch.ll +++ test/CodeGen/AArch64/compare-branch.ll @@ -1,3 +1,4 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s @var32 = global i32 0 Index: test/CodeGen/AArch64/complex-copy-noneon.ll =================================================================== --- test/CodeGen/AArch64/complex-copy-noneon.ll +++ test/CodeGen/AArch64/complex-copy-noneon.ll @@ -1,3 +1,4 @@ +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=-neon < %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-neon < %s ; The DAG combiner decided to use a vector load/store for this struct copy Index: test/CodeGen/AArch64/concatvector-v8i8-bug.ll 
=================================================================== --- test/CodeGen/AArch64/concatvector-v8i8-bug.ll +++ test/CodeGen/AArch64/concatvector-v8i8-bug.ll @@ -1,3 +1,4 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon ; Bug: i8 type in FRP8 register but not registering with register class causes segmentation fault. ; Fix: Removed i8 type from FPR8 register class. Index: test/CodeGen/AArch64/cond-sel.ll =================================================================== --- test/CodeGen/AArch64/cond-sel.ll +++ test/CodeGen/AArch64/cond-sel.ll @@ -1,3 +1,5 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s Index: test/CodeGen/AArch64/cpus.ll =================================================================== --- test/CodeGen/AArch64/cpus.ll +++ test/CodeGen/AArch64/cpus.ll @@ -1,3 +1,7 @@ +; RUN: llc < %s -mtriple=aarch64_be-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s +; RUN: llc < %s -mtriple=aarch64_be-unknown-unknown -mcpu=cortex-a53 2>&1 | FileCheck %s +; RUN: llc < %s -mtriple=aarch64_be-unknown-unknown -mcpu=cortex-a57 2>&1 | FileCheck %s +; RUN: llc < %s -mtriple=aarch64_be-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID ; This tests that llc accepts all valid AArch64 CPUs ; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s Index: test/CodeGen/AArch64/directcond.ll =================================================================== --- test/CodeGen/AArch64/directcond.ll +++ test/CodeGen/AArch64/directcond.ll @@ -1,3 +1,5 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s Index: test/CodeGen/AArch64/dp-3source.ll =================================================================== --- test/CodeGen/AArch64/dp-3source.ll +++ test/CodeGen/AArch64/dp-3source.ll @@ -1,3 +1,4 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s define i32 @test_madd32(i32 %val0, i32 %val1, i32 %val2) { Index: test/CodeGen/AArch64/dp1.ll =================================================================== --- test/CodeGen/AArch64/dp1.ll +++ test/CodeGen/AArch64/dp1.ll @@ -1,3 +1,4 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s @var32 = global i32 0 Index: test/CodeGen/AArch64/dp2.ll =================================================================== --- test/CodeGen/AArch64/dp2.ll +++ test/CodeGen/AArch64/dp2.ll @@ -1,3 +1,4 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | 
FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s @var32_0 = global i32 0 Index: test/CodeGen/AArch64/extern-weak.ll =================================================================== --- test/CodeGen/AArch64/extern-weak.ll +++ test/CodeGen/AArch64/extern-weak.ll @@ -1,3 +1,5 @@ +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -o - < %s | FileCheck %s +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -code-model=large -o - < %s | FileCheck --check-prefix=CHECK-LARGE %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -o - < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -code-model=large -o - < %s | FileCheck --check-prefix=CHECK-LARGE %s Index: test/CodeGen/AArch64/extract.ll =================================================================== --- test/CodeGen/AArch64/extract.ll +++ test/CodeGen/AArch64/extract.ll @@ -1,3 +1,4 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s define i64 @ror_i64(i64 %in) { Index: test/CodeGen/AArch64/fastcc-reserved.ll =================================================================== --- test/CodeGen/AArch64/fastcc-reserved.ll +++ test/CodeGen/AArch64/fastcc-reserved.ll @@ -1,3 +1,4 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -tailcallopt | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s ; This test is designed to be run in the situation where the Index: test/CodeGen/AArch64/fastcc.ll =================================================================== --- test/CodeGen/AArch64/fastcc.ll +++ test/CodeGen/AArch64/fastcc.ll @@ -1,3 +1,5 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -tailcallopt | FileCheck %s -check-prefix CHECK-TAIL +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s -check-prefix CHECK-TAIL ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s Index: test/CodeGen/AArch64/fcmp.ll =================================================================== --- test/CodeGen/AArch64/fcmp.ll +++ test/CodeGen/AArch64/fcmp.ll @@ -1,3 +1,4 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s declare void @bar(i32) Index: test/CodeGen/AArch64/fcvt-fixed.ll =================================================================== --- test/CodeGen/AArch64/fcvt-fixed.ll +++ test/CodeGen/AArch64/fcvt-fixed.ll @@ -1,3 +1,4 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -O0 | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s @var32 = global i32 0 Index: test/CodeGen/AArch64/fcvt-int.ll =================================================================== --- test/CodeGen/AArch64/fcvt-int.ll +++ test/CodeGen/AArch64/fcvt-int.ll @@ -1,3 +1,4 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s define i32 @test_floattoi32(float %in) { Index: test/CodeGen/AArch64/flags-multiuse.ll =================================================================== --- 
test/CodeGen/AArch64/flags-multiuse.ll +++ test/CodeGen/AArch64/flags-multiuse.ll @@ -1,3 +1,4 @@ +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; LLVM should be able to cope with multiple uses of the same flag-setting Index: test/CodeGen/AArch64/floatdp_1source.ll =================================================================== --- test/CodeGen/AArch64/floatdp_1source.ll +++ test/CodeGen/AArch64/floatdp_1source.ll @@ -1,3 +1,4 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s @varhalf = global half 0.0 Index: test/CodeGen/AArch64/floatdp_2source.ll =================================================================== --- test/CodeGen/AArch64/floatdp_2source.ll +++ test/CodeGen/AArch64/floatdp_2source.ll @@ -1,3 +1,4 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s @varfloat = global float 0.0 Index: test/CodeGen/AArch64/fp-cond-sel.ll =================================================================== --- test/CodeGen/AArch64/fp-cond-sel.ll +++ test/CodeGen/AArch64/fp-cond-sel.ll @@ -1,3 +1,4 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s @varfloat = global float 0.0 Index: test/CodeGen/AArch64/fp-dp3.ll =================================================================== --- test/CodeGen/AArch64/fp-dp3.ll +++ test/CodeGen/AArch64/fp-dp3.ll @@ -1,3 +1,5 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -fp-contract=fast | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s -check-prefix=CHECK-NOFAST ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -fp-contract=fast | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -check-prefix=CHECK-NOFAST Index: test/CodeGen/AArch64/fp128-folding.ll =================================================================== --- test/CodeGen/AArch64/fp128-folding.ll +++ test/CodeGen/AArch64/fp128-folding.ll @@ -1,3 +1,4 @@ +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s declare void @bar(i8*, i8*, i32*) Index: test/CodeGen/AArch64/fp128.ll =================================================================== --- test/CodeGen/AArch64/fp128.ll +++ test/CodeGen/AArch64/fp128.ll @@ -1,3 +1,4 @@ +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s @lhs = global fp128 zeroinitializer Index: test/CodeGen/AArch64/fpimm.ll =================================================================== --- test/CodeGen/AArch64/fpimm.ll +++ test/CodeGen/AArch64/fpimm.ll @@ -1,3 +1,4 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s @varf32 = global float 0.0 Index: test/CodeGen/AArch64/frameaddr.ll =================================================================== --- 
test/CodeGen/AArch64/frameaddr.ll +++ test/CodeGen/AArch64/frameaddr.ll @@ -1,3 +1,4 @@ +; RUN: llc < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s define i8* @t() nounwind { Index: test/CodeGen/AArch64/func-argpassing.ll =================================================================== --- test/CodeGen/AArch64/func-argpassing.ll +++ test/CodeGen/AArch64/func-argpassing.ll @@ -1,3 +1,4 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s Index: test/CodeGen/AArch64/func-calls.ll =================================================================== --- test/CodeGen/AArch64/func-calls.ll +++ test/CodeGen/AArch64/func-calls.ll @@ -1,3 +1,4 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s Index: test/CodeGen/AArch64/global-alignment.ll =================================================================== --- test/CodeGen/AArch64/global-alignment.ll +++ test/CodeGen/AArch64/global-alignment.ll @@ -1,3 +1,4 @@ +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s @var32 = global [3 x i32] zeroinitializer Index: test/CodeGen/AArch64/got-abuse.ll =================================================================== --- test/CodeGen/AArch64/got-abuse.ll +++ test/CodeGen/AArch64/got-abuse.ll @@ -1,3 +1,5 @@ +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -relocation-model=pic < %s | FileCheck %s +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -relocation-model=pic -filetype=obj < %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s Index: test/CodeGen/AArch64/i128-align.ll =================================================================== --- test/CodeGen/AArch64/i128-align.ll +++ test/CodeGen/AArch64/i128-align.ll @@ -1,3 +1,4 @@ +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s %struct = type { i32, i128, i8 } Index: test/CodeGen/AArch64/illegal-float-ops.ll =================================================================== --- test/CodeGen/AArch64/illegal-float-ops.ll +++ test/CodeGen/AArch64/illegal-float-ops.ll @@ -1,3 +1,4 @@ +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s @varfloat = global float 0.0 Index: test/CodeGen/AArch64/init-array.ll =================================================================== --- test/CodeGen/AArch64/init-array.ll +++ test/CodeGen/AArch64/init-array.ll @@ -1,3 +1,5 @@ +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -verify-machineinstrs -use-init-array < %s | FileCheck %s +; RUN: llc -mtriple=aarch64_be-none-none-eabi 
-verify-machineinstrs -use-init-array < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -use-init-array < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs -use-init-array < %s | FileCheck %s Index: test/CodeGen/AArch64/inline-asm-constraints-badI.ll =================================================================== --- test/CodeGen/AArch64/inline-asm-constraints-badI.ll +++ test/CodeGen/AArch64/inline-asm-constraints-badI.ll @@ -1,3 +1,4 @@ +; RUN: not llc -mtriple=aarch64_be-none-linux-gnu < %s ; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s define void @foo() { Index: test/CodeGen/AArch64/inline-asm-constraints-badK.ll =================================================================== --- test/CodeGen/AArch64/inline-asm-constraints-badK.ll +++ test/CodeGen/AArch64/inline-asm-constraints-badK.ll @@ -1,3 +1,4 @@ +; RUN: not llc -mtriple=aarch64_be-none-linux-gnu < %s ; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s define void @foo() { Index: test/CodeGen/AArch64/inline-asm-constraints-badK2.ll =================================================================== --- test/CodeGen/AArch64/inline-asm-constraints-badK2.ll +++ test/CodeGen/AArch64/inline-asm-constraints-badK2.ll @@ -1,3 +1,4 @@ +; RUN: not llc -mtriple=aarch64_be-none-linux-gnu < %s ; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s define void @foo() { Index: test/CodeGen/AArch64/inline-asm-constraints-badL.ll =================================================================== --- test/CodeGen/AArch64/inline-asm-constraints-badL.ll +++ test/CodeGen/AArch64/inline-asm-constraints-badL.ll @@ -1,3 +1,4 @@ +; RUN: not llc -mtriple=aarch64_be-none-linux-gnu < %s ; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s define void @foo() { Index: test/CodeGen/AArch64/inline-asm-modifiers.ll =================================================================== --- test/CodeGen/AArch64/inline-asm-modifiers.ll +++ test/CodeGen/AArch64/inline-asm-modifiers.ll @@ -1,3 +1,4 @@ +; RUN: llc -mtriple=aarch64_be-none-linux-gnu -relocation-model=pic -no-integrated-as < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -no-integrated-as < %s | FileCheck %s @var_simple = hidden global i32 0 Index: test/CodeGen/AArch64/jump-table.ll =================================================================== --- test/CodeGen/AArch64/jump-table.ll +++ test/CodeGen/AArch64/jump-table.ll @@ -1,3 +1,5 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s +; RUN: llc -code-model=large -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; RUN: llc -code-model=large -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s Index: test/CodeGen/AArch64/large-consts.ll =================================================================== --- test/CodeGen/AArch64/large-consts.ll +++ test/CodeGen/AArch64/large-consts.ll @@ -1,3 +1,4 @@ +; RUN: llc -mtriple=aarch64_be-linux-gnu -o - %s -code-model=large -show-mc-encoding | FileCheck %s ; RUN: llc -mtriple=aarch64-linux-gnu -o - %s -code-model=large -show-mc-encoding | FileCheck %s ; Make sure the shift amount is encoded into the instructions by LLVM because Index: test/CodeGen/AArch64/large-frame.ll =================================================================== --- 
+++ test/CodeGen/AArch64/large-frame.ll
@@ -1,3 +1,4 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
 
 declare void @use_addr(i8*)
Index: test/CodeGen/AArch64/ldst-regoffset.ll
===================================================================
--- test/CodeGen/AArch64/ldst-regoffset.ll
+++ test/CodeGen/AArch64/ldst-regoffset.ll
@@ -1,3 +1,5 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 
Index: test/CodeGen/AArch64/ldst-unscaledimm.ll
===================================================================
--- test/CodeGen/AArch64/ldst-unscaledimm.ll
+++ test/CodeGen/AArch64/ldst-unscaledimm.ll
@@ -1,3 +1,5 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 
Index: test/CodeGen/AArch64/ldst-unsignedimm.ll
===================================================================
--- test/CodeGen/AArch64/ldst-unsignedimm.ll
+++ test/CodeGen/AArch64/ldst-unsignedimm.ll
@@ -1,3 +1,5 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
 
Index: test/CodeGen/AArch64/literal_pools.ll
===================================================================
--- test/CodeGen/AArch64/literal_pools.ll
+++ test/CodeGen/AArch64/literal_pools.ll
@@ -1,3 +1,7 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -code-model=large | FileCheck --check-prefix=CHECK-LARGE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -code-model=large -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP-LARGE %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -code-model=large | FileCheck --check-prefix=CHECK-LARGE %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
Index: test/CodeGen/AArch64/local_vars.ll
===================================================================
--- test/CodeGen/AArch64/local_vars.ll
+++ test/CodeGen/AArch64/local_vars.ll
@@ -1,3 +1,5 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -O0 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -O0 -disable-fp-elim | FileCheck -check-prefix CHECK-WITHFP %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 -disable-fp-elim | FileCheck -check-prefix CHECK-WITHFP %s
 
Index: test/CodeGen/AArch64/logical-imm.ll
===================================================================
--- test/CodeGen/AArch64/logical-imm.ll
+++ test/CodeGen/AArch64/logical-imm.ll
@@ -1,3 +1,4 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 
 @var32 = global i32 0
Index: test/CodeGen/AArch64/logical_shifted_reg.ll
===================================================================
--- test/CodeGen/AArch64/logical_shifted_reg.ll
+++ test/CodeGen/AArch64/logical_shifted_reg.ll
@@ -1,3 +1,4 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -O0 | FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s
 
 @var1_32 = global i32 0
Index: test/CodeGen/AArch64/mature-mc-support.ll
===================================================================
--- test/CodeGen/AArch64/mature-mc-support.ll
+++ test/CodeGen/AArch64/mature-mc-support.ll
@@ -1,3 +1,5 @@
+; RUN: not llc -mtriple=aarch64_be-pc-linux < %s > /dev/null 2> %t1
+; RUN: not llc -mtriple=aarch64_be-pc-linux -filetype=obj < %s > /dev/null 2> %t2
 ; Test that inline assembly is parsed by the MC layer when MC support is mature
 ; (even when the output is assembly).
 
Index: test/CodeGen/AArch64/movw-consts.ll
===================================================================
--- test/CodeGen/AArch64/movw-consts.ll
+++ test/CodeGen/AArch64/movw-consts.ll
@@ -1,3 +1,4 @@
+; RUN: llc -verify-machineinstrs -O0 < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s
 ; RUN: llc -verify-machineinstrs -O0 < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 
 define i64 @test0() {
Index: test/CodeGen/AArch64/movw-shift-encoding.ll
===================================================================
--- test/CodeGen/AArch64/movw-shift-encoding.ll
+++ test/CodeGen/AArch64/movw-shift-encoding.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-linux-gnu < %s -show-mc-encoding -code-model=large | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu < %s -show-mc-encoding -code-model=large | FileCheck %s
 
 @var = global i32 0
Index: test/CodeGen/AArch64/neon-2velem-high.ll
===================================================================
--- test/CodeGen/AArch64/neon-2velem-high.ll
+++ test/CodeGen/AArch64/neon-2velem-high.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 
 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
Index: test/CodeGen/AArch64/neon-2velem.ll
===================================================================
--- test/CodeGen/AArch64/neon-2velem.ll
+++ test/CodeGen/AArch64/neon-2velem.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 
 declare <2 x double> @llvm.aarch64.neon.vmulx.v2f64(<2 x double>, <2 x double>)
Index: test/CodeGen/AArch64/neon-3vdiff.ll
===================================================================
--- test/CodeGen/AArch64/neon-3vdiff.ll
+++ test/CodeGen/AArch64/neon-3vdiff.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>)
Index: test/CodeGen/AArch64/neon-aba-abd.ll
===================================================================
--- test/CodeGen/AArch64/neon-aba-abd.ll
+++ test/CodeGen/AArch64/neon-aba-abd.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>)
Index: test/CodeGen/AArch64/neon-across.ll
===================================================================
--- test/CodeGen/AArch64/neon-across.ll
+++ test/CodeGen/AArch64/neon-across.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 declare float @llvm.aarch64.neon.vminnmv(<4 x float>)
Index: test/CodeGen/AArch64/neon-add-pairwise.ll
===================================================================
--- test/CodeGen/AArch64/neon-add-pairwise.ll
+++ test/CodeGen/AArch64/neon-add-pairwise.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 declare <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8>, <8 x i8>)
Index: test/CodeGen/AArch64/neon-add-sub.ll
===================================================================
--- test/CodeGen/AArch64/neon-add-sub.ll
+++ test/CodeGen/AArch64/neon-add-sub.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <8 x i8> @add8xi8(<8 x i8> %A, <8 x i8> %B) {
Index: test/CodeGen/AArch64/neon-bitcast.ll
===================================================================
--- test/CodeGen/AArch64/neon-bitcast.ll
+++ test/CodeGen/AArch64/neon-bitcast.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s
 
 ; From <8 x i8>
Index: test/CodeGen/AArch64/neon-bitwise-instructions.ll
===================================================================
--- test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <8 x i8> @and8xi8(<8 x i8> %a, <8 x i8> %b) {
Index: test/CodeGen/AArch64/neon-bsl.ll
===================================================================
--- test/CodeGen/AArch64/neon-bsl.ll
+++ test/CodeGen/AArch64/neon-bsl.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 declare <2 x double> @llvm.arm.neon.vbsl.v2f64(<2 x double>, <2 x double>, <2 x double>)
Index: test/CodeGen/AArch64/neon-compare-instructions.ll
===================================================================
--- test/CodeGen/AArch64/neon-compare-instructions.ll
+++ test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 define <8 x i8> @cmeq8xi8(<8 x i8> %A, <8 x i8> %B) {
Index: test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll
===================================================================
--- test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll
+++ test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <4 x i32> @copyTuple.QPair(i8* %a, i8* %b) {
Index: test/CodeGen/AArch64/neon-crypto.ll
===================================================================
--- test/CodeGen/AArch64/neon-crypto.ll
+++ test/CodeGen/AArch64/neon-crypto.ll
@@ -1,3 +1,5 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon -mattr=+crypto | FileCheck %s
+; RUN: not llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon 2>&1 | FileCheck --check-prefix=CHECK-NO-CRYPTO %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -mattr=+crypto | FileCheck %s
 ; RUN: not llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon 2>&1 | FileCheck --check-prefix=CHECK-NO-CRYPTO %s
 
Index: test/CodeGen/AArch64/neon-diagnostics.ll
===================================================================
--- test/CodeGen/AArch64/neon-diagnostics.ll
+++ test/CodeGen/AArch64/neon-diagnostics.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
Index: test/CodeGen/AArch64/neon-extract.ll
===================================================================
--- test/CodeGen/AArch64/neon-extract.ll
+++ test/CodeGen/AArch64/neon-extract.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <8 x i8> @test_vext_s8(<8 x i8> %a, <8 x i8> %b) {
Index: test/CodeGen/AArch64/neon-facge-facgt.ll
===================================================================
--- test/CodeGen/AArch64/neon-facge-facgt.ll
+++ test/CodeGen/AArch64/neon-facge-facgt.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 declare <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float>, <2 x float>)
Index: test/CodeGen/AArch64/neon-fma.ll
===================================================================
--- test/CodeGen/AArch64/neon-fma.ll
+++ test/CodeGen/AArch64/neon-fma.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 
 define <2 x float> @fmla2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
Index: test/CodeGen/AArch64/neon-fpround_f128.ll
===================================================================
--- test/CodeGen/AArch64/neon-fpround_f128.ll
+++ test/CodeGen/AArch64/neon-fpround_f128.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 
 define <1 x double> @test_fpround_v1f128(<1 x fp128>* %a) {
Index: test/CodeGen/AArch64/neon-frsqrt-frecp.ll
===================================================================
--- test/CodeGen/AArch64/neon-frsqrt-frecp.ll
+++ test/CodeGen/AArch64/neon-frsqrt-frecp.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 ; Set of tests for when the intrinsic is used.
Index: test/CodeGen/AArch64/neon-halving-add-sub.ll
===================================================================
--- test/CodeGen/AArch64/neon-halving-add-sub.ll
+++ test/CodeGen/AArch64/neon-halving-add-sub.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 declare <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>)
Index: test/CodeGen/AArch64/neon-load-store-v1i32.ll
===================================================================
--- test/CodeGen/AArch64/neon-load-store-v1i32.ll
+++ test/CodeGen/AArch64/neon-load-store-v1i32.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 ; Test load/store of v1i8, v1i16, v1i32 types can be selected correctly
Index: test/CodeGen/AArch64/neon-max-min-pairwise.ll
===================================================================
--- test/CodeGen/AArch64/neon-max-min-pairwise.ll
+++ test/CodeGen/AArch64/neon-max-min-pairwise.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 declare <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8>, <8 x i8>)
Index: test/CodeGen/AArch64/neon-max-min.ll
===================================================================
--- test/CodeGen/AArch64/neon-max-min.ll
+++ test/CodeGen/AArch64/neon-max-min.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 declare <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>)
Index: test/CodeGen/AArch64/neon-misc.ll
===================================================================
--- test/CodeGen/AArch64/neon-misc.ll
+++ test/CodeGen/AArch64/neon-misc.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 
Index: test/CodeGen/AArch64/neon-mla-mls.ll
===================================================================
--- test/CodeGen/AArch64/neon-mla-mls.ll
+++ test/CodeGen/AArch64/neon-mla-mls.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
Index: test/CodeGen/AArch64/neon-mov.ll
===================================================================
--- test/CodeGen/AArch64/neon-mov.ll
+++ test/CodeGen/AArch64/neon-mov.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <8 x i8> @movi8b() {
Index: test/CodeGen/AArch64/neon-mul-div.ll
===================================================================
--- test/CodeGen/AArch64/neon-mul-div.ll
+++ test/CodeGen/AArch64/neon-mul-div.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
Index: test/CodeGen/AArch64/neon-or-combine.ll
===================================================================
--- test/CodeGen/AArch64/neon-or-combine.ll
+++ test/CodeGen/AArch64/neon-or-combine.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 ; Check that the DAGCombiner does not crash with an assertion failure
Index: test/CodeGen/AArch64/neon-perm.ll
===================================================================
--- test/CodeGen/AArch64/neon-perm.ll
+++ test/CodeGen/AArch64/neon-perm.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 %struct.int8x8x2_t = type { [2 x <8 x i8>] }
Index: test/CodeGen/AArch64/neon-rounding-halving-add.ll
===================================================================
--- test/CodeGen/AArch64/neon-rounding-halving-add.ll
+++ test/CodeGen/AArch64/neon-rounding-halving-add.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 declare <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>)
Index: test/CodeGen/AArch64/neon-rounding-shift.ll
===================================================================
--- test/CodeGen/AArch64/neon-rounding-shift.ll
+++ test/CodeGen/AArch64/neon-rounding-shift.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 declare <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8>, <8 x i8>)
Index: test/CodeGen/AArch64/neon-saturating-add-sub.ll
===================================================================
--- test/CodeGen/AArch64/neon-saturating-add-sub.ll
+++ test/CodeGen/AArch64/neon-saturating-add-sub.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 declare <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8>, <8 x i8>)
Index: test/CodeGen/AArch64/neon-saturating-rounding-shift.ll
===================================================================
--- test/CodeGen/AArch64/neon-saturating-rounding-shift.ll
+++ test/CodeGen/AArch64/neon-saturating-rounding-shift.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 declare <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8>, <8 x i8>)
Index: test/CodeGen/AArch64/neon-saturating-shift.ll
===================================================================
--- test/CodeGen/AArch64/neon-saturating-shift.ll
+++ test/CodeGen/AArch64/neon-saturating-shift.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>)
Index: test/CodeGen/AArch64/neon-scalar-abs.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-abs.ll
+++ test/CodeGen/AArch64/neon-scalar-abs.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define i64 @test_vabsd_s64(i64 %a) {
Index: test/CodeGen/AArch64/neon-scalar-add-sub.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-add-sub.ll
+++ test/CodeGen/AArch64/neon-scalar-add-sub.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) {
Index: test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
+++ test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 
 declare float @llvm.fma.f32(float, float, float)
Index: test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll
+++ test/CodeGen/AArch64/neon-scalar-by-elem-mul.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 
 define float @test_fmul_lane_ss2S(float %a, <2 x float> %v) {
Index: test/CodeGen/AArch64/neon-scalar-compare.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-compare.ll
+++ test/CodeGen/AArch64/neon-scalar-compare.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 ;; Scalar Integer Compare
Index: test/CodeGen/AArch64/neon-scalar-copy.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-copy.ll
+++ test/CodeGen/AArch64/neon-scalar-copy.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 define float @test_dup_sv2S(<2 x float> %v) {
Index: test/CodeGen/AArch64/neon-scalar-cvt.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-cvt.ll
+++ test/CodeGen/AArch64/neon-scalar-cvt.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 define float @test_vcvts_f32_s32(i32 %a) {
Index: test/CodeGen/AArch64/neon-scalar-ext.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-ext.ll
+++ test/CodeGen/AArch64/neon-scalar-ext.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 define <1 x i64> @test_zext_v1i32_v1i64(<2 x i32> %v) nounwind readnone {
Index: test/CodeGen/AArch64/neon-scalar-extract-narrow.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-extract-narrow.ll
+++ test/CodeGen/AArch64/neon-scalar-extract-narrow.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 define i8 @test_vqmovunh_s16(i16 %a) {
Index: test/CodeGen/AArch64/neon-scalar-fabd.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-fabd.ll
+++ test/CodeGen/AArch64/neon-scalar-fabd.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define float @test_vabds_f32(float %a, float %b) {
Index: test/CodeGen/AArch64/neon-scalar-fcvt.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-fcvt.ll
+++ test/CodeGen/AArch64/neon-scalar-fcvt.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 ;; Scalar Floating-point Convert
Index: test/CodeGen/AArch64/neon-scalar-fp-compare.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-fp-compare.ll
+++ test/CodeGen/AArch64/neon-scalar-fp-compare.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 ;; Scalar Floating-point Compare
Index: test/CodeGen/AArch64/neon-scalar-mul.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-mul.ll
+++ test/CodeGen/AArch64/neon-scalar-mul.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 define i16 @test_vqdmulhh_s16(i16 %a, i16 %b) {
Index: test/CodeGen/AArch64/neon-scalar-neg.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-neg.ll
+++ test/CodeGen/AArch64/neon-scalar-neg.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define i64 @test_vnegd_s64(i64 %a) {
Index: test/CodeGen/AArch64/neon-scalar-recip.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-recip.ll
+++ test/CodeGen/AArch64/neon-scalar-recip.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 define float @test_vrecpss_f32(float %a, float %b) {
Index: test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll
+++ test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 declare <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64>)
Index: test/CodeGen/AArch64/neon-scalar-rounding-shift.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-rounding-shift.ll
+++ test/CodeGen/AArch64/neon-scalar-rounding-shift.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
Index: test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll
+++ test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 declare <1 x i8> @llvm.arm.neon.vqaddu.v1i8(<1 x i8>, <1 x i8>)
Index: test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll
+++ test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>)
Index: test/CodeGen/AArch64/neon-scalar-saturating-shift.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-saturating-shift.ll
+++ test/CodeGen/AArch64/neon-scalar-saturating-shift.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>)
Index: test/CodeGen/AArch64/neon-scalar-shift-imm.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-shift-imm.ll
+++ test/CodeGen/AArch64/neon-scalar-shift-imm.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define i64 @test_vshrd_n_s64(i64 %a) {
Index: test/CodeGen/AArch64/neon-scalar-shift.ll
===================================================================
--- test/CodeGen/AArch64/neon-scalar-shift.ll
+++ test/CodeGen/AArch64/neon-scalar-shift.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>)
Index: test/CodeGen/AArch64/neon-select_cc.ll
===================================================================
--- test/CodeGen/AArch64/neon-select_cc.ll
+++ test/CodeGen/AArch64/neon-select_cc.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 
 define <8x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8x i8> %c, <8x i8> %d ) {
Index: test/CodeGen/AArch64/neon-shift-left-long.ll
===================================================================
--- test/CodeGen/AArch64/neon-shift-left-long.ll
+++ test/CodeGen/AArch64/neon-shift-left-long.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <8 x i16> @test_sshll_v8i8(<8 x i8> %a) {
Index: test/CodeGen/AArch64/neon-shift.ll
===================================================================
--- test/CodeGen/AArch64/neon-shift.ll
+++ test/CodeGen/AArch64/neon-shift.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 declare <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8>, <8 x i8>)
Index: test/CodeGen/AArch64/neon-shl-ashr-lshr.ll
===================================================================
--- test/CodeGen/AArch64/neon-shl-ashr-lshr.ll
+++ test/CodeGen/AArch64/neon-shl-ashr-lshr.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <8 x i8> @shl.v8i8(<8 x i8> %a, <8 x i8> %b) {
Index: test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll
===================================================================
--- test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll
+++ test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define void @test_ldst1_v16i8(<16 x i8>* %ptr, <16 x i8>* %ptr2) {
@@ -2,4 +3,4 @@
 ; CHECK-LABEL: test_ldst1_v16i8:
-; CHECK: ld1 {v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}]
+; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+|sp}}]
+; CHECK: str q{{[0-9]+}}, [x{{[0-9]+|sp}}]
 %tmp = load <16 x i8>* %ptr
@@ -11,8 +12,8 @@
 define void @test_ldst1_v8i16(<8 x i16>* %ptr, <8 x i16>* %ptr2) {
 ; CHECK-LABEL: test_ldst1_v8i16:
-; CHECK: ld1 {v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}]
+; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+|sp}}]
+; CHECK: str q{{[0-9]+}}, [x{{[0-9]+|sp}}]
 %tmp = load <8 x i16>* %ptr
 store <8 x i16> %tmp, <8 x i16>* %ptr2
 ret void
@@ -20,8 +21,8 @@
 define void @test_ldst1_v4i32(<4 x i32>* %ptr, <4 x i32>* %ptr2) {
 ; CHECK-LABEL: test_ldst1_v4i32:
-; CHECK: ld1 {v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}]
+; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+|sp}}]
+; CHECK: str q{{[0-9]+}}, [x{{[0-9]+|sp}}]
 %tmp = load <4 x i32>* %ptr
 store <4 x i32> %tmp, <4 x i32>* %ptr2
 ret void
@@ -29,8 +30,8 @@
 define void @test_ldst1_v2i64(<2 x i64>* %ptr, <2 x i64>* %ptr2) {
 ; CHECK-LABEL: test_ldst1_v2i64:
-; CHECK: ld1 {v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}]
+; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+|sp}}]
+; CHECK: str q{{[0-9]+}}, [x{{[0-9]+|sp}}]
 %tmp = load <2 x i64>* %ptr
 store <2 x i64> %tmp, <2 x i64>* %ptr2
 ret void
@@ -38,8 +39,8 @@
 define void @test_ldst1_v8i8(<8 x i8>* %ptr, <8 x i8>* %ptr2) {
 ; CHECK-LABEL: test_ldst1_v8i8:
-; CHECK: ld1 {v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}]
+; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+|sp}}]
+; CHECK: str d{{[0-9]+}}, [x{{[0-9]+|sp}}]
 %tmp = load <8 x i8>* %ptr
 store <8 x i8> %tmp, <8 x i8>* %ptr2
 ret void
@@ -47,8 +48,8 @@
 define void @test_ldst1_v4i16(<4 x i16>* %ptr, <4 x i16>* %ptr2) {
 ; CHECK-LABEL: test_ldst1_v4i16:
-; CHECK: ld1 {v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}]
+; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+|sp}}]
+; CHECK: str d{{[0-9]+}}, [x{{[0-9]+|sp}}]
 %tmp = load <4 x i16>* %ptr
 store <4 x i16> %tmp, <4 x i16>* %ptr2
 ret void
@@ -56,8 +57,8 @@
 define void @test_ldst1_v2i32(<2 x i32>* %ptr, <2 x i32>* %ptr2) {
 ; CHECK-LABEL: test_ldst1_v2i32:
-; CHECK: ld1 {v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}]
+; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+|sp}}]
+; CHECK: str d{{[0-9]+}}, [x{{[0-9]+|sp}}]
 %tmp = load <2 x i32>* %ptr
 store <2 x i32> %tmp, <2 x i32>* %ptr2
 ret void
@@ -65,8 +66,8 @@
 define void @test_ldst1_v1i64(<1 x i64>* %ptr, <1 x i64>* %ptr2) {
 ; CHECK-LABEL: test_ldst1_v1i64:
-; CHECK: ld1 {v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}]
-; CHECK: st1 {v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}]
+; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+|sp}}]
+; CHECK: str d{{[0-9]+}}, [x{{[0-9]+|sp}}]
 %tmp = load <1 x i64>* %ptr
 store <1 x i64> %tmp, <1 x i64>* %ptr2
 ret void
Index: test/CodeGen/AArch64/neon-simd-ldst.ll
===================================================================
--- test/CodeGen/AArch64/neon-simd-ldst.ll
+++ test/CodeGen/AArch64/neon-simd-ldst.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define void @test_ldstq_4v(i8* noalias %io, i32 %count) {
Index: test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll
===================================================================
--- test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll
+++ test/CodeGen/AArch64/neon-simd-post-ldst-multi-elem.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 ;Check for a post-increment updating load.
Index: test/CodeGen/AArch64/neon-simd-post-ldst-one.ll
===================================================================
--- test/CodeGen/AArch64/neon-simd-post-ldst-one.ll
+++ test/CodeGen/AArch64/neon-simd-post-ldst-one.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define { [2 x <16 x i8>] } @test_vld2q_dup_fx_update(i8* %a, i8** %ptr) {
Index: test/CodeGen/AArch64/neon-simd-shift.ll
===================================================================
--- test/CodeGen/AArch64/neon-simd-shift.ll
+++ test/CodeGen/AArch64/neon-simd-shift.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) {
Index: test/CodeGen/AArch64/neon-simd-tbl.ll
===================================================================
--- test/CodeGen/AArch64/neon-simd-tbl.ll
+++ test/CodeGen/AArch64/neon-simd-tbl.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 declare <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
Index: test/CodeGen/AArch64/neon-simd-vget.ll
===================================================================
--- test/CodeGen/AArch64/neon-simd-vget.ll
+++ test/CodeGen/AArch64/neon-simd-vget.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <8 x i8> @test_vget_high_s8(<16 x i8> %a) {
Index: test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll
===================================================================
--- test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll
+++ test/CodeGen/AArch64/neon-spill-fpr8-fpr16.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
 
 ; This file tests the spill of FPR8/FPR16. The volatile loads/stores force the
Index: test/CodeGen/AArch64/neon-truncStore-extLoad.ll
===================================================================
--- test/CodeGen/AArch64/neon-truncStore-extLoad.ll
+++ test/CodeGen/AArch64/neon-truncStore-extLoad.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 ; A vector TruncStore can not be selected.
@@ -5,7 +6,7 @@
 define void @truncStore.v2i64(<2 x i64> %a, <2 x i32>* %result) {
 ; CHECK-LABEL: truncStore.v2i64:
 ; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
-; CHECK: st1 {v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}]
+; CHECK: str d{{[0-9]+}}, [x{{[0-9]+|sp}}]
 %b = trunc <2 x i64> %a to <2 x i32>
 store <2 x i32> %b, <2 x i32>* %result
 ret void
@@ -14,7 +15,7 @@
 define void @truncStore.v4i32(<4 x i32> %a, <4 x i16>* %result) {
 ; CHECK-LABEL: truncStore.v4i32:
 ; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
-; CHECK: st1 {v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}]
+; CHECK: str d{{[0-9]+}}, [x{{[0-9]+|sp}}]
 %b = trunc <4 x i32> %a to <4 x i16>
 store <4 x i16> %b, <4 x i16>* %result
 ret void
@@ -23,7 +24,7 @@
 define void @truncStore.v8i16(<8 x i16> %a, <8 x i8>* %result) {
 ; CHECK-LABEL: truncStore.v8i16:
 ; CHECK: xtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h
-; CHECK: st1 {v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}]
+; CHECK: str d{{[0-9]+}}, [x{{[0-9]+|sp}}]
 %b = trunc <8 x i16> %a to <8 x i8>
 store <8 x i8> %b, <8 x i8>* %result
 ret void
@@ -54,4 +55,4 @@
 %vecext = extractelement <4 x i8> %a, i32 0
 %conv = zext i8 %vecext to i32
 ret i32 %conv
-}
\ No newline at end of file
+}
Index: test/CodeGen/AArch64/neon-v1i1-setcc.ll
===================================================================
--- test/CodeGen/AArch64/neon-v1i1-setcc.ll
+++ test/CodeGen/AArch64/neon-v1i1-setcc.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 
 ; This file test the DAG node like "v1i1 SETCC v1i64, v1i64". As the v1i1 type
Index: test/CodeGen/AArch64/neon-vector-list-spill.ll
===================================================================
--- test/CodeGen/AArch64/neon-vector-list-spill.ll
+++ test/CodeGen/AArch64/neon-vector-list-spill.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon -fp-contract=fast
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast
 
 ; FIXME: We should not generate ld/st for such register spill/fill, because the
Index: test/CodeGen/AArch64/regress-bitcast-formals.ll
===================================================================
--- test/CodeGen/AArch64/regress-bitcast-formals.ll
+++ test/CodeGen/AArch64/regress-bitcast-formals.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
 
 ; CallingConv.td requires a bitcast for vector arguments. Make sure we're
Index: test/CodeGen/AArch64/regress-f128csel-flags.ll
===================================================================
--- test/CodeGen/AArch64/regress-f128csel-flags.ll
+++ test/CodeGen/AArch64/regress-f128csel-flags.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
 
 ; We used to not mark NZCV as being used in the continuation basic-block
Index: test/CodeGen/AArch64/regress-fp128-livein.ll
===================================================================
--- test/CodeGen/AArch64/regress-fp128-livein.ll
+++ test/CodeGen/AArch64/regress-fp128-livein.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -verify-machineinstrs < %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s
 
 ; Regression test for NZCV reg live-in not being added to fp128csel IfTrue BB,
Index: test/CodeGen/AArch64/regress-tail-livereg.ll
===================================================================
--- test/CodeGen/AArch64/regress-tail-livereg.ll
+++ test/CodeGen/AArch64/regress-tail-livereg.ll
@@ -1,3 +1,4 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
 
 @var = global void()* zeroinitializer
Index: test/CodeGen/AArch64/regress-tblgen-chains.ll
===================================================================
--- test/CodeGen/AArch64/regress-tblgen-chains.ll
+++ test/CodeGen/AArch64/regress-tblgen-chains.ll
@@ -1,3 +1,4 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
 
 ; When generating DAG selection tables, TableGen used to only flag an
Index: test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
===================================================================
--- test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
+++ test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -disable-fp-elim < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s
 
 @var = global i32 0
Index: test/CodeGen/AArch64/regress-wzr-allocatable.ll
===================================================================
--- test/CodeGen/AArch64/regress-wzr-allocatable.ll
+++ test/CodeGen/AArch64/regress-wzr-allocatable.ll
@@ -1,3 +1,4 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -O0
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0
 
 ; When WZR wasn't marked as reserved, this function tried to allocate
Index: test/CodeGen/AArch64/returnaddr.ll
===================================================================
--- test/CodeGen/AArch64/returnaddr.ll
+++ test/CodeGen/AArch64/returnaddr.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s
 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 
 define i8* @rt0(i32 %x) nounwind readnone {
Index: test/CodeGen/AArch64/setcc-takes-i32.ll
===================================================================
--- test/CodeGen/AArch64/setcc-takes-i32.ll
+++ test/CodeGen/AArch64/setcc-takes-i32.ll
@@ -1,3 +1,4 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
 
 ; Most important point here is that the promotion of the i1 works
Index: test/CodeGen/AArch64/sext_inreg.ll
===================================================================
--- test/CodeGen/AArch64/sext_inreg.ll
+++ test/CodeGen/AArch64/sext_inreg.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 ; For formal arguments, we have the following vector type promotion,
Index: test/CodeGen/AArch64/sibling-call.ll
===================================================================
--- test/CodeGen/AArch64/sibling-call.ll
+++ test/CodeGen/AArch64/sibling-call.ll
@@ -1,3 +1,4 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 
 declare void @callee_stack0()
Index: test/CodeGen/AArch64/sincos-expansion.ll
===================================================================
--- test/CodeGen/AArch64/sincos-expansion.ll
+++ test/CodeGen/AArch64/sincos-expansion.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
 
 define float @test_sincos_f32(float %f) {
Index: test/CodeGen/AArch64/sincospow-vector-expansion.ll
===================================================================
--- test/CodeGen/AArch64/sincospow-vector-expansion.ll
+++ test/CodeGen/AArch64/sincospow-vector-expansion.ll
@@ -1,3 +1,4 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
 
Index: test/CodeGen/AArch64/tail-call.ll
===================================================================
--- test/CodeGen/AArch64/tail-call.ll
+++ test/CodeGen/AArch64/tail-call.ll
@@ -1,3 +1,4 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -tailcallopt | FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s
 
 declare fastcc void @callee_stack0()
Index: test/CodeGen/AArch64/tls-dynamic-together.ll
===================================================================
--- test/CodeGen/AArch64/tls-dynamic-together.ll
+++ test/CodeGen/AArch64/tls-dynamic-together.ll
@@ -1,3 +1,4 @@
+; RUN: llc -O0 -mtriple=aarch64_be-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -O0 -mtriple=aarch64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s
 
 ; If the .tlsdesccall and blr parts are emitted completely separately (even with
Index: test/CodeGen/AArch64/tls-dynamics.ll
===================================================================
--- test/CodeGen/AArch64/tls-dynamics.ll
+++ test/CodeGen/AArch64/tls-dynamics.ll
@@ -1,3 +1,5 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
 
Index: test/CodeGen/AArch64/tls-execs.ll
===================================================================
--- test/CodeGen/AArch64/tls-execs.ll
+++ test/CodeGen/AArch64/tls-execs.ll
@@ -1,3 +1,5 @@
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -show-mc-encoding < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
 
Index: test/CodeGen/AArch64/tst-br.ll
===================================================================
--- test/CodeGen/AArch64/tst-br.ll
+++ test/CodeGen/AArch64/tst-br.ll
@@ -1,3 +1,4 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 
 ; We've got the usual issues with LLVM reordering blocks here. The
Index: test/CodeGen/AArch64/variadic.ll
===================================================================
--- test/CodeGen/AArch64/variadic.ll
+++ test/CodeGen/AArch64/variadic.ll
@@ -1,3 +1,5 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 < %s | FileCheck --check-prefix=CHECK-NOFP %s
 ; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 < %s | FileCheck --check-prefix=CHECK-NOFP %s
 
Index: test/CodeGen/AArch64/zero-reg.ll
===================================================================
--- test/CodeGen/AArch64/zero-reg.ll
+++ test/CodeGen/AArch64/zero-reg.ll
@@ -1,3 +1,4 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
 
 @var32 = global i32 0