Index: include/llvm/ADT/Triple.h =================================================================== --- include/llvm/ADT/Triple.h +++ include/llvm/ADT/Triple.h @@ -86,6 +86,7 @@ enum SubArchType { NoSubArch, + ARMSubArch_v8_1a, ARMSubArch_v8, ARMSubArch_v7, ARMSubArch_v7em, Index: include/llvm/Support/ARMBuildAttributes.h =================================================================== --- include/llvm/Support/ARMBuildAttributes.h +++ include/llvm/Support/ARMBuildAttributes.h @@ -106,7 +106,7 @@ v6_M = 11, // e.g. Cortex M1 v6S_M = 12, // v6_M with the System extensions v7E_M = 13, // v7_M with DSP extensions - v8 = 14 // v8, AArch32 + v8 = 14, // v8,v8.1a AArch32 }; enum CPUArchProfile { // (=7), uleb128 @@ -145,6 +145,7 @@ AllowNeon = 1, // SIMDv1 was permitted AllowNeon2 = 2, // SIMDv2 was permitted (Half-precision FP, MAC operations) AllowNeonARMv8 = 3, // ARM v8-A SIMD was permitted + AllowNeonARMv8_1a = 4,// ARM v8.1-A SIMD was permitted (RDMA) // Tag_ABI_PCS_R9_use, (=14), uleb128 R9IsGPR = 0, // R9 used as v6 (just another callee-saved register) Index: lib/Support/Triple.cpp =================================================================== --- lib/Support/Triple.cpp +++ lib/Support/Triple.cpp @@ -281,6 +281,7 @@ .Cases("v7", "v7a", "v7em", "v7l", arch) .Cases("v7m", "v7r", "v7s", arch) .Cases("v8", "v8a", arch) + .Cases("v8.1", "v8.1a", arch) .Default(Triple::UnknownArch); } @@ -403,6 +404,7 @@ SubArchName = SubArchName.substr(0, SubArchName.size() - 2); return StringSwitch(SubArchName) + .EndsWith("v8.1a", Triple::ARMSubArch_v8_1a) .EndsWith("v8", Triple::ARMSubArch_v8) .EndsWith("v8a", Triple::ARMSubArch_v8) .EndsWith("v7", Triple::ARMSubArch_v7) @@ -1109,6 +1111,7 @@ .Cases("v7m", "v7-m", "cortex-m3") .Cases("v7em", "v7e-m", "cortex-m4") .Cases("v8", "v8a", "v8-a", "cortex-a53") + .Cases("v8.1", "v8.1a", "v8.1-a", "generic-armv8.1-a") .Default(nullptr); else result = llvm::StringSwitch(MArch) Index: lib/Target/AArch64/AArch64.td 
=================================================================== --- lib/Target/AArch64/AArch64.td +++ lib/Target/AArch64/AArch64.td @@ -32,6 +32,9 @@ def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", "Enable ARMv8 CRC-32 checksum instructions">; +def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true", + "Enable ARMv8.1a extensions", [FeatureCRC]>; + /// Cyclone has register move instructions which are "free". def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", "Has zero-cycle register moves">; @@ -89,6 +92,10 @@ FeatureNEON, FeatureCRC]>; +def : ProcessorModel<"generic-armv8.1-a", NoSchedModel, [FeatureV8_1a, + FeatureNEON, + FeatureCrypto]>; + def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>; def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>; // FIXME: Cortex-A72 is currently modelled as an Cortex-A57. Index: lib/Target/AArch64/AArch64InstrFormats.td =================================================================== --- lib/Target/AArch64/AArch64InstrFormats.td +++ lib/Target/AArch64/AArch64InstrFormats.td @@ -3282,6 +3282,10 @@ : BaseLoadStoreExclusive { bits<5> Rt; bits<5> Rn; + let Inst{20-16} = 0b11111; + let Unpredictable{20-16} = 0b11111; + let Inst{14-10} = 0b11111; + let Unpredictable{14-10} = 0b11111; let Inst{9-5} = Rn; let Inst{4-0} = Rt; @@ -5298,6 +5302,27 @@ let Inst{4-0} = Rd; } +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDThreeScalarTied size, bit R, bits<5> opcode, + dag oops, dag iops, string asm, + list pattern> + : I, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21} = R; + let Inst{20-16} = Rm; + let Inst{15-11} = opcode; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + multiclass SIMDThreeScalarD opc, string asm, SDPatternOperator OpNode> { def v1i64 : BaseSIMDThreeScalar; } +multiclass 
SIMDThreeScalarHSTied opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v1i32: BaseSIMDThreeScalarTied; + def v1i16: BaseSIMDThreeScalarTied; +} + multiclass SIMDThreeScalarSD opc, string asm, SDPatternOperator OpNode = null_frag> { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { @@ -8517,6 +8552,197 @@ } // end of 'let Predicates = [HasNEON]' //---------------------------------------------------------------------------- +// AdvSIMD v8.1 Rounding Double Multiply Add/Subtract +//---------------------------------------------------------------------------- + +let Predicates = [HasNEON, HasV8_1a] in { + +class BaseSIMDThreeSameVectorTiedR0 size, bits<5> opcode, + RegisterOperand regtype, string asm, + string kind, list pattern> + : BaseSIMDThreeSameVectorTied { + let Inst{21}=0; +} +multiclass SIMDThreeSameVectorSQRDMLxHTiedHS opc, string asm, + SDPatternOperator Accum> { + def v4i16 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b01, opc, V64, asm, ".4h", + [(set (v4i16 V64:$dst), + (Accum (v4i16 V64:$Rd), + (v4i16 (int_aarch64_neon_sqrdmulh (v4i16 V64:$Rn), + (v4i16 V64:$Rm)))))]>; + def v8i16 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b01, opc, V128, asm, ".8h", + [(set (v8i16 V128:$dst), + (Accum (v8i16 V128:$Rd), + (v8i16 (int_aarch64_neon_sqrdmulh (v8i16 V128:$Rn), + (v8i16 V128:$Rm)))))]>; + def v2i32 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b10, opc, V64, asm, ".2s", + [(set (v2i32 V64:$dst), + (Accum (v2i32 V64:$Rd), + (v2i32 (int_aarch64_neon_sqrdmulh (v2i32 V64:$Rn), + (v2i32 V64:$Rm)))))]>; + def v4i32 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b10, opc, V128, asm, ".4s", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqrdmulh (v4i32 V128:$Rn), + (v4i32 V128:$Rm)))))]>; +} + +multiclass SIMDIndexedSQRDMLxHSDTied opc, string asm, + SDPatternOperator Accum> { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, + V64, V64, V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4i16 V64:$dst), + (Accum 
(v4i16 V64:$Rd), + (v4i16 (int_aarch64_neon_sqrdmulh + (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + // FIXME: uncomment the following, after backend will support i16 neon type + //def : Pat<(i16 (Accum (i16 FPR16Op:$Rd), + // (i16 (vector_extract (v4i16 + // (int_aarch64_neon_sqdmull (v4i16 V64:$Rn), + // (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + // VectorIndexH:$idx)))), + // (i64 0))))), + // (EXTRACT_SUBREG + // (!cast(NAME # v4i16_indexed) + // (SUBREG_TO_REG (i32 0), FPR16Op:$Rd, ssub), V64:$Rn, + // V128_lo:$Rm, VectorIndexH:$idx), + // ssub)>; + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, + V128, V128, V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8i16 V128:$dst), + (Accum (v8i16 V128:$Rd), + (v8i16 (int_aarch64_neon_sqrdmulh + (v8i16 V128:$Rn), + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + // FIXME: It should be a "def" here, similar to one above, + // after backend will support i16 neon type + + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, + V64, V64, V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", + [(set (v2i32 V64:$dst), + (Accum (v2i32 V64:$Rd), + (v2i32 (int_aarch64_neon_sqrdmulh + (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + // FIXME: it would be nice to use the scalar (v1i32) instruction here, but + // an intermediate EXTRACT_SUBREG would be untyped. 
+ // FIXME: direct EXTRACT_SUBREG from v2i32 to i32 is illegal, that's why we + // got it lowered here as (i32 vector_extract (v4i32 insert_subvector(..))) + def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), + (i32 (vector_extract + (v4i32 (insert_subvector + (undef), + (v2i32 (int_aarch64_neon_sqrdmulh + (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 + (v4i32 V128:$Rm), + VectorIndexS:$idx)))), + (i32 0))), + (i64 0))))), + (EXTRACT_SUBREG + (!cast(NAME # v2i32_indexed) + (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), + FPR32Op:$Rd, + ssub)), + V64:$Rn, + V128:$Rm, + VectorIndexS:$idx), + ssub)>; + + def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, + V128, V128, V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqrdmulh + (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + // FIXME: it would be nice to use the scalar (v1i32) instruction here, but + // an intermediate EXTRACT_SUBREG would be untyped. 
+ def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), + (i32 (vector_extract + (v4i32 (int_aarch64_neon_sqrdmulh + (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 + (v4i32 V128:$Rm), + VectorIndexS:$idx)))), + (i64 0))))), + (EXTRACT_SUBREG + (v4i32 (!cast(NAME # v4i32_indexed) + (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + FPR32Op:$Rd, + ssub)), + V128:$Rn, + V128:$Rm, + VectorIndexS:$idx)), + ssub)>; + + def i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc, + FPR16Op, FPR16Op, V128_lo, + VectorIndexH, asm, ".h", "", "", ".h", + [ + // FIXME: uncomment the following, after backend will support i16 neon type + // (set (i16 FPR16Op:$dst), + // (Accum (i16 FPR16Op:$Rd), + // (i16 (int_aarch64_neon_sqrdmulh + // (i16 FPR16Op:$Rn), + // (i16 (vector_extract (v8i16 V128_lo:$Rm), + // VectorIndexH:$idx)))))) + ]> { + + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, + FPR32Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", + [(set (i32 FPR32Op:$dst), + (Accum (i32 FPR32Op:$Rd), + (i32 (int_aarch64_neon_sqrdmulh + (i32 FPR32Op:$Rn), + (i32 (vector_extract (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} +} // let Predicates = [HasNeon, HasV8_1a] + +//---------------------------------------------------------------------------- // Crypto extensions //---------------------------------------------------------------------------- @@ -8627,3 +8853,141 @@ def : TokenAlias<".S", ".s">; def : TokenAlias<".D", ".d">; def : TokenAlias<".Q", ".q">; + +//---------------------------------------------------------------------------- +// v8.1 atomic instructions extension: +// * CAS +// * CASP +// * LD +// * SWP + +// Instruction encodings: +// +// 31 30|29 24|23|22|21|20 16|15|14 10|9 5|4 0 +// CAS SZ |001000|1 |A |1 |Rs |R |11111 |Rn |Rt +// CASP 0|SZ|001000|0 |A |1 |Rs |R |11111 |Rn |Rt +// LD SZ |111000|A |R 
|1 |Rs |0 |OPC|00|Rn |Rt +// SWP SZ |111000|A |R |1 |Rs |1 |OPC|00|Rn |Rt + +// Instruction syntax: +// +// CAS{}[] , , [] +// CAS{} , , [] +// CASP{} , , , , [] +// CASP{} , , , , [] +// LD{}[] , , [] +// LD{} , , [] +// SWP{}[] , , [] +// SWP{} , , [] + + +let Predicates = [HasV8_1a], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +class BaseCASEncoding pattern> + : I { + bits<2> Sz; + bit NP; + bit A; + bit R; + bits<5> Rs; + bits<5> Rn; + bits<5> Rt; + let Inst{31-30} = Sz; + let Inst{29-24} = 0b001000; + let Inst{23} = NP; + let Inst{22} = A; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{15} = R; + let Inst{14-10} = 0b11111; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +class BaseCAS + : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn), + "cas" # order # size, "\t$Rs, $Rt, [$Rn]", + "$out = $Rt",[]> { + let NP = 1; +} + +let Predicates = [HasV8_1a], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +class BaseLD + : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "ld" # op # order # size, "\t$Rs, $Rt, [$Rn]","",[]> { + bits<2> Sz; + bit A; + bit R; + bits<5> Rs; + bits<3> opc; + bits<5> Rn; + bits<5> Rt; + let Inst{31-30} = Sz; + let Inst{29-24} = 0b111000; + let Inst{23} = A; + let Inst{22} = R; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{15} = 0b0; + let Inst{14-12} = opc; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass LDOp { + let opc = 0b000 in def _add : BaseLD<"add" , order, size, RC>; + let opc = 0b001 in def _clr : BaseLD<"clr" , order, size, RC>; + let opc = 0b010 in def _eor : BaseLD<"eor" , order, size, RC>; + let opc = 0b011 in def _set : BaseLD<"set" , order, size, RC>; + let opc = 0b100 in def _smax : BaseLD<"smax", order, size, RC>; + let opc = 0b101 in def _smin : BaseLD<"smin", order, size, RC>; + let opc = 0b110 in def _umax : BaseLD<"umax", order, size, RC>; + let opc = 0b111 in def _umin : BaseLD<"umin", order, size, RC>; +} + +// Aliases for LD +let Predicates 
= [HasV8_1a] in +class BaseAliasLD : + InstAlias; + +multiclass BaseAliasLDOp { + def : BaseAliasLD<"st" # asm # "lb", GPR32, WZR, !cast("LD" # _B_release_ # asm)>; + def : BaseAliasLD<"st" # asm # "lh", GPR32, WZR, !cast("LD" # _H_release_ # asm)>; + def : BaseAliasLD<"st" # asm # "l", GPR32, WZR, !cast("LD" # _S_release_ # asm)>; + def : BaseAliasLD<"st" # asm # "l", GPR64, XZR, !cast("LD" # _D_release_ # asm)>; + def : BaseAliasLD<"st" # asm # "b", GPR32, WZR, !cast("LD" # _B_no_order_ # asm)>; + def : BaseAliasLD<"st" # asm # "h", GPR32, WZR, !cast("LD" # _H_no_order_ # asm)>; + def : BaseAliasLD<"st" # asm # "", GPR32, WZR, !cast("LD" # _S_no_order_ # asm)>; + def : BaseAliasLD<"st" # asm # "", GPR64, XZR, !cast("LD" # _D_no_order_ # asm)>; +} + +let Predicates = [HasV8_1a] in +class BaseSWP + : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "swp" # order # size, + "\t$Rs, $Rt, [$Rn]","",[]> { + bits<2> Sz; + bit A; + bit R; + bits<5> Rs; + bits<3> opc = 0b000; + bits<5> Rn; + bits<5> Rt; + let Inst{31-30} = Sz; + let Inst{29-24} = 0b111000; + let Inst{23} = A; + let Inst{22} = R; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{15} = 0b1; + let Inst{14-12} = opc; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +class BaseCASP + : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn), + "casp" # order # size, "\t$Rs, $Rt, [$Rn]", + "$out = $Rs",[]> { + let NP = 0; +} Index: lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.td +++ lib/Target/AArch64/AArch64InstrInfo.td @@ -22,6 +22,8 @@ AssemblerPredicate<"FeatureCrypto", "crypto">; def HasCRC : Predicate<"Subtarget->hasCRC()">, AssemblerPredicate<"FeatureCRC", "crc">; +def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">, + AssemblerPredicate<"FeatureV8_1a", "v8.1a">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def 
IsCyclone : Predicate<"Subtarget->isCyclone()">; @@ -725,6 +727,80 @@ def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">; def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; +// v8.1 atomic CAS +multiclass CASOrder { + let A = 0b1, R = 0b0 in def _acquire : BaseCAS<"a",size,RC>; + let A = 0b0, R = 0b1 in def _release : BaseCAS<"l",size,RC>; + let A = 0b1, R = 0b1 in def _acquire_and_release : BaseCAS<"al",size,RC>; + let A = 0b0, R = 0b0 in def _no_order : BaseCAS<"",size,RC>; +} + +multiclass CASOrderSize { + let Sz = 0b00 in defm _B : CASOrder<"b",GPR32>; + let Sz = 0b01 in defm _H : CASOrder<"h",GPR32>; + let Sz = 0b10 in defm _S : CASOrder<"",GPR32>; + let Sz = 0b11 in defm _D : CASOrder<"",GPR64>; +} + +defm CAS : CASOrderSize; + +// v8.1 atomic SWP +multiclass SWPOrder { + let A = 0b1, R = 0b0 in def _acquire : BaseSWP<"a",size,RC>; + let A = 0b0, R = 0b1 in def _release : BaseSWP<"l",size,RC>; + let A = 0b1, R = 0b1 in def _acquire_and_release : BaseSWP<"al",size,RC>; + let A = 0b0, R = 0b0 in def _no_order : BaseSWP<"",size,RC>; +} + +multiclass SWPOrderSize { + let Sz = 0b00 in defm _B : SWPOrder<"b",GPR32>; + let Sz = 0b01 in defm _H : SWPOrder<"h",GPR32>; + let Sz = 0b10 in defm _S : SWPOrder<"",GPR32>; + let Sz = 0b11 in defm _D : SWPOrder<"",GPR64>; +} + +defm SWP : SWPOrderSize; + +// v8.1 atomic LD +multiclass LDOpOrder { + let A = 0b1, R = 0b0 in defm _acquire : LDOp<"a", size, RC>; + let A = 0b0, R = 0b1 in defm _release : LDOp<"l", size, RC>; + let A = 0b1, R = 0b1 in defm _acquire_and_release : LDOp<"al", size, RC>; + let A = 0b0, R = 0b0 in defm _no_order : LDOp<"", size, RC>; +} + +multiclass LDOpOrderSize { + let Sz = 0b00 in defm _B : LDOpOrder<"b", GPR32>; + let Sz = 0b01 in defm _H : LDOpOrder<"h", GPR32>; + let Sz = 0b10 in defm _S : LDOpOrder<"", GPR32>; + let Sz = 0b11 in defm _D : LDOpOrder<"", GPR64>; +} + +defm LD : LDOpOrderSize; + +defm : BaseAliasLDOp<"add">; +defm : 
BaseAliasLDOp<"clr">; +defm : BaseAliasLDOp<"eor">; +defm : BaseAliasLDOp<"set">; +defm : BaseAliasLDOp<"smax">; +defm : BaseAliasLDOp<"smin">; +defm : BaseAliasLDOp<"umax">; +defm : BaseAliasLDOp<"umin">; + +// v8.1 atomic CASP +multiclass CASPOrder { + let A = 0b1, R = 0b0 in def _acquire : BaseCASP<"a",size,RC>; + let A = 0b0, R = 0b1 in def _release : BaseCASP<"l",size,RC>; + let A = 0b1, R = 0b1 in def _acquire_and_release : BaseCASP<"al",size,RC>; + let A = 0b0, R = 0b0 in def _no_order : BaseCASP<"",size,RC>; +} + +multiclass CASPOrderSize { + let Sz = 0b00 in defm _S : CASPOrder<"",ConsecutivePairClassOperand32>; + let Sz = 0b01 in defm _D : CASPOrder<"",ConsecutivePairClassOperand64>; +} + +defm CASP : CASPOrderSize; //===----------------------------------------------------------------------===// // Logical instructions. @@ -2312,6 +2388,20 @@ def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">; def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">; +let Predicates = [HasV8_1a] in { + // v8 limited-order region extension load-acquire instructions + def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">; + def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">; + def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">; + def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">; + + // v8 limited-order region extension store-release instructions + def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">; + def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">; + def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">; + def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">; +} + //===----------------------------------------------------------------------===// // Scaled floating point to integer conversion instructions. 
//===----------------------------------------------------------------------===// @@ -2769,6 +2859,10 @@ defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>; defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>; defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>; +defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah", + int_aarch64_neon_sqadd>; +defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh", + int_aarch64_neon_sqsub>; defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>; defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic", @@ -2985,6 +3079,32 @@ defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>; defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>; defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>; +let Predicates = [HasV8_1a] in { + defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">; + defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">; + // FIXME: uncomment the following, after backend will support i16 neon type + //def : Pat<(i16 (int_aarch64_neon_sqadd + // (i16 FPR16:$Rd), + // (i16 (int_aarch64_neon_sqrdmulh (i16 FPR16:$Rn), + // (i16 FPR16:$Rm))))), + // (SQRDMLAHv1i16 FPR16:$Rd, FPR16:$Rn, FPR16:$Rm)>; + def : Pat<(i32 (int_aarch64_neon_sqadd + (i32 FPR32:$Rd), + (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn), + (i32 FPR32:$Rm))))), + (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; + // FIXME: uncomment the following, after backend will support i16 neon type + //def : Pat<(i16 (int_aarch64_neon_sqsub + // (i16 FPR16:$Rd), + // (i16 (int_aarch64_neon_sqrdmulh (i16 FPR16:$Rn), + // (i16 FPR16:$Rm))))), + // (SQRDMLSHv1i16 FPR16:$Rd, FPR16:$Rn, FPR16:$Rm)>; + def : Pat<(i32 (int_aarch64_neon_sqsub + (i32 FPR32:$Rd), + (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn), + (i32 FPR32:$Rm))))), + (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; +} 
def : InstAlias<"cmls $dst, $src1, $src2", (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; @@ -4315,6 +4435,10 @@ int_aarch64_neon_sqadd>; defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", int_aarch64_neon_sqsub>; +defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah", + int_aarch64_neon_sqadd>; +defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh", + int_aarch64_neon_sqsub>; defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>; defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; Index: lib/Target/AArch64/AArch64RegisterInfo.td =================================================================== --- lib/Target/AArch64/AArch64RegisterInfo.td +++ lib/Target/AArch64/AArch64RegisterInfo.td @@ -592,3 +592,37 @@ def FPR32Op : RegisterOperand; def FPR64Op : RegisterOperand; def FPR128Op : RegisterOperand; + + +//===----------------------------------------------------------------------===// +// ARMv8.1a atomic CASP register operands + + +let Namespace = "AArch64" in { + def sube32 : SubRegIndex<32>; + def subo32 : SubRegIndex<32>; + def sube64 : SubRegIndex<64>; + def subo64 : SubRegIndex<64>; +} + +def Pair32 : RegisterTuples<[sube32, subo32], [(rotl GPR32, 0), (rotl GPR32, 1)]>; +def Pair64 : RegisterTuples<[sube64, subo64], [(rotl GPR64, 0), (rotl GPR64, 1)]>; + +def Pair32Class : RegisterClass<"AArch64", [untyped], 32, (add Pair32)> {let Size = 64;} +def Pair64Class : RegisterClass<"AArch64", [untyped], 64, (add Pair64)> {let Size = 128;} + + +let RenderMethod = "addRegOperands", ParserMethod="tryParsePair" in { + def Pair32AsmOperandClass : AsmOperandClass { let Name = "Pair32"; } + def Pair64AsmOperandClass : AsmOperandClass { let Name = "Pair64"; } +} + +def ConsecutivePairClassOperand32 : RegisterOperand"> { + let ParserMatchClass = Pair32AsmOperandClass; +} +def ConsecutivePairClassOperand64 : RegisterOperand"> { 
+ let ParserMatchClass = Pair64AsmOperandClass; +} + + +//===----- END: v8.1a atomic CASP register operands -----------------------===// Index: lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- lib/Target/AArch64/AArch64Subtarget.h +++ lib/Target/AArch64/AArch64Subtarget.h @@ -41,6 +41,7 @@ bool HasNEON; bool HasCrypto; bool HasCRC; + bool HasV8_1a; // HasZeroCycleRegMove - Has zero-cycle register mov instructions. bool HasZeroCycleRegMove; @@ -100,6 +101,7 @@ bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } + bool hasV8_1a() const { return HasV8_1a; } bool isLittleEndian() const { return IsLittle; } Index: lib/Target/AArch64/AArch64Subtarget.cpp =================================================================== --- lib/Target/AArch64/AArch64Subtarget.cpp +++ lib/Target/AArch64/AArch64Subtarget.cpp @@ -48,7 +48,7 @@ const TargetMachine &TM, bool LittleEndian) : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false), - HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), + HasV8_1a(false), HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {} Index: lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp =================================================================== --- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -106,6 +106,7 @@ OperandMatchResultTy tryParseAddSubImm(OperandVector &Operands); OperandMatchResultTy tryParseGPR64sp0Operand(OperandVector &Operands); bool tryParseVectorRegister(OperandVector &Operands); + OperandMatchResultTy tryParsePair(OperandVector &Operands); public: enum AArch64MatchResultTy { @@ -204,6 
+205,8 @@ struct BarrierOp { unsigned Val; // Not the enum since not all values have names. + const char *Data; + unsigned Length; }; struct SysRegOp { @@ -220,6 +223,8 @@ struct PrefetchOp { unsigned Val; + const char *Data; + unsigned Length; }; struct ShiftExtendOp { @@ -347,6 +352,11 @@ return Barrier.Val; } + StringRef getBarrierName() const { + assert(Kind == k_Barrier && "Invalid access!"); + return StringRef(Barrier.Data, Barrier.Length); + } + unsigned getReg() const override { assert(Kind == k_Register && "Invalid access!"); return Reg.RegNum; @@ -382,6 +392,11 @@ return Prefetch.Val; } + StringRef getPrefetchName() const { + assert(Kind == k_Prefetch && "Invalid access!"); + return StringRef(Prefetch.Data, Prefetch.Length); + } + AArch64_AM::ShiftExtendType getShiftExtendType() const { assert(Kind == k_ShiftExtend && "Invalid access!"); return ShiftExtend.Type; @@ -860,7 +875,14 @@ return Kind == k_Register && !Reg.isVector && AArch64MCRegisterClasses[AArch64::GPR64RegClassID].contains(Reg.RegNum); } - + bool isPair32() const { + return Kind == k_Register && !Reg.isVector && + AArch64MCRegisterClasses[AArch64::Pair32ClassRegClassID].contains(Reg.RegNum); + } + bool isPair64() const { + return Kind == k_Register && !Reg.isVector && + AArch64MCRegisterClasses[AArch64::Pair64ClassRegClassID].contains(Reg.RegNum); + } bool isGPR64sp0() const { return Kind == k_Register && !Reg.isVector && AArch64MCRegisterClasses[AArch64::GPR64spRegClassID].contains(Reg.RegNum); @@ -1595,10 +1617,14 @@ return Op; } - static std::unique_ptr CreateBarrier(unsigned Val, SMLoc S, + static std::unique_ptr CreateBarrier(unsigned Val, + StringRef Str, + SMLoc S, MCContext &Ctx) { auto Op = make_unique(k_Barrier, Ctx); Op->Barrier.Val = Val; + Op->Barrier.Data = Str.data(); + Op->Barrier.Length = Str.size(); Op->StartLoc = S; Op->EndLoc = S; return Op; @@ -1629,10 +1655,14 @@ return Op; } - static std::unique_ptr CreatePrefetch(unsigned Val, SMLoc S, + static std::unique_ptr 
CreatePrefetch(unsigned Val, + StringRef Str, + SMLoc S, MCContext &Ctx) { auto Op = make_unique(k_Prefetch, Ctx); Op->Prefetch.Val = Val; + Op->Barrier.Data = Str.data(); + Op->Barrier.Length = Str.size(); Op->StartLoc = S; Op->EndLoc = S; return Op; @@ -1660,9 +1690,8 @@ << AArch64_AM::getFPImmFloat(getFPImm()) << ") >"; break; case k_Barrier: { - bool Valid; - StringRef Name = AArch64DB::DBarrierMapper().toString(getBarrier(), Valid); - if (Valid) + StringRef Name = getBarrierName(); + if (!Name.empty()) OS << ""; else OS << ""; @@ -1705,9 +1734,8 @@ OS << "c" << getSysCR(); break; case k_Prefetch: { - bool Valid; - StringRef Name = AArch64PRFM::PRFMMapper().toString(getPrefetch(), Valid); - if (Valid) + StringRef Name = getPrefetchName(); + if (!Name.empty()) OS << ""; else OS << ""; @@ -1950,7 +1978,11 @@ return MatchOperand_ParseFail; } - Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext())); + bool Valid; + auto Mapper = AArch64PRFM::PRFMMapper(STI.getFeatureBits()); + StringRef Name = Mapper.toString(MCE->getValue(), Valid); + Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Name, + S, getContext())); return MatchOperand_Success; } @@ -1960,14 +1992,16 @@ } bool Valid; - unsigned prfop = AArch64PRFM::PRFMMapper().fromString(Tok.getString(), Valid); + auto Mapper = AArch64PRFM::PRFMMapper(STI.getFeatureBits()); + unsigned prfop = Mapper.fromString(Tok.getString(), Valid); if (!Valid) { TokError("pre-fetch hint expected"); return MatchOperand_ParseFail; } Parser.Lex(); // Eat identifier token. 
- Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext())); + Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Tok.getString(), + S, getContext())); return MatchOperand_Success; } @@ -2569,8 +2603,11 @@ Error(ExprLoc, "barrier operand out of range"); return MatchOperand_ParseFail; } - Operands.push_back( - AArch64Operand::CreateBarrier(MCE->getValue(), ExprLoc, getContext())); + bool Valid; + auto Mapper = AArch64DB::DBarrierMapper(STI.getFeatureBits()); + StringRef Name = Mapper.toString(MCE->getValue(), Valid); + Operands.push_back( AArch64Operand::CreateBarrier(MCE->getValue(), Name, + ExprLoc, getContext())); return MatchOperand_Success; } @@ -2580,7 +2617,8 @@ } bool Valid; - unsigned Opt = AArch64DB::DBarrierMapper().fromString(Tok.getString(), Valid); + auto Mapper = AArch64DB::DBarrierMapper(STI.getFeatureBits()); + unsigned Opt = Mapper.fromString(Tok.getString(), Valid); if (!Valid) { TokError("invalid barrier option name"); return MatchOperand_ParseFail; @@ -2592,8 +2630,8 @@ return MatchOperand_ParseFail; } - Operands.push_back( - AArch64Operand::CreateBarrier(Opt, getLoc(), getContext())); + Operands.push_back( AArch64Operand::CreateBarrier(Opt, Tok.getString(), + getLoc(), getContext())); Parser.Lex(); // Consume the option return MatchOperand_Success; @@ -2618,8 +2656,8 @@ assert(IsKnown == (MSRReg != -1U) && "register should be -1 if and only if it's unknown"); - uint32_t PStateField = - AArch64PState::PStateMapper().fromString(Tok.getString(), IsKnown); + auto PStateMapper = AArch64PState::PStateMapper(STI.getFeatureBits()); + uint32_t PStateField = PStateMapper.fromString(Tok.getString(), IsKnown); assert(IsKnown == (PStateField != -1U) && "register should be -1 if and only if it's unknown"); @@ -4262,3 +4300,75 @@ return Match_Success; return Match_InvalidOperand; } + + +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParsePair(OperandVector &Operands) { + + SMLoc S = getLoc(); + + if 
(getParser().getTok().isNot(AsmToken::Identifier)) { + Error(S, "Expected register"); + return MatchOperand_ParseFail; + } + + int FirstReg = tryParseRegister(); + if (FirstReg ==-1) { + return MatchOperand_ParseFail; + } + + const MCRegisterClass &WRegClass = + AArch64MCRegisterClasses[AArch64::GPR32RegClassID]; + const MCRegisterClass &XRegClass = + AArch64MCRegisterClasses[AArch64::GPR64RegClassID]; + + bool isXReg = XRegClass.contains(FirstReg), + isWReg = WRegClass.contains(FirstReg); + if (!isXReg && !isWReg) { + Error(S, "Expected register"); + return MatchOperand_ParseFail; + } + + const MCRegisterInfo *RI = getContext().getRegisterInfo(); + unsigned FirstEncoding = RI->getEncodingValue(FirstReg); + + if (FirstEncoding & 0x1) { + Error(S, "Expected even register"); + return MatchOperand_ParseFail; + } + + SMLoc M = getLoc(); + if (getParser().getTok().isNot(AsmToken::Comma)) { + Error(M, "Expected Comma"); + return MatchOperand_ParseFail; + } + // Eat the comma + getParser().Lex(); + + SMLoc E = getLoc(); + int SecondReg = tryParseRegister(); + if (SecondReg ==-1) { + return MatchOperand_ParseFail; + } + + if (RI->getEncodingValue(SecondReg) != FirstEncoding + 1 || + (isXReg && !XRegClass.contains(SecondReg)) || + (isWReg && !WRegClass.contains(SecondReg))) { + Error(E,"Expected consecutive registers"); + return MatchOperand_ParseFail; + } + + unsigned Pair = 0; + if (isXReg) { + Pair = AArch64MCRegisterClasses[AArch64::Pair64ClassRegClassID].getRegister( + FirstEncoding); + } else { + Pair = AArch64MCRegisterClasses[AArch64::Pair32ClassRegClassID].getRegister( + FirstEncoding); + } + + Operands.push_back(AArch64Operand::CreateReg(Pair, false, S, getLoc(), + getContext())); + + return MatchOperand_Success; +} Index: lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp =================================================================== --- lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp 
@@ -169,6 +169,10 @@ uint64_t Addr, const void *Decoder); static DecodeStatus DecodeVecShiftL8Imm(llvm::MCInst &Inst, unsigned Imm, uint64_t Addr, const void *Decoder); +static DecodeStatus DecodePair32ClassRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, const void *Decoder); +static DecodeStatus DecodePair64ClassRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, const void *Decoder); static bool Check(DecodeStatus &Out, DecodeStatus In) { switch (In) { @@ -1102,6 +1106,12 @@ case AArch64::STLRW: case AArch64::STLRB: case AArch64::STLRH: + case AArch64::STLLRW: + case AArch64::STLLRB: + case AArch64::STLLRH: + case AArch64::LDLARW: + case AArch64::LDLARB: + case AArch64::LDLARH: DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); break; case AArch64::STLXRX: @@ -1112,6 +1122,8 @@ case AArch64::LDAXRX: case AArch64::LDXRX: case AArch64::STLRX: + case AArch64::LDLARX: + case AArch64::STLLRX: DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); break; case AArch64::STLXPW: @@ -1504,7 +1516,10 @@ Inst.addOperand(MCOperand::CreateImm(crm)); bool ValidNamed; - (void)AArch64PState::PStateMapper().toString(pstate_field, ValidNamed); + const AArch64Disassembler *Dis = + static_cast(Decoder); + AArch64PState::PStateMapper Mapper(Dis->getSubtargetInfo().getFeatureBits()); + Mapper.toString(pstate_field, ValidNamed); return ValidNamed ? Success : Fail; } @@ -1532,3 +1547,29 @@ return Success; } + +static DecodeStatus DecodePair32ClassRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, + const void *Decoder) { + // Register number must be even (see CASP instruction) + if (RegNo & 0x1) + return Fail; + + unsigned Register = AArch64MCRegisterClasses[AArch64::Pair32ClassRegClassID]. 
+ getRegister(RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return Success; +} + +static DecodeStatus DecodePair64ClassRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, + const void *Decoder) { + // Register number must be even (see CASP instruction) + if (RegNo & 0x1) + return Fail; + + unsigned Register = AArch64MCRegisterClasses[AArch64::Pair64ClassRegClassID]. + getRegister(RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return Success; +} Index: lib/Target/AArch64/Disassembler/LLVMBuild.txt =================================================================== --- lib/Target/AArch64/Disassembler/LLVMBuild.txt +++ lib/Target/AArch64/Disassembler/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = AArch64Disassembler parent = AArch64 -required_libraries = AArch64Info AArch64Utils MC MCDisassembler Support +required_libraries = AArch64Desc AArch64Info AArch64Utils MC MCDisassembler Support add_to_library_groups = AArch64 Index: lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h =================================================================== --- lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -116,6 +116,8 @@ void printMRSSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printSystemPStateField(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + template + void printConsecutivePairClassOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); }; class AArch64AppleInstPrinter : public AArch64InstPrinter { Index: lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp =================================================================== --- lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -1088,7 +1088,8 @@ raw_ostream &O) { unsigned prfop = MI->getOperand(OpNum).getImm(); bool Valid; - StringRef Name 
= AArch64PRFM::PRFMMapper().toString(prfop, Valid); + auto PRFMMapper = AArch64PRFM::PRFMMapper(getAvailableFeatures()); + StringRef Name = PRFMMapper.toString(prfop, Valid); if (Valid) O << Name; else @@ -1150,6 +1151,19 @@ return Reg; } +template +void AArch64InstPrinter::printConsecutivePairClassOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + static_assert(size == 64 || size == 32,"Template parameter must be either 32 or 64"); + unsigned Reg = MI->getOperand(OpNum).getReg(); + + unsigned sube = (size == 32) ? AArch64::sube32 : AArch64::sube64; + unsigned subo = (size == 32) ? AArch64::subo32 : AArch64::subo64; + + unsigned even = MRI.getSubReg(Reg, sube); + unsigned odd = MRI.getSubReg(Reg, subo); + O << getRegisterName(even) << ", " << getRegisterName(odd); +} + void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O, StringRef LayoutSuffix) { @@ -1260,12 +1274,11 @@ unsigned Val = MI->getOperand(OpNo).getImm(); unsigned Opcode = MI->getOpcode(); + AArch64NamedImmMapper Mapper = Opcode == AArch64::ISB ? 
+ static_cast<AArch64NamedImmMapper>(AArch64ISB::ISBMapper(getAvailableFeatures())) : + static_cast<AArch64NamedImmMapper>(AArch64DB::DBarrierMapper(getAvailableFeatures())); bool Valid; - StringRef Name; - if (Opcode == AArch64::ISB) - Name = AArch64ISB::ISBMapper().toString(Val, Valid); - else - Name = AArch64DB::DBarrierMapper().toString(Val, Valid); + StringRef Name = Mapper.toString(Val, Valid); if (Valid) O << Name; else @@ -1276,8 +1289,8 @@ raw_ostream &O) { unsigned Val = MI->getOperand(OpNo).getImm(); - auto Mapper = AArch64SysReg::MRSMapper(getAvailableFeatures()); - std::string Name = Mapper.toString(Val); + auto MRSMapper = AArch64SysReg::MRSMapper(getAvailableFeatures()); + std::string Name = MRSMapper.toString(Val); O << StringRef(Name).upper(); } @@ -1286,8 +1299,8 @@ raw_ostream &O) { unsigned Val = MI->getOperand(OpNo).getImm(); - auto Mapper = AArch64SysReg::MSRMapper(getAvailableFeatures()); - std::string Name = Mapper.toString(Val); + auto MSRMapper = AArch64SysReg::MSRMapper(getAvailableFeatures()); + std::string Name = MSRMapper.toString(Val); O << StringRef(Name).upper(); } @@ -1297,7 +1310,8 @@ unsigned Val = MI->getOperand(OpNo).getImm(); bool Valid; - StringRef Name = AArch64PState::PStateMapper().toString(Val, Valid); + auto PStateMapper = AArch64PState::PStateMapper(getAvailableFeatures()); + StringRef Name = PStateMapper.toString(Val, Valid); if (Valid) O << StringRef(Name.str()).upper(); else Index: lib/Target/AArch64/Utils/AArch64BaseInfo.h =================================================================== --- lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -280,11 +280,14 @@ struct Mapping { const char *Name; uint32_t Value; + uint64_t SubTargetFeature; }; template - AArch64NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm) - : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {} + AArch64NamedImmMapper(uint64_t SubTargetFeatureBits, const Mapping (&Mappings)[N], + uint32_t TooBigImm) + : FeatureBits(SubTargetFeatureBits),
Mappings(&Mappings[0]), NumMappings(N), + TooBigImm(TooBigImm) {} StringRef toString(uint32_t Value, bool &Valid) const; uint32_t fromString(StringRef Name, bool &Valid) const; @@ -294,9 +297,13 @@ /// N being 0 indicates no immediate syntax-form is allowed. bool validImm(uint32_t Value) const; protected: - const Mapping *Pairs; - size_t NumPairs; + uint64_t FeatureBits; + const Mapping *Mappings; + size_t NumMappings; uint32_t TooBigImm; + bool hasFeature(uint64_t SubTargetFeature) const { + return SubTargetFeature == 0 || (SubTargetFeature & FeatureBits) != 0; + } }; namespace AArch64AT { @@ -317,9 +324,9 @@ }; struct ATMapper : AArch64NamedImmMapper { - const static Mapping ATPairs[]; + const static Mapping ATMappings[]; - ATMapper(); + ATMapper(uint64_t SubTargetFeatureBits); }; } @@ -341,9 +348,9 @@ }; struct DBarrierMapper : AArch64NamedImmMapper { - const static Mapping DBarrierPairs[]; + const static Mapping DBarrierMappings[]; - DBarrierMapper(); + DBarrierMapper(uint64_t SubTargetFeatureBits); }; } @@ -361,9 +368,9 @@ }; struct DCMapper : AArch64NamedImmMapper { - const static Mapping DCPairs[]; + const static Mapping DCMappings[]; - DCMapper(); + DCMapper(uint64_t SubTargetFeatureBits); }; } @@ -378,9 +385,9 @@ struct ICMapper : AArch64NamedImmMapper { - const static Mapping ICPairs[]; + const static Mapping ICMappings[]; - ICMapper(); + ICMapper(uint64_t SubTargetFeatureBits); }; static inline bool NeedsRegister(ICValues Val) { @@ -394,9 +401,9 @@ SY = 0xf }; struct ISBMapper : AArch64NamedImmMapper { - const static Mapping ISBPairs[]; + const static Mapping ISBMappings[]; - ISBMapper(); + ISBMapper(uint64_t SubTargetFeatureBits); }; } @@ -424,9 +431,9 @@ }; struct PRFMMapper : AArch64NamedImmMapper { - const static Mapping PRFMPairs[]; + const static Mapping PRFMMappings[]; - PRFMMapper(); + PRFMMapper(uint64_t SubTargetFeatureBits); }; } @@ -435,13 +442,14 @@ Invalid = -1, SPSel = 0x05, DAIFSet = 0x1e, - DAIFClr = 0x1f + DAIFClr = 0x1f, + PAN = 0x04, 
}; struct PStateMapper : AArch64NamedImmMapper { - const static Mapping PStatePairs[]; + const static Mapping PStateMappings[]; - PStateMapper(); + PStateMapper(uint64_t SubTargetFeatureBits); }; } @@ -1122,10 +1130,47 @@ ICH_LR13_EL2 = 0xe66d, // 11 100 1100 1101 101 ICH_LR14_EL2 = 0xe66e, // 11 100 1100 1101 110 ICH_LR15_EL2 = 0xe66f, // 11 100 1100 1101 111 - }; - // Cyclone specific system registers - enum CycloneSysRegValues { + // Privileged Access Never extension specific system registers + PAN = 0xc213, // 11 000 0100 0010 011 + + // Limited Ordering Regions extension system registers + LORSA_EL1 = 0xc520, // 11 000 1010 0100 000 + LOREA_EL1 = 0xc521, // 11 000 1010 0100 001 + LORN_EL1 = 0xc522, // 11 000 1010 0100 010 + LORC_EL1 = 0xc523, // 11 000 1010 0100 011 + LORID_EL1 = 0xc527, // 11 000 1010 0100 111 + + // Virtualization host extensions system registers + TTBR1_EL2 = 0xe101, // 11 100 0010 0000 001 + CONTEXTIDR_EL2 = 0xe681, // 11 100 1101 0000 001 + CNTHV_TVAL_EL2 = 0xe718, // 11 100 1110 0011 000 + CNTHV_CVAL_EL2 = 0xe71a, // 11 100 1110 0011 010 + CNTHV_CTL_EL2 = 0xe719, // 11 100 1110 0011 001 + SCTLR_EL12 = 0xe880, // 11 101 0001 0000 000 + CPACR_EL12 = 0xe882, // 11 101 0001 0000 010 + TTBR0_EL12 = 0xe900, // 11 101 0010 0000 000 + TTBR1_EL12 = 0xe901, // 11 101 0010 0000 001 + TCR_EL12 = 0xe902, // 11 101 0010 0000 010 + AFSR0_EL12 = 0xea88, // 11 101 0101 0001 000 + AFSR1_EL12 = 0xea89, // 11 101 0101 0001 001 + ESR_EL12 = 0xea90, // 11 101 0101 0010 000 + FAR_EL12 = 0xeb00, // 11 101 0110 0000 000 + MAIR_EL12 = 0xed10, // 11 101 1010 0010 000 + AMAIR_EL12 = 0xed18, // 11 101 1010 0011 000 + VBAR_EL12 = 0xee00, // 11 101 1100 0000 000 + CONTEXTIDR_EL12 = 0xee81, // 11 101 1101 0000 001 + CNTKCTL_EL12 = 0xef08, // 11 101 1110 0001 000 + CNTP_TVAL_EL02 = 0xef10, // 11 101 1110 0010 000 + CNTP_CTL_EL02 = 0xef11, // 11 101 1110 0010 001 + CNTP_CVAL_EL02 = 0xef12, // 11 101 1110 0010 010 + CNTV_TVAL_EL02 = 0xef18, // 11 101 1110 0011 000 + 
CNTV_CTL_EL02 = 0xef19, // 11 101 1110 0011 001 + CNTV_CVAL_EL02 = 0xef1a, // 11 101 1110 0011 010 + SPSR_EL12 = 0xea00, // 11 101 0100 0000 000 + ELR_EL12 = 0xea01, // 11 101 0100 0000 001 + + // Cyclone specific system registers CPM_IOACC_CTL_EL3 = 0xff90 }; @@ -1134,25 +1179,28 @@ // burdening the common AArch64NamedImmMapper with abstractions only needed in // this one case. struct SysRegMapper { - static const AArch64NamedImmMapper::Mapping SysRegPairs[]; - static const AArch64NamedImmMapper::Mapping CycloneSysRegPairs[]; + static const AArch64NamedImmMapper::Mapping SysRegMappings[]; - const AArch64NamedImmMapper::Mapping *InstPairs; - size_t NumInstPairs; + const AArch64NamedImmMapper::Mapping *InstMappings; + size_t NumInstMappings; uint64_t FeatureBits; + bool hasFeature(uint64_t SubTargetFeature) const { + return SubTargetFeature == 0 || (SubTargetFeature & FeatureBits) != 0; + } + SysRegMapper(uint64_t FeatureBits) : FeatureBits(FeatureBits) { } uint32_t fromString(StringRef Name, bool &Valid) const; std::string toString(uint32_t Bits) const; }; struct MSRMapper : SysRegMapper { - static const AArch64NamedImmMapper::Mapping MSRPairs[]; + static const AArch64NamedImmMapper::Mapping MSRMappings[]; MSRMapper(uint64_t FeatureBits); }; struct MRSMapper : SysRegMapper { - static const AArch64NamedImmMapper::Mapping MRSPairs[]; + static const AArch64NamedImmMapper::Mapping MRSMappings[]; MRSMapper(uint64_t FeatureBits); }; @@ -1197,9 +1245,9 @@ }; struct TLBIMapper : AArch64NamedImmMapper { - const static Mapping TLBIPairs[]; + const static Mapping TLBIMappings[]; - TLBIMapper(); + TLBIMapper(uint64_t SubTargetFeatureBits); }; static inline bool NeedsRegister(TLBIValues Val) { Index: lib/Target/AArch64/Utils/AArch64BaseInfo.cpp =================================================================== --- lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -19,10 +19,10 @@ using namespace llvm; StringRef 
AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const { - for (unsigned i = 0; i < NumPairs; ++i) { - if (Pairs[i].Value == Value) { + for (unsigned i = 0; i < NumMappings; ++i) { + if (hasFeature(Mappings[i].SubTargetFeature) && Mappings[i].Value == Value) { Valid = true; - return Pairs[i].Name; + return Mappings[i].Name; } } @@ -32,10 +32,10 @@ uint32_t AArch64NamedImmMapper::fromString(StringRef Name, bool &Valid) const { std::string LowerCaseName = Name.lower(); - for (unsigned i = 0; i < NumPairs; ++i) { - if (Pairs[i].Name == LowerCaseName) { + for (unsigned i = 0; i < NumMappings; ++i) { + if (hasFeature(Mappings[i].SubTargetFeature) && Mappings[i].Name == LowerCaseName) { Valid = true; - return Pairs[i].Value; + return Mappings[i].Value; } } @@ -47,7 +47,7 @@ return Value < TooBigImm; } -const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATMappings[] = { {"s1e1r", S1E1R}, {"s1e2r", S1E2R}, {"s1e3r", S1E3R}, @@ -62,10 +62,10 @@ {"s12e0w", S12E0W}, }; -AArch64AT::ATMapper::ATMapper() - : AArch64NamedImmMapper(ATPairs, 0) {} +AArch64AT::ATMapper::ATMapper(uint64_t SubTargetFeatureBits) + : AArch64NamedImmMapper(SubTargetFeatureBits, ATMappings, 0) {} -const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierMappings[] = { {"oshld", OSHLD}, {"oshst", OSHST}, {"osh", OSH}, @@ -80,10 +80,10 @@ {"sy", SY} }; -AArch64DB::DBarrierMapper::DBarrierMapper() - : AArch64NamedImmMapper(DBarrierPairs, 16u) {} +AArch64DB::DBarrierMapper::DBarrierMapper(uint64_t SubTargetFeatureBits) + : AArch64NamedImmMapper(SubTargetFeatureBits, DBarrierMappings, 16u) {} -const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCMappings[] = { {"zva", ZVA}, {"ivac", IVAC}, {"isw", ISW}, @@ -94,26 +94,26 @@ {"cisw", CISW} }; 
-AArch64DC::DCMapper::DCMapper() - : AArch64NamedImmMapper(DCPairs, 0) {} +AArch64DC::DCMapper::DCMapper(uint64_t SubTargetFeatureBits) + : AArch64NamedImmMapper(SubTargetFeatureBits, DCMappings, 0) {} -const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICMappings[] = { {"ialluis", IALLUIS}, {"iallu", IALLU}, {"ivau", IVAU} }; -AArch64IC::ICMapper::ICMapper() - : AArch64NamedImmMapper(ICPairs, 0) {} +AArch64IC::ICMapper::ICMapper(uint64_t SubTargetFeatureBits) + : AArch64NamedImmMapper(SubTargetFeatureBits, ICMappings, 0) {} -const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBMappings[] = { {"sy", SY}, }; -AArch64ISB::ISBMapper::ISBMapper() - : AArch64NamedImmMapper(ISBPairs, 16) {} +AArch64ISB::ISBMapper::ISBMapper(uint64_t SubTargetFeatureBits) + : AArch64NamedImmMapper(SubTargetFeatureBits, ISBMappings, 16) {} -const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMMappings[] = { {"pldl1keep", PLDL1KEEP}, {"pldl1strm", PLDL1STRM}, {"pldl2keep", PLDL2KEEP}, @@ -134,19 +134,20 @@ {"pstl3strm", PSTL3STRM} }; -AArch64PRFM::PRFMMapper::PRFMMapper() - : AArch64NamedImmMapper(PRFMPairs, 32) {} +AArch64PRFM::PRFMMapper::PRFMMapper(uint64_t SubTargetFeatureBits) + : AArch64NamedImmMapper(SubTargetFeatureBits, PRFMMappings, 32) {} -const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStatePairs[] = { +const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStateMappings[] = { {"spsel", SPSel}, {"daifset", DAIFSet}, - {"daifclr", DAIFClr} + {"daifclr", DAIFClr}, + {"pan", PAN, AArch64::FeatureV8_1a}, }; -AArch64PState::PStateMapper::PStateMapper() - : AArch64NamedImmMapper(PStatePairs, 0) {} +AArch64PState::PStateMapper::PStateMapper(uint64_t SubTargetFeatureBits) + : 
AArch64NamedImmMapper(SubTargetFeatureBits, PStateMappings, 0) {} -const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = { {"mdccsr_el0", MDCCSR_EL0}, {"dbgdtrrx_el0", DBGDTRRX_EL0}, {"mdrar_el1", MDRAR_EL1}, @@ -245,13 +246,13 @@ {"ich_elsr_el2", ICH_ELSR_EL2} }; -AArch64SysReg::MRSMapper::MRSMapper(uint64_t FeatureBits) - : SysRegMapper(FeatureBits) { - InstPairs = &MRSPairs[0]; - NumInstPairs = llvm::array_lengthof(MRSPairs); +AArch64SysReg::MRSMapper::MRSMapper(uint64_t SubTargetFeatureBits) + : SysRegMapper(SubTargetFeatureBits) { + InstMappings = &MRSMappings[0]; + NumInstMappings = llvm::array_lengthof(MRSMappings); } -const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRMappings[] = { {"dbgdtrtx_el0", DBGDTRTX_EL0}, {"oslar_el1", OSLAR_EL1}, {"pmswinc_el0", PMSWINC_EL0}, @@ -266,17 +267,20 @@ {"icc_dir_el1", ICC_DIR_EL1}, {"icc_sgi1r_el1", ICC_SGI1R_EL1}, {"icc_asgi1r_el1", ICC_ASGI1R_EL1}, - {"icc_sgi0r_el1", ICC_SGI0R_EL1} + {"icc_sgi0r_el1", ICC_SGI0R_EL1}, + + // Privileged Access Never extension specific system registers + {"pan", PAN, AArch64::FeatureV8_1a}, }; -AArch64SysReg::MSRMapper::MSRMapper(uint64_t FeatureBits) - : SysRegMapper(FeatureBits) { - InstPairs = &MSRPairs[0]; - NumInstPairs = llvm::array_lengthof(MSRPairs); +AArch64SysReg::MSRMapper::MSRMapper(uint64_t SubTargetFeatureBits) + : SysRegMapper(SubTargetFeatureBits) { + InstMappings = &MSRMappings[0]; + NumInstMappings = llvm::array_lengthof(MSRMappings); } -const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings[] = { {"osdtrrx_el1", OSDTRRX_EL1}, {"osdtrtx_el1", OSDTRTX_EL1}, {"teecr32_el1", TEECR32_EL1}, @@ -752,12 +756,47 @@ {"ich_lr12_el2", ICH_LR12_EL2}, {"ich_lr13_el2", 
ICH_LR13_EL2}, {"ich_lr14_el2", ICH_LR14_EL2}, - {"ich_lr15_el2", ICH_LR15_EL2} -}; - -const AArch64NamedImmMapper::Mapping -AArch64SysReg::SysRegMapper::CycloneSysRegPairs[] = { - {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3} + {"ich_lr15_el2", ICH_LR15_EL2}, + {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3, AArch64::ProcCyclone}, + + // Privileged Access Never extension specific system registers + {"pan", PAN, AArch64::FeatureV8_1a}, + + // Limited Ordering Regions extension system registers + {"lorsa_el1", LORSA_EL1, AArch64::FeatureV8_1a}, + {"lorea_el1", LOREA_EL1, AArch64::FeatureV8_1a}, + {"lorn_el1", LORN_EL1, AArch64::FeatureV8_1a}, + {"lorc_el1", LORC_EL1, AArch64::FeatureV8_1a}, + {"lorid_el1", LORID_EL1, AArch64::FeatureV8_1a}, + + // Virtualization host extensions system registers + {"ttbr1_el2", TTBR1_EL2, AArch64::FeatureV8_1a}, + {"contextidr_el2", CONTEXTIDR_EL2, AArch64::FeatureV8_1a}, + {"cnthv_tval_el2", CNTHV_TVAL_EL2, AArch64::FeatureV8_1a}, + {"cnthv_cval_el2", CNTHV_CVAL_EL2, AArch64::FeatureV8_1a}, + {"cnthv_ctl_el2", CNTHV_CTL_EL2, AArch64::FeatureV8_1a}, + {"sctlr_el12", SCTLR_EL12, AArch64::FeatureV8_1a}, + {"cpacr_el12", CPACR_EL12, AArch64::FeatureV8_1a}, + {"ttbr0_el12", TTBR0_EL12, AArch64::FeatureV8_1a}, + {"ttbr1_el12", TTBR1_EL12, AArch64::FeatureV8_1a}, + {"tcr_el12", TCR_EL12, AArch64::FeatureV8_1a}, + {"afsr0_el12", AFSR0_EL12, AArch64::FeatureV8_1a}, + {"afsr1_el12", AFSR1_EL12, AArch64::FeatureV8_1a}, + {"esr_el12", ESR_EL12, AArch64::FeatureV8_1a}, + {"far_el12", FAR_EL12, AArch64::FeatureV8_1a}, + {"mair_el12", MAIR_EL12, AArch64::FeatureV8_1a}, + {"amair_el12", AMAIR_EL12, AArch64::FeatureV8_1a}, + {"vbar_el12", VBAR_EL12, AArch64::FeatureV8_1a}, + {"contextidr_el12", CONTEXTIDR_EL12, AArch64::FeatureV8_1a}, + {"cntkctl_el12", CNTKCTL_EL12, AArch64::FeatureV8_1a}, + {"cntp_tval_el02", CNTP_TVAL_EL02, AArch64::FeatureV8_1a}, + {"cntp_ctl_el02", CNTP_CTL_EL02, AArch64::FeatureV8_1a}, + {"cntp_cval_el02", CNTP_CVAL_EL02, 
AArch64::FeatureV8_1a}, + {"cntv_tval_el02", CNTV_TVAL_EL02, AArch64::FeatureV8_1a}, + {"cntv_ctl_el02", CNTV_CTL_EL02, AArch64::FeatureV8_1a}, + {"cntv_cval_el02", CNTV_CVAL_EL02, AArch64::FeatureV8_1a}, + {"spsr_el12", SPSR_EL12, AArch64::FeatureV8_1a}, + {"elr_el12", ELR_EL12, AArch64::FeatureV8_1a}, }; uint32_t @@ -765,29 +804,21 @@ std::string NameLower = Name.lower(); // First search the registers shared by all - for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { - if (SysRegPairs[i].Name == NameLower) { + for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) { + if (hasFeature(SysRegMappings[i].SubTargetFeature) && + SysRegMappings[i].Name == NameLower) { Valid = true; - return SysRegPairs[i].Value; - } - } - - // Next search for target specific registers - if (FeatureBits & AArch64::ProcCyclone) { - for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) { - if (CycloneSysRegPairs[i].Name == NameLower) { - Valid = true; - return CycloneSysRegPairs[i].Value; - } + return SysRegMappings[i].Value; } } // Now try the instruction-specific registers (either read-only or // write-only). 
- for (unsigned i = 0; i < NumInstPairs; ++i) { - if (InstPairs[i].Name == NameLower) { + for (unsigned i = 0; i < NumInstMappings; ++i) { + if (hasFeature(InstMappings[i].SubTargetFeature) && + InstMappings[i].Name == NameLower) { Valid = true; - return InstPairs[i].Value; + return InstMappings[i].Value; } } @@ -816,26 +847,19 @@ std::string AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const { // First search the registers shared by all - for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { - if (SysRegPairs[i].Value == Bits) { - return SysRegPairs[i].Name; - } - } - - // Next search for target specific registers - if (FeatureBits & AArch64::ProcCyclone) { - for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) { - if (CycloneSysRegPairs[i].Value == Bits) { - return CycloneSysRegPairs[i].Name; - } + for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) { + if (hasFeature(SysRegMappings[i].SubTargetFeature) && + SysRegMappings[i].Value == Bits) { + return SysRegMappings[i].Name; } } // Now try the instruction-specific registers (either read-only or // write-only). 
- for (unsigned i = 0; i < NumInstPairs; ++i) { - if (InstPairs[i].Value == Bits) { - return InstPairs[i].Name; + for (unsigned i = 0; i < NumInstMappings; ++i) { + if (hasFeature(InstMappings[i].SubTargetFeature) && + InstMappings[i].Value == Bits) { + return InstMappings[i].Name; } } @@ -850,7 +874,7 @@ + "_c" + utostr(CRm) + "_" + utostr(Op2); } -const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIMappings[] = { {"ipas2e1is", IPAS2E1IS}, {"ipas2le1is", IPAS2LE1IS}, {"vmalle1is", VMALLE1IS}, @@ -885,5 +909,5 @@ {"vaale1", VAALE1} }; -AArch64TLBI::TLBIMapper::TLBIMapper() - : AArch64NamedImmMapper(TLBIPairs, 0) {} +AArch64TLBI::TLBIMapper::TLBIMapper(uint64_t SubTargetFeatureBits) + : AArch64NamedImmMapper(SubTargetFeatureBits, TLBIMappings, 0) {} Index: lib/Target/ARM/ARM.td =================================================================== --- lib/Target/ARM/ARM.td +++ lib/Target/ARM/ARM.td @@ -175,6 +175,9 @@ "Support ARM v8 instructions", [HasV7Ops, FeatureVirtualization, FeatureMP]>; +def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true", + "Support ARM v8.1a instructions", + [HasV8Ops, FeatureAClass, FeatureCRC]>; //===----------------------------------------------------------------------===// // ARM Processors supported. 
@@ -449,6 +452,14 @@ FeatureDB,FeatureDSPThumb2, FeatureHasRAS, FeatureZCZeroing]>; +// V8.1 Processors +def : ProcNoItin<"generic-armv8.1-a", [HasV8Ops, FeatureV8_1a, + FeatureDB, FeatureFPARMv8, + FeatureNEON, FeatureDSPThumb2, + FeatureHWDiv, FeatureHWDivARM, + FeatureTrustZone, FeatureT2XtPk, + FeatureCrypto]>; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// Index: lib/Target/ARM/ARMAsmPrinter.cpp =================================================================== --- lib/Target/ARM/ARMAsmPrinter.cpp +++ lib/Target/ARM/ARMAsmPrinter.cpp @@ -666,7 +666,7 @@ std::string CPUString = STI.getCPUString(); - if (CPUString != "generic") { + if (CPUString.find("generic")) { // FIXME: remove krait check when GNU tools support krait cpu if (STI.isKrait()) { ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9"); @@ -720,7 +720,8 @@ // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture if (STI.hasV8Ops()) ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, - ARMBuildAttrs::AllowNeonARMv8); + STI.hasV8_1a() ? 
ARMBuildAttrs::AllowNeonARMv8_1a: + ARMBuildAttrs::AllowNeonARMv8); } else { if (STI.hasFPARMv8()) // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one Index: lib/Target/ARM/ARMInstrInfo.td =================================================================== --- lib/Target/ARM/ARMInstrInfo.td +++ lib/Target/ARM/ARMInstrInfo.td @@ -226,6 +226,8 @@ AssemblerPredicate<"FeatureCrypto", "crypto">; def HasCRC : Predicate<"Subtarget->hasCRC()">, AssemblerPredicate<"FeatureCRC", "crc">; +def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">, + AssemblerPredicate<"FeatureV8_1a", "v8.1a">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16","half-float">; def HasDivide : Predicate<"Subtarget->hasDivide()">, @@ -1415,7 +1417,8 @@ let isCompare = 1, Defs = [CPSR] in { multiclass AI1_cmp_irs opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, bit Commutable = 0> { + PatFrag opnode, bit Commutable = 0, + string rrDecoderMethod = ""> { def ri : AI1, @@ -1443,6 +1446,7 @@ let Inst{15-12} = 0b0000; let Inst{11-4} = 0b00000000; let Inst{3-0} = Rm; + let DecoderMethod = rrDecoderMethod; let Unpredictable{15-12} = 0b1111; } @@ -4261,6 +4265,30 @@ def CRC32CW : AI_crc32<1, 0b10, "cw", int_arm_crc32cw>; //===----------------------------------------------------------------------===// +// ARMv8.1a Privilege Access Never extension +// +// SETPAN #imm1 + +def SETPAN : AInoP<(outs), (ins imm0_1:$imm), MiscFrm, NoItinerary, "setpan", + "\t$imm", []>, Requires<[IsARM, HasV8, HasV8_1a]> { + bits<1> imm; + + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010001; + let Inst{19-16} = 0b0000; + let Inst{15-10} = 0b000000; + let Inst{9} = imm; + let Inst{8} = 0b0; + let Inst{7-4} = 0b0000; + let Inst{3-0} = 0b0000; + + let Unpredictable{19-16} = 0b1111; + let Unpredictable{15-10} = 0b111111; + let Unpredictable{8} = 0b1; + let Unpredictable{3-0} = 0b1111; +} + 
+//===----------------------------------------------------------------------===// // Comparison Instructions... // @@ -4364,7 +4392,8 @@ // Note that TST/TEQ don't set all the same flags that CMP does! defm TST : AI1_cmp_irs<0b1000, "tst", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr, - BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1>; + BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1, + "DecodeTSTInstruction">; defm TEQ : AI1_cmp_irs<0b1001, "teq", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr, BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>; Index: lib/Target/ARM/ARMInstrNEON.td =================================================================== --- lib/Target/ARM/ARMInstrNEON.td +++ lib/Target/ARM/ARMInstrNEON.td @@ -2790,7 +2790,7 @@ imm:$lane)))))))]>; class N3VDMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, SDNode MulOp, SDNode ShOp> + ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp> : N3VLane16<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), @@ -2826,7 +2826,7 @@ class N3VQMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - SDNode MulOp, SDNode ShOp> + SDPatternOperator MulOp, SDPatternOperator ShOp> : N3VLane16<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane), @@ -3674,7 +3674,7 @@ multiclass N3VMulOpSL_HS op11_8, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, string Dt, SDNode ShOp> { + string OpcodeStr, string Dt, SDPatternOperator ShOp> { def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16, OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>; def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32, @@ -3711,27 +3711,38 @@ } // Neon 3-argument intrinsics, -// element sizes of 8, 16 and 32 bits: -multiclass N3VInt3_QHS op11_8, bit op4, - InstrItinClass itinD, 
InstrItinClass itinQ, +// element sizes of 16 and 32 bits: +multiclass N3VInt3_HS op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, SDPatternOperator IntOp> { // 64-bit vector types. - def v8i8 : N3VDInt3; - def v4i16 : N3VDInt3; - def v2i32 : N3VDInt3; // 128-bit vector types. - def v16i8 : N3VQInt3; - def v8i16 : N3VQInt3; - def v4i32 : N3VQInt3; } +// element sizes of 8, 16 and 32 bits: +multiclass N3VInt3_QHS op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, SDPatternOperator IntOp> + :N3VInt3_HS { + // 64-bit vector types. + def v8i8 : N3VDInt3; + // 128-bit vector types. + def v16i8 : N3VQInt3; +} // Neon Long Multiply-Op vector operations, // element sizes of 8, 16 and 32 bits: @@ -4305,6 +4316,147 @@ defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>; defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; +let Predicates = [HasNEON, HasV8_1a] in { + // v8.1a Neon Rounding Double Multiply-Op vector operations, + // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate Long + // (Q += D * D) + defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", + null_frag>; + def : Pat<(v4i16 (int_arm_neon_vqadds + (v4i16 DPR:$src1), + (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; + def : Pat<(v2i32 (int_arm_neon_vqadds + (v2i32 DPR:$src1), + (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; + def : Pat<(v8i16 (int_arm_neon_vqadds + (v8i16 QPR:$src1), + (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), + (v8i16 QPR:$Vm))))), + (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; + def : Pat<(v4i32 
(int_arm_neon_vqadds + (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), + (v4i32 QPR:$Vm))))), + (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; + + defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s", + null_frag>; + def : Pat<(v4i16 (int_arm_neon_vqadds + (v4i16 DPR:$src1), + (v4i16 (int_arm_neon_vqrdmulh + (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, + imm:$lane))>; + def : Pat<(v2i32 (int_arm_neon_vqadds + (v2i32 DPR:$src1), + (v2i32 (int_arm_neon_vqrdmulh + (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, + imm:$lane))>; + def : Pat<(v8i16 (int_arm_neon_vqadds + (v8i16 QPR:$src1), + (v8i16 (int_arm_neon_vqrdmulh + (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), + imm:$lane)))))), + (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1), + (v8i16 QPR:$src2), + (v4i16 (EXTRACT_SUBREG + QPR:$src3, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; + def : Pat<(v4i32 (int_arm_neon_vqadds + (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqrdmulh + (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), + imm:$lane)))))), + (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1), + (v4i32 QPR:$src2), + (v2i32 (EXTRACT_SUBREG + QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + + // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract Long + // (Q -= D * D) + defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", + null_frag>; + def : Pat<(v4i16 (int_arm_neon_vqsubs + (v4i16 DPR:$src1), + (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; + def : Pat<(v2i32 (int_arm_neon_vqsubs + (v2i32 DPR:$src1), + (v2i32 
(int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; + def : Pat<(v8i16 (int_arm_neon_vqsubs + (v8i16 QPR:$src1), + (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), + (v8i16 QPR:$Vm))))), + (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; + def : Pat<(v4i32 (int_arm_neon_vqsubs + (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), + (v4i32 QPR:$Vm))))), + (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; + + defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", + null_frag>; + def : Pat<(v4i16 (int_arm_neon_vqsubs + (v4i16 DPR:$src1), + (v4i16 (int_arm_neon_vqrdmulh + (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>; + def : Pat<(v2i32 (int_arm_neon_vqsubs + (v2i32 DPR:$src1), + (v2i32 (int_arm_neon_vqrdmulh + (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, + imm:$lane))>; + def : Pat<(v8i16 (int_arm_neon_vqsubs + (v8i16 QPR:$src1), + (v8i16 (int_arm_neon_vqrdmulh + (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), + imm:$lane)))))), + (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1), + (v8i16 QPR:$src2), + (v4i16 (EXTRACT_SUBREG + QPR:$src3, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; + def : Pat<(v4i32 (int_arm_neon_vqsubs + (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqrdmulh + (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), + imm:$lane)))))), + (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1), + (v4i32 QPR:$src2), + (v2i32 (EXTRACT_SUBREG + QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; +} // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, "vqdmlal", "s", null_frag>; 
Index: lib/Target/ARM/ARMInstrThumb2.td =================================================================== --- lib/Target/ARM/ARMInstrThumb2.td +++ lib/Target/ARM/ARMInstrThumb2.td @@ -4281,6 +4281,23 @@ //===----------------------------------------------------------------------===// +// ARMv8.1 Privilege Access Never extension +// +// SETPAN #imm1 + +def t2SETPAN : T1I<(outs), (ins imm0_1:$imm), NoItinerary, "setpan\t$imm", []>, + T1Misc<0b0110000>, Requires<[IsThumb2, HasV8, HasV8_1a]> { + bits<1> imm; + + let Inst{4} = 0b1; + let Inst{3} = imm; + let Inst{2-0} = 0b000; + + let Unpredictable{4} = 0b1; + let Unpredictable{2-0} = 0b111; +} + +//===----------------------------------------------------------------------===// // Non-Instruction Patterns // Index: lib/Target/ARM/ARMSubtarget.h =================================================================== --- lib/Target/ARM/ARMSubtarget.h +++ lib/Target/ARM/ARMSubtarget.h @@ -182,6 +182,9 @@ /// HasCRC - if true, processor supports CRC instructions bool HasCRC; + /// HasV8_1a - if true, the processor has V8.1a: five extensions over V8a + bool HasV8_1a; + /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are /// particularly effective at zeroing a VFP register. 
bool HasZeroCycleZeroing; @@ -310,6 +313,7 @@ bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } + bool hasV8_1a() const { return HasV8_1a; } bool hasVirtualization() const { return HasVirtualization; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; Index: lib/Target/ARM/ARMSubtarget.cpp =================================================================== --- lib/Target/ARM/ARMSubtarget.cpp +++ lib/Target/ARM/ARMSubtarget.cpp @@ -166,6 +166,7 @@ HasTrustZone = false; HasCrypto = false; HasCRC = false; + HasV8_1a = false; HasZeroCycleZeroing = false; AllowsUnalignedMem = false; Thumb2DSP = false; Index: lib/Target/ARM/AsmParser/ARMAsmParser.cpp =================================================================== --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -276,6 +276,9 @@ bool hasD16() const { return STI.getFeatureBits() & ARM::FeatureD16; } + bool hasV8_1a() const { + return STI.getFeatureBits() & ARM::FeatureV8_1a; + } void SwitchMode() { uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb)); @@ -5442,6 +5445,7 @@ Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic.startswith("aes") || Mnemonic == "hvc" || + Mnemonic == "setpan" || Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") || (FullInst.startswith("vmull") && FullInst.endswith(".p64"))) { // These mnemonics are never predicable Index: lib/Target/ARM/Disassembler/ARMDisassembler.cpp =================================================================== --- lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -212,6 +212,10 @@ uint64_t Address, const void *Decoder); static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, uint64_t 
Address, const void *Decoder); +static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, @@ -2119,6 +2123,54 @@ return S; } +static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Pred = fieldFromInstruction(Insn, 28, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + + if (Pred == 0xF) + return DecodeSETPANInstruction(Inst, Insn, Address, Decoder); + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, Pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Imm = fieldFromInstruction(Insn, 9, 1); + + const MCDisassembler *Dis = static_cast(Decoder); + uint64_t FeatureBits = Dis->getSubtargetInfo().getFeatureBits(); + if ((FeatureBits & ARM::FeatureV8_1a) == 0 || + (FeatureBits & ARM::HasV8Ops) == 0) + return MCDisassembler::Fail; + + // Decoder can be called from DecodeTST, which does not check the full + // encoding is valid. 
+ if (fieldFromInstruction(Insn, 20,12) != 0xf11 || + fieldFromInstruction(Insn, 4,4) != 0) + return MCDisassembler::Fail; + if (fieldFromInstruction(Insn, 10,10) != 0 || + fieldFromInstruction(Insn, 0,4) != 0) + S = MCDisassembler::SoftFail; + + Inst.setOpcode(ARM::SETPAN); + Inst.addOperand(MCOperand::CreateImm(Imm)); + + return S; +} + static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; Index: lib/Target/ARM/MCTargetDesc/ARMArchName.def =================================================================== --- lib/Target/ARM/MCTargetDesc/ARMArchName.def +++ lib/Target/ARM/MCTargetDesc/ARMArchName.def @@ -44,6 +44,8 @@ ARM_ARCH_ALIAS("armv7m", ARMV7M) ARM_ARCH_NAME("armv8-a", ARMV8A, "8-A", v8) ARM_ARCH_ALIAS("armv8a", ARMV8A) +ARM_ARCH_NAME("armv8.1-a", ARMV8_1A, "8.1-A", v8) +ARM_ARCH_ALIAS("armv8.1a", ARMV8_1A) ARM_ARCH_NAME("iwmmxt", IWMMXT, "iwmmxt", v5TE) ARM_ARCH_NAME("iwmmxt2", IWMMXT2, "iwmmxt2", v5TE) Index: lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp =================================================================== --- lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -817,6 +817,7 @@ break; case ARM::ARMV8A: + case ARM::ARMV8_1A: setAttributeItem(CPU_arch_profile, ApplicationProfile, false); setAttributeItem(ARM_ISA_use, Allowed, false); setAttributeItem(THUMB_ISA_use, AllowThumb32, false); @@ -914,9 +915,8 @@ setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPARMv8A, /* OverwriteExisting= */ false); - setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch, - ARMBuildAttrs::AllowNeonARMv8, - /* OverwriteExisting= */ false); + // 'Advanced_SIMD_arch' must be emitted not here, but within + // ARMAsmPrinter::emitAttributes(), depending on hasV8Ops() and hasV8_1a() break; case ARM::SOFTVFP: Index: lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp 
=================================================================== --- lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -153,6 +153,17 @@ // Use CPU to figure out the exact features ARMArchFeature = "+v8"; break; + case Triple::ARMSubArch_v8_1a: + if (NoCPU) + // v8.1a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, + // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, + // FeatureT2XtPk, FeatureCrypto, FeatureCRC, FeatureV8_1a + ARMArchFeature = "+v8.1a,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm," + "+trustzone,+t2xtpk,+crypto,+crc"; + else + // Use CPU to figure out the exact features + ARMArchFeature = "+v8.1a"; + break; case Triple::ARMSubArch_v7m: isThumb = true; if (NoCPU) Index: test/CodeGen/AArch64/arm64-neon-v8.1a.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/arm64-neon-v8.1a.ll @@ -0,0 +1,408 @@ +; RUN: llc < %s -verify-machineinstrs -march=arm64 | FileCheck %s --check-prefix=CHECK-V8a +; RUN: llc < %s -verify-machineinstrs -march=arm64 -mattr=+v8.1a | FileCheck %s --check-prefix=CHECK-V81a +; RUN: llc < %s -verify-machineinstrs -march=arm64 -mattr=+v8.1a -aarch64-neon-syntax=apple | FileCheck %s --check-prefix=CHECK-V81a-apple + +declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) +declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) +declare i32 @llvm.aarch64.neon.sqrdmulh.i32(i32, i32) +declare i16 @llvm.aarch64.neon.sqrdmulh.i16(i16, i16) + +declare <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16>, <8 x i16>) +declare <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x 
i32>) +declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32) +declare i16 @llvm.aarch64.neon.sqadd.i16(i16, i16) + +declare <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16>, <8 x i16>) +declare <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) +declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32) +declare i16 @llvm.aarch64.neon.sqsub.i16(i16, i16) + +;----------------------------------------------------------------------------- +; RDMA Vector +; test for SIMDThreeSameVectorSQRDMLxHTiedHS + +define <4 x i16> @test_sqrdmlah_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) { +; CHECK-LABEL: test_sqrdmlah_v4i16: + %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs) + %retval = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc, <4 x i16> %prod) +; CHECK-V8a: sqrdmulh v1.4h, v1.4h, v2.4h +; CHECK-V81a: sqrdmlah v0.4h, v1.4h, v2.4h +; CHECK-V81a-apple: sqrdmlah.4h v0, v1, v2 + ret <4 x i16> %retval +} + +define <8 x i16> @test_sqrdmlah_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) { +; CHECK-LABEL: test_sqrdmlah_v8i16: + %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs) + %retval = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %acc, <8 x i16> %prod) +; CHECK-V8a: sqrdmulh v1.8h, v1.8h, v2.8h +; CHECK-V81a: sqrdmlah v0.8h, v1.8h, v2.8h +; CHECK-V81a-apple: sqrdmlah.8h v0, v1, v2 + ret <8 x i16> %retval +} + +define <2 x i32> @test_sqrdmlah_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) { +; CHECK-LABEL: test_sqrdmlah_v2i32: + %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs) + %retval = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %acc, <2 x i32> %prod) +; CHECK-V8a: sqrdmulh v1.2s, v1.2s, v2.2s +; CHECK-V81a: sqrdmlah v0.2s, v1.2s, v2.2s +; 
CHECK-V81a-apple: sqrdmlah.2s v0, v1, v2 + ret <2 x i32> %retval +} + +define <4 x i32> @test_sqrdmlah_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) { +; CHECK-LABEL: test_sqrdmlah_v4i32: + %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs) + %retval = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %acc, <4 x i32> %prod) +; CHECK-V81: sqrdmulh v1.4s, v1.4s, v2.4s +; CHECK-V81a: sqrdmlah v0.4s, v1.4s, v2.4s +; CHECK-V81a-apple: sqrdmlah.4s v0, v1, v2 + ret <4 x i32> %retval +} + +define <4 x i16> @test_sqrdmlsh_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) { +; CHECK-LABEL: test_sqrdmlsh_v4i16: + %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs) + %retval = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc, <4 x i16> %prod) +; CHECK-V8a: sqrdmulh v1.4h, v1.4h, v2.4h +; CHECK-V81a: sqrdmlsh v0.4h, v1.4h, v2.4h +; CHECK-V81a-apple: sqrdmlsh.4h v0, v1, v2 + ret <4 x i16> %retval +} + +define <8 x i16> @test_sqrdmlsh_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) { +; CHECK-LABEL: test_sqrdmlsh_v8i16: + %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs) + %retval = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc, <8 x i16> %prod) +; CHECK-V8a: sqrdmulh v1.8h, v1.8h, v2.8h +; CHECK-V81a: sqrdmlsh v0.8h, v1.8h, v2.8h +; CHECK-V81a-apple: sqrdmlsh.8h v0, v1, v2 + ret <8 x i16> %retval +} + +define <2 x i32> @test_sqrdmlsh_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) { +; CHECK-LABEL: test_sqrdmlsh_v2i32: + %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs) + %retval = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %acc, <2 x i32> %prod) +; CHECK-V8a: sqrdmulh v1.2s, v1.2s, v2.2s +; CHECK-V81a: sqrdmlsh v0.2s, v1.2s, v2.2s +; CHECK-V81a-apple: sqrdmlsh.2s v0, v1, v2 + ret <2 x i32> %retval +} + +define <4 x i32> @test_sqrdmlsh_v4i32(<4 x i32> %acc, <4 
x i32> %mhs, <4 x i32> %rhs) { +; CHECK-LABEL: test_sqrdmlsh_v4i32: + %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs) + %retval = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %acc, <4 x i32> %prod) +; CHECK-V8a: sqrdmulh v1.4s, v1.4s, v2.4s +; CHECK-V81a: sqrdmlsh v0.4s, v1.4s, v2.4s +; CHECK-V81a-apple: sqrdmlsh.4s v0, v1, v2 + ret <4 x i32> %retval +} + +;----------------------------------------------------------------------------- +; RDMA Vector, by element +; tests for vXiYY_indexed, vXiYY_indexed in SIMDIndexedSQRDMLxHSDTied + +define <4 x i16> @test_sqrdmlah_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) { +; CHECK-LABEL: test_sqrdmlah_lane_s16: +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> + %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle) + %retval = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc, <4 x i16> %prod) +; CHECK-V8a : sqrdmulh v1.4h, v1.4h, v2.h[3] +; CHECK-V81a: sqrdmlah v0.4h, v1.4h, v2.h[3] +; CHECK-V81a-apple: sqrdmlah.4h v0, v1, v2[3] + ret <4 x i16> %retval +} + +define <8 x i16> @test_sqrdmlahq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <8 x i16> %v) { +; CHECK-LABEL: test_sqrdmlahq_lane_s16: +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> + %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle) + %retval = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %acc, <8 x i16> %prod) +; CHECK-V8a: sqrdmulh v1.8h, v1.8h, v2.h[2] +; CHECK-V81a: sqrdmlah v0.8h, v1.8h, v2.h[2] +; CHECK-V81a-apple: sqrdmlah.8h v0, v1, v2[2] + ret <8 x i16> %retval +} + +define <2 x i32> @test_sqrdmlah_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) { +; CHECK-LABEL: test_sqrdmlah_lane_s32: +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> + %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle) 
+ %retval = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %acc, <2 x i32> %prod) +; CHECK-V8a: sqrdmulh v1.2s, v1.2s, v2.s[1] +; CHECK-V81a: sqrdmlah v0.2s, v1.2s, v2.s[1] +; CHECK-V81a-apple: sqrdmlah.2s v0, v1, v2[1] + ret <2 x i32> %retval +} + +define <4 x i32> @test_sqrdmlahq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <4 x i32> %v) { +; CHECK-LABEL: test_sqrdmlahq_lane_s32: +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer + %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle) + %retval = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %acc, <4 x i32> %prod) +; CHECK-V8a: sqrdmulh v1.4s, v1.4s, v2.s[0] +; CHECK-V81a: sqrdmlah v0.4s, v1.4s, v2.s[0] +; CHECK-V81a-apple: sqrdmlah.4s v0, v1, v2[0] + ret <4 x i32> %retval +} + +define <4 x i16> @test_sqrdmlsh_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) { +; CHECK-LABEL: test_sqrdmlsh_lane_s16: +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> + %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle) + %retval = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc, <4 x i16> %prod) +; CHECK-V8a: sqrdmulh v1.4h, v1.4h, v2.h[3] +; CHECK-V81a: sqrdmlsh v0.4h, v1.4h, v2.h[3] +; CHECK-V81a-apple: sqrdmlsh.4h v0, v1, v2[3] + ret <4 x i16> %retval +} + +define <8 x i16> @test_sqrdmlshq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <8 x i16> %v) { +; CHECK-LABEL: test_sqrdmlshq_lane_s16: +entry: + %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> + %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle) + %retval = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc, <8 x i16> %prod) +; CHECK-V8a: sqrdmulh v1.8h, v1.8h, v2.h[2] +; CHECK-V81a: sqrdmlsh v0.8h, v1.8h, v2.h[2] +; CHECK-V81a-apple: sqrdmlsh.8h v0, v1, v2[2] + ret <8 x i16> %retval +} + +define <2 x i32> @test_sqrdmlsh_lane_s32(<2 x i32> %acc, 
<2 x i32> %x, <2 x i32> %v) { +; CHECK-LABEL: test_sqrdmlsh_lane_s32: +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> + %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle) + %retval = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %acc, <2 x i32> %prod) +; CHECK-V8a: sqrdmulh v1.2s, v1.2s, v2.s[1] +; CHECK-V81a: sqrdmlsh v0.2s, v1.2s, v2.s[1] +; CHECK-V81a-apple: sqrdmlsh.2s v0, v1, v2[1] + ret <2 x i32> %retval +} + +define <4 x i32> @test_sqrdmlshq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <4 x i32> %v) { +; CHECK-LABEL: test_sqrdmlshq_lane_s32: +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer + %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle) + %retval = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %acc, <4 x i32> %prod) +; CHECK-V8a: sqrdmulh v1.4s, v1.4s, v2.s[0] +; CHECK-V81a: sqrdmlsh v0.4s, v1.4s, v2.s[0] +; CHECK-V81a-apple: sqrdmlsh.4s v0, v1, v2[0] + ret <4 x i32> %retval +} + +;----------------------------------------------------------------------------- +; RDMA Vector, by element, extracted +; tests for "def : Pat" in SIMDIndexedSQRDMLxHSDTied + +; FIXME: after fix of https://llvm.org/bugs/show_bug.cgi?id=22886 +; uncomment this function, and replace "cHECK" for "CHECK" +;define i16 @test_sqrdmlah_extracted_lane_s16(i16 %acc,<4 x i16> %x, <4 x i16> %v) { +;; cHECK-LABEL: test_sqrdmlah_extracted_lane_s16: +;entry: +; %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer +; %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle) +; %extract = extractelement <4 x i16> %prod, i64 0 +; %retval = call i16 @llvm.aarch64.neon.sqadd.i16(i16 %acc, i16 %extract) +;; cHECK: sqrdmlah {{v[2-9]+}}.4h, v0.4h, v1.h[0] +; ret i16 %retval +;} + +; FIXME: after fix of https://llvm.org/bugs/show_bug.cgi?id=22886 +; uncomment this function, and 
replace "cHECK" for "CHECK" +;define i16 @test_sqrdmlahq_extracted_lane_s16(i16 %acc,<8 x i16> %x, <8 x i16> %v) { +;; cHECK-LABEL: test_sqrdmlahq_extracted_lane_s16: +;entry: +; %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer +; %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle) +; %extract = extractelement <8 x i16> %prod, i64 0 +; %retval = call i16 @llvm.aarch64.neon.sqadd.i16(i16 %acc, i16 %extract) +;; cHECK: sqrdmlah {{v[2-9]+}}.8h, v0.8h, v1.h[0] +; ret i16 %retval +;} + +define i32 @test_sqrdmlah_extracted_lane_s32(i32 %acc,<2 x i32> %x, <2 x i32> %v) { +; CHECK-LABEL: test_sqrdmlah_extracted_lane_s32: +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle) + %extract = extractelement <2 x i32> %prod, i64 0 + %retval = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %extract) +; CHECK-V8a: sqrdmulh v0.2s, v0.2s, v1.s[0] +; CHECK-V81a: sqrdmlah v2.2s, v0.2s, v1.s[0] +; CHECK-V81a-apple: sqrdmlah.2s v2, v0, v1[0] + ret i32 %retval +} + +define i32 @test_sqrdmlahq_extracted_lane_s32(i32 %acc,<4 x i32> %x, <4 x i32> %v) { +; CHECK-LABEL: test_sqrdmlahq_extracted_lane_s32: +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer + %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle) + %extract = extractelement <4 x i32> %prod, i64 0 + %retval = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %extract) +; CHECK-V8a: sqrdmulh v0.4s, v0.4s, v1.s[0] +; CHECK-V81a: sqrdmlah v2.4s, v0.4s, v1.s[0] +; CHECK-V81a-apple: sqrdmlah.4s v2, v0, v1[0] + ret i32 %retval +} + +; FIXME: after fix of https://llvm.org/bugs/show_bug.cgi?id=22886 +; uncomment this function, and replace "cHECK" for "CHECK" +;define i16 @test_sqrdmlsh_extracted_lane_s16(i16 %acc,<4 x i16> %x, <4 x i16> %v) { +;; 
cHECK-LABEL: test_sqrdmlsh_extracted_lane_s16: +;entry: +; %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer +; %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle) +; %extract = extractelement <4 x i16> %prod, i64 0 +; %retval = call i16 @llvm.aarch64.neon.sqsub.i16(i16 %acc, i16 %extract) +;; cHECK: sqrdmlah {{v[2-9]+}}.4h, v0.4h, v1.h[0] +; ret i16 %retval +;} + +; FIXME: after fix of https://llvm.org/bugs/show_bug.cgi?id=22886 +; uncomment this function, and replace "cHECK" for "CHECK" +;define i16 @test_sqrdmlshq_extracted_lane_s16(i16 %acc,<8 x i16> %x, <8 x i16> %v) { +;; cHECK-LABEL: test_sqrdmlshq_extracted_lane_s16: +;entry: +; %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer +; %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle) +; %extract = extractelement <8 x i16> %prod, i64 0 +; %retval = call i16 @llvm.aarch64.neon.sqsub.i16(i16 %acc, i16 %extract) +;; cHECK: sqrdmlah {{v[0-9]+}}.8h, v0.8h, v1.h[0] +; ret i16 %retval +;} + +define i32 @test_sqrdmlsh_extracted_lane_s32(i32 %acc,<2 x i32> %x, <2 x i32> %v) { +; CHECK-LABEL: test_sqrdmlsh_extracted_lane_s32: +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer + %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle) + %extract = extractelement <2 x i32> %prod, i64 0 + %retval = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %extract) +; CHECK-V8a: sqrdmulh v0.2s, v0.2s, v1.s[0] +; CHECK-V81a: sqrdmlsh v2.2s, v0.2s, v1.s[0] +; CHECK-V81a-apple: sqrdmlsh.2s v2, v0, v1[0] + ret i32 %retval +} + +define i32 @test_sqrdmlshq_extracted_lane_s32(i32 %acc,<4 x i32> %x, <4 x i32> %v) { +; CHECK-LABEL: test_sqrdmlshq_extracted_lane_s32: +entry: + %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer + %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> 
%x, <4 x i32> %shuffle) + %extract = extractelement <4 x i32> %prod, i64 0 + %retval = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %extract) +; CHECK-V8a: sqrdmulh v0.4s, v0.4s, v1.s[0] +; CHECK-V81a: sqrdmlsh v2.4s, v0.4s, v1.s[0] +; CHECK-V81a-apple: sqrdmlsh.4s v2, v0, v1[0] + ret i32 %retval +} + +;----------------------------------------------------------------------------- +; RDMA Scalar +; test for "def : Pat" near SIMDThreeScalarHSTied in AArch64InstInfo.td + +; FIXME: after fix of https://llvm.org/bugs/show_bug.cgi?id=22886 +; uncomment this function, and replace "cHECK" for "CHECK" +;define i16 @test_sqrdmlah_i16(i16 %acc, i16 %mhs, i16 %rhs) { +;; cHECK-LABEL: test_sqrdmlah_i16: +; %prod = call i16 @llvm.aarch64.neon.sqrdmulh.i16(i16 %mhs, i16 %rhs) +; %retval = call i16 @llvm.aarch64.neon.sqadd.i16(i16 %acc, i16 %prod) +;; cHECK: sqrdmlah {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; ret i16 %retval +;} + +define i32 @test_sqrdmlah_i32(i32 %acc, i32 %mhs, i32 %rhs) { +; CHECK-LABEL: test_sqrdmlah_i32: + %prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs, i32 %rhs) + %retval = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %prod) +; CHECK-V8a: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK-V81a: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK-V81a-apple: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret i32 %retval +} + +; FIXME: after fix of https://llvm.org/bugs/show_bug.cgi?id=22886 +; uncomment this function, and replace "cHECK" for "CHECK" +;define i16 @test_sqrdmlsh_i16(i16 %acc, i16 %mhs, i16 %rhs) { +;; cHECK-LABEL: test_sqrdmlsh_i16: +; %prod = call i16 @llvm.aarch64.neon.sqrdmulh.i16(i16 %mhs, i16 %rhs) +; %retval = call i16 @llvm.aarch64.neon.sqsub.i16(i16 %acc, i16 %prod) +;; cHECK: sqrdmlsh {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; ret i16 %retval +;} + +define i32 @test_sqrdmlsh_i32(i32 %acc, i32 %mhs, i32 %rhs) { +; CHECK-LABEL: test_sqrdmlsh_i32: + %prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 
%mhs, i32 %rhs) + %retval = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %prod) +; CHECK-V8a: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK-V81a: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} +; CHECK-V81a-apple: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret i32 %retval +} + +;----------------------------------------------------------------------------- +; RDMA Scalar, by element +; tests for iYY_indexed in SIMDIndexedSQRDMLxHSDTied + +; FIXME: after fix of https://llvm.org/bugs/show_bug.cgi?id=22886 +; uncomment this function, and replace "cHECK" for "CHECK" +;define i16 @test_sqrdmlah_extract_i16(i16 %acc, i16 %mhs, <4 x i16> %rhs) { +;; cHECK-LABEL: test_sqrdmlah_extract_i32: +; %extract = extractelement <4 x i16> %rhs, i32 3 +; %prod = call i16 @llvm.aarch64.neon.sqrdmulh.i16(i16 %mhs, i16 %extract) +; %retval = call i16 @llvm.aarch64.neon.sqadd.i16(i16 %acc, i16 %prod) +;; cHECK: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3] +; ret i16 %retval +;} + +define i32 @test_sqrdmlah_extract_i32(i32 %acc, i32 %mhs, <4 x i32> %rhs) { +; CHECK-LABEL: test_sqrdmlah_extract_i32: + %extract = extractelement <4 x i32> %rhs, i32 3 + %prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs, i32 %extract) + %retval = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %prod) +; CHECK-V8a: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3] +; CHECK-V81a: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3] +; CHECK-V81a-apple: sqrdmlah.s {{s[0-9]+}}, {{s[0-9]+}}, v0[3] + ret i32 %retval +} + +; FIXME: after fix of https://llvm.org/bugs/show_bug.cgi?id=22886 +; uncomment this function, and replace "cHECK" for "CHECK" +;define i16 @test_sqrdmlsh_extract_i16(i16 %acc, i16 %mhs, <4 x i16> %rhs) { +;; cHECK-LABEL: test_sqrdmlsh_extract_i32: +; %extract = extractelement <4 x i16> %rhs, i32 3 +; %prod = call i16 @llvm.aarch64.neon.sqrdmulh.i16(i16 %mhs, i16 %extract) +; %retval = call i16 @llvm.aarch64.neon.sqsub.i16(i16 %acc, i16 %prod) +;; cHECK: sqrdmlsh {{s[0-9]+}}, 
{{s[0-9]+}}, v0.s[3] +; ret i16 %retval +;} + +define i32 @test_sqrdmlsh_extract_i32(i32 %acc, i32 %mhs, <4 x i32> %rhs) { +; CHECK-LABEL: test_sqrdmlsh_extract_i32: + %extract = extractelement <4 x i32> %rhs, i32 3 + %prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs, i32 %extract) + %retval = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %prod) +; CHECK-V8a: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3] +; CHECK-V81a: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3] +; CHECK-V81a-apple: sqrdmlsh.s {{s[0-9]+}}, {{s[0-9]+}}, v0[3] + ret i32 %retval +} Index: test/CodeGen/ARM/build-attributes.ll =================================================================== --- test/CodeGen/ARM/build-attributes.ll +++ test/CodeGen/ARM/build-attributes.ll @@ -96,6 +96,9 @@ ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a72 | FileCheck %s --check-prefix=CORTEX-A72 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a72 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A72-FAST ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a72 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING +; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi | FileCheck %s --check-prefix=GENERIC-ARMV8_1-A +; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=GENERIC-ARMV8_1-A-FAST +; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 | FileCheck %s --check-prefix=CORTEX-A7-CHECK ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s 
--check-prefix=CORTEX-A7-CHECK-FAST ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=-vfp2,-vfp3,-vfp4,-neon | FileCheck %s --check-prefix=CORTEX-A7-NOFPU @@ -112,6 +115,10 @@ ; RUN: llc < %s -mtriple=arm-none-linux-gnueabi | FileCheck %s --check-prefix=PCS-R9-USE ; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -arm-reserve-r9 | FileCheck %s --check-prefix=PCS-R9-RESERVE +; ARMv8.1a (AArch32) +; RUN: llc < %s -mtriple=armv8.1a-none-linux-gnueabi -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN +; RUN: llc < %s -mtriple=armv8.1a-none-linux-gnueabi -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN +; RUN: llc < %s -mtriple=armv8.1a-none-linux-gnueabi | FileCheck %s --check-prefix=NO-STRICT-ALIGN ; ARMv8a (AArch32) ; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a57 -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN ; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a57 -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN @@ -1153,6 +1160,35 @@ ; CORTEX-A72-FAST-NOT: .eabi_attribute 22 ; CORTEX-A72-FAST: .eabi_attribute 23, 1 +; GENERIC-ARMV8_1-A: .eabi_attribute 6, 14 +; GENERIC-ARMV8_1-A: .eabi_attribute 7, 65 +; GENERIC-ARMV8_1-A: .eabi_attribute 8, 1 +; GENERIC-ARMV8_1-A: .eabi_attribute 9, 2 +; GENERIC-ARMV8_1-A: .fpu crypto-neon-fp-armv8 +; GENERIC-ARMV8_1-A: .eabi_attribute 12, 4 +; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 19 +;; We default to IEEE 754 compliance +; GENERIC-ARMV8_1-A: .eabi_attribute 20, 1 +; GENERIC-ARMV8_1-A: .eabi_attribute 21, 1 +; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 22 +; GENERIC-ARMV8_1-A: .eabi_attribute 23, 3 +; GENERIC-ARMV8_1-A: .eabi_attribute 24, 1 +; GENERIC-ARMV8_1-A: .eabi_attribute 25, 1 +; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 27 +; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 28 +; GENERIC-ARMV8_1-A: .eabi_attribute 36, 1 +; GENERIC-ARMV8_1-A: .eabi_attribute 38, 1 +; GENERIC-ARMV8_1-A: .eabi_attribute 42, 1 +; 
GENERIC-ARMV8_1-A-NOT: .eabi_attribute 44 +; GENERIC-ARMV8_1-A: .eabi_attribute 68, 3 + +; GENERIC-ARMV8_1-A-FAST-NOT: .eabi_attribute 19 +;; GENERIC-ARMV8_1-A has the ARMv8 FP unit, which always flushes preserving sign. +; GENERIC-ARMV8_1-A-FAST: .eabi_attribute 20, 2 +; GENERIC-ARMV8_1-A-FAST-NOT: .eabi_attribute 21 +; GENERIC-ARMV8_1-A-FAST-NOT: .eabi_attribute 22 +; GENERIC-ARMV8_1-A-FAST: .eabi_attribute 23, 1 + ; RELOC-PIC: .eabi_attribute 15, 1 ; RELOC-PIC: .eabi_attribute 16, 1 ; RELOC-PIC: .eabi_attribute 17, 2 Index: test/CodeGen/ARM/neon-v8.1a.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/neon-v8.1a.ll @@ -0,0 +1,166 @@ +; RUN: llc < %s -mtriple=armv8 -mattr=+v8.1a | FileCheck %s + +;----------------------------------------------------------------------------- +; RDMA Vector + +declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) +declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) + +declare <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16>, <8 x i16>) +declare <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) + +declare <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16>, <8 x i16>) +declare <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) + +define <4 x i16> @test_vqrdmlah_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) { +; CHECK-LABEL: test_vqrdmlah_v4i16: + %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs) + %retval = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %acc, <4 x i16> 
%prod) +; CHECK: vqrdmlah.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + ret <4 x i16> %retval +} + +define <8 x i16> @test_vqrdmlah_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) { +; CHECK-LABEL: test_vqrdmlah_v8i16: + %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs) + %retval = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %acc, <8 x i16> %prod) +; CHECK: vqrdmlah.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} + ret <8 x i16> %retval +} + +define <2 x i32> @test_vqrdmlah_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) { +; CHECK-LABEL: test_vqrdmlah_v2i32: + %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs) + %retval = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %acc, <2 x i32> %prod) +; CHECK: vqrdmlah.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + ret <2 x i32> %retval +} + +define <4 x i32> @test_vqrdmlah_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) { +; CHECK-LABEL: test_vqrdmlah_v4i32: + %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs) + %retval = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %acc, <4 x i32> %prod) +; CHECK: vqrdmlah.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} + ret <4 x i32> %retval +} + +define <4 x i16> @test_vqrdmlsh_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) { +; CHECK-LABEL: test_vqrdmlsh_v4i16: + %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs) + %retval = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %acc, <4 x i16> %prod) +; CHECK: vqrdmlsh.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + ret <4 x i16> %retval +} + +define <8 x i16> @test_vqrdmlsh_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) { +; CHECK-LABEL: test_vqrdmlsh_v8i16: + %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs) + %retval = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %acc, <8 x i16> %prod) +; CHECK: vqrdmlsh.s16 {{q[0-9]+}}, 
{{q[0-9]+}}, {{q[0-9]+}} + ret <8 x i16> %retval +} + +define <2 x i32> @test_vqrdmlsh_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) { +; CHECK-LABEL: test_vqrdmlsh_v2i32: + %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs) + %retval = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %acc, <2 x i32> %prod) +; CHECK: vqrdmlsh.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + ret <2 x i32> %retval +} + +define <4 x i32> @test_vqrdmlsh_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) { +; CHECK-LABEL: test_vqrdmlsh_v4i32: + %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs) + %retval = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %acc, <4 x i32> %prod) +; CHECK: vqrdmlsh.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} + ret <4 x i32> %retval +} + +;----------------------------------------------------------------------------- +; RDMA Scalar + +define <4 x i16> @test_vqrdmlah_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) { +; CHECK-LABEL: test_vqrdmlah_lane_s16: +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> + %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle) + %retval = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %acc, <4 x i16> %prod) +; CHECK: vqrdmlah.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[3] + ret <4 x i16> %retval +} + +define <8 x i16> @test_vqrdmlahq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16> %v) { +; CHECK-LABEL: test_vqrdmlahq_lane_s16: +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> + %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle) + %retval = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %acc, <8 x i16> %prod) +; CHECK: vqrdmlah.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[2] + ret <8 x i16> %retval +} + +define <2 x i32> @test_vqrdmlah_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) { +; CHECK-LABEL: 
test_vqrdmlah_lane_s32: +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> + %prod = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle) + %retval = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %acc, <2 x i32> %prod) +; CHECK: vqrdmlah.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[1] + ret <2 x i32> %retval +} + +define <4 x i32> @test_vqrdmlahq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <2 x i32> %v) { +; CHECK-LABEL: test_vqrdmlahq_lane_s32: +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer + %prod = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle) + %retval = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %acc, <4 x i32> %prod) +; CHECK: vqrdmlah.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[0] + ret <4 x i32> %retval +} + +define <4 x i16> @test_vqrdmlsh_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) { +; CHECK-LABEL: test_vqrdmlsh_lane_s16: +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> + %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle) + %retval = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %acc, <4 x i16> %prod) +; CHECK: vqrdmlsh.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[3] + ret <4 x i16> %retval +} + +define <8 x i16> @test_vqrdmlshq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16> %v) { +; CHECK-LABEL: test_vqrdmlshq_lane_s16: +entry: + %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> + %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle) + %retval = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %acc, <8 x i16> %prod) +; CHECK: vqrdmlsh.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[2] + ret <8 x i16> %retval +} + +define <2 x i32> @test_vqrdmlsh_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) { +; CHECK-LABEL: test_vqrdmlsh_lane_s32: +entry: + %shuffle = shufflevector <2 x i32> %v, 
<2 x i32> undef, <2 x i32> + %prod = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle) + %retval = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %acc, <2 x i32> %prod) +; CHECK: vqrdmlsh.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[1] + ret <2 x i32> %retval +} + +define <4 x i32> @test_vqrdmlshq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <2 x i32> %v) { +; CHECK-LABEL: test_vqrdmlshq_lane_s32: +entry: + %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer + %prod = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle) + %retval = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %acc, <4 x i32> %prod) +; CHECK: vqrdmlsh.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[0] + ret <4 x i32> %retval +} Index: test/MC/AArch64/armv8-extension-atomic.s =================================================================== --- /dev/null +++ test/MC/AArch64/armv8-extension-atomic.s @@ -0,0 +1,163 @@ +// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.1a -show-encoding < %s 2> %t | FileCheck %s +// RUN: FileCheck --check-prefix=CHECK-ERROR <%t %s + .text + + //8 bits + casb w0, w1, [x2] + casab w0, w1, [x2] + caslb w0, w1, [x2] + casalb w0, w1, [x2] + +//CHECK: casb w0, w1, [x2] // encoding: [0x41,0x7c,0xa0,0x08] +//CHECK: casab w0, w1, [x2] // encoding: [0x41,0x7c,0xe0,0x08] +//CHECK: caslb w0, w1, [x2] // encoding: [0x41,0xfc,0xa0,0x08] +//CHECK: casalb w0, w1, [x2] // encoding: [0x41,0xfc,0xe0,0x08] + + casb w0, w1, [w2] + casalb x0, x1, [x2] +//CHECK-ERROR: error: invalid operand for instruction +//CHECK-ERROR: casb w0, w1, [w2] +//CHECK-ERROR: ^ +//CHECK-ERROR: error: invalid operand for instruction +//CHECK-ERROR: casalb x0, x1, [x2] +//CHECK-ERROR: ^ + + //16 bits + cash w0, w1, [x2] + casah w0, w1, [x2] + caslh w0, w1, [x2] + casalh w0, w1, [x2] + +//CHECK: cash w0, w1, [x2] // encoding: [0x41,0x7c,0xa0,0x48] +//CHECK: casah w0, w1, [x2] // encoding: [0x41,0x7c,0xe0,0x48] 
+//CHECK: caslh w0, w1, [x2] // encoding: [0x41,0xfc,0xa0,0x48] +//CHECK: casalh w0, w1, [x2] // encoding: [0x41,0xfc,0xe0,0x48] + + //32 bits + cas w0, w1, [x2] + casa w0, w1, [x2] + casl w0, w1, [x2] + casal w0, w1, [x2] + +//CHECK: cas w0, w1, [x2] // encoding: [0x41,0x7c,0xa0,0x88] +//CHECK: casa w0, w1, [x2] // encoding: [0x41,0x7c,0xe0,0x88] +//CHECK: casl w0, w1, [x2] // encoding: [0x41,0xfc,0xa0,0x88] +//CHECK: casal w0, w1, [x2] // encoding: [0x41,0xfc,0xe0,0x88] + + cas w0, w1, [w2] + casl w0, x1, [x2] + +//CHECK-ERROR: error: invalid operand for instruction +//CHECK-ERROR: cas w0, w1, [w2] +//CHECK-ERROR: ^ +//CHECK-ERROR: error: invalid operand for instruction +//CHECK-ERROR: casl w0, x1, [x2] +//CHECK-ERROR: ^ + + //64 bits + cas x0, x1, [x2] + casa x0, x1, [x2] + casl x0, x1, [x2] + casal x0, x1, [x2] + +//CHECK: cas x0, x1, [x2] // encoding: [0x41,0x7c,0xa0,0xc8] +//CHECK: casa x0, x1, [x2] // encoding: [0x41,0x7c,0xe0,0xc8] +//CHECK: casl x0, x1, [x2] // encoding: [0x41,0xfc,0xa0,0xc8] +//CHECK: casal x0, x1, [x2] // encoding: [0x41,0xfc,0xe0,0xc8] + + casa x0, x1, [w2] + casal x0, w1, [x2] + +//CHECK-ERROR: error: invalid operand for instruction +//CHECK-ERROR: casa x0, x1, [w2] +//CHECK-ERROR: ^ +//CHECK-ERROR: error: invalid operand for instruction +//CHECK-ERROR: casal x0, w1, [x2] +//CHECK-ERROR: ^ + + // LD instructions + ldadda x0, x1, [x2] + ldclrl x0, x1, [x2] + ldeoral x0, x1, [x2] + ldset x0, x1, [x2] + ldsmaxa w0, w1, [x2] + ldsminlb w0, w1, [x2] + ldumaxalh w0, w1, [x2] + ldumin w0, w1, [x2] +//CHECK: ldadda x0, x1, [x2] // encoding: [0x41,0x00,0xa0,0xf8] +//CHECK: ldclrl x0, x1, [x2] // encoding: [0x41,0x10,0x60,0xf8] +//CHECK: ldeoral x0, x1, [x2] // encoding: [0x41,0x20,0xe0,0xf8] +//CHECK: ldset x0, x1, [x2] // encoding: [0x41,0x30,0x20,0xf8] +//CHECK: ldsmaxa w0, w1, [x2] // encoding: [0x41,0x40,0xa0,0xb8] +//CHECK: ldsminlb w0, w1, [x2] // encoding: [0x41,0x50,0x60,0x38] +//CHECK: ldumaxalh w0, w1, [x2] // encoding: 
[0x41,0x60,0xe0,0x78] +//CHECK: ldumin w0, w1, [x2] // encoding: [0x41,0x70,0x20,0xb8] + + // ST instructions: aliases to LD + staddlb w0, [x2] + stclrlh w0, [x2] + steorl w0, [x2] + stsetl x0, [x2] + stsmaxb w0, [x2] + stsminh w0, [x2] + stumax w0, [x2] + stumin x0, [x2] +//CHECK: staddlb w0, [x2] // encoding: [0x5f,0x00,0x60,0x38] +//CHECK: stclrlh w0, [x2] // encoding: [0x5f,0x10,0x60,0x78] +//CHECK: steorl w0, [x2] // encoding: [0x5f,0x20,0x60,0xb8] +//CHECK: stsetl x0, [x2] // encoding: [0x5f,0x30,0x60,0xf8] +//CHECK: stsmaxb w0, [x2] // encoding: [0x5f,0x40,0x20,0x38] +//CHECK: stsminh w0, [x2] // encoding: [0x5f,0x50,0x20,0x78] +//CHECK: stumax w0, [x2] // encoding: [0x5f,0x60,0x20,0xb8] +//CHECK: stumin x0, [x2] // encoding: [0x5f,0x70,0x20,0xf8] + + ldsmax x0, x1, [w2] + ldeorl w0, w1, [w2] +//CHECK-ERROR: error: invalid operand for instruction +//CHECK-ERROR: ldsmax x0, x1, [w2] +//CHECK-ERROR: ^ +//CHECK-ERROR: error: invalid operand for instruction +//CHECK-ERROR: ldeorl w0, w1, [w2] +//CHECK-ERROR: ^ + + //SWP instruction + swp x0, x1, [x2] + swpb w0, w1, [x2] + swplh w0, w1, [x2] + swpal x0, x1, [sp] +//CHECK: swp x0, x1, [x2] // encoding: [0x41,0x80,0x20,0xf8] +//CHECK: swpb w0, w1, [x2] // encoding: [0x41,0x80,0x20,0x38] +//CHECK: swplh w0, w1, [x2] // encoding: [0x41,0x80,0x60,0x78] +//CHECK: swpal x0, x1, [sp] // encoding: [0xe1,0x83,0xe0,0xf8] + + swp x0, x1, [w2] + swp x0, x1, [xzr] +//CHECK-ERROR: error: invalid operand for instruction +//CHECK-ERROR: swp x0, x1, [w2] +//CHECK-ERROR: ^ +//CHECK-ERROR: error: invalid operand for instruction +//CHECK-ERROR: swp x0, x1, [xzr] +//CHECK-ERROR: ^ + + //CASP instruction + casp x0, x1, x2, x3, [x4] + casp w0, w1, w2, w3, [x4] +//CHECK: casp x0, x1, x2, x3, [x4] // encoding: [0x82,0x7c,0x20,0x48] +//CHECK: casp w0, w1, w2, w3, [x4] // encoding: [0x82,0x7c,0x20,0x08] + + casp x1, x2, x4, x5, [x6] + casp x0, x1, x3, x4, [x5] + casp x0, x2, x4, x5, [x6] + casp x0, x1, x2, x4, [x5] +//CHECK-ERROR: error: 
Expected even register +//CHECK-ERROR: casp x1, x2, x4, x5, [x6] +//CHECK-ERROR: ^ +//CHECK-ERROR: error: Expected even register +//CHECK-ERROR: casp x0, x1, x3, x4, [x5] +//CHECK-ERROR: ^ +//CHECK-ERROR: error: Expected consecutive registers +//CHECK-ERROR: casp x0, x2, x4, x5, [x6] +//CHECK-ERROR: ^ +//CHECK-ERROR: error: Expected consecutive registers +//CHECK-ERROR: casp x0, x1, x2, x4, [x5] +//CHECK-ERROR: ^ Index: test/MC/AArch64/armv8-extension-lor.s =================================================================== --- /dev/null +++ test/MC/AArch64/armv8-extension-lor.s @@ -0,0 +1,33 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.1a < %s | FileCheck %s + + +//------------------------------------------------------------------------------ +// Load acquire / store release +//------------------------------------------------------------------------------ + ldlarb w0,[x1] + ldlarh w0,[x1] + ldlar w0,[x1] + ldlar x0,[x1] +// CHECK: ldlarb w0, [x1] // encoding: [0x20,0x7c,0xdf,0x08] +// CHECK: ldlarh w0, [x1] // encoding: [0x20,0x7c,0xdf,0x48] +// CHECK: ldlar w0, [x1] // encoding: [0x20,0x7c,0xdf,0x88] +// CHECK: ldlar x0, [x1] // encoding: [0x20,0x7c,0xdf,0xc8] + stllrb w0,[x1] + stllrh w0,[x1] + stllr w0,[x1] + stllr x0,[x1] +// CHECK: stllrb w0, [x1] // encoding: [0x20,0x7c,0x9f,0x08] +// CHECK: stllrh w0, [x1] // encoding: [0x20,0x7c,0x9f,0x48] +// CHECK: stllr w0, [x1] // encoding: [0x20,0x7c,0x9f,0x88] +// CHECK: stllr x0, [x1] // encoding: [0x20,0x7c,0x9f,0xc8] + + msr LORSA_EL1, x0 + msr LOREA_EL1, x0 + msr LORN_EL1, x0 + msr LORC_EL1, x0 + mrs x0, LORID_EL1 +// CHECK: msr LORSA_EL1, x0 // encoding: [0x00,0xa4,0x18,0xd5] +// CHECK: msr LOREA_EL1, x0 // encoding: [0x20,0xa4,0x18,0xd5] +// CHECK: msr LORN_EL1, x0 // encoding: [0x40,0xa4,0x18,0xd5] +// CHECK: msr LORC_EL1, x0 // encoding: [0x60,0xa4,0x18,0xd5] +// CHECK: mrs x0, LORID_EL1 // encoding: [0xe0,0xa4,0x38,0xd5] Index: test/MC/AArch64/armv8-extension-pan.s 
=================================================================== --- /dev/null +++ test/MC/AArch64/armv8-extension-pan.s @@ -0,0 +1,30 @@ +//RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.1a -show-encoding < %s 2> %t | FileCheck %s +//RUN: FileCheck --check-prefix=CHECK-ERROR %s < %t + + .text + + msr pan, #0 +//CHECK: msr PAN, #0 // encoding: [0x9f,0x40,0x00,0xd5] + msr pan, #1 +//CHECK: msr PAN, #1 // encoding: [0x9f,0x41,0x00,0xd5] + msr pan, x5 +//CHECK: msr PAN, x5 // encoding: [0x65,0x42,0x18,0xd5] + mrs x13, pan +//CHECK: mrs x13, PAN // encoding: [0x6d,0x42,0x38,0xd5] + + msr pan, #-1 + msr pan, #20 + msr pan, w0 + mrs w0, pan +//CHECK-ERROR: error: immediate must be an integer in range [0, 15]. +//CHECK-ERROR: msr pan, #-1 +//CHECK-ERROR: ^ +//CHECK-ERROR: error: immediate must be an integer in range [0, 15]. +//CHECK-ERROR: msr pan, #20 +//CHECK-ERROR: ^ +//CHECK-ERROR: error: immediate must be an integer in range [0, 15]. +//CHECK-ERROR: msr pan, w0 +//CHECK-ERROR: ^ +//CHECK-ERROR: error: invalid operand for instruction +//CHECK-ERROR: mrs w0, pan +//CHECK-ERROR: ^ Index: test/MC/AArch64/armv8-extension-rdma.s =================================================================== --- /dev/null +++ test/MC/AArch64/armv8-extension-rdma.s @@ -0,0 +1,154 @@ +// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.1a -show-encoding < %s 2> %t | FileCheck %s +// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s + .text + + //AdvSIMD RDMA vector + sqrdmlah v0.4h, v1.4h, v2.4h + sqrdmlsh v0.4h, v1.4h, v2.4h + sqrdmlah v0.2s, v1.2s, v2.2s + sqrdmlsh v0.2s, v1.2s, v2.2s + sqrdmlah v0.4s, v1.4s, v2.4s + sqrdmlsh v0.4s, v1.4s, v2.4s + sqrdmlah v0.8h, v1.8h, v2.8h + sqrdmlsh v0.8h, v1.8h, v2.8h +// CHECK: sqrdmlah v0.4h, v1.4h, v2.4h // encoding: [0x20,0x84,0x42,0x2e] +// CHECK: sqrdmlsh v0.4h, v1.4h, v2.4h // encoding: [0x20,0x8c,0x42,0x2e] +// CHECK: sqrdmlah v0.2s, v1.2s, v2.2s // encoding: [0x20,0x84,0x82,0x2e] +// CHECK: sqrdmlsh v0.2s, v1.2s, 
v2.2s // encoding: [0x20,0x8c,0x82,0x2e] +// CHECK: sqrdmlah v0.4s, v1.4s, v2.4s // encoding: [0x20,0x84,0x82,0x6e] +// CHECK: sqrdmlsh v0.4s, v1.4s, v2.4s // encoding: [0x20,0x8c,0x82,0x6e] +// CHECK: sqrdmlah v0.8h, v1.8h, v2.8h // encoding: [0x20,0x84,0x42,0x6e] +// CHECK: sqrdmlsh v0.8h, v1.8h, v2.8h // encoding: [0x20,0x8c,0x42,0x6e] + + sqrdmlah v0.2h, v1.2h, v2.2h + sqrdmlsh v0.2h, v1.2h, v2.2h + sqrdmlah v0.8s, v1.8s, v2.8s + sqrdmlsh v0.8s, v1.8s, v2.8s + sqrdmlah v0.2s, v1.4h, v2.8h + sqrdmlsh v0.4s, v1.8h, v2.2s +// CHECK-ERROR: error: invalid vector kind qualifier +// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid vector kind qualifier +// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid vector kind qualifier +// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid vector kind qualifier +// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid vector kind qualifier +// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid vector kind qualifier +// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid vector kind qualifier +// CHECK-ERROR: sqrdmlah v0.8s, v1.8s, v2.8s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid vector kind qualifier +// CHECK-ERROR: sqrdmlah v0.8s, v1.8s, v2.8s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid vector kind qualifier +// CHECK-ERROR: sqrdmlah v0.8s, v1.8s, v2.8s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmlah v0.8s, v1.8s, v2.8s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid 
vector kind qualifier +// CHECK-ERROR: sqrdmlsh v0.8s, v1.8s, v2.8s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid vector kind qualifier +// CHECK-ERROR: sqrdmlsh v0.8s, v1.8s, v2.8s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid vector kind qualifier +// CHECK-ERROR: sqrdmlsh v0.8s, v1.8s, v2.8s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmlsh v0.8s, v1.8s, v2.8s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmlah v0.2s, v1.4h, v2.8h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmlsh v0.4s, v1.8h, v2.2s +// CHECK-ERROR: ^ + + //AdvSIMD RDMA scalar + sqrdmlah h0, h1, h2 + sqrdmlsh h0, h1, h2 + sqrdmlah s0, s1, s2 + sqrdmlsh s0, s1, s2 +// CHECK: sqrdmlah h0, h1, h2 // encoding: [0x20,0x84,0x42,0x7e] +// CHECK: sqrdmlsh h0, h1, h2 // encoding: [0x20,0x8c,0x42,0x7e] +// CHECK: sqrdmlah s0, s1, s2 // encoding: [0x20,0x84,0x82,0x7e] +// CHECK: sqrdmlsh s0, s1, s2 // encoding: [0x20,0x8c,0x82,0x7e] + + //AdvSIMD RDMA vector by-element + sqrdmlah v0.4h, v1.4h, v2.h[3] + sqrdmlsh v0.4h, v1.4h, v2.h[3] + sqrdmlah v0.2s, v1.2s, v2.s[1] + sqrdmlsh v0.2s, v1.2s, v2.s[1] + sqrdmlah v0.8h, v1.8h, v2.h[3] + sqrdmlsh v0.8h, v1.8h, v2.h[3] + sqrdmlah v0.4s, v1.4s, v2.s[3] + sqrdmlsh v0.4s, v1.4s, v2.s[3] +// CHECK: sqrdmlah v0.4h, v1.4h, v2.h[3] // encoding: [0x20,0xd0,0x72,0x2f] +// CHECK: sqrdmlsh v0.4h, v1.4h, v2.h[3] // encoding: [0x20,0xf0,0x72,0x2f] +// CHECK: sqrdmlah v0.2s, v1.2s, v2.s[1] // encoding: [0x20,0xd0,0xa2,0x2f] +// CHECK: sqrdmlsh v0.2s, v1.2s, v2.s[1] // encoding: [0x20,0xf0,0xa2,0x2f] +// CHECK: sqrdmlah v0.8h, v1.8h, v2.h[3] // encoding: [0x20,0xd0,0x72,0x6f] +// CHECK: sqrdmlsh v0.8h, v1.8h, v2.h[3] // encoding: [0x20,0xf0,0x72,0x6f] +// CHECK: sqrdmlah v0.4s, v1.4s, v2.s[3] // encoding: [0x20,0xd8,0xa2,0x6f] +// CHECK: sqrdmlsh v0.4s, v1.4s, v2.s[3] // encoding: [0x20,0xf8,0xa2,0x6f] + + sqrdmlah 
v0.4s, v1.2s, v2.s[1] + sqrdmlsh v0.2s, v1.2d, v2.s[1] + sqrdmlah v0.8h, v1.8h, v2.s[3] + sqrdmlsh v0.8h, v1.8h, v2.h[8] +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmlah v0.4s, v1.2s, v2.s[1] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmlsh v0.2s, v1.2d, v2.s[1] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmlah v0.8h, v1.8h, v2.s[3] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: vector lane must be an integer in range [0, 7]. +// CHECK-ERROR: sqrdmlsh v0.8h, v1.8h, v2.h[8] +// CHECK-ERROR: ^ + + //AdvSIMD RDMA scalar by-element + sqrdmlah h0, h1, v2.h[3] + sqrdmlsh h0, h1, v2.h[3] + sqrdmlah s0, s1, v2.s[3] + sqrdmlsh s0, s1, v2.s[3] +// CHECK: sqrdmlah h0, h1, v2.h[3] // encoding: [0x20,0xd0,0x72,0x7f] +// CHECK: sqrdmlsh h0, h1, v2.h[3] // encoding: [0x20,0xf0,0x72,0x7f] +// CHECK: sqrdmlah s0, s1, v2.s[3] // encoding: [0x20,0xd8,0xa2,0x7f] +// CHECK: sqrdmlsh s0, s1, v2.s[3] // encoding: [0x20,0xf8,0xa2,0x7f] + + sqrdmlah b0, h1, v2.h[3] + sqrdmlah s0, d1, v2.s[3] + sqrdmlsh h0, h1, v2.s[3] + sqrdmlsh s0, s1, v2.s[4] +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmlah b0, h1, v2.h[3] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmlah s0, d1, v2.s[3] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: sqrdmlsh h0, h1, v2.s[3] +// CHECK-ERROR: ^ +// CHECK-ERROR: error: vector lane must be an integer in range [0, 3]. 
+// CHECK-ERROR: sqrdmlsh s0, s1, v2.s[4] +// CHECK-ERROR: ^ Index: test/MC/AArch64/armv8-extension-vhe.s =================================================================== --- /dev/null +++ test/MC/AArch64/armv8-extension-vhe.s @@ -0,0 +1,61 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.1a < %s | FileCheck %s + + +//------------------------------------------------------------------------------ +// Virtualization Host Extensions +//------------------------------------------------------------------------------ + msr TTBR1_EL2, x0 + msr CONTEXTIDR_EL2, x0 + msr CNTHV_TVAL_EL2, x0 + msr CNTHV_CVAL_EL2, x0 + msr CNTHV_CTL_EL2, x0 + msr SCTLR_EL12, x0 + msr CPACR_EL12, x0 + msr TTBR0_EL12, x0 + msr TTBR1_EL12, x0 + msr TCR_EL12, x0 + msr AFSR0_EL12, x0 + msr AFSR1_EL12, x0 + msr ESR_EL12, x0 + msr FAR_EL12, x0 + msr MAIR_EL12, x0 + msr AMAIR_EL12, x0 + msr VBAR_EL12, x0 + msr CONTEXTIDR_EL12, x0 + msr CNTKCTL_EL12, x0 + msr CNTP_TVAL_EL02, x0 + msr CNTP_CTL_EL02, x0 + msr CNTP_CVAL_EL02, x0 + msr CNTV_TVAL_EL02, x0 + msr CNTV_CTL_EL02, x0 + msr CNTV_CVAL_EL02, x0 + msr SPSR_EL12, x0 + msr ELR_EL12, x0 + +// CHECK: msr TTBR1_EL2, x0 // encoding: [0x20,0x20,0x1c,0xd5] +// CHECK: msr CONTEXTIDR_EL2, x0 // encoding: [0x20,0xd0,0x1c,0xd5] +// CHECK: msr CNTHV_TVAL_EL2, x0 // encoding: [0x00,0xe3,0x1c,0xd5] +// CHECK: msr CNTHV_CVAL_EL2, x0 // encoding: [0x40,0xe3,0x1c,0xd5] +// CHECK: msr CNTHV_CTL_EL2, x0 // encoding: [0x20,0xe3,0x1c,0xd5] +// CHECK: msr SCTLR_EL12, x0 // encoding: [0x00,0x10,0x1d,0xd5] +// CHECK: msr CPACR_EL12, x0 // encoding: [0x40,0x10,0x1d,0xd5] +// CHECK: msr TTBR0_EL12, x0 // encoding: [0x00,0x20,0x1d,0xd5] +// CHECK: msr TTBR1_EL12, x0 // encoding: [0x20,0x20,0x1d,0xd5] +// CHECK: msr TCR_EL12, x0 // encoding: [0x40,0x20,0x1d,0xd5] +// CHECK: msr AFSR0_EL12, x0 // encoding: [0x00,0x51,0x1d,0xd5] +// CHECK: msr AFSR1_EL12, x0 // encoding: [0x20,0x51,0x1d,0xd5] +// CHECK: msr ESR_EL12, x0 // encoding: 
[0x00,0x52,0x1d,0xd5] +// CHECK: msr FAR_EL12, x0 // encoding: [0x00,0x60,0x1d,0xd5] +// CHECK: msr MAIR_EL12, x0 // encoding: [0x00,0xa2,0x1d,0xd5] +// CHECK: msr AMAIR_EL12, x0 // encoding: [0x00,0xa3,0x1d,0xd5] +// CHECK: msr VBAR_EL12, x0 // encoding: [0x00,0xc0,0x1d,0xd5] +// CHECK: msr CONTEXTIDR_EL12, x0 // encoding: [0x20,0xd0,0x1d,0xd5] +// CHECK: msr CNTKCTL_EL12, x0 // encoding: [0x00,0xe1,0x1d,0xd5] +// CHECK: msr CNTP_TVAL_EL02, x0 // encoding: [0x00,0xe2,0x1d,0xd5] +// CHECK: msr CNTP_CTL_EL02, x0 // encoding: [0x20,0xe2,0x1d,0xd5] +// CHECK: msr CNTP_CVAL_EL02, x0 // encoding: [0x40,0xe2,0x1d,0xd5] +// CHECK: msr CNTV_TVAL_EL02, x0 // encoding: [0x00,0xe3,0x1d,0xd5] +// CHECK: msr CNTV_CTL_EL02, x0 // encoding: [0x20,0xe3,0x1d,0xd5] +// CHECK: msr CNTV_CVAL_EL02, x0 // encoding: [0x40,0xe3,0x1d,0xd5] +// CHECK: msr SPSR_EL12, x0 // encoding: [0x00,0x40,0x1d,0xd5] +// CHECK: msr ELR_EL12, x0 // encoding: [0x20,0x40,0x1d,0xd5] Index: test/MC/ARM/basic-arm-instructions-v8.1a.s =================================================================== --- /dev/null +++ test/MC/ARM/basic-arm-instructions-v8.1a.s @@ -0,0 +1,206 @@ +//RUN: not llvm-mc -triple thumb-none-linux-gnu -mattr=+v8.1a -mattr=neon -show-encoding < %s 2>%t | FileCheck %s --check-prefix=CHECK-V81aTHUMB +//RUN: FileCheck --check-prefix=CHECK-ERROR <%t %s +//RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8.1a -mattr=neon -show-encoding < %s 2>%t | FileCheck %s --check-prefix=CHECK-V81aARM +//RUN: FileCheck --check-prefix=CHECK-ERROR <%t %s + +//RUN: not llvm-mc -triple thumb-none-linux-gnu -mattr=+v8 -mattr=neon -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V8 +//RUN: not llvm-mc -triple arm-none-linux-gnu -mattr=+v8 -mattr=neon -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V8 + + + .text +//CHECK-V8THUMB: .text + + vqrdmlah.i8 q0, q1, q2 + vqrdmlah.u16 d0, d1, d2 + vqrdmlsh.f32 q3, q4, q5 + vqrdmlsh.f64 d3, d5, d5 + +//CHECK-ERROR: error: invalid operand 
for instruction +//CHECK-ERROR: vqrdmlah.i8 q0, q1, q2 +//CHECK-ERROR: ^ +//CHECK-ERROR: error: invalid operand for instruction +//CHECK-ERROR: vqrdmlah.u16 d0, d1, d2 +//CHECK-ERROR: ^ +//CHECK-ERROR: error: invalid operand for instruction +//CHECK-ERROR: vqrdmlsh.f32 q3, q4, q5 +//CHECK-ERROR: ^ +//CHECK-ERROR: error: invalid operand for instruction +//CHECK-ERROR: vqrdmlsh.f64 d3, d5, d5 +//CHECK-ERROR: ^ +//CHECK-V8: error: invalid operand for instruction +//CHECK-V8: vqrdmlah.i8 q0, q1, q2 +//CHECK-V8: ^ +//CHECK-V8: error: invalid operand for instruction +//CHECK-V8: vqrdmlah.u16 d0, d1, d2 +//CHECK-V8: ^ +//CHECK-V8: error: invalid operand for instruction +//CHECK-V8: vqrdmlsh.f32 q3, q4, q5 +//CHECK-V8: ^ +//CHECK-V8: error: invalid operand for instruction +//CHECK-V8 vqrdmlsh.f64 d3, d5, d5 +//CHECK-V8: ^ + + vqrdmlah.s16 d0, d1, d2 +//CHECK-V81aARM: vqrdmlah.s16 d0, d1, d2 @ encoding: [0x12,0x0b,0x11,0xf3] +//CHECK-V81aTHUMB: vqrdmlah.s16 d0, d1, d2 @ encoding: [0x11,0xff,0x12,0x0b] +//CHECK-V8: error: instruction requires: v8.1a +//CHECK-V8: vqrdmlah.s16 d0, d1, d2 +//CHECK-V8: ^ + + vqrdmlah.s32 d0, d1, d2 +//CHECK-V81aARM: vqrdmlah.s32 d0, d1, d2 @ encoding: [0x12,0x0b,0x21,0xf3] +//CHECK-V81aTHUMB: vqrdmlah.s32 d0, d1, d2 @ encoding: [0x21,0xff,0x12,0x0b] +//CHECK-V8: error: instruction requires: v8.1a +//CHECK-V8: vqrdmlah.s32 d0, d1, d2 +//CHECK-V8: ^ + + vqrdmlah.s16 q0, q1, q2 +//CHECK-V81aARM: vqrdmlah.s16 q0, q1, q2 @ encoding: [0x54,0x0b,0x12,0xf3] +//CHECK-V81aTHUMB: vqrdmlah.s16 q0, q1, q2 @ encoding: [0x12,0xff,0x54,0x0b] +//CHECK-V8: error: instruction requires: v8.1a +//CHECK-V8: vqrdmlah.s16 q0, q1, q2 +//CHECK-V8: ^ + + vqrdmlah.s32 q2, q3, q0 +//CHECK-V81aARM: vqrdmlah.s32 q2, q3, q0 @ encoding: [0x50,0x4b,0x26,0xf3] +//CHECK-V81aTHUMB: vqrdmlah.s32 q2, q3, q0 @ encoding: [0x26,0xff,0x50,0x4b] +//CHECK-V8: error: instruction requires: v8.1a +//CHECK-V8: vqrdmlah.s32 q2, q3, q0 +//CHECK-V8: ^ + + + vqrdmlsh.s16 d7, d6, d5 
+//CHECK-V81aARM: vqrdmlsh.s16 d7, d6, d5 @ encoding: [0x15,0x7c,0x16,0xf3] +//CHECK-V81aTHUMB: vqrdmlsh.s16 d7, d6, d5 @ encoding: [0x16,0xff,0x15,0x7c] +//CHECK-V8: error: instruction requires: v8.1a +//CHECK-V8: vqrdmlsh.s16 d7, d6, d5 +//CHECK-V8: ^ + + vqrdmlsh.s32 d0, d1, d2 +//CHECK-V81aARM: vqrdmlsh.s32 d0, d1, d2 @ encoding: [0x12,0x0c,0x21,0xf3] +//CHECK-V81aTHUMB: vqrdmlsh.s32 d0, d1, d2 @ encoding: [0x21,0xff,0x12,0x0c] +//CHECK-V8: error: instruction requires: v8.1a +//CHECK-V8: vqrdmlsh.s32 d0, d1, d2 +//CHECK-V8: ^ + + vqrdmlsh.s16 q0, q1, q2 +//CHECK-V81aARM: vqrdmlsh.s16 q0, q1, q2 @ encoding: [0x54,0x0c,0x12,0xf3] +//CHECK-V81aTHUMB: vqrdmlsh.s16 q0, q1, q2 @ encoding: [0x12,0xff,0x54,0x0c] +//CHECK-V8: error: instruction requires: v8.1a +//CHECK-V8: vqrdmlsh.s16 q0, q1, q2 +//CHECK-V8: ^ + + vqrdmlsh.s32 q3, q4, q5 +//CHECK-V81aARM: vqrdmlsh.s32 q3, q4, q5 @ encoding: [0x5a,0x6c,0x28,0xf3] +//CHECK-V81aTHUMB: vqrdmlsh.s32 q3, q4, q5 @ encoding: [0x28,0xff,0x5a,0x6c] +//CHECK-V8: error: instruction requires: v8.1a +//CHECK-V8: vqrdmlsh.s32 q3, q4, q5 +//CHECK-V8: ^ + + + vqrdmlah.i8 q0, q1, d9[7] + vqrdmlah.u16 d0, d1, d2[3] + vqrdmlsh.f32 q3, q4, d5[1] + vqrdmlsh.f64 d3, d5, d5[0] + +//CHECK-ERROR: error: invalid operand for instruction +//CHECK-ERROR: vqrdmlah.i8 q0, q1, d9[7] +//CHECK-ERROR: ^ +//CHECK-ERROR: error: invalid operand for instruction +//CHECK-ERROR: vqrdmlah.u16 d0, d1, d2[3] +//CHECK-ERROR: ^ +//CHECK-ERROR: error: invalid operand for instruction +//CHECK-ERROR: vqrdmlsh.f32 q3, q4, d5[1] +//CHECK-ERROR: ^ +//CHECK-ERROR: error: invalid operand for instruction +//CHECK-ERROR: vqrdmlsh.f64 d3, d5, d5[0] +//CHECK-ERROR: ^ + + vqrdmlah.s16 d0, d1, d2[0] +//CHECK-V81aARM: vqrdmlah.s16 d0, d1, d2[0] @ encoding: [0x42,0x0e,0x91,0xf2] +//CHECK-V81aTHUMB: vqrdmlah.s16 d0, d1, d2[0] @ encoding: [0x91,0xef,0x42,0x0e] +//CHECK-V8: error: instruction requires: v8.1a +//CHECK-V8: vqrdmlah.s16 d0, d1, d2[0] +//CHECK-V8: ^ + + vqrdmlah.s32 d0, 
d1, d2[0] +//CHECK-V81aARM: vqrdmlah.s32 d0, d1, d2[0] @ encoding: [0x42,0x0e,0xa1,0xf2] +//CHECK-V81aTHUMB: vqrdmlah.s32 d0, d1, d2[0] @ encoding: [0xa1,0xef,0x42,0x0e] +//CHECK-V8: error: instruction requires: v8.1a +//CHECK-V8: vqrdmlah.s32 d0, d1, d2[0] +//CHECK-V8: ^ + + vqrdmlah.s16 q0, q1, d2[0] +//CHECK-V81aARM: vqrdmlah.s16 q0, q1, d2[0] @ encoding: [0x42,0x0e,0x92,0xf3] +//CHECK-V81aTHUMB: vqrdmlah.s16 q0, q1, d2[0] @ encoding: [0x92,0xff,0x42,0x0e] +//CHECK-V8: error: instruction requires: v8.1a +//CHECK-V8: vqrdmlah.s16 q0, q1, d2[0] +//CHECK-V8: ^ + + vqrdmlah.s32 q0, q1, d2[0] +//CHECK-V81aARM: vqrdmlah.s32 q0, q1, d2[0] @ encoding: [0x42,0x0e,0xa2,0xf3] +//CHECK-V81aTHUMB: vqrdmlah.s32 q0, q1, d2[0] @ encoding: [0xa2,0xff,0x42,0x0e] +//CHECK-V8: error: instruction requires: v8.1a +//CHECK-V8: vqrdmlah.s32 q0, q1, d2[0] +//CHECK-V8: ^ + + + vqrdmlsh.s16 d0, d1, d2[0] +//CHECK-V81aARM: vqrdmlsh.s16 d0, d1, d2[0] @ encoding: [0x42,0x0f,0x91,0xf2] +//CHECK-V81aTHUMB: vqrdmlsh.s16 d0, d1, d2[0] @ encoding: [0x91,0xef,0x42,0x0f] +//CHECK-V8: error: instruction requires: v8.1a +//CHECK-V8: vqrdmlsh.s16 d0, d1, d2[0] +//CHECK-V8: ^ + + vqrdmlsh.s32 d0, d1, d2[0] +//CHECK-V81aARM: vqrdmlsh.s32 d0, d1, d2[0] @ encoding: [0x42,0x0f,0xa1,0xf2] +//CHECK-V81aTHUMB: vqrdmlsh.s32 d0, d1, d2[0] @ encoding: [0xa1,0xef,0x42,0x0f] +//CHECK-V8: error: instruction requires: v8.1a +//CHECK-V8: vqrdmlsh.s32 d0, d1, d2[0] +//CHECK-V8: ^ + + vqrdmlsh.s16 q0, q1, d2[0] +//CHECK-V81aARM: vqrdmlsh.s16 q0, q1, d2[0] @ encoding: [0x42,0x0f,0x92,0xf3] +//CHECK-V81aTHUMB: vqrdmlsh.s16 q0, q1, d2[0] @ encoding: [0x92,0xff,0x42,0x0f] +//CHECK-V8: error: instruction requires: v8.1a +//CHECK-V8: vqrdmlsh.s16 q0, q1, d2[0] +//CHECK-V8: ^ + + vqrdmlsh.s32 q0, q1, d2[0] +//CHECK-V81aARM: vqrdmlsh.s32 q0, q1, d2[0] @ encoding: [0x42,0x0f,0xa2,0xf3] +//CHECK-V81aTHUMB: vqrdmlsh.s32 q0, q1, d2[0] @ encoding: [0xa2,0xff,0x42,0x0f] +//CHECK-V8: error: instruction requires: v8.1a +//CHECK-V8: 
vqrdmlsh.s32 q0, q1, d2[0] +//CHECK-V8: ^ + + setpan #0 +//CHECK-V81aTHUMB: setpan #0 @ encoding: [0x10,0xb6] +//CHECK-V81aARM: setpan #0 @ encoding: [0x00,0x00,0x10,0xf1] +//CHECK-V8: error: instruction requires: v8.1a +//CHECK-V8: setpan #0 +//CHECK-V8: ^ + + setpan #1 +//CHECK-V81aTHUMB: setpan #1 @ encoding: [0x18,0xb6] +//CHECK-V81aARM: setpan #1 @ encoding: [0x00,0x02,0x10,0xf1] +//CHECK-V8: error: instruction requires: v8.1a +//CHECK-V8: setpan #1 +//CHECK-V8: ^ + setpan + setpan #-1 + setpan #2 +//CHECK-ERROR: error: too few operands for instruction +//CHECK-ERROR: setpan +//CHECK-ERROR: ^ +//CHECK-ERROR: error: invalid operand for instruction +//CHECK-ERROR: setpan #-1 +//CHECK-ERROR: ^ +//CHECK-ERROR: error: invalid operand for instruction +//CHECK-ERROR: setpan #2 +//CHECK-ERROR: ^ + + it eq + setpaneq #0 +//CHECK-THUMB-ERROR: error: instruction 'setpan' is not predicable, but condition code specified +//CHECK-THUMB-ERROR: setpaneq #0 +//CHECK-THUMB-ERROR: ^ Index: test/MC/Disassembler/AArch64/armv8-extension-atomic.txt =================================================================== --- /dev/null +++ test/MC/Disassembler/AArch64/armv8-extension-atomic.txt @@ -0,0 +1,83 @@ +# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.1a --disassemble < %s | FileCheck %s + +0x41,0x7c,0xa0,0x08 +0x41,0x7c,0xe0,0x08 +0x41,0xfc,0xa0,0x08 +0x41,0xfc,0xe0,0x08 +0x41,0x7c,0xa0,0x48 +0x41,0x7c,0xe0,0x48 +0x41,0xfc,0xa0,0x48 +0x41,0xfc,0xe0,0x48 +# CHECK: casb w0, w1, [x2] +# CHECK: casab w0, w1, [x2] +# CHECK: caslb w0, w1, [x2] +# CHECK: casalb w0, w1, [x2] +# CHECK: cash w0, w1, [x2] +# CHECK: casah w0, w1, [x2] +# CHECK: caslh w0, w1, [x2] +# CHECK: casalh w0, w1, [x2] + +0x41,0x7c,0xa0,0x88 +0x41,0x7c,0xe0,0x88 +0x41,0xfc,0xa0,0x88 +0x41,0xfc,0xe0,0x88 +0x41,0x7c,0xa0,0xc8 +0x41,0x7c,0xe0,0xc8 +0x41,0xfc,0xa0,0xc8 +0x41,0xfc,0xe0,0xc8 +# CHECK: cas w0, w1, [x2] +# CHECK: casa w0, w1, [x2] +# CHECK: casl w0, w1, [x2] +# CHECK: casal w0, w1, [x2] +# CHECK: cas 
x0, x1, [x2] +# CHECK: casa x0, x1, [x2] +# CHECK: casl x0, x1, [x2] +# CHECK: casal x0, x1, [x2] + +0x41,0x80,0x20,0xf8 +0x41,0x80,0x20,0x38 +0x41,0x80,0x60,0x78 +0xe1,0x83,0xe0,0xf8 +# CHECK: swp x0, x1, [x2] +# CHECK: swpb w0, w1, [x2] +# CHECK: swplh w0, w1, [x2] +# CHECK: swpal x0, x1, [sp] + +0x41,0x00,0xa0,0xf8 +0x41,0x10,0x60,0xf8 +0x41,0x20,0xe0,0xf8 +0x41,0x30,0x20,0xf8 +0x41,0x40,0xa0,0xb8 +0x41,0x50,0x60,0x38 +0x41,0x60,0xe0,0x78 +0x41,0x70,0x20,0xb8 +# CHECK: ldadda x0, x1, [x2] +# CHECK: ldclrl x0, x1, [x2] +# CHECK: ldeoral x0, x1, [x2] +# CHECK: ldset x0, x1, [x2] +# CHECK: ldsmaxa w0, w1, [x2] +# CHECK: ldsminlb w0, w1, [x2] +# CHECK: ldumaxalh w0, w1, [x2] +# CHECK: ldumin w0, w1, [x2] + +0x5f,0x00,0x60,0x38 +0x5f,0x10,0x60,0x78 +0x5f,0x20,0x60,0xb8 +0x5f,0x30,0x60,0xf8 +0x5f,0x40,0x20,0x38 +0x5f,0x50,0x20,0x78 +0x5f,0x60,0x20,0xb8 +0x5f,0x70,0x20,0xf8 +# CHECK: staddlb w0, [x2] +# CHECK: stclrlh w0, [x2] +# CHECK: steorl w0, [x2] +# CHECK: stsetl x0, [x2] +# CHECK: stsmaxb w0, [x2] +# CHECK: stsminh w0, [x2] +# CHECK: stumax w0, [x2] +# CHECK: stumin x0, [x2] + +0x82,0x7c,0x20,0x48 +0x82,0x7c,0x20,0x08 +# CHECK: casp x0, x1, x2, x3, [x4] +# CHECK: casp w0, w1, w2, w3, [x4] Index: test/MC/Disassembler/AArch64/armv8-extension-lor.txt =================================================================== --- /dev/null +++ test/MC/Disassembler/AArch64/armv8-extension-lor.txt @@ -0,0 +1,28 @@ +# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.1a --disassemble < %s | FileCheck %s + +0x20,0x7c,0xdf,0x08 +0x20,0x7c,0xdf,0x48 +0x20,0x7c,0xdf,0x88 +0x20,0x7c,0xdf,0xc8 +0x20,0x7c,0x9f,0x08 +0x20,0x7c,0x9f,0x48 +0x20,0x7c,0x9f,0x88 +0x20,0x7c,0x9f,0xc8 +# CHECK: ldlarb w0, [x1] +# CHECK: ldlarh w0, [x1] +# CHECK: ldlar w0, [x1] +# CHECK: ldlar x0, [x1] +# CHECK: stllrb w0, [x1] +# CHECK: stllrh w0, [x1] +# CHECK: stllr w0, [x1] +# CHECK: stllr x0, [x1] +0x00,0xa4,0x18,0xd5 +0x20,0xa4,0x18,0xd5 +0x40,0xa4,0x18,0xd5 +0x60,0xa4,0x18,0xd5 
+0xe0,0xa4,0x38,0xd5 +# CHECK: msr LORSA_EL1, x0 +# CHECK: msr LOREA_EL1, x0 +# CHECK: msr LORN_EL1, x0 +# CHECK: msr LORC_EL1, x0 +# CHECK: mrs x0, LORID_EL1 Index: test/MC/Disassembler/AArch64/armv8-extension-pan.txt =================================================================== --- /dev/null +++ test/MC/Disassembler/AArch64/armv8-extension-pan.txt @@ -0,0 +1,10 @@ +# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.1a --disassemble < %s | FileCheck %s + +0x9f,0x40,0x00,0xd5 +0x9f,0x41,0x00,0xd5 +0x65,0x42,0x18,0xd5 +0x6d,0x42,0x38,0xd5 +# CHECK: msr PAN, #0 +# CHECK: msr PAN, #1 +# CHECK: msr PAN, x5 +# CHECK: mrs x13, PAN Index: test/MC/Disassembler/AArch64/armv8-extension-rdma.txt =================================================================== --- /dev/null +++ test/MC/Disassembler/AArch64/armv8-extension-rdma.txt @@ -0,0 +1,129 @@ +# RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.1a --disassemble < %s 2>&1 | FileCheck %s + +[0x20,0x84,0x02,0x2e] # sqrdmlah v0.8b, v1.8b, v2.8b +[0x20,0x8c,0x02,0x2e] # sqrdmlsh v0.8b, v1.8b, v2.8b +[0x20,0x84,0xc2,0x2e] # sqrdmlah v0.1d, v1.1d, v2.1d +[0x20,0x8c,0xc2,0x2e] # sqrdmlsh v0.1d, v1.1d, v2.1d +[0x20,0x84,0x02,0x6e] # sqrdmlah v0.16b, v1.16b, v2.16b +[0x20,0x8c,0x02,0x6e] # sqrdmlsh v0.16b, v1.16b, v2.16b +[0x20,0x84,0xc2,0x6e] # sqrdmlah v0.2d, v1.2d, v2.2d +[0x20,0x8c,0xc2,0x6e] # sqrdmlsh v0.2d, v1.2d, v2.2d +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0x84,0x02,0x2e] +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0x8c,0x02,0x2e] +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0x84,0xc2,0x2e] +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0x8c,0xc2,0x2e] +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0x84,0x02,0x6e] +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0x8c,0x02,0x6e] +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0x84,0xc2,0x6e] +# CHECK: warning: invalid 
instruction encoding +# CHECK: [0x20,0x8c,0xc2,0x6e] + +[0x20,0x84,0x02,0x7e] # sqrdmlah b0, b1, b2 +[0x20,0x8c,0x02,0x7e] # sqrdmlsh b0, b1, b2 +[0x20,0x84,0xc2,0x7e] # sqrdmlah d0, d1, d2 +[0x20,0x8c,0xc2,0x7e] # sqrdmlsh d0, d1, d2 +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0x84,0x02,0x7e] +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0x8c,0x02,0x7e] +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0x84,0xc2,0x7e] +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0x8c,0xc2,0x7e] + +[0x20,0xd0,0x32,0x2f] # sqrdmlah v0.8b, v1.8b, v2.b[3] +[0x20,0xf0,0x32,0x2f] # sqrdmlsh v0.8b, v1.8b, v2.b[3] +[0x20,0xd0,0xe2,0x2f] # sqrdmlah v0.1d, v1.1d, v2.d[1] +[0x20,0xf0,0xe2,0x2f] # sqrdmlsh v0.1d, v1.1d, v2.d[1] +[0x20,0xd0,0x32,0x6f] # sqrdmlah v0.16b, v1.16b, v2.b[3] +[0x20,0xf0,0x32,0x6f] # sqrdmlsh v0.16b, v1.16b, v2.b[3] +[0x20,0xd8,0xe2,0x6f] # sqrdmlah v0.2d, v1.2d, v2.d[3] +[0x20,0xf8,0xe2,0x6f] # sqrdmlsh v0.2d, v1.2d, v2.d[3] +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0xd0,0x32,0x2f] +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0xf0,0x32,0x2f] +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0xd0,0xe2,0x2f] +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0xf0,0xe2,0x2f] +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0xd0,0x32,0x6f] +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0xf0,0x32,0x6f] +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0xd8,0xe2,0x6f] +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0xf8,0xe2,0x6f] + +[0x20,0xd0,0x32,0x7f] # sqrdmlah b0, b1, v2.b[3] +[0x20,0xf0,0x32,0x7f] # sqrdmlsh b0, b1, v2.b[3] +[0x20,0xd8,0xe2,0x7f] # sqrdmlah d0, d1, v2.d[3] +[0x20,0xf8,0xe2,0x7f] # sqrdmlsh d0, d1, v2.d[3] +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0xd0,0x32,0x7f] +# CHECK: warning: invalid instruction encoding +# CHECK: 
[0x20,0xf0,0x32,0x7f] +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0xd8,0xe2,0x7f] +# CHECK: warning: invalid instruction encoding +# CHECK: [0x20,0xf8,0xe2,0x7f] + +[0x20,0x84,0x42,0x2e] +[0x20,0x8c,0x42,0x2e] +[0x20,0x84,0x82,0x2e] +[0x20,0x8c,0x82,0x2e] +[0x20,0x84,0x42,0x6e] +[0x20,0x8c,0x42,0x6e] +[0x20,0x84,0x82,0x6e] +[0x20,0x8c,0x82,0x6e] +# CHECK: sqrdmlah v0.4h, v1.4h, v2.4h +# CHECK: sqrdmlsh v0.4h, v1.4h, v2.4h +# CHECK: sqrdmlah v0.2s, v1.2s, v2.2s +# CHECK: sqrdmlsh v0.2s, v1.2s, v2.2s +# CHECK: sqrdmlah v0.8h, v1.8h, v2.8h +# CHECK: sqrdmlsh v0.8h, v1.8h, v2.8h +# CHECK: sqrdmlah v0.4s, v1.4s, v2.4s +# CHECK: sqrdmlsh v0.4s, v1.4s, v2.4s + +[0x20,0x84,0x42,0x7e] +[0x20,0x8c,0x42,0x7e] +[0x20,0x84,0x82,0x7e] +[0x20,0x8c,0x82,0x7e] +# CHECK: sqrdmlah h0, h1, h2 +# CHECK: sqrdmlsh h0, h1, h2 +# CHECK: sqrdmlah s0, s1, s2 +# CHECK: sqrdmlsh s0, s1, s2 + +0x20,0xd0,0x72,0x2f +0x20,0xf0,0x72,0x2f +0x20,0xd0,0xa2,0x2f +0x20,0xf0,0xa2,0x2f +0x20,0xd0,0x72,0x6f +0x20,0xf0,0x72,0x6f +0x20,0xd8,0xa2,0x6f +0x20,0xf8,0xa2,0x6f +# CHECK: sqrdmlah v0.4h, v1.4h, v2.h[3] +# CHECK: sqrdmlsh v0.4h, v1.4h, v2.h[3] +# CHECK: sqrdmlah v0.2s, v1.2s, v2.s[1] +# CHECK: sqrdmlsh v0.2s, v1.2s, v2.s[1] +# CHECK: sqrdmlah v0.8h, v1.8h, v2.h[3] +# CHECK: sqrdmlsh v0.8h, v1.8h, v2.h[3] +# CHECK: sqrdmlah v0.4s, v1.4s, v2.s[3] +# CHECK: sqrdmlsh v0.4s, v1.4s, v2.s[3] + +0x20,0xd0,0x72,0x7f +0x20,0xf0,0x72,0x7f +0x20,0xd8,0xa2,0x7f +0x20,0xf8,0xa2,0x7f +# CHECK: sqrdmlah h0, h1, v2.h[3] +# CHECK: sqrdmlsh h0, h1, v2.h[3] +# CHECK: sqrdmlah s0, s1, v2.s[3] +# CHECK: sqrdmlsh s0, s1, v2.s[3] Index: test/MC/Disassembler/AArch64/armv8-extension-vhe.txt =================================================================== --- /dev/null +++ test/MC/Disassembler/AArch64/armv8-extension-vhe.txt @@ -0,0 +1,56 @@ +# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.1a --disassemble < %s | FileCheck %s + +0x20,0x20,0x1c,0xd5 +0x20,0xd0,0x1c,0xd5 +0x00,0xe3,0x1c,0xd5 
+0x40,0xe3,0x1c,0xd5 +0x20,0xe3,0x1c,0xd5 +0x00,0x10,0x1d,0xd5 +0x40,0x10,0x1d,0xd5 +0x00,0x20,0x1d,0xd5 +0x20,0x20,0x1d,0xd5 +0x40,0x20,0x1d,0xd5 +0x00,0x51,0x1d,0xd5 +0x20,0x51,0x1d,0xd5 +0x00,0x52,0x1d,0xd5 +0x00,0x60,0x1d,0xd5 +0x00,0xa2,0x1d,0xd5 +0x00,0xa3,0x1d,0xd5 +0x00,0xc0,0x1d,0xd5 +0x20,0xd0,0x1d,0xd5 +0x00,0xe1,0x1d,0xd5 +0x00,0xe2,0x1d,0xd5 +0x20,0xe2,0x1d,0xd5 +0x40,0xe2,0x1d,0xd5 +0x00,0xe3,0x1d,0xd5 +0x20,0xe3,0x1d,0xd5 +0x40,0xe3,0x1d,0xd5 +0x00,0x40,0x1d,0xd5 +0x20,0x40,0x1d,0xd5 +# CHECK: msr TTBR1_EL2, x0 +# CHECK: msr CONTEXTIDR_EL2, x0 +# CHECK: msr CNTHV_TVAL_EL2, x0 +# CHECK: msr CNTHV_CVAL_EL2, x0 +# CHECK: msr CNTHV_CTL_EL2, x0 +# CHECK: msr SCTLR_EL12, x0 +# CHECK: msr CPACR_EL12, x0 +# CHECK: msr TTBR0_EL12, x0 +# CHECK: msr TTBR1_EL12, x0 +# CHECK: msr TCR_EL12, x0 +# CHECK: msr AFSR0_EL12, x0 +# CHECK: msr AFSR1_EL12, x0 +# CHECK: msr ESR_EL12, x0 +# CHECK: msr FAR_EL12, x0 +# CHECK: msr MAIR_EL12, x0 +# CHECK: msr AMAIR_EL12, x0 +# CHECK: msr VBAR_EL12, x0 +# CHECK: msr CONTEXTIDR_EL12, x0 +# CHECK: msr CNTKCTL_EL12, x0 +# CHECK: msr CNTP_TVAL_EL02, x0 +# CHECK: msr CNTP_CTL_EL02, x0 +# CHECK: msr CNTP_CVAL_EL02, x0 +# CHECK: msr CNTV_TVAL_EL02, x0 +# CHECK: msr CNTV_CTL_EL02, x0 +# CHECK: msr CNTV_CVAL_EL02, x0 +# CHECK: msr SPSR_EL12, x0 +# CHECK: msr ELR_EL12, x0 Index: test/MC/Disassembler/ARM/armv8.1a.txt =================================================================== --- /dev/null +++ test/MC/Disassembler/ARM/armv8.1a.txt @@ -0,0 +1,52 @@ +# RUN: llvm-mc -triple armv8 -mattr=+v8.1a --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V81a +# RUN: not llvm-mc -triple armv8 -mattr=+v8 --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V8 + +[0x54,0x0b,0x12,0xf3] +[0x12,0x0b,0x21,0xf3] +[0x54,0x0c,0x12,0xf3] +[0x12,0x0c,0x21,0xf3] +# CHECK-V81a: vqrdmlah.s16 q0, q1, q2 +# CHECK-V81a: vqrdmlah.s32 d0, d1, d2 +# CHECK-V81a: vqrdmlsh.s16 q0, q1, q2 +# CHECK-V81a: vqrdmlsh.s32 d0, d1, d2 +# CHECK-V8: warning: 
invalid instruction encoding +# CHECK-V8: [0x54,0x0b,0x12,0xf3] +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0x12,0x0b,0x21,0xf3] +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0x54,0x0c,0x12,0xf3] +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0x12,0x0c,0x21,0xf3] + +[0x42,0x0e,0x92,0xf3] +[0x42,0x0e,0xa1,0xf2] +[0x42,0x0f,0x92,0xf3] +[0x42,0x0f,0xa1,0xf2] +# CHECK-V81a: vqrdmlah.s16 q0, q1, d2[0] +# CHECK-V81a: vqrdmlah.s32 d0, d1, d2[0] +# CHECK-V81a: vqrdmlsh.s16 q0, q1, d2[0] +# CHECK-V81a: vqrdmlsh.s32 d0, d1, d2[0] +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0x42,0x0e,0x92,0xf3] +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0x42,0x0e,0xa1,0xf2] +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0x42,0x0f,0x92,0xf3] +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0x42,0x0f,0xa1,0xf2] + +# The SETPAN(v8.1a) and TST(v8) instructions occupy the same space, but SETPAN +# uses the encoding for the invalid NV predicate operand. This test checks that +# the disassembler is correctly disambiguating and decoding these instructions. 
+ +[0x00,0x00,0x10,0xf1] +# CHECK-V81a: setpan #0 + +[0x00,0x02,0x10,0xf1] +# CHECK-V81a: setpan #1 + +[0x00,0x00,0x10,0xe1] +# CHECK-V81a: tst r0, r0 + +[0x00,0x02,0x10,0xe1] +# CHECK-V81a: tst r0, r0, lsl #4 Index: test/MC/Disassembler/ARM/invalid-armv8.1a.txt =================================================================== --- /dev/null +++ test/MC/Disassembler/ARM/invalid-armv8.1a.txt @@ -0,0 +1,83 @@ +# RUN: not llvm-mc -triple armv8 -mattr=+v8.1a --disassemble < %s 2>&1 | FileCheck %s + +# Check that sizes 00 and 11 are undefined for RDMA +[0x12,0x0b,0x01,0xf3] # vqrdmlah.s8 d0, d1, d2 +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x12,0x0b,0x01,0xf3] # vqrdmlah.s8 d0, d1, d2 +# CHECK-NEXT: ^ + +[0x12,0x0b,0x31,0xf3] # vqrdmlah.s64 d0, d1, d2 +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x12,0x0b,0x31,0xf3] # vqrdmlah.s64 d0, d1, d2 +# CHECK-NEXT: ^ + +[0x54,0x0b,0x02,0xf3] # vqrdmlah.s8 q0, q1, q2 +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x54,0x0b,0x02,0xf3] # vqrdmlah.s8 q0, q1, q2 +# CHECK-NEXT: ^ + +[0x54,0x0b,0x32,0xf3] # vqrdmlah.s64 q2, q3, q0 +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x54,0x0b,0x32,0xf3] # vqrdmlah.s64 q2, q3, q0 +# CHECK-NEXT: ^ + +[0x15,0x7c,0x06,0xf3] # vqrdmlsh.s8 d0, d1, d2 +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x15,0x7c,0x06,0xf3] # vqrdmlsh.s8 d0, d1, d2 +# CHECK-NEXT: ^ + +[0x15,0x7c,0x36,0xf3] # vqrdmlsh.s64 d0, d1, d2 +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x15,0x7c,0x36,0xf3] # vqrdmlsh.s64 d0, d1, d2 +# CHECK-NEXT: ^ + +[0x54,0x0c,0x02,0xf3] # vqrdmlsh.s8 q0, q1, q2 +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x54,0x0c,0x02,0xf3] # vqrdmlsh.s8 q0, q1, q2 +# CHECK-NEXT: ^ + +[0x54,0x0c,0x32,0xf3] # vqrdmlsh.s64 q0, q1, q2 +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x54,0x0c,0x32,0xf3] # vqrdmlsh.s64 q0, q1, q2 +# CHECK-NEXT: ^ + +[0x42,0x0e,0x81,0xf2] # vqrdmlah.s8 d0, 
d1, d2[0] +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x42,0x0e,0x81,0xf2] # vqrdmlah.s8 d0, d1, d2[0] +# CHECK-NEXT: ^ + +[0x42,0x0e,0xb1,0xf2] # vqrdmlah.s64 d0, d1, d2[0] +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x42,0x0e,0xb1,0xf2] # vqrdmlah.s64 d0, d1, d2[0] +# CHECK-NEXT: ^ + +[0x42,0x0e,0x82,0xf3] # vqrdmlah.s8 q0, q1, d2[0] +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x42,0x0e,0x82,0xf3] # vqrdmlah.s8 q0, q1, d2[0] +# CHECK-NEXT: ^ + +[0x42,0x0e,0xb2,0xf3] # vqrdmlah.s64 q0, q1, d2[0] +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x42,0x0e,0xb2,0xf3] # vqrdmlah.s64 q0, q1, d2[0] +# CHECK-NEXT: ^ + + +[0x42,0x0f,0x81,0xf2] # vqrdmlsh.s8 d0, d1, d2[0] +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x42,0x0f,0x81,0xf2] # vqrdmlsh.s8 d0, d1, d2[0] +# CHECK-NEXT: ^ + +[0x42,0x0f,0xb1,0xf2] # vqrdmlsh.s64 d0, d1, d2[0] +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x42,0x0f,0xb1,0xf2] # vqrdmlsh.s64 d0, d1, d2[0] +# CHECK-NEXT: ^ + +[0x42,0x0f,0x82,0xf3] # vqrdmlsh.s8 q0, q1, d2[0] +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x42,0x0f,0x82,0xf3] # vqrdmlsh.s8 q0, q1, d2[0] +# CHECK-NEXT: ^ + +[0x42,0x0f,0xb2,0xf3] # vqrdmlsh.s64 q0, q1, d2[0] +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x42,0x0f,0xb2,0xf3] # vqrdmlsh.s64 q0, q1, d2[0] +# CHECK-NEXT: ^ Index: test/MC/Disassembler/ARM/invalid-thumbv8.1a.txt =================================================================== --- /dev/null +++ test/MC/Disassembler/ARM/invalid-thumbv8.1a.txt @@ -0,0 +1,72 @@ +# RUN: not llvm-mc -triple thumbv8 -mattr=+v8.1a --disassemble < %s 2>&1 | FileCheck %s + +# Check, if sizes 00 and 11 are undefined for RDMA +[0x01,0xff,0x12,0x0b] # vqrdmlah.s8 d0, d1, d2 +[0x31,0xff,0x12,0x0b] # vqrdmlah.s64 d0, d1, d2 +[0x02,0xff,0x54,0x0b] # vqrdmlah.s8 q0, q1, q2 +[0x06,0xff,0x50,0x4b] # vqrdmlah.s64 q2, q3, q0 + +[0x01,0xff,0x12,0x0c] 
# vqrdmlsh.s8 d0, d1, d2 +[0x31,0xff,0x12,0x0c] # vqrdmlsh.s64 d0, d1, d2 +[0x02,0xff,0x54,0x0c] # vqrdmlsh.s8 q0, q1, q2 +[0x32,0xff,0x54,0x0c] # vqrdmlsh.s64 q0, q1, q2 + +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x01,0xff,0x12,0x0b] # vqrdmlah.s8 d0, d1, d2 +# CHECK-NEXT: ^ +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x31,0xff,0x12,0x0b] # vqrdmlah.s64 d0, d1, d2 +# CHECK-NEXT: ^ +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x02,0xff,0x54,0x0b] # vqrdmlah.s8 q0, q1, q2 +# CHECK-NEXT: ^ +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x06,0xff,0x50,0x4b] # vqrdmlah.s64 q2, q3, q0 +# CHECK-NEXT: ^ +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x01,0xff,0x12,0x0c] # vqrdmlsh.s8 d0, d1, d2 +# CHECK-NEXT: ^ +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x31,0xff,0x12,0x0c] # vqrdmlsh.s64 d0, d1, d2 +# CHECK-NEXT: ^ +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x02,0xff,0x54,0x0c] # vqrdmlsh.s8 q0, q1, q2 +# CHECK-NEXT: ^ +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x32,0xff,0x54,0x0c] # vqrdmlsh.s64 q0, q1, q2 +# CHECK-NEXT: ^ + +[0x81,0xef,0x42,0x0e] # vqrdmlah.s8 d0, d1, d2[0] +[0xb1,0xef,0x42,0x0e] # vqrdmlah.s64 d0, d1, d2[0] +[0x82,0xff,0x42,0x0e] # vqrdmlah.s8 q0, q1, d2[0] +[0xb2,0xff,0x42,0x0e] # vqrdmlah.s64 q0, q1, d2[0] + +[0x81,0xef,0x42,0x0f] # vqrdmlsh.s8 d0, d1, d2[0] +[0xb1,0xef,0x42,0x0f] # vqrdmlsh.s64 d0, d1, d2[0] +[0x82,0xff,0x42,0x0f] # vqrdmlsh.s8 q0, q1, d2[0] +[0xb2,0xff,0x42,0x0f] # vqrdmlsh.s64 q0, q1, d2[0] + +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x81,0xef,0x42,0x0e] # vqrdmlah.s8 d0, d1, d2[0] +# CHECK-NEXT: ^ +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0xb1,0xef,0x42,0x0e] # vqrdmlah.s64 d0, d1, d2[0] +# CHECK-NEXT: ^ +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x82,0xff,0x42,0x0e] # vqrdmlah.s8 q0, q1, d2[0] +# CHECK-NEXT: 
^ +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0xb2,0xff,0x42,0x0e] # vqrdmlah.s64 q0, q1, d2[0] +# CHECK-NEXT: ^ +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x81,0xef,0x42,0x0f] # vqrdmlsh.s8 d0, d1, d2[0] +# CHECK-NEXT: ^ +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0xb1,0xef,0x42,0x0f] # vqrdmlsh.s64 d0, d1, d2[0] +# CHECK-NEXT: ^ +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0x82,0xff,0x42,0x0f] # vqrdmlsh.s8 q0, q1, d2[0] +# CHECK-NEXT: ^ +# CHECK: warning: invalid instruction encoding +# CHECK-NEXT: [0xb2,0xff,0x42,0x0f] # vqrdmlsh.s64 q0, q1, d2[0] +# CHECK-NEXT: ^ Index: test/MC/Disassembler/ARM/thumb-v8.1a.txt =================================================================== --- /dev/null +++ test/MC/Disassembler/ARM/thumb-v8.1a.txt @@ -0,0 +1,110 @@ +# RUN: llvm-mc -triple thumbv8 -mattr=+v8.1a --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V81a +# RUN: not llvm-mc -triple thumbv8 -mattr=+v8 --disassemble < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V8 + +[0x11,0xff,0x12,0x0b] +# CHECK-V81a: vqrdmlah.s16 d0, d1, d2 +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0x11,0xff,0x12,0x0b] +# CHECK-V8: ^ + +[0x21,0xff,0x12,0x0b] +# CHECK-V81a: vqrdmlah.s32 d0, d1, d2 +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0x21,0xff,0x12,0x0b] +# CHECK-V8: ^ + +[0x12,0xff,0x54,0x0b] +# CHECK-V81a: vqrdmlah.s16 q0, q1, q2 +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0x12,0xff,0x54,0x0b] +# CHECK-V8: ^ + +[0x26,0xff,0x50,0x4b] +# CHECK-V81a: vqrdmlah.s32 q2, q3, q0 +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0x26,0xff,0x50,0x4b] +# CHECK-V8: ^ + +[0x16,0xff,0x15,0x7c] +# CHECK-V81a: vqrdmlsh.s16 d7, d6, d5 +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0x16,0xff,0x15,0x7c] +# CHECK-V8: ^ + +[0x21,0xff,0x12,0x0c] +# CHECK-V81a: vqrdmlsh.s32 d0, d1, d2 +# CHECK-V8: warning: invalid instruction
encoding +# CHECK-V8: [0x21,0xff,0x12,0x0c] +# CHECK-V8: ^ + +[0x12,0xff,0x54,0x0c] +# CHECK-V81a: vqrdmlsh.s16 q0, q1, q2 +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0x12,0xff,0x54,0x0c] +# CHECK-V8: ^ + +[0x28,0xff,0x5a,0x6c] +# CHECK-V81a: vqrdmlsh.s32 q3, q4, q5 +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0x28,0xff,0x5a,0x6c] +# CHECK-V8: ^ + +[0x91,0xef,0x42,0x0e] +# CHECK-V81a: vqrdmlah.s16 d0, d1, d2[0] +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0x91,0xef,0x42,0x0e] +# CHECK-V8: ^ + +[0xa1,0xef,0x42,0x0e] +# CHECK-V81a: vqrdmlah.s32 d0, d1, d2[0] +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0xa1,0xef,0x42,0x0e] +# CHECK-V8: ^ + +[0x92,0xff,0x42,0x0e] +# CHECK-V81a: vqrdmlah.s16 q0, q1, d2[0] +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0x92,0xff,0x42,0x0e] +# CHECK-V8: ^ + +[0xa2,0xff,0x42,0x0e] +# CHECK-V81a: vqrdmlah.s32 q0, q1, d2[0] +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0xa2,0xff,0x42,0x0e] +# CHECK-V8: ^ + +[0x91,0xef,0x42,0x0f] +# CHECK-V81a: vqrdmlsh.s16 d0, d1, d2[0] +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0x91,0xef,0x42,0x0f] +# CHECK-V8: ^ + +[0xa1,0xef,0x42,0x0f] +# CHECK-V81a: vqrdmlsh.s32 d0, d1, d2[0] +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0xa1,0xef,0x42,0x0f] +# CHECK-V8: ^ + +[0x92,0xff,0x42,0x0f] +# CHECK-V81a: vqrdmlsh.s16 q0, q1, d2[0] +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0x92,0xff,0x42,0x0f] +# CHECK-V8: ^ + +[0xa2,0xff,0x42,0x0f] +# CHECK-V81a: vqrdmlsh.s32 q0, q1, d2[0] +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0xa2,0xff,0x42,0x0f] +# CHECK-V8: ^ + +[0x10,0xb6] +# CHECK-V81a: setpan #0 +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0x10,0xb6] +# CHECK-V8: ^ + +[0x18,0xb6] +# CHECK-V81a: setpan #1 +# CHECK-V8: warning: invalid instruction encoding +# CHECK-V8: [0x18,0xb6] +# CHECK-V8: ^