Index: llvm/trunk/include/llvm/ADT/Triple.h =================================================================== --- llvm/trunk/include/llvm/ADT/Triple.h +++ llvm/trunk/include/llvm/ADT/Triple.h @@ -86,6 +86,7 @@ enum SubArchType { NoSubArch, + ARMSubArch_v8_1a, ARMSubArch_v8, ARMSubArch_v7, ARMSubArch_v7em, Index: llvm/trunk/include/llvm/Support/ARMBuildAttributes.h =================================================================== --- llvm/trunk/include/llvm/Support/ARMBuildAttributes.h +++ llvm/trunk/include/llvm/Support/ARMBuildAttributes.h @@ -106,7 +106,7 @@ v6_M = 11, // e.g. Cortex M1 v6S_M = 12, // v6_M with the System extensions v7E_M = 13, // v7_M with DSP extensions - v8 = 14 // v8, AArch32 + v8 = 14, // v8,v8.1a AArch32 }; enum CPUArchProfile { // (=7), uleb128 @@ -145,6 +145,7 @@ AllowNeon = 1, // SIMDv1 was permitted AllowNeon2 = 2, // SIMDv2 was permitted (Half-precision FP, MAC operations) AllowNeonARMv8 = 3, // ARM v8-A SIMD was permitted + AllowNeonARMv8_1a = 4,// ARM v8.1-A SIMD was permitted (RDMA) // Tag_ABI_PCS_R9_use, (=14), uleb128 R9IsGPR = 0, // R9 used as v6 (just another callee-saved register) Index: llvm/trunk/lib/Support/Triple.cpp =================================================================== --- llvm/trunk/lib/Support/Triple.cpp +++ llvm/trunk/lib/Support/Triple.cpp @@ -281,6 +281,7 @@ .Cases("v7", "v7a", "v7em", "v7l", arch) .Cases("v7m", "v7r", "v7s", arch) .Cases("v8", "v8a", arch) + .Cases("v8.1", "v8.1a", arch) .Default(Triple::UnknownArch); } @@ -403,6 +404,7 @@ SubArchName = SubArchName.substr(0, SubArchName.size() - 2); return StringSwitch(SubArchName) + .EndsWith("v8.1a", Triple::ARMSubArch_v8_1a) .EndsWith("v8", Triple::ARMSubArch_v8) .EndsWith("v8a", Triple::ARMSubArch_v8) .EndsWith("v7", Triple::ARMSubArch_v7) @@ -1109,6 +1111,7 @@ .Cases("v7m", "v7-m", "cortex-m3") .Cases("v7em", "v7e-m", "cortex-m4") .Cases("v8", "v8a", "v8-a", "cortex-a53") + .Cases("v8.1a", "v8.1-a", "generic-armv8.1-a") .Default(nullptr); else result = llvm::StringSwitch(MArch) Index: llvm/trunk/lib/Target/AArch64/AArch64.td =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64.td +++ llvm/trunk/lib/Target/AArch64/AArch64.td @@ -32,6 +32,9 @@ def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", "Enable ARMv8 CRC-32 checksum instructions">; +def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true", + "Enable ARMv8.1a extensions", [FeatureCRC]>; + /// Cyclone has register move instructions which are "free". def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", "Has zero-cycle register moves">; @@ -89,6 +92,10 @@ FeatureNEON, FeatureCRC]>; +def : ProcessorModel<"generic-armv8.1-a", NoSchedModel, [FeatureV8_1a, + FeatureNEON, + FeatureCrypto]>; + def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>; def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>; // FIXME: Cortex-A72 is currently modelled as an Cortex-A57. Index: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td @@ -22,6 +22,8 @@ AssemblerPredicate<"FeatureCrypto", "crypto">; def HasCRC : Predicate<"Subtarget->hasCRC()">, AssemblerPredicate<"FeatureCRC", "crc">; +def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">, + AssemblerPredicate<"FeatureV8_1a", "v8.1a">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsCyclone : Predicate<"Subtarget->isCyclone()">; Index: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h +++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h @@ -41,6 +41,7 @@ bool HasNEON; bool HasCrypto; bool HasCRC; + bool HasV8_1a; // HasZeroCycleRegMove - Has zero-cycle register mov instructions. bool HasZeroCycleRegMove; @@ -100,6 +101,7 @@ bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } + bool hasV8_1a() const { return HasV8_1a; } bool isLittleEndian() const { return IsLittle; } Index: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp @@ -48,7 +48,7 @@ const TargetMachine &TM, bool LittleEndian) : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false), - HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), + HasV8_1a(false), HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {} Index: llvm/trunk/lib/Target/ARM/ARM.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARM.td +++ llvm/trunk/lib/Target/ARM/ARM.td @@ -175,6 +175,9 @@ "Support ARM v8 instructions", [HasV7Ops, FeatureVirtualization, FeatureMP]>; +def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true", + "Support ARM v8.1a instructions", + [HasV8Ops, FeatureAClass, FeatureCRC]>; //===----------------------------------------------------------------------===// // ARM Processors supported. @@ -449,6 +452,14 @@ FeatureDB,FeatureDSPThumb2, FeatureHasRAS, FeatureZCZeroing]>; +// V8.1 Processors +def : ProcNoItin<"generic-armv8.1-a", [HasV8Ops, FeatureV8_1a, + FeatureDB, FeatureFPARMv8, + FeatureNEON, FeatureDSPThumb2, + FeatureHWDiv, FeatureHWDivARM, + FeatureTrustZone, FeatureT2XtPk, + FeatureCrypto]>; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// Index: llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp +++ llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp @@ -607,7 +607,7 @@ std::string CPUString = STI.getCPUString(); - if (CPUString != "generic") { + if (CPUString.find("generic") != 0) { //CPUString doesn't start with "generic" // FIXME: remove krait check when GNU tools support krait cpu if (STI.isKrait()) { ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9"); @@ -661,7 +661,8 @@ // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture if (STI.hasV8Ops()) ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, - ARMBuildAttrs::AllowNeonARMv8); + STI.hasV8_1a() ? ARMBuildAttrs::AllowNeonARMv8_1a: + ARMBuildAttrs::AllowNeonARMv8); } else { if (STI.hasFPARMv8()) // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one Index: llvm/trunk/lib/Target/ARM/ARMSubtarget.h =================================================================== --- llvm/trunk/lib/Target/ARM/ARMSubtarget.h +++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h @@ -182,6 +182,9 @@ /// HasCRC - if true, processor supports CRC instructions bool HasCRC; + /// HasV8_1a - if true, the processor has V8.1a: PAN and RDMA extensions + bool HasV8_1a; + /// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are /// particularly effective at zeroing a VFP register. bool HasZeroCycleZeroing; @@ -310,6 +313,7 @@ bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } + bool hasV8_1a() const { return HasV8_1a; } bool hasVirtualization() const { return HasVirtualization; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; Index: llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp +++ llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp @@ -166,6 +166,7 @@ HasTrustZone = false; HasCrypto = false; HasCRC = false; + HasV8_1a = false; HasZeroCycleZeroing = false; AllowsUnalignedMem = false; Thumb2DSP = false; Index: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -276,6 +276,9 @@ bool hasD16() const { return STI.getFeatureBits() & ARM::FeatureD16; } + bool hasV8_1a() const { + return STI.getFeatureBits() & ARM::FeatureV8_1a; + } void SwitchMode() { uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb)); Index: llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMArchName.def =================================================================== --- llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMArchName.def +++ llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMArchName.def @@ -44,6 +44,8 @@ ARM_ARCH_ALIAS("armv7m", ARMV7M) ARM_ARCH_NAME("armv8-a", ARMV8A, "8-A", v8) ARM_ARCH_ALIAS("armv8a", ARMV8A) +ARM_ARCH_NAME("armv8.1-a", ARMV8_1A, "8.1-A", v8) +ARM_ARCH_ALIAS("armv8.1a", ARMV8_1A) ARM_ARCH_NAME("iwmmxt", IWMMXT, "iwmmxt", v5TE) ARM_ARCH_NAME("iwmmxt2", IWMMXT2, "iwmmxt2", v5TE) Index: llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -817,6 +817,7 @@ break; case ARM::ARMV8A: + case ARM::ARMV8_1A: setAttributeItem(CPU_arch_profile, ApplicationProfile, false); setAttributeItem(ARM_ISA_use, Allowed, false); setAttributeItem(THUMB_ISA_use, AllowThumb32, false); @@ -914,9 +915,8 @@ setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPARMv8A, /* OverwriteExisting= */ false); - setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch, - ARMBuildAttrs::AllowNeonARMv8, - /* OverwriteExisting= */ false); + // 'Advanced_SIMD_arch' must be emitted not here, but within + // ARMAsmPrinter::emitAttributes(), depending on hasV8Ops() and hasV8_1a() break; case ARM::SOFTVFP: Index: llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -153,6 +153,17 @@ // Use CPU to figure out the exact features ARMArchFeature = "+v8"; break; + case Triple::ARMSubArch_v8_1a: + if (NoCPU) + // v8.1a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, + // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, + // FeatureT2XtPk, FeatureCrypto, FeatureCRC, FeatureV8_1a + ARMArchFeature = "+v8.1a,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm," + "+trustzone,+t2xtpk,+crypto,+crc"; + else + // Use CPU to figure out the exact features + ARMArchFeature = "+v8.1a"; + break; case Triple::ARMSubArch_v7m: isThumb = true; if (NoCPU) Index: llvm/trunk/test/CodeGen/ARM/build-attributes.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/build-attributes.ll +++ llvm/trunk/test/CodeGen/ARM/build-attributes.ll @@ -96,6 +96,9 @@ ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a72 | FileCheck %s --check-prefix=CORTEX-A72 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a72 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A72-FAST ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a72 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING +; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi | FileCheck %s --check-prefix=GENERIC-ARMV8_1-A +; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=GENERIC-ARMV8_1-A-FAST +; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 | FileCheck %s --check-prefix=CORTEX-A7-CHECK ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A7-CHECK-FAST ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=-vfp2,-vfp3,-vfp4,-neon | FileCheck %s --check-prefix=CORTEX-A7-NOFPU @@ -112,6 +115,10 @@ ; RUN: llc < %s -mtriple=arm-none-linux-gnueabi | FileCheck %s --check-prefix=PCS-R9-USE ; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -arm-reserve-r9 | FileCheck %s --check-prefix=PCS-R9-RESERVE +; ARMv8.1a (AArch32) +; RUN: llc < %s -mtriple=armv8.1a-none-linux-gnueabi -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN +; RUN: llc < %s -mtriple=armv8.1a-none-linux-gnueabi -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN +; RUN: llc < %s -mtriple=armv8.1a-none-linux-gnueabi | FileCheck %s --check-prefix=NO-STRICT-ALIGN ; ARMv8a (AArch32) ; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a57 -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN ; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a57 -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN @@ -1153,6 +1160,35 @@ ; CORTEX-A72-FAST-NOT: .eabi_attribute 22 ; CORTEX-A72-FAST: .eabi_attribute 23, 1 +; GENERIC-ARMV8_1-A: .eabi_attribute 6, 14 +; GENERIC-ARMV8_1-A: .eabi_attribute 7, 65 +; GENERIC-ARMV8_1-A: .eabi_attribute 8, 1 +; GENERIC-ARMV8_1-A: .eabi_attribute 9, 2 +; GENERIC-ARMV8_1-A: .fpu crypto-neon-fp-armv8 +; GENERIC-ARMV8_1-A: .eabi_attribute 12, 4 +; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 19 +;; We default to IEEE 754 compliance +; GENERIC-ARMV8_1-A: .eabi_attribute 20, 1 +; GENERIC-ARMV8_1-A: .eabi_attribute 21, 1 +; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 22 +; GENERIC-ARMV8_1-A: .eabi_attribute 23, 3 +; GENERIC-ARMV8_1-A: .eabi_attribute 24, 1 +; GENERIC-ARMV8_1-A: .eabi_attribute 25, 1 +; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 27 +; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 28 +; GENERIC-ARMV8_1-A: .eabi_attribute 36, 1 +; GENERIC-ARMV8_1-A: .eabi_attribute 38, 1 +; GENERIC-ARMV8_1-A: .eabi_attribute 42, 1 +; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 44 +; GENERIC-ARMV8_1-A: .eabi_attribute 68, 3 + +; GENERIC-ARMV8_1-A-FAST-NOT: .eabi_attribute 19 +;; GENERIC-ARMV8_1-A has the ARMv8 FP unit, which always flushes preserving sign. +; GENERIC-ARMV8_1-A-FAST: .eabi_attribute 20, 2 +; GENERIC-ARMV8_1-A-FAST-NOT: .eabi_attribute 21 +; GENERIC-ARMV8_1-A-FAST-NOT: .eabi_attribute 22 +; GENERIC-ARMV8_1-A-FAST: .eabi_attribute 23, 1 + ; RELOC-PIC: .eabi_attribute 15, 1 ; RELOC-PIC: .eabi_attribute 16, 1 ; RELOC-PIC: .eabi_attribute 17, 2