Index: llvm/trunk/include/llvm/Support/TargetParser.h =================================================================== --- llvm/trunk/include/llvm/Support/TargetParser.h +++ llvm/trunk/include/llvm/Support/TargetParser.h @@ -36,7 +36,11 @@ FK_VFP, FK_VFPV2, FK_VFPV3, + FK_VFPV3_FP16, FK_VFPV3_D16, + FK_VFPV3_D16_FP16, + FK_VFPV3XD, + FK_VFPV3XD_FP16, FK_VFPV4, FK_VFPV4_D16, FK_FPV4_SP_D16, @@ -44,6 +48,7 @@ FK_FPV5_SP_D16, FK_FP_ARMV8, FK_NEON, + FK_NEON_FP16, FK_NEON_VFPV4, FK_NEON_FP_ARMV8, FK_CRYPTO_NEON_FP_ARMV8, @@ -51,6 +56,16 @@ FK_LAST }; + // FPU Version + enum FPUVersion { + FV_NONE = 0, + FV_VFPV2, + FV_VFPV3, + FV_VFPV3_FP16, + FV_VFPV4, + FV_VFPV5 + }; + // An FPU name implies one of three levels of Neon support: enum NeonSupportLevel { NS_None = 0, ///< No Neon Index: llvm/trunk/lib/Support/TargetParser.cpp =================================================================== --- llvm/trunk/lib/Support/TargetParser.cpp +++ llvm/trunk/lib/Support/TargetParser.cpp @@ -25,31 +25,37 @@ // List of canonical FPU names (use getFPUSynonym) and which architectural // features they correspond to (use getFPUFeatures). // FIXME: TableGen this. +// The entries must appear in the order listed in ARM::FPUKind for correct indexing struct { const char * Name; ARM::FPUKind ID; - unsigned FPUVersion; ///< Corresponds directly to the FP arch version number. + ARM::FPUVersion FPUVersion; ARM::NeonSupportLevel NeonSupport; ARM::FPURestriction Restriction; } FPUNames[] = { - { "invalid", ARM::FK_INVALID, 0, ARM::NS_None, ARM::FR_None}, - { "none", ARM::FK_NONE, 0, ARM::NS_None, ARM::FR_None}, - { "vfp", ARM::FK_VFP, 2, ARM::NS_None, ARM::FR_None}, - { "vfpv2", ARM::FK_VFPV2, 2, ARM::NS_None, ARM::FR_None}, - { "vfpv3", ARM::FK_VFPV3, 3, ARM::NS_None, ARM::FR_None}, - { "vfpv3-d16", ARM::FK_VFPV3_D16, 3, ARM::NS_None, ARM::FR_D16}, - { "vfpv4", ARM::FK_VFPV4, 4, ARM::NS_None, ARM::FR_None}, - { "vfpv4-d16", ARM::FK_VFPV4_D16, 4, ARM::NS_None, ARM::FR_D16}, - { "fpv4-sp-d16", ARM::FK_FPV4_SP_D16, 4, ARM::NS_None, ARM::FR_SP_D16}, - { "fpv5-d16", ARM::FK_FPV5_D16, 5, ARM::NS_None, ARM::FR_D16}, - { "fpv5-sp-d16", ARM::FK_FPV5_SP_D16, 5, ARM::NS_None, ARM::FR_SP_D16}, - { "fp-armv8", ARM::FK_FP_ARMV8, 5, ARM::NS_None, ARM::FR_None}, - { "neon", ARM::FK_NEON, 3, ARM::NS_Neon, ARM::FR_None}, - { "neon-vfpv4", ARM::FK_NEON_VFPV4, 4, ARM::NS_Neon, ARM::FR_None}, - { "neon-fp-armv8", ARM::FK_NEON_FP_ARMV8, 5, ARM::NS_Neon, ARM::FR_None}, + { "invalid", ARM::FK_INVALID, ARM::FV_NONE, ARM::NS_None, ARM::FR_None}, + { "none", ARM::FK_NONE, ARM::FV_NONE, ARM::NS_None, ARM::FR_None}, + { "vfp", ARM::FK_VFP, ARM::FV_VFPV2, ARM::NS_None, ARM::FR_None}, + { "vfpv2", ARM::FK_VFPV2, ARM::FV_VFPV2, ARM::NS_None, ARM::FR_None}, + { "vfpv3", ARM::FK_VFPV3, ARM::FV_VFPV3, ARM::NS_None, ARM::FR_None}, + { "vfpv3-fp16", ARM::FK_VFPV3_FP16, ARM::FV_VFPV3_FP16, ARM::NS_None, ARM::FR_None}, + { "vfpv3-d16", ARM::FK_VFPV3_D16, ARM::FV_VFPV3, ARM::NS_None, ARM::FR_D16}, + { "vfpv3-d16-fp16", ARM::FK_VFPV3_D16_FP16, ARM::FV_VFPV3_FP16, ARM::NS_None, ARM::FR_D16}, + { "vfpv3xd", ARM::FK_VFPV3XD, ARM::FV_VFPV3, ARM::NS_None, ARM::FR_SP_D16}, + { "vfpv3xd-fp16", ARM::FK_VFPV3XD_FP16, ARM::FV_VFPV3_FP16, ARM::NS_None, ARM::FR_SP_D16}, + { "vfpv4", ARM::FK_VFPV4, ARM::FV_VFPV4, ARM::NS_None, ARM::FR_None}, + { "vfpv4-d16", ARM::FK_VFPV4_D16, ARM::FV_VFPV4, ARM::NS_None, ARM::FR_D16}, + { "fpv4-sp-d16", ARM::FK_FPV4_SP_D16, ARM::FV_VFPV4, ARM::NS_None, ARM::FR_SP_D16}, + { "fpv5-d16", ARM::FK_FPV5_D16, ARM::FV_VFPV5, ARM::NS_None, ARM::FR_D16}, + { "fpv5-sp-d16", ARM::FK_FPV5_SP_D16, ARM::FV_VFPV5, ARM::NS_None, ARM::FR_SP_D16}, + { "fp-armv8", ARM::FK_FP_ARMV8, ARM::FV_VFPV5, ARM::NS_None, ARM::FR_None}, + { "neon", ARM::FK_NEON, ARM::FV_VFPV3, ARM::NS_Neon, ARM::FR_None}, + { "neon-fp16", ARM::FK_NEON_FP16, ARM::FV_VFPV3_FP16, ARM::NS_Neon, ARM::FR_None}, + { "neon-vfpv4", ARM::FK_NEON_VFPV4, ARM::FV_VFPV4, ARM::NS_Neon, ARM::FR_None}, + { "neon-fp-armv8", ARM::FK_NEON_FP_ARMV8, ARM::FV_VFPV5, ARM::NS_Neon, ARM::FR_None}, { "crypto-neon-fp-armv8", - ARM::FK_CRYPTO_NEON_FP_ARMV8, 5, ARM::NS_Crypto, ARM::FR_None}, - { "softvfp", ARM::FK_SOFTVFP, 0, ARM::NS_None, ARM::FR_None}, + ARM::FK_CRYPTO_NEON_FP_ARMV8, ARM::FV_VFPV5, ARM::NS_Crypto, ARM::FR_None}, + { "softvfp", ARM::FK_SOFTVFP, ARM::FV_NONE, ARM::NS_None, ARM::FR_None}, }; // List of canonical arch names (use getArchSynonym). @@ -279,33 +285,41 @@ // higher. We also have to make sure to disable fp16 when vfp4 is disabled, // as +vfp4 implies +fp16 but -vfp4 does not imply -fp16. switch (FPUNames[FPUKind].FPUVersion) { - case 5: + case ARM::FV_VFPV5: Features.push_back("+fp-armv8"); break; - case 4: + case ARM::FV_VFPV4: Features.push_back("+vfp4"); Features.push_back("-fp-armv8"); break; - case 3: + case ARM::FV_VFPV3_FP16: + Features.push_back("+vfp3"); + Features.push_back("+fp16"); + Features.push_back("-vfp4"); + Features.push_back("-fp-armv8"); + break; + case ARM::FV_VFPV3: Features.push_back("+vfp3"); Features.push_back("-fp16"); Features.push_back("-vfp4"); Features.push_back("-fp-armv8"); break; - case 2: + case ARM::FV_VFPV2: Features.push_back("+vfp2"); Features.push_back("-vfp3"); Features.push_back("-fp16"); Features.push_back("-vfp4"); Features.push_back("-fp-armv8"); break; - case 0: + case ARM::FV_NONE: Features.push_back("-vfp2"); Features.push_back("-vfp3"); Features.push_back("-fp16"); Features.push_back("-vfp4"); Features.push_back("-fp-armv8"); break; + default: + return false; } // crypto includes neon, so we handle this similarly to FPU version. Index: llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp +++ llvm/trunk/lib/Target/ARM/ARMAsmPrinter.cpp @@ -630,7 +630,7 @@ } else if (STI.hasVFP4()) ATS.emitFPU(ARM::FK_NEON_VFPV4); else - ATS.emitFPU(ARM::FK_NEON); + ATS.emitFPU(STI.hasFP16() ? ARM::FK_NEON_FP16 : ARM::FK_NEON); // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture if (STI.hasV8Ops()) ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, @@ -648,7 +648,13 @@ ? (STI.isFPOnlySP() ? ARM::FK_FPV4_SP_D16 : ARM::FK_VFPV4_D16) : ARM::FK_VFPV4); else if (STI.hasVFP3()) - ATS.emitFPU(STI.hasD16() ? ARM::FK_VFPV3_D16 : ARM::FK_VFPV3); + ATS.emitFPU(STI.hasD16() + // +d16 + ? (STI.isFPOnlySP() + ? (STI.hasFP16() ? ARM::FK_VFPV3XD_FP16 : ARM::FK_VFPV3XD) + : (STI.hasFP16() ? ARM::FK_VFPV3_D16_FP16 : ARM::FK_VFPV3_D16)) + // -d16 + : (STI.hasFP16() ? ARM::FK_VFPV3_FP16 : ARM::FK_VFPV3)); else if (STI.hasVFP2()) ATS.emitFPU(ARM::FK_VFPV2); } Index: llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -797,12 +797,44 @@ /* OverwriteExisting= */ false); break; + case ARM::FK_VFPV3_FP16: + setAttributeItem(ARMBuildAttrs::FP_arch, + ARMBuildAttrs::AllowFPv3A, + /* OverwriteExisting= */ false); + setAttributeItem(ARMBuildAttrs::FP_HP_extension, + ARMBuildAttrs::AllowHPFP, + /* OverwriteExisting= */ false); + break; + case ARM::FK_VFPV3_D16: setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv3B, /* OverwriteExisting= */ false); break; + case ARM::FK_VFPV3_D16_FP16: + setAttributeItem(ARMBuildAttrs::FP_arch, + ARMBuildAttrs::AllowFPv3B, + /* OverwriteExisting= */ false); + setAttributeItem(ARMBuildAttrs::FP_HP_extension, + ARMBuildAttrs::AllowHPFP, + /* OverwriteExisting= */ false); + break; + + case ARM::FK_VFPV3XD: + setAttributeItem(ARMBuildAttrs::FP_arch, + ARMBuildAttrs::AllowFPv3B, + /* OverwriteExisting= */ false); + break; + case ARM::FK_VFPV3XD_FP16: + setAttributeItem(ARMBuildAttrs::FP_arch, + ARMBuildAttrs::AllowFPv3B, + /* OverwriteExisting= */ false); + setAttributeItem(ARMBuildAttrs::FP_HP_extension, + ARMBuildAttrs::AllowHPFP, + /* OverwriteExisting= */ false); + break; + case ARM::FK_VFPV4: setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv4A, @@ -842,6 +874,18 @@ /* OverwriteExisting= */ false); break; + case ARM::FK_NEON_FP16: + setAttributeItem(ARMBuildAttrs::FP_arch, + ARMBuildAttrs::AllowFPv3A, + /* OverwriteExisting= */ false); + setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch, + ARMBuildAttrs::AllowNeon, + /* OverwriteExisting= */ false); + setAttributeItem(ARMBuildAttrs::FP_HP_extension, + ARMBuildAttrs::AllowHPFP, + /* OverwriteExisting= */ false); + break; + case ARM::FK_NEON_VFPV4: setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv4A, Index: llvm/trunk/test/CodeGen/ARM/build-attributes.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/build-attributes.ll +++ llvm/trunk/test/CodeGen/ARM/build-attributes.ll @@ -51,6 +51,13 @@ ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A17-FAST ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -mattr=-vfp2 | FileCheck %s --check-prefix=CORTEX-A17-NOFPU ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -mattr=-vfp2 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A17-NOFPU-FAST + +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3-FP16 +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+d16,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3-D16-FP16 +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+fp-only-sp,+d16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3XD +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=-neon,+vfp3,+fp-only-sp,+d16,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-VFPV3XD-FP16 +; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mattr=+neon,+fp16 | FileCheck %s --check-prefix=GENERIC-FPU-NEON-FP16 + ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 | FileCheck %s --check-prefix=CORTEX-M0 ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M0-FAST @@ -1091,7 +1098,7 @@ ; CORTEX-R7: .eabi_attribute 7, 82 ; CORTEX-R7: .eabi_attribute 8, 1 ; CORTEX-R7: .eabi_attribute 9, 2 -; CORTEX-R7: .fpu vfpv3-d16 +; CORTEX-R7: .fpu vfpv3xd ; CORTEX-R7-NOT: .eabi_attribute 19 ;; We default to IEEE 754 compliance ; CORTEX-R7: .eabi_attribute 20, 1 @@ -1205,6 +1212,12 @@ ; CORTEX-A72-FAST-NOT: .eabi_attribute 22 ; CORTEX-A72-FAST: .eabi_attribute 23, 1 +; GENERIC-FPU-VFPV3-FP16: .fpu vfpv3-fp16 +; GENERIC-FPU-VFPV3-D16-FP16: .fpu vfpv3-d16-fp16 +; GENERIC-FPU-VFPV3XD: .fpu vfpv3xd +; GENERIC-FPU-VFPV3XD-FP16: .fpu vfpv3xd-fp16 +; GENERIC-FPU-NEON-FP16: .fpu neon-fp16 + ; GENERIC-ARMV8_1-A: .eabi_attribute 6, 14 ; GENERIC-ARMV8_1-A: .eabi_attribute 7, 65 ; GENERIC-ARMV8_1-A: .eabi_attribute 8, 1 Index: llvm/trunk/test/MC/ARM/directive-fpu-multiple.s =================================================================== --- llvm/trunk/test/MC/ARM/directive-fpu-multiple.s +++ llvm/trunk/test/MC/ARM/directive-fpu-multiple.s @@ -10,7 +10,11 @@ .fpu vfp .fpu vfpv2 .fpu vfpv3 + .fpu vfpv3-fp16 .fpu vfpv3-d16 + .fpu vfpv3-d16-fp16 + .fpu vfpv3xd + .fpu vfpv3xd-fp16 .fpu vfpv4 .fpu vfpv4-d16 .fpu fpv4-sp-d16 @@ -18,6 +22,7 @@ .fpu fpv5-sp-d16 .fpu fp-armv8 .fpu neon + .fpu neon-fp16 .fpu neon-vfpv4 .fpu neon-fp-armv8 .fpu crypto-neon-fp-armv8