Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6631,6 +6631,17 @@ return Chain; } +static MVT getFirstLegalVT(const TargetRegisterClass *RC, + const TargetLowering &TLI) { + for (auto *I = RC->vt_begin(); I != RC->vt_end(); ++I) { + auto RegVT = *I; + if (TLI.isTypeLegal(RegVT)) + return RegVT; + } + + return MVT::Other; +} + /// GetRegistersForValue - Assign registers (virtual or physical) for the /// specified operand. We prefer to assign virtual registers, to allow the /// register allocator to handle the assignment process. However, if the asm @@ -6692,12 +6703,17 @@ if (unsigned AssignedReg = PhysReg.first) { const TargetRegisterClass *RC = PhysReg.second; if (OpInfo.ConstraintVT == MVT::Other) - ValueVT = *RC->vt_begin(); + ValueVT = getFirstLegalVT(RC, TLI); // Get the actual register value type. This is important, because the user // may have asked for (e.g.) the AX register in i32 type. We need to // remember that AX is actually i16 to get the right extension. - RegVT = *RC->vt_begin(); + // + // There are edge cases where the register cannot hold a legal type but we + // still want to have the constraint (for example when using the + // -mgeneral-regs-only option). In these cases we will assign an MVT::Other + // type under the assumption that we don't need any extensions. + RegVT = getFirstLegalVT(RC, TLI); // This is a explicit reference to a physical register. 
Regs.push_back(AssignedReg); Index: lib/Target/AArch64/AArch64.td =================================================================== --- lib/Target/AArch64/AArch64.td +++ lib/Target/AArch64/AArch64.td @@ -44,6 +44,18 @@ def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true", "Enable Statistical Profiling extension">; +def FeatureNEONAsm : SubtargetFeature<"neonasm", "HasNEONAsm", "true", + "Enable assembling Advanced SIMD instructions">; + +def FeatureCryptoAsm : SubtargetFeature<"cryptoasm", "HasCryptoAsm", "true", + "Enable assembling cryptographic instructions">; + +def FeatureFPARMv8Asm : SubtargetFeature<"fp-armv8asm", "HasFPARMv8Asm", + "true", "Enable assembling ARMv8 FP instructions">; + +def FeatureFullFP16Asm : SubtargetFeature<"fullfp16asm", "HasFullFP16Asm", + "true", "Enable assembling Full FP16 instructions", [FeatureFPARMv8Asm]>; + /// Cyclone has register move instructions which are "free". def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", "Has zero-cycle register moves">; Index: lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- lib/Target/AArch64/AArch64Subtarget.h +++ lib/Target/AArch64/AArch64Subtarget.h @@ -63,6 +63,10 @@ bool HasPerfMon = false; bool HasFullFP16 = false; bool HasSPE = false; + bool HasFPARMv8Asm = false; + bool HasNEONAsm = false; + bool HasCryptoAsm = false; + bool HasFullFP16Asm = false; // HasZeroCycleRegMove - Has zero-cycle register mov instructions. 
bool HasZeroCycleRegMove = false; @@ -181,6 +185,9 @@ bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } bool hasRAS() const { return HasRAS; } + bool hasFPARMv8Asm() const { return HasFPARMv8Asm; } + bool hasNEONAsm() const { return HasNEONAsm; } + bool hasCryptoAsm() const { return HasCryptoAsm; } bool balanceFPOps() const { return BalanceFPOps; } bool predictableSelectIsExpensive() const { return PredictableSelectIsExpensive; @@ -215,6 +222,7 @@ bool hasPerfMon() const { return HasPerfMon; } bool hasFullFP16() const { return HasFullFP16; } + bool hasFullFP16Asm() const { return HasFullFP16Asm; } bool hasSPE() const { return HasSPE; } bool isLittleEndian() const { return IsLittle; } Index: lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp =================================================================== --- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -128,7 +128,32 @@ new AArch64TargetStreamer(S); // Initialize the set of available features. - setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); + uint64_t FeatureMask = ComputeAvailableFeatures(getSTI().getFeatureBits()); + bool HasNEONAsm = STI.getFeatureBits()[AArch64::FeatureNEONAsm]; + bool HasCryptoAsm = STI.getFeatureBits()[AArch64::FeatureCryptoAsm]; + bool HasFPARMv8Asm = STI.getFeatureBits()[AArch64::FeatureFPARMv8Asm]; + bool HasFullFP16Asm = STI.getFeatureBits()[AArch64::FeatureFullFP16Asm]; + + // Check for features disabled by -mgeneral-regs-only and re-enable them + // in the assembler. + if (!HasNEONAsm && !HasCryptoAsm && !HasFPARMv8Asm && !HasFullFP16Asm) { + setAvailableFeatures(FeatureMask); + return; + } + + // We need to enable some features. 
+ MCSubtargetInfo &CSTI = copySTI(); + if (HasNEONAsm) + CSTI.ToggleFeature(AArch64::FeatureNEON); + if (HasCryptoAsm) + CSTI.ToggleFeature(AArch64::FeatureCrypto); + if (HasFPARMv8Asm) + CSTI.ToggleFeature(AArch64::FeatureFPARMv8); + if (HasFullFP16Asm) + CSTI.ToggleFeature(AArch64::FeatureFullFP16); + + FeatureMask = ComputeAvailableFeatures(CSTI.getFeatureBits()); + setAvailableFeatures(FeatureMask); } bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, Index: test/CodeGen/AArch64/inlineasm-general-regs-only.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/inlineasm-general-regs-only.ll @@ -0,0 +1,25 @@ +; RUN: llc -mtriple=aarch64-none-eabi -mattr=-neon,-crypto,-fp-armv8,-fullfp16,+neonasm,+cryptoasm,+fp-armv8asm,+fullfp16asm %s -o - | FileCheck %s + +; CHECK-LABEL: fun +; CHECK: ld2 +; CHECK: st2 +; CHECK: aese +; CHECK: dup +; CHECK: umov +; CHECK: fabd +define i32 @fun(i8 *%addr0, i8 *%addr1, i32 %input) { +entry: + ; We can assemble neon instructions + tail call void asm "ld2 {v0.16b, v1.16b}, $1 ; st2 {v0.16b, v1.16b}, $0", "=*Q,*Q,~{v0},~{v1}"(i8* %addr0, i8* %addr1) + + ; We can assemble crypto instructions + tail call void asm "aese v0.16b, v1.16b;", "~{v0},~{v1}"() + + ; We can move data from the simd register file. + %retval = tail call i32 asm "dup v1.4s, ${1:w} ; umov ${0:w}, v1.b[0];", "=r,r,~{v0},~{v1}"(i32 %input) + + ; We can use fullfp16 instructions. + tail call void asm "fabd v0.4h, v1.4h, v2.4h;", "~{v0},~{v1},~{v2}"() + + ret i32 %retval +}