diff --git a/llvm/lib/Target/ARM/ARMCallingConv.cpp b/llvm/lib/Target/ARM/ARMCallingConv.cpp
--- a/llvm/lib/Target/ARM/ARMCallingConv.cpp
+++ b/llvm/lib/Target/ARM/ARMCallingConv.cpp
@@ -218,7 +218,11 @@
   case MVT::f64:
     RegList = DRegList;
     break;
+  case MVT::v4i32:
+  case MVT::v4f32:
+    // TODO: assert this is MVE
   case MVT::v8f16:
+  case MVT::v2i64:
   case MVT::v2f64:
     RegList = QRegList;
     break;
diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td
--- a/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -8,6 +8,19 @@
 // This describes the calling conventions for ARM architecture.
 //===----------------------------------------------------------------------===//

+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+    : CCIf<!strconcat("static_cast<const ARMSubtarget &>"
+                      "(State.getMachineFunction().getSubtarget()).",
+                      F),
+           A>;
+
+class CCIfNotSubtarget<string F, CCAction A>
+    : CCIf<!strconcat("!static_cast<const ARMSubtarget &>"
+                      "(State.getMachineFunction().getSubtarget()).",
+                      F),
+           A>;
+
 /// CCIfAlign - Match of the original alignment of the arg
 class CCIfAlign<string Align, CCAction A>:
   CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
@@ -31,7 +44,11 @@

   // Handle all vector types as either f64 or v2f64.
   CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfNotSubtarget<"hasMVEIntegerOps()",
+                   CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32],
+                            CCBitConvertToType<v2f64>>>,
+  CCIfSubtarget<"hasMVEIntegerOps()", CCIfType<[v2i64, v8i16, v16i8, v4f32],
+                                               CCBitConvertToType<v4i32>>>,

   // f64 and v2f64 are passed in adjacent GPRs, possibly split onto the stack
   CCIfType<[f64, v2f64], CCCustom<"CC_ARM_APCS_Custom_f64">>,
@@ -41,7 +58,10 @@

   CCIfType<[i32], CCAssignToStack<4, 4>>,
   CCIfType<[f64], CCAssignToStack<8, 4>>,
-  CCIfType<[v2f64], CCAssignToStack<16, 4>>
+  CCIfNotSubtarget<"hasMVEIntegerOps()",
+                   CCIfType<[v2f64], CCAssignToStack<16, 4>>>,
+  CCIfSubtarget<"hasMVEIntegerOps()",
+                CCIfType<[v4i32], CCAssignToStack<16, 4>>>
 ]>;

 let Entry = 1 in
@@ -57,7 +77,11 @@

   // Handle all vector types as either f64 or v2f64.
   CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfNotSubtarget<"hasMVEIntegerOps()",
+                   CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32],
+                            CCBitConvertToType<v2f64>>>,
+  CCIfSubtarget<"hasMVEIntegerOps()", CCIfType<[v2i64, v8i16, v16i8, v4f32],
+                                               CCBitConvertToType<v4i32>>>,

   CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>,
@@ -72,7 +96,11 @@
 def FastCC_ARM_APCS : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
   CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfNotSubtarget<"hasMVEIntegerOps()",
+                   CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32],
+                            CCBitConvertToType<v2f64>>>,
+  CCIfSubtarget<"hasMVEIntegerOps()", CCIfType<[v2i64, v8i16, v16i8, v4f32],
+                                               CCBitConvertToType<v4i32>>>,

   CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
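A note on the new CCIfSubtarget/CCIfNotSubtarget classes above: the string
handed to CCIf is pasted verbatim into the checking code that the
CallingConvEmitter TableGen backend writes into ARMGenCallingConv.inc. As a
rough sketch of the mechanism (illustrative only, not the verbatim generated
output), the MVE bit-convert rule of CC_ARM_APCS comes out approximately as:

#include "ARMSubtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
using namespace llvm;

// Approximate shape of the generated CC_ARM_APCS; the predicate string from
// the .td file appears verbatim as the if-condition.
static bool CC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT,
                        CCValAssign::LocInfo LocInfo,
                        ISD::ArgFlagsTy ArgFlags, CCState &State) {
  if (static_cast<const ARMSubtarget &>(
          State.getMachineFunction().getSubtarget()).hasMVEIntegerOps()) {
    if (LocVT == MVT::v2i64 || LocVT == MVT::v8i16 || LocVT == MVT::v16i8 ||
        LocVT == MVT::v4f32) {
      // CCBitConvertToType<v4i32>: retype the value and record that a
      // bitcast is needed when it is materialized.
      LocVT = MVT::v4i32;
      LocInfo = CCValAssign::BCvt;
    }
  }
  // ... remaining rules; falls through to register/stack assignment ...
  return true; // No rule matched.
}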
@@ -83,7 +111,10 @@
   // may never be allocated to core registers.
   CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToStackWithShadow<8, 4, [Q0, Q1, Q2, Q3]>>,
-  CCIfType<[v2f64], CCAssignToStackWithShadow<16, 4, [Q0, Q1, Q2, Q3]>>,
+  CCIfNotSubtarget<"hasMVEIntegerOps()", CCIfType<[v2f64],
+                   CCAssignToStackWithShadow<16, 4, [Q0, Q1, Q2, Q3]>>>,
+  CCIfSubtarget<"hasMVEIntegerOps()", CCIfType<[v4i32],
+                CCAssignToStackWithShadow<16, 4, [Q0, Q1, Q2, Q3]>>>,

   CCDelegateTo<CC_ARM_APCS>
 ]>;
@@ -92,9 +123,17 @@
 def RetFastCC_ARM_APCS : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
   CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfNotSubtarget<"hasMVEIntegerOps()",
+                   CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32],
+                            CCBitConvertToType<v2f64>>>,
+  CCIfSubtarget<"hasMVEIntegerOps()", CCIfType<[v2i64, v8i16, v16i8, v4f32],
+                                               CCBitConvertToType<v4i32>>>,
+
+  CCIfNotSubtarget<"hasMVEIntegerOps()",
+                   CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>>,
+  CCIfSubtarget<"hasMVEIntegerOps()",
+                CCIfType<[v4i32], CCAssignToReg<[Q0, Q1, Q2, Q3]>>>,

-  CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
   CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7,
                                  S8, S9, S10, S11, S12, S13, S14, S15]>>,
@@ -109,7 +148,11 @@
 def CC_ARM_APCS_GHC : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
   CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfNotSubtarget<"hasMVEIntegerOps()",
+                   CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32],
+                            CCBitConvertToType<v2f64>>>,
+  CCIfSubtarget<"hasMVEIntegerOps()", CCIfType<[v2i64, v8i16, v16i8, v4f32],
+                                               CCBitConvertToType<v4i32>>>,

   CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
   CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>,
@@ -141,9 +184,27 @@
   CCIfType<[i32], CCAssignToStackWithShadow<4, 4, [R0, R1, R2, R3]>>,
   CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToStackWithShadow<8, 8, [Q0, Q1, Q2, Q3]>>,
-  CCIfType<[v2f64], CCIfAlign<"16",
-           CCAssignToStackWithShadow<16, 16, [Q0, Q1, Q2, Q3]>>>,
-  CCIfType<[v2f64], CCAssignToStackWithShadow<16, 8, [Q0, Q1, Q2, Q3]>>
+
+  CCIfNotSubtarget<"hasMVEIntegerOps()", CCIfType<[v2f64], CCIfAlign<"16",
+                   CCAssignToStackWithShadow<16, 16, [Q0, Q1, Q2, Q3]>>>>,
+  CCIfNotSubtarget<"hasMVEIntegerOps()", CCIfType<[v2f64],
+                   CCAssignToStackWithShadow<16, 8, [Q0, Q1, Q2, Q3]>>>,
+
+  CCIfNotSubtarget<"hasMVEIntegerOps()", CCIfType<[v2i64], CCIfAlign<"16",
+                   CCAssignToStackWithShadow<16, 16, [Q0, Q1, Q2, Q3]>>>>,
+  CCIfNotSubtarget<"hasMVEIntegerOps()", CCIfType<[v2i64],
+                   CCAssignToStackWithShadow<16, 8, [Q0, Q1, Q2, Q3]>>>,
+
+
+  CCIfSubtarget<"hasMVEIntegerOps()", CCIfType<[v4f32], CCIfAlign<"16",
+                CCAssignToStackWithShadow<16, 16, [Q0, Q1, Q2, Q3]>>>>,
+  CCIfSubtarget<"hasMVEIntegerOps()", CCIfType<[v4f32],
+                CCAssignToStackWithShadow<16, 8, [Q0, Q1, Q2, Q3]>>>,
+
+  CCIfSubtarget<"hasMVEIntegerOps()", CCIfType<[v4i32], CCIfAlign<"16",
+                CCAssignToStackWithShadow<16, 16, [Q0, Q1, Q2, Q3]>>>>,
+  CCIfSubtarget<"hasMVEIntegerOps()", CCIfType<[v4i32],
+                CCAssignToStackWithShadow<16, 8, [Q0, Q1, Q2, Q3]>>>
 ]>;

 def RetCC_ARM_AAPCS_Common : CallingConv<[
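The CCIfAlign/CCAssignToStackWithShadow pairs above expand along the same
lines inside the generated function (same parameters as in the sketch
earlier). A sketch of what one 16-byte-aligned MVE rule turns into; again
illustrative only, and the register-list name here is invented:

// Approximate expansion of
//   CCIfSubtarget<"hasMVEIntegerOps()", CCIfType<[v4i32], CCIfAlign<"16",
//                 CCAssignToStackWithShadow<16, 16, [Q0, Q1, Q2, Q3]>>>>
static const MCPhysReg QShadowRegs[] = {ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3};
if (LocVT == MVT::v4i32 && ArgFlags.getOrigAlign() == 16) {
  // Allocate a 16-byte slot at 16-byte alignment; shadowing Q0-Q3 marks
  // those registers as consumed so later rules cannot also hand them out.
  unsigned Offset = State.AllocateStack(16, 16, QShadowRegs);
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
  return false; // Location assigned.
}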
@@ -166,7 +227,12 @@

   // Handle all vector types as either f64 or v2f64.
   CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfNotSubtarget<"hasMVEIntegerOps()",
+                   CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32],
+                            CCBitConvertToType<v2f64>>>,
+  CCIfSubtarget<"hasMVEIntegerOps()",
+                CCIfType<[v2i64, v8i16, v8f16, v16i8, v4f32],
+                         CCBitConvertToType<v4i32>>>,

   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -183,7 +249,12 @@
 def RetCC_ARM_AAPCS : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
   CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v8f16,v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfNotSubtarget<"hasMVEIntegerOps()",
+                   CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32],
+                            CCBitConvertToType<v2f64>>>,
+  CCIfSubtarget<"hasMVEIntegerOps()",
+                CCIfType<[v2i64, v8i16, v8f16, v16i8, v4f32],
+                         CCBitConvertToType<v4i32>>>,

   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -209,7 +280,14 @@

   // Handle all vector types as either f64 or v2f64.
   CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfNotSubtarget<"hasMVEIntegerOps()", CCIfType<[v4i32, v8i16, v16i8],
+                   CCBitConvertToType<v2f64>>>,
+  CCIfSubtarget<"hasMVEIntegerOps()", CCIfType<[v8i16, v16i8],
+                CCBitConvertToType<v4i32>>>,
+  CCIfNotSubtarget<"hasMVEIntegerOps()", CCIfType<[v8f16, v4f32],
+                   CCBitConvertToType<v2f64>>>,
+  CCIfSubtarget<"hasMVEIntegerOps()", CCIfType<[v8f16],
+                CCBitConvertToType<v4f32>>>,

   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -220,7 +298,12 @@
   // HFAs are passed in a contiguous block of registers, or on the stack
   CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_Aggregate">>,

-  CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+  CCIfNotSubtarget<"hasMVEIntegerOps()", CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>>,
+  CCIfNotSubtarget<"hasMVEIntegerOps()", CCIfType<[v2i64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>>,
+
+  CCIfSubtarget<"hasMVEIntegerOps()", CCIfType<[v4f32], CCAssignToReg<[Q0, Q1, Q2, Q3]>>>,
+  CCIfSubtarget<"hasMVEIntegerOps()", CCIfType<[v4i32], CCAssignToReg<[Q0, Q1, Q2, Q3]>>>,
+
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
   CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7,
                                  S8, S9, S10, S11, S12, S13, S14, S15]>>,
@@ -231,7 +314,14 @@
 def RetCC_ARM_AAPCS_VFP : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
   CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfNotSubtarget<"hasMVEIntegerOps()", CCIfType<[v4i32, v8i16, v16i8],
+                   CCBitConvertToType<v2f64>>>,
+  CCIfSubtarget<"hasMVEIntegerOps()", CCIfType<[v8i16, v16i8],
+                CCBitConvertToType<v4i32>>>,
+  CCIfNotSubtarget<"hasMVEIntegerOps()", CCIfType<[v8f16, v4f32],
+                   CCBitConvertToType<v2f64>>>,
+  CCIfSubtarget<"hasMVEIntegerOps()", CCIfType<[v8f16],
+                CCBitConvertToType<v4f32>>>,

   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
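Under MVE, vector values are deliberately kept as (or bit-converted to)
v4i32/v4f32 rather than canonicalized to v2f64 as on NEON, presumably
because MVE, unlike NEON, has no double-precision vector operations. The
direct Q-register assignments then expand to roughly the following sketch
(illustrative only; the array name is invented, and the fragment sits inside
the generated function as before):

// Approximate expansion of the MVE rule
//   CCIfType<[v4i32], CCAssignToReg<[Q0, Q1, Q2, Q3]>>
static const MCPhysReg QArgRegs[] = {ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3};
if (LocVT == MVT::v4i32) {
  if (unsigned Reg = State.AllocateReg(QArgRegs)) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false; // Assigned to a register.
  }
  // All of Q0-Q3 taken: fall through so the value reaches the stack rules
  // in CC_ARM_AAPCS_Common.
}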
@@ -239,7 +329,12 @@
   // A SwiftError is returned in R8.
   CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,

-  CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+  CCIfNotSubtarget<"hasMVEIntegerOps()", CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>>,
+  CCIfNotSubtarget<"hasMVEIntegerOps()", CCIfType<[v2i64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>>,
+
+  CCIfSubtarget<"hasMVEIntegerOps()", CCIfType<[v4f32], CCAssignToReg<[Q0, Q1, Q2, Q3]>>>,
+  CCIfSubtarget<"hasMVEIntegerOps()", CCIfType<[v4i32], CCAssignToReg<[Q0, Q1, Q2, Q3]>>>,
+
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
   CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7,
                                  S8, S9, S10, S11, S12, S13, S14, S15]>>,
@@ -251,6 +346,7 @@
 //===----------------------------------------------------------------------===//

 def CSR_NoRegs : CalleeSavedRegs<(add)>;
+// TODO: Do we need to update this for MVE, which only has D0-D15?
 def CSR_FPRegs : CalleeSavedRegs<(add (sequence "D%u", 0, 31))>;

 def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -3743,7 +3743,11 @@
           RC = &ARM::SPRRegClass;
         else if (RegVT == MVT::f64 || RegVT == MVT::v4f16)
           RC = &ARM::DPRRegClass;
-        else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16)
+        else if (RegVT == MVT::v4f32 || RegVT == MVT::v8f16 ||
+                 RegVT == MVT::v2i64 || RegVT == MVT::v4i32 ||
+                 RegVT == MVT::v8i16 || RegVT == MVT::v16i8)
+          RC = &ARM::QPRRegClass;
+        else if (RegVT == MVT::v2f64 && Subtarget->hasNEON())
           RC = &ARM::QPRRegClass;
         else if (RegVT == MVT::i32)
           RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
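For context on the ARMISelLowering.cpp hunk: v2f64 now selects QPRRegClass
only when NEON is present, so an MVE-only subtarget never claims a Q
register for a type it cannot operate on. The chosen register class feeds
the live-in setup a few lines below in
ARMTargetLowering::LowerFormalArguments; a simplified sketch of that
surrounding code (MF, DAG, dl, VA, Chain, RC and RegVT are in scope there,
and the real function first handles f64/v2f64 values split across GPRs):

// The incoming physical register becomes a live-in of the chosen class and
// is copied into a fresh virtual register.
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
// A BCvt location (produced by CCBitConvertToType in the .td rules above)
// is resolved by bitcasting back to the value's original type.
if (VA.getLocInfo() == CCValAssign::BCvt)
  ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);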