Summary (free text ahead of the first Index: header):
Add scalar-operand ("_n") NEON intrinsic definitions to arm_neon.td:
vmul_n (Qd), vfma_n / vfms_n (f, Qf), vmull_high_n / vmlal_high_n /
vmlsl_high_n (s, i, Us, Ui) and vqdmull_high_n / vqdmlal_high_n /
vqdmlsl_high_n (s, i). NeonEmitter.cpp gains the matching OpKind enumerators,
OpMap entries and switch cases; aarch64-neon-2velem.c gains one CodeGen test
per generated intrinsic.
Review notes: every new switch case now ends in "break;" like the neighbouring
cases (five of them previously used "return s;"), and the Duplicate() calls use
the same argument spacing as the existing OpMlsN case. Column alignment inside
context lines was not recoverable from the flattened source and is normalised
to single spaces / conventional indentation; verify against the target tree or
apply with whitespace-insensitive matching.

Index: include/clang/Basic/arm_neon.td
===================================================================
--- include/clang/Basic/arm_neon.td
+++ include/clang/Basic/arm_neon.td
@@ -30,13 +30,18 @@
 def OP_MLA : Op;
 def OP_MLAL : Op;
 def OP_MULLHi : Op;
+def OP_MULLHi_N : Op;
 def OP_MLALHi : Op;
+def OP_MLALHi_N : Op;
 def OP_MLS : Op;
 def OP_MLSL : Op;
 def OP_MLSLHi : Op;
+def OP_MLSLHi_N : Op;
 def OP_MUL_N : Op;
 def OP_MLA_N : Op;
 def OP_MLS_N : Op;
+def OP_FMLA_N : Op;
+def OP_FMLS_N : Op;
 def OP_MLAL_N : Op;
 def OP_MLSL_N : Op;
 def OP_MUL_LN: Op;
@@ -104,8 +109,11 @@
 def OP_ABAL : Op;
 def OP_ABALHi : Op;
 def OP_QDMULLHi : Op;
+def OP_QDMULLHi_N : Op;
 def OP_QDMLALHi : Op;
+def OP_QDMLALHi_N : Op;
 def OP_QDMLSLHi : Op;
+def OP_QDMLSLHi_N : Op;
 def OP_DIV : Op;
 def OP_LONG_HI : Op;
 def OP_NARROW_HI : Op;
@@ -625,6 +633,12 @@
 def FMLS : SInst<"vfms", "dddd", "fQfQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
+// MUL, FMA, FMS definitions with scalar argument
+def VMUL_N_A64 : IOpInst<"vmul_n", "dds", "Qd", OP_MUL_N>;
+def FMLA_N : SOpInst<"vfma_n", "ddds", "fQf", OP_FMLA_N>;
+def FMLS_N : SOpInst<"vfms_n", "ddds", "fQf", OP_FMLS_N>;
+
+////////////////////////////////////////////////////////////////////////////////
 // Logical operations
 // With additional Qd, Ql, QPl type.
 def BSL : SInst<"vbsl", "dudd",
@@ -816,8 +830,11 @@
 def VABAL_HIGH : SOpInst<"vabal_high", "wwkk", "csiUcUsUi", OP_ABALHi>;
 
 def VMULL_HIGH : SOpInst<"vmull_high", "wkk", "csiUcUsUiPc", OP_MULLHi>;
+def VMULL_HIGH_N : SOpInst<"vmull_high_n", "wks", "siUsUi", OP_MULLHi_N>;
 def VMLAL_HIGH : SOpInst<"vmlal_high", "wwkk", "csiUcUsUi", OP_MLALHi>;
+def VMLAL_HIGH_N : SOpInst<"vmlal_high_n", "wwks", "siUsUi", OP_MLALHi_N>;
 def VMLSL_HIGH : SOpInst<"vmlsl_high", "wwkk", "csiUcUsUi", OP_MLSLHi>;
+def VMLSL_HIGH_N : SOpInst<"vmlsl_high_n", "wwks", "siUsUi", OP_MLSLHi_N>;
 
 def VADDHN_HIGH : SOpInst<"vaddhn_high", "qhkk", "silUsUiUl", OP_ADDHNHi>;
 def VRADDHN_HIGH : SOpInst<"vraddhn_high", "qhkk", "silUsUiUl", OP_RADDHNHi>;
@@ -825,8 +842,11 @@
 def VRSUBHN_HIGH : SOpInst<"vrsubhn_high", "qhkk", "silUsUiUl", OP_RSUBHNHi>;
 
 def VQDMULL_HIGH : SOpInst<"vqdmull_high", "wkk", "si", OP_QDMULLHi>;
+def VQDMULL_HIGH_N : SOpInst<"vqdmull_high_n", "wks", "si", OP_QDMULLHi_N>;
 def VQDMLAL_HIGH : SOpInst<"vqdmlal_high", "wwkk", "si", OP_QDMLALHi>;
+def VQDMLAL_HIGH_N : SOpInst<"vqdmlal_high_n", "wwks", "si", OP_QDMLALHi_N>;
 def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "wwkk", "si", OP_QDMLSLHi>;
+def VQDMLSL_HIGH_N : SOpInst<"vqdmlsl_high_n", "wwks", "si", OP_QDMLSLHi_N>;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Extract or insert element from vector
Index: test/CodeGen/aarch64-neon-2velem.c
===================================================================
--- test/CodeGen/aarch64-neon-2velem.c
+++ test/CodeGen/aarch64-neon-2velem.c
@@ -1547,3 +1547,152 @@
   // CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
 }
 
+int32x4_t test_vmull_high_n_s16(int16x8_t a, int16_t b) {
+  // CHECK: test_vmull_high_n_s16
+  return vmull_high_n_s16(a, b);
+  // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmull_high_n_s32(int32x4_t a, int32_t b) {
+  // CHECK: test_vmull_high_n_s32
+  return vmull_high_n_s32(a, b);
+  // CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+uint32x4_t test_vmull_high_n_u16(uint16x8_t a, uint16_t b) {
+  // CHECK: test_vmull_high_n_u16
+  return vmull_high_n_u16(a, b);
+  // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+uint64x2_t test_vmull_high_n_u32(uint32x4_t a, uint32_t b) {
+  // CHECK: test_vmull_high_n_u32
+  return vmull_high_n_u32(a, b);
+  // CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vqdmull_high_n_s16(int16x8_t a, int16_t b) {
+  // CHECK: test_vqdmull_high_n_s16
+  return vqdmull_high_n_s16(a, b);
+  // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vqdmull_high_n_s32(int32x4_t a, int32_t b) {
+  // CHECK: test_vqdmull_high_n_s32
+  return vqdmull_high_n_s32(a, b);
+  // CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
+  // CHECK: test_vmlal_high_n_s16
+  return vmlal_high_n_s16(a, b, c);
+  // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) {
+  // CHECK: test_vmlal_high_n_s32
+  return vmlal_high_n_s32(a, b, c);
+  // CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+uint32x4_t test_vmlal_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) {
+  // CHECK: test_vmlal_high_n_u16
+  return vmlal_high_n_u16(a, b, c);
+  // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+uint64x2_t test_vmlal_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) {
+  // CHECK: test_vmlal_high_n_u32
+  return vmlal_high_n_u32(a, b, c);
+  // CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vqdmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
+  // CHECK: test_vqdmlal_high_n_s16
+  return vqdmlal_high_n_s16(a, b, c);
+  // CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vqdmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) {
+  // CHECK: test_vqdmlal_high_n_s32
+  return vqdmlal_high_n_s32(a, b, c);
+  // CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
+  // CHECK: test_vmlsl_high_n_s16
+  return vmlsl_high_n_s16(a, b, c);
+  // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) {
+  // CHECK: test_vmlsl_high_n_s32
+  return vmlsl_high_n_s32(a, b, c);
+  // CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+uint32x4_t test_vmlsl_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) {
+  // CHECK: test_vmlsl_high_n_u16
+  return vmlsl_high_n_u16(a, b, c);
+  // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+uint64x2_t test_vmlsl_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) {
+  // CHECK: test_vmlsl_high_n_u32
+  return vmlsl_high_n_u32(a, b, c);
+  // CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+int32x4_t test_vqdmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) {
+  // CHECK: test_vqdmlsl_high_n_s16
+  return vqdmlsl_high_n_s16(a, b, c);
+  // CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+}
+
+int64x2_t test_vqdmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) {
+  // CHECK: test_vqdmlsl_high_n_s32
+  return vqdmlsl_high_n_s32(a, b, c);
+  // CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+float32x2_t test_vmul_n_f32(float32x2_t a, float32_t b) {
+  // CHECK: test_vmul_n_f32
+  return vmul_n_f32(a, b);
+  // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) {
+  // CHECK: test_vmulq_n_f32
+  return vmulq_n_f32(a, b);
+  // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+float64x2_t test_vmulq_n_f64(float64x2_t a, float64_t b) {
+  // CHECK: test_vmulq_n_f64
+  return vmulq_n_f64(a, b);
+  // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+}
+
+float32x2_t test_vfma_n_f32(float32x2_t a, float32x2_t b, float32_t n) {
+  // CHECK: test_vfma_n_f32
+  return vfma_n_f32(a, b, n);
+  // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t n) {
+  // CHECK: test_vfmaq_n_f32
+  return vfmaq_n_f32(a, b, n);
+  // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
+
+float32x2_t test_vfms_n_f32(float32x2_t a, float32x2_t b, float32_t n) {
+  // CHECK: test_vfms_n_f32
+  return vfms_n_f32(a, b, n);
+  // CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+}
+
+float32x4_t test_vfmsq_n_f32(float32x4_t a, float32x4_t b, float32_t n) {
+  // CHECK: test_vfmsq_n_f32
+  return vfmsq_n_f32(a, b, n);
+  // CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+}
Index: utils/TableGen/NeonEmitter.cpp
===================================================================
--- utils/TableGen/NeonEmitter.cpp
+++ utils/TableGen/NeonEmitter.cpp
@@ -52,13 +52,18 @@
   OpMla,
   OpMlal,
   OpMullHi,
+  OpMullHiN,
   OpMlalHi,
+  OpMlalHiN,
   OpMls,
   OpMlsl,
   OpMlslHi,
+  OpMlslHiN,
   OpMulN,
   OpMlaN,
   OpMlsN,
+  OpFMlaN,
+  OpFMlsN,
   OpMlalN,
   OpMlslN,
   OpMulLane,
@@ -126,8 +131,11 @@
   OpAbal,
   OpAbalHi,
   OpQDMullHi,
+  OpQDMullHiN,
   OpQDMlalHi,
+  OpQDMlalHiN,
   OpQDMlslHi,
+  OpQDMlslHiN,
   OpDiv,
   OpLongHi,
   OpNarrowHi,
@@ -224,13 +232,18 @@
     OpMap["OP_MLA"] = OpMla;
     OpMap["OP_MLAL"] = OpMlal;
     OpMap["OP_MULLHi"] = OpMullHi;
+    OpMap["OP_MULLHi_N"] = OpMullHiN;
     OpMap["OP_MLALHi"] = OpMlalHi;
+    OpMap["OP_MLALHi_N"] = OpMlalHiN;
     OpMap["OP_MLS"] = OpMls;
     OpMap["OP_MLSL"] = OpMlsl;
     OpMap["OP_MLSLHi"] = OpMlslHi;
+    OpMap["OP_MLSLHi_N"] = OpMlslHiN;
     OpMap["OP_MUL_N"] = OpMulN;
     OpMap["OP_MLA_N"] = OpMlaN;
     OpMap["OP_MLS_N"] = OpMlsN;
+    OpMap["OP_FMLA_N"] = OpFMlaN;
+    OpMap["OP_FMLS_N"] = OpFMlsN;
     OpMap["OP_MLAL_N"] = OpMlalN;
     OpMap["OP_MLSL_N"] = OpMlslN;
     OpMap["OP_MUL_LN"]= OpMulLane;
@@ -298,8 +311,11 @@
     OpMap["OP_ABAL"] = OpAbal;
     OpMap["OP_ABALHi"] = OpAbalHi;
     OpMap["OP_QDMULLHi"] = OpQDMullHi;
+    OpMap["OP_QDMULLHi_N"] = OpQDMullHiN;
     OpMap["OP_QDMLALHi"] = OpQDMlalHi;
+    OpMap["OP_QDMLALHi_N"] = OpQDMlalHiN;
     OpMap["OP_QDMLSLHi"] = OpQDMlslHi;
+    OpMap["OP_QDMLSLHi_N"] = OpQDMlslHiN;
     OpMap["OP_DIV"] = OpDiv;
     OpMap["OP_LONG_HI"] = OpLongHi;
     OpMap["OP_NARROW_HI"] = OpNarrowHi;
@@ -1654,6 +1670,14 @@
   case OpMul:
     s += "__a * __b;";
     break;
+  case OpFMlaN:
+    s += MangleName("vfma", typestr, ClassS);
+    s += "(__a, __b, " + Duplicate(nElts, typestr, "__c") + ");";
+    break;
+  case OpFMlsN:
+    s += MangleName("vfms", typestr, ClassS);
+    s += "(__a, __b, " + Duplicate(nElts, typestr, "__c") + ");";
+    break;
   case OpMullLane:
     s += MangleName("vmull", typestr, ClassS) + "(__a, " +
       SplatLane(nElts, "__b", "__c") + ");";
@@ -1689,9 +1713,17 @@
   case OpMullHi:
     s += Gen2OpWith2High(typestr, "vmull", "__a", "__b");
     break;
+  case OpMullHiN:
+    s += MangleName("vmull_n", typestr, ClassS);
+    s += "(" + GetHigh("__a", typestr) + ", __b);";
+    break;
   case OpMlalHi:
     s += Gen3OpWith2High(typestr, "vmlal", "__a", "__b", "__c");
     break;
+  case OpMlalHiN:
+    s += MangleName("vmlal_n", typestr, ClassS);
+    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
+    break;
   case OpMlsN:
     s += "__a - (__b * " + Duplicate(nElts, typestr, "__c") + ");";
     break;
@@ -1731,6 +1763,10 @@
   case OpMlslHi:
     s += Gen3OpWith2High(typestr, "vmlsl", "__a", "__b", "__c");
     break;
+  case OpMlslHiN:
+    s += MangleName("vmlsl_n", typestr, ClassS);
+    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
+    break;
   case OpQDMullLane:
     s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
       SplatLane(nElts, "__b", "__c") + ");";
@@ -1997,12 +2033,24 @@
   case OpQDMullHi:
     s += Gen2OpWith2High(typestr, "vqdmull", "__a", "__b");
     break;
+  case OpQDMullHiN:
+    s += MangleName("vqdmull_n", typestr, ClassS);
+    s += "(" + GetHigh("__a", typestr) + ", __b);";
+    break;
   case OpQDMlalHi:
     s += Gen3OpWith2High(typestr, "vqdmlal", "__a", "__b", "__c");
     break;
+  case OpQDMlalHiN:
+    s += MangleName("vqdmlal_n", typestr, ClassS);
+    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
+    break;
   case OpQDMlslHi:
     s += Gen3OpWith2High(typestr, "vqdmlsl", "__a", "__b", "__c");
     break;
+  case OpQDMlslHiN:
+    s += MangleName("vqdmlsl_n", typestr, ClassS);
+    s += "(__a, " + GetHigh("__b", typestr) + ", __c);";
+    break;
   case OpDiv:
     s += "__a / __b;";
     break;