Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp @@ -250,6 +250,9 @@ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SMIN, VT, Legal); setOperationAction(ISD::SMAX, VT, Legal); setOperationAction(ISD::UMIN, VT, Legal); @@ -5718,10 +5721,11 @@ return SDValue(); // We essentially have two forms here. Shift by an immediate and shift by a - // vector register. We cannot easily match shift by an immediate in tablegen - // so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM. For shifting - // by a vector, we don't have VSHR, only VSHL (which can be signed or - // unsigned, and a negative shift indicates a shift right). + // vector register (there is also a shift by a gpr, but that is just handled + // with a tablegen pattern). We cannot easily match shift by an immediate in + // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM. + // For shifting by a vector, we don't have VSHR, only VSHL (which can be + // signed or unsigned, and a negative shift indicates a shift right). 
if (N->getOpcode() == ISD::SHL) { if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0), @@ -12852,7 +12856,6 @@ if (!VT.isVector() || !TLI.isTypeLegal(VT)) return SDValue(); - assert(ST->hasNEON() && "unexpected vector shift"); int64_t Cnt; switch (N->getOpcode()) { Index: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td @@ -254,6 +254,17 @@ def ARMvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>; def ARMvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>; + +def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; +def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>,]>; +def ARMvshlImm : SDNode<"ARMISD::VSHLIMM", SDTARMVSHIMM>; +def ARMvshrsImm : SDNode<"ARMISD::VSHRsIMM", SDTARMVSHIMM>; +def ARMvshruImm : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>; +def ARMvshls : SDNode<"ARMISD::VSHLs", SDTARMVSH>; +def ARMvshlu : SDNode<"ARMISD::VSHLu", SDTARMVSH>; + def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMWhileLoop, [SDNPHasChain]>; Index: llvm/trunk/lib/Target/ARM/ARMInstrMVE.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrMVE.td +++ llvm/trunk/lib/Target/ARM/ARMInstrMVE.td @@ -2119,6 +2119,22 @@ defm MVE_VQRSHL_by_vec : mve_shift_by_vec_multi<"vqrshl", 0b1, 0b1>; defm MVE_VRSHL_by_vec : mve_shift_by_vec_multi<"vrshl", 0b0, 0b1>; +let Predicates = [HasMVEInt] in { + def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))), + (v4i32 (MVE_VSHL_by_vecu32 (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>; + def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn))), + (v8i16 (MVE_VSHL_by_vecu16 (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)))>; + def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))), + (v16i8 (MVE_VSHL_by_vecu8 
(v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>; + + def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))), + (v4i32 (MVE_VSHL_by_vecs32 (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>; + def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn))), + (v8i16 (MVE_VSHL_by_vecs16 (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)))>; + def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))), + (v16i8 (MVE_VSHL_by_vecs8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>; +} + class MVE_shift_with_imm pattern=[]> @@ -2344,6 +2360,29 @@ let Inst{21} = 0b1; } +let Predicates = [HasMVEInt] in { + def : Pat<(v4i32 (ARMvshlImm (v4i32 MQPR:$src), imm0_31:$imm)), + (v4i32 (MVE_VSHL_immi32 (v4i32 MQPR:$src), imm0_31:$imm))>; + def : Pat<(v8i16 (ARMvshlImm (v8i16 MQPR:$src), imm0_15:$imm)), + (v8i16 (MVE_VSHL_immi16 (v8i16 MQPR:$src), imm0_15:$imm))>; + def : Pat<(v16i8 (ARMvshlImm (v16i8 MQPR:$src), imm0_7:$imm)), + (v16i8 (MVE_VSHL_immi8 (v16i8 MQPR:$src), imm0_7:$imm))>; + + def : Pat<(v4i32 (ARMvshruImm (v4i32 MQPR:$src), imm0_31:$imm)), + (v4i32 (MVE_VSHR_immu32 (v4i32 MQPR:$src), imm0_31:$imm))>; + def : Pat<(v8i16 (ARMvshruImm (v8i16 MQPR:$src), imm0_15:$imm)), + (v8i16 (MVE_VSHR_immu16 (v8i16 MQPR:$src), imm0_15:$imm))>; + def : Pat<(v16i8 (ARMvshruImm (v16i8 MQPR:$src), imm0_7:$imm)), + (v16i8 (MVE_VSHR_immu8 (v16i8 MQPR:$src), imm0_7:$imm))>; + + def : Pat<(v4i32 (ARMvshrsImm (v4i32 MQPR:$src), imm0_31:$imm)), + (v4i32 (MVE_VSHR_imms32 (v4i32 MQPR:$src), imm0_31:$imm))>; + def : Pat<(v8i16 (ARMvshrsImm (v8i16 MQPR:$src), imm0_15:$imm)), + (v8i16 (MVE_VSHR_imms16 (v8i16 MQPR:$src), imm0_15:$imm))>; + def : Pat<(v16i8 (ARMvshrsImm (v16i8 MQPR:$src), imm0_7:$imm)), + (v16i8 (MVE_VSHR_imms8 (v16i8 MQPR:$src), imm0_7:$imm))>; +} + // end of mve_shift instructions // start of MVE Floating Point instructions @@ -3353,6 +3392,22 @@ defm MVE_VQSHL_qr : MVE_VxSHL_qr_types<"vqshl", 0b1, 0b0>; defm MVE_VQRSHL_qr : MVE_VxSHL_qr_types<"vqrshl", 0b1, 0b1>; +let Predicates = [HasMVEInt] in { + def : Pat<(v4i32 
(ARMvshlu (v4i32 MQPR:$Qm), (v4i32 (ARMvdup GPR:$Rm)))), + (v4i32 (MVE_VSHL_qru32 (v4i32 MQPR:$Qm), GPR:$Rm))>; + def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 (ARMvdup GPR:$Rm)))), + (v8i16 (MVE_VSHL_qru16 (v8i16 MQPR:$Qm), GPR:$Rm))>; + def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 (ARMvdup GPR:$Rm)))), + (v16i8 (MVE_VSHL_qru8 (v16i8 MQPR:$Qm), GPR:$Rm))>; + + def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 (ARMvdup GPR:$Rm)))), + (v4i32 (MVE_VSHL_qrs32 (v4i32 MQPR:$Qm), GPR:$Rm))>; + def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 (ARMvdup GPR:$Rm)))), + (v8i16 (MVE_VSHL_qrs16 (v8i16 MQPR:$Qm), GPR:$Rm))>; + def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 (ARMvdup GPR:$Rm)))), + (v16i8 (MVE_VSHL_qrs8 (v16i8 MQPR:$Qm), GPR:$Rm))>; +} + class MVE_VBRSR size, list pattern=[]> : MVE_qDest_rSrc { Index: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td @@ -493,26 +493,14 @@ def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>; def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>; -// Vector Shifts -def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>,]>; - -def NEONvshls : SDNode<"ARMISD::VSHLs", SDTARMVSH>; -def NEONvshlu : SDNode<"ARMISD::VSHLu", SDTARMVSH>; - // Types for vector shift by immediates. The "SHX" version is for long and // narrow operations where the source and destination vectors have different // types. The "SHINS" version is for shift and insert operations. 
-def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, - SDTCisVT<2, i32>]>; def SDTARMVSHXIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, SDTCisVT<2, i32>]>; def SDTARMVSHINSIMM : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>; -def NEONvshlImm : SDNode<"ARMISD::VSHLIMM", SDTARMVSHIMM>; -def NEONvshrsImm : SDNode<"ARMISD::VSHRsIMM", SDTARMVSHIMM>; -def NEONvshruImm : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>; def NEONvshrnImm : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>; def NEONvrshrsImm : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>; @@ -4269,11 +4257,11 @@ int_arm_neon_vraddhn, 1>; let Predicates = [HasNEON] in { -def : Pat<(v8i8 (trunc (NEONvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))), +def : Pat<(v8i8 (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))), (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>; -def : Pat<(v4i16 (trunc (NEONvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))), +def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))), (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>; -def : Pat<(v2i32 (trunc (NEONvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))), +def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))), (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>; } @@ -5027,11 +5015,11 @@ int_arm_neon_vrsubhn, 0>; let Predicates = [HasNEON] in { -def : Pat<(v8i8 (trunc (NEONvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))), +def : Pat<(v8i8 (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))), (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>; -def : Pat<(v4i16 (trunc (NEONvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))), +def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))), (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>; -def : Pat<(v2i32 (trunc (NEONvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))), +def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))), (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>; } @@ -5522,7 +5510,7 @@ def abd_shr : PatFrag<(ops node:$in1, node:$in2, node:$shift), - (NEONvshrsImm 
(sub (zext node:$in1), + (ARMvshrsImm (sub (zext node:$in1), (zext node:$in2)), (i32 $shift))>; let Predicates = [HasNEON] in { @@ -5790,56 +5778,56 @@ "vshl", "u", int_arm_neon_vshiftu>; let Predicates = [HasNEON] in { -def : Pat<(v8i8 (NEONvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))), +def : Pat<(v8i8 (ARMvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))), (VSHLsv8i8 DPR:$Dn, DPR:$Dm)>; -def : Pat<(v4i16 (NEONvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))), +def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))), (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>; -def : Pat<(v2i32 (NEONvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))), +def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))), (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>; -def : Pat<(v1i64 (NEONvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))), +def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))), (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>; -def : Pat<(v16i8 (NEONvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))), +def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))), (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>; -def : Pat<(v8i16 (NEONvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))), +def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))), (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>; -def : Pat<(v4i32 (NEONvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))), +def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))), (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>; -def : Pat<(v2i64 (NEONvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))), +def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))), (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>; -def : Pat<(v8i8 (NEONvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))), +def : Pat<(v8i8 (ARMvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))), (VSHLuv8i8 DPR:$Dn, DPR:$Dm)>; -def : Pat<(v4i16 (NEONvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))), +def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))), (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>; -def : Pat<(v2i32 (NEONvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))), +def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))), (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>; -def : Pat<(v1i64 
(NEONvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))), +def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))), (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>; -def : Pat<(v16i8 (NEONvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))), +def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))), (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>; -def : Pat<(v8i16 (NEONvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))), +def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))), (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>; -def : Pat<(v4i32 (NEONvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))), +def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))), (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>; -def : Pat<(v2i64 (NEONvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))), +def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))), (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>; } // VSHL : Vector Shift Left (Immediate) -defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshlImm>; +defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>; // VSHR : Vector Shift Right (Immediate) defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs", - NEONvshrsImm>; + ARMvshrsImm>; defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu", - NEONvshruImm>; + ARMvshruImm>; // VSHLL : Vector Shift Left Long defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", - PatFrag<(ops node:$LHS, node:$RHS), (NEONvshlImm (sext node:$LHS), node:$RHS)>>; + PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (sext node:$LHS), node:$RHS)>>; defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", - PatFrag<(ops node:$LHS, node:$RHS), (NEONvshlImm (zext node:$LHS), node:$RHS)>>; + PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (zext node:$LHS), node:$RHS)>>; // VSHLL : Vector Shift Left Long (with maximum shift count) class N2VLShMax op21_16, bits<4> op11_8, bit op7, @@ -5858,37 +5846,37 @@ v2i64, v2i32, imm32>; let Predicates = [HasNEON] in { -def : Pat<(v8i16 (NEONvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))), +def : 
Pat<(v8i16 (ARMvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))), (VSHLLi8 DPR:$Rn, 8)>; -def : Pat<(v4i32 (NEONvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))), +def : Pat<(v4i32 (ARMvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))), (VSHLLi16 DPR:$Rn, 16)>; -def : Pat<(v2i64 (NEONvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))), +def : Pat<(v2i64 (ARMvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))), (VSHLLi32 DPR:$Rn, 32)>; -def : Pat<(v8i16 (NEONvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))), +def : Pat<(v8i16 (ARMvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))), (VSHLLi8 DPR:$Rn, 8)>; -def : Pat<(v4i32 (NEONvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))), +def : Pat<(v4i32 (ARMvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))), (VSHLLi16 DPR:$Rn, 16)>; -def : Pat<(v2i64 (NEONvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))), +def : Pat<(v2i64 (ARMvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))), (VSHLLi32 DPR:$Rn, 32)>; -def : Pat<(v8i16 (NEONvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))), +def : Pat<(v8i16 (ARMvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))), (VSHLLi8 DPR:$Rn, 8)>; -def : Pat<(v4i32 (NEONvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))), +def : Pat<(v4i32 (ARMvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))), (VSHLLi16 DPR:$Rn, 16)>; -def : Pat<(v2i64 (NEONvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))), +def : Pat<(v2i64 (ARMvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))), (VSHLLi32 DPR:$Rn, 32)>; } // VSHRN : Vector Shift Right and Narrow defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", PatFrag<(ops node:$Rn, node:$amt), - (trunc (NEONvshrsImm node:$Rn, node:$amt))>>; + (trunc (ARMvshrsImm node:$Rn, node:$amt))>>; let Predicates = [HasNEON] in { -def : Pat<(v8i8 (trunc (NEONvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))), +def : Pat<(v8i8 (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))), (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>; -def : Pat<(v4i16 (trunc (NEONvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))), +def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))), (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>; -def : 
Pat<(v2i32 (trunc (NEONvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))), +def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))), (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>; } @@ -5952,8 +5940,8 @@ NEONvqrshrnsuImm>; // VSRA : Vector Shift Right and Accumulate -defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrsImm>; -defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshruImm>; +defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>; +defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>; // VRSRA : Vector Rounding Shift Right and Accumulate defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>; defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>; Index: llvm/trunk/test/CodeGen/Thumb2/mve-shifts.ll =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/mve-shifts.ll +++ llvm/trunk/test/CodeGen/Thumb2/mve-shifts.ll @@ -0,0 +1,391 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O3 -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve %s -o - | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @shl_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) { +; CHECK-LABEL: shl_qq_int8_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshl.u8 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = shl <16 x i8> %src1, %src2 + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @shl_qq_int16_t(<8 x i16> %src1, <8 x i16> %src2) { +; CHECK-LABEL: shl_qq_int16_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshl.u16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = shl <8 x i16> %src1, %src2 + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @shl_qq_int32_t(<4 x i32> %src1, <4 x i32> %src2) { +; CHECK-LABEL: shl_qq_int32_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshl.u32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = shl <4 x i32> %src1, %src2 + ret <4 x i32> %0 +} + + +define arm_aapcs_vfpcc <16 
x i8> @shru_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) { +; CHECK-LABEL: shru_qq_int8_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vneg.s8 q1, q1 +; CHECK-NEXT: vshl.u8 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = lshr <16 x i8> %src1, %src2 + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @shru_qq_int16_t(<8 x i16> %src1, <8 x i16> %src2) { +; CHECK-LABEL: shru_qq_int16_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vneg.s16 q1, q1 +; CHECK-NEXT: vshl.u16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = lshr <8 x i16> %src1, %src2 + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @shru_qq_int32_t(<4 x i32> %src1, <4 x i32> %src2) { +; CHECK-LABEL: shru_qq_int32_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vneg.s32 q1, q1 +; CHECK-NEXT: vshl.u32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = lshr <4 x i32> %src1, %src2 + ret <4 x i32> %0 +} + + +define arm_aapcs_vfpcc <16 x i8> @shrs_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) { +; CHECK-LABEL: shrs_qq_int8_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vneg.s8 q1, q1 +; CHECK-NEXT: vshl.s8 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = ashr <16 x i8> %src1, %src2 + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @shrs_qq_int16_t(<8 x i16> %src1, <8 x i16> %src2) { +; CHECK-LABEL: shrs_qq_int16_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vneg.s16 q1, q1 +; CHECK-NEXT: vshl.s16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = ashr <8 x i16> %src1, %src2 + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @shrs_qq_int32_t(<4 x i32> %src1, <4 x i32> %src2) { +; CHECK-LABEL: shrs_qq_int32_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vneg.s32 q1, q1 +; CHECK-NEXT: vshl.s32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = ashr <4 x i32> %src1, %src2 + ret <4 x i32> %0 +} + + +define arm_aapcs_vfpcc <16 x i8> @shl_qi_int8_t(<16 x i8> %src1) { +; CHECK-LABEL: shl_qi_int8_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshl.i8 q0, q0, #4 +; CHECK-NEXT: bx lr +entry: + %0 = shl <16 x i8> %src1, + 
ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @shl_qi_int16_t(<8 x i16> %src1) { +; CHECK-LABEL: shl_qi_int16_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshl.i16 q0, q0, #4 +; CHECK-NEXT: bx lr +entry: + %0 = shl <8 x i16> %src1, + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @shl_qi_int32_t(<4 x i32> %src1) { +; CHECK-LABEL: shl_qi_int32_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshl.i32 q0, q0, #4 +; CHECK-NEXT: bx lr +entry: + %0 = shl <4 x i32> %src1, + ret <4 x i32> %0 +} + + +define arm_aapcs_vfpcc <16 x i8> @shru_qi_int8_t(<16 x i8> %src1) { +; CHECK-LABEL: shru_qi_int8_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshr.u8 q0, q0, #4 +; CHECK-NEXT: bx lr +entry: + %0 = lshr <16 x i8> %src1, + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @shru_qi_int16_t(<8 x i16> %src1) { +; CHECK-LABEL: shru_qi_int16_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshr.u16 q0, q0, #4 +; CHECK-NEXT: bx lr +entry: + %0 = lshr <8 x i16> %src1, + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @shru_qi_int32_t(<4 x i32> %src1) { +; CHECK-LABEL: shru_qi_int32_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshr.u32 q0, q0, #4 +; CHECK-NEXT: bx lr +entry: + %0 = lshr <4 x i32> %src1, + ret <4 x i32> %0 +} + + +define arm_aapcs_vfpcc <16 x i8> @shrs_qi_int8_t(<16 x i8> %src1) { +; CHECK-LABEL: shrs_qi_int8_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshr.s8 q0, q0, #4 +; CHECK-NEXT: bx lr +entry: + %0 = ashr <16 x i8> %src1, + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @shrs_qi_int16_t(<8 x i16> %src1) { +; CHECK-LABEL: shrs_qi_int16_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshr.s16 q0, q0, #4 +; CHECK-NEXT: bx lr +entry: + %0 = ashr <8 x i16> %src1, + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @shrs_qi_int32_t(<4 x i32> %src1) { +; CHECK-LABEL: shrs_qi_int32_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshr.s32 q0, q0, #4 +; CHECK-NEXT: bx lr +entry: + %0 = ashr <4 x i32> %src1, + ret <4 x i32> %0 +} + + 
+ +define arm_aapcs_vfpcc <16 x i8> @shl_qr_int8_t(<16 x i8> %src1, i8 %src2) { +; CHECK-LABEL: shl_qr_int8_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshl.u8 q0, r0 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %s = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %0 = shl <16 x i8> %src1, %s + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @shl_qr_int16_t(<8 x i16> %src1, i16 %src2) { +; CHECK-LABEL: shl_qr_int16_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshl.u16 q0, r0 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %s = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %0 = shl <8 x i16> %src1, %s + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @shl_qr_int32_t(<4 x i32> %src1, i32 %src2) { +; CHECK-LABEL: shl_qr_int32_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshl.u32 q0, r0 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %s = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %0 = shl <4 x i32> %src1, %s + ret <4 x i32> %0 +} + + +define arm_aapcs_vfpcc <16 x i8> @shru_qr_int8_t(<16 x i8> %src1, i8 %src2) { +; CHECK-LABEL: shru_qr_int8_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.8 q1, r0 +; CHECK-NEXT: vneg.s8 q1, q1 +; CHECK-NEXT: vshl.u8 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %s = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %0 = lshr <16 x i8> %src1, %s + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @shru_qr_int16_t(<8 x i16> %src1, i16 %src2) { +; CHECK-LABEL: shru_qr_int16_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.16 q1, r0 +; CHECK-NEXT: vneg.s16 q1, q1 +; CHECK-NEXT: vshl.u16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %s = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> 
zeroinitializer + %0 = lshr <8 x i16> %src1, %s + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @shru_qr_int32_t(<4 x i32> %src1, i32 %src2) { +; CHECK-LABEL: shru_qr_int32_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.32 q1, r0 +; CHECK-NEXT: vneg.s32 q1, q1 +; CHECK-NEXT: vshl.u32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %s = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %0 = lshr <4 x i32> %src1, %s + ret <4 x i32> %0 +} + + +define arm_aapcs_vfpcc <16 x i8> @shrs_qr_int8_t(<16 x i8> %src1, i8 %src2) { +; CHECK-LABEL: shrs_qr_int8_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.8 q1, r0 +; CHECK-NEXT: vneg.s8 q1, q1 +; CHECK-NEXT: vshl.s8 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <16 x i8> undef, i8 %src2, i32 0 + %s = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer + %0 = ashr <16 x i8> %src1, %s + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @shrs_qr_int16_t(<8 x i16> %src1, i16 %src2) { +; CHECK-LABEL: shrs_qr_int16_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.16 q1, r0 +; CHECK-NEXT: vneg.s16 q1, q1 +; CHECK-NEXT: vshl.s16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <8 x i16> undef, i16 %src2, i32 0 + %s = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer + %0 = ashr <8 x i16> %src1, %s + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @shrs_qr_int32_t(<4 x i32> %src1, i32 %src2) { +; CHECK-LABEL: shrs_qr_int32_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vdup.32 q1, r0 +; CHECK-NEXT: vneg.s32 q1, q1 +; CHECK-NEXT: vshl.s32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <4 x i32> undef, i32 %src2, i32 0 + %s = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer + %0 = ashr <4 x i32> %src1, %s + ret <4 x i32> %0 +} + + +define arm_aapcs_vfpcc <16 x i8> @shl_qiv_int8_t(<16 x i8> %src1) { +; CHECK-LABEL: shl_qiv_int8_t: +; 
CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adr r0, .LCPI27_0 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vshl.u8 q0, q0, q1 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI27_0: +; CHECK-NEXT: .byte 1 @ 0x1 +; CHECK-NEXT: .byte 2 @ 0x2 +; CHECK-NEXT: .byte 3 @ 0x3 +; CHECK-NEXT: .byte 4 @ 0x4 +; CHECK-NEXT: .byte 1 @ 0x1 +; CHECK-NEXT: .byte 2 @ 0x2 +; CHECK-NEXT: .byte 3 @ 0x3 +; CHECK-NEXT: .byte 4 @ 0x4 +; CHECK-NEXT: .byte 1 @ 0x1 +; CHECK-NEXT: .byte 2 @ 0x2 +; CHECK-NEXT: .byte 3 @ 0x3 +; CHECK-NEXT: .byte 4 @ 0x4 +; CHECK-NEXT: .byte 1 @ 0x1 +; CHECK-NEXT: .byte 2 @ 0x2 +; CHECK-NEXT: .byte 3 @ 0x3 +; CHECK-NEXT: .byte 4 @ 0x4 +entry: + %0 = shl <16 x i8> %src1, + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @shl_qiv_int16_t(<8 x i16> %src1) { +; CHECK-LABEL: shl_qiv_int16_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adr r0, .LCPI28_0 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vshl.u16 q0, q0, q1 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI28_0: +; CHECK-NEXT: .short 1 @ 0x1 +; CHECK-NEXT: .short 2 @ 0x2 +; CHECK-NEXT: .short 3 @ 0x3 +; CHECK-NEXT: .short 4 @ 0x4 +; CHECK-NEXT: .short 1 @ 0x1 +; CHECK-NEXT: .short 2 @ 0x2 +; CHECK-NEXT: .short 3 @ 0x3 +; CHECK-NEXT: .short 4 @ 0x4 +entry: + %0 = shl <8 x i16> %src1, + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @shl_qiv_int32_t(<4 x i32> %src1) { +; CHECK-LABEL: shl_qiv_int32_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adr r0, .LCPI29_0 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vshl.u32 q0, q0, q1 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI29_0: +; CHECK-NEXT: .long 1 @ 0x1 +; CHECK-NEXT: .long 2 @ 0x2 +; CHECK-NEXT: .long 3 @ 0x3 +; CHECK-NEXT: .long 4 @ 0x4 +entry: + %0 = shl <4 x i32> %src1, + ret <4 x i32> %0 +}