Index: llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -273,6 +273,8 @@
   bool SelectCMP_SWAP(SDNode *N);
 
+  bool SelectSVE8BitLslImm(SDValue N, SDValue &Imm, SDValue &Shift);
+
   bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
 
   bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm);
 
@@ -2918,6 +2920,32 @@
   return true;
 }
 
+// Select a signed 8-bit immediate with an optional left shift by 8 for the
+// SVE DUP (immediate) instruction: returns the 8-bit payload in Base and the
+// shift amount (0 or 8) in Offset.
+bool AArch64DAGToDAGISel::SelectSVE8BitLslImm(SDValue N, SDValue &Base,
+                                              SDValue &Offset) {
+  auto C = dyn_cast<ConstantSDNode>(N);
+  if (!C)
+    return false;
+
+  auto Ty = N->getValueType(0);
+
+  int64_t Imm = C->getSExtValue();
+  SDLoc DL(N);
+
+  // Immediate fits directly in a signed byte: encode with LSL #0.
+  if ((Imm >= -128) && (Imm <= 127)) {
+    Base = CurDAG->getTargetConstant(Imm, DL, Ty);
+    Offset = CurDAG->getTargetConstant(0, DL, Ty);
+    return true;
+  }
+
+  // Multiple of 256 in [-32768, 32512]: encode the byte with LSL #8.
+  if (((Imm % 256) == 0) && (Imm >= -32768) && (Imm <= 32512)) {
+    Base = CurDAG->getTargetConstant(Imm / 256, DL, Ty);
+    Offset = CurDAG->getTargetConstant(8, DL, Ty);
+    return true;
+  }
+
+  return false;
+}
+
 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift) {
   if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
     const int64_t ImmVal = CNode->getZExtValue();
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -10,6 +10,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+def SVE8BitLslImm : ComplexPattern<i32, 2, "SelectSVE8BitLslImm", [], []>;
+
 def SDT_AArch64_LDNF1 : SDTypeProfile<1, 3, [
   SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>, SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>
 ]>;
@@ -318,6 +320,32 @@
   def : Pat<(nxv2f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
   def : Pat<(nxv2f64 (AArch64dup (f64 fpimm0))), (DUP_ZI_D 0, 0)>;
 
+  // Duplicate Int immediate into all vector elements
+  def : Pat<(nxv16i8 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))),
+            (DUP_ZI_B $a, $b)>;
+  def : Pat<(nxv8i16 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))),
+            (DUP_ZI_H $a, $b)>;
+  def : Pat<(nxv4i32 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))),
+            (DUP_ZI_S $a, $b)>;
+  def : Pat<(nxv2i64 (AArch64dup (i64 (SVE8BitLslImm i32:$a, i32:$b)))),
+            (DUP_ZI_D $a, $b)>;
+
+  // Duplicate FP immediate into all vector elements
+  let AddedComplexity = 2 in {
+    def : Pat<(nxv8f16 (AArch64dup fpimm16:$imm8)),
+              (FDUP_ZI_H fpimm16:$imm8)>;
+    def : Pat<(nxv4f16 (AArch64dup fpimm16:$imm8)),
+              (FDUP_ZI_H fpimm16:$imm8)>;
+    def : Pat<(nxv2f16 (AArch64dup fpimm16:$imm8)),
+              (FDUP_ZI_H fpimm16:$imm8)>;
+    def : Pat<(nxv4f32 (AArch64dup fpimm32:$imm8)),
+              (FDUP_ZI_S fpimm32:$imm8)>;
+    def : Pat<(nxv2f32 (AArch64dup fpimm32:$imm8)),
+              (FDUP_ZI_S fpimm32:$imm8)>;
+    def : Pat<(nxv2f64 (AArch64dup fpimm64:$imm8)),
+              (FDUP_ZI_D fpimm64:$imm8)>;
+  }
+
   // Select elements from either vector (predicated)
   defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>;
 
Index: llvm/test/CodeGen/AArch64/sve-vector-splat.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-vector-splat.ll
+++ llvm/test/CodeGen/AArch64/sve-vector-splat.ll
@@ -38,6 +38,42 @@
   ret <vscale x 2 x i64> %splat
 }
 
+define <vscale x 16 x i8> @sve_splat_16xi8_imm() {
+; CHECK-LABEL: @sve_splat_16xi8_imm
+; CHECK: mov z0.b, #1
+; CHECK-NEXT: ret
+  %ins = insertelement <vscale x 16 x i8> undef, i8 1, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  ret <vscale x 16 x i8> %splat
+}
+
+define <vscale x 8 x i16> @sve_splat_8xi16_imm() {
+; CHECK-LABEL: @sve_splat_8xi16_imm
+; CHECK: mov z0.h, #1
+; CHECK-NEXT: ret
+  %ins = insertelement <vscale x 8 x i16> undef, i16 1, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x i16> %splat
+}
+
+define <vscale x 4 x i32> @sve_splat_4xi32_imm() {
+; CHECK-LABEL: @sve_splat_4xi32_imm
+; CHECK: mov z0.s, #1
+; CHECK-NEXT: ret
+  %ins = insertelement <vscale x 4 x i32> undef, i32 1, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x i32> %splat
+}
+
+define <vscale x 2 x i64> @sve_splat_2xi64_imm() {
+; CHECK-LABEL: @sve_splat_2xi64_imm
+; CHECK: mov z0.d, #1
+; CHECK-NEXT: ret
+  %ins = insertelement <vscale x 2 x i64> undef, i64 1, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  ret <vscale x 2 x i64> %splat
+}
+
 ;; Promote splats of smaller illegal integer vector types
 
 define <vscale x 2 x i8> @sve_splat_2xi8(i8 %val) {
@@ -234,3 +270,60 @@
 ; CHECK-NEXT: ret
   ret <vscale x 2 x double> zeroinitializer
 }
+
+; TODO: The f16 constant should be folded into the move.
+define <vscale x 8 x half> @splat_nxv8f16_imm() {
+; CHECK-LABEL: splat_nxv8f16_imm:
+; CHECK: mov z0.h, h0
+; CHECK-NEXT: ret
+  %1 = insertelement <vscale x 8 x half> undef, half 1.0, i32 0
+  %2 = shufflevector <vscale x 8 x half> %1, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x half> %2
+}
+
+; TODO: The f16 constant should be folded into the move.
+define <vscale x 4 x half> @splat_nxv4f16_imm() {
+; CHECK-LABEL: splat_nxv4f16_imm:
+; CHECK: mov z0.h, h0
+; CHECK-NEXT: ret
+  %1 = insertelement <vscale x 4 x half> undef, half 1.0, i32 0
+  %2 = shufflevector <vscale x 4 x half> %1, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x half> %2
+}
+
+; TODO: The f16 constant should be folded into the move.
+define <vscale x 2 x half> @splat_nxv2f16_imm() {
+; CHECK-LABEL: splat_nxv2f16_imm:
+; CHECK: mov z0.h, h0
+; CHECK-NEXT: ret
+  %1 = insertelement <vscale x 2 x half> undef, half 1.0, i32 0
+  %2 = shufflevector <vscale x 2 x half> %1, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
+  ret <vscale x 2 x half> %2
+}
+
+define <vscale x 4 x float> @splat_nxv4f32_imm() {
+; CHECK-LABEL: splat_nxv4f32_imm:
+; CHECK: mov z0.s, #1.0
+; CHECK-NEXT: ret
+  %1 = insertelement <vscale x 4 x float> undef, float 1.0, i32 0
+  %2 = shufflevector <vscale x 4 x float> %1, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 2 x float> @splat_nxv2f32_imm() {
+; CHECK-LABEL: splat_nxv2f32_imm:
+; CHECK: mov z0.s, #1.0
+; CHECK-NEXT: ret
+  %1 = insertelement <vscale x 2 x float> undef, float 1.0, i32 0
+  %2 = shufflevector <vscale x 2 x float> %1, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
+  ret <vscale x 2 x float> %2
+}
+
+define <vscale x 2 x double> @splat_nxv2f64_imm() {
+; CHECK-LABEL: splat_nxv2f64_imm:
+; CHECK: mov z0.d, #1.0
+; CHECK-NEXT: ret
+  %1 = insertelement <vscale x 2 x double> undef, double 1.0, i32 0
+  %2 = shufflevector <vscale x 2 x double> %1, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  ret <vscale x 2 x double> %2
+}