diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -204,6 +204,11 @@ return SelectSVEAddSubImm(N, VT, Imm, Shift); } + template + bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) { + return SelectSVECpyDupImm(N, VT, Imm, Shift); + } + template bool SelectSVELogicalImm(SDValue N, SDValue &Imm) { return SelectSVELogicalImm(N, VT, Imm, Invert); @@ -357,10 +362,8 @@ bool SelectCMP_SWAP(SDNode *N); - bool SelectSVE8BitLslImm(SDValue N, SDValue &Imm, SDValue &Shift); - bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift); - + bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift); bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert); bool SelectSVESignedArithImm(SDValue N, SDValue &Imm); @@ -3129,32 +3132,6 @@ return true; } -bool AArch64DAGToDAGISel::SelectSVE8BitLslImm(SDValue N, SDValue &Base, - SDValue &Offset) { - auto C = dyn_cast(N); - if (!C) - return false; - - auto Ty = N->getValueType(0); - - int64_t Imm = C->getSExtValue(); - SDLoc DL(N); - - if ((Imm >= -128) && (Imm <= 127)) { - Base = CurDAG->getTargetConstant(Imm, DL, Ty); - Offset = CurDAG->getTargetConstant(0, DL, Ty); - return true; - } - - if (((Imm % 256) == 0) && (Imm >= -32768) && (Imm <= 32512)) { - Base = CurDAG->getTargetConstant(Imm/256, DL, Ty); - Offset = CurDAG->getTargetConstant(8, DL, Ty); - return true; - } - - return false; -} - bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift) { if (auto CNode = dyn_cast(N)) { const int64_t ImmVal = CNode->getSExtValue(); @@ -3200,6 +3177,46 @@ return false; } +bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, + SDValue &Shift) { + if (!isa(N)) + return false; + + SDLoc DL(N); + int64_t Val = cast(N) + ->getAPIntValue() + .truncOrSelf(VT.getFixedSizeInBits()) + .getSExtValue(); + + switch (VT.SimpleTy) { + case MVT::i8: + // All immediates are supported. + Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); + Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); + return true; + case MVT::i16: + case MVT::i32: + case MVT::i64: + // Support 8bit signed immediates. + if (Val >= -128 && Val <= 127) { + Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); + Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32); + return true; + } + // Support 16bit signed immediates that are a multiple of 256. + if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) { + Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); + Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32); + return true; + } + break; + default: + break; + } + + return false; +} + bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) { if (auto CNode = dyn_cast(N)) { int64_t ImmVal = CNode->getSExtValue(); diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -653,13 +653,13 @@ def : Pat<(nxv8bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>; // Duplicate Int immediate into all vector elements - def : Pat<(nxv16i8 (AArch64dup (i32 (SVE8BitLslImm32 i32:$a, i32:$b)))), + def : Pat<(nxv16i8 (AArch64dup (i32 (SVECpyDupImm8Pat i32:$a, i32:$b)))), (DUP_ZI_B $a, $b)>; - def : Pat<(nxv8i16 (AArch64dup (i32 (SVE8BitLslImm32 i32:$a, i32:$b)))), + def : Pat<(nxv8i16 (AArch64dup (i32 (SVECpyDupImm16Pat i32:$a, i32:$b)))), (DUP_ZI_H $a, $b)>; - def : Pat<(nxv4i32 (AArch64dup (i32 (SVE8BitLslImm32 i32:$a, i32:$b)))), + def : Pat<(nxv4i32 (AArch64dup (i32 (SVECpyDupImm32Pat i32:$a, i32:$b)))), (DUP_ZI_S $a, $b)>; - def : Pat<(nxv2i64 (AArch64dup (i64 (SVE8BitLslImm64 i32:$a, i32:$b)))), + def : Pat<(nxv2i64 (AArch64dup (i64 (SVECpyDupImm64Pat i32:$a, i32:$b)))), (DUP_ZI_D $a, $b)>; // Duplicate immediate FP into all vector elements. diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -199,6 +199,11 @@ def SVEAddSubImm32Pat : ComplexPattern", []>; def SVEAddSubImm64Pat : ComplexPattern", []>; +def SVECpyDupImm8Pat : ComplexPattern", []>; +def SVECpyDupImm16Pat : ComplexPattern", []>; +def SVECpyDupImm32Pat : ComplexPattern", []>; +def SVECpyDupImm64Pat : ComplexPattern", []>; + def SVELogicalImm8Pat : ComplexPattern", []>; def SVELogicalImm16Pat : ComplexPattern", []>; def SVELogicalImm32Pat : ComplexPattern", []>; @@ -209,14 +214,6 @@ def SVELogicalImm32NotPat : ComplexPattern", []>; def SVELogicalImm64NotPat : ComplexPattern", []>; -def SVE8BitLslImm32 : ComplexPattern; -def SVE8BitLslImm64 : ComplexPattern; -class SVE8BitLslImm { - ComplexPattern Pat = !cond( - !eq(ty, i32): SVE8BitLslImm32, - !eq(ty, i64): SVE8BitLslImm64); -} - def SVEArithUImm8Pat : ComplexPattern", []>; def SVEArithUImm16Pat : ComplexPattern", []>; def SVEArithUImm32Pat : ComplexPattern", []>; @@ -4623,29 +4620,28 @@ } multiclass sve_int_dup_imm_pred_merge_inst< - bits<2> sz8_64, string asm, ZPRRegOp zprty, ValueType intty, - ValueType predty, ValueType scalarty, imm8_opt_lsl cpyimm> { + bits<2> sz8_64, string asm, ZPRRegOp zprty, imm8_opt_lsl cpyimm, + ValueType intty, ValueType predty, ValueType scalarty, ComplexPattern cpx> { let Constraints = "$Zd = $_Zd" in def NAME : sve_int_dup_imm_pred; def : InstAlias<"mov $Zd, $Pg/m, $imm", (!cast(NAME) zprty:$Zd, PPRAny:$Pg, cpyimm:$imm), 1>; - def : Pat<(intty - (vselect predty:$Pg, - (intty (AArch64dup (scalarty (SVE8BitLslImm.Pat i32:$imm, i32:$shift)))), - intty:$Zd)), - (!cast(NAME) zprty:$Zd, $Pg, i32:$imm, i32:$shift)>; + def : Pat<(vselect predty:$Pg, + (intty (AArch64dup (scalarty (cpx i32:$imm, i32:$shift)))), + ZPR:$Zd), + (!cast(NAME) $Zd, $Pg, $imm, $shift)>; } multiclass sve_int_dup_imm_pred_merge { - defm _B : sve_int_dup_imm_pred_merge_inst<0b00, asm, ZPR8, nxv16i8, nxv16i1, - i32, cpy_imm8_opt_lsl_i8>; - defm _H : sve_int_dup_imm_pred_merge_inst<0b01, asm, ZPR16, nxv8i16, nxv8i1, - i32, cpy_imm8_opt_lsl_i16>; - defm _S : sve_int_dup_imm_pred_merge_inst<0b10, asm, ZPR32, nxv4i32, nxv4i1, - i32, cpy_imm8_opt_lsl_i32>; - defm _D : sve_int_dup_imm_pred_merge_inst<0b11, asm, ZPR64, nxv2i64, nxv2i1, - i64, cpy_imm8_opt_lsl_i64>; + defm _B : sve_int_dup_imm_pred_merge_inst<0b00, asm, ZPR8, cpy_imm8_opt_lsl_i8, + nxv16i8, nxv16i1, i32, SVECpyDupImm8Pat>; + defm _H : sve_int_dup_imm_pred_merge_inst<0b01, asm, ZPR16, cpy_imm8_opt_lsl_i16, + nxv8i16, nxv8i1, i32, SVECpyDupImm16Pat>; + defm _S : sve_int_dup_imm_pred_merge_inst<0b10, asm, ZPR32, cpy_imm8_opt_lsl_i32, + nxv4i32, nxv4i1, i32, SVECpyDupImm32Pat>; + defm _D : sve_int_dup_imm_pred_merge_inst<0b11, asm, ZPR64, cpy_imm8_opt_lsl_i64, + nxv2i64, nxv2i1, i64, SVECpyDupImm64Pat>; def : InstAlias<"fmov $Zd, $Pg/m, #0.0", (!cast(NAME # _H) ZPR16:$Zd, PPRAny:$Pg, 0, 0), 0>; @@ -4656,8 +4652,8 @@ } multiclass sve_int_dup_imm_pred_zero_inst< - bits<2> sz8_64, string asm, ZPRRegOp zprty, ValueType intty, - ValueType predty, ValueType scalarty, imm8_opt_lsl cpyimm> { + bits<2> sz8_64, string asm, ZPRRegOp zprty, imm8_opt_lsl cpyimm, + ValueType intty, ValueType predty, ValueType scalarty, ComplexPattern cpx> { def NAME : sve_int_dup_imm_pred; def : InstAlias<"mov $Zd, $Pg/z, $imm", @@ -4668,22 +4664,21 @@ (!cast(NAME) PPRAny:$Ps1, -1, 0)>; def : Pat<(intty (anyext (predty PPRAny:$Ps1))), (!cast(NAME) PPRAny:$Ps1, 1, 0)>; - def : Pat<(intty - (vselect predty:$Pg, - (intty (AArch64dup (scalarty (SVE8BitLslImm.Pat i32:$imm, i32:$shift)))), - (intty (AArch64dup (scalarty 0))))), - (!cast(NAME) $Pg, i32:$imm, i32:$shift)>; + def : Pat<(vselect predty:$Pg, + (intty (AArch64dup (scalarty (cpx i32:$imm, i32:$shift)))), + (intty (AArch64dup (scalarty 0)))), + (!cast(NAME) $Pg, $imm, $shift)>; } multiclass sve_int_dup_imm_pred_zero { - defm _B : sve_int_dup_imm_pred_zero_inst<0b00, asm, ZPR8, nxv16i8, nxv16i1, - i32, cpy_imm8_opt_lsl_i8>; - defm _H : sve_int_dup_imm_pred_zero_inst<0b01, asm, ZPR16, nxv8i16, nxv8i1, - i32, cpy_imm8_opt_lsl_i16>; - defm _S : sve_int_dup_imm_pred_zero_inst<0b10, asm, ZPR32, nxv4i32, nxv4i1, - i32, cpy_imm8_opt_lsl_i32>; - defm _D : sve_int_dup_imm_pred_zero_inst<0b11, asm, ZPR64, nxv2i64, nxv2i1, - i64, cpy_imm8_opt_lsl_i64>; + defm _B : sve_int_dup_imm_pred_zero_inst<0b00, asm, ZPR8, cpy_imm8_opt_lsl_i8, + nxv16i8, nxv16i1, i32, SVECpyDupImm8Pat>; + defm _H : sve_int_dup_imm_pred_zero_inst<0b01, asm, ZPR16, cpy_imm8_opt_lsl_i16, + nxv8i16, nxv8i1, i32, SVECpyDupImm16Pat>; + defm _S : sve_int_dup_imm_pred_zero_inst<0b10, asm, ZPR32, cpy_imm8_opt_lsl_i32, + nxv4i32, nxv4i1, i32, SVECpyDupImm32Pat>; + defm _D : sve_int_dup_imm_pred_zero_inst<0b11, asm, ZPR64, cpy_imm8_opt_lsl_i64, + nxv2i64, nxv2i1, i64, SVECpyDupImm64Pat>; } //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-float-compares.ll @@ -365,11 +365,10 @@ ; CHECK-LABEL: fcmp_ueq_v16f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl16 -; CHECK-NEXT: mov w8, #65535 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] ; CHECK-NEXT: fcmne p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: st1h { z0.h }, p0, [x2] @@ -412,11 +411,10 @@ ; CHECK-LABEL: fcmp_une_v16f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl16 -; CHECK-NEXT: mov w8, #65535 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] ; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: st1h { z0.h }, p0, [x2] @@ -459,11 +457,10 @@ ; CHECK-LABEL: fcmp_ugt_v16f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl16 -; CHECK-NEXT: mov w8, #65535 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] ; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z0.h -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: st1h { z0.h }, p0, [x2] @@ -506,11 +503,10 @@ ; CHECK-LABEL: fcmp_ult_v16f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl16 -; CHECK-NEXT: mov w8, #65535 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: st1h { z0.h }, p0, [x2] @@ -553,11 +549,10 @@ ; CHECK-LABEL: fcmp_uge_v16f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl16 -; CHECK-NEXT: mov w8, #65535 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] ; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z0.h -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: st1h { z0.h }, p0, [x2] @@ -600,11 +595,10 @@ ; CHECK-LABEL: fcmp_ule_v16f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl16 -; CHECK-NEXT: mov w8, #65535 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] ; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: st1h { z0.h }, p0, [x2] @@ -647,11 +641,10 @@ ; CHECK-LABEL: fcmp_ord_v16f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl16 -; CHECK-NEXT: mov w8, #65535 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] ; CHECK-NEXT: fcmuo p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: eor z0.d, z0.d, z1.d ; CHECK-NEXT: st1h { z0.h }, p0, [x2]