Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1469,6 +1469,32 @@
   def : Pat<(nxv2i1 (and PPR:$Ps1, PPR:$Ps2)),
             (AND_PPzPP (PTRUE_D 31), PPR:$Ps1, PPR:$Ps2)>;
 
+  // Whole vector selects: splat $cond, compare != 0 to form a predicate, SEL.
+  def : Pat<(nxv16i8 (select GPR32:$cond, ZPR:$Zs1, ZPR:$Zs2)),
+            (SEL_ZPZZ_S (CMPNE_PPzZI_S (PTRUE_S 31), (DUP_ZR_S $cond), 0), ZPR:$Zs1, ZPR:$Zs2)>;
+  def : Pat<(nxv8i16 (select GPR32:$cond, ZPR:$Zs1, ZPR:$Zs2)),
+            (SEL_ZPZZ_S (CMPNE_PPzZI_S (PTRUE_S 31), (DUP_ZR_S $cond), 0), ZPR:$Zs1, ZPR:$Zs2)>;
+  def : Pat<(nxv4i32 (select GPR32:$cond, ZPR:$Zs1, ZPR:$Zs2)),
+            (SEL_ZPZZ_S (CMPNE_PPzZI_S (PTRUE_S 31), (DUP_ZR_S $cond), 0), ZPR:$Zs1, ZPR:$Zs2)>;
+  def : Pat<(nxv2i64 (select GPR32:$cond, ZPR:$Zs1, ZPR:$Zs2)),
+            (SEL_ZPZZ_S (CMPNE_PPzZI_S (PTRUE_S 31), (DUP_ZR_S $cond), 0), ZPR:$Zs1, ZPR:$Zs2)>;
+  def : Pat<(nxv8f16 (select GPR32:$cond, ZPR:$Zs1, ZPR:$Zs2)),
+            (SEL_ZPZZ_S (CMPNE_PPzZI_S (PTRUE_S 31), (DUP_ZR_S $cond), 0), ZPR:$Zs1, ZPR:$Zs2)>;
+  def : Pat<(nxv2f32 (select GPR32:$cond, ZPR:$Zs1, ZPR:$Zs2)),
+            (SEL_ZPZZ_S (CMPNE_PPzZI_S (PTRUE_S 31), (DUP_ZR_S $cond), 0), ZPR:$Zs1, ZPR:$Zs2)>;
+  def : Pat<(nxv4f32 (select GPR32:$cond, ZPR:$Zs1, ZPR:$Zs2)),
+            (SEL_ZPZZ_S (CMPNE_PPzZI_S (PTRUE_S 31), (DUP_ZR_S $cond), 0), ZPR:$Zs1, ZPR:$Zs2)>;
+  def : Pat<(nxv2f64 (select GPR32:$cond, ZPR:$Zs1, ZPR:$Zs2)),
+            (SEL_ZPZZ_S (CMPNE_PPzZI_S (PTRUE_S 31), (DUP_ZR_S $cond), 0), ZPR:$Zs1, ZPR:$Zs2)>;
+  def : Pat<(nxv16i1 (select GPR32:$cond, PPR:$Ps1, PPR:$Ps2)),
+            (SEL_PPPP (CMPNE_PPzZI_B (PTRUE_B 31), (DUP_ZR_B $cond), 0), PPR:$Ps1, PPR:$Ps2)>;
+  def : Pat<(nxv8i1 (select GPR32:$cond, PPR:$Ps1, PPR:$Ps2)),
+            (SEL_PPPP (CMPNE_PPzZI_B (PTRUE_B 31), (DUP_ZR_B $cond), 0), PPR:$Ps1, PPR:$Ps2)>;
+  def : Pat<(nxv4i1 (select GPR32:$cond, PPR:$Ps1, PPR:$Ps2)),
+            (SEL_PPPP (CMPNE_PPzZI_B (PTRUE_B 31), (DUP_ZR_B $cond), 0), PPR:$Ps1, PPR:$Ps2)>;
+  def : Pat<(nxv2i1 (select GPR32:$cond, PPR:$Ps1, PPR:$Ps2)),
+            (SEL_PPPP (CMPNE_PPzZI_B (PTRUE_B 31), (DUP_ZR_B $cond), 0), PPR:$Ps1, PPR:$Ps2)>;
+
   // Add more complex addressing modes here as required
   multiclass pred_load {
Index: llvm/test/CodeGen/AArch64/select-sve.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/select-sve.ll
@@ -0,0 +1,133 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 16 x i8> @select_nxv16i8(i1 %cond, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: select_nxv16i8:
+; CHECK-DAG: ptrue [[PG:p[0-9]+]].s
+; CHECK-DAG: and [[COND:w[0-9]+]], w0, #0x1
+; CHECK-DAG: mov [[VCOND:z[0-9]+]].s, [[COND]]
+; CHECK-NEXT: cmpne [[PRED:p[0-9]+]].s, [[PG]]/z, [[VCOND]].s, #0
+; CHECK-NEXT: sel z0.s, [[PRED]], z0.s, z1.s
+; CHECK-NEXT: ret
+  %res = select i1 %cond, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @select_nxv8i16(i1 %cond, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: select_nxv8i16:
+; CHECK-DAG: ptrue [[PG:p[0-9]+]].s
+; CHECK-DAG: and [[COND:w[0-9]+]], w0, #0x1
+; CHECK-DAG: mov [[VCOND:z[0-9]+]].s, [[COND]]
+; CHECK-NEXT: cmpne [[PRED:p[0-9]+]].s, [[PG]]/z, [[VCOND]].s, #0
+; CHECK-NEXT: sel z0.s, [[PRED]], z0.s, z1.s
+; CHECK-NEXT: ret
+  %res = select i1 %cond, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @select_nxv4i32(i1 %cond, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: select_nxv4i32:
+; CHECK-DAG: ptrue [[PG:p[0-9]+]].s
+; CHECK-DAG: and [[COND:w[0-9]+]], w0, #0x1
+; CHECK-DAG: mov [[VCOND:z[0-9]+]].s, [[COND]]
+; CHECK-NEXT: cmpne [[PRED:p[0-9]+]].s, [[PG]]/z, [[VCOND]].s, #0
+; CHECK-NEXT: sel z0.s, [[PRED]], z0.s, z1.s
+; CHECK-NEXT: ret
+  %res = select i1 %cond, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @select_nxv2i64(i1 %cond, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: select_nxv2i64:
+; CHECK-DAG: ptrue [[PG:p[0-9]+]].s
+; CHECK-DAG: and [[COND:w[0-9]+]], w0, #0x1
+; CHECK-DAG: mov [[VCOND:z[0-9]+]].s, [[COND]]
+; CHECK-NEXT: cmpne [[PRED:p[0-9]+]].s, [[PG]]/z, [[VCOND]].s, #0
+; CHECK-NEXT: sel z0.s, [[PRED]], z0.s, z1.s
+; CHECK-NEXT: ret
+  %res = select i1 %cond, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 8 x half> @select_nxv8f16(i1 %cond, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: select_nxv8f16:
+; CHECK-DAG: ptrue [[PG:p[0-9]+]].s
+; CHECK-DAG: and [[COND:w[0-9]+]], w0, #0x1
+; CHECK-DAG: mov [[VCOND:z[0-9]+]].s, [[COND]]
+; CHECK-NEXT: cmpne [[PRED:p[0-9]+]].s, [[PG]]/z, [[VCOND]].s, #0
+; CHECK-NEXT: sel z0.s, [[PRED]], z0.s, z1.s
+; CHECK-NEXT: ret
+  %res = select i1 %cond, <vscale x 8 x half> %a, <vscale x 8 x half> %b
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @select_nxv4f32(i1 %cond, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: select_nxv4f32:
+; CHECK-DAG: ptrue [[PG:p[0-9]+]].s
+; CHECK-DAG: and [[COND:w[0-9]+]], w0, #0x1
+; CHECK-DAG: mov [[VCOND:z[0-9]+]].s, [[COND]]
+; CHECK-NEXT: cmpne [[PRED:p[0-9]+]].s, [[PG]]/z, [[VCOND]].s, #0
+; CHECK-NEXT: sel z0.s, [[PRED]], z0.s, z1.s
+; CHECK-NEXT: ret
+  %res = select i1 %cond, <vscale x 4 x float> %a, <vscale x 4 x float> %b
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @select_nxv2f64(i1 %cond, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: select_nxv2f64:
+; CHECK-DAG: ptrue [[PG:p[0-9]+]].s
+; CHECK-DAG: and [[COND:w[0-9]+]], w0, #0x1
+; CHECK-DAG: mov [[VCOND:z[0-9]+]].s, [[COND]]
+; CHECK-NEXT: cmpne [[PRED:p[0-9]+]].s, [[PG]]/z, [[VCOND]].s, #0
+; CHECK-NEXT: sel z0.s, [[PRED]], z0.s, z1.s
+; CHECK-NEXT: ret
+  %res = select i1 %cond, <vscale x 2 x double> %a, <vscale x 2 x double> %b
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 16 x i1> @select_nxv16i1(i1 %cond, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
+; CHECK-LABEL: select_nxv16i1:
+; CHECK-DAG: ptrue [[PG:p[0-9]+]].b
+; CHECK-DAG: and [[COND:w[0-9]+]], w0, #0x1
+; CHECK-DAG: mov [[VCOND:z[0-9]+]].b, [[COND]]
+; CHECK-NEXT: cmpne [[PRED:p[0-9]+]].b, [[PG]]/z, [[VCOND]].b, #0
+; CHECK-NEXT: sel p0.b, [[PRED]], p0.b, p1.b
+; CHECK-NEXT: ret
+  %res = select i1 %cond, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b
+  ret <vscale x 16 x i1> %res
+}
+
+define <vscale x 8 x i1> @select_nxv8i1(i1 %cond, <vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
+; CHECK-LABEL: select_nxv8i1:
+; CHECK-DAG: ptrue [[PG:p[0-9]+]].b
+; CHECK-DAG: and [[COND:w[0-9]+]], w0, #0x1
+; CHECK-DAG: mov [[VCOND:z[0-9]+]].b, [[COND]]
+; CHECK-NEXT: cmpne [[PRED:p[0-9]+]].b, [[PG]]/z, [[VCOND]].b, #0
+; CHECK-NEXT: sel p0.b, [[PRED]], p0.b, p1.b
+; CHECK-NEXT: ret
+  %res = select i1 %cond, <vscale x 8 x i1> %a, <vscale x 8 x i1> %b
+  ret <vscale x 8 x i1> %res
+}
+
+define <vscale x 4 x i1> @select_nxv4i1(i1 %cond, <vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
+; CHECK-LABEL: select_nxv4i1:
+; CHECK-DAG: ptrue [[PG:p[0-9]+]].b
+; CHECK-DAG: and [[COND:w[0-9]+]], w0, #0x1
+; CHECK-DAG: mov [[VCOND:z[0-9]+]].b, [[COND]]
+; CHECK-NEXT: cmpne [[PRED:p[0-9]+]].b, [[PG]]/z, [[VCOND]].b, #0
+; CHECK-NEXT: sel p0.b, [[PRED]], p0.b, p1.b
+; CHECK-NEXT: ret
+  %res = select i1 %cond, <vscale x 4 x i1> %a, <vscale x 4 x i1> %b
+  ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 2 x i1> @select_nxv2i1(i1 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
+; CHECK-LABEL: select_nxv2i1:
+; CHECK-DAG: ptrue [[PG:p[0-9]+]].b
+; CHECK-DAG: and [[COND:w[0-9]+]], w0, #0x1
+; CHECK-DAG: mov [[VCOND:z[0-9]+]].b, [[COND]]
+; CHECK-NEXT: cmpne [[PRED:p[0-9]+]].b, [[PG]]/z, [[VCOND]].b, #0
+; CHECK-NEXT: sel p0.b, [[PRED]], p0.b, p1.b
+; CHECK-NEXT: ret
+  %res = select i1 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b
+  ret <vscale x 2 x i1> %res
+}