diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3133,6 +3133,37 @@
   }
 } // End HasSVEorSME
 
+let Predicates = [HasSVEorSME] in {
+  // Fold an add of an extended predicate into a merging predicated ADD of a
+  // splatted immediate, so only lanes where $pred is set are changed.
+  // 'extend' is the extension node to match; 'value' is the DUP immediate.
+  multiclass sve_predicated_add<SDNode extend, int value> {
+    def : Pat<(nxv16i8 (add ZPR:$op, (extend (nxv16i1 PPR:$pred)))),
+              (ADD_ZPmZ_B PPR:$pred, ZPR:$op, (DUP_ZI_B value, 0))>;
+    def : Pat<(nxv8i16 (add ZPR:$op, (extend (nxv8i1 PPR:$pred)))),
+              (ADD_ZPmZ_H PPR:$pred, ZPR:$op, (DUP_ZI_H value, 0))>;
+    def : Pat<(nxv4i32 (add ZPR:$op, (extend (nxv4i1 PPR:$pred)))),
+              (ADD_ZPmZ_S PPR:$pred, ZPR:$op, (DUP_ZI_S value, 0))>;
+    def : Pat<(nxv2i64 (add ZPR:$op, (extend (nxv2i1 PPR:$pred)))),
+              (ADD_ZPmZ_D PPR:$pred, ZPR:$op, (DUP_ZI_D value, 0))>;
+  }
+
+  // zext contributes 1 per active lane; sext contributes -1, passed as 255
+  // (materialized as `mov zN, #-1` per the accompanying test expectations).
+  defm : sve_predicated_add<zext, 1>;
+  defm : sve_predicated_add<sext, 255>;
+
+  // sub(op, sext(pred)): subtract -1 from active lanes with a predicated SUB.
+  def : Pat<(nxv16i8 (sub ZPR:$op, (sext (nxv16i1 PPR:$pred)))),
+            (SUB_ZPmZ_B PPR:$pred, ZPR:$op, (DUP_ZI_B 255, 0))>;
+  def : Pat<(nxv8i16 (sub ZPR:$op, (sext (nxv8i1 PPR:$pred)))),
+            (SUB_ZPmZ_H PPR:$pred, ZPR:$op, (DUP_ZI_H 255, 0))>;
+  def : Pat<(nxv4i32 (sub ZPR:$op, (sext (nxv4i1 PPR:$pred)))),
+            (SUB_ZPmZ_S PPR:$pred, ZPR:$op, (DUP_ZI_S 255, 0))>;
+  def : Pat<(nxv2i64 (sub ZPR:$op, (sext (nxv2i1 PPR:$pred)))),
+            (SUB_ZPmZ_D PPR:$pred, ZPR:$op, (DUP_ZI_D 255, 0))>;
+} // End HasSVEorSME
+
 let Predicates = [HasSVE, HasMatMulInt8] in {
   defm SMMLA_ZZZ : sve_int_matmul<0b00, "smmla", int_aarch64_sve_smmla>;
   defm UMMLA_ZZZ : sve_int_matmul<0b11, "ummla", int_aarch64_sve_ummla>;
diff --git a/llvm/test/CodeGen/AArch64/predicated-add-sub.ll b/llvm/test/CodeGen/AArch64/predicated-add-sub.ll --- a/llvm/test/CodeGen/AArch64/predicated-add-sub.ll +++ b/llvm/test/CodeGen/AArch64/predicated-add-sub.ll @@ -11,8 +11,8 @@ ; CHECK-NEXT: and z1.h, z1.h, #0xff ; CHECK-NEXT: and z2.h, z2.h, #0xff ; CHECK-NEXT: cmphi p0.h, p0/z, z2.h, z1.h -; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1 -; CHECK-NEXT: add z0.h, 
z0.h, z1.h +; CHECK-NEXT: mov z1.h, #1 // =0x1 +; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = zext %v to @@ -28,8 +28,8 @@ ; CHECK-NEXT: and z1.s, z1.s, #0xffff ; CHECK-NEXT: and z2.s, z2.s, #0xffff ; CHECK-NEXT: cmphi p0.s, p0/z, z2.s, z1.s -; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1 -; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: mov z1.s, #1 // =0x1 +; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = zext %v to @@ -45,8 +45,8 @@ ; CHECK-NEXT: and z1.d, z1.d, #0xffffffff ; CHECK-NEXT: and z2.d, z2.d, #0xffffffff ; CHECK-NEXT: cmphi p0.d, p0/z, z2.d, z1.d -; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1 -; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: mov z1.d, #1 // =0x1 +; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = zext %v to @@ -59,8 +59,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: cmphi p0.b, p0/z, z0.b, z1.b -; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1 -; CHECK-NEXT: add z0.b, z0.b, z1.b +; CHECK-NEXT: mov z1.b, #1 // =0x1 +; CHECK-NEXT: add z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = zext %v to @@ -73,8 +73,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmphi p0.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1 -; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: mov z1.h, #1 // =0x1 +; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = zext %v to @@ -87,8 +87,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.s -; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1 -; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: mov z1.s, #1 // =0x1 +; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = zext %v to @@ -101,8 +101,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmphi p0.d, p0/z, z0.d, z1.d -; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1 -; 
CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: mov z1.d, #1 // =0x1 +; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = zext %v to @@ -114,12 +114,11 @@ ; CHECK-LABEL: zext.add.8xi32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: cmphi p1.s, p0/z, z0.s, z2.s -; CHECK-NEXT: cmphi p0.s, p0/z, z1.s, z3.s -; CHECK-NEXT: mov z2.s, p0/z, #1 // =0x1 -; CHECK-NEXT: mov z3.s, p1/z, #1 // =0x1 -; CHECK-NEXT: add z0.s, z0.s, z3.s -; CHECK-NEXT: add z1.s, z1.s, z2.s +; CHECK-NEXT: cmphi p1.s, p0/z, z1.s, z3.s +; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z2.s, #1 // =0x1 +; CHECK-NEXT: add z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: add z1.s, p1/m, z1.s, z2.s ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = zext %v to @@ -131,18 +130,15 @@ ; CHECK-LABEL: zext.add.16xi32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: cmphi p1.s, p0/z, z2.s, z6.s -; CHECK-NEXT: cmphi p2.s, p0/z, z0.s, z4.s +; CHECK-NEXT: cmphi p1.s, p0/z, z3.s, z7.s +; CHECK-NEXT: cmphi p2.s, p0/z, z2.s, z6.s ; CHECK-NEXT: cmphi p3.s, p0/z, z1.s, z5.s -; CHECK-NEXT: cmphi p0.s, p0/z, z3.s, z7.s -; CHECK-NEXT: mov z4.s, p0/z, #1 // =0x1 -; CHECK-NEXT: mov z5.s, p1/z, #1 // =0x1 -; CHECK-NEXT: mov z6.s, p3/z, #1 // =0x1 -; CHECK-NEXT: mov z7.s, p2/z, #1 // =0x1 -; CHECK-NEXT: add z0.s, z0.s, z7.s -; CHECK-NEXT: add z1.s, z1.s, z6.s -; CHECK-NEXT: add z2.s, z2.s, z5.s -; CHECK-NEXT: add z3.s, z3.s, z4.s +; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z4.s +; CHECK-NEXT: mov z4.s, #1 // =0x1 +; CHECK-NEXT: add z0.s, p0/m, z0.s, z4.s +; CHECK-NEXT: add z1.s, p3/m, z1.s, z4.s +; CHECK-NEXT: add z2.s, p2/m, z2.s, z4.s +; CHECK-NEXT: add z3.s, p1/m, z3.s, z4.s ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = zext %v to @@ -158,8 +154,8 @@ ; CHECK-NEXT: and z1.h, z1.h, #0xff ; CHECK-NEXT: and z2.h, z2.h, #0xff ; CHECK-NEXT: cmphi p0.h, p0/z, z2.h, z1.h -; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.h, z0.h, z1.h +; 
CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = zext %v to @@ -175,8 +171,8 @@ ; CHECK-NEXT: and z1.s, z1.s, #0xffff ; CHECK-NEXT: and z2.s, z2.s, #0xffff ; CHECK-NEXT: cmphi p0.s, p0/z, z2.s, z1.s -; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = zext %v to @@ -192,8 +188,8 @@ ; CHECK-NEXT: and z1.d, z1.d, #0xffffffff ; CHECK-NEXT: and z2.d, z2.d, #0xffffffff ; CHECK-NEXT: cmphi p0.d, p0/z, z2.d, z1.d -; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = zext %v to @@ -206,8 +202,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: cmphi p0.b, p0/z, z0.b, z1.b -; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.b, z0.b, z1.b +; CHECK-NEXT: mov z1.b, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = zext %v to @@ -220,8 +216,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmphi p0.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = zext %v to @@ -234,8 +230,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.s -; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = 
zext %v to @@ -248,8 +244,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmphi p0.d, p0/z, z0.d, z1.d -; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = zext %v to @@ -263,10 +259,9 @@ ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmphi p1.s, p0/z, z1.s, z3.s ; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z2.s -; CHECK-NEXT: mov z2.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z3.s, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.s, z0.s, z2.s -; CHECK-NEXT: add z1.s, z1.s, z3.s +; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: add z1.s, p1/m, z1.s, z2.s ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = zext %v to @@ -282,14 +277,11 @@ ; CHECK-NEXT: cmphi p2.s, p0/z, z2.s, z6.s ; CHECK-NEXT: cmphi p3.s, p0/z, z1.s, z5.s ; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z4.s -; CHECK-NEXT: mov z4.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z5.s, p3/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.s, z0.s, z4.s -; CHECK-NEXT: add z1.s, z1.s, z5.s -; CHECK-NEXT: mov z4.s, p2/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z5.s, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z2.s, z2.s, z4.s -; CHECK-NEXT: add z3.s, z3.s, z5.s +; CHECK-NEXT: mov z4.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.s, p0/m, z0.s, z4.s +; CHECK-NEXT: add z1.s, p3/m, z1.s, z4.s +; CHECK-NEXT: add z2.s, p2/m, z2.s, z4.s +; CHECK-NEXT: add z3.s, p1/m, z3.s, z4.s ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = zext %v to @@ -305,8 +297,8 @@ ; CHECK-NEXT: and z1.h, z1.h, #0xff ; CHECK-NEXT: and z2.h, z2.h, #0xff ; CHECK-NEXT: cmphi p0.h, p0/z, z2.h, z1.h -; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add 
z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = sext %v to @@ -322,8 +314,8 @@ ; CHECK-NEXT: and z1.s, z1.s, #0xffff ; CHECK-NEXT: and z2.s, z2.s, #0xffff ; CHECK-NEXT: cmphi p0.s, p0/z, z2.s, z1.s -; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = sext %v to @@ -339,8 +331,8 @@ ; CHECK-NEXT: and z1.d, z1.d, #0xffffffff ; CHECK-NEXT: and z2.d, z2.d, #0xffffffff ; CHECK-NEXT: cmphi p0.d, p0/z, z2.d, z1.d -; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = sext %v to @@ -353,8 +345,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: cmphi p0.b, p0/z, z0.b, z1.b -; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.b, z0.b, z1.b +; CHECK-NEXT: mov z1.b, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = sext %v to @@ -367,8 +359,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmphi p0.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = sext %v to @@ -381,8 +373,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.s -; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = sext %v to @@ -395,8 +387,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: 
ptrue p0.d ; CHECK-NEXT: cmphi p0.d, p0/z, z0.d, z1.d -; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = sext %v to @@ -408,12 +400,11 @@ ; CHECK-LABEL: sext.add.8xi32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: cmphi p1.s, p0/z, z0.s, z2.s -; CHECK-NEXT: cmphi p0.s, p0/z, z1.s, z3.s -; CHECK-NEXT: mov z2.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z3.s, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.s, z0.s, z3.s -; CHECK-NEXT: add z1.s, z1.s, z2.s +; CHECK-NEXT: cmphi p1.s, p0/z, z1.s, z3.s +; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: add z1.s, p1/m, z1.s, z2.s ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = sext %v to @@ -425,18 +416,15 @@ ; CHECK-LABEL: sext.add.16xi32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: cmphi p1.s, p0/z, z2.s, z6.s -; CHECK-NEXT: cmphi p2.s, p0/z, z0.s, z4.s +; CHECK-NEXT: cmphi p1.s, p0/z, z3.s, z7.s +; CHECK-NEXT: cmphi p2.s, p0/z, z2.s, z6.s ; CHECK-NEXT: cmphi p3.s, p0/z, z1.s, z5.s -; CHECK-NEXT: cmphi p0.s, p0/z, z3.s, z7.s -; CHECK-NEXT: mov z4.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z5.s, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z6.s, p3/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z7.s, p2/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.s, z0.s, z7.s -; CHECK-NEXT: add z1.s, z1.s, z6.s -; CHECK-NEXT: add z2.s, z2.s, z5.s -; CHECK-NEXT: add z3.s, z3.s, z4.s +; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z4.s +; CHECK-NEXT: mov z4.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.s, p0/m, z0.s, z4.s +; CHECK-NEXT: add z1.s, p3/m, z1.s, z4.s +; CHECK-NEXT: add z2.s, p2/m, z2.s, z4.s +; CHECK-NEXT: add z3.s, p1/m, z3.s, z4.s ; CHECK-NEXT: ret %v = icmp ugt 
%a0, %a1 %extend = sext %v to @@ -452,8 +440,8 @@ ; CHECK-NEXT: and z1.h, z1.h, #0xff ; CHECK-NEXT: and z2.h, z2.h, #0xff ; CHECK-NEXT: cmphi p0.h, p0/z, z2.h, z1.h -; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sub z0.h, z0.h, z1.h +; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sub z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = sext %v to @@ -469,8 +457,8 @@ ; CHECK-NEXT: and z1.s, z1.s, #0xffff ; CHECK-NEXT: and z2.s, z2.s, #0xffff ; CHECK-NEXT: cmphi p0.s, p0/z, z2.s, z1.s -; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sub z0.s, z0.s, z1.s +; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sub z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = sext %v to @@ -486,8 +474,8 @@ ; CHECK-NEXT: and z1.d, z1.d, #0xffffffff ; CHECK-NEXT: and z2.d, z2.d, #0xffffffff ; CHECK-NEXT: cmphi p0.d, p0/z, z2.d, z1.d -; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sub z0.d, z0.d, z1.d +; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sub z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = sext %v to @@ -500,8 +488,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: cmphi p0.b, p0/z, z0.b, z1.b -; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sub z0.b, z0.b, z1.b +; CHECK-NEXT: mov z1.b, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sub z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = sext %v to @@ -514,8 +502,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmphi p0.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sub z0.h, z0.h, z1.h +; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sub z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = sext %v to @@ -528,8 +516,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmphi 
p0.s, p0/z, z0.s, z1.s -; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sub z0.s, z0.s, z1.s +; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sub z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = sext %v to @@ -542,8 +530,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmphi p0.d, p0/z, z0.d, z1.d -; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sub z0.d, z0.d, z1.d +; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sub z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = sext %v to @@ -555,12 +543,11 @@ ; CHECK-LABEL: sext.sub.8xi32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: cmphi p1.s, p0/z, z0.s, z2.s -; CHECK-NEXT: cmphi p0.s, p0/z, z1.s, z3.s -; CHECK-NEXT: mov z2.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z3.s, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sub z0.s, z0.s, z3.s -; CHECK-NEXT: sub z1.s, z1.s, z2.s +; CHECK-NEXT: cmphi p1.s, p0/z, z1.s, z3.s +; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sub z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: sub z1.s, p1/m, z1.s, z2.s ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = sext %v to @@ -572,18 +559,15 @@ ; CHECK-LABEL: sext.sub.16xi32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: cmphi p1.s, p0/z, z2.s, z6.s -; CHECK-NEXT: cmphi p2.s, p0/z, z0.s, z4.s +; CHECK-NEXT: cmphi p1.s, p0/z, z3.s, z7.s +; CHECK-NEXT: cmphi p2.s, p0/z, z2.s, z6.s ; CHECK-NEXT: cmphi p3.s, p0/z, z1.s, z5.s -; CHECK-NEXT: cmphi p0.s, p0/z, z3.s, z7.s -; CHECK-NEXT: mov z4.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z5.s, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z6.s, p3/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z7.s, p2/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sub z0.s, z0.s, z7.s -; CHECK-NEXT: sub z1.s, z1.s, z6.s -; CHECK-NEXT: sub z2.s, z2.s, z5.s -; 
CHECK-NEXT: sub z3.s, z3.s, z4.s +; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z4.s +; CHECK-NEXT: mov z4.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sub z0.s, p0/m, z0.s, z4.s +; CHECK-NEXT: sub z1.s, p3/m, z1.s, z4.s +; CHECK-NEXT: sub z2.s, p2/m, z2.s, z4.s +; CHECK-NEXT: sub z3.s, p1/m, z3.s, z4.s ; CHECK-NEXT: ret %v = icmp ugt %a0, %a1 %extend = sext %v to