diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -47,6 +47,10 @@ ]>; def AArch64setcc_z : SDNode<"AArch64ISD::SETCC_MERGE_ZERO", SDT_AArch64Setcc>; +def AArch64setcc_z_oneuse : PatFrag<(ops node:$pg, node:$op1, node:$op2, node:$cc), + (AArch64setcc_z node:$pg, node:$op1, node:$op2, node:$cc), [{ + return N->hasOneUse(); +}]>; def SVEPatternOperand : AsmOperandClass { let Name = "SVEPattern"; @@ -5028,9 +5032,9 @@ (cmp $Op1, $Op2, $Op3)>; def : Pat<(predvt (AArch64setcc_z predvt:$Op1, intvt:$Op2, intvt:$Op3, invcc)), (cmp $Op1, $Op3, $Op2)>; - def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, cc))), + def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, cc))), (cmp $Pg, $Op2, $Op3)>; - def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, invcc))), + def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, invcc))), (cmp $Pg, $Op3, $Op2)>; } @@ -5040,9 +5044,9 @@ (cmp $Op1, $Op2)>; def : Pat<(predvt (AArch64setcc_z predvt:$Op1, (SVEDup0), intvt:$Op2, invcc)), (cmp $Op1, $Op2)>; - def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op1, (SVEDup0), cc))), + def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), intvt:$Op1, (SVEDup0), cc))), (cmp $Pg, $Op1)>; - def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), (SVEDup0), intvt:$Op1, invcc))), + def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), (SVEDup0), intvt:$Op1, invcc))), (cmp $Pg, $Op1)>; } @@ -5126,13 +5130,13 @@ commuted_cc)), (cmp $Pg, $Zs1, immtype:$imm)>; def : Pat<(predvt (and predvt:$Pg, - (AArch64setcc_z (predvt (AArch64ptrue 31)), + (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), (intvt ZPR:$Zs1), (intvt (splat_vector (immtype:$imm))), cc))), (cmp $Pg, $Zs1, immtype:$imm)>; def : Pat<(predvt (and predvt:$Pg, - (AArch64setcc_z (predvt (AArch64ptrue 31)), + (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), (intvt (splat_vector (immtype:$imm))), (intvt ZPR:$Zs1), commuted_cc))), diff --git a/llvm/test/CodeGen/AArch64/sve-fcmp.ll b/llvm/test/CodeGen/AArch64/sve-fcmp.ll --- a/llvm/test/CodeGen/AArch64/sve-fcmp.ll +++ b/llvm/test/CodeGen/AArch64/sve-fcmp.ll @@ -493,8 +493,8 @@ ; CHECK-LABEL: and_of_multiuse_fcmp_ogt: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s ; CHECK-NEXT: fcmgt p1.s, p1/z, z0.s, z1.s +; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b ; CHECK-NEXT: ret %cmp = fcmp ogt %x, %y %and = and %pg, %cmp @@ -507,8 +507,8 @@ ; CHECK-LABEL: and_of_multiuse_fcmp_ogt_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, #0.0 ; CHECK-NEXT: fcmgt p1.s, p1/z, z0.s, #0.0 +; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b ; CHECK-NEXT: ret %cmp = fcmp ogt %x, zeroinitializer %and = and %pg, %cmp @@ -521,8 +521,8 @@ ; CHECK-LABEL: and_of_multiuse_fcmp_olt: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z0.s ; CHECK-NEXT: fcmgt p1.s, p1/z, z1.s, z0.s +; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b ; CHECK-NEXT: ret %cmp = fcmp olt %x, %y %and = and %pg, %cmp @@ -535,8 +535,8 @@ ; CHECK-LABEL: and_of_multiuse_fcmp_olt_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: fcmlt p0.s, p0/z, z0.s, #0.0 ; CHECK-NEXT: fcmlt p1.s, p1/z, z0.s, #0.0 +; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b ; CHECK-NEXT: ret %cmp = fcmp olt %x, zeroinitializer %and = and %pg, %cmp diff --git a/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll b/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll --- a/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll +++ b/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll @@ -23,12 +23,11 @@ ; CHECK-NEXT: sxtw z5.d, p0/m, z6.d ; CHECK-NEXT: smin z4.d, p0/m, z4.d, z5.d ; CHECK-NEXT: cmpne p1.d, p0/z, z4.d, #0 -; CHECK-NEXT: ld1w { z5.d }, p1/z, [x1] +; CHECK-NEXT: ld1w { z4.d }, p1/z, [x1] ; CHECK-NEXT: ld1w { z0.d }, p1/z, [z0.d] -; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z5.s +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z4.s ; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z3.s -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: cmpne p2.d, p2/z, z4.d, #0 +; CHECK-NEXT: bic p2.b, p1/z, p1.b, p2.b ; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 ; CHECK-NEXT: add z2.d, p1/m, z2.d, z1.d ; CHECK-NEXT: uaddv d0, p0, z2.d diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares.ll @@ -1223,8 +1223,8 @@ ; CHECK-LABEL: and_of_multiuse_icmp_sle: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: cmpge p0.s, p0/z, z1.s, z0.s ; CHECK-NEXT: cmpge p1.s, p1/z, z1.s, z0.s +; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b ; CHECK-NEXT: ret %cmp = icmp sle %b, %c %and = and %a, %cmp @@ -1237,8 +1237,8 @@ ; CHECK-LABEL: and_of_multiuse_icmp_sle_imm: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: cmple p0.s, p0/z, z0.s, #1 ; CHECK-NEXT: cmple p1.s, p1/z, z0.s, #1 +; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b ; CHECK-NEXT: ret %imm = shufflevector insertelement ( undef, i32 1, i64 0), undef, zeroinitializer %cmp = icmp sle %b, %imm @@ -1252,8 +1252,8 @@ ; CHECK-LABEL: and_of_multiuse_icmp_ugt: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, z1.s ; CHECK-NEXT: cmphi p1.s, p1/z, z0.s, z1.s +; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b ; CHECK-NEXT: ret %cmp = icmp ugt %b, %c %and = and %a, %cmp @@ -1266,8 +1266,8 @@ ; CHECK-LABEL: and_of_multiuse_icmp_ugt_imm: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: cmphi p0.s, p0/z, z0.s, #1 ; CHECK-NEXT: cmphi p1.s, p1/z, z0.s, #1 +; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b ; CHECK-NEXT: ret %imm = shufflevector insertelement ( undef, i32 1, i64 0), undef, zeroinitializer %cmp = icmp ugt %b, %imm