diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -975,17 +975,35 @@ static Optional instCombineSVEPTest(InstCombiner &IC, IntrinsicInst &II) { - IntrinsicInst *Pg = dyn_cast(II.getArgOperand(0)); - IntrinsicInst *Op = dyn_cast(II.getArgOperand(1)); + Value *PgVal = II.getArgOperand(0); + Value *OpVal = II.getArgOperand(1); + + IRBuilder<> Builder(II.getContext()); + Builder.SetInsertPoint(&II); + + // PTEST_<FIRST|LAST>(X, X) is equivalent to PTEST_ANY(X, X). + // Later optimizations prefer this form. + if (PgVal == OpVal && + (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first || + II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) { + Value *Ops[] = {PgVal, OpVal}; + Type *Tys[] = {PgVal->getType()}; + + auto *PTest = + Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptest_any, Tys, Ops); + PTest->takeName(&II); + + return IC.replaceInstUsesWith(II, PTest); + } + + IntrinsicInst *Pg = dyn_cast(PgVal); + IntrinsicInst *Op = dyn_cast(OpVal); if (!Pg || !Op) return None; Intrinsic::ID OpIID = Op->getIntrinsicID(); - IRBuilder<> Builder(II.getContext()); - Builder.SetInsertPoint(&II); - if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool && OpIID == Intrinsic::aarch64_sve_convert_to_svbool && Pg->getArgOperand(0)->getType() == Op->getArgOperand(0)->getType()) { diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve %s -o - | FileCheck %s +; RUN: opt -instcombine -S %s | llc -mtriple=aarch64--linux-gnu -mattr=+sve -o - | 
FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" ; ; Immediate Compares @@ -189,8 +191,7 @@ ; CHECK-LABEL: cmp8_ptest_first_xx: ; CHECK: // %bb.0: ; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b -; CHECK-NEXT: ptest p0, p0.b -; CHECK-NEXT: cset w0, mi +; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpge.nxv16i8( %pg, %a, %b) %2 = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1( %1, %1) @@ -205,8 +206,7 @@ ; CHECK-LABEL: cmp8_ptest_last_xx: ; CHECK: // %bb.0: ; CHECK-NEXT: cmpge p0.b, p0/z, z0.b, z1.b -; CHECK-NEXT: ptest p0, p0.b -; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.cmpge.nxv16i8( %pg, %a, %b) %2 = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1( %1, %1) @@ -235,8 +235,7 @@ ; CHECK-LABEL: cmp32_ptest_first_xx: ; CHECK: // %bb.0: ; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s -; CHECK-NEXT: ptest p0, p0.b -; CHECK-NEXT: cset w0, mi +; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) %2 = tail call @llvm.aarch64.sve.cmpge.nxv4i32( %1, %a, %b) @@ -252,8 +251,7 @@ ; CHECK-LABEL: cmp32_ptest_last_xx: ; CHECK: // %bb.0: ; CHECK-NEXT: cmpge p0.s, p0/z, z0.s, z1.s -; CHECK-NEXT: ptest p0, p0.b -; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %pg) %2 = tail call @llvm.aarch64.sve.cmpge.nxv4i32( %1, %a, %b) diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll @@ -3,6 +3,26 @@ target triple = "aarch64-unknown-linux-gnu" +; PTEST first can be changed to any if the mask and operand are the same +define i1 @ptest_first_to_any( %a) #0 { +; CHECK-LABEL: @ptest_first_to_any( +; CHECK-NEXT: [[OUT:%.*]] = call 
i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[A:%.*]], [[A]]) +; CHECK-NEXT: ret i1 [[OUT]] +; + %out = call i1 @llvm.aarch64.sve.ptest.first.nxv16i1( %a, %a) + ret i1 %out +} + +; PTEST last can be changed to any if the mask and operand are the same +define i1 @ptest_last_to_any( %a) #0 { +; CHECK-LABEL: @ptest_last_to_any( +; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[A:%.*]], [[A]]) +; CHECK-NEXT: ret i1 [[OUT]] +; + %out = call i1 @llvm.aarch64.sve.ptest.last.nxv16i1( %a, %a) + ret i1 %out +} + define i1 @ptest_any1( %a) #0 { ; CHECK-LABEL: @ptest_any1( ; CHECK-NEXT: [[MASK:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 0) @@ -47,7 +67,7 @@ define i1 @ptest_first_same_ops( %a) #0 { ; CHECK-LABEL: @ptest_first_same_ops( -; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.aarch64.sve.ptest.first.nxv2i1( [[A:%.*]], [[A]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv2i1( [[A:%.*]], [[A]]) ; CHECK-NEXT: ret i1 [[TMP1]] ; %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a)