diff --git a/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp b/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp --- a/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp +++ b/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp @@ -61,7 +61,8 @@ private: MachineInstr *getOperandDef(const MachineOperand &MO); - MachineInstr *convertToFlagSetting(MachineInstr &MI, bool IsFlagSetting); + MachineInstr *convertToFlagSetting(MachineInstr &MI, bool IsFlagSetting, + bool Is64Bit); MachineInstr *convertToCondBr(MachineInstr &MI); bool tryToTuneBranch(MachineInstr &MI, MachineInstr &DefMI); }; @@ -84,7 +85,8 @@ } MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI, - bool IsFlagSetting) { + bool IsFlagSetting, + bool Is64Bit) { // If this is already the flag setting version of the instruction (e.g., SUBS) // just make sure the implicit-def of NZCV isn't marked dead. if (IsFlagSetting) { @@ -93,8 +95,7 @@ MO.setIsDead(false); return &MI; } - bool Is64Bit; - unsigned NewOpc = TII->convertToFlagSettingOpc(MI.getOpcode(), Is64Bit); + unsigned NewOpc = TII->convertToFlagSettingOpc(MI.getOpcode()); Register NewDestReg = MI.getOperand(0).getReg(); if (MRI->hasOneNonDBGUse(MI.getOperand(0).getReg())) NewDestReg = Is64Bit ? AArch64::XZR : AArch64::WZR; @@ -198,7 +199,7 @@ LLVM_DEBUG(dbgs() << " "); LLVM_DEBUG(MI.print(dbgs())); - NewCmp = convertToFlagSetting(DefMI, IsFlagSetting); + NewCmp = convertToFlagSetting(DefMI, IsFlagSetting, /*Is64Bit=*/false); NewBr = convertToCondBr(MI); break; } @@ -253,7 +254,7 @@ LLVM_DEBUG(dbgs() << " "); LLVM_DEBUG(MI.print(dbgs())); - NewCmp = convertToFlagSetting(DefMI, IsFlagSetting); + NewCmp = convertToFlagSetting(DefMI, IsFlagSetting, /*Is64Bit=*/true); NewBr = convertToCondBr(MI); break; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -125,7 +125,7 @@ /// Return the opcode that set flags when possible. The caller is /// responsible for ensuring the opc has a flag setting equivalent. - static unsigned convertToFlagSettingOpc(unsigned Opc, bool &Is64Bit); + static unsigned convertToFlagSettingOpc(unsigned Opc); /// Return true if this is a load/store that can be potentially paired/merged. bool isCandidateToMergeOrPair(const MachineInstr &MI) const; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1333,51 +1333,47 @@ // Fallthough to simply remove the PTEST. } else { - switch (Pred->getOpcode()) { + // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the + // opcode so the PTEST becomes redundant. + switch (PredOpcode) { + case AArch64::AND_PPzPP: + case AArch64::BIC_PPzPP: + case AArch64::EOR_PPzPP: + case AArch64::NAND_PPzPP: + case AArch64::NOR_PPzPP: + case AArch64::ORN_PPzPP: + case AArch64::ORR_PPzPP: + case AArch64::BRKA_PPzP: + case AArch64::BRKPA_PPzPP: case AArch64::BRKB_PPzP: - case AArch64::BRKPB_PPzPP: { - // Op 0 is chain, 1 is the mask, 2 the previous predicate to - // propagate, 3 the new predicate. - - // Check to see if our mask is the same as the brkpb's. If - // not the resulting flag bits may be different and we - // can't remove the ptest. + case AArch64::BRKPB_PPzPP: + case AArch64::RDFFR_PPz: { + // Check to see if our mask is the same. If not the resulting flag bits + // may be different and we can't remove the ptest. auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg()); if (Mask != PredMask) return false; - - // Switch to the new opcode - NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ? AArch64::BRKBS_PPzP - : AArch64::BRKPBS_PPzPP; - OpChanged = true; break; } case AArch64::BRKN_PPzP: { + // BRKN uses an all active implicit mask to set flags unlike the other + // flag-setting instructions. // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B). if ((MaskOpcode != AArch64::PTRUE_B) || (Mask->getOperand(1).getImm() != 31)) return false; - - NewOp = AArch64::BRKNS_PPzP; - OpChanged = true; break; } - case AArch64::RDFFR_PPz: { - // rdffr p1.b, PredMask=p0/z <--- Definition of Pred - // ptest Mask=p0, Pred=p1.b <--- If equal masks, remove this and use - // `rdffrs p1.b, p0/z` above. - auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg()); - if (Mask != PredMask) - return false; - - NewOp = AArch64::RDFFRS_PPz; - OpChanged = true; + case AArch64::PTRUE_B: + // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A) break; - } default: // Bail out if we don't recognize the input return false; } + + NewOp = convertToFlagSettingOpc(PredOpcode); + OpChanged = true; } const TargetRegisterInfo *TRI = &getRegisterInfo(); @@ -2388,91 +2384,93 @@ } } -unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc, - bool &Is64Bit) { +unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) { switch (Opc) { default: llvm_unreachable("Opcode has no flag setting equivalent!"); // 32-bit cases: case AArch64::ADDWri: - Is64Bit = false; return AArch64::ADDSWri; case AArch64::ADDWrr: - Is64Bit = false; return AArch64::ADDSWrr; case AArch64::ADDWrs: - Is64Bit = false; return AArch64::ADDSWrs; case AArch64::ADDWrx: - Is64Bit = false; return AArch64::ADDSWrx; case AArch64::ANDWri: - Is64Bit = false; return AArch64::ANDSWri; case AArch64::ANDWrr: - Is64Bit = false; return AArch64::ANDSWrr; case AArch64::ANDWrs: - Is64Bit = false; return AArch64::ANDSWrs; case AArch64::BICWrr: - Is64Bit = false; return AArch64::BICSWrr; case AArch64::BICWrs: - Is64Bit = false; return AArch64::BICSWrs; case AArch64::SUBWri: - Is64Bit = false; return AArch64::SUBSWri; case AArch64::SUBWrr: - Is64Bit = false; return AArch64::SUBSWrr; case AArch64::SUBWrs: - Is64Bit = false; return AArch64::SUBSWrs; case AArch64::SUBWrx: - Is64Bit = false; return AArch64::SUBSWrx; // 64-bit cases: case AArch64::ADDXri: - Is64Bit = true; return AArch64::ADDSXri; case AArch64::ADDXrr: - Is64Bit = true; return AArch64::ADDSXrr; case AArch64::ADDXrs: - Is64Bit = true; return AArch64::ADDSXrs; case AArch64::ADDXrx: - Is64Bit = true; return AArch64::ADDSXrx; case AArch64::ANDXri: - Is64Bit = true; return AArch64::ANDSXri; case AArch64::ANDXrr: - Is64Bit = true; return AArch64::ANDSXrr; case AArch64::ANDXrs: - Is64Bit = true; return AArch64::ANDSXrs; case AArch64::BICXrr: - Is64Bit = true; return AArch64::BICSXrr; case AArch64::BICXrs: - Is64Bit = true; return AArch64::BICSXrs; case AArch64::SUBXri: - Is64Bit = true; return AArch64::SUBSXri; case AArch64::SUBXrr: - Is64Bit = true; return AArch64::SUBSXrr; case AArch64::SUBXrs: - Is64Bit = true; return AArch64::SUBSXrs; case AArch64::SUBXrx: - Is64Bit = true; return AArch64::SUBSXrx; + // SVE instructions: + case AArch64::AND_PPzPP: + return AArch64::ANDS_PPzPP; + case AArch64::BIC_PPzPP: + return AArch64::BICS_PPzPP; + case AArch64::EOR_PPzPP: + return AArch64::EORS_PPzPP; + case AArch64::NAND_PPzPP: + return AArch64::NANDS_PPzPP; + case AArch64::NOR_PPzPP: + return AArch64::NORS_PPzPP; + case AArch64::ORN_PPzPP: + return AArch64::ORNS_PPzPP; + case AArch64::ORR_PPzPP: + return AArch64::ORRS_PPzPP; + case AArch64::BRKA_PPzP: + return AArch64::BRKAS_PPzP; + case AArch64::BRKPA_PPzPP: + return AArch64::BRKPAS_PPzPP; + case AArch64::BRKB_PPzP: + return AArch64::BRKBS_PPzP; + case AArch64::BRKPB_PPzPP: + return AArch64::BRKPBS_PPzPP; + case AArch64::BRKN_PPzP: + return AArch64::BRKNS_PPzP; + case AArch64::RDFFR_PPz: + return AArch64::RDFFRS_PPz; + case AArch64::PTRUE_B: + return AArch64::PTRUES_B; } } diff --git a/llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll b/llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll --- a/llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll @@ -7,8 +7,7 @@ ; CHECK-LABEL: reduce_and_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: not p0.b, p1/z, p0.b -; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: nots p0.b, p1/z, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %res = call i1 @llvm.vector.reduce.and.i1.nxv16i1( %vec) @@ -19,8 +18,7 @@ ; CHECK-LABEL: reduce_and_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: not p0.b, p1/z, p0.b -; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: nots p0.b, p1/z, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %res = call i1 @llvm.vector.reduce.and.i1.nxv8i1( %vec) @@ -31,8 +29,7 @@ ; CHECK-LABEL: reduce_and_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: not p0.b, p1/z, p0.b -; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: nots p0.b, p1/z, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %res = call i1 @llvm.vector.reduce.and.i1.nxv4i1( %vec) @@ -43,8 +40,7 @@ ; CHECK-LABEL: reduce_and_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: not p0.b, p1/z, p0.b -; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: nots p0.b, p1/z, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %res = call i1 @llvm.vector.reduce.and.i1.nxv2i1( %vec) @@ -185,8 +181,7 @@ ; CHECK-LABEL: reduce_smax_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: not p0.b, p1/z, p0.b -; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: nots p0.b, p1/z, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %res = call i1 @llvm.vector.reduce.smax.i1.nxv16i1( %vec) @@ -197,8 +192,7 @@ ; CHECK-LABEL: reduce_smax_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: not p0.b, p1/z, p0.b -; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: nots p0.b, p1/z, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %res = call i1 @llvm.vector.reduce.smax.i1.nxv8i1( %vec) @@ -209,8 +203,7 @@ ; CHECK-LABEL: reduce_smax_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: not p0.b, p1/z, p0.b -; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: nots p0.b, p1/z, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %res = call i1 @llvm.vector.reduce.smax.i1.nxv4i1( %vec) @@ -221,8 +214,7 @@ ; CHECK-LABEL: reduce_smax_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: not p0.b, p1/z, p0.b -; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: nots p0.b, p1/z, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %res = call i1 @llvm.vector.reduce.smax.i1.nxv2i1( %vec) @@ -362,8 +354,7 @@ ; CHECK-LABEL: reduce_umin_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: not p0.b, p1/z, p0.b -; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: nots p0.b, p1/z, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %res = call i1 @llvm.vector.reduce.umin.i1.nxv16i1( %vec) @@ -374,8 +365,7 @@ ; CHECK-LABEL: reduce_umin_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: not p0.b, p1/z, p0.b -; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: nots p0.b, p1/z, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %res = call i1 @llvm.vector.reduce.umin.i1.nxv8i1( %vec) @@ -386,8 +376,7 @@ ; CHECK-LABEL: reduce_umin_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: not p0.b, p1/z, p0.b -; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: nots p0.b, p1/z, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %res = call i1 @llvm.vector.reduce.umin.i1.nxv4i1( %vec) @@ -411,8 +400,7 @@ ; CHECK-LABEL: reduce_umin_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: not p0.b, p1/z, p0.b -; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: nots p0.b, p1/z, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %res = call i1 @llvm.vector.reduce.umin.i1.nxv2i1( %vec) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-brk.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-brk.ll --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-brk.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-brk.ll @@ -6,8 +6,7 @@ define i32 @brkpa( %pg, %a, %b) { ; CHECK-LABEL: brkpa: ; CHECK: // %bb.0: -; CHECK-NEXT: brkpa p1.b, p0/z, p1.b, p2.b -; CHECK-NEXT: ptest p0, p1.b +; CHECK-NEXT: brkpas p0.b, p0/z, p1.b, p2.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.brkpa.z.nxv16i1( %pg, %a, %b) @@ -31,8 +30,7 @@ define i32 @brka( %pg, %a) { ; CHECK-LABEL: brka: ; CHECK: // %bb.0: -; CHECK-NEXT: brka p1.b, p0/z, p1.b -; CHECK-NEXT: ptest p0, p1.b +; CHECK-NEXT: brkas p0.b, p0/z, p1.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.brka.z.nxv16i1( %pg, %a) @@ -53,19 +51,6 @@ ret i32 %conv } -define i32 @brkn( %pg, %a, %b) { -; CHECK-LABEL: brkn: -; CHECK: // %bb.0: -; CHECK-NEXT: brkn p2.b, p0/z, p1.b, p2.b -; CHECK-NEXT: ptest p0, p2.b -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: ret - %1 = tail call @llvm.aarch64.sve.brkn.z.nxv16i1( %pg, %a, %b) - %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %1) - %conv = zext i1 %2 to i32 - ret i32 %conv -} - define i32 @brkn_all_active( %pg, %a, %b) { ; CHECK-LABEL: brkn_all_active: ; CHECK: // %bb.0: @@ -146,6 +131,19 @@ ret i32 %conv } +define i32 @brkn_neg2( %pg, %a, %b) { +; CHECK-LABEL: brkn_neg2: +; CHECK: // %bb.0: +; CHECK-NEXT: brkn p2.b, p0/z, p1.b, p2.b +; CHECK-NEXT: ptest p0, p2.b +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %1 = tail call @llvm.aarch64.sve.brkn.z.nxv16i1( %pg, %a, %b) + %2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %pg, %1) + %conv = zext i1 %2 to i32 + ret i32 %conv +} + declare @llvm.aarch64.sve.brkpa.z.nxv16i1(, , ) declare @llvm.aarch64.sve.brkpb.z.nxv16i1(, , ) declare @llvm.aarch64.sve.brka.z.nxv16i1(, ) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-log.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-log.ll --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-log.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-log.ll @@ -7,8 +7,7 @@ define i1 @and( %pg, %a, %b) { ; CHECK-LABEL: and: ; CHECK: // %bb.0: -; CHECK-NEXT: and p1.b, p0/z, p1.b, p2.b -; CHECK-NEXT: ptest p0, p1.b +; CHECK-NEXT: ands p0.b, p0/z, p1.b, p2.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.and.z.nxv16i1( %pg, %a, %b) @@ -19,8 +18,7 @@ define i1 @bic( %pg, %a, %b) { ; CHECK-LABEL: bic: ; CHECK: // %bb.0: -; CHECK-NEXT: bic p1.b, p0/z, p1.b, p2.b -; CHECK-NEXT: ptest p0, p1.b +; CHECK-NEXT: bics p0.b, p0/z, p1.b, p2.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.bic.z.nxv16i1( %pg, %a, %b) @@ -31,8 +29,7 @@ define i1 @eor( %pg, %a, %b) { ; CHECK-LABEL: eor: ; CHECK: // %bb.0: -; CHECK-NEXT: eor p1.b, p0/z, p1.b, p2.b -; CHECK-NEXT: ptest p0, p1.b +; CHECK-NEXT: eors p0.b, p0/z, p1.b, p2.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.eor.z.nxv16i1( %pg, %a, %b) @@ -43,8 +40,7 @@ define i1 @nand( %pg, %a, %b) { ; CHECK-LABEL: nand: ; CHECK: // %bb.0: -; CHECK-NEXT: nand p1.b, p0/z, p1.b, p2.b -; CHECK-NEXT: ptest p0, p1.b +; CHECK-NEXT: nands p0.b, p0/z, p1.b, p2.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.nand.z.nxv16i1( %pg, %a, %b) @@ -55,8 +51,7 @@ define i1 @nor( %pg, %a, %b) { ; CHECK-LABEL: nor: ; CHECK: // %bb.0: -; CHECK-NEXT: nor p1.b, p0/z, p1.b, p2.b -; CHECK-NEXT: ptest p0, p1.b +; CHECK-NEXT: nors p0.b, p0/z, p1.b, p2.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.nor.z.nxv16i1( %pg, %a, %b) @@ -67,8 +62,7 @@ define i1 @orn( %pg, %a, %b) { ; CHECK-LABEL: orn: ; CHECK: // %bb.0: -; CHECK-NEXT: orn p1.b, p0/z, p1.b, p2.b -; CHECK-NEXT: ptest p0, p1.b +; CHECK-NEXT: orns p0.b, p0/z, p1.b, p2.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.orn.z.nxv16i1( %pg, %a, %b) @@ -79,8 +73,7 @@ define i1 @orr( %pg, %a, %b) { ; CHECK-LABEL: orr: ; CHECK: // %bb.0: -; CHECK-NEXT: orr p1.b, p0/z, p1.b, p2.b -; CHECK-NEXT: ptest p0, p1.b +; CHECK-NEXT: orrs p0.b, p0/z, p1.b, p2.b ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.orr.z.nxv16i1( %pg, %a, %b) diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-ptrue.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-ptrue.ll --- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-ptrue.ll +++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-ptrue.ll @@ -4,8 +4,7 @@ define i1 @ptrue() { ; CHECK-LABEL: ptrue: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b, pow2 -; CHECK-NEXT: ptest p0, p0.b +; CHECK-NEXT: ptrues p0.b, pow2 ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %1 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 0) diff --git a/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll b/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll --- a/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll @@ -8,8 +8,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p2.b ; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b -; CHECK-NEXT: not p0.b, p2/z, p0.b -; CHECK-NEXT: ptest p2, p0.b +; CHECK-NEXT: nots p0.b, p2/z, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %res = call i1 @llvm.vector.reduce.and.nxv32i1( %a) @@ -29,8 +28,7 @@ ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: ptrue p4.b ; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b -; CHECK-NEXT: not p0.b, p4/z, p0.b -; CHECK-NEXT: ptest p4, p0.b +; CHECK-NEXT: nots p0.b, p4/z, p0.b ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: addvl sp, sp, #1 @@ -74,8 +72,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p2.b ; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b -; CHECK-NEXT: not p0.b, p2/z, p0.b -; CHECK-NEXT: ptest p2, p0.b +; CHECK-NEXT: nots p0.b, p2/z, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %res = call i1 @llvm.vector.reduce.smax.nxv32i1( %a) @@ -115,8 +112,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p2.b ; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b -; CHECK-NEXT: not p0.b, p2/z, p0.b -; CHECK-NEXT: ptest p2, p0.b +; CHECK-NEXT: nots p0.b, p2/z, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %res = call i1 @llvm.vector.reduce.umin.nxv32i1( %a) diff --git a/llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll b/llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll --- a/llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll +++ b/llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll @@ -53,8 +53,7 @@ ; CHECK-LABEL: reduce_and_insert_subvec_into_ones: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: not p0.b, p1/z, p0.b -; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: nots p0.b, p1/z, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %allones.ins = insertelement poison, i1 1, i32 0 @@ -68,8 +67,7 @@ ; CHECK-LABEL: reduce_and_insert_subvec_into_poison: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: not p0.b, p1/z, p0.b -; CHECK-NEXT: ptest p1, p0.b +; CHECK-NEXT: nots p0.b, p1/z, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t = call @llvm.vector.insert.nxv16i1.nxv4i1( poison, %in, i64 0) @@ -86,8 +84,7 @@ ; CHECK-NEXT: ptrue p2.b ; CHECK-NEXT: uzp1 p0.h, p0.h, p3.h ; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b -; CHECK-NEXT: not p0.b, p2/z, p0.b -; CHECK-NEXT: ptest p2, p0.b +; CHECK-NEXT: nots p0.b, p2/z, p0.b ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t = call @llvm.vector.insert.nxv16i1.nxv4i1( %vec, %in, i64 0)