diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -35,6 +35,7 @@ RISCVMergeBaseOffset.cpp RISCVRegisterBankInfo.cpp RISCVRegisterInfo.cpp + RISCVSExtWRemoval.cpp RISCVSubtarget.cpp RISCVTargetMachine.cpp RISCVTargetObjectFile.cpp diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -40,6 +40,9 @@ FunctionPass *createRISCVGatherScatterLoweringPass(); void initializeRISCVGatherScatterLoweringPass(PassRegistry &); +FunctionPass *createRISCVSExtWRemovalPass(); +void initializeRISCVSExtWRemovalPass(PassRegistry &); + FunctionPass *createRISCVMergeBaseOffsetOptPass(); void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &); diff --git a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp @@ -0,0 +1,266 @@ +//===-------------- RISCVSExtWRemoval.cpp - MI sext.w Removal -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// +// +// This pass removes unneeded sext.w instructions at the MI level. 
+//
+//===---------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-sextw-removal"
+
+STATISTIC(NumRemovedSExtW, "Number of removed sign-extensions");
+
+static cl::opt<bool> DisableSExtWRemoval("riscv-disable-sextw-removal",
+                                         cl::desc("Disable removal of sext.w"),
+                                         cl::init(false), cl::Hidden);
+namespace {
+/// Erases sext.w (ADDIW rd, rs1, 0) whose input is already sign extended.
+class RISCVSExtWRemoval : public MachineFunctionPass {
+public:
+  static char ID;
+
+  RISCVSExtWRemoval() : MachineFunctionPass(ID) {
+    initializeRISCVSExtWRemovalPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override { return "RISCV sext.w Removal"; }
+};
+
+} // end anonymous namespace
+
+char RISCVSExtWRemoval::ID = 0;
+INITIALIZE_PASS(RISCVSExtWRemoval, DEBUG_TYPE, "RISCV sext.w Removal", false,
+                false)
+
+FunctionPass *llvm::createRISCVSExtWRemovalPass() {
+  return new RISCVSExtWRemoval();
+}
+
+// This function returns true if the machine instruction always outputs a value
+// where bits 63:32 match bit 31.
+// TODO: Allocate a bit in TSFlags for the W instructions?
+// TODO: Add other W instructions.
+static bool isSignExtendingOpW(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case RISCV::LUI:
+  case RISCV::LW:
+  case RISCV::ADDW:
+  case RISCV::ADDIW:
+  case RISCV::SUBW:
+  case RISCV::MULW:
+  case RISCV::SLLW:
+  case RISCV::SLLIW:
+  case RISCV::SRAW:
+  case RISCV::SRAIW:
+  case RISCV::SRLW:
+  case RISCV::SRLIW:
+  case RISCV::DIVW:
+  case RISCV::DIVUW:
+  case RISCV::REMW:
+  case RISCV::REMUW:
+  case RISCV::ROLW:
+  case RISCV::RORW:
+  case RISCV::RORIW:
+  case RISCV::CLZW:
+  case RISCV::CTZW:
+  case RISCV::CPOPW:
+  case RISCV::FCVT_W_H:
+  case RISCV::FCVT_WU_H:
+  case RISCV::FCVT_W_S:
+  case RISCV::FCVT_WU_S:
+  case RISCV::FCVT_W_D:
+  case RISCV::FCVT_WU_D:
+  // The following aren't W instructions, but are either sign extended from a
+  // smaller size or put zeros in bits 63:31.
+  case RISCV::LBU:
+  case RISCV::LHU:
+  case RISCV::LB:
+  case RISCV::LH:
+  case RISCV::SEXTB:
+  case RISCV::SEXTH:
+  case RISCV::ZEXTH_RV64:
+    return true;
+  }
+
+  // The LI pattern ADDI rd, X0, imm is sign extended.
+  if (MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() &&
+      MI.getOperand(1).getReg() == RISCV::X0)
+    return true;
+
+  // An ANDI with an 11 bit immediate will zero bits 63:11.
+  if (MI.getOpcode() == RISCV::ANDI && isUInt<11>(MI.getOperand(2).getImm()))
+    return true;
+
+  // Copying from X0 produces zero.
+  if (MI.getOpcode() == RISCV::COPY && MI.getOperand(1).getReg() == RISCV::X0)
+    return true;
+
+  return false;
+}
+
+static bool isSignExtendedW(const MachineInstr &OrigMI,
+                            MachineRegisterInfo &MRI) {
+  // Walk the defs feeding OrigMI; each must yield a sign extended value.
+  SmallPtrSet<const MachineInstr *, 4> Visited;
+  SmallVector<const MachineInstr *, 4> Worklist;
+
+  Worklist.push_back(&OrigMI);
+
+  while (!Worklist.empty()) {
+    const MachineInstr *MI = Worklist.pop_back_val();
+
+    // If we already visited this instruction, we don't need to check it again.
+    if (!Visited.insert(MI).second)
+      continue;
+
+    // If this is a sign extending operation we don't need to look any further.
+    if (isSignExtendingOpW(*MI))
+      continue;
+
+    // Is this an instruction that propagates sign extend.
+    switch (MI->getOpcode()) {
+    default:
+      // Unknown opcode, give up.
+      return false;
+    case RISCV::COPY: {
+      Register SrcReg = MI->getOperand(1).getReg();
+
+      // TODO: Handle arguments and returns from calls?
+
+      // If this is a copy from another register, check its source instruction.
+      if (!SrcReg.isVirtual())
+        return false;
+      const MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+      if (!SrcMI)
+        return false;
+
+      // Add SrcMI to the worklist.
+      Worklist.push_back(SrcMI);
+      break;
+    }
+    case RISCV::ANDI:
+    case RISCV::ORI:
+    case RISCV::XORI: {
+      // Logical operations use a sign extended 12-bit immediate. We just need
+      // to check if the other operand is sign extended.
+      Register SrcReg = MI->getOperand(1).getReg();
+      if (!SrcReg.isVirtual())
+        return false;
+      const MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+      if (!SrcMI)
+        return false;
+
+      // Add SrcMI to the worklist.
+      Worklist.push_back(SrcMI);
+      break;
+    }
+    case RISCV::AND:
+    case RISCV::OR:
+    case RISCV::XOR:
+    case RISCV::ANDN:
+    case RISCV::ORN:
+    case RISCV::XNOR:
+    case RISCV::MAX:
+    case RISCV::MAXU:
+    case RISCV::MIN:
+    case RISCV::MINU:
+    case RISCV::PHI: {
+      // If all incoming values are sign-extended, the output of AND, OR, XOR,
+      // MIN, MAX, or PHI is also sign-extended.
+
+      // The input registers for PHI are operand 1, 3, ...
+      // The input registers for others are operand 1 and 2.
+      unsigned E = 3, D = 1;
+      if (MI->getOpcode() == RISCV::PHI) {
+        E = MI->getNumOperands();
+        D = 2;
+      }
+
+      for (unsigned I = 1; I != E; I += D) {
+        if (!MI->getOperand(I).isReg())
+          return false;
+
+        Register SrcReg = MI->getOperand(I).getReg();
+        if (!SrcReg.isVirtual())
+          return false;
+        const MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+        if (!SrcMI)
+          return false;
+
+        // Add SrcMI to the worklist.
+        Worklist.push_back(SrcMI);
+      }
+
+      break;
+    }
+    }
+  }
+
+  // If we get here, then every node we visited produces a sign extended value
+  // or propagated sign extended values. So the result must be sign extended.
+  return true;
+}
+
+bool RISCVSExtWRemoval::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()) || DisableSExtWRemoval)
+    return false;
+
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+  // sext.w is ADDIW rd, rs1, 0, and ADDIW only exists on RV64.
+  if (!ST.is64Bit())
+    return false;
+
+  bool MadeChange = false;
+  for (MachineBasicBlock &MBB : MF) {
+    for (auto I = MBB.begin(), IE = MBB.end(); I != IE;) {
+      MachineInstr *MI = &*I++; // Step first; MI may be erased below.
+
+      // We're looking for the sext.w pattern ADDIW rd, rs1, 0.
+      if (MI->getOpcode() != RISCV::ADDIW || !MI->getOperand(2).isImm() ||
+          MI->getOperand(2).getImm() != 0 || !MI->getOperand(1).isReg())
+        continue;
+
+      // Input should be a virtual register.
+      Register SrcReg = MI->getOperand(1).getReg();
+      if (!SrcReg.isVirtual())
+        continue;
+
+      const MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
+      if (!SrcMI || !isSignExtendedW(*SrcMI, MRI))
+        continue;
+      // SrcReg replaces DstReg below, so it must fit DstReg's class.
+      Register DstReg = MI->getOperand(0).getReg();
+      if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg)))
+        continue;
+
+      LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
+      MRI.replaceRegWith(DstReg, SrcReg);
+      MRI.clearKillFlags(SrcReg); // SrcReg now has additional uses.
+      MI->eraseFromParent();
+      ++NumRemovedSExtW;
+      MadeChange = true;
+    }
+  }
+
+  return MadeChange;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -39,6 +39,7 @@
   initializeGlobalISel(*PR);
   initializeRISCVGatherScatterLoweringPass(*PR);
   initializeRISCVMergeBaseOffsetOptPass(*PR);
+  initializeRISCVSExtWRemovalPass(*PR);
   initializeRISCVExpandPseudoPass(*PR);
   initializeRISCVInsertVSETVLIPass(*PR);
 }
@@ -140,6 +141,7 @@
   void addPreEmitPass() override;
   void
addPreEmitPass2() override; void addPreSched2() override; + void addMachineSSAOptimization() override; void addPreRegAlloc() override; }; } // namespace @@ -194,6 +196,13 @@ addPass(createRISCVExpandAtomicPseudoPass()); } +void RISCVPassConfig::addMachineSSAOptimization() { + TargetPassConfig::addMachineSSAOptimization(); + // sext.w is ADDIW rd, rs1, 0, an RV64-only form; skip the pass on riscv32. + if (TM->getTargetTriple().getArch() == Triple::riscv64) + addPass(createRISCVSExtWRemovalPass()); +} + void RISCVPassConfig::addPreRegAlloc() { if (TM->getOptLevel() != CodeGenOpt::None) addPass(createRISCVMergeBaseOffsetOptPass()); diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll --- a/llvm/test/CodeGen/RISCV/atomic-signext.ll +++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll @@ -2573,7 +2573,7 @@ ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: j .LBB32_1 ; RV64I-NEXT: .LBB32_4: # %atomicrmw.end -; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -2663,7 +2663,7 @@ ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: j .LBB33_1 ; RV64I-NEXT: .LBB33_4: # %atomicrmw.end -; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -2753,7 +2753,7 @@ ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: j .LBB34_1 ; RV64I-NEXT: .LBB34_4: # %atomicrmw.end -; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -2843,7 +2843,7 @@ ; RV64I-NEXT: mv a2, s2 ; RV64I-NEXT: j .LBB35_1 ; RV64I-NEXT: .LBB35_4: # %atomicrmw.end -; RV64I-NEXT: sext.w a0, a3 +; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld 
s1, 24(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rv64zbs.ll b/llvm/test/CodeGen/RISCV/rv64zbs.ll --- a/llvm/test/CodeGen/RISCV/rv64zbs.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbs.ll @@ -57,7 +57,6 @@ ; RV64I-NEXT: sllw a1, a2, a1 ; RV64I-NEXT: not a1, a1 ; RV64I-NEXT: and a0, a1, a0 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64ZBS-LABEL: bclr_i32_load: @@ -67,7 +66,6 @@ ; RV64ZBS-NEXT: sllw a1, a2, a1 ; RV64ZBS-NEXT: not a1, a1 ; RV64ZBS-NEXT: and a0, a1, a0 -; RV64ZBS-NEXT: sext.w a0, a0 ; RV64ZBS-NEXT: ret %a = load i32, i32* %p %shl = shl i32 1, %b @@ -161,7 +159,6 @@ ; RV64I-NEXT: li a2, 1 ; RV64I-NEXT: sllw a1, a2, a1 ; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64ZBS-LABEL: bset_i32_load: @@ -170,7 +167,6 @@ ; RV64ZBS-NEXT: li a2, 1 ; RV64ZBS-NEXT: sllw a1, a2, a1 ; RV64ZBS-NEXT: or a0, a1, a0 -; RV64ZBS-NEXT: sext.w a0, a0 ; RV64ZBS-NEXT: ret %a = load i32, i32* %p %shl = shl i32 1, %b @@ -292,7 +288,6 @@ ; RV64I-NEXT: li a2, 1 ; RV64I-NEXT: sllw a1, a2, a1 ; RV64I-NEXT: xor a0, a1, a0 -; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ret ; ; RV64ZBS-LABEL: binv_i32_load: @@ -301,7 +296,6 @@ ; RV64ZBS-NEXT: li a2, 1 ; RV64ZBS-NEXT: sllw a1, a2, a1 ; RV64ZBS-NEXT: xor a0, a1, a0 -; RV64ZBS-NEXT: sext.w a0, a0 ; RV64ZBS-NEXT: ret %a = load i32, i32* %p %shl = shl i32 1, %b diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll @@ -0,0 +1,318 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+f \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64I +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+experimental-zbb,+f \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64ZBB +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+experimental-zbb,+f \ +; RUN: -riscv-disable-sextw-removal | FileCheck %s --check-prefix=NOREMOVAL 
+ +define void @test1(i32 signext %arg, i32 signext %arg1) nounwind { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: # %bb +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: mv s0, a1 +; CHECK-NEXT: sraw s1, a0, a1 +; CHECK-NEXT: .LBB0_1: # %bb2 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: call bar@plt +; CHECK-NEXT: sllw s1, s1, s0 +; CHECK-NEXT: bnez a0, .LBB0_1 +; CHECK-NEXT: # %bb.2: # %bb7 +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret +; +; NOREMOVAL-LABEL: test1: +; NOREMOVAL: # %bb.0: # %bb +; NOREMOVAL-NEXT: addi sp, sp, -32 +; NOREMOVAL-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; NOREMOVAL-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; NOREMOVAL-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; NOREMOVAL-NEXT: mv s0, a1 +; NOREMOVAL-NEXT: sraw s1, a0, a1 +; NOREMOVAL-NEXT: .LBB0_1: # %bb2 +; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1 +; NOREMOVAL-NEXT: sext.w a0, s1 +; NOREMOVAL-NEXT: call bar@plt +; NOREMOVAL-NEXT: sllw s1, s1, s0 +; NOREMOVAL-NEXT: bnez a0, .LBB0_1 +; NOREMOVAL-NEXT: # %bb.2: # %bb7 +; NOREMOVAL-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; NOREMOVAL-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; NOREMOVAL-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; NOREMOVAL-NEXT: addi sp, sp, 32 +; NOREMOVAL-NEXT: ret +bb: + %i = ashr i32 %arg, %arg1 + br label %bb2 + +bb2: ; preds = %bb2, %bb + %i3 = phi i32 [ %i, %bb ], [ %i5, %bb2 ] + %i4 = tail call signext i32 @bar(i32 signext %i3) + %i5 = shl i32 %i3, %arg1 + %i6 = icmp eq i32 %i4, 0 + br i1 %i6, label %bb7, label %bb2 + +bb7: ; preds = %bb2 + ret void +} + +declare signext i32 @bar(i32 signext) + +; The load here will be an anyext load in isel and 
sext.w will be emitted for +; the ret. Make sure we can look through logic ops to prove the sext.w is +; unnecessary. +define signext i32 @test2(i32* %p, i32 signext %b) nounwind { +; RV64I-LABEL: test2: +; RV64I: # %bb.0: +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: li a2, 1 +; RV64I-NEXT: sllw a1, a2, a1 +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: test2: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: lw a0, 0(a0) +; RV64ZBB-NEXT: li a2, 1 +; RV64ZBB-NEXT: sllw a1, a2, a1 +; RV64ZBB-NEXT: andn a0, a0, a1 +; RV64ZBB-NEXT: ret +; +; NOREMOVAL-LABEL: test2: +; NOREMOVAL: # %bb.0: +; NOREMOVAL-NEXT: lw a0, 0(a0) +; NOREMOVAL-NEXT: li a2, 1 +; NOREMOVAL-NEXT: sllw a1, a2, a1 +; NOREMOVAL-NEXT: andn a0, a0, a1 +; NOREMOVAL-NEXT: sext.w a0, a0 +; NOREMOVAL-NEXT: ret + %a = load i32, i32* %p + %shl = shl i32 1, %b + %neg = xor i32 %shl, -1 + %and1 = and i32 %neg, %a + ret i32 %and1 +} + +define signext i32 @test3(i32* %p, i32 signext %b) nounwind { +; RV64I-LABEL: test3: +; RV64I: # %bb.0: +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: li a2, 1 +; RV64I-NEXT: sllw a1, a2, a1 +; RV64I-NEXT: not a1, a1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: test3: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: lw a0, 0(a0) +; RV64ZBB-NEXT: li a2, 1 +; RV64ZBB-NEXT: sllw a1, a2, a1 +; RV64ZBB-NEXT: orn a0, a0, a1 +; RV64ZBB-NEXT: ret +; +; NOREMOVAL-LABEL: test3: +; NOREMOVAL: # %bb.0: +; NOREMOVAL-NEXT: lw a0, 0(a0) +; NOREMOVAL-NEXT: li a2, 1 +; NOREMOVAL-NEXT: sllw a1, a2, a1 +; NOREMOVAL-NEXT: orn a0, a0, a1 +; NOREMOVAL-NEXT: sext.w a0, a0 +; NOREMOVAL-NEXT: ret + %a = load i32, i32* %p + %shl = shl i32 1, %b + %neg = xor i32 %shl, -1 + %and1 = or i32 %neg, %a + ret i32 %and1 +} + +define signext i32 @test4(i32* %p, i32 signext %b) nounwind { +; RV64I-LABEL: test4: +; RV64I: # %bb.0: +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: li a2, 1 +; RV64I-NEXT: sllw a1, a2, a1 +; RV64I-NEXT: xor a0, a1, a0 +; RV64I-NEXT: not a0, a0 +; 
RV64I-NEXT: ret +; +; RV64ZBB-LABEL: test4: +; RV64ZBB: # %bb.0: +; RV64ZBB-NEXT: lw a0, 0(a0) +; RV64ZBB-NEXT: li a2, 1 +; RV64ZBB-NEXT: sllw a1, a2, a1 +; RV64ZBB-NEXT: xnor a0, a1, a0 +; RV64ZBB-NEXT: ret +; +; NOREMOVAL-LABEL: test4: +; NOREMOVAL: # %bb.0: +; NOREMOVAL-NEXT: lw a0, 0(a0) +; NOREMOVAL-NEXT: li a2, 1 +; NOREMOVAL-NEXT: sllw a1, a2, a1 +; NOREMOVAL-NEXT: xnor a0, a1, a0 +; NOREMOVAL-NEXT: sext.w a0, a0 +; NOREMOVAL-NEXT: ret + %a = load i32, i32* %p + %shl = shl i32 1, %b + %neg = xor i32 %shl, -1 + %and1 = xor i32 %neg, %a + ret i32 %and1 +} + +; Make sure we don't put a sext.w before bar when using cpopw. +define void @test5(i32 signext %arg, i32 signext %arg1) nounwind { +; RV64I-LABEL: test5: +; RV64I: # %bb.0: # %bb +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sraw a0, a0, a1 +; RV64I-NEXT: lui a1, 349525 +; RV64I-NEXT: addiw s2, a1, 1365 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: addiw s1, a1, 819 +; RV64I-NEXT: lui a1, 61681 +; RV64I-NEXT: addiw s3, a1, -241 +; RV64I-NEXT: lui a1, 4112 +; RV64I-NEXT: addiw s0, a1, 257 +; RV64I-NEXT: .LBB4_1: # %bb2 +; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64I-NEXT: call bar@plt +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: srli a0, a0, 1 +; RV64I-NEXT: and a0, a0, s2 +; RV64I-NEXT: subw a0, a1, a0 +; RV64I-NEXT: and a2, a0, s1 +; RV64I-NEXT: srli a0, a0, 2 +; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: srli a2, a0, 4 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: and a0, a0, s3 +; RV64I-NEXT: mulw a0, a0, s0 +; RV64I-NEXT: srliw a0, a0, 24 +; RV64I-NEXT: bnez a1, .LBB4_1 +; RV64I-NEXT: # %bb.2: # %bb7 +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64I-NEXT: 
ld s1, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: test5: +; RV64ZBB: # %bb.0: # %bb +; RV64ZBB-NEXT: addi sp, sp, -16 +; RV64ZBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64ZBB-NEXT: sraw a0, a0, a1 +; RV64ZBB-NEXT: .LBB4_1: # %bb2 +; RV64ZBB-NEXT: # =>This Inner Loop Header: Depth=1 +; RV64ZBB-NEXT: call bar@plt +; RV64ZBB-NEXT: mv a1, a0 +; RV64ZBB-NEXT: cpopw a0, a0 +; RV64ZBB-NEXT: bnez a1, .LBB4_1 +; RV64ZBB-NEXT: # %bb.2: # %bb7 +; RV64ZBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64ZBB-NEXT: addi sp, sp, 16 +; RV64ZBB-NEXT: ret +; +; NOREMOVAL-LABEL: test5: +; NOREMOVAL: # %bb.0: # %bb +; NOREMOVAL-NEXT: addi sp, sp, -16 +; NOREMOVAL-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; NOREMOVAL-NEXT: sraw a1, a0, a1 +; NOREMOVAL-NEXT: .LBB4_1: # %bb2 +; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1 +; NOREMOVAL-NEXT: sext.w a0, a1 +; NOREMOVAL-NEXT: call bar@plt +; NOREMOVAL-NEXT: cpopw a1, a0 +; NOREMOVAL-NEXT: bnez a0, .LBB4_1 +; NOREMOVAL-NEXT: # %bb.2: # %bb7 +; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; NOREMOVAL-NEXT: addi sp, sp, 16 +; NOREMOVAL-NEXT: ret +bb: + %i = ashr i32 %arg, %arg1 + br label %bb2 + +bb2: ; preds = %bb2, %bb + %i3 = phi i32 [ %i, %bb ], [ %i5, %bb2 ] + %i4 = tail call signext i32 @bar(i32 signext %i3) + %i5 = tail call i32 @llvm.ctpop.i32(i32 %i4) + %i6 = icmp eq i32 %i4, 0 + br i1 %i6, label %bb7, label %bb2 + +bb7: ; preds = %bb2 + ret void +} + +declare i32 @llvm.ctpop.i32(i32) + +define void @test6(i32 signext %arg, i32 signext %arg1) nounwind { +; CHECK-LABEL: test6: +; CHECK: # %bb.0: # %bb +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: sraw a0, a0, a1 +; CHECK-NEXT: fmv.w.x ft0, zero +; CHECK-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill +; CHECK-NEXT: .LBB5_1: # %bb2 +; CHECK-NEXT: # =>This Inner 
Loop Header: Depth=1 +; CHECK-NEXT: call baz@plt +; CHECK-NEXT: fmv.w.x ft0, a0 +; CHECK-NEXT: flw ft1, 4(sp) # 4-byte Folded Reload +; CHECK-NEXT: feq.s a1, ft0, ft1 +; CHECK-NEXT: fcvt.w.s a0, ft0, rtz +; CHECK-NEXT: beqz a1, .LBB5_1 +; CHECK-NEXT: # %bb.2: # %bb7 +; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret +; +; NOREMOVAL-LABEL: test6: +; NOREMOVAL: # %bb.0: # %bb +; NOREMOVAL-NEXT: addi sp, sp, -16 +; NOREMOVAL-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; NOREMOVAL-NEXT: sraw a0, a0, a1 +; NOREMOVAL-NEXT: fmv.w.x ft0, zero +; NOREMOVAL-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill +; NOREMOVAL-NEXT: .LBB5_1: # %bb2 +; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1 +; NOREMOVAL-NEXT: sext.w a0, a0 +; NOREMOVAL-NEXT: call baz@plt +; NOREMOVAL-NEXT: fmv.w.x ft0, a0 +; NOREMOVAL-NEXT: flw ft1, 4(sp) # 4-byte Folded Reload +; NOREMOVAL-NEXT: feq.s a1, ft0, ft1 +; NOREMOVAL-NEXT: fcvt.w.s a0, ft0, rtz +; NOREMOVAL-NEXT: beqz a1, .LBB5_1 +; NOREMOVAL-NEXT: # %bb.2: # %bb7 +; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; NOREMOVAL-NEXT: addi sp, sp, 16 +; NOREMOVAL-NEXT: ret +bb: + %i = ashr i32 %arg, %arg1 + br label %bb2 + +bb2: ; preds = %bb2, %bb + %i3 = phi i32 [ %i, %bb ], [ %i5, %bb2 ] + %i4 = tail call float @baz(i32 signext %i3) + %i5 = fptosi float %i4 to i32 + %i6 = fcmp oeq float %i4, zeroinitializer + br i1 %i6, label %bb7, label %bb2 + +bb7: ; preds = %bb2 + ret void +} +declare float @baz(i32 signext %i3) diff --git a/llvm/test/CodeGen/RISCV/split-offsets.ll b/llvm/test/CodeGen/RISCV/split-offsets.ll --- a/llvm/test/CodeGen/RISCV/split-offsets.ll +++ b/llvm/test/CodeGen/RISCV/split-offsets.ll @@ -83,8 +83,7 @@ ; RV64I-NEXT: add a0, a1, a5 ; RV64I-NEXT: add a1, a4, a5 ; RV64I-NEXT: sext.w a2, a2 -; RV64I-NEXT: sext.w a4, a3 -; RV64I-NEXT: bge a4, a2, .LBB1_2 +; RV64I-NEXT: bge a3, a2, .LBB1_2 ; RV64I-NEXT: .LBB1_1: # %while_body ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; 
RV64I-NEXT: addiw a4, a3, 1 @@ -93,8 +92,7 @@ ; RV64I-NEXT: sw a4, 0(a0) ; RV64I-NEXT: sw a3, 4(a0) ; RV64I-NEXT: mv a3, a4 -; RV64I-NEXT: sext.w a4, a3 -; RV64I-NEXT: blt a4, a2, .LBB1_1 +; RV64I-NEXT: blt a3, a2, .LBB1_1 ; RV64I-NEXT: .LBB1_2: # %while_end ; RV64I-NEXT: ret entry: