diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -21,6 +21,7 @@
 add_llvm_target(RISCVCodeGen
   RISCVAsmPrinter.cpp
   RISCVCallLowering.cpp
+  RISCVCodeGenPrepare.cpp
   RISCVMakeCompressible.cpp
   RISCVExpandAtomicPseudoInsts.cpp
   RISCVExpandPseudoInsts.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -30,6 +30,9 @@
 class MachineOperand;
 class PassRegistry;
 
+FunctionPass *createRISCVCodeGenPreparePass();
+void initializeRISCVCodeGenPreparePass(PassRegistry &);
+
 bool lowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                     AsmPrinter &AP);
 bool lowerRISCVMachineOperandToMCOperand(const MachineOperand &MO,
diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -0,0 +1,129 @@
+//===----- RISCVCodeGenPrepare.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a RISCV specific version of CodeGenPrepare.
+// It munges the code in the input function to better prepare it for
+// SelectionDAG-based code generation. This works around limitations in its
+// basic-block-at-a-time approach.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-codegenprepare"
+#define PASS_NAME "RISCV CodeGenPrepare"
+
+STATISTIC(NumZExtToSExt, "Number of ZExt instructions converted to SExt");
+
+namespace {
+
+/// Function pass that rewrites IR before instruction selection. Its only
+/// transform so far turns an i32 -> i64 zext into a sext when a dominating
+/// condition proves the source value is non-negative.
+class RISCVCodeGenPrepare : public FunctionPass {
+  // Both are assigned by runOnFunction before any instruction is visited.
+  const DataLayout *DL = nullptr;
+  const RISCVSubtarget *ST = nullptr;
+
+public:
+  static char ID;
+
+  RISCVCodeGenPrepare() : FunctionPass(ID) {}
+
+  bool runOnFunction(Function &F) override;
+
+  StringRef getPassName() const override { return PASS_NAME; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<TargetPassConfig>();
+  }
+
+private:
+  bool optimizeZExt(ZExtInst *I);
+};
+
+} // end anonymous namespace
+
+/// Try to replace \p ZExt with an equivalent sext. Returns true if the zext
+/// was replaced and erased, false if it was left untouched.
+bool RISCVCodeGenPrepare::optimizeZExt(ZExtInst *ZExt) {
+  if (!ST->is64Bit())
+    return false;
+
+  Value *Src = ZExt->getOperand(0);
+
+  // We only care about ZExt from i32 to i64.
+  if (!ZExt->getType()->isIntegerTy(64) || !Src->getType()->isIntegerTy(32))
+    return false;
+
+  // Look for an opportunity to replace (i64 (zext (i32 X))) with a sext if we
+  // can determine that the sign bit of X is zero via a dominating condition.
+  // This often occurs with widened induction variables.
+  //
+  // isImpliedByDomCondition returns an optional bool: no value means
+  // "unknown", while false means X >= 0 is known NOT to hold. Only a known
+  // true answer makes the rewrite sound, so test the contained value rather
+  // than the optional's has-value conversion.
+  auto IsNonNegative = isImpliedByDomCondition(
+      ICmpInst::ICMP_SGE, Src, Constant::getNullValue(Src->getType()), ZExt,
+      *DL);
+  if (!IsNonNegative || !*IsNonNegative)
+    return false;
+
+  IRBuilder<> Builder(ZExt);
+  Value *SExt = Builder.CreateSExt(Src, ZExt->getType());
+  SExt->takeName(ZExt);
+
+  ZExt->replaceAllUsesWith(SExt);
+  ZExt->eraseFromParent();
+  ++NumZExtToSExt;
+  return true;
+}
+
+/// Visit every zext in \p F. Returns true if any instruction was rewritten.
+bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
+  if (skipFunction(F))
+    return false;
+
+  auto &TPC = getAnalysis<TargetPassConfig>();
+  auto &TM = TPC.getTM<RISCVTargetMachine>();
+  ST = &TM.getSubtarget<RISCVSubtarget>(F);
+
+  DL = &F.getParent()->getDataLayout();
+
+  bool MadeChange = false;
+  for (auto &BB : F) {
+    // optimizeZExt may erase I, hence the early-increment range.
+    for (Instruction &I : llvm::make_early_inc_range(BB)) {
+      if (auto *ZExt = dyn_cast<ZExtInst>(&I))
+        MadeChange |= optimizeZExt(ZExt);
+    }
+  }
+
+  return MadeChange;
+}
+
+INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
+
+char RISCVCodeGenPrepare::ID = 0;
+
+FunctionPass *llvm::createRISCVCodeGenPreparePass() {
+  return new RISCVCodeGenPrepare();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -49,6 +49,7 @@
   initializeGlobalISel(*PR);
   initializeRISCVMakeCompressibleOptPass(*PR);
   initializeRISCVGatherScatterLoweringPass(*PR);
+  initializeRISCVCodeGenPreparePass(*PR);
   initializeRISCVMergeBaseOffsetOptPass(*PR);
   initializeRISCVSExtWRemovalPass(*PR);
   initializeRISCVExpandPseudoPass(*PR);
@@ -165,6 +166,7 @@
   }
 
   void addIRPasses() override;
+  void addCodeGenPrepare() override;
   bool addPreISel() override;
   bool addInstSelector() override;
   bool addIRTranslator() override;
@@ -192,6 +194,12 @@
   TargetPassConfig::addIRPasses();
 }
 
+void RISCVPassConfig::addCodeGenPrepare() {
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createRISCVCodeGenPreparePass());
+  TargetPassConfig::addCodeGenPrepare();
+}
+
 bool RISCVPassConfig::addPreISel() {
   if (TM->getOptLevel() != CodeGenOpt::None) {
     // Add a barrier before instruction selection so that we will not get
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -57,6 +57,7 @@
 ; CHECK-NEXT:       Expand reduction intrinsics
 ; CHECK-NEXT:       Natural Loop Information
 ; CHECK-NEXT:       TLS Variable Hoist
+; CHECK-NEXT:       RISCV CodeGenPrepare
 ; CHECK-NEXT:       CodeGen Prepare
 ; CHECK-NEXT:       Dominator Tree Construction
 ; CHECK-NEXT:       Exception handling preparation
diff --git a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv64 | FileCheck %s
+
+
+; Make sure we don't emit a pair of shifts for the zext in the preheader. We
+; can tell that bit 31 is 0 in the preheader and rely on %n already being
+; sign extended without adding zeros explicitly.
+define void @test1(ptr nocapture noundef %a, i32 noundef signext %n) {
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    blez a1, .LBB0_2
+; CHECK-NEXT:  .LBB0_1: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    lw a2, 0(a0)
+; CHECK-NEXT:    addiw a2, a2, 4
+; CHECK-NEXT:    sw a2, 0(a0)
+; CHECK-NEXT:    addi a1, a1, -1
+; CHECK-NEXT:    addi a0, a0, 4
+; CHECK-NEXT:    bnez a1, .LBB0_1
+; CHECK-NEXT:  .LBB0_2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %cmp3 = icmp sgt i32 %n, 0
+  br i1 %cmp3, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %add = add nsw i32 %0, 4
+  store i32 %add, ptr %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
diff --git a/llvm/test/CodeGen/RISCV/riscv-codegenprepare.ll b/llvm/test/CodeGen/RISCV/riscv-codegenprepare.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/riscv-codegenprepare.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -S -riscv-codegenprepare -mtriple=riscv64 | FileCheck %s
+
+; Test that we can convert the %wide.trip.count zext to a sext. The dominating
+; condition %cmp3 ruled out %n being negative.
+define void @test1(ptr nocapture noundef %a, i32 noundef signext %n) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP3]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.preheader:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = sext i32 [[N]] to i64
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[LSR_IV5:%.*]] = phi i64 [ [[WIDE_TRIP_COUNT]], [[FOR_BODY_PREHEADER]] ], [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi ptr [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ], [ [[UGLYGEP:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[LSR_IV]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[LSR_IV]], align 4
+; CHECK-NEXT:    [[UGLYGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 4
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV5]], -1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
+;
+entry:
+  %cmp3 = icmp sgt i32 %n, 0
+  br i1 %cmp3, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %lsr.iv5 = phi i64 [ %wide.trip.count, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
+  %lsr.iv = phi ptr [ %a, %for.body.preheader ], [ %uglygep, %for.body ]
+  %0 = load i32, ptr %lsr.iv, align 4
+  %add = add nsw i32 %0, 4
+  store i32 %add, ptr %lsr.iv, align 4
+  %uglygep = getelementptr i8, ptr %lsr.iv, i64 4
+  %lsr.iv.next = add nsw i64 %lsr.iv5, -1
+  %exitcond.not = icmp eq i64 %lsr.iv.next, 0
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}