diff --git a/llvm/lib/Target/AVR/AVR.h b/llvm/lib/Target/AVR/AVR.h
--- a/llvm/lib/Target/AVR/AVR.h
+++ b/llvm/lib/Target/AVR/AVR.h
@@ -22,6 +22,7 @@
 class AVRTargetMachine;
 class FunctionPass;
 
+Pass *createAVRShiftExpandPass();
 FunctionPass *createAVRISelDag(AVRTargetMachine &TM,
                                CodeGenOpt::Level OptLevel);
 FunctionPass *createAVRExpandPseudoPass();
@@ -30,6 +31,7 @@
 FunctionPass *createAVRDynAllocaSRPass();
 FunctionPass *createAVRBranchSelectionPass();
 
+void initializeAVRShiftExpandPass(PassRegistry &);
 void initializeAVRExpandPseudoPass(PassRegistry&);
 void initializeAVRRelaxMemPass(PassRegistry&);
 
diff --git a/llvm/lib/Target/AVR/AVRShiftExpand.cpp b/llvm/lib/Target/AVR/AVRShiftExpand.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AVR/AVRShiftExpand.cpp
@@ -0,0 +1,160 @@
+//===- AVRShiftExpand.cpp - Shift Expansion Pass --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Expand 32-bit shift instructions (shl, lshr, ashr) to inline loops, just
+/// like avr-gcc. This must be done in IR because otherwise the type legalizer
+/// will turn 32-bit shifts into (non-existing) library calls such as __ashlsi3.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AVR.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Function pass that rewrites every i32 shl/lshr/ashr whose shift amount is
+/// not a ConstantInt into a loop that shifts by one bit per iteration.
+class AVRShiftExpand : public FunctionPass {
+public:
+  static char ID;
+
+  AVRShiftExpand() : FunctionPass(ID) {}
+
+  /// Expand all eligible shifts in \p F. Returns true if \p F was changed.
+  bool runOnFunction(Function &F) override;
+
+  StringRef getPassName() const override { return "AVR Shift Expansion"; }
+
+private:
+  /// Replace \p BI with an equivalent shift-by-one loop and erase \p BI.
+  void expand(BinaryOperator *BI);
+};
+
+} // end anonymous namespace
+
+char AVRShiftExpand::ID = 0;
+
+INITIALIZE_PASS(AVRShiftExpand, "avr-shift-expand", "AVR Shift Expansion",
+                false, false)
+
+Pass *llvm::createAVRShiftExpandPass() { return new AVRShiftExpand(); }
+
+bool AVRShiftExpand::runOnFunction(Function &F) {
+  SmallVector<BinaryOperator *, 1> ShiftInsts;
+  Type *Int32Ty = Type::getInt32Ty(F.getContext());
+  for (Instruction &I : instructions(F)) {
+    // Only expand shift instructions (shl, lshr, ashr).
+    if (!I.isShift())
+      continue;
+    // Only expand plain i32 types.
+    if (I.getType() != Int32Ty)
+      continue;
+    // Only expand when the shift amount is not known.
+    // Known shift amounts are (currently) better expanded inline.
+    if (isa<ConstantInt>(I.getOperand(1)))
+      continue;
+    ShiftInsts.push_back(cast<BinaryOperator>(&I));
+  }
+
+  // There are no shift instructions in this function that should be expanded
+  // to loops.
+  if (ShiftInsts.empty())
+    return false;
+
+  // The expanding itself needs to be done separately as expand() will remove
+  // these instructions. Removing instructions while iterating over a basic
+  // block is not a great idea.
+  for (BinaryOperator *BI : ShiftInsts)
+    expand(BI);
+
+  // This function was modified.
+  return true;
+}
+
+void AVRShiftExpand::expand(BinaryOperator *BI) {
+  auto &Ctx = BI->getContext();
+  IRBuilder<> Builder(BI);
+  Type *Int32Ty = Type::getInt32Ty(Ctx);
+  Type *Int8Ty = Type::getInt8Ty(Ctx);
+  Value *Int8Zero = ConstantInt::get(Int8Ty, 0);
+  Value *Int32One = ConstantInt::get(Int32Ty, 1);
+
+  // Split the current basic block at the point of the existing shift
+  // instruction and insert a new basic block for the loop.
+  BasicBlock *BB = BI->getParent();
+  Function *F = BB->getParent();
+  BasicBlock *EndBB = BB->splitBasicBlock(BI, "shift.done");
+  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "shift.loop", F, EndBB);
+
+  // Truncate the shift amount to i8, which is trivially lowered to a single
+  // AVR register. The truncation is emitted in front of the unconditional
+  // branch that splitBasicBlock created. Anchoring on the terminator rather
+  // than on the truncation itself matters because IRBuilder may fold the
+  // truncation of a constant operand (e.g. undef) to a Constant, which is not
+  // an Instruction and has no position in the block.
+  Builder.SetInsertPoint(BB->getTerminator());
+  Value *ShiftAmount = Builder.CreateTrunc(BI->getOperand(1), Int8Ty);
+
+  // Replace the unconditional branch with a conditional branch that skips the
+  // loop entirely when there is nothing to shift.
+  Value *Cmp1 = Builder.CreateICmpEQ(ShiftAmount, Int8Zero);
+  Builder.CreateCondBr(Cmp1, EndBB, LoopBB);
+  BB->getTerminator()->eraseFromParent();
+
+  // Create the loop body starting with PHI nodes.
+  Builder.SetInsertPoint(LoopBB);
+  PHINode *ShiftAmountPHI = Builder.CreatePHI(Int8Ty, 2);
+  ShiftAmountPHI->addIncoming(ShiftAmount, BB);
+  PHINode *ValuePHI = Builder.CreatePHI(Int32Ty, 2);
+  ValuePHI->addIncoming(BI->getOperand(0), BB);
+
+  // Subtract the shift amount by one, as we're shifting one this loop
+  // iteration.
+  Value *ShiftAmountSub =
+      Builder.CreateSub(ShiftAmountPHI, ConstantInt::get(Int8Ty, 1));
+  ShiftAmountPHI->addIncoming(ShiftAmountSub, LoopBB);
+
+  // Emit the actual shift instruction. The difference is that this shift
+  // instruction has a constant shift amount, which can be emitted inline
+  // without a library call.
+  Value *ValueShifted = nullptr;
+  switch (BI->getOpcode()) {
+  case Instruction::Shl:
+    ValueShifted = Builder.CreateShl(ValuePHI, Int32One);
+    break;
+  case Instruction::LShr:
+    ValueShifted = Builder.CreateLShr(ValuePHI, Int32One);
+    break;
+  case Instruction::AShr:
+    ValueShifted = Builder.CreateAShr(ValuePHI, Int32One);
+    break;
+  default:
+    llvm_unreachable("asked to expand an instruction that is not a shift");
+  }
+  ValuePHI->addIncoming(ValueShifted, LoopBB);
+
+  // Branch to either the loop again (if there is more to shift) or to the
+  // basic block after the loop (if all bits are shifted).
+  Value *Cmp2 = Builder.CreateICmpEQ(ShiftAmountSub, Int8Zero);
+  Builder.CreateCondBr(Cmp2, EndBB, LoopBB);
+
+  // Collect the resulting value. This is necessary in the IR but won't produce
+  // any actual instructions.
+  Builder.SetInsertPoint(BI);
+  PHINode *Result = Builder.CreatePHI(Int32Ty, 2);
+  Result->addIncoming(BI->getOperand(0), BB);
+  Result->addIncoming(ValueShifted, LoopBB);
+
+  // Replace the original shift instruction.
+  BI->replaceAllUsesWith(Result);
+  BI->eraseFromParent();
+}
diff --git a/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/llvm/lib/Target/AVR/AVRTargetMachine.cpp
--- a/llvm/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/llvm/lib/Target/AVR/AVRTargetMachine.cpp
@@ -65,6 +65,7 @@
     return getTM<AVRTargetMachine>();
   }
 
+  void addIRPasses() override;
   bool addInstSelector() override;
   void addPreSched2() override;
   void addPreEmitPass() override;
@@ -76,6 +77,15 @@
   return new AVRPassConfig(*this, PM);
 }
 
+void AVRPassConfig::addIRPasses() {
+  // Expand instructions like
+  //   %result = shl i32 %n, %amount
+  // to a loop so that library calls are avoided.
+  addPass(createAVRShiftExpandPass());
+
+  TargetPassConfig::addIRPasses();
+}
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRTarget() {
   // Register the target.
   RegisterTargetMachine<AVRTargetMachine> X(getTheAVRTarget());
@@ -83,6 +93,7 @@
   auto &PR = *PassRegistry::getPassRegistry();
   initializeAVRExpandPseudoPass(PR);
   initializeAVRRelaxMemPass(PR);
+  initializeAVRShiftExpandPass(PR);
 }
 
 const AVRSubtarget *AVRTargetMachine::getSubtargetImpl() const {
diff --git a/llvm/lib/Target/AVR/CMakeLists.txt b/llvm/lib/Target/AVR/CMakeLists.txt
--- a/llvm/lib/Target/AVR/CMakeLists.txt
+++ b/llvm/lib/Target/AVR/CMakeLists.txt
@@ -24,6 +24,7 @@
   AVRMCInstLower.cpp
   AVRRelaxMemOperations.cpp
   AVRRegisterInfo.cpp
+  AVRShiftExpand.cpp
   AVRSubtarget.cpp
   AVRTargetMachine.cpp
   AVRTargetObjectFile.cpp
diff --git a/llvm/test/CodeGen/AVR/shift-expand.ll b/llvm/test/CodeGen/AVR/shift-expand.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AVR/shift-expand.ll
@@ -0,0 +1,101 @@
+; RUN: opt -avr-shift-expand -S %s -o - | FileCheck %s
+
+; The avr-shift-expand pass expands large shifts with a non-constant shift
+; amount to a loop. These loops avoid generating a (non-existing) builtin such
+; as __ashlsi3.
+
+target datalayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8"
+target triple = "avr"
+
+; CHECK-LABEL: @shl
+; CHECK: %3 = trunc i32 %1 to i8
+; CHECK: %4 = icmp eq i8 %3, 0
+; CHECK: br i1 %4, label %shift.done, label %shift.loop
+; CHECK: shift.loop:
+; CHECK: %5 = phi i8 [ %3, %2 ], [ %7, %shift.loop ]
+; CHECK: %6 = phi i32 [ %0, %2 ], [ %8, %shift.loop ]
+; CHECK: %7 = sub i8 %5, 1
+; CHECK: %8 = shl i32 %6, 1
+; CHECK: %9 = icmp eq i8 %7, 0
+; CHECK: br i1 %9, label %shift.done, label %shift.loop
+; CHECK: shift.done:
+; CHECK: %10 = phi i32 [ %0, %2 ], [ %8, %shift.loop ]
+; CHECK: ret i32 %10
+define i32 @shl(i32 %0, i32 %1) addrspace(1) {
+  %3 = shl i32 %0, %1
+  ret i32 %3
+}
+
+; CHECK-LABEL: @lshr
+; CHECK: %3 = trunc i32 %1 to i8
+; CHECK: %4 = icmp eq i8 %3, 0
+; CHECK: br i1 %4, label %shift.done, label %shift.loop
+; CHECK: shift.loop:
+; CHECK: %5 = phi i8 [ %3, %2 ], [ %7, %shift.loop ]
+; CHECK: %6 = phi i32 [ %0, %2 ], [ %8, %shift.loop ]
+; CHECK: %7 = sub i8 %5, 1
+; CHECK: %8 = lshr i32 %6, 1
+; CHECK: %9 = icmp eq i8 %7, 0
+; CHECK: br i1 %9, label %shift.done, label %shift.loop
+; CHECK: shift.done:
+; CHECK: %10 = phi i32 [ %0, %2 ], [ %8, %shift.loop ]
+; CHECK: ret i32 %10
+define i32 @lshr(i32 %0, i32 %1) addrspace(1) {
+  %3 = lshr i32 %0, %1
+  ret i32 %3
+}
+
+; CHECK-LABEL: @ashr
+; CHECK: %3 = trunc i32 %1 to i8
+; CHECK: %4 = icmp eq i8 %3, 0
+; CHECK: br i1 %4, label %shift.done, label %shift.loop
+; CHECK: shift.loop:
+; CHECK: %5 = phi i8 [ %3, %2 ], [ %7, %shift.loop ]
+; CHECK: %6 = phi i32 [ %0, %2 ], [ %8, %shift.loop ]
+; CHECK: %7 = sub i8 %5, 1
+; CHECK: %8 = ashr i32 %6, 1
+; CHECK: %9 = icmp eq i8 %7, 0
+; CHECK: br i1 %9, label %shift.done, label %shift.loop
+; CHECK: shift.done:
+; CHECK: %10 = phi i32 [ %0, %2 ], [ %8, %shift.loop ]
+; CHECK: ret i32 %10
+define i32 @ashr(i32 %0, i32 %1) addrspace(1) {
+  %3 = ashr i32 %0, %1
+  ret i32 %3
+}
+
+; This function is not modified because it is not an i32.
+; CHECK-LABEL: @shl40
+; CHECK: %3 = shl i40 %0, %1
+define i40 @shl40(i40 %0, i40 %1) addrspace(1) {
+  %3 = shl i40 %0, %1
+  ret i40 %3
+}
+
+; This function isn't either, although perhaps it should be.
+; CHECK-LABEL: @shl24
+; CHECK: %3 = shl i24 %0, %1
+define i24 @shl24(i24 %0, i24 %1) addrspace(1) {
+  %3 = shl i24 %0, %1
+  ret i24 %3
+}
+
+; Shifts by a constant amount are left untouched.
+; CHECK-LABEL: @shl_const
+; CHECK: %2 = shl i32 %0, 5
+; CHECK-NEXT: ret i32 %2
+define i32 @shl_const(i32 %0) addrspace(1) {
+  %2 = shl i32 %0, 5
+  ret i32 %2
+}
+
+; A shift amount that is a constant but not a ConstantInt is still expanded.
+; The truncated shift amount is constant-folded here, so the pass must not
+; assume that the truncation is an instruction.
+; CHECK-LABEL: @shl_undef
+; CHECK: shift.loop:
+; CHECK: shift.done:
+define i32 @shl_undef(i32 %0) addrspace(1) {
+  %2 = shl i32 %0, undef
+  ret i32 %2
+}