diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h --- a/llvm/lib/Target/BPF/BPF.h +++ b/llvm/lib/Target/BPF/BPF.h @@ -23,14 +23,12 @@ FunctionPass *createBPFISelDag(BPFTargetMachine &TM); FunctionPass *createBPFMISimplifyPatchablePass(); FunctionPass *createBPFMIPeepholePass(); -FunctionPass *createBPFMIPeepholeTruncElimPass(); FunctionPass *createBPFMIPreEmitPeepholePass(); FunctionPass *createBPFMIPreEmitCheckingPass(); void initializeBPFCheckAndAdjustIRPass(PassRegistry&); void initializeBPFDAGToDAGISelPass(PassRegistry &); -void initializeBPFMIPeepholePass(PassRegistry&); -void initializeBPFMIPeepholeTruncElimPass(PassRegistry &); +void initializeBPFMIPeepholePass(PassRegistry &); void initializeBPFMIPreEmitCheckingPass(PassRegistry&); void initializeBPFMIPreEmitPeepholePass(PassRegistry &); void initializeBPFMISimplifyPatchablePass(PassRegistry &); diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h --- a/llvm/lib/Target/BPF/BPFISelLowering.h +++ b/llvm/lib/Target/BPF/BPFISelLowering.h @@ -144,6 +144,7 @@ // For 32bit ALU result zext to 64bit is free. 
bool isZExtFree(Type *Ty1, Type *Ty2) const override; bool isZExtFree(EVT VT1, EVT VT2) const override; + bool isZExtFree(SDValue Val, EVT VT2) const override; unsigned EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB, unsigned Reg, bool isSigned) const; diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp --- a/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -224,6 +224,18 @@ return NumBits1 == 32 && NumBits2 == 64; } +bool BPFTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { + EVT VT1 = Val.getValueType(); + if (Val.getOpcode() == ISD::LOAD && VT1.isSimple() && VT2.isSimple()) { + MVT MT1 = VT1.getSimpleVT().SimpleTy; + MVT MT2 = VT2.getSimpleVT().SimpleTy; + if ((MT1 == MVT::i8 || MT1 == MVT::i16 || MT1 == MVT::i32) && + (MT2 == MVT::i32 || MT2 == MVT::i64)) + return true; + } + return TargetLoweringBase::isZExtFree(Val, VT2); +} + BPFTargetLowering::ConstraintType BPFTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp --- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp +++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp @@ -606,180 +606,3 @@ { return new BPFMIPreEmitPeephole(); } - -STATISTIC(TruncElemNum, "Number of truncation eliminated"); - -namespace { - -struct BPFMIPeepholeTruncElim : public MachineFunctionPass { - - static char ID; - const BPFInstrInfo *TII; - MachineFunction *MF; - MachineRegisterInfo *MRI; - - BPFMIPeepholeTruncElim() : MachineFunctionPass(ID) { - initializeBPFMIPeepholeTruncElimPass(*PassRegistry::getPassRegistry()); - } - -private: - // Initialize class variables. - void initialize(MachineFunction &MFParm); - - bool eliminateTruncSeq(); - -public: - - // Main entry point for this pass. 
- bool runOnMachineFunction(MachineFunction &MF) override { - if (skipFunction(MF.getFunction())) - return false; - - initialize(MF); - - return eliminateTruncSeq(); - } -}; - -static bool TruncSizeCompatible(int TruncSize, unsigned opcode) -{ - if (TruncSize == 1) - return opcode == BPF::LDB || opcode == BPF::LDB32; - - if (TruncSize == 2) - return opcode == BPF::LDH || opcode == BPF::LDH32; - - if (TruncSize == 4) - return opcode == BPF::LDW || opcode == BPF::LDW32; - - return false; -} - -// Initialize class variables. -void BPFMIPeepholeTruncElim::initialize(MachineFunction &MFParm) { - MF = &MFParm; - MRI = &MF->getRegInfo(); - TII = MF->getSubtarget().getInstrInfo(); - LLVM_DEBUG(dbgs() << "*** BPF MachineSSA TRUNC Elim peephole pass ***\n\n"); -} - -// Reg truncating is often the result of 8/16/32bit->64bit or -// 8/16bit->32bit conversion. If the reg value is loaded with -// masked byte width, the AND operation can be removed since -// BPF LOAD already has zero extension. -// -// This also solved a correctness issue. -// In BPF socket-related program, e.g., __sk_buff->{data, data_end} -// are 32-bit registers, but later on, kernel verifier will rewrite -// it with 64-bit value. Therefore, truncating the value after the -// load will result in incorrect code. -bool BPFMIPeepholeTruncElim::eliminateTruncSeq() { - MachineInstr* ToErase = nullptr; - bool Eliminated = false; - - for (MachineBasicBlock &MBB : *MF) { - for (MachineInstr &MI : MBB) { - // The second insn to remove if the eliminate candidate is a pair. - MachineInstr *MI2 = nullptr; - Register DstReg, SrcReg; - MachineInstr *DefMI; - int TruncSize = -1; - - // If the previous instruction was marked for elimination, remove it now. - if (ToErase) { - ToErase->eraseFromParent(); - ToErase = nullptr; - } - - // AND A, 0xFFFFFFFF will be turned into SLL/SRL pair due to immediate - // for BPF ANDI is i32, and this case only happens on ALU64. 
- if (MI.getOpcode() == BPF::SRL_ri && - MI.getOperand(2).getImm() == 32) { - SrcReg = MI.getOperand(1).getReg(); - if (!MRI->hasOneNonDBGUse(SrcReg)) - continue; - - MI2 = MRI->getVRegDef(SrcReg); - DstReg = MI.getOperand(0).getReg(); - - if (!MI2 || - MI2->getOpcode() != BPF::SLL_ri || - MI2->getOperand(2).getImm() != 32) - continue; - - // Update SrcReg. - SrcReg = MI2->getOperand(1).getReg(); - DefMI = MRI->getVRegDef(SrcReg); - if (DefMI) - TruncSize = 4; - } else if (MI.getOpcode() == BPF::AND_ri || - MI.getOpcode() == BPF::AND_ri_32) { - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - DefMI = MRI->getVRegDef(SrcReg); - - if (!DefMI) - continue; - - int64_t imm = MI.getOperand(2).getImm(); - if (imm == 0xff) - TruncSize = 1; - else if (imm == 0xffff) - TruncSize = 2; - } - - if (TruncSize == -1) - continue; - - // The definition is PHI node, check all inputs. - if (DefMI->isPHI()) { - bool CheckFail = false; - - for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) { - MachineOperand &opnd = DefMI->getOperand(i); - if (!opnd.isReg()) { - CheckFail = true; - break; - } - - MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg()); - if (!PhiDef || PhiDef->isPHI() || - !TruncSizeCompatible(TruncSize, PhiDef->getOpcode())) { - CheckFail = true; - break; - } - } - - if (CheckFail) - continue; - } else if (!TruncSizeCompatible(TruncSize, DefMI->getOpcode())) { - continue; - } - - BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::MOV_rr), DstReg) - .addReg(SrcReg); - - if (MI2) - MI2->eraseFromParent(); - - // Mark it to ToErase, and erase in the next iteration. 
- ToErase = &MI; - TruncElemNum++; - Eliminated = true; - } - } - - return Eliminated; -} - -} // end default namespace - -INITIALIZE_PASS(BPFMIPeepholeTruncElim, "bpf-mi-trunc-elim", - "BPF MachineSSA Peephole Optimization For TRUNC Eliminate", - false, false) - -char BPFMIPeepholeTruncElim::ID = 0; -FunctionPass* llvm::createBPFMIPeepholeTruncElimPass() -{ - return new BPFMIPeepholeTruncElim(); -} diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp --- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp +++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp @@ -42,7 +42,6 @@ PassRegistry &PR = *PassRegistry::getPassRegistry(); initializeBPFCheckAndAdjustIRPass(PR); initializeBPFMIPeepholePass(PR); - initializeBPFMIPeepholeTruncElimPass(PR); initializeBPFDAGToDAGISelPass(PR); } @@ -155,7 +154,6 @@ if (!DisableMIPeephole) { if (Subtarget->getHasAlu32()) addPass(createBPFMIPeepholePass()); - addPass(createBPFMIPeepholeTruncElimPass()); } } diff --git a/llvm/test/CodeGen/BPF/remove_truncate_9.ll b/llvm/test/CodeGen/BPF/remove_truncate_9.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/BPF/remove_truncate_9.ll @@ -0,0 +1,81 @@ +; RUN: llc -mcpu=v2 -march=bpf < %s | FileCheck %s +; RUN: llc -mcpu=v4 -march=bpf < %s | FileCheck %s + +; Zero extension instructions should be eliminated at instruction +; selection phase for all test cases below. + +; In BPF zero extension is implemented as &= or a pair of <<=/>>= +; instructions, hence simply check that &= and >>= do not exist in +; generated code (<<= remains because %c is used by both call and +; lshr in a few test cases). + +; CHECK-NOT: &= +; CHECK-NOT: >>= + +define void @shl_lshr_same_bb(ptr %p) { +entry: + %a = load i8, ptr %p, align 1 + %b = zext i8 %a to i64 + %c = shl i64 %b, 56 + %d = lshr i64 %c, 56 + %e = icmp eq i64 %d, 0 + ; hasOneUse() is a common requirement for many CombineDAG + ; transformations, make sure that it does not matter in this case. 
+ call void @sink1(i8 %a, i64 %b, i64 %c, i64 %d, i1 %e) + ret void +} + +define void @shl_lshr_diff_bb(ptr %p) { +entry: + %a = load i16, ptr %p, align 2 + %b = zext i16 %a to i64 + %c = shl i64 %b, 48 + %d = lshr i64 %c, 48 + br label %next + +; Jump to the new basic block creates a COPY instruction for %d, which +; might be materialized as noop or as AND_ri (zero extension) at the +; start of the basic block. The decision depends on TLI.isZExtFree() +; results, see RegsForValue::getCopyToRegs(). Check below verifies +; that COPY is materialized as noop. +next: + %e = icmp eq i64 %d, 0 + call void @sink2(i16 %a, i64 %b, i64 %c, i64 %d, i1 %e) + ret void +} + +define void @load_zext_same_bb(ptr %p) { +entry: + %a = load i8, ptr %p, align 1 + ; zext is implicit in this context + %b = icmp eq i8 %a, 0 + call void @sink3(i8 %a, i1 %b) + ret void +} + +define void @load_zext_diff_bb(ptr %p) { +entry: + %a = load i8, ptr %p, align 1 + br label %next + +next: + %b = icmp eq i8 %a, 0 + call void @sink3(i8 %a, i1 %b) + ret void +} + +define void @load_zext_diff_bb_2(ptr %p) { +entry: + %a = load i32, ptr %p, align 4 + br label %next + +next: + %b = icmp eq i32 %a, 0 + call void @sink4(i32 %a, i1 %b) + ret void +} + +declare void @sink1(i8, i64, i64, i64, i1); +declare void @sink2(i16, i64, i64, i64, i1); +declare void @sink3(i8, i1); +declare void @sink4(i32, i1);