Index: lib/Target/PowerPC/CMakeLists.txt
===================================================================
--- lib/Target/PowerPC/CMakeLists.txt
+++ lib/Target/PowerPC/CMakeLists.txt
@@ -40,6 +40,7 @@
   PPCVSXCopy.cpp
   PPCVSXFMAMutate.cpp
   PPCVSXSwapRemoval.cpp
+  PPCSelectionDAGInfo.cpp
   )
 
 add_subdirectory(AsmParser)
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -26,6 +26,7 @@
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
@@ -9717,6 +9718,138 @@
     unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
     BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
     return BB;
+  } else if (MI.getOpcode() == PPC::MEMCMP) {
+    // Expand the MEMCMP pseudo into one load-and-subtract block per
+    // doubleword, followed by a block that turns the selected difference
+    // into the result register.
+    // Example comparing 16 bytes:
+    // LoadSubBB:
+    //   ldbrx LoadSrc1Reg, 0, Src1
+    //   ldbrx LoadSrc2Reg, 0, Src2
+    //   subf. SubReg1, LoadSrc2Reg, LoadSrc1Reg
+    //   bne CmpResBB
+    // LoadSubBB2:
+    //   addi Base1Reg, Src1, 8
+    //   addi Base2Reg, Src2, 8
+    //   ldbrx LoadSrc1Reg, 0, Base1Reg
+    //   ldbrx LoadSrc2Reg, 0, Base2Reg
+    //   subf SubReg2, LoadSrc2Reg, LoadSrc1Reg
+    // CmpResBB:
+    //   PhiRes = phi [ SubReg1, LoadSubBB ], [ SubReg2, LoadSubBB2 ]
+    //   cntlzd LeadingZerosReg, PhiRes
+    //   addi AddiReg, LeadingZerosReg, -1
+    //   xori Res, AddiReg, 63
+    // exitBB:
+    //
+    // NOTE(review): subf wraps modulo 2^64, so the sign of the difference is
+    // not always the unsigned ordering of the two doublewords (e.g.
+    // 0xFF..FF vs 0x00..01). Confirm the sign of Res is what callers need.
+
+    unsigned Res = MI.getOperand(0).getReg();
+    unsigned Src1 = MI.getOperand(1).getReg();
+    unsigned Src2 = MI.getOperand(2).getReg();
+    int64_t Size = MI.getOperand(3).getImm();
+    // One block is built per doubleword and the first one is used
+    // unconditionally, so the size must be a positive multiple of 8.
+    assert(Size > 0 && Size % 8 == 0 &&
+           "MEMCMP size must be a positive multiple of 8");
+    unsigned Count = Size / 8;
+
+    DebugLoc dl = MI.getDebugLoc();
+
+    std::vector<MachineBasicBlock *> MBBList;
+    std::vector<unsigned> VirtualRegsList;
+    MBBList.reserve(Count);
+    VirtualRegsList.reserve(Count);
+
+    // Create one LoadSub basic block per doubleword to compare.
+    for (unsigned i = 0; i < Count; ++i) {
+      MachineBasicBlock *LoadSubBB = F->CreateMachineBasicBlock(LLVM_BB);
+      MBBList.push_back(LoadSubBB);
+      F->insert(It, LoadSubBB);
+    }
+
+    MachineBasicBlock *CmpResBB = F->CreateMachineBasicBlock(LLVM_BB);
+    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+    F->insert(It, CmpResBB);
+    F->insert(It, exitMBB);
+
+    // Everything after the pseudo, and BB's successors, move to exitMBB.
+    exitMBB->splice(exitMBB->begin(), BB,
+                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
+    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+    MachineRegisterInfo &RegInfo = F->getRegInfo();
+    const TargetRegisterClass *RC = &PPC::G8RCRegClass;
+
+    unsigned ZeroReg = PPC::ZERO8;
+    unsigned PhiRes = RegInfo.createVirtualRegister(RC);
+    unsigned LeadingZerosReg =
+        RegInfo.createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
+    unsigned AddiReg = RegInfo.createVirtualRegister(RC);
+
+    // Add the first LoadSubBB as a successor
+    BB->addSuccessor(MBBList[0]);
+
+    // Add the machine instructions for each LoadSubBB
+    for (unsigned i = 0; i < Count; ++i) {
+      MachineBasicBlock *LoadSubBB = MBBList[i];
+      unsigned LoadSrc1Reg = RegInfo.createVirtualRegister(RC);
+      unsigned LoadSrc2Reg = RegInfo.createVirtualRegister(RC);
+      unsigned SubReg = RegInfo.createVirtualRegister(RC);
+      unsigned Base1Reg = Src1;
+      unsigned Base2Reg = Src2;
+      // Past the first doubleword, address the data at Src + 8 * i.
+      if (i != 0) {
+        const int64_t Offset = static_cast<int64_t>(i) * 8;
+        Base1Reg = RegInfo.createVirtualRegister(RC);
+        Base2Reg = RegInfo.createVirtualRegister(RC);
+        BuildMI(LoadSubBB, dl, TII->get(PPC::ADDI8), Base1Reg)
+            .addReg(Src1)
+            .addImm(Offset);
+        BuildMI(LoadSubBB, dl, TII->get(PPC::ADDI8), Base2Reg)
+            .addReg(Src2)
+            .addImm(Offset);
+      }
+      BuildMI(LoadSubBB, dl, TII->get(PPC::LDBRX), LoadSrc1Reg)
+          .addReg(ZeroReg)
+          .addReg(Base1Reg);
+      BuildMI(LoadSubBB, dl, TII->get(PPC::LDBRX), LoadSrc2Reg)
+          .addReg(ZeroReg)
+          .addReg(Base2Reg);
+      // Add early exit branch for all but last LoadSubBB
+      if (i + 1 < Count) {
+        BuildMI(LoadSubBB, dl, TII->get(PPC::SUBF8o), SubReg)
+            .addReg(LoadSrc2Reg)
+            .addReg(LoadSrc1Reg);
+        BuildMI(LoadSubBB, dl, TII->get(PPC::BCC))
+            .addImm(PPC::PRED_NE)
+            .addReg(PPC::CR0)
+            .addMBB(CmpResBB);
+        LoadSubBB->addSuccessor(MBBList[i + 1]);
+      } else {
+        BuildMI(LoadSubBB, dl, TII->get(PPC::SUBF8), SubReg)
+            .addReg(LoadSrc2Reg)
+            .addReg(LoadSrc1Reg);
+      }
+      VirtualRegsList.push_back(SubReg);
+      LoadSubBB->addSuccessor(CmpResBB);
+    }
+    // Build Phi instruction to choose which subf result to use
+    MachineInstrBuilder MIB =
+        BuildMI(CmpResBB, dl, TII->get(PPC::PHI), PhiRes);
+    for (unsigned i = 0; i < Count; ++i) {
+      MIB.addReg(VirtualRegsList[i]);
+      MIB.addMBB(MBBList[i]);
+    }
+    BuildMI(CmpResBB, dl, TII->get(PPC::CNTLZD), LeadingZerosReg)
+        .addReg(PhiRes);
+    BuildMI(CmpResBB, dl, TII->get(PPC::ADDI8), AddiReg)
+        .addReg(LeadingZerosReg)
+        .addImm(-1);
+    BuildMI(CmpResBB, dl, TII->get(PPC::XORI8), Res).addReg(AddiReg).addImm(63);
+    CmpResBB->addSuccessor(exitMBB);
+    BB = exitMBB;
   } else {
     llvm_unreachable("Unexpected instr type to insert");
   }
Index: lib/Target/PowerPC/PPCInstr64Bit.td
===================================================================
--- lib/Target/PowerPC/PPCInstr64Bit.td
+++ lib/Target/PowerPC/PPCInstr64Bit.td
@@ -193,6
+193,14 @@
 def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
           (BL8_NOP texternalsym:$dst)>;
 
+// Pseudo for an inline memcmp of $count bytes. The custom inserter expands
+// it into loads and record-form subtracts, hence mayLoad and Defs = [CR0].
+let usesCustomInserter = 1, mayLoad = 1, Defs = [CR0] in {
+  def MEMCMP : Pseudo<
+    (outs g8rc:$res), (ins ptr_rc_nor0:$src1, ptr_rc_nor0:$src2,
+                       i64imm:$count), "#MEMCMP", []>;
+}
+
 // Atomic operations
 let usesCustomInserter = 1 in {
   let Defs = [CR0] in {
Index: lib/Target/PowerPC/PPCSelectionDAGInfo.h
===================================================================
--- /dev/null
+++ lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -0,0 +1,38 @@
+//===-- PPCSelectionDAGInfo.h - PPC SelectionDAG Info -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PPC subclass for SelectionDAGTargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H
+
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+
+namespace llvm {
+
+/// PowerPC override of the SelectionDAGTargetInfo hooks; only the memcmp
+/// hook is customized.
+class PPCSelectionDAGInfo : public SelectionDAGTargetInfo {
+public:
+  /// Try to emit an inline expansion of a memcmp call. Returns the result
+  /// value and the chain, or a pair of null SDValues when the call is not
+  /// expanded.
+  std::pair<SDValue, SDValue>
+  EmitTargetCodeForMemcmp(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
+                          SDValue Src1, SDValue Src2, SDValue Size,
+                          MachinePointerInfo Op1PtrInfo,
+                          MachinePointerInfo Op2PtrInfo) const override;
+};
+
+} // end namespace llvm
+
+#endif
Index: lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
===================================================================
--- /dev/null
+++ lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
@@ -0,0 +1,73 @@
+//===-- PPCSelectionDAGInfo.cpp - PPC SelectionDAG Info -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is
distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+#include "PPCSelectionDAGInfo.h"
+#include "PPCISelLowering.h"
+#include "PPCInstrInfo.h"
+#include "PPCRegisterInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-inline-memcmp"
+STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
+STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
+STATISTIC(NumMemCmpNot8ByteMultiples, "Number of memcmp calls without 8 byte multiples");
+STATISTIC(NumMemCmpGreaterThan64, "Number of memcmp calls with size greater than 64");
+STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");
+
+// Decide whether a memcmp call can be expanded inline. The expansion is only
+// done on little-endian PPC64, for a constant size that is a multiple of 8
+// bytes and no larger than the subtarget's inline threshold. If the call is
+// not expanded, a pair of null SDValues is returned; otherwise a MEMCMP
+// pseudo machine node is created, to be expanded by the custom inserter.
+std::pair<SDValue, SDValue> PPCSelectionDAGInfo::EmitTargetCodeForMemcmp(
+    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Src1,
+    SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo,
+    MachinePointerInfo Op2PtrInfo) const {
+  NumMemCmpCalls++;
+  const PPCSubtarget &Subtarget =
+      DAG.getMachineFunction().getSubtarget<PPCSubtarget>();
+  // MEMCMP is expanded with 64-bit ldbrx loads, so require 64-bit LE.
+  if (!Subtarget.isLittleEndian() || !Subtarget.isPPC64())
+    return std::make_pair(SDValue(), SDValue());
+
+  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+  if (!ConstantSize) {
+    NumMemCmpNotConstant++;
+    return std::make_pair(SDValue(), SDValue());
+  }
+
+  uint64_t SizeVal = ConstantSize->getZExtValue();
+  if (SizeVal > Subtarget.getMaxInlineSizeThreshold()) {
+    NumMemCmpGreaterThan64++;
+    return std::make_pair(SDValue(), SDValue());
+  }
+
+  // The expansion needs at least one whole doubleword; reject size 0 too.
+  if (SizeVal == 0 || SizeVal % 8 != 0) {
+    NumMemCmpNot8ByteMultiples++;
+    return std::make_pair(SDValue(), SDValue());
+  }
+
+  NumMemCmpInlined++;
+  // Thread the chain through the node so its loads stay after prior stores.
+  SDValue Ops[] = {Src1, Src2, DAG.getTargetConstant(SizeVal, dl, MVT::i64),
+                   Chain};
+  SDNode *N = DAG.getMachineNode(PPC::MEMCMP, dl, MVT::i64, MVT::Other, Ops);
+  return std::make_pair(SDValue(N, 0), SDValue(N, 1));
+}
Index: lib/Target/PowerPC/PPCSubtarget.h
===================================================================
--- lib/Target/PowerPC/PPCSubtarget.h
+++ lib/Target/PowerPC/PPCSubtarget.h
@@ -17,6 +17,7 @@
 #include "PPCFrameLowering.h"
 #include "PPCISelLowering.h"
 #include "PPCInstrInfo.h"
+#include "PPCSelectionDAGInfo.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
 #include "llvm/IR/DataLayout.h"
@@ -145,7 +146,7 @@
   PPCFrameLowering FrameLowering;
   PPCInstrInfo InstrInfo;
   PPCTargetLowering TLInfo;
-  SelectionDAGTargetInfo TSInfo;
+  PPCSelectionDAGInfo TSInfo;
 
 public:
   /// This constructor initializes the data members to match that
@@ -316,6 +317,8 @@
   /// classifyGlobalReference - Classify a global variable reference for the
   /// current subtarget accourding to how we should reference it.
unsigned char classifyGlobalReference(const GlobalValue *GV) const;
+  /// Largest constant memcmp size, in bytes, that is expanded inline.
+  unsigned getMaxInlineSizeThreshold() const { return 64; }
 };
 
 } // End llvm namespace
Index: test/CodeGen/PowerPC/memcmp.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/memcmp.ll
@@ -0,0 +1,24 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define signext i32 @foo(double* %x, double* %y) #0 {
+entry:
+  %0 = bitcast double* %x to i8*
+  %1 = bitcast double* %y to i8*
+  %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 16) #2
+  ret i32 %call
+
+; CHECK-LABEL: @foo
+; CHECK: ldbrx
+; CHECK-NEXT: ldbrx
+; CHECK-NEXT: subf.
+; CHECK-NEXT: bne
+; CHECK: cntlzd
+; CHECK-NEXT: addi
+; CHECK-NEXT: xori
+}
+
+; Function Attrs: nounwind readonly
+declare signext i32 @memcmp(i8*, i8*, i64) #1