Index: CMakeLists.txt =================================================================== --- CMakeLists.txt +++ CMakeLists.txt @@ -321,6 +321,7 @@ AMDGPU ARM BPF + #Connex Hexagon Lanai Mips Index: CODE_OWNERS.TXT =================================================================== --- CODE_OWNERS.TXT +++ CODE_OWNERS.TXT @@ -194,6 +194,10 @@ E: alexei.starovoitov@gmail.com D: BPF backend +N: Alex Susu +E: alex.susu@gmail.com +D: Connex wide vector processor backend + N: Tom Stellard E: tstellar@redhat.com D: Stable release management (x.y.[1-9] releases), AMDGPU Backend, libclc Index: include/llvm/ADT/Triple.h =================================================================== --- include/llvm/ADT/Triple.h +++ include/llvm/ADT/Triple.h @@ -53,6 +53,7 @@ avr, // AVR: Atmel AVR microcontroller bpfel, // eBPF or extended BPF or 64-bit BPF (little endian) bpfeb, // eBPF or extended BPF or 64-bit BPF (big endian) + connex, // Connex vector processor hexagon, // Hexagon: hexagon mips, // MIPS: mips, mipsallegrex, mipsr6 mipsel, // MIPSEL: mipsel, mipsallegrexe, mipsr6el Index: include/llvm/CodeGen/SelectionDAG.h =================================================================== --- include/llvm/CodeGen/SelectionDAG.h +++ include/llvm/CodeGen/SelectionDAG.h @@ -270,6 +270,12 @@ uint16_t NextPersistentId = 0; public: + DenseMap *crtNodeMapPtr = nullptr; /* node map installed by SetNodeMap(); null until then */ + + void SetNodeMap(DenseMap *aCrtNodeMapPtr); + + void UpdateNodeMapSDValue(SDNode *oldSDN, SDValue &newSDV); + /// Clients of various APIs that cause global effects on /// the DAG can optionally implement this interface. This allows the clients /// to handle the various sorts of updates that happen. 
@@ -1217,6 +1223,12 @@ MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT, SDValue Op1, SDValue Op2, SDValue Op3); MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT, + SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4); + MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1, + EVT VT2, SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4); + MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT, ArrayRef Ops); MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1, EVT VT2, SDValue Op1, SDValue Op2); Index: include/llvm/CodeGen/SelectionDAGISel.h =================================================================== --- include/llvm/CodeGen/SelectionDAGISel.h +++ include/llvm/CodeGen/SelectionDAGISel.h @@ -56,6 +56,7 @@ const TargetLowering *TLI; bool FastISelFailed; SmallPtrSet ElidedArgCopyInstrs; + DenseMap crtNodeMap; /// Current optimization remark emitter. /// Used to report things like combines and FastISel failures. Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -1182,6 +1182,7 @@ include "llvm/IR/IntrinsicsMips.td" include "llvm/IR/IntrinsicsAMDGPU.td" include "llvm/IR/IntrinsicsBPF.td" +include "llvm/IR/IntrinsicsConnex.td" include "llvm/IR/IntrinsicsSystemZ.td" include "llvm/IR/IntrinsicsWebAssembly.td" include "llvm/IR/IntrinsicsRISCV.td" Index: include/llvm/IR/IntrinsicsConnex.td =================================================================== --- include/llvm/IR/IntrinsicsConnex.td +++ include/llvm/IR/IntrinsicsConnex.td @@ -0,0 +1,106 @@ +//===- IntrinsicsConnex.td - Defines Connex-S intrinsics ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines all of the Connex-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +// All Connex-S vector processor intrinsics start with "llvm.connex." +// +let TargetPrefix = "connex" in { + + /* + * Note: all intrinsics defined in these .td files start with + * the int_ prefix (from intrinsic). For this file they start with + * int_connex prefix - otherwise we get the following TableGen error + * <> + * + * The LLVM IR intrinsics extend the LLVM language s.t. we can use + * these instructions in an LLVM IR program. We also need to define the + * corresponding assembly instructions in the back end TableGen files. + */ + + /* Following Intrinsics.td: + class Intrinsic ret_types, + list param_types = [], + list properties = [], + string name = ""> + */ + + + /* Small-note: + llvm_i64_ty makes simpler my LLVM IR generation in the LoopVectorize.cpp + module: + def int_connex_repeat_x_times : Intrinsic<[], [llvm_i64_ty], []>; + But llvm_i32_ty is in accordance to the original i32 type of n.vec in the + LoopVectorize.cpp module: + def int_connex_repeat_x_times : Intrinsic<[], [llvm_i32_ty], []>; + + Small-note: We get inspired from include/llvm/IR/IntrinsicsPowerPC.td: + // Intrinsics used to generate ctrl-based loops. + def int_ppc_mtctr : Intrinsic<[], [llvm_anyint_ty], []>; + + Small-note: Trying to use a polymorphic definition, which requires + specifying the actual type in Function::Create(FunctionType::get(), ...) 
+ is: + def int_connex_repeat_x_times : Intrinsic<[], [llvm_anyint_ty], []>; + When instantiating it in LoopVectorize.cpp like this: + Value *instrinsicFunc = Intrinsic::getDeclaration(M, + Intrinsic::connex_repeat_x_times); + it gives error at runtime: + llvm::ArrayRef::operator[](size_t) const [with T = llvm::Type*; + size_t = long unsigned int]: Assertion `Index < Length && + "Invalid index!"' failed. + */ + def int_connex_repeat_x_times : Intrinsic<[], [llvm_i64_ty], []>; + def int_connex_end_repeat : Intrinsic<[], [], []>; + + /* Note: Possibly useful in the future. + Connex Opincaa's END_REPEAT does not have a relative offset, + as the standard Connex assembly ijmpnzdec instruction, + since it falls on Opincaa to compute the jump back relative offset. + We can also use a setlc to position it outside the loop created by the + ijmpnzdec instruction by using it inside a delay-slot instruction. + + def int_connex_setlc : Intrinsic<[], [llvm_i16_ty], []>; + def int_connex_ijmpnzdec : Intrinsic<[], [], []>; + */ + + + + /* IMPORTANT: REDUCE cannot return a value. It is the duty of the host (CPU) + to read the result itself from the REDUCE issued by Connex-S. + Therefore this definition is incorrect: + def int_connex_reduce : Intrinsic<[llvm_i32_ty], [llvm_v128i16_ty], []>; + */ + /* GOOD: + def int_connex_reduce : Intrinsic<[], [llvm_v128i16_ty], []>; + def int_connex_reduce_i32 : Intrinsic<[], [llvm_v64i32_ty], []>; + def int_connex_reduce_f16 : Intrinsic<[], [llvm_v128f16_ty], []>; + */ + def int_connex_reduce : Intrinsic<[], [llvm_anyvector_ty], []>; + + /* Note: ctpop is already defined in Intrinsics.td. 
+ So the below definition is not required: + def int_connex_ctpop : Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty], []>; + */ + + + // Inherited BPF scalar intrinsics: Specialized loads from packet + def int_connex_load_byte : GCCBuiltin<"__builtin_connex_load_byte">, + Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>; + def int_connex_load_half : GCCBuiltin<"__builtin_connex_load_half">, + Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>; + def int_connex_load_word : GCCBuiltin<"__builtin_connex_load_word">, + Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>; + def int_connex_pseudo : GCCBuiltin<"__builtin_connex_pseudo">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty]>; +} + Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1469,6 +1469,9 @@ LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG)); + // Replacing SDNode N with RV in crtNodeMap + DAG.UpdateNodeMapSDValue(N, RV); + if (N->getNumValues() == RV.getNode()->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode()); else { Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -82,6 +82,33 @@ return Res; } +void SelectionDAG::SetNodeMap(DenseMap *aCrtNodeMapPtr) { + crtNodeMapPtr = aCrtNodeMapPtr; +} +
+/// Redirects every entry of the node map installed with SetNodeMap() that
+/// refers to \p oldSDN so that it holds \p newSDV instead.
+///
+/// The map is SelectionDAGISel's copy (crtNodeMap) of SelectionDAGBuilder's
+/// NodeMap; SelectionDAGISel::CodeGenAndEmitDAG() hands it over through
+/// SetNodeMap(). DAGCombiner calls this just before it replaces the uses of
+/// \p oldSDN. Does nothing when no map is installed.
+///
+/// NOTE(review): the null check below only protects callers if crtNodeMapPtr
+/// is initialized to nullptr where it is declared - confirm in SelectionDAG.h.
+void SelectionDAG::UpdateNodeMapSDValue(SDNode *oldSDN, SDValue &newSDV) {
+  if (!crtNodeMapPtr)
+    return;
+
+  // The map is keyed by Value, so more than one entry may refer to oldSDN;
+  // all of them are updated. Assigning through the reference neither inserts
+  // nor erases, hence it is safe while iterating.
+  for (auto &entry : *crtNodeMapPtr)
+    if (entry.second.getNode() == oldSDN)
+      entry.second = newSDV;
+}
+ + // Default null implementations of the callbacks. void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {} void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} @@ -7803,6 +7830,24 @@ } MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT, + SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3, Op4 }; + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, + SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3, Op4 }; + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1, EVT VT2, EVT VT3, ArrayRef Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3); Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -33,6 +33,7 @@ #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" +#include "llvm/Support/Debug.h" #include #include #include @@ -125,6 +126,11 @@ MapVector DanglingDebugInfoMap; public: + // Add a getter for NodeMap + DenseMap &getNodeMap() { + return NodeMap; + } + /// Loads are not emitted to the program immediately. We bunch them up and /// then emit token factor nodes when possible. 
This allows us to get simple /// disambiguation between loads without worrying about alias analysis. Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -692,6 +692,9 @@ CurDAG->setRoot(SDB->getControlRoot()); HadTailCall = SDB->HasTailCall; SDB->resolveOrClearDbgInfo(); + + crtNodeMap = SDB->getNodeMap(); + SDB->clear(); // Final step, emit the lowered DAG as machine code. @@ -778,6 +781,9 @@ // Run the DAG combiner in pre-legalize mode. { + // We should do this only once + CurDAG->SetNodeMap(&crtNodeMap); + NamedRegionTimer T("combine1", "DAG Combining 1", GroupName, GroupDescription, TimePassesIsEnabled); CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel); Index: lib/Target/Connex/Connex.h =================================================================== --- lib/Target/Connex/Connex.h +++ lib/Target/Connex/Connex.h @@ -0,0 +1,35 @@ +//===-- Connex.h - Top-level interface for Connex representation ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_CONNEX_CONNEX_H +#define LLVM_LIB_TARGET_CONNEX_CONNEX_H + +#include "MCTargetDesc/ConnexMCTargetDesc.h" +#include "llvm/Target/TargetMachine.h" + + +// We define reserved register(s) of Connex to use for: +// - handling COPY instructions in WHERE blocks +// (see ConnexTargetMachine.cpp and ConnexISelLowering.cpp), etc +#define CONNEX_RESERVED_REGISTER_01 Connex::Wh30 +#define CONNEX_RESERVED_REGISTER_02 Connex::Wh31 +#define CONNEX_RESERVED_REGISTER_03 Connex::Wh29 + +#define COPY_REGISTER_IMPLEMENTED_WITH_ORV_H + +namespace llvm { +class ConnexTargetMachine; + +FunctionPass *createConnexISelDag(ConnexTargetMachine &TM); +} + +#endif Index: lib/Target/Connex/ConnexAsmPrinter.cpp =================================================================== --- lib/Target/Connex/ConnexAsmPrinter.cpp +++ lib/Target/Connex/ConnexAsmPrinter.cpp @@ -0,0 +1,1271 @@ +//===-- ConnexAsmPrinter.cpp - Connex LLVM assembly writer ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to the Connex assembly language. 
+// +//===----------------------------------------------------------------------===// + +#include "Connex.h" +#include "ConnexConfig.h" +#include "ConnexAsmPrinterLoopNests.h" +#include "ConnexInstrInfo.h" +#include "ConnexMCInstLower.h" +#include "ConnexTargetMachine.h" +// 2019_03_30_TODO: #include "BTFDebug.h" +#include "InstPrinter/ConnexInstPrinter.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/CommandLine.h" //See http://llvm.org/docs/CommandLine.html +#include + + +using namespace llvm; + +// Inspired from llvm/lib/CodeGen/TargetPassConfig.cpp +static cl::opt EnableCorrectBBsASMPrint("enable-correct-asm-print", + cl::Hidden, + cl::init(true), + //cl::desc("Enable special instrumentation in ConnexASMPrinter") + cl::desc("Correct the BBs of 2nd innermost loop in loop nests of kernels " + "and use normally REPEAT for it and host-side Opincaa C++ for() as " + "the innermost loop")); + +static cl::opt TreatRepeat2ndInnerLoopGlobalTmp("treat-repeat-2nd-inner-loop", + cl::Hidden, + cl::init(true), + cl::desc("Treat well 2nd inner loop in kernel and use normally REPEAT " + "for it and host-side Opincaa C++ for() as the inner loop")); + + +#define DEBUG_TYPE "asm-printer" + + + +// We need to store the correspondence between MachineInstr and the lowered +// MCInst, since MCInst does not. +// This is used in ConnexInstPrinter.cpp. 
+const MachineInstr *crtMI; +extern std::unordered_map mapLD_ST_REPEAT_InlineAsm; + + + + +namespace { +class ConnexAsmPrinter : public AsmPrinter { +public: + explicit ConnexAsmPrinter(TargetMachine &TM, + std::unique_ptr Streamer) + : AsmPrinter(TM, std::move(Streamer)) {} + + StringRef getPassName() const override { return "Connex Assembly Printer"; } + + /* + (From http://llvm.org/docs/doxygen/html/classllvm_1_1MachineFunctionPass.html + we see SelectionDAGISel and AsmPrinter were the only passes that inherit + MachineFunctionPass, from this back end.) + From http://llvm.org/docs/doxygen/html/AsmPrinter_8h_source.html: + /// Set up the AsmPrinter when we are working on a new module. If your pass + /// overrides this, it must make sure to explicitly call this implementation. + */ + + bool isVectorBody(StringRef &&strRef) { + #define STR_VECTOR_BODY "vector.body" + #define STR_VECTOR_BODY_PREHEADER ".preheader" + + LLVM_DEBUG(dbgs() << "isVectorBody(): strRef = " << strRef << "\n"); + + // We can have several BBs with name vector.bodyXYZT (but we do NOT + // search for STR_VECTOR_BODY_PREHEADER, which can be e.g., + // vector.body40.preheader) + if (strRef.startswith(StringRef(STR_VECTOR_BODY)) && + strRef.endswith(StringRef(STR_VECTOR_BODY_PREHEADER))) + return false; + + if (strRef.startswith(StringRef(STR_VECTOR_BODY)) == false) + return false; + + /* + const char *str = strRef.data(); + + if ((strncmp(str, STR_VECTOR_BODY, + strlen(STR_VECTOR_BODY)) == 0) && + (strncmp(str + strlen(str) - strlen(STR_VECTOR_BODY_PREHEADER), + STR_VECTOR_BODY_PREHEADER, + strlen(STR_VECTOR_BODY_PREHEADER)) == 0)) + return false; + + if (strncmp(str, STR_VECTOR_BODY, strlen(STR_VECTOR_BODY)) != 0) + return false; + */ + + LLVM_DEBUG(dbgs() << "isVectorBody(): returning true\n"); + + return true; + } + + + int ifImmSpecialUpdateMap(const MachineInstr *MI, const MachineInstr *MI2) { + unsigned imm; + + if (MI2->getOpcode() == Connex::REPEAT) { + const MachineOperand &MI2MO0 = 
MI2->getOperand(0); + LLVM_DEBUG(dbgs() << "ifImmSpecialUpdateMap(): MI2MO0 = " + << MI2MO0 << "\n"); + + imm = MI2MO0.getImm(); + } + else { + const MachineOperand &MI2MO0 = MI2->getOperand(0); + LLVM_DEBUG(dbgs() << "ifImmSpecialUpdateMap(): MI2MO0 = " + << MI2MO0 << "\n"); + + const MachineOperand &MI2MO1 = MI2->getOperand(1); + LLVM_DEBUG(dbgs() << "ifImmSpecialUpdateMap(): MI2MO1 = " + << MI2MO1 << "\n"); + + imm = MI2MO1.getImm(); + } + + LLVM_DEBUG(dbgs() << "ifImmSpecialUpdateMap(): imm = " + << imm << "\n"); + + if ((imm == CONNEX_MEM_NUM_ROWS + 10) || + (imm == VALUE_BOGUS_REPEAT_X_TIMES)) { + LLVM_DEBUG(dbgs() << "ifImmSpecialUpdateMap(): MI2 = " + << *MI2 << "\n"); + LLVM_DEBUG(dbgs() << "ifImmSpecialUpdateMap(): MI->getOperand(0) = " + << MI->getOperand(0) << "\n"); + LLVM_DEBUG(dbgs() << "ifImmSpecialUpdateMap(): MI = " + << MI + << ", MI2 (ptr) = " << MI2 << "\n"); + + mapLD_ST_REPEAT_InlineAsm[MI2] = MI; + return 1; + } + + return -1; + } + +
+  /// Hoists the first Connex::REPEAT_SYM_IMM of \p MBB to the front of the
+  /// block, together with the INLINEASM found right after it.
+  ///
+  /// The INLINEASM is searched for in the (at most) two instructions that
+  /// follow the REPEAT_SYM_IMM. On return the REPEAT_SYM_IMM is the first
+  /// instruction of \p MBB and the INLINEASM, when one was found, the second.
+  /// Only the first REPEAT_SYM_IMM is handled; a block without one is left
+  /// unchanged.
+  void MoveToFrontRepeat(MachineBasicBlock *MBB) {
+    LLVM_DEBUG(dbgs() << "Entered MoveToFrontRepeat(MBB = "
+                      << MBB << ")\n");
+
+    for (auto MIItr = MBB->begin(); MIItr != MBB->end(); ++MIItr) {
+      if (MIItr->getOpcode() != Connex::REPEAT_SYM_IMM)
+        continue;
+
+      MachineInstr *MI = &(*MIItr);
+      LLVM_DEBUG(dbgs() << "MoveToFrontRepeat(): Found Connex::REPEAT_SYM_IMM\n");
+
+      // Inspect the next two instructions, but never step past MBB->end():
+      // dereferencing the end iterator is undefined behavior, and it would
+      // happen whenever REPEAT_SYM_IMM is (next to) last in the block.
+      MachineInstr *MIInlineAsm = nullptr;
+      auto SuccItr = MIItr;
+      ++SuccItr;
+      for (unsigned distance = 1; distance <= 2 && SuccItr != MBB->end();
+           ++distance, ++SuccItr) {
+        if (!SuccItr->isInlineAsm())
+          continue;
+
+        MIInlineAsm = &(*SuccItr);
+        if (distance == 1) {
+          LLVM_DEBUG(dbgs() << "MoveToFrontRepeat(): Moving the successor "
+                        "INLINEASM together with the Connex::REPEAT_SYM_IMM\n");
+        }
+        else {
+          LLVM_DEBUG(dbgs() << "MoveToFrontRepeat(): Moving the following "
+                        "(not successor) INLINEASM together with the "
+                        "Connex::REPEAT_SYM_IMM\n");
+        }
+        break;
+      }
+
+      assert(MIInlineAsm &&
+             "Can't find INLINEASM associated to REPEAT_SYM_IMM");
+
+      // Insert the INLINEASM first and the REPEAT_SYM_IMM afterwards, so the
+      // REPEAT_SYM_IMM ends up in front of the INLINEASM.
+      if (MIInlineAsm) {
+        MBB->remove(MIInlineAsm);
+        MBB->insert(MBB->begin(), MIInlineAsm);
+      }
+
+      LLVM_DEBUG(dbgs() << "MoveToFrontRepeat(): Moving Connex::REPEAT_SYM_IMM\n");
+
+      MBB->remove(MI);
+      MBB->insert(MBB->begin(), MI);
+
+      // MI has moved, so MIItr must not be advanced any further; only the
+      // first REPEAT_SYM_IMM is handled.
+      return;
+    }
+  }
+ + + void MoveToFrontInlineAsm(MachineBasicBlock *MBB, char *strToSearch) { + LLVM_DEBUG(dbgs() << "Entered MoveToFrontInlineAsm(MBB = " + << MBB + << ", strToSearch = " << strToSearch << ")\n"); + + // Moving the REPEAT and it's symbolic operand in INLINEASM at the + // front of the MBB. 
+ for (auto MIItr = MBB->begin(); MIItr != MBB->end(); /* ++MIItr */) { + MachineInstr *MI = &(*MIItr); + + // We avoid iterator invalidation: + // See some comments on iterator invalidation (when doing remove) at + // http://llvm.1065342.n5.nabble.com/deleting-or-replacing-a-MachineInst-td77723.html + MachineBasicBlock::iterator MIsucc = MIItr; + MIsucc++; + + if (MI->isInlineAsm()) { + LLVM_DEBUG(dbgs() << " MoveToFrontInlineAsm(): found INLINEASM MI = " + << *MI << "\n"); + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineInstr.html + for (unsigned index = 0; index < MI->getNumOperands(); index++) { + MachineOperand *miOpnd; + miOpnd = & (MI->getOperand(index)); + + LLVM_DEBUG(dbgs() << " MI->getOperand(" << index << ") = " + << *miOpnd << "\n"); + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineOperand.html + if (miOpnd->isSymbol()) { + const char *symStr = miOpnd->getSymbolName(); + LLVM_DEBUG(dbgs() << " MoveToFrontInlineAsm(): symStr = " + << symStr << "\n"); + + if (strstr(symStr, strToSearch) != NULL) { + LLVM_DEBUG(dbgs() << " MoveToFrontInlineAsm(): Found INLINEASM " + "with strToSearch in the symbol " + "operand\n"); + //"with host-side for loop" + //break; + + MBB->remove(MI); + MBB->insert(MBB->front(), MI); + } + } + } + } + + // We avoid iterator invalidation + MIItr = MIsucc; + } + } + + + /* + This moves to the front of the MBB a number of 3 (if justOne == false), + or 1 (if justOne == true) ASM inline expression(s) IF the 1st inline + expression has Opincaa kernel begin. + + We require to run first this function with justOne == false and then + with justOne == true. + + More exactly, in LoopVectorize.cpp we added, among others, the following + 3 ASM inline expressions (consecutively): + - 1 BEGIN_KERNEL INLINEASM instruction used as loop prologue + - 1 END_KERNEL INLINEASM instruction used as + loop prologue (END_KERNEL part) + - 1 BEGIN_KERNEL INLINEASM instruction for + the loop. 
+ We move these 3 instructions to the front of + MBB when justOne == false. This ensures that eventual + less-likely case of having a VLOAD_H_SYM_IMM (and inline ASM associated, + containing the symbolic operand) manually generated by me + in ConnexISelDAGToDAG.cpp is not going to be first instruction, before + the Opincaa loop header ASM inline expression. + We also make sure that eventual loads from spills are put inside the loop + prologue. + + We move 1 instruction to the front since in runOnMachineFunction() we put + all instructions of the predecessor (has to be only 1 predecessor) of + vector.body at the front of MBB, so we have to move the BEGIN_KERNEL of + the loop prologue. + */ + void MoveToFront(MachineBasicBlock *MBB, bool justOne) { + MachineInstr *tmp1, *tmp2, *tmp3; //, *tmp4; + int counter = 0; + + LLVM_DEBUG(dbgs() << "Entered MoveToFront(justOne = " + << justOne << ")\n"); + + + /* We compute MIItrLastLoadAssociatedToSpill, an iterator (pointer) to + the first instruction after the loads (fills) from spills at the + beginning of the BB. + */ + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineBasicBlock.html + /* IMPORTANT: make sure we put this initialization after any other MBB mutation + in order to use it well to move the 3 INLINEASM instructions. 
+ */ + MachineBasicBlock::iterator MIItrLastLoadAssociatedToSpill = MBB->front(); + + if (justOne == false) { + for (auto MIItr2 = MBB->begin(); MIItr2 != MBB->end(); ++MIItr2) { + MachineInstr *MI = &(*MIItr2); + + LLVM_DEBUG(dbgs() << " MoveToFront(): MI = " + << *MI + << ", MI->getOpcode() = " + << MI->getOpcode() + << "\n"); + + unsigned imm = -1; + if (MI->getOpcode() == Connex::LD_H) { + /* Inspired from + http://llvm.org/docs/doxygen/html/MachineInstr_8cpp_source.html, + method MachineInstr::isIdenticalTo() + */ + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + if (MO.isImm()) { + imm = MO.getImm(); + LLVM_DEBUG(dbgs() << " MoveToFront(): imm = " + << imm << "\n"); + break; + } + } + + //if (MI == is a vector load (LD_H), with offset address + /* If the imm operand > CONNEX_MEM_NUM_ROWS - 32 it (normally) + * means that the operation is generated in + * ConnexInstrInfo::storeRegToStackSlot() and + * ConnexInstrInfo::loadRegFromStackSlot(), + * part of a spill or load from spill operation. + * Note that on Connex we do not have a stack per se, + * but we emulate it at the end of the LS memory. + */ + if ((imm >= CONNEX_MEM_NUM_ROWS - 32) && + (imm < CONNEX_MEM_NUM_ROWS)) { + //MIItr2++; + MIItrLastLoadAssociatedToSpill = MIItr2; + MIItrLastLoadAssociatedToSpill++; + } + } + } // end for + } // if (justOne == false) + + /* Moving the ISD::INLINEASM instruction containing the opincaa kernel + begin at the very front of this BB. 
*/ + for (auto MIItr = MBB->begin(); MIItr != MBB->end(); + ++MIItr, ++counter) { + MachineInstr *MI = &(*MIItr); + + if (MI->isInlineAsm()) { + LLVM_DEBUG(dbgs() << " MoveToFront() found INLINEASM MI = " + << *MI << "\n"); + + bool isOpincaaCodeBegin = false; + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineInstr.html + for (unsigned index = 0; index < MI->getNumOperands(); index++) { + MachineOperand *miOpndOpincaaCodeBegin; // = NULL; + miOpndOpincaaCodeBegin = & (MI->getOperand(index)); + + LLVM_DEBUG(dbgs() << " MI->getOperand(" << index << ") = " + << *miOpndOpincaaCodeBegin << "\n"); + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineOperand.html + if (miOpndOpincaaCodeBegin->isSymbol()) { + const char *symStr = miOpndOpincaaCodeBegin->getSymbolName(); + LLVM_DEBUG(dbgs() << " MoveToFront(): symStr = " + << symStr << "\n"); + if (strstr(symStr, STR_OPINCAA_CODE_BEGIN) != NULL) { + isOpincaaCodeBegin = true; + break; + } + } + } + + if (isOpincaaCodeBegin) { + if (counter != 0) { + // We move only if not at the beginning of MBB + tmp1 = MI; + LLVM_DEBUG(dbgs() << " MoveToFront(): moving INLINEASM to the front (counter = " + << counter << ", justOne = " + << justOne << ")\n"); + + if (justOne == true) { + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineBasicBlock.html + MBB->remove(tmp1); + MBB->insert(MBB->front(), tmp1); + } + else { + /* We move the next 3 instructions to the front of + MBB, namely: + - 1 BEGIN_KERNEL INLINEASM instruction used as + loop prologue + - 1 END_KERNEL INLINEASM instruction used as + loop prologue (END_KERNEL part) + - 1 BEGIN_KERNEL INLINEASM instruction for + the loop. 
+ + TODO TODO TODO TODO: check tmp3 and tmp2 are + also INLINEASM */ + + MIItr++; + tmp2 = &(*MIItr); + + MIItr++; + tmp3 = &(*MIItr); + + LLVM_DEBUG(dbgs() << " MoveToFront(): tmp1 = " + << *tmp1 << "\n"); + LLVM_DEBUG(dbgs() << " MoveToFront(): tmp2 = " + << *tmp2 << "\n"); + LLVM_DEBUG(dbgs() << " MoveToFront(): tmp3 = " + << *tmp3 << "\n"); + /* + MBB->remove(tmp4); + //MBB->insert(MBB->front(), tmp3); + */ + + MBB->remove(tmp3); + + MBB->remove(tmp2); + + MBB->remove(tmp1); + + /* TODO TODO TODO TODO TODO: check that the iterator + MIItrLastLoadAssociatedToSpill does NOT get + invalidated - it seems it is not invalidated even if we + change MBB, which is so because the instruction + to which the iterator points to is NOT changed. */ + MBB->insert(MIItrLastLoadAssociatedToSpill, tmp1); + MBB->insert(MIItrLastLoadAssociatedToSpill, tmp2); + MBB->insert(MIItrLastLoadAssociatedToSpill, tmp3); + } + } // END if (counter != 0) + break; + } // END if (isOpincaaCodeBegin) + } + //counter++; + } + } // END MoveToFront() + + + // Moving the last ISD::INLINEASM instruction of MBB at the very back of MBB + void MoveToBackLastInlineAsm(MachineBasicBlock *MBB) { + MachineInstr *tmp1; //, *tmp2, *tmp3; + int counter = 0; + + LLVM_DEBUG(dbgs() << " MoveToBackLastInlineAsm(): MBB = " + << *MBB << "\n"); + + for (auto MIItr = MBB->rbegin(); MIItr != MBB->rend(); + ++MIItr, ++counter) { + MachineInstr *MI = &(*MIItr); + + if (MI->isInlineAsm()) { + LLVM_DEBUG(dbgs() << " MoveToBackLastInlineAsm() found INLINEASM MI = " + << *MI << "\n"); + + bool isOpincaaCodeEnd = false; + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineInstr.html + for (unsigned index = 0; index < MI->getNumOperands(); index++) { + MachineOperand *miOpndOpincaaCodeEnd; // = NULL; + miOpndOpincaaCodeEnd = & (MI->getOperand(index)); + + LLVM_DEBUG(dbgs() << " MI->getOperand(" << index << ") = " + << *miOpndOpincaaCodeEnd << "\n"); + + // See 
http://llvm.org/docs/doxygen/html/classllvm_1_1MachineOperand.html + if (miOpndOpincaaCodeEnd->isSymbol()) { + const char *symStr = miOpndOpincaaCodeEnd->getSymbolName(); + LLVM_DEBUG(dbgs() << " MoveToBackLastInlineAsm(): symStr = " + << symStr << "\n"); + if (strstr(symStr, STR_OPINCAA_CODE_END) != NULL) { + isOpincaaCodeEnd = true; + break; + } + } + } + + if (isOpincaaCodeEnd) { + //if (counter != 0) { // We move only if not at the beginning of MBB + tmp1 = MI; + LLVM_DEBUG(dbgs() << " MoveToBackLastInlineAsm(): moving INLINEASM to the front (counter = " + << counter << ")\n"); + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineBasicBlock.html + MBB->remove(tmp1); + MBB->insert(MBB->end(), tmp1); + //} + break; + } + } + //counter++; + } + } // END MoveToBack() + + + void ReplaceWithSymbolicIndex(MachineBasicBlock *MBB) { + assert(0 && "ReplaceWithSymbolicIndex() does NOT do anything anymore"); + + LLVM_DEBUG(dbgs() << "Entered ReplaceWithSymbolicIndex()\n"); + + unsigned imm = -1; + + for (auto &MI : *MBB) { + if ((MI.getOpcode() == Connex::LD_H) || + (MI.getOpcode() == Connex::ST_H)) { + /* Inspired from + http://llvm.org/docs/doxygen/html/MachineInstr_8cpp_source.html, + method MachineInstr::isIdenticalTo() + */ + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (MO.isImm()) { + imm = MO.getImm(); + LLVM_DEBUG(dbgs() << " ReplaceWithSymbolicIndex(): imm = " + << imm << "\n"); + /* + if (imm == CONNEX_MEM_NUM_ROWS - 32 - 10) { + MO.setImm((int64_t)-1); + } + */ + break; + } + } + } + } + } + + + // We add at the front of vector.body the instructions + // for the predecessor of vector.body basic-block DIFFERENT than + // vector.body (normally vector.ph). 
+  /// Clone the instructions of \p predMBBGood and insert the clones at the top
+  /// of \p MBB, keeping their relative order.
+  ///
+  /// The predecessor is walked backwards and every clone is inserted at the
+  /// beginning of \p MBB, so the clones end up in their original order.
+  /// An unconditional branch that terminates the predecessor is not copied.
+  /// A BUNDLE header is not cloned itself: the 2 instructions that follow it
+  /// are cloned instead (we assume finalized bundles of exactly 2
+  /// instructions).
+  ///
+  /// \param MF           function that owns the cloned instructions.
+  /// \param MBB          destination block (the vector.body block).
+  /// \param predMBBGood  predecessor block whose instructions are copied.
+  void copyInstructionsFromPred(MachineFunction &MF, MachineBasicBlock &MBB,
+                                MachineBasicBlock * &predMBBGood) {
+
+    // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineBasicBlock.html
+    /* (See also https://fossies.org/linux/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+     * method DeadMachineInstructionElim::runOnMachineFunction() for
+     * an example of iteration backwards).
+     */
+    //for (auto &predMI : (*predMBB))
+    unsigned counterPredMBB = 0;
+
+    // rbegin() is a reverse_iterator
+    for (auto predMIItr = predMBBGood->rbegin();
+         predMIItr != predMBBGood->rend();
+         ++predMIItr, ++counterPredMBB) {
+      MachineInstr *predMI = &(*predMIItr);
+
+      LLVM_DEBUG(dbgs() << " copyInstructionsFromPred(): predMI = "
+                        << *predMI << "\n");
+
+      // Need to insert them in different order
+      if (predMI->isBundle()) {
+        LLVM_DEBUG(dbgs() << " copyInstructionsFromPred(): handling bundle\n");
+
+        const MachineBasicBlock *MBBBundle = predMI->getParent();
+        //MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
+        MachineBasicBlock::const_instr_iterator I = predMI->getIterator();
+
+        // IMPORTANT: We assume we work with finalized bundles
+        ++I;
+
+        // THIS cycles ~forever... EmitInstruction(& (*I) );
+
+        assert(I != MBBBundle->instr_end());
+        const MachineInstr *I1 = & (*I);
+        LLVM_DEBUG(dbgs() << " copyInstructionsFromPred(): I1 = "
+                          << *I1 << "\n");
+        //
+        ++I;
+
+
+        // IMPORTANT: We assume we work with bundles with only 2 instructions
+
+        /*
+        // From http://llvm.org/docs/doxygen/html/classllvm_1_1MachineInstr.html
+        bool isInsideBundle () const
+          Return true if MI is in a bundle (but not the first MI in a bundle).
+        bool isBundled () const
+          Return true if this instruction part of a bundle.
+        */
+        /*
+        // TODO: this fails if bundle created in addPreSched2()
+        //        (before post-RA scheduler):
+        assert(I->isInsideBundle());
+        assert(I->isBundled());
+        */
+        assert(I != MBBBundle->instr_end());
+        const MachineInstr *I2 = & (*I);
+
+        // The clones are pushed at the front of MBB, so the 2nd instruction
+        // of the bundle goes in first and the 1st one lands above it.
+        // MBB.begin() is used (not MBB.front()) because it is also valid
+        // when MBB has no instructions yet.
+        MachineInstr *newPredMI2 = MF.CloneMachineInstr(I2);
+        LLVM_DEBUG(dbgs() << " copyInstructionsFromPred(): newPredMI2 = "
+                          << *newPredMI2 << "\n");
+        MBB.insert(MBB.begin(), newPredMI2);
+
+        MachineInstr *newPredMI1 = MF.CloneMachineInstr(I1);
+        LLVM_DEBUG(dbgs() << " copyInstructionsFromPred(): newPredMI1 = "
+                          << *newPredMI1 << "\n");
+        MBB.insert(MBB.begin(), newPredMI1);
+
+        /*
+        while (I != MBBBundle->instr_end() && I->isInsideBundle()) {
+          MachineInstr *newPredMI =
+                          MF.CloneMachineInstr(& (*I));
+          MBB.insert(MBB.front(), newPredMI);
+
+          //EmitInstruction(& (*I) );
+
+          ++I;
+        }
+        */
+
+        LLVM_DEBUG(dbgs() << " copyInstructionsFromPred(): END handling bundle\n");
+
+        continue;
+      }
+
+
+      /*
+       * We avoid the last instruction of predMBBGood, since it is an
+       * unconditional JMP
+       */
+      if (counterPredMBB == 0 &&
+          // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineInstr.html
+          predMI->isUnconditionalBranch()) { // predMBBGood->size())
+        /* For llc -O3 it removes the JMP at the end of
+             vector.ph, hence it merges it with vector.body,
+             even if it leaves the entry label of vector.body.
+           So we need to check if predMI is JMP with
+             isUnconditionalBranch(). */
+        LLVM_DEBUG(dbgs() << " copyInstructionsFromPred(): found a JMP, "
+                             "so not copying it in vector.body\n");
+        continue;
+      }
+
+      /* IMPORTANT note: EmitInstruction() fails for ISD::INLINEASM
+      EmitInstruction(&predMI);
+      */
+
+      /* See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineFunction.html
+         MachineInstr *CloneMachineInstr(const MachineInstr *Orig);
+           CloneMachineInstr - Create a new MachineInstr which is a
+           copy of the 'Orig' instruction, identical in all ways except
+           the instruction has no parent, prev, or next.
+      */
+      MachineInstr *newPredMI = MF.CloneMachineInstr(predMI);
+
+      //MBB.insert(MBB.front(), &predMI);
+      // Gives error: "Assertion `!N->getParent() &&
+      //      "machine instruction already in a basic block"' failed."
+      MBB.insert(MBB.begin(), newPredMI);
+    }
+
+    #ifdef NNNNO
+    /*
+     * I guess normally we should have 2 predecessors, but since I mess
+     * up in LoopVectorize.cpp the vector.body block in some cases
+     * (e.g., with a few iterations, in the order of magnitude of the
+     * vector unit width) it can remain with only 1 predecessor.
+     */
+    assert(numPredecessors <= 2 &&
+       "vector.body should have at most 2 predecessors: itself and one more");
+    #endif
+  }
+
+
+  /// Clone the non-branch instructions of the first successor of \p MBB
+  /// (normally middle.block) into \p MBB (the vector.body block).
+  ///
+  /// The clones are inserted, in order, just before the instruction that was
+  /// last in \p MBB when the copy started (or appended when \p MBB is
+  /// empty). Conditional and unconditional branches are not copied.
+  ///
+  /// NOTE: only the first successor is processed (see the break below), so
+  /// the final assert only verifies that \p MBB has at least one successor.
+  // IMPORTANT: We copy from successor BB (middle.block) to vector.body BB
+  void CopyInstructionsFromSucc(MachineFunction &MF, MachineBasicBlock &MBB) {
+    LLVM_DEBUG(dbgs() << " CopyInstructionsFromSucc(): Move code from succ of block "
+                      << MBB.getName() << "\n");
+
+    int numSuccessors = 0;
+
+    for (auto succMBB : MBB.successors()) {
+      numSuccessors++;
+
+      StringRef strSuccMBB = succMBB->getName();
+      LLVM_DEBUG(dbgs() << " CopyInstructionsFromSucc(): strSuccMBB = "
+                        << strSuccMBB << "\n");
+
+      /*
+      if (isVectorBody(strPredMBB) == true)
+        continue;
+      */
+
+      // Every clone goes right before the instruction that is currently the
+      // last one of MBB. Computing the position once keeps the clones in
+      // order and avoids calling back() on an empty block.
+      MachineBasicBlock::iterator insertPos =
+          MBB.empty() ? MBB.end() : MachineBasicBlock::iterator(MBB.back());
+
+      // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineBasicBlock.html
+      for (auto succMIItr = succMBB->begin();
+           succMIItr != succMBB->end();
+           ++succMIItr) {
+        MachineInstr *succMI = &(*succMIItr);
+
+        LLVM_DEBUG(dbgs() << " CopyInstructionsFromSucc(): succMI = "
+                          << *succMI << "\n");
+
+        /*
+         * We do not copy the branches of the successor.
+         */
+        // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineInstr.html
+        if (succMI->isUnconditionalBranch() ||
+            succMI->isConditionalBranch()) {
+          /* For llc -O3 it removes the JMP at the end of
+               vector.ph, hence it merges it with vector.body,
+               even if it leaves the entry label of vector.body.
+             So we need to check if predMI is JMP with
+               isUnconditionalBranch(). */
+          LLVM_DEBUG(dbgs() << "CopyInstructionsFromSucc(): found a JMP, "
+                               "so not copying it in vector.body\n");
+          continue;
+        }
+
+        /* IMPORTANT note: EmitInstruction() fails for ISD::INLINEASM
+        EmitInstruction(&predMI);
+        */
+
+        /* See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineFunction.html
+           MachineInstr *CloneMachineInstr(const MachineInstr *Orig);
+             CloneMachineInstr - Create a new MachineInstr which is a
+             copy of the 'Orig' instruction, identical in all ways except
+             the instruction has no parent, prev, or next.
+        */
+        MachineInstr *newSuccMI = MF.CloneMachineInstr(succMI);
+
+        // Gives error: "Assertion `!N->getParent() && "machine instruction already in a basic block"' failed."
+        //MBB.insert(MBB.front(), &predMI);
+        MBB.insert(insertPos, newSuccMI);
+      }
+
+      // Instead of break we should check if predMBB is the BB "just"
+      //   above predMBBGood or below
+      break;
+    }
+
+    assert(numSuccessors == 1);
+  } // END CopyInstructionsFromSucc()
+
+
+//#define TRY_DFS
+#ifdef TRY_DFS
+#define RPO
+  std::map<MachineBasicBlock *, bool> visitedMBB;
+  std::vector<MachineBasicBlock *> sortedListMBB;
+
+  void DFS(MachineBasicBlock *n) {
+    // See http://www.cplusplus.com/reference/map/map/count/
+    if (visitedMBB.count(n) != 0)
+      return;
+
+    // See http://www.cplusplus.com/reference/map/map/insert/
+    visitedMBB.insert(std::pair<MachineBasicBlock *, bool>(n, true));
+
+    #ifndef RPO
+    sortedListMBB.push_back(n);
+    #endif
+
+    StringRef strN = n->getName();
+    LLVM_DEBUG(dbgs() << "DFS(): BB name: = " << strN
+                      << ", n = " << n << "\n");
+
+    #ifdef NNNO
+    // If in the successors we have vector.ph, vector.body, etc we choose those
+    //   first.
+ for (auto MBB : n->successors()) { + StringRef strMBB = MBB->getName(); + /* + LLVM_DEBUG(dbgs() << "DFS(): BB name: = " << strMBB + << ", MBB = " << MBB << "\n"); + */ + if (strMBB.equals(StringRef("min.iters.checked")) || + // somewhat-IMPORTANT-TODO: check only for "vector.*" not for all these below + strMBB.equals(StringRef("vector.memcheck")) || + strMBB.equals(StringRef("vector.ph")) || + strMBB.equals(StringRef("vector.body.preheader")) || + strMBB.equals(StringRef("vector.body"))) { + DFS(MBB); // This will update visitedMBB to avoid further visits + } + } + #endif + + //for (auto &MBB : n->successors()) + for (auto MBB : n->successors()) { + /* + const char *strMBB = MBB->getName().data(); + LLVM_DEBUG(dbgs() << "DFS(): BB name: = " << strMBB + << ", MBB = " << MBB << "\n"); + */ + DFS(MBB); + } + + #ifdef RPO + sortedListMBB.push_back(n); + #endif + } +#endif // TRY_DFS + + + /// Emit the specified function out to the OutStreamer. + bool runOnMachineFunction(MachineFunction &MF) override { + LLVM_DEBUG(dbgs() + << "Entered ConnexAsmPrinter::runOnMachineFunction().\n"); + LLVM_DEBUG(dbgs() << " EnableCorrectBBsASMPrint = " + << EnableCorrectBBsASMPrint << "\n"); + + MachineBasicBlock *entryMBB = NULL; + + #ifdef TRY_DFS + LLVM_DEBUG(dbgs() << "Printing the MBBs, as they are ordered now:\n"); + + /* Looking at http://llvm.org/doxygen/classllvm_1_1MachineFunction.html + * it seems it's not possible to obtain the root(s) of the MB otherwise. 
+ */ + for (auto &MBB : MF) { + if (entryMBB == NULL) + entryMBB = &MBB; + StringRef strMBB = MBB.getName(); + LLVM_DEBUG(dbgs() << " BB name: = " << strMBB << "\n"); + } + // + visitedMBB.clear(); + sortedListMBB.clear(); + DFS(entryMBB); + // + #ifdef RPO + LLVM_DEBUG(dbgs() << "ConnexAsmPrinter: (RPO) sortedListMBB = \n"); + + for (int idxSListMBB = sortedListMBB.size() - 1; + idxSListMBB >= 0; idxSListMBB--) { + MachineBasicBlock *MBB = sortedListMBB[idxSListMBB]; + StringRef strMBB = MBB->getName(); + LLVM_DEBUG(dbgs() << " BB name: = " << strMBB + << ", MBB = " << MBB << "\n"); + } + #else + LLVM_DEBUG(dbgs() << "ConnexAsmPrinter: sortedListMBB = \n"); + + for (auto &MBB : sortedListMBB) { + StringRef strMBB = MBB->getName(); + LLVM_DEBUG(dbgs() << " BB name: = " << strMBB + << ", MBB = " << MBB << "\n"); + } + #endif + + /* + LLVM_DEBUG(dbgs() + << "Printing the MBBs, as they are ordered after MF.sort():\n"); + + for (auto &MBB : MF) { + StringRef strMBB = MBB.getName(); + LLVM_DEBUG(dbgs() << " BB name: = " << strMBB << "\n"); + } + */ + #endif // TRY_DFS + + int numVectorizedLoops = 0; + bool TreatRepeat2ndInnerLoopGlobal; + + // We read from startLoc.txt the configuration of the loop nests + // in order to fill correctly the std::vector treatRepeat2ndInnerLoop. + readStartLocFile(const_cast("startLoc.txt"), true); + LLVM_DEBUG(dbgs() + << "runOnMachineFunction(): treatRepeat2ndInnerLoop.size() = " + << treatRepeat2ndInnerLoop.size() << "\n"); + + if (EnableCorrectBBsASMPrint) { + // processFunction() just updates mapLD_ST_REPEAT_InlineAsm for the + // given function. 
+ processFunction(&MF); + + this->MF = &MF; + + // Inspired from ConnexRegisterInfo.cpp: + //const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + + // Inspired from http://llvm.org/docs/doxygen/html/AsmPrinter_8cpp_source.html: + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineFunction.html + for (auto &MBB : MF) { + if (numVectorizedLoops >= (int)treatRepeat2ndInnerLoop.size()) + TreatRepeat2ndInnerLoopGlobal = false; + else + TreatRepeat2ndInnerLoopGlobal = treatRepeat2ndInnerLoop[numVectorizedLoops]; + + LLVM_DEBUG(dbgs() + << "runOnMachineFunction(): TreatRepeat2ndInnerLoopGlobal = " + << TreatRepeat2ndInnerLoopGlobal << "\n"); + LLVM_DEBUG(dbgs() << "runOnMachineFunction(): numVectorizedLoops = " + << numVectorizedLoops << "\n"); + + if (TreatRepeat2ndInnerLoopGlobal == true) { + // TODO: think a bit: we should always call MoveToFrontRepeat() - we complicate a bit, BUT it is highly unlikely to have a REPEAT() after the last vector.body + // A bit inefficient - we try all MBB + MoveToFrontRepeat(&MBB); + } + else { + // If we do this we risk to have comments like "Map/Reduction part" + // after the REPEAT Opincaa instruction. + MoveToFrontRepeat(&MBB); + } + + // We take care to put the beginning marker for Opincaa kernel at the + // very front of its basic block, MBB - we try all MBBs. 
+ LLVM_DEBUG(dbgs() << + "Calling MoveToFrontInlineAsm(STR_OPINCAA_CODE_BEGIN)\n"); + MoveToFrontInlineAsm(&MBB, const_cast(STR_OPINCAA_CODE_BEGIN)); + LLVM_DEBUG(dbgs() << + "Finished calling MoveToFrontInlineAsm(STR_OPINCAA_CODE_BEGIN)\n"); + + if (isVectorBody(MBB.getName()) == false) + continue; + + numVectorizedLoops++; + + //MoveToFrontRepeat(MBB); + // + //ReplaceWithSymbolicIndex(&MBB); + /* IMPORTANT: + * We move the Inline ASM expressions to the beginning of the BB, + * by using MoveToFront(), + * such that, immediately after (see code below) we put the + * instructions of the predecessor of the vector.body BB + * at the top and then call MoveToFront(&MBB, true) again + * to make the code OK. + */ + //MoveToFront(&MBB, false); + + MachineBasicBlock *predMBBGood; + int numPredecessors = 0; + for (auto predMBB : MBB.predecessors()) { + numPredecessors++; + + if (isVectorBody(predMBB->getName()) == true) + continue; + else + predMBBGood = predMBB; + } + + // I guess normally we should have 2 predecessors, but since I mess + // up in LoopVectorize.cpp the vector.body block in some cases + // (e.g., with a few iterations, in the order of magnitude of the + // vector unit width) it can remain with only 1 predecessor. 
+ assert(numPredecessors <= 2 && "vector.body should have at most " + "2 predecessors: itself and one more"); + + if (TreatRepeat2ndInnerLoopGlobal == false) { + //copyInstructionsFromPred(MF, MBB, predMBBGood); + + // We move the header of the Opincaa kernel + MoveToFront(predMBBGood, true); + } + + // Does NOT help: MoveToFront(&MBB, true); + LLVM_DEBUG(dbgs() << + " runOnMachineFunction(): calling MoveToFrontInlineAsm(&MBB)\n"); + //MoveToFront(&MBB, false); + MoveToFrontInlineAsm(&MBB, const_cast("for (")); + + if (TreatRepeat2ndInnerLoopGlobal == true) { + MoveToBackLastInlineAsm(&MBB); + } + } // END for (auto &MBB : MF) + } // end if EnableCorrectBBsASMPrint + + SetupMachineFunction(MF); + EmitFunctionBody(); + + return false; + } // end bool runOnMachineFunction(MachineFunction &MF) + + + void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, + const char *Modifier = nullptr); + + + void EmitInstruction(const MachineInstr *MI) override; + + + // Taken from MSP430 back end + void printSrcMemOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O); + + + // processFunction() just updates mapLD_ST_REPEAT_InlineAsm. + void processFunction(const MachineFunction *MF) { + LLVM_DEBUG(dbgs() << "Entered processFunction()\n"); + + for (auto &MBB : *MF) { + for (auto MIItr = MBB.begin(); MIItr != MBB.end(); ++MIItr) { + const MachineInstr *MI = &(*MIItr); + + LLVM_DEBUG(dbgs() << "processFunction(): MI = " + << *MI << "\n"); + + if (MI->isInlineAsm()) { + // TODO TODO: check also that the InlineAsm contains the substring "note that this line is normally NOT printed in the final .cpp" + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineBasicBlock.html + MachineBasicBlock::const_iterator MIItr2 = MIItr; + // TODO TODO: check for more instr, not just the next... it should help... 
+ MIItr2++; + + const MachineInstr *MI2 = &(*MIItr2); + LLVM_DEBUG(dbgs() << "processFunction(): MI2->getOpcode() = " + << MI2->getOpcode() << "\n"); + LLVM_DEBUG(dbgs() << "processFunction(): MI2 = " + << MI2 << "\n"); + + if (MI2->getOpcode() == 0) { + /* It crashes when giving dbgs << *MI2, unfortunately... + This case happens since I changed how I treat the + writeDataToArray...() primitives in LoopVectorize.cpp + because now I don't put them at the beginning of + vector.body. */ + } + else { + LLVM_DEBUG(dbgs() << "processFunction(): MI2 = " + << *MI2 << "\n"); + } + + bool validCase = false; + if ((MI2->getOpcode() == Connex::LD_H) || + (MI2->getOpcode() == Connex::ST_H) || + (MI2->getOpcode() == Connex::REPEAT)) { + validCase = true; + } + else + if (MI2->getOpcode() == Connex::VLOAD_H) { + MIItr2++; + MI2 = &(*MIItr2); + + if (MI2->getOpcode() == Connex::ST_H) { + // TODO TODO TODO TODO: verify ALSO that dest vector register of MI2 (VLOAD_H) is used in ST_H instruction + validCase = true; + } + } + + if (validCase) { + if (ifImmSpecialUpdateMap(MI, MI2) == -1) { + /* For test 300_Opincaa_BUG_Connex/STDerr_llc_01 + we require to look 1 more instruction. 
+ */ + MIItr2++; + MI2 = &(*MIItr2); + + if ((MI2->getOpcode() == Connex::LD_H) || + (MI2->getOpcode() == Connex::ST_H)) { + //validCase = true; + ifImmSpecialUpdateMap(MI, MI2); + } + } + } + } + } + } + } // END processFunction() + + + bool /*ConnexAsmPrinter::*/ PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + LLVM_DEBUG(dbgs() << "Entered PrintAsmMemoryOperand()\n"); + return false; + } + + + bool /* ConnexAsmPrinter:: */ PrintAsmOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + LLVM_DEBUG(dbgs() << "Entered PrintAsmOperand()\n"); + return false; + } + + + void PrintSpecial(const MachineInstr *MI, raw_ostream &OS, + const char *Code) const { + LLVM_DEBUG(dbgs() << "Entered PrintSpecial()\n"); + } + + + void printOffset(int64_t Offset, raw_ostream &OS) const { + LLVM_DEBUG(dbgs() << "Entered printOffset()\n"); + } + + + // Note: NOT called + void EmitInt32(int Value) const { + LLVM_DEBUG(dbgs() << "Entered EmitInt32()\n"); + } +}; // END class ConnexAsmPrinter + +} // END namespace + + +/* +// From [LLVM]/llvm38Nov2016/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +void ConnexAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.isImm()) + O << (unsigned short int)MO.getImm(); + else + printOperand(MI, opNum, O); +} + +// From [LLVM]/llvm38Nov2016/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +void ConnexAsmPrinter::printUnsignedImm8(const MachineInstr *MI, int opNum, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.isImm()) + O << (unsigned short int)(unsigned char)MO.getImm(); + else + printOperand(MI, opNum, O); +} +*/ + + +// TODO: remove since it seems it's NOT called +void ConnexAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O, const char *Modifier) { + 
LLVM_DEBUG(dbgs() << "Entered ConnexAsmPrinter::printOperand()\n"); + const MachineOperand &MO = MI->getOperand(OpNum); + + switch (MO.getType()) { + case MachineOperand::MO_Register: + O << ConnexInstPrinter::getRegisterName(MO.getReg()); + break; + + case MachineOperand::MO_Immediate: { + unsigned imm = MO.getImm(); + LLVM_DEBUG(dbgs() << "printOperand(): imm = " << imm << "\n"); + + if (imm == CONNEX_MEM_NUM_ROWS + 10) { + O << STR_LOOP_SYMBOLIC_INDEX; + } + else { + O << MO.getImm(); + } + //O << MO.getImm(); + break; + } + + case MachineOperand::MO_MachineBasicBlock: + O << *MO.getMBB()->getSymbol(); + break; + + case MachineOperand::MO_GlobalAddress: + O << *getSymbol(MO.getGlobal()); + break; + + default: + llvm_unreachable(""); + } +} + + +void ConnexAsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O) { + const MachineOperand &Base = MI->getOperand(OpNum); + const MachineOperand &Disp = MI->getOperand(OpNum+1); + + // Print displacement first + + // Imm here is in fact global address - print extra modifier. + if (Disp.isImm() && !Base.getReg()) + O << '&'; + + printOperand(MI, OpNum+1, O, "nohash"); + + // Print register base field + if (Base.getReg()) { + O << '('; + printOperand(MI, OpNum, O); + O << ')'; + } +} + + +void ConnexAsmPrinter::EmitInstruction(const MachineInstr *MI) { + LLVM_DEBUG(dbgs() << "Entered ConnexAsmPrinter::EmitInstruction()...\n"); + + /* Inspired from lib/Target/AMDGPU/AMDGPUMCInstLower.cpp + (actually it's class AMDGPUAsmPrinter) + */ + if (MI->isBundle()) { + LLVM_DEBUG(dbgs() << " EmitInstruction(): handling bundle\n"); + const MachineBasicBlock *MBB = MI->getParent(); + //MachineBasicBlock::const_instr_iterator I = ++MI->getIterator(); + MachineBasicBlock::const_instr_iterator I = MI->getIterator(); + I++; + // THIS cycles ~forever... 
EmitInstruction(& (*I) ); + + /* + // From http://llvm.org/docs/doxygen/html/classllvm_1_1MachineInstr.html + bool isInsideBundle () const + Return true if MI is in a bundle (but not the first MI in a bundle). + */ + while (I != MBB->instr_end() && I->isInsideBundle()) { + EmitInstruction(& (*I) ); + ++I; + } + + // Prints wrong instructions: EmitInstruction(& (*I) ); + return; + } + + //#ifdef ORIGINAL_CODE + ConnexMCInstLower MCInstLowering(OutContext, *this); + + MCInst TmpInst; + MCInstLowering.Lower(MI, TmpInst); + + crtMI = MI; + + EmitToStreamer(*OutStreamer, TmpInst); + + //OutStreamer->EmitInstruction(MIPred, getSubtargetInfo()); + //#endif + + //AsmPrinter::EmitInstruction(MI); +} // END ConnexAsmPrinter::EmitInstruction() + + +// Force static initialization. +extern "C" void LLVMInitializeConnexAsmPrinter() { + RegisterAsmPrinter Z(TheConnexTarget); +} + Index: lib/Target/Connex/ConnexAsmPrinterLoopNests.h =================================================================== --- lib/Target/Connex/ConnexAsmPrinterLoopNests.h +++ lib/Target/Connex/ConnexAsmPrinterLoopNests.h @@ -0,0 +1,126 @@ +//===-- ConnexAsmPrinterLoopNests.h - -----*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file implements reading the startLoc.txt file with info about start and +/// end locations of loops nests, generated by the LoopVectorize pass. +// Used by ConnexAsmPrinter.cpp and ReplaceLoopsWithOpincaaKernels.cpp. 
+//===----------------------------------------------------------------------===// + +#ifndef CONNEX_ASM_PRINTER_LOOP_NESTS_H +#define CONNEX_ASM_PRINTER_LOOP_NESTS_H + +// Used by ReplaceLoopsWithOpincaaKernels.cpp and ConnexAsmPrinter.cpp + +std::vector treatRepeat2ndInnerLoop; +// The start and end of the innermost (or 2nd innermost) loop +std::vector linStart, colStart, linEnd, colEnd; +// +std::vector linStartLoopNest, colStartLoopNest, linEndLoopNest, colEndLoopNest; + +/* +We read in vectors the lines and columns of the innermost loop + and, if there is one, also of the outermost loop + of the loop nests specified in the startLoc.txt file. + We put in treatRepeat2ndInnerLoop vector true + depending if the loop nest has more than 1 loop in the nest, + false otherwise. + +Note: We keep the numbering from 1 throughout the ENTIRE program, + BUT in FindEndLoop() we decrement the value. +*/ +void readStartLocFile(char *fileNameSrc, bool silentFail=false) { + int index; + char str[MAXLEN_STR]; + + int linStartTmp, colStartTmp; + int linEndTmp, colEndTmp; + + FILE *fin = fopen(fileNameSrc, "rt"); + + /* We need to process each loop, from the last in the file to the first, + therefore preserving the line & column numbers of the loops that + remain to be replaces. + */ + if (silentFail) { + if (fin == NULL) { + printf("startLoc.txt file NOT found (maybe NO loop was vectorized)"); + return; + } + } + assert(fin != NULL && + "readStartLocFile(): fileNameSrc (e.g., startLoc.txt) file NOT found (maybe NO loop was vectorized). 
" + "Anyhow cannot automatically replace in source file vectorized loops with Opincaa kernels."); + + //for (index = 0; index < replaceString.size(); index++) + for (index = 0; ; index++) { + // We read the line with the C++ comment and discard it + if (fgets(str, MAXLEN_STR - 1, fin) == NULL) + break; + + printf("str = %s\n", str); + fflush(stdout); + + // We read the coordinates of the innermost loop of the crt nest + fscanf(fin, "%d %d %d %d\r\n", &linStartTmp, &colStartTmp, + &linEndTmp, &colEndTmp); + // + printf("readStartLocFile(): index = %d\n", index); + + printf("readStartLocFile(): (linStart = %d, colStart = %d) -> " + "(linEndTmp = %d, colEndTmp = %d)\n", + linStartTmp, colStartTmp, linEndTmp, colEndTmp); + fflush(stdout); + // + linStart.push_back(linStartTmp); + colStart.push_back(colStartTmp); + linEnd.push_back(linEndTmp); + colEnd.push_back(colEndTmp); + assert(linStartTmp <= linEndTmp); + + // We check if the next line is one with C++ comment + int ch = getc(fin); + ungetc(ch, fin); + + printf("readStartLocFile(): ch = %d\n", (int)ch); + fflush(stdout); + + if ((ch == '/') || (ch == -1)) { + treatRepeat2ndInnerLoop.push_back(false); + + linStartLoopNest.push_back(-1); + colStartLoopNest.push_back(-1); + linEndLoopNest.push_back(-1); + colEndLoopNest.push_back(-1); + } + else { + // We read the coordinates of the outermost loop of the crt nest + treatRepeat2ndInnerLoop.push_back(true); + + fscanf(fin, "%d %d %d %d\r\n", &linStartTmp, &colStartTmp, + &linEndTmp, &colEndTmp); + printf("readStartLocFile(): (linStart = %d, colStart = %d) -> " + "(linEndTmp = %d, colEndTmp = %d)\n", + linStartTmp, colStartTmp, linEndTmp, colEndTmp); + fflush(stdout); + + linStartLoopNest.push_back(linStartTmp); + colStartLoopNest.push_back(colStartTmp); + linEndLoopNest.push_back(linEndTmp); + colEndLoopNest.push_back(colEndTmp); + } + + printf("readStartLocFile(): treatRepeat2ndInnerLoop[%d] = %d\n", + index, (int)treatRepeat2ndInnerLoop[index]); + fflush(stdout); + } 
+ + fclose(fin); +} // END readStartLocFile() + +#endif // end CONNEX_ASM_PRINTER_LOOP_NESTS_H Index: lib/Target/Connex/ConnexConfig.h =================================================================== --- lib/Target/Connex/ConnexConfig.h +++ lib/Target/Connex/ConnexConfig.h @@ -0,0 +1,78 @@ +#ifndef CONNEX_CONFIG_ALEX +#define CONNEX_CONFIG_ALEX + +// This file is used by ConnexISelDAGToDAG.cpp, ConnexISelLowering.h, +// ReplaceLoopsWithOpincaaKernels.cpp. + +// The macros in this header file are strategic, in the sense that the back end +// could target a Connex vector processor of different vector length. +// There are also some other important macros like: CONNEX_MEM_NUM_ROWS_EXTRA +// (used to keep spilled registers, or tables for f16 operations like sqrt +// or div, etc), STR_OPINCAA, etc. + + +// These 2 types are defined also in Opincaa lib, in include/Architecture.h +typedef short TypeElement; +typedef unsigned short UnsignedTypeElement; + + +// The vector length of the Connex back end, which could be different +// from the actual vector length of the Connex processor. 
+#define CONNEX_VECTOR_LENGTH 8
+
+// Size in bytes of a vector element (TypeElement is a short) and of a
+//   whole vector line.
+#define TYPE_SIZEOF 2
+#define CONNEX_LINE_SIZE (CONNEX_VECTOR_LENGTH * TYPE_SIZEOF)
+
+//#define STR_LOOP_SYMBOLIC_INDEX "indexLLVM_LV / CONNEX_VECTOR_LENGTH"
+// NOTE: make sure it is equivalent to the above commented macro
+// NOTE: keep the parentheses since >> has low operator priority
+// NOTE(review): ">> 7" divides by 128, while CONNEX_VECTOR_LENGTH is 8 here
+//   (which would be ">> 3") - presumably 128 is the vector length of the
+//   actual processor; confirm that the mismatch is intended.
+#define STR_LOOP_SYMBOLIC_INDEX "(indexLLVM_LV >> 7)"
+
+// This is the type of the scalar processor (normally the BPF processor) operand
+// TODO_CHANGE_BACKEND:
+#define TYPE_SCALAR_ELEMENT MVT::i64
+//#define TYPE_ELEMENT MVT::i32
+
+//#define TYPE_VECTOR MVT::v8i64
+//#define TYPE_VECTOR MVT::v16i32
+//#define TYPE_VECTOR MVT::v32i16
+//#define TYPE_VECTOR_I16 MVT::v128i16
+#define TYPE_VECTOR_I16 MVT::v8i16
+//#define TYPE_VECTOR_ELEMENT MVT::i64
+#define TYPE_VECTOR_I16_ELEMENT MVT::i16
+
+//#define TYPE_VECTOR_I32 MVT::v64i32
+#define TYPE_VECTOR_I32 MVT::v4i32
+#define TYPE_VECTOR_I32_ELEMENT MVT::i32
+
+//#define TYPE_VECTOR_F16 MVT::v128f16
+#define TYPE_VECTOR_F16 MVT::v8f16
+#define TYPE_VECTOR_F16_ELEMENT MVT::f16
+
+
+// Size in bits of one element of each of the vector types above
+#define TYPE_VECTOR_I16_ELEMENT_BITSIZE 16
+#define TYPE_VECTOR_I32_ELEMENT_BITSIZE 32
+#define TYPE_VECTOR_F16_ELEMENT_BITSIZE 16
+
+
+#define CONNEX_MEM_NUM_ROWS 1024
+// For 64 lanes: #define CONNEX_MEM_NUM_ROWS 2048
+// Extra LS memory for spills and LUTs for div/sqrt.f16, etc
+#define CONNEX_MEM_NUM_ROWS_EXTRA 200
+
+// NOTE: normally REPEAT accepts immediates in interval 0..1023
+#define VALUE_BOGUS_REPEAT_X_TIMES 32761
+
+
+// Size of the char buffers used to read text lines (e.g., by
+//   readStartLocFile() in ConnexAsmPrinterLoopNests.h)
+//#ifndef MAXLEN_STR
+#define MAXLEN_STR 8192
+//#endif
+
+// Used in ConnexAsmPrinter.cpp and LoopVectorize.cpp
+#define STR_OPINCAA_CODE_BEGIN "// START_OPINCAA_HOST_DEVICE_CODE"
+#define STR_OPINCAA_CODE_END "// END_OPINCAA_HOST_DEVICE_CODE"
+
+#define STR_OPINCAA_KERNEL_REDUCE_BEFORE_END "REDUCE R(0); // We add a 'bogus' REDUCE to wait for it"
+
+#endif
+
Index: lib/Target/Connex/ConnexFrameLowering.h
===================================================================
---
lib/Target/Connex/ConnexFrameLowering.h
+++ lib/Target/Connex/ConnexFrameLowering.h
@@ -0,0 +1,41 @@
+//===-- ConnexFrameLowering.h - Define frame lowering for Connex -----*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This class implements Connex-specific bits of TargetFrameLowering class.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CONNEX_CONNEXFRAMELOWERING_H
+#define LLVM_LIB_TARGET_CONNEX_CONNEXFRAMELOWERING_H
+
+#include "llvm/CodeGen/TargetFrameLowering.h"
+
+namespace llvm {
+class ConnexSubtarget;
+
+/// Frame lowering for the Connex target.
+class ConnexFrameLowering : public TargetFrameLowering {
+public:
+  /// The stack grows down. \p sti is not used.
+  // NOTE(review): 8 and 0 are presumably the stack alignment and the local
+  //   area offset expected by the TargetFrameLowering constructor - confirm.
+  explicit ConnexFrameLowering(const ConnexSubtarget &sti)
+      : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0) {}
+
+  // Both are defined in ConnexFrameLowering.cpp
+  void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+
+  bool hasFP(const MachineFunction &MF) const override;
+  void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+                            RegScavenger *RS) const override;
+
+  /// Erase the call frame pseudo instruction \p MI from \p MBB and return
+  /// the iterator given back by MBB.erase().
+  MachineBasicBlock::iterator
+  eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MI) const override {
+    return MBB.erase(MI);
+  }
+};
+}
+#endif
Index: lib/Target/Connex/ConnexFrameLowering.cpp
===================================================================
--- lib/Target/Connex/ConnexFrameLowering.cpp
+++ lib/Target/Connex/ConnexFrameLowering.cpp
@@ -0,0 +1,39 @@
+//===-- ConnexFrameLowering.cpp - Connex Frame Information ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the Connex implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "ConnexFrameLowering.h" +#include "ConnexInstrInfo.h" +#include "ConnexSubtarget.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +bool ConnexFrameLowering::hasFP(const MachineFunction &MF) const { return true; } + +void ConnexFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const {} + +void ConnexFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const {} + +void ConnexFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + SavedRegs.reset(Connex::R6); + SavedRegs.reset(Connex::R7); + SavedRegs.reset(Connex::R8); + SavedRegs.reset(Connex::R9); +} Index: lib/Target/Connex/ConnexHazardRecognizer.h =================================================================== --- lib/Target/Connex/ConnexHazardRecognizer.h +++ lib/Target/Connex/ConnexHazardRecognizer.h @@ -0,0 +1,79 @@ +//===-- ConnexHazardRecognizer.h - Define frame lowering for Connex -----*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares the hazard recognizer of the Connex back end.
+///
+//===----------------------------------------------------------------------===//
+
+
+/* Inspired from llvm/lib/Target/PowerPC/PPCHazardRecognizer.h:
+  /// PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based
+  /// hazard recognizer for PPC ooo processors with dispatch-group hazards.
+*/
+
+
+#ifndef LLVM_LIB_TARGET_CONNEX_HAZARDRECOGNIZER_H
+#define LLVM_LIB_TARGET_CONNEX_HAZARDRECOGNIZER_H
+
+#include "ConnexInstrInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+
+namespace llvm {
+
+/* NOTE: ScheduleHazardRecognizer is basically an "interface"
+ *   (almost abstract, i.e. almost no functionality implemented) class, so
+ *   better stick with ScoreboardHazardRecognizer if its functionality is OK
+ *   for me:
+class ConnexDispatchGroupSBHazardRecognizer : public ScheduleHazardRecognizer {
+*/
+
+/* We choose to inherit the ScoreboardHazardRecognizer because only this
+ *   performs out-of-order scheduling, and NOT ScheduleHazardRecognizer.
+ */
+class ConnexDispatchGroupSBHazardRecognizer : public ScoreboardHazardRecognizer {
+  // The scheduling DAG received by the constructor
+  const ScheduleDAG *DAG;
+  // Defined in ConnexHazardRecognizer.cpp
+  bool isDataHazard(SUnit *SU);
+
+  /*
+  SmallVector CurGroup;
+  unsigned CurSlots, CurBranches;
+
+  bool isLoadAfterStore(SUnit *SU);
+  bool isBCTRAfterSet(SUnit *SU);
+  bool mustComeFirst(const MCInstrDesc *MCID, unsigned &NSlots);
+  */
+
+public:
+  /// Forwards \p ItinData and \p DAG_ to ScoreboardHazardRecognizer and
+  /// keeps \p DAG_ in the DAG member.
+  ConnexDispatchGroupSBHazardRecognizer(const InstrItineraryData *ItinData,
+                                        const ScheduleDAG *DAG_) :
+    ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_)
+    //, CurSlots(0), CurBranches(0)
+  {
+    //DEBUG(dbgs() << "Entered ConnexDispatchGroupSBHazardRecognizer()\n");
+  }
+
+  // The overrides below are defined in ConnexHazardRecognizer.cpp
+  HazardType getHazardType(SUnit *SU, int Stalls) override;
+
+  unsigned PreEmitNoops(SUnit *SU) override;
+  /*
+  bool ShouldPreferAnother(SUnit* SU) override;
+  */
+  void EmitInstruction(SUnit *SU) override;
+  /*
+  void AdvanceCycle() override;
+  void RecedeCycle() override;
+  void Reset() override;
+  void EmitNoop() override;
+  */
+};
+
+}
+
+#endif
+
Index: lib/Target/Connex/ConnexHazardRecognizer.cpp
===================================================================
--- lib/Target/Connex/ConnexHazardRecognizer.cpp
+++ lib/Target/Connex/ConnexHazardRecognizer.cpp
@@ -0,0 +1,471 @@
+//===-- ConnexHazardRecognizer.cpp - Connex Hazard Recognizer Impls --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements hazard recognizer for scheduling on Connex processors.
+// +//===----------------------------------------------------------------------===// + + +// Inspired from llvm/lib/Target/PowerPC/PPCHazardRecognizer.cpp + +/* +The delay slot issues that need to be handled are for: + - where normally; but NOW ([!!!!THINK BETTER - we added support for i32]) we only generate WHERE for the VSELECT LLVM IR instruction + a bundle of 4 instructions (in ConnexTargetMachine.cpp, passes PassCreateWhereBlocks and PassFinalizeBundles). + Basically we expand the following pseudo-machine instruction: + dst = VSELECT pred, true_assignment, false_assignment: + to the following Connex machine instr: + (note the comparison is excluded from the bundle - + it's scheduled before it) + // For pred == false + dst = false_assignment + WHERExy + // For pred == true: + dst = true_assignment + END_WHERE + + // The comparison is excluded from the bundle (SHOULD be scheduled before it) + predicate-false register assignment + WHERExy + predicate-true register assignment + END_WHERE + Note: I tried to use TII->PredicateInstruction() but id didn't work - see http://lists.llvm.org/pipermail/llvm-dev/2017-March/111026.html + - read, write + - iwrite + for each operation updating the register used by these instructions just before, which can be: + iread, vload, ldix, multlo/hi, ldsh, add/c, sub/c, eq/ult/lt, (i)shl, (i)shr, (i)shra, popcount, not/or/and/xor. + +Similarly with the wherexx Connex instruction. + +The point is that we should try NOT to focus on the delay slots of the producer instructions (in number of 24), but focus on these delays at the consumer side because there are only 6 consumer instructions (read/write, iwrite, wherecr/eq/lt). + +Not only that, but we should try to fill the delay slots with instructions in out-of-order fashion. 
+ +Hal Finkel pointed me to lib/Target/PowerPC/PPCHazardRecognizers.cpp: + On 2/3/2017 10:25 PM, Hal Finkel wrote: + > Hi Alex, + > You can program a post-RA scheduler which will return NoopHazard in the appropriate + > circumstances. You can look at the PowerPC target (e.g. + > lib/Target/PowerPC/PPCHazardRecognizers.cpp) as an example. + +I guess Hal recommends customizing the post-RA scheduler because after RA we have finished all(?) instruction selection steps and we handle MachineInstr, which makes life simpler for us to see if we have ST_H or ST_INDIRECT, etc. +See the Figure with passes in \cite{Cardoso_Lopes2014}, page 134. + +\cite{Cardoso_Lopes2014} + "There are three distinct scheduler executions in the code generator: + two prior and one post register allocation. The first works on + SelectionDAG nodes while the other two work on machine + instructions" + + "The scheduler runs before and after register allocation. However, the SDNode + instruction representation is only available in the former while the latter uses the + MachineInstr class. To cope with both SDNodes and MachineInstrs, the SUnit class + (see the file /include/llvm/CodeGen/ScheduleDAG.h) abstracts the + underlying instruction representation as the unit used during instruction scheduling."
+ +See also http://llvm.org/docs/doxygen/html/classllvm_1_1ScheduleHazardRecognizer.html#details + <> +*/ + + + + +#include "ConnexHazardRecognizer.h" +#include "Connex.h" +#include "ConnexInstrInfo.h" +#include "ConnexTargetMachine.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "Misc.h" // For dumpSU() + +using namespace llvm; + + + +#define DEBUG_TYPE "post-RA-sched" + +// getPredMachineInstr() is declared in ConnexInstrInfo.cpp +extern MachineInstr *getPredMachineInstr(MachineInstr *MI, MachineInstr **succMI); + + + + + + +/* + From http://llvm.org/docs/doxygen/html/ScheduleHazardRecognizer_8h_source.html#l00078: + 00073 /// PreEmitNoops - This callback is invoked prior to emitting an instruction. + 00074 /// It should return the number of noops to emit prior to the provided + 00075 /// instruction. + 00076 /// Note: This is only used during PostRA scheduling. EmitNoop is not called + 00077 /// for these noops. 
+ * + */ +unsigned ConnexDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) { + assert(SU->isInstr() == true); + + /* + MachineInstr *MI = SU->getInstr(); + int MIOpcode = MI->getOpcode(); + if (MIOpcode == Connex::LD_INDIRECT_H) + */ + if (isDataHazard(SU)) + return 1; + + return ScoreboardHazardRecognizer::PreEmitNoops(SU); +} + + +bool ConnexDispatchGroupSBHazardRecognizer::isDataHazard(SUnit *SU) { + // From http://llvm.org/docs/doxygen/html/classllvm_1_1MCInstrDesc.html + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + if (MCID == NULL) + return false; + + /* + // Note: MCPhysReg is an integer - + // see http://llvm.org/docs/doxygen/html/namespacellvm.html: + // "typedef uint16_t llvm::MCPhysReg" + const MCPhysReg *MCIDArray = MCID->getImplicitUses(); + unsigned numUses = MCID->getNumImplicitUses(); seems it is always 0 + */ + //const MCOperandInfo *MCIDArray = MCID->OpInfo; + unsigned numUses = MCID->getNumOperands() - MCID->getNumDefs(); + //LLVM_DEBUG(dbgs() << " isDataHazard(): SU = " << numUses << "\n"); + LLVM_DEBUG(dbgs() << " isDataHazard(): numUses = " << numUses << "\n"); + LLVM_DEBUG(dbgs() << " isDataHazard(): MCID->getNumOperands() = " + << MCID->getNumOperands() << "\n"); + LLVM_DEBUG(dbgs() << " isDataHazard(): MCID->getNumDefs() = " + << MCID->getNumDefs() << "\n"); + + + assert(SU->isInstr() == true); + + MachineInstr *MI = SU->getInstr(); + LLVM_DEBUG(dbgs() << " isDataHazard(): MI ="; + MI->dump(); + ); + + int MIOpcode = MI->getOpcode(); + LLVM_DEBUG(dbgs() << " isDataHazard(): MI->getOpcode() = " + << MI->getOpcode() << "\n"); + + + if (MIOpcode == Connex::ST_INDIRECT_H || + MIOpcode == Connex::ST_INDIRECT_W || + MIOpcode == Connex::ST_INDIRECT_MASKED_H || + MIOpcode == Connex::ST_H) { + /* NOTE: END_REPEAT returns, to my surprise, also mayStore(). + But we should not worry about this since END_REPEAT takes no + parameter. 
*/ + /* + if (MCID->mayStore()) + if (MCID->mayLoad()) + */ + LLVM_DEBUG(dbgs() << " isDataHazard(): SU is Store\n"); + } + else + if (MIOpcode == Connex::LD_INDIRECT_H || + MIOpcode == Connex::LD_INDIRECT_W || + MIOpcode == Connex::LD_INDIRECT_MASKED_H) { + LLVM_DEBUG(dbgs() << " isDataHazard(): SU is Load\n"); + } + else + if ( + //assert(MIOpcode != Connex::WHERECRY); + //MIOpcode == Connex::WHERECRY || + MIOpcode == Connex::WHEREEQ_BUNDLE_H || + MIOpcode == Connex::WHERELT_BUNDLE_H || + MIOpcode == Connex::WHEREULT_BUNDLE_H) { + LLVM_DEBUG(dbgs() << " isDataHazard(): SU is Where\n"); + } + else { + LLVM_DEBUG(dbgs() << " isDataHazard(): SU NOT producing data hazard\n"); + + // VERY IMPORTANT + return false; + } + + LLVM_DEBUG(dbgs() << " isDataHazard(): MI->getNumOperands() = " + << MI->getNumOperands() << "\n"); + + /* + Why does getHazardType() find 3 Loads - because I was considering pred in DAG (SDNode), not in MachineInstr list, where it should be only 1? + + This should cover these cases described in ConnexISA.docx: + - (i)write using register defined in the previous instruction: + LS[R1] = R4 + LS[5] = R1 + and also this slightly different case: + LS[R10] = R1 + + - read using register defined in the previous instruction + R4 = LS[R1] + + - wherexx using the flag defined in the previous instruction + R1 = (R2 == R3) + WHERE_EQUAL + */ + + /* small-TODO: understand conceptually what PPC was doing with dispatch group. + + IMPORTANT: We keep this search for predecessors of SU in the DAG and not for + THE only predecessor of the MachineInstr (we are at Post-RA scheduler) + contained in SU because MAYBE/it is possible that when doing + ScoreboardHazardRecognizer (out-of-order scheduling to fill delay slots) + we could benefit from the DAG predecessors - QUITE UNLIKELY, but maybe + so. Otherwise, we should ONLY look at the + getPredMachineInstr(MachineInstr *MI). + + For any predecessors of SU with which we + have an ordering dependency, return true. 
*/ + for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { + const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); + + if (PredMCID == NULL) // || !PredMCID->mayStore()) + continue; + + /* SU->Preds is SmallVector of SDep. + * - see http://llvm.org/docs/doxygen/html/classllvm_1_1SUnit.html + * - see http://llvm.org/docs/doxygen/html/classllvm_1_1SDep.html + */ + MachineInstr *PredMI = (SU->Preds[i].getSUnit())->getInstr(); + MachineInstr *tmpNotUsed; + if (PredMI != getPredMachineInstr(MI, &tmpNotUsed)) { + LLVM_DEBUG(dbgs() << " isDataHazard(): jumping DAG predecessor that is " + "NOT MachineInstr predecessor: PredMI ="; + PredMI->dump(); + dbgs() << " for MI ="; + MI->dump(); + ); + continue; + } + + LLVM_DEBUG(dbgs() << " isDataHazard(): Found DAG predecessor that is " + "MachineInstr predecessor: PredMI ="; + PredMI->dump(); + dbgs() << " for MI ="; + MI->dump(); + ); + + LLVM_DEBUG(dbgs() << " isDataHazard(SU->Preds[" + << i << "] = "; + PredMI->dump(); + //(SU->Preds[i].getSUnit())->dump(DAG); + //PredMCID->dump(DAG); + dbgs() << ")\n"); + + /* + * // TODO: check BETTER we have to check SU->Preds[i] is THE prev + instruction in the list of MachineInstr - .getParent() + * TODO TODO TODO: we have to check for + * LD_INDIRECT_H for the memory (offset) register, + * not the passthrough (or mask). 
+ */ + + /* + const MCPhysReg *PredMCIDArray = PredMCID->getImplicitDefs(); + unsigned numDefs = PredMCID->getNumImplicitDefs(); seems it is always 0 + */ + unsigned numDefs = PredMCID->getNumDefs(); + //const MCOperandInfo *PredMCIDArray = PredMCID->OpInfo; + LLVM_DEBUG(dbgs() << " isDataHazard(): numDefs = " << numDefs << "\n"); + LLVM_DEBUG(dbgs() << " isDataHazard(): PredMI->getNumOperands() = " + << PredMI->getNumOperands() << "\n"); + LLVM_DEBUG(dbgs() << " isDataHazard(): PredMCID->getNumOperands() = " + << PredMCID->getNumOperands() << "\n"); + LLVM_DEBUG(dbgs() << " isDataHazard(): PredMCID->getNumDefs() = " + << PredMCID->getNumDefs() << "\n"); + + int idUseStart; + if (MIOpcode == Connex::LD_INDIRECT_H || MIOpcode == Connex::LD_INDIRECT_W || + MIOpcode == Connex::LD_INDIRECT_MASKED_H) { + LLVM_DEBUG(dbgs() << " isDataHazard(): PredMI->getOpcode() = " + << PredMI->getOpcode() << "\n"); + + if (PredMI->isInlineAsm()) { + LLVM_DEBUG(dbgs() + << " isDataHazard(): PredMI is INLINEASM so return true" + << "\n"); + /* We assume that the PredMI INLINEAASM is NOT a Connex + * instruction, but a host-side Opincaa C++ for loop. + * In such case, we can have 2 data hazards with MI: + * - one with the instruction above this C++ for statement + * - one with the instruction at the end of this for loop + * when we unroll (if the trip-count of the loop is >1) + * this for loop + * + * IMPORTANT-TODO: make full checks and + * return true only if it + * is the case, to be more efficient. 
+ */ + // IMPORTANT-TODO: return true; + } + + /* %Wh5, %BoolMask1 = LD_INDIRECT_MASKED_H %Wh4, %BoolMask0, %Wh0; mem:LD256[inttoptr (i16 51 to i16*)](tbaa=!12)(alias.scope=!16) + The arguments ("uses") of LD_INDIRECT_MASKED_H are: + %Wh4 - I think it is the passthrough register + (if mask bit is 0 we use passthrough) + %BoolMask0 - is the mask + %Wh0 - the offset register (if mask bit is 0 we use passthrough) + Note that Connex does NOT support masked gather just with read + (it requires WHERE also and things become more complex than + just masked gather, in principle) + */ + + if (MIOpcode == Connex::LD_INDIRECT_MASKED_H) { + idUseStart = MCID->getNumDefs() + 2; // 1 for passthrough, 1 for bool mask + } + else + if (MIOpcode == Connex::LD_INDIRECT_H || MIOpcode == Connex::LD_INDIRECT_W) { + idUseStart = MCID->getNumDefs(); // 1 for passthrough, 1 for bool mask + } + } + else { + idUseStart = MCID->getNumDefs(); + } + + for (unsigned idUse = idUseStart; idUse < numUses; idUse++) { + /* + LLVM_DEBUG(dbgs() << " isDataHazard(): MCIDArray[" << idUse + << "] = " << MCIDArray[idUse] << "\n"); + */ + LLVM_DEBUG(dbgs() << " isDataHazard(): MI->getOperand(" << idUse + << ") = " << MI->getOperand(idUse) << "\n"); + for (unsigned idDef = 0; idDef < numDefs; idDef++) { + /* + LLVM_DEBUG(dbgs() << " isDataHazard(): PredMCIDArray[" << idDef + << "] = " << PredMCIDArray[idDef] << "\n"); + if (PredMCIDArray[idDef] == MCIDArray[idUse]) { + LLVM_DEBUG(dbgs() << " isDataHazard(): found an instr sequence that has to be separated by NOP to avoid true dependency hazard\n"); + return true; + } + */ + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineOperand.html + const MachineOperand &PredMIMO = PredMI->getOperand(idDef); + const MachineOperand &MIMO = MI->getOperand(idUse); + LLVM_DEBUG(dbgs() << " isDataHazard(): PredMI->getOperand(" << idDef + << ") = " << PredMI->getOperand(idDef) << "\n"); + + if ((PredMI->getOpcode() != Connex::END_WHERE) && + (PredMI->getOpcode() != 
Connex::WHEREEQ) && + (PredMI->getOpcode() != Connex::WHERELT) && + (PredMI->getOpcode() != Connex::WHERECRY) && + PredMIMO.isReg() && MIMO.isReg() && + PredMIMO.getReg() == MIMO.getReg()) { + LLVM_DEBUG(dbgs() + << " isDataHazard(): found an instr sequence " + "(defReg = PredOpcode; write/read/Where useReg;) and " + "defReg == useReg. " + "This sequence has to be separated by NOP to avoid " + "true dependency hazard\n"); + return true; + } + } + } + /* + if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier()) + continue; + */ + //return true; + } + + return false; +} + + +ScheduleHazardRecognizer::HazardType +ConnexDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + #ifdef USE_GETHAZARDTYPE + static bool emittedNoop = false; + + // From http://llvm.org/docs/doxygen/html/classllvm_1_1SUnit.html + LLVM_DEBUG(dbgs() << "ConnexDispatchGroupSBHazardRecognizer::getHazardType(SU = "; + SU->dump(DAG); + dbgs() << ", Stalls = " << Stalls << ") and " + << "emittedNoop = " << emittedNoop << "\n"); + + //if (Stalls == 0 && isLoadAfterStore(SU)) + if (Stalls == 0 && // no (pipeline?) stalls + emittedNoop == false && // TODO This is a ~lousy solution, but can generate several NOPs in a function, etc + /* TODO TODO: the problem I have is due to wrong instr + itineraries??? */ + isDataHazard(SU)) { + LLVM_DEBUG(dbgs() << " getHazardType(): return NoopHazard\n"); + + emittedNoop = true; + + return NoopHazard; + /* TODO TODO TODO TODO TODO TODO TODO: figure out how to make this work. 
+ Does NOT help at all (no change in code - not NOP, + nor other useful instr in the delay slot): + return Hazard; + */ + } + else { + emittedNoop = false; + } + + return NoHazard; + #endif + + return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); +} + +void ConnexDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) { + unsigned i, ie; + + LLVM_DEBUG(dbgs() << "Entered Connex's ConnexDispatchGroupSBHazardRecognizer::EmitInstruction("; + dumpSU(SU, dbgs()); + dbgs() << ")\n"); + // + assert(SU->isInstr() == true); + MachineInstr *MI = SU->getInstr(); + MachineBasicBlock *MBB = MI->getParent(); + LLVM_DEBUG(dbgs() << " EmitInstruction(): MBB = " + << MBB->getFullName() << "\n" + //MBB->dump(); + ); + + LLVM_DEBUG(dbgs() << " SU->Succs.size() = " + << SU->Succs.size() << "\n"); + LLVM_DEBUG(dbgs() << " SU->Preds.size() = " + << SU->Preds.size() << "\n"); + + for (i = 0, ie = (unsigned) SU->Succs.size(); i != ie; ++i) { + MachineInstr *SuccMI = (SU->Succs[i].getSUnit())->getInstr(); + if (SuccMI == NULL) { + LLVM_DEBUG(dbgs() << " SU->Succs[" + << i << "] = NULL\n"); + } + else { + LLVM_DEBUG(dbgs() << " SU->Succs[" + << i << "] = "; + SuccMI->dump(); + dbgs() << "\n"); + } + } + for (i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { + MachineInstr *PredMI = (SU->Preds[i].getSUnit())->getInstr(); + if (PredMI == NULL) { + LLVM_DEBUG(dbgs() << " SU->Preds[" + << i << "] = NULL\n"); + } + else { + LLVM_DEBUG(dbgs() << " SU->Preds[" + << i << "] = "; + PredMI->dump(); + dbgs() << "\n"); + } + } + + return ScoreboardHazardRecognizer::EmitInstruction(SU); +} + Index: lib/Target/Connex/ConnexHazardRecognizerPreRAScheduler.h =================================================================== --- lib/Target/Connex/ConnexHazardRecognizerPreRAScheduler.h +++ lib/Target/Connex/ConnexHazardRecognizerPreRAScheduler.h @@ -0,0 +1,70 @@ +//===-- ConnexHazardRecognizerPreRAScheduler.h - Define frame lowering for Connex -----*- C++ -*--===// +// +// Part of the 
LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +//===----------------------------------------------------------------------===// + +/* Inspired from llvm/lib/Target/PowerPC/PPCHazardRecognizer.h: + /// PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based + /// hazard recognizer for PPC ooo processors with dispatch-group hazards. +*/ + + +#ifndef LLVM_LIB_TARGET_CONNEX_HAZARDRECOGNIZER_PRE_RA_SCHEDULER_H +#define LLVM_LIB_TARGET_CONNEX_HAZARDRECOGNIZER_PRE_RA_SCHEDULER_H + +#include "ConnexInstrInfo.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/ScoreboardHazardRecognizer.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" + +namespace llvm { + +/* We choose to inherit the ScoreboardHazardRecognizer because only this + * performs out-of-order scheduling, and NOT ScheduleHazardRecognizer. 
+ */ +class ConnexDispatchGroupSBHazardRecognizerPreRAScheduler : public ScoreboardHazardRecognizer { + const ScheduleDAG *DAG; + bool isReadAfterWrite(SUnit *SU); + + /* + SmallVector CurGroup; + unsigned CurSlots, CurBranches; + + bool isLoadAfterStore(SUnit *SU); + bool isBCTRAfterSet(SUnit *SU); + bool mustComeFirst(const MCInstrDesc *MCID, unsigned &NSlots); + */ + +public: + ConnexDispatchGroupSBHazardRecognizerPreRAScheduler( + const InstrItineraryData *ItinData, + const ScheduleDAG *DAG_) : + ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_) { + //DEBUG(dbgs() + // << "Entered ConnexDispatchGroupSBHazardRecognizerPreRAScheduler()\n"); + } + + HazardType getHazardType(SUnit *SU, int Stalls) override; + /* + bool ShouldPreferAnother(SUnit* SU) override; + */ + unsigned PreEmitNoops(SUnit *SU) override; + void EmitInstruction(SUnit *SU) override; + /* + void AdvanceCycle() override; + void RecedeCycle() override; + void Reset() override; + */ + void EmitNoop() override; +}; + +} + +#endif Index: lib/Target/Connex/ConnexHazardRecognizerPreRAScheduler.cpp =================================================================== --- lib/Target/Connex/ConnexHazardRecognizerPreRAScheduler.cpp +++ lib/Target/Connex/ConnexHazardRecognizerPreRAScheduler.cpp @@ -0,0 +1,337 @@ +//===-- ConnexHazardRecognizerPreRAScheduler.cpp - Connex Hazard Recognizer Impls --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements hazard recognizer for scheduling on PowerPC processors. 
+// +//===----------------------------------------------------------------------===// + +// Inspired from llvm/lib/Target/PowerPC/PPCHazardRecognizer.cpp + +#include "ConnexHazardRecognizerPreRAScheduler.h" +#include "Connex.h" +#include "ConnexInstrInfo.h" +#include "ConnexTargetMachine.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "Misc.h" // For dumpSU() + +using namespace llvm; + + + +#define DEBUG_TYPE "pre-RA-sched" + +/* +SUnit is meant for both types of schedulers: + - pre-RA, which deals with MachineSDNode and SDNode. + - post-RA, which deal with MachineInstr +But note that here we are a pre-RA scheduler. +So, as expected here an SUnit contains ONLY MachineSDNode and SDNode. +*/ +bool ConnexDispatchGroupSBHazardRecognizerPreRAScheduler::isReadAfterWrite(SUnit *SU) { + /* From http://llvm.org/docs/doxygen/html/classllvm_1_1MCInstrDesc.html + NOTE: although SU->isInstr() == false, we can use DAG->getInstrDesc(SU). + */ + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + + if (MCID == NULL) + return false; + + LLVM_DEBUG(dbgs() << "isReadAfterWrite(SU = "; + dumpSU(SU, dbgs()); + dbgs() << ")\n"); + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): SU->Succs.size() = " + << SU->Succs.size() << "\n"); + /* See http://llvm.org/docs/doxygen/html/SelectionDAGNodes_8h_source.html#l00481 + /// Test if this node has a post-isel opcode, directly + /// corresponding to a MachineInstr opcode. 
+ */ + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): (SU->getNode())->isMachineOpcode() = " + << (SU->getNode())->isMachineOpcode() << "\n"); + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): (SU->getNode())->getOpcode() = " + << (SU->getNode())->getOpcode() << "\n"); + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): (SU->getNode())->getMachineOpcode() = " + << (SU->getNode())->getMachineOpcode() << "\n"); + +#ifdef USE_FOUNDINLINEASM + bool foundINLINEASM = false; +#endif + //MachineInstr *SUpred_INLINEASM = NULL; + for (unsigned int i = 0; i < SU->Succs.size(); ++i) { + SUnit *SUsucc = SU->Succs[i].getSUnit(); + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): SU->Succs[" << i << "] = "; + dumpSU(SUsucc, dbgs()); + dbgs() << ")\n"); + + if ((SUsucc->getNode())->isMachineOpcode() == false) { + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): (SUsucc->getNode())->getOpcode() = " + << (SUsucc->getNode())->getOpcode() << "\n"); + } + else{ + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): (SUsucc->getNode())->getMachineOpcode() = " + << (SUsucc->getNode())->getMachineOpcode() << "\n"); + } + + if ( ((SUsucc->getNode())->isMachineOpcode() == false) && + ((SUsucc->getNode())->getOpcode() == ISD::INLINEASM) ) { + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): Found SDNode ISD::INLINEASM\n"); + +#ifdef USE_FOUNDINLINEASM + foundINLINEASM = true; +#endif + /* + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineInstr.html + // This fails with: llvm::MachineInstr* llvm::SUnit::getInstr() const: Assertion `!Node && "Reading MachineInstr of SUnit with SDNode!"' failed. 
+ SUpred_INLINEASM = SUsucc->getInstr(); + assert(SUpred_INLINEASM != NULL); + if ( ((SU->getNode())->isMachineOpcode() == true) && + ((SU->getNode())->getMachineOpcode() == Connex::VLOAD_H_SYM_IMM) ) { + SUpred_INLINEASM->bundleWithPred(); + } + */ + } + } + + // See http://llvm.org/docs/doxygen/html/SelectionDAGNodes_8h_source.html#l00486 + if ( ((SU->getNode())->isMachineOpcode() == true) && + ((SU->getNode())->getMachineOpcode() == Connex::VLOAD_H_SYM_IMM) ) { + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): Found Connex::VLOAD_H_SYM_IMM\n"); + + /* + if (foundINLINEASM == true) { + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineInstr.html + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): before getInstr()\n"); + // Gives error: <> + (SU->getInstr())->bundleWithSucc(); + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): after getInstr()\n"); + } + */ + /* + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): SU->Preds[0] = "; + (SU->Preds[0].getSUnit())->dump(DAG); + dbgs() << ")\n"); + */ + } + + /* + // Note: MCPhysReg is an integer - see http://llvm.org/docs/doxygen/html/namespacellvm.html: "typedef uint16_t llvm::MCPhysReg" + const MCPhysReg *MCIDArray = MCID->getImplicitUses(); + unsigned numUses = MCID->getNumImplicitUses(); seems it is always 0 + */ + //const MCOperandInfo *MCIDArray = MCID->OpInfo; + unsigned numUses = MCID->getNumOperands() - MCID->getNumDefs(); + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): numUses = " << numUses << "\n"); + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): MCID->getNumOperands() = " + << MCID->getNumOperands() << "\n"); + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): MCID->getNumDefs() = " + << MCID->getNumDefs() << "\n"); + + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): SU->Preds.size() = " + << SU->Preds.size() << "\n"); + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): SU->Succs.size() = " + << SU->Succs.size() << "\n"); + + /* + if (!MCID->mayLoad()) + return false; + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): SU can load\n"); + */ + /* TODO: 
NOTE: END_REPEAT returns also mayStore(). But we should not worry + about this since END_REPEAT takes no parameter. */ + if (!MCID->mayStore()) + return false; + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): SU can store\n"); + + // IMPORTANT: In the standard pre-RA, END_REPEAT has isInstr() == false + assert(SU->isInstr() == false); + /* + // TODO TODO TODO TODO: try to treat this since REPEAT is also intrinsic and can have conditional hazards + if (SU->isInstr() == false) { + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): SU->isInstr() == false\n"); + return false; + } + */ + + SDNode *SDN = SU->getNode(); + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): SDN->getNumOperands() = " + << SDN->getNumOperands() << "\n"); + + // SU is a load; for any predecessors in this dispatch group, that are stores, + // and with which we have an ordering dependency, return true. + for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { + const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); + + if (PredMCID == NULL) // || !PredMCID->mayStore()) + continue; + + /* SU->Preds is SmallVector of SDep. 
+ * - see http://llvm.org/docs/doxygen/html/classllvm_1_1SUnit.html + * - see http://llvm.org/docs/doxygen/html/classllvm_1_1SDep.html + */ + SDNode *PredSDN = (SU->Preds[i].getSUnit())->getNode(); + LLVM_DEBUG(dbgs() << " isReadAfterWrite(SU->Preds[" + << i << "] = "; + PredSDN->dump(); + //(SU->Preds[i].getSUnit())->dump(DAG); + //PredMCID->dump(DAG); + dbgs() << ")\n"); + + + /* + const MCPhysReg *PredMCIDArray = PredMCID->getImplicitDefs(); + unsigned numDefs = PredMCID->getNumImplicitDefs(); seems it is always 0 + */ + unsigned numDefs = PredMCID->getNumDefs(); + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): numDefs = " << numDefs << "\n"); + //const MCOperandInfo *PredMCIDArray = PredMCID->OpInfo; + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): PredSDN->getNumOperands() = " + << PredSDN->getNumOperands() << "\n"); + /* + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): PredSDN->getNumDefs() = " + << PredMCID->getNumDefs() << "\n"); + */ + + + //for (unsigned idUse = MCID->getNumDefs(); idUse < numUses; idUse++) + for (unsigned idUse = 0; idUse < numUses; idUse++) { + /* + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): MCIDArray[" << idUse + << "] = " << MCIDArray[idUse] << "\n"); + */ + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): SDN->getOperand(" << idUse + << ") = "; + SDN->getOperand(idUse)->dump(); + dbgs() << "\n"); + //for (unsigned idDef = 0; idDef < PredSDN->getNumOperands(); idDef++) + for (unsigned idDef = 0; idDef < numDefs; idDef++) { + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): PredSDN->getOperand(" << idUse + << ") = "; + PredSDN->getOperand(idDef)->dump(); + dbgs() << "\n"); + + //if (PredSDN->getOperand(idDef) == SDN->getOperand(idUse)) + if (PredSDN == SDN->getOperand(idUse).getNode()) { + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): Found PredSDN == SDN->getOperand(idUse)\n"); + return true; + } + /* + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): PredMCIDArray[" << idDef + << "] = " << PredMCIDArray[idDef] << "\n"); + if (PredMCIDArray[idDef] == 
MCIDArray[idUse]) { + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): found an instr sequence that has to be separated by NOP to avoid true dependency hazard\n"); + return true; + } + */ + + /* + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineOperand.html + const MachineOperand &PredMIMO = PredMI->getOperand(idDef); + const MachineOperand &MIMO = MI->getOperand(idUse); + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): PredMI->getOperand(" + << idDef + << ") = " << PredMI->getOperand(idDef) << "\n"); + + if (PredMIMO.isReg() && MIMO.isReg() && + PredMIMO.getReg() == MIMO.getReg()) { + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): found an instr sequence that has to be separated by NOP to avoid true dependency hazard\n"); + return true; + } + */ + } + } + /* + if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier()) + continue; + */ + //return true; + } + + return false; +} + +ScheduleHazardRecognizer::HazardType +ConnexDispatchGroupSBHazardRecognizerPreRAScheduler::getHazardType(SUnit *SU, int Stalls) { + static bool emittedNoop = false; + + // From http://llvm.org/docs/doxygen/html/classllvm_1_1SUnit.html + LLVM_DEBUG(dbgs() << "ConnexDispatchGroupSBHazardRecognizerPreRAScheduler::getHazardType(SU = "; + dumpSU(SU, dbgs()); + dbgs() << ", Stalls = " << Stalls << ")\n"); + + //if (Stalls == 0 && isLoadAfterStore(SU)) + if (Stalls == 0 && // no (pipeline?) 
stalls + emittedNoop == false && // TODO TODO TODO This is a very louzy tmp solution + isReadAfterWrite(SU)) { + LLVM_DEBUG(dbgs() << " Pre-RA: getHazardType(): return NoopHazard\n"); + + emittedNoop = true; + + return NoopHazard; + } + + return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); +} + +void ConnexDispatchGroupSBHazardRecognizerPreRAScheduler::EmitInstruction(SUnit *SU) { + unsigned i, ie; + + LLVM_DEBUG(dbgs() << "Entered Connex's PreRA EmitInstruction("; + dumpSU(SU, dbgs()); + dbgs() << ")\n"); + LLVM_DEBUG(dbgs() << " SU->Succs.size() = " + << SU->Succs.size() << "\n"); + LLVM_DEBUG(dbgs() << " SU->Preds.size() = " + << SU->Preds.size() << "\n"); + + for (i = 0, ie = (unsigned) SU->Succs.size(); i != ie; ++i) { + MachineInstr *SuccMI = (SU->Succs[i].getSUnit())->getInstr(); + if (SuccMI == NULL) { + LLVM_DEBUG(dbgs() << " SU->Succs[" + << i << "] = NULL\n"); + } + else { + LLVM_DEBUG(dbgs() << " SU->Succs[" + << i << "] = "; + SuccMI->dump(); + dbgs() << "\n"); + } + } + for (i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { + MachineInstr *PredMI = (SU->Preds[i].getSUnit())->getInstr(); + if (PredMI == NULL) { + LLVM_DEBUG(dbgs() << " SU->Preds[" + << i << "] = NULL\n"); + } + else { + LLVM_DEBUG(dbgs() << " SU->Preds[" + << i << "] = "; + PredMI->dump(); + dbgs() << "\n"); + } + } + + return ScoreboardHazardRecognizer::EmitInstruction(SU); +} + +/* See also http://llvm.org/docs/doxygen/html/classllvm_1_1ScheduleHazardRecognizer.html +PreEmitNoops - This callback is invoked prior to emitting an instruction. +*/ +unsigned ConnexDispatchGroupSBHazardRecognizerPreRAScheduler::PreEmitNoops(SUnit *SU) { + LLVM_DEBUG(dbgs() << "Entered Connex's PreRA PreEmitNoops()\n"); + return 0; +} + +/* See also http://llvm.org/docs/doxygen/html/classllvm_1_1ScheduleHazardRecognizer.html +EmitNoop - This callback is invoked when a noop was added to the instruction stream. 
+*/ +void ConnexDispatchGroupSBHazardRecognizerPreRAScheduler::EmitNoop() { + LLVM_DEBUG(dbgs() << "Entered Connex's PreRA EmitNoops()\n"); +} + Index: lib/Target/Connex/ConnexISelDAGToDAG.cpp =================================================================== --- lib/Target/Connex/ConnexISelDAGToDAG.cpp +++ lib/Target/Connex/ConnexISelDAGToDAG.cpp @@ -0,0 +1,5094 @@ +//===-- ConnexISelDAGToDAG.cpp - A dag to dag inst selector for Connex ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a DAG pattern matching instruction selector for Connex, +// converting from a legalized dag to a Connex dag. +// +//===----------------------------------------------------------------------===// + +#include "Connex.h" +#include "ConnexRegisterInfo.h" +#include "ConnexSubtarget.h" +#include "ConnexTargetMachine.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" + + +// Gives error (we are NOT in the right directory): #include "SelectionDAGBuilder.h" +//#include "../lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h" + +#define DEBUG_TYPE "connex-isel" + +//#include "../lib/Transforms/Vectorize/RecoverFromLlvmIR.h" +#include "RecoverFromLlvmIR.h" + + +using namespace llvm; + + + +#include "ConnexConfig.h" + +/* To help reading ASM code we put some useful comments (INLINE Asm nodes) + with where the 
emulation of unsupported operatio of type i32/f16/etc + starts and ends. +*/ +#define MARKER_FOR_EMULATION + + +/* IMPORTANT: these macros with BITCAST can add hazards due to delay slots. + We recommend disabling these macros. + */ +#define BITCAST_MAY2017_05_28 +//#define BITCAST_2018_06_F16 + + +/* + ConnexISelDAGToDAG is a subclass of SelectionDAGISel: + class ConnexDAGToDAGISel : public SelectionDAGISel + + From SelectionDAGBuilder.h: // the key is Value *, SDValue is the value + DenseMap NodeMap; + void setValue(const Value *V, SDValue NewN) { + SDValue &N = NodeMap[V]; + assert(!N.getNode() && "Already set a value for this node!"); + N = NewN; + } + (See http://llvm.org/docs/doxygen/html/classllvm_1_1DenseMap.html) + (http://llvm.org/docs/doxygen/html/classllvm_1_1DenseMapBase.html and + http://llvm.org/docs/doxygen/html/DenseMap_8h_source.html) + + From include/llvm/CodeGen/SelectionDAGISel.h: + /// SelectionDAGISel - This is the common base class used for SelectionDAG-based + /// pattern-matching instruction selectors. 
+ class SelectionDAGISel : public MachineFunctionPass { + public: + TargetMachine &TM; + const TargetLibraryInfo *LibInfo; + FunctionLoweringInfo *FuncInfo; + MachineFunction *MF; + MachineRegisterInfo *RegInfo; + SelectionDAG *CurDAG; + SelectionDAGBuilder *SDB; +*/ + + +static bool isUnitSteppedZeroStartingVector(const BuildVectorSDNode *N) { + unsigned int nOps = N->getNumOperands(); + + assert(nOps > 1 && "isSplatVector has 0 or 1 sized build vector"); + + LLVM_DEBUG(dbgs() << "Entered isUnitStridedZeroStartingVector()\n"); + //SDValue Operand0 = N->getOperand(0); + + /* + assert(N->getOperand(0) == N->getOperand(1)); + assert(N->getOperand(0) == N->getOperand(2)); + */ + + for (unsigned int i = 0; i < nOps; ++i) { + LLVM_DEBUG(dbgs() << "N->getOperand(" << i << ") = "; + N->getOperand(i)->dump(); dbgs() << "\n"); + //return false; + } + +#ifdef NOTNOTNOT + SDNode *Nop0 = (N->getOperand(0)).getNode(); + LLVM_DEBUG(dbgs() << "Nop0->getOperand(0) = "; + Nop0->getOperand(0)->dump(); dbgs() << "\n"); + + SDNode *Nop00 = (Nop0->getOperand(0)).getNode(); + LLVM_DEBUG(dbgs() << "Nop00->getOperand(0) = "; + Nop00->getOperand(0)->dump(); dbgs() << "\n"); + + + SDNode *Nop000 = (Nop00->getOperand(0)).getNode(); + LLVM_DEBUG(dbgs() << "Nop000->getOperand(0) = "; + Nop000->getOperand(0)->dump(); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Nop000->getOperand(1) = "; + Nop000->getOperand(1)->dump(); dbgs() << "\n"); + + /* For: + Nop000->getOperand(1) = t1: i64 = Register %vreg3 + it does not have any operands. 
+ */ + SDNode *Nop0001 = (Nop000->getOperand(1)).getNode(); + LLVM_DEBUG(dbgs() << "Nop0001->getOperand(0) = "; + Nop0001->getOperand(0)->dump(); dbgs() << "\n"); +#endif + + LLVM_DEBUG(dbgs() << "Exiting isUnitStridedZeroStartingVector()\n"); + + return true; +} + + + +// Instruction Selector Implementation +namespace { + +class ConnexDAGToDAGISel : public SelectionDAGISel { +public: + explicit ConnexDAGToDAGISel(ConnexTargetMachine &TM) : SelectionDAGISel(TM) {} + + StringRef getPassName() const override { + return "Connex DAG->DAG Pattern Instruction Selection"; + } + +private: + // Include the pieces autogenerated from the target description. + #include "ConnexGenDAGISel.inc" + + bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base); + + void selectBUILD_VECTOR(SDNode *Node); + void selectVECTOR_SHUFFLE(SDNode *Node); + + SDNode *selectVSELECT(SDNode *Node); + + SDNode *selectReduceI32(SDNode *Node); + SDNode *selectAddI32(SDNode *Node); + SDNode *selectSubI32(SDNode *Node); + SDNode *selectMulI32(SDNode *Node); + SDNode *selectSraI32(SDNode *Node); + // + SDNode *selectDivI16(SDNode *Node); + // + SDNode *selectReduceF16(SDNode *Node); + SDNode *selectAddF16(SDNode *Node); + SDNode *selectSubF16(SDNode *Node); + SDNode *selectMulF16(SDNode *Node); + SDNode *selectDivF16(SDNode *Node); + SDNode *selectLtF16(SDNode *Node); + + void Select(SDNode *N) override; + + // Complex Pattern for address selection. 
+ bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset); + bool SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset); + + // Added from MipsSEISelDAGToDAG.cpp + bool selectAddrFrameIndex(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, + SDValue &Offset, unsigned OffsetBits) const; + bool selectAddrRegImm10(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const; + bool selectIntAddrMSA(SDValue Addr, SDValue &Base, SDValue &Offset) const; + + + + // In Mips we have MipsSEIselDAGToDAG inheriting MipsIselDAGToDAG, but + // in Connex we do NOT, so we comment the override qualifier + /// \brief Select constant vector splats. + bool selectVSplat(SDNode *N, APInt &Imm, + unsigned MinSizeInBits) const; //override; + /// \brief Select constant vector splats whose value fits in a given integer. + bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed, + unsigned ImmBitSize) const; + /// \brief Select constant vector splats whose value fits in a uimm1. + bool selectVSplatUimm1(SDValue N, SDValue &Imm) const; // override; + /// \brief Select constant vector splats whose value fits in a uimm2. + bool selectVSplatUimm2(SDValue N, SDValue &Imm) const; //override; + /// \brief Select constant vector splats whose value fits in a uimm3. + bool selectVSplatUimm3(SDValue N, SDValue &Imm) const; //override; + /// \brief Select constant vector splats whose value fits in a uimm4. + bool selectVSplatUimm4(SDValue N, SDValue &Imm) const; //override; + /// \brief Select constant vector splats whose value fits in a uimm5. + bool selectVSplatUimm5(SDValue N, SDValue &Imm) const; //override; + /// \brief Select constant vector splats whose value fits in a uimm6. + bool selectVSplatUimm6(SDValue N, SDValue &Imm) const; //override; + /// \brief Select constant vector splats whose value fits in a uimm8. 
+ bool selectVSplatUimm8(SDValue N, SDValue &Imm) const; //override; + /// \brief Select constant vector splats whose value fits in a simm5. + bool selectVSplatSimm5(SDValue N, SDValue &Imm) const; //override; + /// \brief Select constant vector splats whose value is a power of 2. + bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const; //override; + /// \brief Select constant vector splats whose value is the inverse of a + /// power of 2. + bool selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const; //override; + /// \brief Select constant vector splats whose value is a run of set bits + /// ending at the most significant bit + bool selectVSplatMaskL(SDValue N, SDValue &Imm) const; //override; + /// \brief Select constant vector splats whose value is a run of set bits + /// starting at bit zero. + bool selectVSplatMaskR(SDValue N, SDValue &Imm) const; //override; +}; // end class ConnexDAGToDAGISel +} // end namespace + + +// ComplexPattern used on Connex Load/Store instructions +bool ConnexDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { + // if Address is FI, get the TargetFrameIndex. 
+ SDLoc DL(Addr); + if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { + // TODO_CHANGE_BACKEND: + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), TYPE_SCALAR_ELEMENT); + + Offset = CurDAG->getTargetConstant(0, DL, TYPE_SCALAR_ELEMENT); + return true; + } + + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; + + // Addresses of the form Addr+const or Addr|const + if (CurDAG->isBaseWithConstantOffset(Addr)) { + ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); + if (isInt<32>(CN->getSExtValue())) { + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = + dyn_cast(Addr.getOperand(0))) + // TODO_CHANGE_BACKEND: + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), TYPE_SCALAR_ELEMENT); + else + Base = Addr.getOperand(0); + + // TODO_CHANGE_BACKEND: + Offset = CurDAG->getTargetConstant(CN->getSExtValue(), DL, TYPE_SCALAR_ELEMENT); + + return true; + } + } + + Base = Addr; + // TODO_CHANGE_BACKEND: + Offset = CurDAG->getTargetConstant(0, DL, TYPE_SCALAR_ELEMENT); + + return true; +} + + +// ComplexPattern used on Connex FI instruction +bool ConnexDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { + SDLoc DL(Addr); + + if (!CurDAG->isBaseWithConstantOffset(Addr)) + return false; + + // Addresses of the form Addr+const or Addr|const + ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); + if (isInt<32>(CN->getSExtValue())) { + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = dyn_cast(Addr.getOperand(0))) + // TODO_CHANGE_BACKEND: + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), TYPE_SCALAR_ELEMENT); + else + return false; + + // TODO_CHANGE_BACKEND: + Offset = CurDAG->getTargetConstant(CN->getSExtValue(), DL, TYPE_SCALAR_ELEMENT); + return true; + } + + return false; +} + + +// IMPORTANT: Note that RecoverCExpressionFromSDNode() is used only for +// REPEAT and BUILD_VECTOR nodes, in method Select(). 
+std::string RecoverCExpressionFromSDNode(SDNode *theSDNode, + DenseMap &SDBNodeMap, + bool failOver) { + /* + NOTE: the SelectionDAGISel::crtNodeMap, defined in + lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp, + discussed at http://lists.llvm.org/pipermail/llvm-dev/2016-November/107361.html + + getNodeMap() (method defined by me) returns the NodeMap object from + SelectionDAGBuilder.h with this definition: + DenseMap NodeMap; + + Note however that this info is not enough since some SDNodes get generated + in the following phases of the back end, namely: + - DAG combining - see lib/CodeGen/SelectionDAG/DAGCombiner.cpp + + This class gets invoked much later, after all the ones mentioned above have + finished. + */ + LLVM_DEBUG(dbgs() + << "Entered RecoverCExpressionFromSDNode() (ConnexISelDAGToDAG.cpp)\n"); + + std::string res; + + // Important note: class SelectionDAGBuilder is forward declared. + //assert(SDB != NULL); + //assert(SDB->NodeMap[(const Value *)NULL]); // NodeMap is private + //DenseMapBase<> + //auto iterNodeMap = SDB->NodeMap.begin(); + + //bool res = SDB->HasTailCall; + + //DenseMap &SDBNodeMap = crtNodeMap; //SDB->getNodeMap(); + + //unsigned size = SDB->NodeMap.size(); + LLVM_DEBUG(dbgs() << "RecoverCExpressionFromSDNode(): SDB->NodeMap.size() = " + << SDBNodeMap.size() + << ", theSDNode = "; + theSDNode->dump(); + dbgs() + << ", theSDNode (ptr) = " + << theSDNode + << "\n"); + + /* + We retrieve from the SDBNodeMap the associated LLVM IR Instruction for + theSDNode (SDNode created by SelectionDAGBuilder). 
+ */ + + int counter = 0; + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1DenseMapBase.html + for (auto iterNodeMap = SDBNodeMap.begin(); + iterNodeMap != SDBNodeMap.end(); iterNodeMap++, counter++) { + + // Type (in error of g++) is: llvm::detail::DenseMapPair + auto tmp1 = (*iterNodeMap); + //SDValue tmp1N = (*iterNodeMap); + //auto tmp2 = (*iterNodeMap); //->second; + + //Value *crtValue = tmp1.first; + const Instruction *crtValue = (const Instruction *)(tmp1.first); + SDNode *crtSDNode = tmp1.second.getNode(); + + LLVM_DEBUG(dbgs() << "RecoverCExpressionFromSDNode(): [#" << counter + << "] tmp1.first = " + << *crtValue + /* + << ", tmp1.second = "; + tmp1.second.dump(); + dbgs() << "\n" + */ + << "\n" + ); + + //assert(crtNode != nullptr); + if (crtSDNode != nullptr) { + LLVM_DEBUG(dbgs() << "RecoverCExpressionFromSDNode(): tmp1.second.getNode() = "; + crtSDNode->dump(); + dbgs() << "\n"); + //<< *crtNode << "\n"); + + if (crtSDNode == theSDNode) { + LLVM_DEBUG(dbgs() << "RecoverCExpressionFromSDNode(): Found a match:...\n"); + + /* + This corresponds to cases like: + From 300_Opincaa/sSub/STDerr_llc_01 + RecoverCExpressionFromSDNode(): tmp1.first = %broadcast.splatinsert10 = insertelement <128 x i16> undef, i16 %sub, i32 0, !dbg !8 + RecoverCExpressionFromSDNode(): tmp1.second.getNode() = t33: v128i16 = BUILD_VECTOR t35, t35, t35, ... t35 + + We can see here that the machine-independent back end instruction BUILD_VECTOR + is more complex (abstract) than the LLVM IR insertelement. + The equivalent to BUILD_VECTOR LLVM IR program uses also a shufflevector instruction: + %broadcast.splatinsert10 = insertelement <128 x i16> undef, i16 %sub, i32 0, !dbg !8 + %broadcast.splat11 = shufflevector <128 x i16> %broadcast.splatinsert10, <128 x i16> undef, <128 x i32> zeroinitializer, !dbg !8 + + Note that RecoverCExpressionFromSDNode() is used only for BUILD_VECTOR. 
+ + For the SSD benchmark, the associated instruction is though + ShuffleVector + (see /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/90_CV/SSD/B/STDerr_llc_01). + */ + /* + assert( (crtValue->getOpcode() == Instruction::InsertElement) || + (crtValue->getOpcode() == Instruction::ShuffleVector) + ); + */ + + + Instruction *crtValueOp1; + + switch (crtValue->getOpcode()) { + case Instruction::InsertElement: + case Instruction::ShuffleVector: + if (crtValue->getOpcode() == Instruction::InsertElement) { + crtValueOp1 = (Instruction *)(crtValue->getOperand(1)); + } + else { + crtValueOp1 = (Instruction *)(crtValue->getOperand(0)); + assert(crtValueOp1->getOpcode() == Instruction::InsertElement); + // TODO: check that crtValueOp1->getOperand(0) is vec undef, crtValueOp1->getOperand(2) is 0 + crtValueOp1 = (Instruction *)(crtValueOp1->getOperand(1)); + } + LLVM_DEBUG(dbgs() << " crtValueOp1 = " + << *crtValueOp1 << "\n"); + + getExprForDMATransfer = true; + res = getExpr(crtValueOp1); + LLVM_DEBUG(dbgs() << " getExpr(crtValueOp1) = " + << res << "\n"); + break; + default: + getExprForDMATransfer = true; + res = getExpr(const_cast(crtValue)); + + LLVM_DEBUG(dbgs() << " getExpr(crtValue) = " + << res << "\n"); + break; + } + break; + } + } + else { + LLVM_DEBUG(dbgs() << + "RecoverCExpressionFromSDNode(): tmp1.second.getNode() == nullptr\n\n"); + } + } // end for + + //assert(res.size() != 0); + if (res.length() == 0) { + if (failOver) { + //#define NVEC_STR "n.vec" + #define NVEC_STR "VTC_ceil" + + /* TODO TODO TODO TODO TODO TODO TODO: find, if possible a better + solution. Keep track of the SelectionDAGs of all BBs, not just the + current BB. 
*/ + + LLVM_DEBUG(dbgs() << + "RecoverCExpressionFromSDNode(): failOver == true --> we look " + "for NVEC_STR (vector tripcount defined in LoopVectorize.cpp) " + "in SDBNodeMap and retrieve for it\n"); + + /* Although not a great alternative, we look in SDBNodeMap for + * an entry containing %n.vec - this should exist from a previous + * BB. + */ + for (auto iterNodeMap = SDBNodeMap.begin(); + iterNodeMap != SDBNodeMap.end(); iterNodeMap++, counter++) { + auto tmp1 = (*iterNodeMap); + const Instruction *crtValue = (const Instruction *)(tmp1.first); + + LLVM_DEBUG(dbgs() << "RecoverCExpressionFromSDNode(): crtValue = " + << *crtValue << "\n"); + + // From http://llvm.org/docs/doxygen/html/classllvm_1_1StringRef.html + //if (crtValue->getName().str() == NVEC_STR) + if (strncmp(crtValue->getName().str().c_str(), NVEC_STR, + strlen(NVEC_STR)) == 0) { + getExprForDMATransfer = true; + + res = getExpr(const_cast(crtValue)); + LLVM_DEBUG(dbgs() << " RecoverCExpressionFromSDNode(): res = " + << res << "\n"); + + /* TODO TODO TODO TODO: this is NOT good if the res already + contains a constant such as 1 - OK we could take out + CreateDiv in LoopVectorize.cpp, etc */ + + // res = res + " / CONNEX_VECTOR_LENGTH"; // Unfortunately, we hard code this also here... 
+ } + } + } + else { + assert(res.length() != 0); + } + } + + return res; +} + + + +// Inspired from lib/Target/X86/X86ISelDAGToDAG.cpp +bool ConnexDAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, + SDValue &Index) { + LLVM_DEBUG(dbgs() << "Entered ConnexDAGToDAGISel::selectVectorAddr()\n"); + + LLVM_DEBUG(dbgs() << " selectVectorAddr(): Parent = "; Parent->dump(CurDAG); + dbgs() << "\n N = "; N->dump(CurDAG); + /* + dbgs() << "\n Base.getNode() = " << Base.getNode(); + dbgs() << "\n Base = "; Base->dump(CurDAG); + */ + dbgs() << "\n"); + + // From http://llvm.org/docs/doxygen/html/classllvm_1_1MaskedGatherScatterSDNode.html + MaskedGatherScatterSDNode *Mgs = dyn_cast(Parent); + if (!Mgs) + return false; + + /* + // Retrieve the "scalar base pointer" (as said also at + // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20150831/297534.html) + Base = Mgs->getBasePtr(); + */ + Index = Mgs->getIndex(); + //Base = Mgs->getIndex(); + + LLVM_DEBUG(dbgs() << " selectVectorAddr(), after update: Parent = "; + Parent->dump(CurDAG); + dbgs() << "\n N = "; N->dump(CurDAG); + dbgs() << "\n Index.getNode() = " << Index.getNode(); + dbgs() << "\n Index = "; Index->dump(CurDAG); + dbgs() << "\n"); + + +#ifdef NOTNOT + // From http://llvm.org/docs/doxygen/html/classllvm_1_1MaskedGatherScatterSDNode.html + MaskedGatherScatterSDNode *Mgs = dyn_cast(Parent); + if (!Mgs) + return false; + + LLVM_DEBUG(dbgs() << " after update, selectVectorAddr(): Parent = "; + Parent->dump(CurDAG); + dbgs() << "\n N = "; + N->dump(CurDAG); + dbgs() << "\n Base.getNode() = " << Base.getNode(); + dbgs() << "\n Base = "; Base->dump(CurDAG); + dbgs() << "\n Scale.getNode() = " << Scale.getNode(); + dbgs() << "\n ScalarSize = " << ScalarSize; + //dbgs() << "\n Scale = "; Scale->dump(CurDAG); + dbgs() << "\n Index.getNode() = " << Index.getNode(); + dbgs() << "\n Index = "; Index->dump(CurDAG); + dbgs() << "\n Disp.getNode() = " << Disp.getNode(); + dbgs() << "\n Disp = "; 
Disp->dump(CurDAG); + dbgs() << "\n Segment.getNode() = " << Segment.getNode(); + dbgs() << "\n Segment = "; Segment->dump(CurDAG); + dbgs() << "\n"); +#endif + + LLVM_DEBUG(dbgs() << "Exiting ConnexDAGToDAGISel::selectVectorAddr()\n"); + + return true; +} + + +SDNode *CreateInlineAsmNode(SelectionDAG *CurDAG, + std::string asmString, + SDNode *nodeSymImm, + SDLoc &DL, + bool specialCase=false) { + // This step is very IMPORTANT: + // IMPORTANT: As of Oct 2016, we must malloc the char * that is passed to + // getTargetExternalSymbol as a reference, so we must make sure + // the value persists after we get out of this function. + // Hopefully no leak will happen either - maybe when deleting + // SDNode the destructor frees the char *. + // With difficulty I found with Google this method doing + // creation of the SDNode, which is used also by + // getTargetExternalSymbol(). + // template + // SDNodeT *newSDNode(ArgTypes &&... Args) { + // return new (NodeAllocator.template Allocate()) + // SDNodeT(std::forward(Args)...); + // } + char *exprStrChar = (char *)malloc(MAXLEN_STR); + strcpy(exprStrChar, asmString.c_str()); + LLVM_DEBUG(dbgs() << "CreateInlineAsmNode(): exprStrChar = " + << exprStrChar << "\n"); + /* + See http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html: + SDValue getTargetExternalSymbol (const char *Sym, EVT VT, + unsigned char TargetFlags=0) + */ + SDValue extSym = CurDAG->getTargetExternalSymbol(//"VLOAD R_todo, !!!!\n", + //exprStr.c_str(), + exprStrChar, + // + //TYPE_VECTOR_I16 + MVT::i64 + ); + SDNode *extSymNode = extSym.getNode(); + LLVM_DEBUG(dbgs() << "CreateInlineAsmNode(): extSymNode = "; + extSymNode->dump(); + dbgs() << "\n"); + + /* + From http://llvm.org/doxygen/namespacellvm_1_1ISD.html + "INLINEASM - Represents an inline asm block. + This node always has two return values: a chain and a flag result. + The inputs are as follows: + Operand #0 : Input chain. 
+ Operand #1 : a ExternalSymbolSDNode with a pointer to the asm string. + Operand #2 : a MDNodeSDNode with the !srcloc metadata. + Operand #3 : HasSideEffect, IsAlignStack bits. + After this, it is followed by a list of operands with this format: + ConstantSDNode: Flags that encode whether it is a mem or not, the + of operands that follow, etc. + See InlineAsm.h. ... however many operands ... Operand #last: Optional, an incoming flag." + */ + std::vector opsInline; + + // This generates either: + // - a glue edge/link if the return type is MVT::Glue + // - a chain edge/link if the return type is MVT::Other + // between the nodeSymImm and the INLINEASM node. + if (specialCase) { + //opsInline.push_back(CurDAG->getEntryNode()); + } + else { + opsInline.push_back(SDValue(nodeSymImm, 0)); + } + // + opsInline.push_back(extSym); //SDValue(extSym, 0)); + + +//#ifdef NOTNOT + /* Creating a null-MDNode MDNodeSDNode object. + Inspiring from (since only SelectionDAG can call constructor) + http://llvm.org/docs/doxygen/html/SelectionDAGNodes_8h_source.html: + 01756 class MDNodeSDNode : public SDNode { + 01757 const MDNode *MD; + 01758 friend class SelectionDAG; + 01759 explicit MDNodeSDNode(const MDNode *md) + 01760 : SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md) + 01761 {} + See also, although not helpful, + http://llvm.org/docs/doxygen/html/classllvm_1_1MDNodeSDNode.html . + */ + /* Does NOT work: MDNodeSDNode mdNodeSDNode; // = MDNodeSDNode::getMD(); + is private: MDNodeSDNode::MDNodeSDNode(mdNode); */ + +#ifdef INTERESTING_BUG + /* IMPORTANT: this NON-standard mdNode created below gives errors + if we generate 2 or more of these nodes in the compiled ASM + module generated. 
+ See for example + /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/!/300_Opincaa/sAdd_BUG/STDerr_llc_01_old01 + for error: + ::ret_type llvm::cast(Y*) [with X = llvm::ValueAsMetadata; Y = const llvm::Metadata; typename llvm::c + ast_retty::ret_type = const llvm::ValueAsMetadata*]: Assertion `isa(Val) && "cast() argument of incompatible type!"' failed. + (reported in http://lists.llvm.org/pipermail/llvm-dev/2016-October/106629.html). + + This error is given when trying to print what when succesful gives: + ; dbg:ReduceSymbolic.c:18:5 + (I presume is the MDNode, and is where the error is given). + */ + + // Creating an SDNode MDNode (MetaData) with a ch out port + SDValue mdNode = CurDAG->getNode(ISD::MDNODE_SDNODE, DL, + CurDAG->getVTList(MVT::Other) + ); + SDNode *mdNodeSDNode = mdNode.getNode(); + /* + SDNode *mdNodeSDNode = CurDAG->getNode(ISD::MDNODE_SDNODE, DL, + CurDAG->getVTList(MVT::Other) + //Node->getOperand(0) //gives error: ScheduleDAG.cpp:425: unsigned int llvm::ScheduleDAG::VerifyScheduledDAG(bool): Assertion `!AnyNotSched' failed. + ).getNode(); + */ +#endif +//#endif + + // Creating a NON-null-MDNode MDNodeSDNode object (has a + // hexadecimal value when outputing the DOT file). + /* From + http://llvm.org/docs/doxygen/html/classllvm_1_1MDNode.html: + Detailed Description + Metadata nodes can be uniqued, like constants, or distinct. 
+ */ + // Actually inspired from http://ftp.nchc.org.tw/NetBSD/NetBSD-current/src/external/bsd/llvm/dist/llvm/unittests/IR/MetadataTest.cpp + MDNode *mdNode = MDNode::get(* (CurDAG->getContext()), None); + /* + From http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html + <> + */ + SDNode *mdNodeSDNode = CurDAG->getMDNode(mdNode).getNode(); + // + /* Avoiding error - see /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/DawnCC/30l_dotprod_f16/5/STDerr_llc_01_old03: + << Assertion `Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Glue && "Chain and glue operands should occur at end of operand list!"' failed. + */ + if (specialCase == false) { + opsInline.push_back(SDValue(mdNodeSDNode, 0)); + } + +#ifdef NOTNOT + /* Inspiring from (since only SelectionDAG can call + constructor) + http://llvm.org/docs/doxygen/html/SelectionDAGNodes_8h_source.html + 01435 class ConstantSDNode : public SDNode + 01436 const ConstantInt *Value; + 01437 friend class SelectionDAG; + 01438 ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, + 01439 DebugLoc DL, EVT VT) + 01440 : SDNode(isTarget ? 
ISD::TargetConstant : ISD::Constant, + 01441 0, DL, getSDVTList(VT)), Value(val) + 01442 SubclassData |= (uint16_t)isOpaque; + 01443 } + */ + SDValue targetConstant = CurDAG->getNode(ISD::TargetConstant, DL, + CurDAG->getVTList(MVT::i64)); + SDNode *targetConstantSDNode = targetConstant.getNode(); +#endif + /* + From http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html + <> + */ + SDValue targetConstant = CurDAG->getTargetConstant(1, DL, MVT::i64); + SDNode *targetConstantSDNode = targetConstant.getNode(); + // + opsInline.push_back(SDValue(targetConstantSDNode, 0)); // TargetConstant<1>, 0) - a i64 port); + +#ifdef DO_NOT_EXEC_BUT_INTERESTING + /* Unfortunately, ISD::INLINEASM accepts only ConstantSDNode + from 2nd operand onwards - see InstrEmitter.cpp, line 966: + unsigned Flags = + cast(Node->getOperand(i))->getZExtValue(); + */ + // Unsuccessful - Attempting to add a chain edge + SDValue NodeOp0 = Node->getOperand(0); + LLVM_DEBUG(dbgs() << "Selecting NodeOp0 = "; + NodeOp0->dump(); + dbgs() << '\n'); + opsInline.push_back(NodeOp0); +#endif + + if (specialCase) + opsInline.push_back(SDValue(nodeSymImm, 0)); + + // Note that you can also look at the .dot file output + // from the LLVM I-sel stage to get an idea on how an + // INLINEASM node looks. + + // Related to CODE2018_07_01 + SDNode *inlineAsmNode; + if (specialCase == true) { + inlineAsmNode = CurDAG->getMachineNode( + Connex::INLINEASM, + DL, + // Result types: + //CurDAG->getVTList(TYPE_VECTOR_I16), + CurDAG->getVTList(MVT::Other, MVT::Glue), + opsInline); + } + else { + SDValue inlineAsm = CurDAG->getNode( + // We use this non-machine SDNode to avoid + // <> e.g. 
+ // in middle.block + ISD::INLINEASM, + DL, + // Result types: + //CurDAG->getVTList(TYPE_VECTOR_I16), + CurDAG->getVTList(MVT::Other, MVT::Glue), + opsInline); + inlineAsmNode = inlineAsm.getNode(); + } + + LLVM_DEBUG(dbgs() << "CreateInlineAsmNode(): inlineAsmNode = "; + inlineAsmNode->dump(); + //dbgs() << '\n' + ); + + return inlineAsmNode; +} // END CreateInlineAsmNode() + + + + +static SDValue ChangeVectorType(SDValue InOp, MVT NVT, SelectionDAG &DAG, + bool FillWithZeroes = false) { + // Check if InOp already has the right width. + MVT InVT = InOp.getSimpleValueType(); + if (InVT == NVT) + return InOp; + + if (InOp.isUndef()) + return DAG.getUNDEF(NVT); + + /* + assert(InVT.getVectorElementType() == NVT.getVectorElementType() && + "input and widen element type must match"); + */ + + unsigned InNumElts = InVT.getVectorNumElements(); + unsigned WidenNumElts = NVT.getVectorNumElements(); + /* + assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 && + "Unexpected request for vector widening"); + */ + assert(WidenNumElts == InNumElts && "WidenNumElts == InNumElts failed"); + + EVT EltVT = NVT.getVectorElementType(); + + SDLoc dl(InOp); + if (InOp.getOpcode() == ISD::CONCAT_VECTORS && + InOp.getNumOperands() == 2) { + SDValue N1 = InOp.getOperand(1); + if ((ISD::isBuildVectorAllZeros(N1.getNode()) && FillWithZeroes) || + N1.isUndef()) { + InOp = InOp.getOperand(0); + InVT = InOp.getSimpleValueType(); + InNumElts = InVT.getVectorNumElements(); + } + } + + if (ISD::isBuildVectorOfConstantSDNodes(InOp.getNode()) || + ISD::isBuildVectorOfConstantFPSDNodes(InOp.getNode())) { + SmallVector Ops; + for (unsigned i = 0; i < InNumElts; ++i) { + //Ops.push_back(InOp.getOperand(i)); + Ops.push_back(InOp.getOperand(0)); + } + + + /* + SDValue FillVal = FillWithZeroes ? 
DAG.getConstant(0, dl, EltVT) : + DAG.getUNDEF(EltVT); + for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i) + Ops.push_back(FillVal); + */ + SDValue res = DAG.getBuildVector(NVT, dl, Ops); + + LLVM_DEBUG(dbgs() << "Exiting ChangeVectorType() with: res = " + << res.getNode() << ".\n"); + + return res; + } + + assert(0 && "ChangeVectorType(): I guess this case should not be reached"); + SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, NVT) : + DAG.getUNDEF(NVT); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NVT, FillVal, + InOp, DAG.getIntPtrConstant(0, dl)); +} + + + +void ConnexDAGToDAGISel::selectBUILD_VECTOR(SDNode *Node) { + LLVM_DEBUG(dbgs() << "Entered selectBUILD_VECTOR().\n"); + + // NEW32 + EVT typeVecNode; + SDLoc DL(Node); + + BuildVectorSDNode *BVN = cast(Node); + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + unsigned LdiOp; + EVT ResTy = BVN->getValueType(0); + EVT ViaVecTy; + + bool needsConvertionToResultType = true; + + SDNode *Res; + + if (ResTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR(): We are in the case TYPE_VECTOR_I32\n"); + /* + TODO TODO TODO TODO TODO TODO TODO TODO: + Although so far we do not have a test for this case, in principle we + should lower the following target-independent SDNode: + BUILD_VECTOR i32ct + to: + R0 = 1; + R1 = VLOAD i32ct_lower16bits; + R2 = VLOAD i32ct_higher16bits; + CELLSHR R2, R0; + WHERE_EQ (INDEX & 1 == 1) // for all odd indices + R1 = R2 | R2; + END_WHERE; + */ + } + else + if (ResTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR(): We are in the case TYPE_VECTOR_I16\n"); + } + typeVecNode = ResTy; + + /* + From http://llvm.org/docs/doxygen/html/classllvm_1_1BuildVectorSDNode.html: + bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, + unsigned &SplatBitSize, bool &HasAnyUndefs, + unsigned MinSplatBits=0, bool isBigEndian=false) const + Check if this is a constant splat, and if so, find the smallest element + size 
that splats the vector. + + By constant splat we understand a vector filled with the same + constant value in all elements. + */ + if (BVN->isConstantSplat(SplatValue, SplatUndef, + SplatBitSize, HasAnyUndefs, + 8, true) == false) { + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR(): " + "BVN->isConstantSplat() == false:\n"); + + /* Checking if we have a symbolic splat. + From + http://llvm.org/docs/doxygen/html/classllvm_1_1BuildVectorSDNode.html: + SDValue getSplatValue (BitVector *UndefElements=nullptr) const + <> + */ + SDValue symbolicValue = BVN->getSplatValue(); + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR(): symbolicValue.getNode() = " + << symbolicValue.getNode() << "\n"); + + // Inspired VAGUELY from + // http://llvm.org/docs/doxygen/html/SelectionDAGNodes_8h_source.html + if (symbolicValue.getNode() != nullptr) { + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR(): symbolicValue = "; + symbolicValue->dump(); + dbgs() << "\n"); + //LdiOp = Connex::VLOAD_H_STR; + + /* For the case BUILD_VECTOR is a variable splat + (contains the same variable in all elements of the vector), + we retrieve the C expression from the variable and generate + an inlineasm with VLOAD variable_C_Expression (so this is Opincaa host + and Connex ASM code together). 
*/ + + + + /* + From http://llvm.org/docs/doxygen/html/namespacellvm_1_1ISD.html: + <> + Also, ISD::INLINEASM accepts only objects of type ConstantSDNode + from 2nd operand onwards - see InstrEmitter.cpp, line 966: + unsigned Flags = + cast(Node->getOperand(i))->getZExtValue(); + + Examples of creating an INLINEASM SDNode, in llc: + From llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp, + (or llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp) : + + if (Glue.getNode()) + AsmNodeOperands.push_back(Glue); + if (!Changed) + return false; + + SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), + CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); + New->setNodeId(-1); + ReplaceNode(N, New.getNode()); + + Less useful: From SelectionDAGISel.cpp: + void SelectionDAGISel::Select_INLINEASM(SDNode *N) { + SDLoc DL(N); + + std::vector Ops(N->op_begin(), N->op_end()); + SelectInlineAsmMemoryOperands(Ops, DL); + + const EVT VTs[] = {MVT::Other, MVT::Glue}; + SDValue New = CurDAG->getNode(ISD::INLINEASM, DL, VTs, Ops); + New->setNodeId(-1); + ReplaceUses(N, New.getNode()); + CurDAG->RemoveDeadNode(N); + } + + From SelectionDAGBuilder.cpp: + Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(), + DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); + + LESS relevant note: to create an InlineAsm Value in LLVM program, + in clang/opt, we can use API described at: + http://llvm.org/docs/doxygen/html/classllvm_1_1InlineAsm.html + http://llvm.org/docs/doxygen/html/InlineAsm_8h_source.html + http://llvm.org/docs/doxygen/html/InlineAsm_8cpp_source.html + */ + + + /* + This gives llc error: + <NodeNum] > Node2Index[I->getSUnit()->NodeNum] && + "Wrong topological sorting"' failed.>> + Res = Node; + */ + + /* + SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty), + getTargetNode(N, Ty, DAG, GOTFlag)); + + From ...: + t37: ch,glue = inlineasm t34, TargetExternalSymbol:i64'sum = connexGlobal->readReduction(); + ... 
// END making a separate scope + // END_HOST_DEVICE_CODE', MDNode:ch, TargetConstant:i64<1> + */ + + /* + // This is outdated: NOT surpisingly, this results in creating a node like: + // RED_H getVTList(MVT::Other, MVT::Glue), + it gives error: + // <> + + When using + CurDAG->getVTList(TYPE_VECTOR_I16), + llc gives error: + llvm/include/llvm/CodeGen/SelectionDAGNodes.h:662: + const llvm::SDValue& llvm::SDNode::getOperand(unsigned int) const: + Assertion `Num < NumOperands && "Invalid child # of SDNode!"' failed. + + This fails at instruction scheduling. + */ + + SDValue InFlag(nullptr, 0); // NO Glue - Null incoming flag value. + // Inspired from ConnexISelLowering.cpp + MachineFunction &MF = CurDAG->getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + /* From http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html + SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB); + //SDValue bb = CurDAG->getBasicBlock(MachineBasicBlock *MBB); + */ + + //SDValue firstAsmInlineSDValue; + SDNode *firstAsmInlineSDNode = NULL; + for (auto dagIter = CurDAG->allnodes_begin(); //allnodes_iterator + dagIter != CurDAG->allnodes_end(); + dagIter++) { + SDNode iterSDNode = *dagIter; + //iterSDValue + //SDNode *iterSDNode = dagIter->getNode(); + /* + LLVM_DEBUG(dbgs() << "dagIter = "; + iterSDNode.dump(CurDAG); + dbgs() << '\n'); + */ + if (iterSDNode.getOpcode() == ISD::INLINEASM) { + firstAsmInlineSDNode = &iterSDNode; + break; + } + } + + // Using the MDNode - because Inline gives error: + //firstAsmInlineSDNode = (firstAsmInlineSDNode->getOperand(2)).getNode(); + + if (firstAsmInlineSDNode == NULL) + firstAsmInlineSDNode = (CurDAG->getEntryNode()).getNode(); + LLVM_DEBUG(dbgs() << "firstAsmInlineSDNode = " + << firstAsmInlineSDNode + << "\n"); + LLVM_DEBUG(dbgs() << "firstAsmInlineSDNode = "; + firstAsmInlineSDNode->dump(); + dbgs() << "[END]\n"); + + SDValue firstAsmInlineSDValue = SDValue(firstAsmInlineSDNode, 0); + LLVM_DEBUG(dbgs() << 
"firstAsmInlineSDValue = "; + firstAsmInlineSDValue->dump(); + dbgs() << "[END]\n"); + + #ifdef NOTNOT + unsigned virtScalarReg = RegInfo.createVirtualRegister( + &Connex::GPRRegClass); + // In the end not useful: Adding it just to force ordering between predecessors of Node and Res + SDValue copyToRegAux = CurDAG->getCopyToReg( + //CurDAG->getEntryNode(), // messes up scheduling + Node->getOperand(0), + + DL, + virtScalarReg, + Node->getOperand(0), + InFlag); + #endif + + /* TODO TODO TODO TODO TODO: Treat preoperly case + typeVecNode == TYPE_VECTOR_I32. + I.e., with multiple VLOAD_H, CELL_SH*, WHERE*, etc*/ + + SDNode *vloadSpecial = CurDAG->getMachineNode( + typeVecNode == TYPE_VECTOR_I16 ? + Connex::VLOAD_H_SYM_IMM : + //Connex::VLOAD_W_SYM_IMM, + Connex::VLOAD_H_SYM_IMM, + DL, + // + // We add MVT::Glue to the return + // types to avoid that llc performs CSE + // on these nodes: if this getMachineNode() + // function is called more than once we + // return the same value again and again + // (i.e., perform CSE) since the node doesn't + // take any actual inputs. + // - see why this is so at + // http://llvm.org/docs/doxygen/html/SelectionDAG_8cpp_source.html#l06206 */ + CurDAG->getVTList( + // typeVecNode, + TYPE_VECTOR_I16, + MVT::Glue), + // + CurDAG->getEntryNode() + // We add a chain edge + /* TODO TODO TODO VERY IMPORTANT - figure if + I can do this better + (maybe in Selection Lowering): + //SDValue(firstAsmInlineSDNode, 0) + firstAsmInlineSDValue */ + //SDValue(copyToRegAux, 0), + //copyToRegAux + /* + Gives error: InstrEmitter.cpp:782: + void llvm::InstrEmitter::EmitMachineNode(llvm::SDNode*, + bool, bool, llvm::DenseMap&): + Assertion `NumMIOperands >= II.getNumOperands() && + NumMIOperands <= II.getNumOperands() + + II.getNumImplicitDefs() + NumImpUses && + "#operands for dag node doesn't match .td file!"' failed. 
+ */ + // Node->getOperand(0) + ); + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR(): vloadSpecial = " + << vloadSpecial << ".\n" + << "vloadSpecial = "; + vloadSpecial->dump(); + dbgs() << "\n"); + + + std::string exprStr = RecoverCExpressionFromSDNode(Node, crtNodeMap, + #ifdef NEW_2019_03_21 + false + #else + true + #endif + ); + //std::string exprStr = RecoverCExpressionFromSDNode( + // symbolicValue.getNode(), crtNodeMap); + exprStr = " " + exprStr; + exprStr = exprStr + "; // MSA_I16"; + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR(): exprStr = " + << exprStr << "\n"); + + SDNode *inlineAsmNode = CreateInlineAsmNode(CurDAG, exprStr, + vloadSpecial, DL); + + /* VERY IMPORTANT: + You might wonder why we require creating also + SDNodes CopyToReg and CopyFromReg. + We put them to preserve the INLINEASM SDNode, which does NOT + have a type and needs to be chained/glued to its VLOAD* and + the result (Res) from this instr-selection needs to be + a vector type (typeVecNode). + If we don't put them (e.g., we make + Res = inlineAsmNode; + we end up with erroneous cases like this + (which gives an assertion failure like: + "#operands for dag node doesn't match .td file!"): + SU(10): t71: v128i16,glue = VLOAD_H_SYM_IMM t0 + SU(9): t74: ch,glue = inlineasm t71, + TargetExternalSymbol:i64' ((N + -1) << 1)) + 2) / + (((int *)&CONNEX_VL)[0])) ...; + // MSA_I10', MDNode:ch<0x1724220>, TargetConstant:i64<1> + SU(8): t75: v64i32 = NOP_BITCONVERT_WH t74 + */ + unsigned virtRegRes = RegInfo.createVirtualRegister( + typeVecNode == TYPE_VECTOR_I16 ? 
+ &Connex::VectorHRegClass : + #ifdef PREFERABLY_NOT_2019_03_21 + &Connex::MSA128WRegClass + #else + &Connex::VectorHRegClass + #endif + ); + /* + From http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html: + SDValue getCopyFromReg(SDValue Chain, SDLoc dl, unsigned Reg, EVT VT) + SDValue getCopyFromReg(SDValue Chain, SDLoc dl, unsigned Reg, EVT VT, + SDValue Glue) + // + SDValue getCopyToReg (SDValue Chain, SDLoc dl, unsigned Reg, SDValue N) + SDValue getCopyToReg (SDValue Chain, SDLoc dl, unsigned Reg, SDValue N, + SDValue Glue) + SDValue getCopyToReg (SDValue Chain, SDLoc dl, SDValue Reg, SDValue N, + SDValue Glue) + */ + + SDValue copyToRegRes = CurDAG->getCopyToReg( + //CurDAG->getEntryNode(), // messes up scheduling + //SDValue(vloadSpecial, 0), // this should be considered chain edge, even if VLOAD does NOT have output ch port + SDValue(inlineAsmNode, 0), + //extSym, + + DL, + virtRegRes, + SDValue(vloadSpecial, 0), + InFlag); + + SDValue copyFromRegRes = CurDAG->getCopyFromReg( + copyToRegRes, // chain + DL, + virtRegRes, + typeVecNode + //, copyToRegOp2 + ); + + /* + From http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html: + SDValue getRegister (unsigned Reg, EVT VT) + */ + //Res = CurDAG->getRegister(virtRegRes, TYPE_VECTOR_I16).getNode(); + Res = copyFromRegRes.getNode(); + + + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR(): Res = "; + Res->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR(): inlineAsmNode = "; + inlineAsmNode->dump(); + dbgs() << "\n"); + + /* TODO TODO TODO TODO TODO TODO: make sure I am not deleting an SDNode nc + incoming on the chain port of Node, where nc is an arbitrary + node which happened to be before Node. 
*/ + // ReplaceNode(Node, Res); + // return; + + needsConvertionToResultType = false; + } // END symbolicValue.getNode() != nullptr + else { + bool isUnitStepped = isUnitSteppedZeroStartingVector(BVN); + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR(): isUnitStepped = true\n"); + + if (isUnitStepped) { + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR(): isUnitStepped = true\n"); + /* + LLVM_DEBUG(dbgs() << "Select() for ISD::BUILD_VECTOR: Res = "; + Res->print(dbgs()); dbgs() << "\n"); + */ + + LdiOp = Connex::LDIX_H; + + ViaVecTy = TYPE_VECTOR_I16; + /* + //return std::make_pair(false, nullptr); + LLVM_DEBUG(dbgs() << "Select() for ISD::BUILD_VECTOR: exiting with 1st return nullptr\n"); + + return; + */ + + /* IMPORTANT: We use Connex's LDIX (LDIX_H) + instruction to load the immediate value Imm in all vector elements. */ + Res = CurDAG->getMachineNode(LdiOp, DL, ViaVecTy); + + if (ResTy != ViaVecTy) { + // If LdiOp is writing to a different register class to ResTy, then + // fix it up here. This COPY_TO_REGCLASS should never cause a move.v + // since the source and destination register sets contain the same + // registers. 
+ const TargetLowering *TLI = getTargetLowering(); + MVT ResTySimple = ResTy.getSimpleVT(); + const TargetRegisterClass *RC = TLI->getRegClassFor(ResTySimple); + + LLVM_DEBUG(dbgs() + << "selectBUILD_VECTOR(): before CurDAG->getMachineNode()\n"); + Res = CurDAG->getMachineNode(Connex::COPY_TO_REGCLASS, DL, + ResTy, SDValue(Res, 0), + CurDAG->getTargetConstant(RC->getID(), + DL, + // TODO_CHANGE_BACKEND: + //MVT::i64)); + TYPE_SCALAR_ELEMENT)); + } + } + } + } // END BVN->isConstantSplat == false + else { + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR(): BVN->isConstantSplat() == true, " + << "SplatValue = " << SplatValue + << ", SplatUndef = " << SplatUndef + << ", SplatBitSize = " << SplatBitSize + << "\n"); + + // TODO_CHANGE_BACKEND: + //if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) + if (SplatBitSize != TYPE_VECTOR_I16_ELEMENT_BITSIZE) { + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR(): SplatBitSize == " + << SplatBitSize + << "(8 is NOT supported in our back end)\n"); + // IMPORTANT-TODO: kindda wicked hack - try to avoid by working defining in TableGen the right conversion records + // TODO_CHANGE_BACKEND: + SplatBitSize = 16; + //SplatBitSize = 32; + //SplatBitSize = 64; + + LLVM_DEBUG(dbgs() << " --> Extending element type to SplatBitSize = " + << SplatBitSize << "\n"); + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1APInt.html + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR: SplatValue = " + << SplatValue.toString(10, 1) << "\n"); + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR: SplatValue.getBitWidth() = " + << SplatValue.getBitWidth() << "\n"); + + //!!!! !!!! 
TODO TODO TODO This should be performed through TableGen + //if (SplatBitSize > SplatValue.getBitWidth()) + // See http://llvm.org/docs/doxygen/html/classllvm_1_1APInt.html + SplatValue = SplatValue.zextOrTrunc(SplatBitSize); + + LLVM_DEBUG(dbgs() << "Select() for ISD::BUILD_VECTOR: After, SplatValue.getBitWidth() = " + << SplatValue.getBitWidth() << "\n"); + } + + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR: SplatUndef = " + << SplatUndef.toString(10, 1) << "\n"); + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR: SplatBitSize = " + << SplatBitSize << "\n"); + + /* !!!! TODO: VLOAD is NOT a feasible option if BUILD_VECTOR is loaded + with DIFFERENT constant values. */ + + switch (SplatBitSize) { + default: + //return std::make_pair(false, nullptr); + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR: exiting with 2nd return nullptr\n"); + return; + case 8: + //LdiOp = Connex::VLOAD_B; + LdiOp = Connex::VLOAD_H; + // TODO_CHANGE_BACKEND: + //ViaVecTy = MVT::v16i8; + //ViaVecTy = MVT::v16i32; + ViaVecTy = TYPE_VECTOR_I16; + /* + LdiOp = Connex::VLOAD_H; + ViaVecTy = MVT::v8i64; + */ + break; + case 16: + LdiOp = Connex::VLOAD_H; + // TODO_CHANGE_BACKEND: + ViaVecTy = TYPE_VECTOR_I16; + break; + case 32: + // TODO_CHANGE_BACKEND: + // TODO TODO: we should add also WHERE and vload depending on index + LdiOp = Connex::VLOAD_H; + ViaVecTy = TYPE_VECTOR_I16; + /* + LdiOp = Connex::VLOAD_W; + ViaVecTy = TYPE_VECTOR_I32; */ + break; + case 64: + assert(0 && "Connex supports only 16 bits immediate operands - see ConnexISA.docx"); + LdiOp = Connex::VLOAD_W; // TODO: actually VLOAD_D + // TODO_CHANGE_BACKEND: + //ViaVecTy = MVT::v8i64; + ViaVecTy = TYPE_VECTOR_I16; + break; + /* + LdiOp = Connex::VLOAD_H; //VLOAD: + ViaVecTy = MVT::v8i64; + break; + */ + } + + /* + From http://llvm.org/docs/doxygen/html/APInt_8h_source.html: + 00379 bool isSignedIntN(unsigned N) const + Check if this APInt has an N-bits signed integer value. 
+ */ + if (!SplatValue.isSignedIntN(16)) { + //return std::make_pair(false, nullptr); + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR: exiting via 3rd return nullptr\n"); + + return; + } + LLVM_DEBUG(dbgs() + << "selectBUILD_VECTOR: SplatValue.isSignedIntN(16) == true\n"); + + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR: SplatValue = " + << SplatValue.toString(10, 1) << "\n"); + + // See http://llvm.org/docs/doxygen/html/structllvm_1_1EVT.html + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR: ViaVecTy.getVectorElementType() = " + << ViaVecTy.getVectorElementType().getEVTString() + << "\n"); + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1SDLoc.html and http://llvm.org/docs/doxygen/html/classllvm_1_1DebugLoc.html + //LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR: DL = " << DL.getDebugLoc().getLoc() << "\n"); + + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR: before CurDAG->getTargetConstant()\n"); + SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL, + ViaVecTy.getVectorElementType()); + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR: after CurDAG->getTargetConstant()\n"); + // See http://llvm.org/docs/doxygen/html/classllvm_1_1SDValue.html + LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR: Imm = "; + Imm.dump(); + dbgs() << "\n"); + + /* IMPORTANT: if we got this far then we use Connex's VLOAD (VLOAD_H) + instruction to load the immediate value Imm in all vector elements. */ + Res = CurDAG->getMachineNode(LdiOp, DL, ViaVecTy, Imm); + + // It doesn't make sense to use target independent BITCAST + /* + Res = CurDAG->getMachineNode(ISD::BITCAST, DL, + typeVecNode, SDValue(Res2, 0)); + */ + + #ifdef DIFFERENT_IMPLEMENTATION_TO_BITCAST_FROM_v64i16_to_v128i16 + if (ResTy != ViaVecTy) { + // If LdiOp is writing to a different register class to ResTy, then + // fix it up here. This COPY_TO_REGCLASS should never cause a move.v + // since the source and destination register sets contain the same + // registers. 
+      const TargetLowering *TLI = getTargetLowering();
+      MVT ResTySimple = ResTy.getSimpleVT();
+      const TargetRegisterClass *RC = TLI->getRegClassFor(ResTySimple);
+
+      LLVM_DEBUG(dbgs()
+        << "selectBUILD_VECTOR(): before CurDAG->getMachineNode()\n");
+      Res = CurDAG->getMachineNode(Connex::COPY_TO_REGCLASS, DL,
+                                   ResTy, SDValue(Res, 0),
+                                   CurDAG->getTargetConstant(RC->getID(), DL,
+                                          // TODO_CHANGE_BACKEND:
+                                          TYPE_SCALAR_ELEMENT));
+    }
+  #endif
+  }
+
+  if (ResTy == TYPE_VECTOR_I32 && needsConvertionToResultType) {
+    LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR(): Adding NOP_BITCONVERT_HW node\n");
+
+    SDNode *ResOrig = Res;
+    Res = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, DL,
+                                 typeVecNode, SDValue(ResOrig, 0));
+  }
+
+  /*
+  return std::make_pair(true, Res);
+  */
+  LLVM_DEBUG(dbgs() << "selectBUILD_VECTOR(): Res = ";
+             /* print() gives "Segmentation fault" when BUILD_VECTOR
+                contains vars Res->print(dbgs()); dbgs() << "\n"); */
+             Res->dump(CurDAG);
+             dbgs() << "\n");
+
+  ReplaceNode(Node, Res);
+} // END selectBUILD_VECTOR()
+
+
+// ---------------------------------------------------------------------------
+// Emulation selectors: selectReduceI32, selectReduceF16, selectAddI32,
+// selectSubI32, selectMulI32, selectSraI32, selectAddF16.
+//
+// The functions below share one shape:
+//   1. read the operands of Node and re-type them to TYPE_VECTOR_I16 with
+//      NOP_BITCONVERT_* machine nodes;
+//   2. textually #include a Select_*_OpincaaCodeGen.h fragment in the middle
+//      of the function body. Those fragments are not visible from here; they
+//      are expected to define the variables used after the #include
+//      (reduceHigh16, reduceH, resH, resF16, lastNode) and presumably read
+//      the locals declared before it - TODO confirm against the headers
+//      before renaming or removing any local, including the apparently
+//      unused ViaVecTy;
+//   3. when MARKER_FOR_EMULATION is defined, bracket the sequence with nodes
+//      obtained from CreateInlineAsmNode() that carry the
+//      "// Starting ..." / "// Finishing ..." text;
+//   4. return a node to the caller. None of them calls ReplaceNode() itself.
+// ---------------------------------------------------------------------------
+
+// selectReduceI32 - selection of a reduction whose source is treated as
+// TYPE_VECTOR_I32.
+//
+// Operands read from Node: operand 0 is the incoming chain, operand 2 is the
+// source vector (the original author notes that operand 1 is a constant).
+// The emulation body comes from Select_REDi32_OpincaaCodeGen.h, which is
+// expected to define `reduceHigh16`.
+//
+// Returns `reduceHigh16`; with MARKER_FOR_EMULATION defined, returns instead
+// the node CreateInlineAsmNode() builds from `reduceHigh16` and the
+// "// Finishing RED.i32 emulation ;)" text.
+SDNode *ConnexDAGToDAGISel::selectReduceI32(SDNode *Node) {
+  LLVM_DEBUG(dbgs() << "Entered selectReduceI32(): Selecting Node = ";
+             Node->dump(CurDAG);
+             dbgs() << "\n");
+
+  SDLoc DL(Node);
+
+  // NOTE(review): ViaVecTy is never used and typeVecNode is only assigned in
+  // the code visible here; the included fragment may still reference them.
+  EVT ViaVecTy;
+  EVT typeVecNode;
+
+  //EVT ResTy = Node->getValueType(1); // 0 is ch (chain)
+
+  LLVM_DEBUG(dbgs()
+             << "selectReduceI32(): We are in the case TYPE_VECTOR_I32\n");
+  typeVecNode = TYPE_VECTOR_I32;
+
+  // NOTE: operand 1 is a constant, so the source vector is operand 2.
+  SDValue nodeOpSrc = Node->getOperand(2);
+
+  // Keep the node that was chained to Node as our own chain input, so that it
+  // is not removed.
+  SDValue nodeOpChain = Node->getOperand(0); // Opnd 0 is ch (chain)
+
+  LLVM_DEBUG(dbgs() << "selectReduceI32(): nodeOpSrc.getValueType() = "
+                    << nodeOpSrc.getValueType().getEVTString()
+                    << "\n");
+  LLVM_DEBUG(dbgs() << "selectReduceI32(): nodeOpSrc = ";
+             (nodeOpSrc.getNode())->dump();
+             dbgs() << "\n");
+  //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_I32);
+
+#ifdef MARKER_FOR_EMULATION
+  // Placeholder convert whose only job is to give the "Starting" marker a
+  // node to hang on.
+  SDNode *nodeOpSrcCastBogus = CurDAG->getMachineNode(
+                                 Connex::NOP_BITCONVERT_WH,
+                                 DL,
+                                 TYPE_VECTOR_I16,
+                                 MVT::Other,
+                                 // Original note: using MVT::Glue here instead
+                                 // "gives a serious error".
+                                 nodeOpSrc,
+                                 // chain edge
+                                 nodeOpChain
+                               );
+
+  std::string exprStrBegin = "// Starting RED.i32 emulation ;)";
+  SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG,
+                                                   exprStrBegin,
+                                                   nodeOpSrcCastBogus, DL);
+  LLVM_DEBUG(dbgs() << "selectReduceI32: inlineAsmNodeBegin = ";
+             inlineAsmNodeBegin->dump();
+             dbgs() << "\n");
+
+  /* This node is also bogus, only for the sake of "sandwiching" the INLINE
+     assembly with 2 NOPs.
+     Original notes: it is a BOGUS NOP_BITCONVERT, added "since it has a Glue
+     result, while nodeOpSrcCast2 does NOT"; requesting MVT::Glue as its
+     second result type can trip the assertion
+       ...getNodeId() == -1 && "Node already inserted!"
+     NOTE(review): the second result type actually requested below is
+     MVT::Other, and no nodeOpSrcCast2 exists in this function - the note
+     looks inherited from the binary-operator selectors.
+  */
+  SDNode *nodeOpSrcCast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HH,
+                            DL,
+                            TYPE_VECTOR_I16,
+                            MVT::Other,
+                            SDValue(nodeOpSrcCastBogus, 0),
+                            // chain
+                            SDValue(inlineAsmNodeBegin, 0)
+                          );
+#else
+  SDNode *nodeOpSrcCast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH,
+                            DL,
+                            TYPE_VECTOR_I16,
+                            MVT::Glue,
+                            nodeOpSrc,
+                            // chain edge
+                            nodeOpChain
+                          );
+#endif
+
+
+// Textual include: expected to define reduceHigh16 (used below).
+#include "Select_REDi32_OpincaaCodeGen.h"
+
+
+#ifdef MARKER_FOR_EMULATION
+  std::string exprStrEnd = "// Finishing RED.i32 emulation ;)";
+
+  /*
+  Notes kept from earlier experiments by the original author:
+   - Building the same node under the name inlineAsmNodeEnd and returning it
+     was recorded as failing with
+       ...hasAnyUseOfValue(i) ||
+          From->getValueType(i) == To->getValueType(i)) &&
+         "Cannot use this version of ReplaceAllUsesWith!"' failed.
+   - Wrapping the result in one more bogus NOP_BITCONVERT_HH
+     (operands SDValue(reduceHigh16, 0) and SDValue(inlineAsmNodeEnd, 0))
+     was recorded as failing with the same assertion when its result type was
+     TYPE_VECTOR_I16, and with
+       "#operands for dag node doesn't match .td file!"
+     when its result type was MVT::Other.
+  NOTE(review): resHH below is built exactly like the inlineAsmNodeEnd of the
+  first note - confirm that this configuration still selects cleanly.
+  */
+
+  SDNode *resHH = CreateInlineAsmNode(CurDAG, exprStrEnd, reduceHigh16, DL);
+
+
+  LLVM_DEBUG(dbgs() << "selectReduceI32(): resHH = ";
+             resHH->dump(CurDAG);
+             dbgs() << "\n");
+
+  return resHH;
+#else
+  LLVM_DEBUG(dbgs() << "selectReduceI32(): reduceHigh16 = ";
+             reduceHigh16->dump(CurDAG);
+             dbgs() << "\n");
+
+  return reduceHigh16;
+#endif
+} // END selectReduceI32()
+
+
+
+// selectReduceF16 - selection of a reduction whose source is treated as
+// TYPE_VECTOR_F16.
+//
+// Operands read from Node: operand 0 is the incoming chain, operand 2 is the
+// source vector (the original author notes that operand 1 is a constant).
+// Unlike selectReduceI32, every convert here is NOP_BITCONVERT_HH.
+// The emulation body comes from Select_REDf16_OpincaaCodeGen.h, which is
+// expected to define `reduceH`.
+//
+// Returns `reduceH`; with MARKER_FOR_EMULATION defined, returns instead the
+// node CreateInlineAsmNode() builds from `reduceH` and the
+// "// Finishing red.f16 emulation ;)" text.
+SDNode *ConnexDAGToDAGISel::selectReduceF16(SDNode *Node) {
+  LLVM_DEBUG(dbgs() << "Entered selectReduceF16(): Selecting Node = ";
+             Node->dump(CurDAG);
+             dbgs() << "\n");
+
+  SDLoc DL(Node);
+
+  // NOTE(review): ViaVecTy is never used and typeVecNode is only assigned in
+  // the code visible here; the included fragment may still reference them.
+  EVT ViaVecTy;
+  EVT typeVecNode;
+
+  //EVT ResTy = Node->getValueType(1); // 0 is ch (chain)
+
+  LLVM_DEBUG(dbgs()
+             << "selectReduceF16(): We are in the case TYPE_VECTOR_F16\n");
+  typeVecNode = TYPE_VECTOR_F16;
+
+  // NOTE: operand 1 is a constant, so the source vector is operand 2.
+  SDValue nodeOpSrc = Node->getOperand(2);
+
+  // Keep the node that was chained to Node as our own chain input, so that it
+  // is not removed.
+  SDValue nodeOpChain = Node->getOperand(0); // Opnd 0 is ch (chain)
+
+  LLVM_DEBUG(dbgs() << "selectReduceF16(): nodeOpSrc.getValueType() = "
+                    << nodeOpSrc.getValueType().getEVTString()
+                    << "\n");
+  LLVM_DEBUG(dbgs() << "selectReduceF16(): nodeOpSrc = ";
+             (nodeOpSrc.getNode())->dump();
+             dbgs() << "\n");
+  //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_F16);
+
+#ifdef MARKER_FOR_EMULATION
+  // Placeholder convert whose only job is to give the "Starting" marker a
+  // node to hang on.
+  SDNode *nodeOpSrcCastBogus1 = CurDAG->getMachineNode(
+                                  Connex::NOP_BITCONVERT_HH,
+                                  DL,
+                                  TYPE_VECTOR_I16,
+                                  MVT::Other,
+                                  // Original note: using MVT::Glue here instead
+                                  // "gives a serious error".
+                                  nodeOpSrc,
+                                  // chain edge
+                                  nodeOpChain
+                                );
+
+  std::string exprStrBegin = "// Starting red.f16 emulation ;)";
+  SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG,
+                                                   exprStrBegin,
+                                                   nodeOpSrcCastBogus1, DL);
+  LLVM_DEBUG(dbgs() << "selectReduceF16: inlineAsmNodeBegin = ";
+             inlineAsmNodeBegin->dump();
+             dbgs() << "\n");
+
+  /* This node is also bogus, only for the sake of "sandwiching" the INLINE
+     assembly with 2 NOPs.
+     Original notes: it is a BOGUS NOP_BITCONVERT, added "since it has a Glue
+     result, while nodeOpSrcCast2 does NOT"; requesting MVT::Glue as its
+     second result type can trip the assertion
+       ...getNodeId() == -1 && "Node already inserted!"
+     NOTE(review): the second result type actually requested below is
+     MVT::Other, and no nodeOpSrcCast2 exists in this function.
+  */
+  SDNode *nodeOpSrcCast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HH,
+                            DL,
+                            TYPE_VECTOR_I16,
+                            MVT::Other,
+                            SDValue(nodeOpSrcCastBogus1, 0),
+                            // chain
+                            SDValue(inlineAsmNodeBegin, 0)
+                          );
+#else
+  SDNode *nodeOpSrcCast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HH,
+                            DL,
+                            TYPE_VECTOR_I16,
+                            MVT::Glue,
+                            nodeOpSrc,
+                            // chain edge
+                            nodeOpChain
+                          );
+#endif
+
+
+// Textual include: expected to define reduceH (used below).
+#include "Select_REDf16_OpincaaCodeGen.h"
+
+
+#ifdef MARKER_FOR_EMULATION
+  std::string exprStrEnd = "// Finishing red.f16 emulation ;)";
+  // (An earlier variant built this same node under the name inlineAsmNodeEnd.)
+  SDNode *reduceHH = CreateInlineAsmNode(CurDAG, exprStrEnd,
+                                         reduceH, DL);
+
+  LLVM_DEBUG(dbgs() << "SelectReduceF16(): reduceH = ";
+             reduceH->dump(CurDAG);
+             dbgs() << "\n");
+  /*
+  Notes kept from earlier experiments by the original author:
+   - `return inlineAsmNodeEnd;` was recorded as failing with
+       ...hasAnyUseOfValue(i) ||
+          From->getValueType(i) == To->getValueType(i)) &&
+         "Cannot use this version of ReplaceAllUsesWith!"' failed.
+   - Wrapping the result in one more bogus NOP_BITCONVERT_HH
+     (operands SDValue(reduceH, 0) and SDValue(inlineAsmNodeEnd, 0)) was
+     recorded as failing with the same assertion when its result type was
+     TYPE_VECTOR_I16, and with
+       "#operands for dag node doesn't match .td file!"
+     when its result type was MVT::Other.
+  NOTE(review): reduceHH is built exactly like the inlineAsmNodeEnd of the
+  first note - confirm that this configuration still selects cleanly.
+  */
+  LLVM_DEBUG(dbgs() << "selectReduceF16(): reduceHH = ";
+             reduceHH->dump(CurDAG);
+             dbgs() << "\n");
+
+  return reduceHH;
+#else
+  LLVM_DEBUG(dbgs() << "selectReduceF16(): reduceH = ";
+             reduceH->dump(CurDAG);
+             dbgs() << "\n");
+
+  return reduceH;
+#endif
+} // END selectReduceF16()
+
+
+
+// selectAddI32 - selection of a two-operand add whose operands are treated
+// as TYPE_VECTOR_I32.
+//
+// Reads operands 0 and 1 of Node, converts each to TYPE_VECTOR_I16 with
+// NOP_BITCONVERT_WH, includes Select_ADDi32_OpincaaCodeGen.h (expected to
+// define `resH` and, for MARKER_FOR_EMULATION builds, `lastNode`), and
+// finally converts `resH` back to TYPE_VECTOR_I32 with NOP_BITCONVERT_HW.
+//
+// Returns the final NOP_BITCONVERT_HW node (`resW`).
+SDNode *ConnexDAGToDAGISel::selectAddI32(SDNode *Node) {
+  LLVM_DEBUG(dbgs() << "Entered selectAddI32(): Selecting Node = ";
+             Node->dump(CurDAG);
+             dbgs() << "\n");
+
+  /*
+    We look into doing "instruction-select" to
+       OpDst = ADD OpSRC1, OpSRC2
+    where the 3 operands are vectors (the element type named in the original
+    comment was lost; the code below works with TYPE_VECTOR_I32).
+  */
+
+  SDLoc DL(Node);
+
+  // NOTE(review): ViaVecTy is never used in the code visible here; the
+  // included fragment may still reference it.
+  EVT ViaVecTy;
+  EVT typeVecNode;
+
+  //EVT ResTy = Node->getValueType(1); // 0 is ch (chain)
+
+  LLVM_DEBUG(dbgs() << "selectAddI32(): We are in the case TYPE_VECTOR_I32\n");
+  typeVecNode = TYPE_VECTOR_I32;
+
+  SDValue nodeOpSrc1 = Node->getOperand(0);
+  SDValue nodeOpSrc2 = Node->getOperand(1);
+
+  LLVM_DEBUG(dbgs() << "selectAddI32(): nodeOpSrc1.getValueType() = "
+                    << nodeOpSrc1.getValueType().getEVTString()
+                    << "\n");
+  LLVM_DEBUG(dbgs() << "selectAddI32(): nodeOpSrc1 = ";
+             (nodeOpSrc1.getNode())->dump();
+             dbgs() << "\n");
+  LLVM_DEBUG(dbgs() << "selectAddI32(): nodeOpSrc2.getValueType() = "
+                    << nodeOpSrc2.getValueType().getEVTString()
+                    << "\n");
+  LLVM_DEBUG(dbgs() << "selectAddI32(): nodeOpSrc2 = ";
+             (nodeOpSrc2.getNode())->dump();
+             dbgs() << "\n");
+  //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_I32);
+
+  /*
+    VERY IMPORTANT:
+    We convert the v64i32 add operation into a sequence of nodes that take as
+    input the v64i32 operands of the operation, convert them to v128i16
+    operands using the NOP_BITCONVERT_WH nodes and then instantiate the
+    SDNodes emulating the v64i32 add operation.
+    At the end we put a NOP_BITCONVERT_HW SDNode converting the result from
+    v128i16 to v64i32.
+    Note that these NOP_BITCONVERT_* nodes are more helpful conceptually - but
+    they also keep the nodes s.t. they are not scheduled badly.
+  */
+
+  // NOTE(review): this node requests MVT::Glue even when
+  // MARKER_FOR_EMULATION is defined, whereas selectMulI32, selectSraI32 and
+  // selectAddF16 switch to MVT::Other in that configuration - confirm which
+  // is intended.
+  SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH,
+                             DL,
+                             // The output type of the node
+                             TYPE_VECTOR_I16,
+                             //MVT::Other,
+                             MVT::Glue,
+                             nodeOpSrc1);
+
+#ifdef MARKER_FOR_EMULATION
+  std::string exprStrBegin = "// Starting ADD.i32 emulation ;)";
+  SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG,
+                                                   exprStrBegin,
+                                                   nodeOpSrcCast1, DL);
+  LLVM_DEBUG(dbgs() << "selectAddI32: inlineAsmNodeBegin = ";
+             inlineAsmNodeBegin->dump(); dbgs() << "\n");
+#endif
+
+  // The second convert is ordered after the first one (or after the
+  // "Starting" marker) through its last operand.
+  // Original note: requesting MVT::Glue instead of MVT::Other here can trip
+  //   ...getNodeId() == -1 && "Node already inserted!"
+  // NOTE(review): without MARKER_FOR_EMULATION the operand labelled "chain"
+  // is result #1 of nodeOpSrcCast1, which was requested as MVT::Glue above.
+  SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH,
+                             DL,
+                             // The output type of the node
+                             TYPE_VECTOR_I16,
+                             MVT::Other,
+                             nodeOpSrc2,
+                             // chain
+                           #ifdef MARKER_FOR_EMULATION
+                             SDValue(inlineAsmNodeBegin, 0)
+                           #else
+                             SDValue(nodeOpSrcCast1, 1)
+                           #endif
+                           );
+
+
+
+  // Textual include: expected to define resH and lastNode (used below).
+  #include "Select_ADDi32_OpincaaCodeGen.h"
+
+
+
+#ifdef MARKER_FOR_EMULATION
+  std::string exprStrEnd = "// Finishing ADD.i32 emulation ;)";
+  SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd,
+                                                 lastNode, //resH,
+                                                 DL);
+  LLVM_DEBUG(dbgs() << "selectAddI32(): inlineAsmNodeEnd = ";
+             inlineAsmNodeEnd->dump(); dbgs() << "\n");
+
+  LLVM_DEBUG(dbgs() << "selectAddI32(): resH = ";
+             resH->dump(CurDAG); dbgs() << "\n");
+  // Original note: `return inlineAsmNodeEnd;` here gave
+  //   ...hasAnyUseOfValue(i) ||
+  //      From->getValueType(i) == To->getValueType(i)) &&
+  //     "Cannot use this version of ReplaceAllUsesWith!"' failed.
+#endif
+
+
+  // Convert the emulated result back to the node's vector type; the last
+  // operand orders this node after the emulation sequence.
+  SDNode *resW = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW,
+                   DL,
+                   typeVecNode,
+                   SDValue(resH, 0),
+                   // chain edge
+                 #ifdef MARKER_FOR_EMULATION
+                   SDValue(inlineAsmNodeEnd, 0)
+                 #else
+                   SDValue(resH, 1)
+                 #endif
+                 );
+  LLVM_DEBUG(dbgs() << "selectAddI32(): resW = ";
+             resW->dump(CurDAG);
+             dbgs() << "\n");
+
+  return resW;
+} // END selectAddI32()
+
+
+// selectSubI32 - selection of a two-operand subtract whose operands are
+// treated as TYPE_VECTOR_I32.
+//
+// Same structure as selectAddI32, with Select_SUBi32_OpincaaCodeGen.h
+// expected to define `resH` and `lastNode`.
+//
+// NOTE(review): unlike the sibling selectors, the "Starting"/"Finishing"
+// marker nodes are created unconditionally here - there is no
+// #ifdef MARKER_FOR_EMULATION guard. The commented-out operands below
+// (SDValue(nodeOpSrcCast1, 1), SDValue(resH, 1)) are what the siblings use
+// when the macro is not defined. Confirm whether the guard was left out on
+// purpose.
+//
+// Returns the final NOP_BITCONVERT_HW node (`resW`).
+SDNode *ConnexDAGToDAGISel::selectSubI32(SDNode *Node) {
+  LLVM_DEBUG(dbgs() << "Entered selectSubI32(): Selecting Node = ";
+             Node->dump(CurDAG);
+             dbgs() << "\n");
+
+  SDLoc DL(Node);
+
+  // NOTE(review): ViaVecTy is never used in the code visible here; the
+  // included fragment may still reference it.
+  EVT ViaVecTy;
+  EVT typeVecNode;
+
+  //EVT ResTy = Node->getValueType(1); // 0 is ch (chain)
+
+  LLVM_DEBUG(dbgs() << "selectSubI32(): We are in the case TYPE_VECTOR_I32\n");
+  typeVecNode = TYPE_VECTOR_I32;
+
+  SDValue nodeOpSrc1 = Node->getOperand(0);
+  SDValue nodeOpSrc2 = Node->getOperand(1);
+
+  LLVM_DEBUG(dbgs() << "selectSubI32(): nodeOpSrc1.getValueType() = "
+                    << nodeOpSrc1.getValueType().getEVTString()
+                    << "\n");
+  LLVM_DEBUG(dbgs() << "selectSubI32(): nodeOpSrc1 = ";
+             (nodeOpSrc1.getNode())->dump();
+             dbgs() << "\n");
+  LLVM_DEBUG(dbgs() << "selectSubI32(): nodeOpSrc2.getValueType() = "
+                    << nodeOpSrc2.getValueType().getEVTString()
+                    << "\n");
+  LLVM_DEBUG(dbgs() << "selectSubI32(): nodeOpSrc2 = ";
+             (nodeOpSrc2.getNode())->dump();
+             dbgs() << "\n");
+  //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_I32);
+
+  SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH,
+                             DL,
+                             TYPE_VECTOR_I16,
+                             //MVT::Other,
+                             MVT::Glue,
+                             nodeOpSrc1);
+  //
+  std::string exprStrBegin = "// Starting SUB.i32 emulation ;)";
+  SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG,
+                                                   exprStrBegin,
+                                                   nodeOpSrcCast1, DL);
+  LLVM_DEBUG(dbgs() << "Select() for SUB.i32: inlineAsmNodeBegin = ";
+             inlineAsmNodeBegin->dump(); dbgs() << "\n");
+  //
+  // Original note: requesting MVT::Glue instead of MVT::Other here can trip
+  //   ...getNodeId() == -1 && "Node already inserted!"
+  SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH,
+                             DL,
+                             TYPE_VECTOR_I16,
+                             MVT::Other,
+                             nodeOpSrc2,
+                             // chain
+                             //SDValue(nodeOpSrcCast1, 1)
+                             SDValue(inlineAsmNodeBegin, 0)
+                           );
+
+
+  // Textual include: expected to define resH and lastNode (used below).
+  #include "Select_SUBi32_OpincaaCodeGen.h"
+
+
+  std::string exprStrEnd = "// Finishing SUB.i32 emulation ;)";
+  SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd,
+                                                 lastNode, //resH,
+                                                 DL);
+  LLVM_DEBUG(dbgs() << "selectSubI32(): inlineAsmNodeEnd = ";
+             inlineAsmNodeEnd->dump();
+             dbgs() << "\n");
+
+
+  // Convert the emulated result back to the node's vector type.
+  SDNode *resW = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW,
+                   DL,
+                   typeVecNode,
+                   SDValue(resH, 0),
+                   // chain edge
+                   //SDValue(resH, 1)
+                   SDValue(inlineAsmNodeEnd, 0)
+                 );
+  LLVM_DEBUG(dbgs() << "selectSubI32(): resW = ";
+             resW->dump(CurDAG);
+             dbgs() << "\n");
+
+  return resW;
+} // END selectSubI32()
+
+
+// selectMulI32 - selection of a two-operand multiply whose operands are
+// treated as TYPE_VECTOR_I32.
+//
+// Same structure as selectAddI32. The emulation body is taken from
+// Select_MULTi32_ComplementedRepresentation_OpincaaCodeGen.h (a
+// SignAndMagnitude variant is commented out next to it); it is expected to
+// define `resH` and, for MARKER_FOR_EMULATION builds, `lastNode`.
+//
+// Returns the final NOP_BITCONVERT_HW node (`resW`).
+SDNode *ConnexDAGToDAGISel::selectMulI32(SDNode *Node) {
+  LLVM_DEBUG(dbgs() << "Entered selectMulI32(): [LATEST] Selecting Node = ";
+             Node->dump(CurDAG);
+             dbgs() << "\n");
+
+  SDLoc DL(Node);
+
+  // NOTE(review): ViaVecTy is never used in the code visible here; the
+  // included fragment may still reference it.
+  EVT ViaVecTy;
+  EVT typeVecNode;
+
+  //EVT ResTy = Node->getValueType(1); // 0 is ch (chain)
+
+  LLVM_DEBUG(dbgs() << "selectMulI32(): We are in the case TYPE_VECTOR_I32\n");
+  typeVecNode = TYPE_VECTOR_I32;
+
+  SDValue nodeOpSrc1 = Node->getOperand(0);
+  SDValue nodeOpSrc2 = Node->getOperand(1);
+
+  LLVM_DEBUG(dbgs() << "selectMulI32(): nodeOpSrc1.getValueType() = "
+                    << nodeOpSrc1.getValueType().getEVTString()
+                    << "\n");
+  LLVM_DEBUG(dbgs() << "selectMulI32(): nodeOpSrc1 = ";
+             (nodeOpSrc1.getNode())->dump();
+             dbgs() << "\n");
+  LLVM_DEBUG(dbgs() << "selectMulI32(): nodeOpSrc2.getValueType() = "
+                    << nodeOpSrc2.getValueType().getEVTString()
+                    << "\n");
+  LLVM_DEBUG(dbgs() << "selectMulI32(): nodeOpSrc2 = ";
+             (nodeOpSrc2.getNode())->dump();
+             dbgs() << "\n");
+  //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_I32);
+
+  // The second result type of the first convert depends on the build:
+  // MVT::Other with emulation markers, MVT::Glue without.
+  SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode(
+                             Connex::NOP_BITCONVERT_WH,
+                             DL,
+                             TYPE_VECTOR_I16,
+                           #ifdef MARKER_FOR_EMULATION
+                             MVT::Other,
+                           #else
+                             MVT::Glue,
+                           #endif
+                             nodeOpSrc1);
+
+#ifdef MARKER_FOR_EMULATION
+  std::string exprStrBegin = "// Starting MUL.i32 emulation ;)";
+  SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG,
+                                                   exprStrBegin,
+                                                   nodeOpSrcCast1, DL);
+  LLVM_DEBUG(dbgs() << "selectMulI32: inlineAsmNodeBegin = ";
+             inlineAsmNodeBegin->dump();
+             dbgs() << "\n");
+#endif
+
+  // Original note: requesting MVT::Glue instead of MVT::Other here can trip
+  //   ...getNodeId() == -1 && "Node already inserted!"
+  SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH,
+                             DL,
+                             TYPE_VECTOR_I16,
+                             MVT::Other,
+                             nodeOpSrc2,
+                             // chain
+                           #ifdef MARKER_FOR_EMULATION
+                             SDValue(inlineAsmNodeBegin, 0)
+                           #else
+                             SDValue(nodeOpSrcCast1, 1)
+                           #endif
+                           );
+
+  // Note: COPY generated by TwoAddressInstruction in WHERE blocks and handled
+  //   by me in ConnexTargetMachine.cpp, etc)
+
+//#include "Select_MULTi32_SignAndMagnitude_OpincaaCodeGen.h"
+#include "Select_MULTi32_ComplementedRepresentation_OpincaaCodeGen.h"
+
+#ifdef MARKER_FOR_EMULATION
+  std::string exprStrEnd = "// Finishing MUL.i32 emulation ;)";
+  SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd,
+                                                 lastNode, //resH,
+                                                 DL);
+  LLVM_DEBUG(dbgs() << "selectMulI32(): inlineAsmNodeEnd = ";
+             inlineAsmNodeEnd->dump();
+             dbgs() << "\n");
+#endif
+
+
+  // END of method - we convert resH (vector of short/i16) to resW (vector of i32)
+  SDNode *resW = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW,
+                   DL,
+                   typeVecNode,
+                   SDValue(resH, 0),
+                   // chain edge
+                 #ifdef MARKER_FOR_EMULATION
+                   SDValue(inlineAsmNodeEnd, 0)
+                 #else
+                   SDValue(resH, 1)
+                 #endif
+                 );
+  LLVM_DEBUG(dbgs() << "selectMulI32(): resW = ";
+             resW->dump(CurDAG);
+             dbgs() << "\n");
+
+  return resW;
+} // END selectMulI32()
+
+
+// selectSraI32 - selection of a two-operand node (the marker text calls it
+// SHRA.i32) whose operands are treated as TYPE_VECTOR_I32.
+//
+// Same structure as selectMulI32: both operands of Node go through
+// NOP_BITCONVERT_WH, Select_SHRAi32_OpincaaCodeGen.h is expected to define
+// `resH` (and `lastNode` for MARKER_FOR_EMULATION builds), and the result is
+// converted back with NOP_BITCONVERT_HW.
+//
+// Returns the final NOP_BITCONVERT_HW node (`resW`).
+SDNode *ConnexDAGToDAGISel::selectSraI32(SDNode *Node) {
+  LLVM_DEBUG(dbgs() << "Entered selectSraI32(): [LATEST] Selecting Node = ";
+             Node->dump(CurDAG);
+             dbgs() << "\n");
+
+  SDLoc DL(Node);
+
+  // NOTE(review): ViaVecTy is never used in the code visible here; the
+  // included fragment may still reference it.
+  EVT ViaVecTy;
+  EVT typeVecNode;
+
+  //EVT ResTy = Node->getValueType(1); // 0 is ch (chain)
+
+  LLVM_DEBUG(dbgs() << "selectSraI32(): We are in the case TYPE_VECTOR_I32\n");
+  typeVecNode = TYPE_VECTOR_I32;
+
+  SDValue nodeOp0 = Node->getOperand(0);
+  SDValue nodeOp1 = Node->getOperand(1);
+
+  // The second result type of the first convert depends on the build:
+  // MVT::Other with emulation markers, MVT::Glue without.
+  SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode(
+                             Connex::NOP_BITCONVERT_WH,
+                             DL,
+                             TYPE_VECTOR_I16,
+                           #ifdef MARKER_FOR_EMULATION
+                             MVT::Other,
+                           #else
+                             MVT::Glue,
+                           #endif
+                             nodeOp0);
+
+
+#ifdef MARKER_FOR_EMULATION
+  std::string exprStrBegin = "// Starting SHRA.i32 emulation ;)";
+  SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG,
+                                                   exprStrBegin,
+                                                   nodeOpSrcCast1, DL);
+  LLVM_DEBUG(dbgs() << "selectSraI32(): inlineAsmNodeBegin = ";
+             inlineAsmNodeBegin->dump();
+             dbgs() << "\n");
+#endif
+
+  // Original note: requesting MVT::Glue instead of MVT::Other here can trip
+  //   ...getNodeId() == -1 && "Node already inserted!"
+  SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode(
+                             Connex::NOP_BITCONVERT_WH,
+                             DL,
+                             TYPE_VECTOR_I16,
+                             MVT::Other,
+                             nodeOp1,
+                             // chain
+                           #ifdef MARKER_FOR_EMULATION
+                             SDValue(inlineAsmNodeBegin, 0)
+                           #else
+                             SDValue(nodeOpSrcCast1, 1)
+                           #endif
+                           );
+
+// Textual include: expected to define resH and lastNode (used below).
+#include "Select_SHRAi32_OpincaaCodeGen.h"
+
+  LLVM_DEBUG(dbgs() << "selectSraI32(): resH = ";
+             resH->dump(CurDAG);
+             dbgs() << "\n");
+
+#ifdef MARKER_FOR_EMULATION
+  std::string exprStrEnd = "// Finishing SHRA.i32 emulation ;)";
+  SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd,
+                                                 lastNode, //resH,
+                                                 DL);
+  LLVM_DEBUG(dbgs() << "selectSraI32(): inlineAsmNodeEnd = ";
+             inlineAsmNodeEnd->dump(); dbgs() << "\n");
+#endif
+
+  // END of method - we convert resH (vector of short/i16) to resW (vector of i32)
+  SDNode *resW = CurDAG->getMachineNode(
+                   Connex::NOP_BITCONVERT_HW,
+                   DL,
+                   typeVecNode,
+                   SDValue(resH, 0),
+                   // chain edge
+                 #ifdef MARKER_FOR_EMULATION
+                   SDValue(inlineAsmNodeEnd, 0)
+                 #else
+                   SDValue(resH, 1)
+                 #endif
+                 );
+
+  LLVM_DEBUG(dbgs() << "selectSraI32(): resW = ";
+             resW->dump(CurDAG);
+             dbgs() << "\n");
+
+  return resW;
+} // END selectSraI32()
+
+
+
+
+// selectAddF16 - selection of a two-operand add whose operands are treated
+// as TYPE_VECTOR_F16.
+//
+// Same structure as selectMulI32, with Select_ADDf16_OpincaaCodeGen.h
+// expected to define `resF16` (and `lastNode` for MARKER_FOR_EMULATION
+// builds).
+//
+// NOTE(review): the operands are converted with NOP_BITCONVERT_WH and the
+// result with NOP_BITCONVERT_HW, the same opcodes the i32 selectors use,
+// whereas selectReduceF16 uses NOP_BITCONVERT_HH for its f16 source. Confirm
+// that the W<->H converts are intended for an f16 vector.
+//
+// Returns the final NOP_BITCONVERT_HW node (`resW`), created with result type
+// TYPE_VECTOR_F16.
+SDNode *ConnexDAGToDAGISel::selectAddF16(SDNode *Node) {
+  LLVM_DEBUG(dbgs() << "Entered selectAddF16(): [LATEST] Selecting Node = ";
+             Node->dump(CurDAG);
+             dbgs() << "\n");
+
+  SDLoc DL(Node);
+
+  // NOTE(review): ViaVecTy is never used in the code visible here; the
+  // included fragment may still reference it.
+  EVT ViaVecTy;
+  EVT typeVecNode;
+
+  //EVT ResTy = Node->getValueType(1); // 0 is ch (chain)
+
+  LLVM_DEBUG(dbgs() << "selectAddF16(): We are in the case TYPE_VECTOR_F16\n");
+  typeVecNode = TYPE_VECTOR_F16;
+
+  SDValue nodeOpSrc1 = Node->getOperand(0);
+  SDValue nodeOpSrc2 = Node->getOperand(1);
+
+  LLVM_DEBUG(dbgs() << "selectAddF16(): nodeOpSrc1.getValueType() = "
+                    << nodeOpSrc1.getValueType().getEVTString()
+                    << "\n");
+  LLVM_DEBUG(dbgs() << "selectAddF16(): nodeOpSrc1 = ";
+             (nodeOpSrc1.getNode())->dump();
+             dbgs() << "\n");
+  LLVM_DEBUG(dbgs() << "selectAddF16(): nodeOpSrc2.getValueType() = "
+                    << nodeOpSrc2.getValueType().getEVTString()
+                    << "\n");
+  LLVM_DEBUG(dbgs() << "selectAddF16(): nodeOpSrc2 = ";
+             (nodeOpSrc2.getNode())->dump();
+             dbgs() << "\n");
+  //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_F16);
+
+  // The second result type of the first convert depends on the build:
+  // MVT::Other with emulation markers (original note: MVT::Glue "gives a
+  // serious error" there), MVT::Glue without.
+  SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH,
+                             DL,
+                             // The output type of the node
+                             TYPE_VECTOR_I16,
+                           #ifdef MARKER_FOR_EMULATION
+                             MVT::Other,
+                           #else
+                             MVT::Glue,
+                           #endif
+                             nodeOpSrc1);
+
+#ifdef MARKER_FOR_EMULATION
+  std::string exprStrBegin = "// Starting add.f16 emulation ;)";
+  SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG,
+                                                   exprStrBegin,
+                                                   nodeOpSrcCast1, DL);
+  LLVM_DEBUG(dbgs() << "selectAddF16: inlineAsmNodeBegin = ";
+             inlineAsmNodeBegin->dump(); dbgs() << "\n");
+#endif
+
+  // Original note: requesting MVT::Glue instead of MVT::Other here can trip
+  //   ...getNodeId() == -1 && "Node already inserted!"
+  SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH,
+                             DL,
+                             // The output type of the node
+                             TYPE_VECTOR_I16,
+                             MVT::Other,
+                             nodeOpSrc2,
+                             // chain
+                           #ifdef MARKER_FOR_EMULATION
+                             SDValue(inlineAsmNodeBegin, 0)
+                           #else
+                             SDValue(nodeOpSrcCast1, 1)
+                           #endif
+                           );
+
+  // Note: COPY generated by TwoAddressInstruction in WHERE blocks and handled
+  //   by me in ConnexTargetMachine.cpp, etc
+
+#include "Select_ADDf16_OpincaaCodeGen.h"
+
+#ifdef MARKER_FOR_EMULATION
+  std::string exprStrEnd = "// Finishing add.f16 emulation ;)";
+  SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd,
+                                                 lastNode, // resF16,
+                                                 DL);
+  LLVM_DEBUG(dbgs() << "selectAddF16(): inlineAsmNodeEnd = ";
+             inlineAsmNodeEnd->dump();
+             dbgs() << "\n");
+#endif
+
+  // END of method - convert resF16 back to the node's vector type.
+  // NOTE(review): the original comment here described converting "resH
+  // (vector of short/i16) to resW (vector of i32)"; in this function the
+  // source is resF16 and the requested result type is TYPE_VECTOR_F16.
+  SDNode *resW = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW,
+                   DL,
+                   typeVecNode,
+                   SDValue(resF16, 0),
+                   // chain edge
+                 #ifdef MARKER_FOR_EMULATION
+                   SDValue(inlineAsmNodeEnd, 0)
+                 #else
+                   SDValue(resF16, 1)
+                 #endif
+                 );
+  LLVM_DEBUG(dbgs() << "selectAddF16(): resW = ";
+             resW->dump(CurDAG);
+             dbgs() << "\n");
+
+  return resW;
+} // END selectAddF16()
+
+
+SDNode *ConnexDAGToDAGISel::selectSubF16(SDNode *Node) {
+  LLVM_DEBUG(dbgs() << "Entered selectSubF16(): [LATEST] Selecting Node = ";
+             Node->dump(CurDAG);
+             dbgs() << "\n");
+
+  SDLoc DL(Node);
+
+  EVT ViaVecTy;
+  EVT typeVecNode;
+
+  //EVT ResTy = Node->getValueType(1); // 0 is ch (chain)
+
+  LLVM_DEBUG(dbgs() << "selectSubF16(): We are in the case TYPE_VECTOR_F16\n");
+  typeVecNode = TYPE_VECTOR_F16;
+
+  SDValue nodeOpSrc1 = Node->getOperand(0);
+  SDValue nodeOpSrc2 = Node->getOperand(1);
+
+  LLVM_DEBUG(dbgs() << "selectSubF16(): nodeOpSrc1.getValueType() = "
+                    << nodeOpSrc1.getValueType().getEVTString()
+                    << "\n");
+  LLVM_DEBUG(dbgs() << "selectSubF16(): nodeOpSrc1 = ";
+             (nodeOpSrc1.getNode())->dump();
+             dbgs() << "\n");
+  LLVM_DEBUG(dbgs() << "selectSubF16(): nodeOpSrc2.getValueType() = "
+                    << nodeOpSrc2.getValueType().getEVTString()
+                    << "\n");
+  LLVM_DEBUG(dbgs() << "selectSubF16(): nodeOpSrc2 = ";
+             (nodeOpSrc2.getNode())->dump();
+             dbgs() << "\n");
+  //assert(nodeOpSrc.getValueType() ==
TYPE_VECTOR_F16); + + SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + #ifdef MARKER_FOR_EMULATION + MVT::Other, + // This gives a serious error: MVT::Glue, + #else + MVT::Glue, + #endif + nodeOpSrc1); + +#ifdef MARKER_FOR_EMULATION + std::string exprStrBegin = "// Starting sub.f16 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCast1, DL); + LLVM_DEBUG(dbgs() << "selectSubF16: inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); dbgs() << "\n"); +#endif + + SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + MVT::Other, + // IMPORTANT: this can give error: <getNodeId() == -1 && "Node already inserted!">> MVT::Glue, + nodeOpSrc2, + // chain + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeBegin, 0) + #else + SDValue(nodeOpSrcCast1, 1) + #endif + ); + + // Note: COPY generated by TwoAddressInctruction in WHERE blocks and handled + // by me in ConnexTargetMachine.cpp, etc) + +#include "Select_SUBf16_OpincaaCodeGen.h" + +#ifdef MARKER_FOR_EMULATION + std::string exprStrEnd = "// Finishing sub.f16 emulation ;)"; + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + lastNode, // resF16, + DL); + LLVM_DEBUG(dbgs() << "SelectSubF16(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); + dbgs() << "\n"); +#endif + + // END of method - we convert resH (vector of short/i16) to resW (vector of i32) + SDNode *resW = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(resF16, 0), + // chain edge +#ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeEnd, 0) +#else + SDValue(resF16, 1) +#endif + ); + LLVM_DEBUG(dbgs() << "selectSubF16(): resW = "; + resW->dump(CurDAG); dbgs() << "\n"); + + return resW; +} // END selectSubF16() + +SDNode *ConnexDAGToDAGISel::selectLtF16(SDNode *Node) { + LLVM_DEBUG(dbgs() << 
"Entered selectLtF16(): [LATEST] Selecting Node = "; + Node->dump(CurDAG); + dbgs() << "\n"); + + + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + + //EVT ResTy = Node->getValueType(1); // 0 is ch (chain) + + LLVM_DEBUG(dbgs() + << "selectLtF16(): We are in the case TYPE_VECTOR_F16\n"); + typeVecNode = TYPE_VECTOR_F16; + + SDValue nodeOpSrc1 = Node->getOperand(0); + SDValue nodeOpSrc2 = Node->getOperand(1); + + LLVM_DEBUG(dbgs() << "selectLtF16(): nodeOpSrc1.getValueType() = " + << nodeOpSrc1.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "selectLtF16(): nodeOpSrc1 = "; + (nodeOpSrc1.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "selectLtF16(): nodeOpSrc2.getValueType() = " + << nodeOpSrc2.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "selectLtF16(): nodeOpSrc2 = "; + (nodeOpSrc2.getNode())->dump(); + dbgs() << "\n"); + //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_F16); + + SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + #ifdef MARKER_FOR_EMULATION + MVT::Other, + // This gives a serious error: MVT::Glue, + #else + MVT::Glue, + #endif + nodeOpSrc1); + +#ifdef MARKER_FOR_EMULATION + std::string exprStrBegin = "// Starting lt.f16 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCast1, DL); + LLVM_DEBUG(dbgs() << "selectLtF16: inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); dbgs() << "\n"); +#endif + + SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + MVT::Other, + // IMPORTANT: this can give error: <getNodeId() == -1 && "Node already inserted!">> MVT::Glue, + nodeOpSrc2, + // chain + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeBegin, 0) + #else + SDValue(nodeOpSrcCast1, 1) + #endif + ); + + // Note: COPY generated by TwoAddressInctruction in WHERE blocks and handled + // 
by me in ConnexTargetMachine.cpp, etc) + +#include "Select_LTf16_OpincaaCodeGen.h" + +#ifdef MARKER_FOR_EMULATION + std::string exprStrEnd = "// Finishing lt.f16 emulation ;)"; + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + lastNode, //resF16, + DL); + LLVM_DEBUG(dbgs() << "selectLtF16(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); dbgs() << "\n"); +#endif + + // END of method - we convert resH (vector of short/i16) to resW (vector of i32) + SDNode *resW = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HW, + DL, + //typeVecNode, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(resF16, 0), + // chain edge + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeEnd, 0) + #else + SDValue(resF16, 1) + #endif + ); + LLVM_DEBUG(dbgs() << "selectLtF16(): resW = "; + resW->dump(CurDAG); + dbgs() << "\n"); + + return resW; +} // END selectLtF16() + + +SDNode *ConnexDAGToDAGISel::selectMulF16(SDNode *Node) { + LLVM_DEBUG(dbgs() << "Entered selectMulF16(): [LATEST] Selecting Node = "; + Node->dump(CurDAG); + dbgs() << "\n"); + + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + + //EVT ResTy = Node->getValueType(1); // 0 is ch (chain) + + LLVM_DEBUG(dbgs() + << "selectMulF16(): We are in the case TYPE_VECTOR_F16\n"); + typeVecNode = TYPE_VECTOR_F16; + + SDValue nodeOpSrc1 = Node->getOperand(0); + SDValue nodeOpSrc2 = Node->getOperand(1); + + LLVM_DEBUG(dbgs() << "selectMulF16(): nodeOpSrc1.getValueType() = " + << nodeOpSrc1.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "selectMulF16(): nodeOpSrc1 = "; + (nodeOpSrc1.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "selectMulF16(): nodeOpSrc2.getValueType() = " + << nodeOpSrc2.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "selectMulF16(): nodeOpSrc2 = "; + (nodeOpSrc2.getNode())->dump(); + dbgs() << "\n"); + //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_F16); + + SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // 
The output type of the node + TYPE_VECTOR_I16, + #ifdef MARKER_FOR_EMULATION + MVT::Other, + // This gives a serious error: MVT::Glue, + #else + MVT::Glue, + #endif + nodeOpSrc1); + +#ifdef MARKER_FOR_EMULATION + std::string exprStrBegin = "// Starting mult.f16 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCast1, DL); + LLVM_DEBUG(dbgs() << "selectMulF16: inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); + dbgs() << "\n"); +#endif + + SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + MVT::Other, + // IMPORTANT: this can give error: <getNodeId() == -1 && "Node already inserted!">> MVT::Glue, + nodeOpSrc2, + // chain + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeBegin, 0) + #else + SDValue(nodeOpSrcCast1, 1) + #endif + ); + + // Note: COPY generated by TwoAddressInctruction in WHERE blocks and handled + // by me in ConnexTargetMachine.cpp, etc) + +#include "Select_MULTf16_OpincaaCodeGen.h" + +#ifdef MARKER_FOR_EMULATION + std::string exprStrEnd = "// Finishing mult.f16 emulation ;)"; + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + lastNode, + DL); + LLVM_DEBUG(dbgs() << "selectMulF16(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); + dbgs() << "\n"); +#endif + + // END of method - we convert resH (vector of short/i16) to resW (vector of i32) + SDNode *resW = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(resF16, 0), + // chain edge + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeEnd, 0) + #else + #error Normally no longer supported + SDValue(resF16, 1) + #endif + ); + LLVM_DEBUG(dbgs() << "selectMulF16(): resW = "; + resW->dump(CurDAG); + dbgs() << "\n"); + + return resW; +} // END selectMulF16() + + +SDNode *ConnexDAGToDAGISel::selectDivF16(SDNode *Node) { + LLVM_DEBUG(dbgs() << "Entered selectDivF16(): [LATEST] Selecting Node = "; + Node->dump(CurDAG); 
+ dbgs() << "\n"); + + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + + //EVT ResTy = Node->getValueType(1); // 0 is ch (chain) + + LLVM_DEBUG(dbgs() << "selectDivF16(): We are in the case TYPE_VECTOR_F16\n"); + typeVecNode = TYPE_VECTOR_F16; + + SDValue nodeOpSrc1 = Node->getOperand(0); + SDValue nodeOpSrc2 = Node->getOperand(1); + + LLVM_DEBUG(dbgs() << "selectDivF16(): nodeOpSrc1.getValueType() = " + << nodeOpSrc1.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "selectDivF16(): nodeOpSrc1 = "; + (nodeOpSrc1.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "selectDivF16(): nodeOpSrc2.getValueType() = " + << nodeOpSrc2.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "selectDivF16(): nodeOpSrc2 = "; + (nodeOpSrc2.getNode())->dump(); + dbgs() << "\n"); + //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_F16); + + SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + #ifdef MARKER_FOR_EMULATION + MVT::Other, + // This gives a serious error: MVT::Glue, + #else + MVT::Glue, + #endif + nodeOpSrc1); + +#ifdef MARKER_FOR_EMULATION + std::string exprStrBegin = "// Starting div.f16 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCast1, DL); + LLVM_DEBUG(dbgs() << "selectDivF16: inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); dbgs() << "\n"); +#endif + + SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + MVT::Other, + // IMPORTANT: this can give error: <getNodeId() == -1 && "Node already inserted!">> MVT::Glue, + nodeOpSrc2, + // chain + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeBegin, 0) + #else + SDValue(nodeOpSrcCast1, 1) + #endif + ); + + // Note: COPY generated by TwoAddressInctruction in WHERE blocks and handled + // by me in ConnexTargetMachine.cpp, etc) + +#include 
"Select_DIVf16_OpincaaCodeGen.h" + +#ifdef MARKER_FOR_EMULATION + std::string exprStrEnd = "// Finishing div.f16 emulation ;)"; + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + lastNode, + DL); + LLVM_DEBUG(dbgs() << "selectDivF16(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); + dbgs() << "\n"); +#endif + + // END of method - we convert resH (vector of short/i16) to resW (vector of i32) + SDNode *resW = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(resF16, 0), + // chain edge + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeEnd, 0) + #else + #error Normally no longer supported + SDValue(resF16, 1) + #endif + ); + LLVM_DEBUG(dbgs() << "selectDivF16(): resW = "; + resW->dump(CurDAG); + dbgs() << "\n"); + + return resW; +} // END selectDivF16() + + +SDNode *ConnexDAGToDAGISel::selectDivI16(SDNode *Node) { + LLVM_DEBUG(dbgs() << "Entered selectDivI16(): Selecting Node = "; + Node->dump(CurDAG); + dbgs() << "\n"); + + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + + SDValue nodeOpSrc1 = Node->getOperand(0); + SDValue nodeOpSrc2 = Node->getOperand(1); + + LLVM_DEBUG(dbgs() << "selectDivI16(): nodeOpSrc1.getValueType() = " + << nodeOpSrc1.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "selectDivI16(): nodeOpSrc1 = "; + (nodeOpSrc1.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "selectDivI16(): nodeOpSrc2.getValueType() = " + << nodeOpSrc2.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "selectDivI16(): nodeOpSrc2 = "; + (nodeOpSrc2.getNode())->dump(); + dbgs() << "\n"); + //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_I32); + + SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode( + // IMPORTANT: this is a BOGUS + // NOP_BITCONVERT - we just + // put it since it has a Glue + // result, while + // nodeOpSrcCast1 does NOT + Connex::NOP_BITCONVERT_HH, + DL, + TYPE_VECTOR_I16, + #ifdef MARKER_FOR_EMULATION + MVT::Other, + // This gives error: MVT::Glue, 
+ #else + MVT::Glue, + #endif + nodeOpSrc1); + +#ifdef MARKER_FOR_EMULATION + std::string exprStrBegin = "// Starting DIV.i16 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCast1, DL); + LLVM_DEBUG(dbgs() << "selectDivI16: inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); + dbgs() << "\n"); +#endif + + SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode( + // IMPORTANT: this is a BOGUS + // NOP_BITCONVERT - we just + // put it since it has a Glue + // result, while + // nodeOpSrcCast1 does NOT + Connex::NOP_BITCONVERT_HH, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // This gives error: MVT::Glue, + nodeOpSrc2, + // chain + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeBegin, 0) + #else + SDValue(nodeOpSrcCast1, 1) + #endif + ); + +#include "Select_DIVi16_OpincaaCodeGen.h" + +#ifdef MARKER_FOR_EMULATION + std::string exprStrEnd = "// Finishing DIV.i16 emulation ;)"; + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + resH, DL); + LLVM_DEBUG(dbgs() << "selectDivI16(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); + dbgs() << "\n"); + + LLVM_DEBUG(dbgs() << "selectDivI16(): resH = "; + resH->dump(CurDAG); + dbgs() << "\n"); + // return inlineAsmNodeEnd; // Gives error: <hasAnyUseOfValue(i) || From->getValueType(i) == To->getValueType(i)) && "Cannot use this version of ReplaceAllUsesWith!"' failed.>> + + SDNode *resHH = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HH, + DL, + TYPE_VECTOR_I16, + SDValue(resH, 0), + // chain edge + //SDValue(resH, 1) + SDValue(inlineAsmNodeEnd, 0) + ); + LLVM_DEBUG(dbgs() << "selectDivI16(): resHH = "; + resHH->dump(CurDAG); + dbgs() << "\n"); + + return resHH; +#else + return resH; +#endif +} // END selectDivI16() + + + +SDNode *ConnexDAGToDAGISel::selectVSELECT(SDNode *vselectNode) { + LLVM_DEBUG(dbgs() << "Entered selectVSELECT(): Selecting vselectNode = "; + vselectNode->dump(CurDAG); + dbgs() << "\n"); + /* + Basically we expand ("instruction-select") 
the following machine-independent instruction: + dst = VSELECT pred, true_assignment, false_assignment: + to the following Connex machine instr sequence: + (note the comparison is excluded from the listing below + and will be scheduled before it) + + // For pred == false + dst = false_assignment + WHERExy + // For pred == true: + dst = true_assignment + END_WHERE + + NOTE: we could use a WHERE !pred to assign for the false case, + but our above solution "destructive" assignment is OK and + it takes fewer instructions. + */ + + + /* + In the end I do VSELECT treatment here, in + ConnexISelDAGToDAG, and not in ISelLowering::LowerOperation. + + Note that register allocation is performed after Instruction selection + (see [Cardoso_2014], Figure on page 134). + + Note that although it is not required to create virtual registers for + the ORV_H machine instructions (since we failed to add a ch input port + to the setcc - see 50_IfConversion/Setcc_with_ch_input_port_NOT_working + - and I guess we would fail here also), we create it for the true + ORV_H because we need to make the associated predecessor CopyToRegister a + successor of WHEREEQ, otherwise the WHEREEQ would not have a successor. + TODO if we are extremely precious: + I guess we could make a succcessor of WHEREEQ the CopyToReg successor + of ORV_H and could get rid of all input virtual registers. + NOTE: we canNOT get rid of the virtual register that keeps the result of + both ORV_H, because we can replace it only with a VSELECT (reminds me + of dataflow machines and multiplexors :) ), BUT we want + to lower VSELECT in other components. + + Note that the nodes we create here have to have correct ordering, otherwise + instruction selection can fail or have wrong semantics. 
+ */ + + // END_WHERE, etc are defined in anonymous enum in TableGen generated ConnexGenInstrInfo.inc + + // From http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html: + // LLVMContext * getContext () const + + SDLoc DL(vselectNode); + + EVT ViaVecTy; + EVT typeVecNode; + + //EVT ResTy = Node->getValueType(1); // 0 is ch (chain) + + //SDValue chain = DAG.getEntryNode(); + + assert(vselectNode->getNumOperands() == 3); + LLVM_DEBUG(dbgs() << " LowerOperation(): Initially vselectNode->use_size() = " + << vselectNode->use_size() << "\n"); + for (SDNode::use_iterator UI = vselectNode->use_begin(), + UE = vselectNode->use_end(); UI != UE; ++UI) { + // Note: UI is an SDNode * + LLVM_DEBUG(dbgs() << " LowerOperation(): Initially a use of vselectNode is: "; + UI->print(dbgs()); + dbgs() << "\n"); + } + + //EVT nodeResType = vselectNode->getValueType(0); + SDValue vselectNodeOp0 = vselectNode->getOperand(0); + SDValue vselectNodeOp1 = vselectNode->getOperand(1); + SDValue vselectNodeOp2 = vselectNode->getOperand(2); + + LLVM_DEBUG(dbgs() << "selectVSELECT(): vselectNodeOp0.getValueType() = " + << vselectNodeOp0.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "selectVSELECT(): vselectNodeOp0 = "; + (vselectNodeOp0.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "selectVSELECT(): vselectNodeOp1.getValueType() = " + << vselectNodeOp1.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "selectVSELECT(): vselectNodeOp1 = "; + (vselectNodeOp1.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "selectVSELECT(): vselectNodeOp2 = "; + (vselectNodeOp2.getNode())->dump(); + dbgs() << "\n"); + + SDValue setCC = vselectNodeOp0; + SDNode *setCCNode = setCC.getNode(); + SDValue setCCPred = vselectNodeOp0.getNode()->getOperand(2); + SDNode *setCCPredNode = setCCPred.getNode(); + // + LLVM_DEBUG(dbgs() << "selectVSELECT(): setCCPredNode = "; + //<< setCCPredNode + setCCPredNode->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() 
<< "selectVSELECT(): setCCNode = "; + //<< setCCPredNode + setCCNode->dump(); + dbgs() << "\n"); + + assert(setCCPredNode->isMachineOpcode() == false); + assert(setCCPredNode->getOpcode() == ISD::CONDCODE); + + EVT ResTy = TYPE_VECTOR_I16; + + unsigned whereOpcode; + switch (cast(setCCPredNode)->get()) { + case ISD::SETEQ: + whereOpcode = Connex::WHEREEQ; + break; + case ISD::SETLT: + whereOpcode = Connex::WHERELT; + break; + case ISD::SETOLT: { + whereOpcode = Connex::WHEREEQ; + + ResTy = TYPE_VECTOR_F16; + + // We ISel an lt.f16 and compare its result with 1. + SDNode *resLtF16 = selectLtF16(setCCNode); + + // VLOAD 1; + SDValue ct1 = CurDAG->getConstant(1, DL, MVT::i16, true, false); + SDNode *vload1 = CurDAG->getMachineNode(Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct1, + // Glue input edge + SDValue(resLtF16, 1) + ); + + SDNode *eq1 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(resLtF16, 0), + SDValue(vload1, 0), + // Glue input edge + SDValue(vload1, 1) + ); + LLVM_DEBUG(dbgs() << "selectVSELECT(): eq1 = "; + eq1->dump(); + dbgs() << "\n"); + + ReplaceNode(setCCNode, eq1); + setCCNode = eq1; + setCC = SDValue(eq1, 0); + + break; + } + case ISD::SETULT: + whereOpcode = Connex::WHEREULT; + break; + default: + assert(0 && "case not reachable"); + break; + } + + +#ifdef OLD_UNTIL_2018_07_26 + /* IMPORTANT: In essence this is ONLY to allocate a virtual register to use + it later for the TargetMachine. + */ + unsigned virtRegDst = RegInfo->createVirtualRegister( + &Connex::VectorHRegClass); + + SDNode *useSetCC = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + // TODO: actually it should be a NOP_BOGUS + DL, + TYPE_VECTOR_I16, + //MVT::Other, + MVT::Glue, + setCC); + + // IMPORTANT: This should NOT be correct - we put an OR, which following + // ConnexISA.docx (and especially the Opincaa simulator) should alter the + // Connex flags between the predicate and the WHERE. 
+ // + // Note however, that following scalar_alu.v (and scalar_logic.v), the logic + // operations do NOT alter the Connex flags - see + // e.g. /home/alarm/Experiments/Testing_OR_wrt_flags/STD_run_*. + // + // In https://en.wikipedia.org/wiki/Zero_flag + // " the zero flag is used to check the result of an arithmetic operation, + // including bitwise logical instructions. + // It is set if an arithmetic result is zero, and reset otherwise." + // "In some instruction sets such as the MIPS architecture, a dedicated + // flag register is not used; jump instructions instead check a register + // for zero. " + SDNode *copyFalse = CurDAG->getMachineNode(Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + vselectNodeOp2, + vselectNodeOp2, + // chain edge + //SDValue(vloadCt0, 1) // Glue edge + //SDValue(setCCNode, 0) + SDValue(useSetCC, 1) + //setCC + ); + + SDValue copyToRegDst1 = CurDAG->getCopyToReg( + // chain + SDValue(copyFalse, 1), + DL, + virtRegDst, + // Value copied to register + SDValue(copyFalse, 0) + ); + + /* + SDValue copyFromRegDest = DAG.getCopyFromReg( + chain, + //SDValue(endWhere, 0), + DL, + regDest, + TYPE_VECTOR_I16 // result type + //endWhere->getOperand(0)); //RegTy); + //SDValue(endWhere, 0) + ); + */ + SDNode *whereEq = CurDAG->getMachineNode(whereOpcode, + DL, + TYPE_VECTOR_I16, + //MVT::Other, + MVT::Glue, + SDValue(copyFalse, 0), + // chain edge + //SDValue(idxPredicate, 1) + //setCCPred + SDValue(copyFalse, 1) + ); +#else + // small-TODO: try to get rid of this Connex::NOP_BITCONVERT_WH, although I already tried and I got ISel errors + SDNode *useSetCC = CurDAG->getMachineNode( + // Works: + Connex::NOP_BITCONVERT_WH, + // Gives error: <<#operands for dag node doesn't match .td file!>> Connex::NOP_BOGUS, + DL, + TYPE_VECTOR_I16, + //MVT::Other, + MVT::Glue, + setCC); // The result of SDNode EQ_H + + #ifdef ANOTHER_WITHOUT_NOP_BETWEEN_PRED_AND_WHERE + /* IMPORTANT: This should NOT be correct - we put an OR, which following + * 
ConnexISA.docx (and especially the Opincaa simulator) should alter the + * Connex flags between the predicate and the WHERE. + * + * Note however, that following scalar_alu.v (and scalar_logic.v), the logic + * operations do NOT alter the Connex flags - see + * e.g. /home/alarm/Experiments/Testing_OR_wrt_flags/STD_run_*. + * + * In https://en.wikipedia.org/wiki/Zero_flag + * " the zero flag is used to check the result of an arithmetic operation, + * including bitwise logical instructions. + * It is set if an arithmetic result is zero, and reset otherwise." + * "In some instruction sets such as the MIPS architecture, a dedicated + * flag register is not used; jump instructions instead check a register + * for zero. " + */ + SDNode *copyFalse = CurDAG->getMachineNode(Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + vselectNodeOp2, + vselectNodeOp2, + // chain edge + //SDValue(vloadCt0, 1) // Glue edge + //SDValue(setCCNode, 0) + //SDValue(useSetCC, 1), + SDValue(useSetCC, 1) + //setCC // TODO: check + ); + + SDNode *whereEq = CurDAG->getMachineNode(whereOpcode, + DL, + TYPE_VECTOR_I16, + //MVT::Other, + MVT::Glue, + SDValue(copyFalse, 0), + // chain edge + //SDValue(idxPredicate, 1) + //setCCPred + SDValue(copyFalse, 1) + ); + #else + SDValue ct1 = CurDAG->getConstant(1 /* Num of cycles to NOP */, + DL, MVT::i16, true, false); + SDNode *copyFalse = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct1, + // glue (or chain) input edge + //SDValue(eq1, 1) + // ERROR: setCC + //SDValue(setCCNode, 1) + SDValue(useSetCC, 1) + ); + + SDNode *whereEq = CurDAG->getMachineNode(whereOpcode, + DL, + TYPE_VECTOR_I16, + //MVT::Other, + MVT::Glue, + //SDValue(copyFalse, 0), + vselectNodeOp2, + // Glue/chain edge + //SDValue(idxPredicate, 1) + //setCCPred + SDValue(copyFalse, 0) + ); + #endif +#endif + + // IMPORTANT: Note that we use ORV_SPECIAL_H, which puts a tied-to constraint + // to allocate to the same physical vector register (dst) both vSelectNodeOp1 + 
// and vSelectNodeOp2. + // Therefore, this ORV_SPECIAL_H puts over the vSelectNodeOp2, + // the false value, the values of the selected (where the predicate is true) + // vSelectNodeOp1. + SDNode *copyTrue = CurDAG->getMachineNode(Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + vselectNodeOp1, + vselectNodeOp1, + vselectNodeOp2, + // Glue edge + SDValue(whereEq, 1) // Glue edge + ); + + SDNode *endWhere = CurDAG->getMachineNode(Connex::END_WHERE, + DL, + // 2018_09_08 ResTy, + //TYPE_VECTOR_I16, + // 2018_09_08 + MVT::Other, + // 2018_09_08 SDValue(copyTrue, 0), + //MVT::Glue, + /* Important: we put this bogus + operand here to force the PostRA + scheduler to keep the + WHERE..END_WHERE block intact + withOUT using instruction bundles. + */ + // chain edge + SDValue(copyTrue, 1) // Glue edge + ); + std::string exprStrEnd = "// Finishing VSELECT emulation ;)"; + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + endWhere, + DL); + LLVM_DEBUG(dbgs() << "selectVSELECT(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); dbgs() << "\n"); + + SDNode *resW = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, + DL, + //typeVecNode, + TYPE_VECTOR_F16, + SDValue(copyTrue, 0), + // chain edge + SDValue(inlineAsmNodeEnd, 0) + ); + + LLVM_DEBUG(dbgs() << "selectVSELECT(): resW = "; + resW->dump(CurDAG); + dbgs() << "\n"); + + LLVM_DEBUG(dbgs() << "selectVSELECT(): whereEq = "; + whereEq->dump(CurDAG); + dbgs() << "\n"); + + /* + // Unfortunately this messes up at register allocation because it considers + // the result of the ORV_SPECIAL_H node to be dead after the use at END_WHERE + SDValue copyFromRegDest = CurDAG->getCopyFromReg( + //chain, + SDValue(endWhere, 0), + DL, + virtRegDst, + TYPE_VECTOR_I16 + ); + + SDNode *res = copyFromRegDest.getNode(); + LLVM_DEBUG(dbgs() << "selectVSELECT(): res = "; + res->dump(CurDAG); + dbgs() << "\n"); + */ + + //SDNode *res = endWhere; + SDNode *res = resW; + + return res; +} // END selectVSELECT() + + + 
+ +// Note: all ISD opcodes can be also found at +// http://llvm.org/docs/doxygen/html/namespacellvm_1_1ISD.html. +// There are also Connex opcodes that are generated by TableGen. +void ConnexDAGToDAGISel::Select(SDNode *Node) { + unsigned Opcode = Node->getOpcode(); + + // Dump information about the Node being selected + LLVM_DEBUG(dbgs() << "Entered ConnexDAGToDAGISel::Select(): Selecting Node = "; + Node->dump(CurDAG); + dbgs() << "Opcode = " << Opcode << "\n"); + + // If we have a (custom) Machine node, it means we already have selected it + if (Node->isMachineOpcode()) { + LLVM_DEBUG(dbgs() << "== "; + Node->dump(CurDAG); + dbgs() << '\n'); + return; + } + + // tablegen selection should be handled here. + switch (Opcode) { + default: + LLVM_DEBUG(dbgs() << "ConnexDAGToDAGISel::Select(): default case: Opcode = " + << Opcode << "\n"); + break; + + /* From http://llvm.org/docs/doxygen/html/ISDOpcodes_8h_source.html: + 00156 /// OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) + 00157 /// This node represents a target intrinsic function with side effects that + 00158 /// does not return a result. The first operand is a chain pointer. The + 00159 /// second is the ID number of the intrinsic from the llvm::Intrinsic + 00160 /// namespace. The operands to the intrinsic follow. 
+ */ + case ISD::INTRINSIC_VOID: { + LLVM_DEBUG(dbgs() << "ConnexDAGToDAGISel::Select(): case ISD::INTRINSIC_VOID" + << "\n"); + + unsigned intrinsicOpcode = cast( + Node->getOperand(1))->getZExtValue(); + LLVM_DEBUG(dbgs() << "intrinsicOpcode = " << intrinsicOpcode << "\n"); + + /* + LLVM_DEBUG(dbgs() << "Intrinsic::connex_end_repeat = " + << Intrinsic::connex_end_repeat << "\n"); + LLVM_DEBUG(dbgs() << "Intrinsic::connex_reduce = " + << Intrinsic::connex_reduce << "\n"); + LLVM_DEBUG(dbgs() << "Intrinsic::connex_repeat_x_times = " + << Intrinsic::connex_repeat_x_times << "\n"); + */ + + LLVM_DEBUG(dbgs() << "Node->getOperand(0) = "; + Node->getOperand(0).dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Node->getOperand(1) = "; + Node->getOperand(1).dump(); + dbgs() << "\n"); + + switch (intrinsicOpcode) { + case Intrinsic::connex_repeat_x_times: { + SDLoc DL(Node); + + /* llvm.connex.repeat.x.times SDNode has 3 operands: + * - 0, which is the chain - a bit to my surprise + * SelectionDAGBuilder puts as input to the chain port + * the node just above it, not SDNode t0 + * - 1, which is the intrinsic's opcode + * - 2, which is the actual parameter + * t16: ch = llvm.connex.repeat.x.times t10, TargetConstant:i64<471>, t15 + * + */ + + LLVM_DEBUG(dbgs() << "ConnexDAGToDAGISel::Select(): case " + "Intrinsic::connex_repeat_x_times" + << "\n"); + LLVM_DEBUG(dbgs() << " Node->getOperand(2) = "; + Node->getOperand(2).dump(); + dbgs() << "\n"); + +#define CODE2018_06_29 + /* + std::vector opsSDVRepeat; + opsSDVRepeat.push_back(CurDAG->getEntryNode()); + std::vector opsEVTRepeat; + // MachineSDNode *getMachineNode(unsigned Opcode, SDLoc dl, ArrayRef< EVT > ResultTys, ArrayRef< SDValue > Ops) + */ + SDNode *repeatSpecial = CurDAG->getMachineNode( + Connex::REPEAT_SYM_IMM, + DL, + // Return types + #ifdef CODE2018_06_29 + /* Gives error when doing "List Scheduling": + - when doing things as correct as possible (glue edge put in CONNEX::INLINEASM as the last operand): + <> + 
+ - glue edge put in + ISD::INLINEASM as the last operand): <> + + - <getNodeId() == -1 && "Node already inserted!">> + - because I put this Glue edge as 1st operand of INLINEASM, which is documented as being wrong + */ + MVT::Glue, + #else + MVT::Other, + #endif + // We add a chain edge + /* IMPORTANT: this was wrong since + * when we give ReplaceNode() it + * deletes the platform independent + * REPEAT SDNode which has as input opnd0 + * (Node->getOperand(0), an Inline + * ASM epxression, as discussed, + * fed on the chain input port) and opnd0 + * is not used by any other + * node. + WRONG: CurDAG->getEntryNode() + + * But now I give opnd0 as input + * to the chain port of the new + * machine-dependent node + * and this avoids + * opnd0 becoming a dead node and + * be eventually removed. + */ + Node->getOperand(0) + ); + LLVM_DEBUG(dbgs() << "Select() for Intrinsic::connex_repeat_x_times: repeatSpecial = "; + repeatSpecial->dump(); + dbgs() << "\n"); + + SDNode *op2 = Node->getOperand(2).getNode(); + LLVM_DEBUG(dbgs() << "op2 = "; + op2->dump(); + dbgs() << "\n"); + std::string exprStr = " " + + RecoverCExpressionFromSDNode(op2, crtNodeMap, true) + + ");"; + + SDNode *inlineAsmNode = CreateInlineAsmNode(CurDAG, exprStr, + repeatSpecial, DL + #ifdef CODE2018_06_29 + , true + #endif + ); + + //ReplaceAllUsesWith(Node, inlineAsmNode); + //CurDAG->RemoveDeadNode(Node); // Gives at scheduling error: Assertion `Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] && "Wrong topological sorting"' failed. + // ReplaceNode defined in https://llvm.org/svn/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAGISel.h. 
+ + ReplaceNode(Node, inlineAsmNode); + + // This takes out the REPEAT and symbolic expression INLINE Asm + //ReplaceNode(Node, Node->getOperand(0).getNode()); + return; + } + /* + case Intrinsic::connex_end_repeat: + // Note: this case is handled in TableGen match pattern in ConnexInstrInfo_REPEAT.td + */ + default: + break; + } + } + + /* From http://llvm.org/docs/doxygen/html/ISDOpcodes_8h_source.html: + 00148 /// RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) + 00149 /// This node represents a target intrinsic function with side effects that + 00150 /// returns a result. The first operand is a chain pointer. The second is + 00151 /// the ID number of the intrinsic from the llvm::Intrinsic namespace. The + 00152 /// operands to the intrinsic follow. The node has two results, the result + 00153 /// of the intrinsic and an output chain. + */ + case ISD::INTRINSIC_W_CHAIN: { + LLVM_DEBUG(dbgs() + << "ConnexDAGToDAGISel::Select(): case ISD::INTRINSIC_W_CHAIN" + << "\n"); + unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); + LLVM_DEBUG(dbgs() << "IntNo = " << IntNo << "\n"); + switch (IntNo) { + case Intrinsic::connex_load_byte: + case Intrinsic::connex_load_half: + case Intrinsic::connex_load_word: { + SDLoc DL(Node); + SDValue Chain = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue Skb = Node->getOperand(2); + SDValue N3 = Node->getOperand(3); + + // TODO_CHANGE_BACKEND: + //SDValue R6Reg = CurDAG->getRegister(Connex::R6, MVT::i64); + SDValue R6Reg = CurDAG->getRegister(Connex::R6, TYPE_SCALAR_ELEMENT); + + Chain = CurDAG->getCopyToReg(Chain, DL, R6Reg, Skb, SDValue()); + Node = CurDAG->UpdateNodeOperands(Node, Chain, N1, R6Reg, N3); + break; + } + case Intrinsic::connex_reduce: { + //EVT ResTy = Node->getValueType(0); + EVT ResTy = (Node->getOperand(2).getNode())->getValueType(0); + + LLVM_DEBUG(dbgs() << "Entered Select() for Intrinsic::connex_reduce:\n" + << " ResTy = " << ResTy.getEVTString() + << "\n"); + + if 
(ResTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "Select() for connex_reduce.i32\n"); + + SDNode *reduceHigh16 = selectReduceI32(Node); + + ReplaceNode(Node, reduceHigh16); // Res // does NOT work - gives RT error: whereEq); + //ReplaceNode(Node, nodeOpSrcCast); + + /* + // See http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html + CurDAG->SelectNodeTo(Node, + Connex::RED_H, + TYPE_VECTOR_I16, + SDValue(vloadCt0_srcAux, 0)); + */ + return; + } // END case Intrinsic::connex_reduce_i32 + else + if (ResTy == TYPE_VECTOR_F16) { + LLVM_DEBUG(dbgs() << "Select() for connex_reduce.f16\n"); + + SDNode *reduceH = selectReduceF16(Node); + + ReplaceNode(Node, reduceH); + + return; + } // END case Intrinsic::connex_reduce_f16 + } + } + break; + } + + case ISD::FrameIndex: { + int FI = cast(Node)->getIndex(); + EVT VT = Node->getValueType(0); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); + unsigned Opc = Connex::MOV_rr; + if (Node->hasOneUse()) { + CurDAG->SelectNodeTo(Node, Opc, VT, TFI); + return; + } + ReplaceNode(Node, CurDAG->getMachineNode(Opc, SDLoc(Node), VT, TFI)); + return; + } + + + case ISD::INSERT_VECTOR_ELT: { + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::INSERT_VECTOR_ELT.\n"); + return; + } + /* + case ISD::SETCC: { + SDNode *res = Select...(Node); + ReplaceNode(Node, res); + return; + } + */ + case ISD::VSELECT: { + SDNode *res = selectVSELECT(Node); + ReplaceNode(Node, res); + return; + } + case ISD::FADD: { + EVT ResTy = Node->getValueType(0); + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::FADD: \n" + << " ResTy = " << ResTy.getEVTString() + << "\n"); + + SDLoc DL(Node); + SDValue nodeOpSrc1 = Node->getOperand(0); + SDValue nodeOpSrc2 = Node->getOperand(1); + + // NEW_FP16 + if (ResTy == TYPE_VECTOR_F16) { + LLVM_DEBUG(dbgs() << "Select() for FADD: " + "We are in the case TYPE_VECTOR_F16\n"); + + + //#define OLD_NOT_CORRECT_ONLY_GOOD_FOR_MATMUL_128_F16 + #ifdef OLD_NOT_CORRECT_ONLY_GOOD_FOR_MATMUL_128_F16 +// TODO TODO TODO TODO 
TODO TODO TODO: use instead of Connex::NOP_BITCONVERT_WH a new node called Connex::NOP_BITCONVERT_F16H + + SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_WH, + DL, + ResTy, + nodeOpSrc1 + ); + SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_WH, + DL, + ResTy, + //MVT::Other, + nodeOpSrc2 + ); + + SDNode *res = CurDAG->getMachineNode(Connex::ADDV_H, + DL, + ResTy, + MVT::Other, + SDValue(nodeOpSrcCast1, 0), + SDValue(nodeOpSrcCast2, 0) + // glue (or chain) input edge + //SDValue(nodeOpSrcCast2, 0) + ); + + SDNode *resW = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HW, + DL, + ResTy, + SDValue(res, 0), + // chain edge + SDValue(res, 1) + ); + ReplaceNode(Node, resW); + #else // OLD_NOT_CORRECT_ONLY_GOOD_FOR_MATMUL_128_F16 + SDNode *res = selectAddF16(Node); + ReplaceNode(Node, res); + #endif + + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): res = "; + res->dump(); + dbgs() << "\n"); + + return; + } + else + if (ResTy == MVT::f16) { + // small-MEGA-TODO: we should emulate with BPF assembler the add.f16 scalar op. 
This means we need to use a NOP_CONVERT_F16_TO_I64, etc + LLVM_DEBUG(dbgs() << "Select() for FADD: We are in the case MVT::F16\n"); + SDNode *res = CurDAG->getMachineNode(Connex::ADD_rr, // This is actually a BPF instruction + DL, + ResTy, + // NOT working - error <hasAnyUseOfValue(i) || From->getValueType(i) == To->getValueType(i)) && "Cannot use this version of ReplaceAllUsesWith!"' failed.>>: MVT::i64, + //MVT::Other, + //nodeOpSrc1, // I guess this is not needed, since the auto-ISeled BPF instructions don't need it either + nodeOpSrc1, + nodeOpSrc2 + //opChain + ); + + ReplaceNode(Node, res); + + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): res = "; + res->dump(); + dbgs() << "\n"); + + return; + } + + } // END ISD::FADD + case ISD::FSUB: { + EVT ResTy = Node->getValueType(0); + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::FSUB: \n" + << " ResTy = " << ResTy.getEVTString() + << "\n"); + + if (ResTy == TYPE_VECTOR_F16) { + LLVM_DEBUG(dbgs() << "Select() for FSUB: " + "We are in the case TYPE_VECTOR_F16\n"); + //typeVecNode = TYPE_VECTOR_F16; + +#define GOOD + #ifdef GOOD + SDNode *res = selectSubF16(Node); + #else + SDLoc DL(Node); + SDNode *res = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_F16, + MVT::Glue, + Node->getOperand(0), + Node->getOperand(1) + // glue (or chain) input edge + //SDValue(lt0, 1) + ); + #endif + + ReplaceNode(Node, res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::FSUB: res = "; + res->dump(CurDAG); + dbgs() << "\n"); + return; + } + } + case ISD::FMUL: { + EVT ResTy = Node->getValueType(0); + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::FMUL: \n" + << " ResTy = " << ResTy.getEVTString() + << "\n"); + + if (ResTy == TYPE_VECTOR_F16) { + LLVM_DEBUG(dbgs() << "Select() for FMUL: " + "We are in the case TYPE_VECTOR_F16\n"); + //typeVecNode = TYPE_VECTOR_F16; + + //TODO TODO TODO + SDNode *res = selectMulF16(Node); + + ReplaceNode(Node, res); + + 
LLVM_DEBUG(dbgs() << "Select() for ISD::FMUL: res = "; + res->dump(CurDAG); + dbgs() << "\n"); + return; + } + } + // NEW32 + case ISD::ADD: { + EVT typeVecNode; + EVT ResTy = Node->getValueType(0); + + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::ADD: \n" + << " ResTy = " << ResTy.getEVTString() + << "\n"); + + if (ResTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "Select() for ADD: " + "We are in the case TYPE_VECTOR_I32\n"); + typeVecNode = TYPE_VECTOR_I32; + + SDNode *res = selectAddI32(Node); + + ReplaceNode(Node, res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::ADD: res = "; + res->dump(CurDAG); dbgs() << "\n"); + return; + } + else + if (ResTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for ADD: " + "We are in the case TYPE_VECTOR_I16\n"); + typeVecNode = TYPE_VECTOR_I16; + } + + break; + } // END ISD::ADD + // NEW32 + case ISD::SUB: { + EVT typeVecNode; + EVT ResTy = Node->getValueType(0); + + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::SUB.\n" + << "Select() for SUB: " + "ResTy = " << ResTy.getEVTString() + << "\n"); + + if (ResTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "Select() for SUB: " + "We are in the case TYPE_VECTOR_I32\n"); + typeVecNode = TYPE_VECTOR_I32; + + SDNode *res = selectSubI32(Node); + + ReplaceNode(Node, res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::SUB: res = "; + res->dump(CurDAG); + dbgs() << "\n"); + return; + } + else + if (ResTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for SUB: " + "We are in the case TYPE_VECTOR_I16\n"); + typeVecNode = TYPE_VECTOR_I16; + } + + break; + } // END ISD::SUB + case ISD::MUL: { + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + EVT ResTy = Node->getValueType(0); + + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::MUL.\n"); + + LLVM_DEBUG(dbgs() << "Select() for MUL: " + "ResTy = " + << ResTy.getEVTString() + << "\n"); + if (ResTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "Select() for MUL: " + "We are in the case TYPE_VECTOR_I32\n"); + typeVecNode = 
TYPE_VECTOR_I32; + + SDNode *res = selectMulI32(Node); + + ReplaceNode(Node, res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::MUL: res = "; + res->dump(CurDAG); + dbgs() << "\n"); + return; + } + else + if (ResTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for ISD::MUL: We are in the case TYPE_VECTOR_I16\n"); + typeVecNode = TYPE_VECTOR_I16; + } + + break; + } // END case ISD::MUL + case ISD::FDIV: { + EVT ResTy = Node->getValueType(0); + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::FDIV: \n" + << " ResTy = " << ResTy.getEVTString() + << "\n"); + + if (ResTy == TYPE_VECTOR_F16) { + LLVM_DEBUG(dbgs() << "Select() for FDIV: " + "We are in the case TYPE_VECTOR_F16\n"); + //typeVecNode = TYPE_VECTOR_F16; + + //TODO TODO TODO + SDNode *res = selectDivF16(Node); + + ReplaceNode(Node, res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::FDIV: res = "; + res->dump(CurDAG); + dbgs() << "\n"); + return; + } + /* + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + EVT ResTy = Node->getValueType(0); + + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::FDIV.\n"); + + LLVM_DEBUG(dbgs() << "Select() for FDIV: " + "ResTy = " << ResTy.getEVTString() + << "\n"); + */ + } + // TODO TODO TODO TODO TODO: should be also case ISD::SDIVREM: + case ISD::SDIV: { + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + EVT ResTy = Node->getValueType(0); + + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::DIV.\n"); + + LLVM_DEBUG(dbgs() << "Select() for DIV: " + "ResTy = " << ResTy.getEVTString() + << "\n"); + + if (ResTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "Select() for DIV: " + "We are in the case TYPE_VECTOR_I32\n"); + typeVecNode = TYPE_VECTOR_I32; + + assert(0 && "Not implemented"); + + /* SDNode *res = SelectDivI32(Node); + ReplaceNode(Node, res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::DIV: res = "; + res->dump(CurDAG); dbgs() << "\n"); + */ + return; + } + else + if (ResTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for ISD::DIV: " + "We are in 
the case TYPE_VECTOR_I16\n"); + typeVecNode = TYPE_VECTOR_I16; + + SDNode *res = selectDivI16(Node); + + ReplaceNode(Node, res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::DIV: res = "; + res->dump(CurDAG); + dbgs() << "\n"); + return; + } + + break; + } + case ISD::OR: { + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + EVT ResTy = Node->getValueType(0); + + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::OR.\n"); + LLVM_DEBUG(dbgs() << "Select() for OR: " + "ResTy = " << ResTy.getEVTString() + << "\n"); + + if (ResTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "Select() for OR: We are in the case TYPE_VECTOR_I32\n"); + typeVecNode = TYPE_VECTOR_I32; + + SDValue nodeOp0 = Node->getOperand(0); + SDValue nodeOp1 = Node->getOperand(1); + + SDNode *nodeOp0Cast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + TYPE_VECTOR_I16, + nodeOp0); + SDNode *nodeOp1Cast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + TYPE_VECTOR_I16, + nodeOp1); + + SDNode *Res16 = CurDAG->getMachineNode(Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + //MVT::Other, + SDValue(nodeOp0Cast, 0), + SDValue(nodeOp1Cast, 0) + ); + LLVM_DEBUG(dbgs() << "Select() for ISD::OR: Res16 = "; + Res16->dump(CurDAG); + dbgs() << "\n"); + + SDNode *Res = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(Res16, 0)); + + ReplaceNode(Node, Res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::OR: Res = "; + Res->dump(CurDAG); + dbgs() << "\n"); + return; + } + else + if (ResTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for OR: We are in the case TYPE_VECTOR_I16\n"); + typeVecNode = TYPE_VECTOR_I16; + } + + break; + } // END ISD::OR + // NEW32 + case ISD::AND: { + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + EVT ResTy = Node->getValueType(0); + + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::AND.\n"); + + + LLVM_DEBUG(dbgs() << "Select() for AND: " + "ResTy = " << ResTy.getEVTString() + << "\n"); + if (ResTy == TYPE_VECTOR_I32) { + 
LLVM_DEBUG(dbgs() << "Select() for AND: We are in the case TYPE_VECTOR_I32\n"); + typeVecNode = TYPE_VECTOR_I32; + + SDValue nodeOp0 = Node->getOperand(0); + SDValue nodeOp1 = Node->getOperand(1); + + SDNode *nodeOp0Cast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + TYPE_VECTOR_I16, + nodeOp0); + SDNode *nodeOp1Cast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + TYPE_VECTOR_I16, + nodeOp1); + + SDNode *Res16 = CurDAG->getMachineNode(Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + //MVT::Other, + SDValue(nodeOp0Cast, 0), + SDValue(nodeOp1Cast, 0) + ); + LLVM_DEBUG(dbgs() << "Select() for ISD::AND: Res16 = "; + Res16->dump(CurDAG); + dbgs() << "\n"); + + SDNode *Res = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(Res16, 0)); + + ReplaceNode(Node, Res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::AND: Res = "; + Res->dump(CurDAG); + dbgs() << "\n"); + return; + } + else + if (ResTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for AND: We are in the case TYPE_VECTOR_I16\n"); + typeVecNode = TYPE_VECTOR_I16; + } + + break; + } // END ISD::AND + // NEW32 + case ISD::XOR: { + SDLoc DL(Node); + + /* !!!!TODO TODO: check that the flags are also equivalent: XOR i16 + sets flags like SUBC: + see ConnexVector.cpp + BINARY_OP_FLAGS_LIKE_SUBC(^) - look for the macros + */ + + EVT ViaVecTy; + EVT typeVecNode; + EVT ResTy = Node->getValueType(0); + + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::XOR.\n"); + + LLVM_DEBUG(dbgs() << "Select() for XOR: " + "ResTy = " << ResTy.getEVTString() + << "\n"); + if (ResTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "Select() for XOR: We are in the case TYPE_VECTOR_I32\n"); + typeVecNode = TYPE_VECTOR_I32; + + SDValue nodeOp0 = Node->getOperand(0); + SDValue nodeOp1 = Node->getOperand(1); + + SDNode *nodeOp0Cast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + TYPE_VECTOR_I16, + nodeOp0); + SDNode *nodeOp1Cast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + 
DL, + TYPE_VECTOR_I16, + nodeOp1); + + SDNode *Res16 = CurDAG->getMachineNode(Connex::XORV_H, + DL, + TYPE_VECTOR_I16, + //MVT::Other, + SDValue(nodeOp0Cast, 0), + SDValue(nodeOp1Cast, 0) + ); + LLVM_DEBUG(dbgs() << "Select() for ISD::XOR: Res16 = "; + Res16->dump(CurDAG); + dbgs() << "\n"); + + SDNode *Res = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(Res16, 0)); + + ReplaceNode(Node, Res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::XOR: Res = "; + Res->dump(CurDAG); + dbgs() << "\n"); + return; + } + else + if (ResTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for XOR: We are in the case TYPE_VECTOR_I16\n"); + typeVecNode = TYPE_VECTOR_I16; + } + + break; + } // END ISD::XOR + // NEW32 + case ISD::SRA: { // Arithmetic Shift Right + // See http://llvm.org/docs/LangRef.html#ashr-instruction + // and https://en.wikipedia.org/wiki/Arithmetic_shift + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + EVT ResTy = Node->getValueType(0); + + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::SRA.\n"); + LLVM_DEBUG(dbgs() << "Select() for SRA: " + "ResTy = " << ResTy.getEVTString() + << "\n"); + + if (ResTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "selectSraI32() for MUL: We are in the case TYPE_VECTOR_I32\n"); + //typeVecNode = TYPE_VECTOR_I32; + + #ifdef TODO_INTERESTING + //ConstantSDNode *nodeOp0CtSDNode = cast(nodeOp1); + BuildVectorSDNode *BVN = cast(nodeOp1.getNode()); + //!!!! 
TODO: need to discriminate case: immediate operand - it takes fewer cycles + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (BVN->isConstantSplat(SplatValue, SplatUndef, + SplatBitSize, HasAnyUndefs, + 8, true) == true) { + LLVM_DEBUG(dbgs() << "Select() for SRA: BVN->isConstantSplat() == TRUE\n"); + // MEGA-TODO: in this case we should do ISHRA.i32 instead of SHRA.i32 + } + #endif + + SDNode *res = selectSraI32(Node); + + ReplaceNode(Node, res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::SRA: res = "; + res->dump(CurDAG); + dbgs() << "\n"); + return; + } + else + if (ResTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for SRA: We are in the case TYPE_VECTOR_I16\n"); + typeVecNode = TYPE_VECTOR_I16; + } + + break; + } // END ISD::SRA + // NEW32 + case ISD::MGATHER: { + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::MGATHER.\n"); + LLVM_DEBUG(dbgs() << " Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + + SDLoc DL(Node); + EVT ViaVecTy; + EVT typeVecNode; + EVT ResTy = Node->getValueType(0); + + + MaskedGatherSDNode *nodeGather = dyn_cast(Node); + assert(nodeGather != NULL); + + // See http://llvm.org/docs/doxygen/html/SelectionDAGNodes_8h_source.html#l02107 + SDValue indexVec = nodeGather->getIndex(); + SDValue passthruVec = nodeGather->getPassThru(); + + LLVM_DEBUG(dbgs() << "Select() for MGATHER: indexVec = "; + (indexVec.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select() for MGATHER: passthruVec = "; + (passthruVec.getNode())->dump(); + dbgs() << "\n"); + + EVT opIndexVecTy = indexVec.getValueType(); + EVT opValVecTy = passthruVec.getValueType(); + + LLVM_DEBUG(dbgs() << "Select() for MGATHER: opIndexVecTy = " + << opIndexVecTy.getEVTString() + << ", opValVecTy = " << opValVecTy.getEVTString() + << ", ResTy = " << ResTy.getEVTString() + << "\n"); + + SDValue opChain = Node->getOperand(0); + LLVM_DEBUG(dbgs() << "Select() for MGATHER: opChain = "; + (opChain.getNode())->dump(); + dbgs() << 
"\n"); + + // NEW_FP16 + //if (opValVecTy == TYPE_VECTOR_F16) + if (ResTy == TYPE_VECTOR_F16) { + typeVecNode = TYPE_VECTOR_F16; + + LLVM_DEBUG(dbgs() << "Select() for MGATHER: We are in the case ResTy == TYPE_VECTOR_F16\n"); + + #ifdef BITCAST_2018_06_F16 + SDNode *indexVec16 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + //typeVecNode, + TYPE_VECTOR_I16, + // The address operand + indexVec); + #endif + + SDNode *Res16 = CurDAG->getMachineNode(Connex::LD_INDIRECT_H, + DL, + #ifdef BITCAST_2018_06_F16 + TYPE_VECTOR_I16, + #else + typeVecNode, // We prevent getting error: <hasAnyUseOfValue(i) || From->getValueType(i) == To->getValueType(i)) && "Cannot use this version of ReplaceAllUsesWith!"' failed.>> + #endif + //MVT::Other, + #ifdef BITCAST_2018_06_F16 + SDValue(indexVec16, 0), //indexVec + #else + indexVec, + #endif + opChain + ); + + SDNode *Res; + #ifdef BITCAST_2018_06_F16 + Res = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(Res16, 0)); + #else + Res = Res16; + #endif + + // TODO TODO TODO TODO TODO TODO TODO: use instead of Connex::NOP_BITCONVERT_WH a new node called Connex::NOP_BITCONVERT_F16H + #ifdef GOOD_I_THINK_TESTED_JUN_21_2018_SOME_BUG_IN_MULF16 + SDNode *Res16 = CurDAG->getMachineNode(Connex::LD_INDIRECT_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + indexVec + ); + SDNode *Res = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HW, + DL, + //typeVecNode, + ResTy, + SDValue(Res16, 0)); + #endif + + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): Res = "; + Res->dump(); + dbgs() << "\n"); + + ReplaceNode(Node, Res); + + return; + } + else + if (opIndexVecTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "Select() for MGATHER: We are in the case TYPE_VECTOR_I32\n"); + typeVecNode = TYPE_VECTOR_I32; + + /* VERY IMPORTANT: we add opChain to chain this new node with the node + the target-independent masked_gather node was chained with. 
+ If we do not do this then we will eventually have other useful + chained nodes removed, resulting in a incorrect/partial program. */ + /* TODO: not sure if the chain is going to always be operand 0. + However masked_gather has a chain following attribute SDNPHasChain, + see include/llvm/Target/TargetSelectionDAG.td + See also indirectly the other params (methods get*()) of + MaskedGatherScatterSDNode at + http://llvm.org/docs/doxygen/html/SelectionDAGNodes_8h_source.html#l02107 + */ + #ifdef USE16bits_2017_05_27 + SDNode *Res = CurDAG->getMachineNode(Connex::LD_INDIRECT_W, + DL, + typeVecNode, + //MVT::Other, + indexVec, + opChain + ); + #else + #ifdef BITCAST_MAY2017_05_28 + SDNode *indexVec16 = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_WH, + DL, + //typeVecNode, + TYPE_VECTOR_I16, + // The address operand + indexVec); + #endif + SDNode *Res16 = CurDAG->getMachineNode(Connex::LD_INDIRECT_H, + DL, + //typeVecNode, + TYPE_VECTOR_I16, + //MVT::Other, + #ifdef BITCAST_MAY2017_05_28 + SDValue(indexVec16, 0), //indexVec + #else + indexVec, + #endif + opChain + ); + SDNode *Res; + #ifdef BITCAST_MAY2017_05_28 + Res = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + MVT::Other, // We need this only for DotProd.i16 + SDValue(Res16, 0)); + #else + Res = Res16; + #endif + #endif + + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): Res = "; + Res->dump(); + dbgs() << "\n"); + + ReplaceNode(Node, Res); + + return; + } + else + if (opIndexVecTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for MGATHER: We are in the case TYPE_VECTOR_I16\n"); + typeVecNode = TYPE_VECTOR_I16; + + SDNode *Res = CurDAG->getMachineNode(Connex::LD_INDIRECT_H, + DL, + //typeVecNode, + TYPE_VECTOR_I16, + /* Usually it comes with ch + putting it here avoids error + <> */ + MVT::Other, + indexVec, + opChain + ); + + LLVM_DEBUG(dbgs() << "Res = "; + Res->dump(CurDAG); + dbgs() << "\n"); + 
ReplaceNode(Node, Res); + + return; + } + //Res = CurDAG->getMachineNode(Connex::LD_INDIRECT_W, DL, ViaVecTy, Node->getOperand(0)); + //Res = CurDAG->getMachineNode(LD_INDIRECT_W_DESC_BASE, DL, ViaVecTy, Node->getOperand(0)); + //Res = CurDAG->getMachineNode(ST_INDIRECT_H_DESC_BASE, DL, ViaVecTy, Node->getOperand(0)); + + break; + } // END ISD::MGATHER + // NEW32 + case ISD::MSCATTER: { + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::MSCATTER.\n"); + + SDLoc DL(Node); + MVT typeVecNode; + // For SCATTER it is chain: EVT ResTy = Node->getValueType(0); + //MVT mResTy = ResTy.getSimpleVT(); + + + MaskedScatterSDNode *nodeScatter = dyn_cast(Node); + // See llvm.org/docs/doxygen/html/SelectionDAGNodes_8h_source.html#l02107 + SDValue indexVec = nodeScatter->getIndex(); + SDValue sourceVec = nodeScatter->getValue(); + + EVT opIndexVecTy = indexVec.getValueType(); /*Node->getOperand(0).getValueType(); */ // getSimpleValueType(); + EVT opSourceVecTy = sourceVec.getValueType(); + + LLVM_DEBUG(dbgs() << "Select() for MSCATTER: " + << "opIndexVecTy = " << opIndexVecTy.getEVTString() + << ", opSourceVecTy = " << opSourceVecTy.getEVTString() + << "\n"); + + // NEW_FP16 + if (opSourceVecTy == TYPE_VECTOR_F16) { + LLVM_DEBUG(dbgs() << "Select() for MSCATTER: We are in the case " + "opSourceVecTy == TYPE_VECTOR_F16\n"); + +// TODO TODO TODO TODO TODO TODO TODO: use instead of Connex::NOP_BITCONVERT_WH a new node called Connex::NOP_BITCONVERT_F16H + #ifdef BITCAST_2018_06_F16 + SDNode *sourceVec16 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + TYPE_VECTOR_I16, + sourceVec + ); + SDNode *Res = CurDAG->getMachineNode(Connex::ST_INDIRECT_H, + DL, + MVT::Other, + indexVec, + SDValue(sourceVec16, 0) + ); + #else + SDNode *Res = CurDAG->getMachineNode(Connex::ST_INDIRECT_H, + DL, + MVT::Other, + indexVec, + sourceVec + ); + #endif + + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): Res = "; + Res->dump(); + 
dbgs() << "\n"); + + ReplaceNode(Node, Res); + + return; + } + else + if (opIndexVecTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "Select() for MSCATTER: We are in the case opIndexVecTy == TYPE_VECTOR_I32\n"); + + typeVecNode = TYPE_VECTOR_I32; + + /* VERY IMPORTANT: we add opChain to chain this new node with the node + the target-independent masked_gather node was chained with. + If we do not do this then we will eventually have other useful + chained nodes removed, resulting in a incorrect/partial program. */ + /* TODO: not sure if the chain is going to always be operand 0. + However masked_gather has a chain following attribute SDNPHasChain, + see include/llvm/Target/TargetSelectionDAG.td + See also indirectly the other params (methods get*()) of + MaskedGatherScatterSDNode at + http://llvm.org/docs/doxygen/html/SelectionDAGNodes_8h_source.html#l02107 + */ + SDValue opChain = Node->getOperand(0); + LLVM_DEBUG(dbgs() << "Select() for MSCATTER: opChain = "; + (opChain.getNode())->dump(); + dbgs() << "\n"); + #ifdef USE16bits_2017_05_27 + SDNode *Res = CurDAG->getMachineNode(Connex::ST_INDIRECT_W, + DL, + //typeVecNode, + //voidEVT, + MVT::Other, + indexVec, + sourceVec + //,opChain + ); + #else + #ifdef BITCAST_MAY2017_05_28 + SDNode *indexVec16 = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_WH, + DL, + //typeVecNode, + TYPE_VECTOR_I16, + indexVec); + SDNode *sourceVec16 = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_WH, + DL, + //typeVecNode, + TYPE_VECTOR_I16, + sourceVec + ); + #endif + SDNode *Res = CurDAG->getMachineNode(Connex::ST_INDIRECT_H, + DL, + //typeVecNode, + //voidEVT, + MVT::Other, + #ifdef BITCAST_MAY2017_05_28 + SDValue(indexVec16, 0), //indexVec, + //sourceVec + SDValue(sourceVec16, 0) + #else + indexVec, + sourceVec + #endif + /* + //,opChain + TODO TODO TODO: figure out why can't I add a chain + edge to scatter like I did for MGAHTER + MAYBE use: CurDAG->getVTList(MVT::Other, MVT::Glue), */ + ); + #endif + + LLVM_DEBUG(dbgs() << 
"Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): Res = "; + Res->dump(); + dbgs() << "\n"); + + ReplaceNode(Node, Res); + + return; + } + else + if (opIndexVecTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for MSCATTER: We are in the case " + "opIndexVecTy == TYPE_VECTOR_I16\n"); + + typeVecNode = TYPE_VECTOR_I16; + + SDNode *Res = CurDAG->getMachineNode(Connex::ST_INDIRECT_H, + DL, + //typeVecNode, + //voidEVT, + MVT::Other, + indexVec, + sourceVec + ); + + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): Res = "; + Res->dump(); + dbgs() << "\n"); + + ReplaceNode(Node, Res); + + return; + } + + /* + LLVMContext &theContext = *(CurDAG->getContext()); + EVT voidEVT = EVT::getEVT(Type::getVoidTy(theContext)); + LLVM_DEBUG(dbgs() << " voidEVT = " + << voidEVT.getEVTString() << "\n"); + */ + + break; + } // END ISD::MSCATTER + case ISD::ConstantPool: { + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::ConstantPool.\n"); + LLVM_DEBUG(dbgs() << " Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + + SDLoc DL(Node); + + // MEGA-TODO: check for splat 0..CVL-1 + // MEGA-TODO: I need to return TYPE_VECTOR_I16 (maybe create a virtreg also) + SDNode *Res = CurDAG->getMachineNode(Connex::LDIX_H, + DL, + MVT::i64 + //TYPE_VECTOR_I16, + // We add a chain edge + //CurDAG->getEntryNode() + //sourceVec, + //offsetVec + //MVT::Other + //offset, + //basePtr, + //opChain + ); + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): Res = "; + Res->dump(); + dbgs() << "\n"); + + ReplaceNode(Node, Res); + + return; + } + // NEW_FP16: required for non-vector BBs like for.body + case ISD::LOAD: { + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::LOAD.\n"); + LLVM_DEBUG(dbgs() << " Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + + SDLoc DL(Node); + EVT typeVecNode; + EVT resTy = Node->getValueType(0); + + 
LoadSDNode *nodeLoad = dyn_cast(Node); + assert(nodeLoad != NULL); + + // See http://llvm.org/doxygen/SelectionDAGNodes_8h_source.html#l02048 + SDValue opChain = nodeLoad->getOperand(0); + SDValue basePtr = nodeLoad->getBasePtr(); // Operand 1 + SDValue offset = nodeLoad->getOffset(); // Operand 2 + LLVM_DEBUG(dbgs() << "Select() for LOAD: basePtr = "; + (basePtr.getNode())->dump(); + dbgs() << "Select() for LOAD: offset = "; + (offset.getNode())->dump(); + dbgs() << "Select() for LOAD: opChain = "; + (opChain.getNode())->dump(); + dbgs() << "\n"); + + EVT offsetTy = offset.getValueType(); + + LLVM_DEBUG(dbgs() << "Select() for LOAD: " + << "resTy = " << resTy.getEVTString() + << ", offsetTy = " << offsetTy.getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "Select() for LOAD: offset = "; + (offset.getNode())->dump(); + dbgs() << " basePtr = "; + (basePtr.getNode())->dump(); + dbgs() << " opChain = "; + (opChain.getNode())->dump(); + dbgs() << "\n"); + + if (resTy == MVT::f16) { + LLVM_DEBUG(dbgs() << "Select() for LOAD: We are in the case resTy == MVT::f16\n"); + + // small-TODO: although useless, normally we should emulate f16 on BPF + SDNode *Res16 = CurDAG->getMachineNode(Connex::LDH, + DL, + resTy, + MVT::Other, + // NOT useful: MVT::Other, + ////offset, + // Error: <> opChain, + basePtr, // , + // + // IMPORTANT: unfortunately this operand becomes a register, not an immediate: offset, + CurDAG->getTargetConstant(0, DL, MVT::i64), // TODO: we should put probably a different value than 0 + // + opChain + // This gives < 0>> , basePtr + ); + SDNode *Res = Res16; + + #ifdef NOT_GOOD + //SDNode *Res16 = offset.getNode(); + SDNode *Res = CurDAG->getMachineNode( + //Connex::NOP_BPF, // This must take an immediate operand + // An unnecessary NOP: Connex::NOP, + Connex::NOP_BOGUS, + DL, + resTy, + MVT::Other, + // We add a chain edge + //CurDAG->getEntryNode() + //sourceVec, + //offsetVec + opChain + ); + /* + // Extremely crappy - VERY BAD. 
+ // VERY BAD: It messes up at test covar - wrong register types, + // vector register mixed with scalar register: e.g. R7 = r4 | r4; + SDNode *Res = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HW, + DL, + //typeVecNode, + resTy, + MVT::Other, + basePtr, + // NOTE: we can also use offset + //SDValue(Res16, 0) + opChain + ); + //offsetVec); + */ + #endif // NOT_GOOD + + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): Res = "; + Res->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << " Select(): Res->getOpcode() = " + << Res->getOpcode() + << "\n"); + + ReplaceNode(Node, Res); + + return; + } // END if (resTy == MVT::f16) + else + if (resTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for LOAD: We are in the case " + "resTy == TYPE_VECTOR_I16\n"); + + SDNode *Res = CurDAG->getMachineNode(Connex::LD_INDIRECT_H, + DL, + TYPE_VECTOR_I16, + // We add a chain edge + //CurDAG->getEntryNode() + //sourceVec, + //offsetVec + MVT::Other, + //offset, + basePtr, + opChain + ); + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): Res = "; + Res->dump(); + dbgs() << "\n"); + + ReplaceNode(Node, Res); + + return; + } + + break; + } // END ISD::LOAD + // NEW_FP16: normally required for non-vector BBs like for.body + case ISD::STORE: { + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::STORE.\n"); + LLVM_DEBUG(dbgs() << " Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + + SDLoc DL(Node); + EVT typeNode; + EVT resTy = Node->getValueType(0); + + + StoreSDNode *nodeStore = dyn_cast(Node); + assert(nodeStore != NULL); + + // See http://llvm.org/doxygen/SelectionDAGNodes_8h_source.html#l02076 + SDValue opChain = nodeStore->getOperand(0); + SDValue source = nodeStore->getValue(); // Operand 1 + SDValue basePtr = nodeStore->getBasePtr(); // Operand 2 + SDValue offset = nodeStore->getOffset(); // Operand 3 + LLVM_DEBUG(dbgs() << "Select() for STORE: 
offset = "; + (offset.getNode())->dump(); + dbgs() << "Select() for STORE: basePtr = "; + (basePtr.getNode())->dump(); + dbgs() << "Select() for STORE: source = "; + (source.getNode())->dump(); + dbgs() << "Select() for STORE: opChain = "; + (opChain.getNode())->dump(); + dbgs() << "\n"); + + EVT offsetTy = offset.getValueType(); + EVT sourceTy = source.getValueType(); + + LLVM_DEBUG(dbgs() << "Select() for STORE: " + << "sourceTy = " << sourceTy.getEVTString() + << ", offsetTy = " << offsetTy.getEVTString() + << ", resTy = " << resTy.getEVTString() + << "\n"); + + if (sourceTy == MVT::f16) { + /* We need to treat this case because the BPF processor doesn't + have any floating point support. + */ + LLVM_DEBUG(dbgs() << "Select() for STORE: We are in the case sourceTy == MVT::f16\n"); + + // I was not able to make this one work - see below: #define DOES_NOT_WORK_AND_DONNO_WHY_SPENT_4_HOURS + //#define DOES_NOT_WORK_AND_DONNO_WHY_SPENT_4_HOURS + #ifdef DOES_NOT_WORK_AND_DONNO_WHY_SPENT_4_HOURS + /* MINOR TODO: I get this error here, but we can use the other solution, at the + #else: <> + */ + /* + SDNode *Res16 = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_WH, + DL, + MVT::i16, + source, + opChain + ); + LLVM_DEBUG(dbgs() << "Select(): Res16 = "; + Res16->dump(); + dbgs() << "\n"); + */ + + SDNode *Res = CurDAG->getMachineNode(Connex::STH, + DL, + MVT::Other, + //source, + //SDValue(Res16, 0), + opChain, + source, + basePtr, + offset + //, opChain + // This gives < 0>> , basePtr + ); + #else + /* + // TODO: use instead of Connex::NOP_BITCONVERT_WH a new node called Connex::NOP_BITCONVERT_F16H + SDNode *Res = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_WH, + DL, + MVT::Other, + sourceVec, + offsetVec + ); + */ + + /* Crappy but it works: this is a scalar f16 STORE - we simply + avoid generating a useful instruction - we just replace it + with "pseudo"-instruction NOP_BOGUS, which doesn't have a + useful assembly instruction. 
+ */ + SDNode *Res = CurDAG->getMachineNode( + //Connex::NOP_BPF, // This must take an immediate operand + // An unnecessary NOP: Connex::NOP, + Connex::NOP_BOGUS, + DL, + MVT::Other, + // We add a chain edge + //CurDAG->getEntryNode() + //sourceVec, + //offsetVec + opChain + ); + //assert(0 && "I don't think it's implemented - anyhow I don't think it's (much) used - we should try harder with NOP_BITCONVERT, etc..."); + #endif + + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): Res = "; + Res->dump(); + dbgs() << "\n"); + + ReplaceNode(Node, Res); + + return; + } + else + if (sourceTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for STORE: We are in the case " + "sourceTy == TYPE_VECTOR_I16\n"); + + SDNode *Res = CurDAG->getMachineNode(Connex::ST_INDIRECT_H, + DL, + // We add a chain edge + //CurDAG->getEntryNode() + //sourceVec, + //offsetVec + MVT::Other, + offset, + source, + opChain + ); + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): Res = "; + Res->dump(); + dbgs() << "\n"); + + ReplaceNode(Node, Res); + + return; + } + + break; + } // END ISD::STORE + + // Inspired from MipsSEISelDAGToDAG.cpp + case ISD::BUILD_VECTOR: { + selectBUILD_VECTOR(Node); + return; + } // END case ISD::BUILD_VECTOR + /* + // VERY IMPORTANT: In ISelLowering the DAG Combiner changes + // (I think in all cases) the vector_shuffle SDNode into a BUILD_VECTOR. 
+ case ISD::VECTOR_SHUFFLE: { + selectVECTOR_SHUFFLE(Node); + return; + } // END case ISD::VECTOR_SHUFFLE + */ + } // END switch (Opcode) + + /* + // Select the default instruction + SDNode *ResNode = SelectCode(Node); + + LLVM_DEBUG(dbgs() << "=> "; + if (ResNode == nullptr || ResNode == Node) + Node->dump(CurDAG); + else + ResNode->dump(CurDAG); + dbgs() << '\n'); + + LLVM_DEBUG(dbgs() << "Exiting Select()\n"); // - but first calling SelectCode()\n"); + ReplaceNode(Node, ResNode); + return; + */ + + // Select the default instruction + //SDNode *ResNode = SelectCode(Node); + SelectCode(Node); +} + + +FunctionPass *llvm::createConnexISelDag(ConnexTargetMachine &TM) { + return new ConnexDAGToDAGISel(TM); +} + + +// Added from MipsSEISelDAGToDAG.cpp +/// Match frameindex +bool ConnexDAGToDAGISel::selectAddrFrameIndex(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { + EVT ValTy = Addr.getValueType(); + + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), ValTy); + return true; + } + return false; +} + + +// Added from MipsSEISelDAGToDAG.cpp +/// Match frameindex+offset and frameindex|offset +bool ConnexDAGToDAGISel::selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, + SDValue &Offset, + unsigned OffsetBits) const { + if (CurDAG->isBaseWithConstantOffset(Addr)) { + ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); + if (isIntN(OffsetBits, CN->getSExtValue())) { + EVT ValTy = Addr.getValueType(); + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = dyn_cast + (Addr.getOperand(0))) + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + else + Base = Addr.getOperand(0); + + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), + ValTy); + return true; + } + } + return false; +} + + +// Added from MipsSEISelDAGToDAG.cpp +bool ConnexDAGToDAGISel::selectAddrRegImm10(SDValue Addr, SDValue &Base, + 
SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10)) + return true; + + return false; +} + + +// Added from MipsSEISelDAGToDAG.cpp +bool ConnexDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Addr.getValueType()); + return true; +} + + +// Added from MipsSEISelDAGToDAG.cpp +bool ConnexDAGToDAGISel::selectIntAddrMSA(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrRegImm10(Addr, Base, Offset)) + return true; + + if (selectAddrDefault(Addr, Base, Offset)) + return true; + + return false; +} + + +// Added from MipsSEISelDAGToDAG.cpp +// Select constant vector splats. +// +// Returns true and sets Imm if: +// * MSA is enabled +// * N is a ISD::BUILD_VECTOR representing a constant splat +bool ConnexDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm, + unsigned MinSizeInBits) const { + LLVM_DEBUG(dbgs() << "Entered ConnexDAGToDAGISel::selectVSplat()\n"); + + /* + if (!Subtarget->hasMSA()) + return false; + */ + + BuildVectorSDNode *Node = dyn_cast(N); + + if (!Node) + return false; + + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, + MinSizeInBits, + // !Subtarget->isLittle() + false)) + return false; + + Imm = SplatValue; + + LLVM_DEBUG(dbgs() << "ConnexDAGToDAGISel::selectVSplat(): returning true\n"); + return true; +} + + +// Select constant vector splats. +// +// In addition to the requirements of selectVSplat(), this function returns +// true and sets Imm if: +// * The splat value is the same width as the elements of the vector +// * The splat value fits in an integer with the specified signed-ness and +// width. +// +// This function looks through ISD::BITCAST nodes. 
+// TODO: This might not be appropriate for big-endian MSA since BITCAST is
+// sometimes a shuffle in big-endian mode.
+//
+// It's worth noting that this function is not used as part of the selection
+// of ldi.[bhwd] since it does not permit using the wrong-typed ldi.[bhwd]
+// instruction to achieve the desired bit pattern. ldi.[bhwd] is selected in
+// MipsSEDAGToDAGISel::selectNode.
+//
+// NOTE(review): the MSA / ldi.[bhwd] remarks above look inherited from the
+// Mips code this helper was adapted from -- confirm they still apply here.
+//
+// N          - candidate node; a single ISD::BITCAST is looked through.
+// Imm        - on success, receives the splat value as a target constant of
+//              the vector element type.
+// Signed     - true: range-check with isSignedIntN(); false: with isIntN().
+// ImmBitSize - number of bits the splat value has to fit in.
+// Returns true only if every check passes; Imm is untouched otherwise.
+bool ConnexDAGToDAGISel::selectVSplatCommon(SDValue N, SDValue &Imm,
+                                            bool Signed,
+                                            unsigned ImmBitSize) const {
+  LLVM_DEBUG(dbgs() << "Entered ConnexDAGToDAGISel::selectVSplatCommon()\n");
+
+  // The element type is read from the node as passed in, i.e. before the
+  // BITCAST (if any) is stripped.
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  APInt SplatBits;
+  if (!selectVSplat(N.getNode(), SplatBits, EltTy.getSizeInBits()))
+    return false;
+
+  // The splat has to be exactly as wide as one vector element.
+  if (SplatBits.getBitWidth() != EltTy.getSizeInBits())
+    return false;
+
+  const bool FitsImm = Signed ? SplatBits.isSignedIntN(ImmBitSize)
+                              : SplatBits.isIntN(ImmBitSize);
+  if (!FitsImm)
+    return false;
+
+  Imm = CurDAG->getTargetConstant(SplatBits, SDLoc(N), EltTy);
+  return true;
+}
+
+// Select constant vector splats that fit in an unsigned 1-bit immediate.
+bool ConnexDAGToDAGISel::selectVSplatUimm1(SDValue N, SDValue &Imm) const {
+  LLVM_DEBUG(dbgs() << "Entered selectVSplatUimm1()\n");
+  return selectVSplatCommon(N, Imm, /*Signed=*/false, /*ImmBitSize=*/1);
+}
+
+// Select constant vector splats that fit in an unsigned 2-bit immediate.
+bool ConnexDAGToDAGISel::selectVSplatUimm2(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, /*Signed=*/false, /*ImmBitSize=*/2);
+}
+
+// Select constant vector splats that fit in an unsigned 3-bit immediate.
+bool ConnexDAGToDAGISel::selectVSplatUimm3(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, /*Signed=*/false, /*ImmBitSize=*/3);
+}
+
+// Select constant vector splats that fit in an unsigned 4-bit immediate.
+bool ConnexDAGToDAGISel::selectVSplatUimm4(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, /*Signed=*/false, /*ImmBitSize=*/4);
+}
+
+// Select constant vector splats that fit in an unsigned 5-bit immediate.
+bool ConnexDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, /*Signed=*/false, /*ImmBitSize=*/5);
+}
+
+
+// Select constant vector splats.
+bool ConnexDAGToDAGISel::selectVSplatUimm6(SDValue N, SDValue &Imm) const {
+  // Unsigned range check against a 6-bit immediate.
+  return selectVSplatCommon(N, Imm, /*Signed=*/false, /*ImmBitSize=*/6);
+}
+
+
+// Select constant vector splats that fit in an unsigned 8-bit immediate.
+bool ConnexDAGToDAGISel::selectVSplatUimm8(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, /*Signed=*/false, /*ImmBitSize=*/8);
+}
+
+
+// Select constant vector splats that fit in a signed 5-bit immediate.
+bool ConnexDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, /*Signed=*/true, /*ImmBitSize=*/5);
+}
+
+
+// Select constant vector splats whose value is a power of 2.
+//
+// In addition to the requirements of selectVSplat(), this function returns
+// true and sets Imm if:
+// * The splat value is the same width as the elements of the vector
+// * The splat value is a power of two.
+//
+// On success Imm holds the base-2 logarithm of the splat value (not the value
+// itself), as a target constant of the vector element type.
+//
+// This function looks through ISD::BITCAST nodes.
+// TODO: This might not be appropriate for big-endian MSA since BITCAST is
+// sometimes a shuffle in big-endian mode.
+bool ConnexDAGToDAGISel::selectVSplatUimmPow2(SDValue N, SDValue &Imm) const {
+  // The element type is read from the node as passed in, before the BITCAST
+  // (if any) is stripped.
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  APInt SplatBits;
+  if (!selectVSplat(N.getNode(), SplatBits, EltTy.getSizeInBits()))
+    return false;
+  if (SplatBits.getBitWidth() != EltTy.getSizeInBits())
+    return false;
+
+  // exactLogBase2() yields -1 when the value is not a power of two.
+  const int32_t Log2 = SplatBits.exactLogBase2();
+  if (Log2 == -1)
+    return false;
+
+  Imm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy);
+  return true;
+}
+
+
+// Select constant vector splats whose value only has a consecutive sequence
+// of left-most bits set (e.g. 0b11...1100...00).
+//
+// In addition to the requirements of selectVSplat(), this function returns
+// true and sets Imm if:
+// * The splat value is the same width as the elements of the vector
+// * The splat value is a consecutive sequence of left-most bits.
+//
+// This function looks through ISD::BITCAST nodes.
+// TODO: This might not be appropriate for big-endian MSA since BITCAST is
+// sometimes a shuffle in big-endian mode.
+bool ConnexDAGToDAGISel::selectVSplatMaskL(SDValue N, SDValue &Imm) const {
+  // The element type is read from the node as passed in, before the BITCAST
+  // (if any) is stripped.
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  APInt SplatBits;
+  if (!selectVSplat(N.getNode(), SplatBits, EltTy.getSizeInBits()))
+    return false;
+  if (SplatBits.getBitWidth() != EltTy.getSizeInBits())
+    return false;
+
+  // Work on the bitwise inverse of the splat: isolate its run of set bits
+  // that starts at bit zero, invert that run back, and require the result to
+  // be the original splat value.
+  const APInt Inverted = ~SplatBits;
+  const APInt LowRunOfInverse = Inverted & ~(Inverted + 1);
+  if (SplatBits != ~LowRunOfInverse)
+    return false;
+
+  // The immediate is the number of set bits in the splat value.
+  Imm = CurDAG->getTargetConstant(SplatBits.countPopulation(), SDLoc(N),
+                                  EltTy);
+  return true;
+}
+
+
+// Select constant vector splats whose value only has a consecutive sequence
+// of right-most bits set (e.g. 0b00...0011...11).
+//
+// In addition to the requirements of selectVSplat(), this function returns
+// true and sets Imm if:
+// * The splat value is the same width as the elements of the vector
+// * The splat value is a consecutive sequence of right-most bits.
+//
+// This function looks through ISD::BITCAST nodes.
+// TODO: This might not be appropriate for big-endian MSA since BITCAST is
+// sometimes a shuffle in big-endian mode.
+bool ConnexDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const { + APInt ImmValue; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && + ImmValue.getBitWidth() == EltTy.getSizeInBits()) { + // Extract the run of set bits starting with bit zero, and test that the + // result is the same as the original value + if (ImmValue == (ImmValue & ~(ImmValue + 1))) { + Imm = CurDAG->getTargetConstant(ImmValue.countPopulation(), SDLoc(N), + EltTy); + return true; + } + } + + return false; +} + + +bool ConnexDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N, + SDValue &Imm) const { + APInt ImmValue; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && + ImmValue.getBitWidth() == EltTy.getSizeInBits()) { + int32_t Log2 = (~ImmValue).exactLogBase2(); + + if (Log2 != -1) { + Imm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy); + return true; + } + } + + return false; +} + Index: lib/Target/Connex/ConnexISelLowering.h =================================================================== --- lib/Target/Connex/ConnexISelLowering.h +++ lib/Target/Connex/ConnexISelLowering.h @@ -0,0 +1,213 @@ +//===-- ConnexISelLowering.h - Connex DAG Lowering Interface ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the interfaces that Connex uses to lower LLVM code into a +/// selection DAG. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_CONNEX_CONNEXISELLOWERING_H +#define LLVM_LIB_TARGET_CONNEX_CONNEXISELLOWERING_H + +#include "Connex.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetLowering.h" + +#include "ConnexConfig.h" + + + +namespace llvm { +class ConnexSubtarget; + +namespace ConnexISD { + /* + From http://llvm.org/docs/doxygen/html/namespacellvm_1_1ISD.html: + <> + */ + enum NodeType : unsigned { + FIRST_NUMBER = ISD::BUILTIN_OP_END, + RET_FLAG, + CALL, + SELECT_CC, + BR_CC, + + /* Inspired from lib/Target/X86/X86ISelLowering.h + /// A wrapper node for TargetConstantPool, + /// TargetExternalSymbol, and TargetGlobalAddress. + */ + Wrapper, + + // From [LLVM]/llvm/lib/Target/Mips/MipsISelLowering.h + // Extended vector element extraction + VEXTRACT_SEXT_ELT, + VEXTRACT_ZEXT_ELT, + + //ConstantPool, + + // Vector Shuffle with mask as an operand + VSHF, // Generic shuffle + SHF, // 4-element set shuffle. + ILVEV, // Interleave even elements + ILVOD, // Interleave odd elements + ILVL, // Interleave left elements + ILVR, // Interleave right elements + PCKEV, // Pack even elements + PCKOD, // Pack odd elements + }; +} + + +class ConnexTargetLowering : public TargetLowering { +public: + explicit ConnexTargetLowering(const TargetMachine &TM, + const ConnexSubtarget &STI); + + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + + // Inspired from lib/Target/AMDGPU/AMDGPUISelLowering.h + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const; + + // Provide custom lowering hooks for some operations. + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + + // This method returns the name of a target specific DAG node. 
+ const char *getTargetNodeName(unsigned Opcode) const override; + + MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const override; + +private: + /* + // From llvm/lib/Target/Mips/MipsISelLowering.h + // Create a TargetGlobalAddress node. + SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + + // Create a TargetExternalSymbol node. + SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + + // Create a TargetBlockAddress node. + SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + + // Create a TargetJumpTable node. + SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + */ + // Create a TargetConstantPool node. + SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + + // Added from lib/Target/Mips/MipsSEISelLowering.cpp (method addMSAIntType) + void addVectorIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); + + // Inspired from lib/Target/Mips/MipsSEISelLowering.cpp, addMSAFloatType() + void addVectorFloatType(MVT::SimpleValueType Ty, + const TargetRegisterClass *RC); + + bool allowsMisalignedMemoryAccesses(EVT VT, + unsigned, + unsigned, + bool *Fast) const; + + void replaceAddI32UseWithADDVH(MVT &aType, SDValue &Index, + SelectionDAG &DAG) const; + + SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + /*static */ SDValue LowerMGATHER(SDValue &Op, + //const ConnexSubtarget &Subtarget, + SelectionDAG &DAG) const; + /*static */ SDValue LowerMSCATTER(SDValue &Op, + //const ConnexSubtarget &Subtarget, + SelectionDAG &DAG) const; + + // Lower the result values of a call, copying them out of physregs into vregs + SDValue LowerCallResult(SDValue Chain, SDValue 
InFlag, + CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl &Ins, + const SDLoc &DL, SelectionDAG &DAG, + SmallVectorImpl &InVals) const; + + // Maximum number of arguments to a call + static const unsigned MaxArgs; + + // Lower a call into CALLSEQ_START - ConnexISD:CALL - CALLSEQ_END chain + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const override; + + // Lower incoming arguments, copy physregs into vregs + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, + bool IsVarArg, + const SmallVectorImpl &Ins, + const SDLoc &DL, SelectionDAG &DAG, + SmallVectorImpl &InVals) const override; + + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SDLoc &DL, + SelectionDAG &DAG) const override; + + EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, + MachineFunction &MF) const override { + #define DEBUG_TYPE "connex-lower" + + LLVM_DEBUG(dbgs() << "Entered getOptimalMemOpType(Size = " << Size + << ")\n"); + + return Size >= 8 ? MVT::i64 : MVT::i32; + + // TODO_CHANGE_BACKEND - Seems it's NOT required: + //return Size >= 8 ? 
TYPE_VECTOR_ELEMENT : MVT::i32; + + #undef DEBUG_TYPE + } + + bool shouldConvertConstantLoadToIntImm(const APInt &Imm, + Type *Ty) const override { + return true; + } + + SDValue LowerVSELECT(SDValue &Op, SelectionDAG &DAG) const; + + // From [LLVM]/llvm/lib/Target/Mips/MipsSEISelLowering.h + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerADD_I32(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerADD_F16(SDValue &Op, SelectionDAG *CurDAG) const; + SDValue LowerMUL_F16(SDValue &Op, SelectionDAG *CurDAG) const; + SDValue LowerREDUCE_F16(SDValue &Op, SelectionDAG *CurDAG) const; + + SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; + + + SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + // + EVT getSetCCResultType(const DataLayout &, LLVMContext &, EVT VT) const; +}; // end class ConnexTargetLowering +} // end namespace llvm + +#endif + Index: lib/Target/Connex/ConnexISelLowering.cpp =================================================================== --- lib/Target/Connex/ConnexISelLowering.cpp +++ lib/Target/Connex/ConnexISelLowering.cpp @@ -0,0 +1,3561 @@ +//===-- ConnexISelLowering.cpp - Connex DAG Lowering Implementation ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that Connex uses to lower LLVM code into a +// selection DAG. 
+// +//===----------------------------------------------------------------------===// + +#include "ConnexISelLowering.h" +#include "Connex.h" +#include "ConnexTargetMachine.h" +#include "ConnexSubtarget.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" +// +// See http://llvm.org/docs/doxygen/html/classllvm_1_1DILocation.html +#include "llvm/IR/DebugInfoMetadata.h" + +using namespace llvm; + + + +#define DEBUG_TYPE "connex-lower" + + +//#define DO_F16_EMULATION_IN_ISEL_LOWERING +#ifdef DO_F16_EMULATION_IN_ISEL_LOWERING + #define DO_MUL_F16_EMULATION_IN_ISEL_LOWERING + #define DO_ADD_F16_EMULATION_IN_ISEL_LOWERING +#endif + + +static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) { + MachineFunction &MF = DAG.getMachineFunction(); + DAG.getContext()->diagnose( + DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc())); +} + +static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg, + SDValue Val) { + MachineFunction &MF = DAG.getMachineFunction(); + std::string Str; + raw_string_ostream OS(Str); + OS << Msg; + Val->print(OS); + OS.flush(); + DAG.getContext()->diagnose( + DiagnosticInfoUnsupported(MF.getFunction(), Str, DL.getDebugLoc())); +} + + + +/* +SDValue MipsTargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag); +} + +SDValue 
MipsTargetLowering::getTargetNode(ExternalSymbolSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag); +} + +SDValue MipsTargetLowering::getTargetNode(BlockAddressSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag); +} + +SDValue MipsTargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); +} +*/ + +SDValue ConnexTargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(), + N->getOffset(), Flag); +} + + +// Enable vector (inspired from Mips MSA) support for the given integer +// type and Register class. +void ConnexTargetLowering::addVectorIntType(MVT::SimpleValueType aType, + const TargetRegisterClass *RC) { + LLVM_DEBUG(dbgs() << "Entered addVectorIntType(aType = " + << aType << ")\n"); + //LLVM_DEBUG(dbgs() << "addVectorIntType(): "; RC->dump(); dbgs() << "\n"); + + addRegisterClass(aType, RC); + + // Expand all builtin opcodes. 
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, aType, Expand); + + + //Original code: + //setOperationAction(ISD::BITCAST, aType, Legal); + setOperationAction(ISD::BITCAST, aType, Custom); + /* + setOperationAction(ISD::BITCAST, aType, Promote); + //setOperationAction(ISD::BITCAST, TYPE_VECTOR_I32, Promote); + // Inspired from book Cardoso_2014, page 152 + AddPromotedToType(ISD::BITCAST, TYPE_VECTOR_I16, TYPE_VECTOR_I32); + */ + + // This is found in include/llvm/Target/TargetSelectionDAG.td + setOperationAction(ISD::NON_EXTLOAD, aType, Legal); + setOperationAction(ISD::EXTLOAD, aType, Legal); + + setOperationAction(ISD::LOAD, aType, Legal); + setOperationAction(ISD::STORE, aType, Legal); + + /* IMPORTANT: NONE of these seem to be required anymore after the last + changes of the TableGen spec in ConnexInstrInfo_vec.td. + + // This is to help instruction selection of masked_gather: + //addVectorIntType(MVT::v128i64, &Connex::VectorHRegClass); + // + //setOperationAction(ISD::MGATHER, aType, Legal); + //setOperationAction(ISD::MGATHER, aType, Custom); + */ + /* Required if we work with index vector that is not zeroinitializer, + * or if it is LD256[] + * NOT with something like LD256[%B] */ + //setOperationAction(ISD::MGATHER, MVT::v64i32, Custom); + // We require this to call replaceAddI32UseWithADDVH() + setOperationAction(ISD::MGATHER, aType, Custom); + //setOperationAction(ISD::MGATHER, aType, Legal); + //setOperationAction(ISD::MGATHER, aType, Legal); + /* + setOperationAction(ISD::MGATHER, aType, Legal); + setOperationAction(ISD::MGATHER, MVT::v128i64, Legal); + */ + + /* + * Failing to put this line gives the following STRANGE error - can't explain + * why this happens: + * include/llvm/CodeGen/ValueTypes.h:249: + * unsigned int llvm::EVT::getVectorNumElements() const: + * Assertion `isVector() && "Invalid vector type!"' failed. 
+ */ + //setOperationAction(ISD::MSCATTER, aType, Legal); + // + setOperationAction(ISD::MSCATTER, aType, Custom); +/* + setOperationAction(ISD::MSCATTER, MVT::v64i32, Expand); + AddPromotedToType(ISD::MSCATTER, TYPE_VECTOR_I32, TYPE_VECTOR_I16); +*/ + setOperationAction(ISD::EXTRACT_VECTOR_ELT, aType, Custom); + + // TODO!!!!: do a call to addVectorIntType(MVT::i32) instead of this + /* + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i16, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i32, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, aType, Legal); + */ + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, aType, Custom); + + setOperationAction(ISD::BUILD_VECTOR, aType, Custom); + + // TODO!!!!: do a call to addVectorIntType(MVT::i32) instead of this + setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + setOperationAction(ISD::ConstantPool, aType, Custom); + + setOperationAction(ISD::ADD, aType, Legal); + setOperationAction(ISD::AND, aType, Legal); + setOperationAction(ISD::CTLZ, aType, Legal); + setOperationAction(ISD::CTPOP, aType, Legal); + setOperationAction(ISD::MUL, aType, Legal); + setOperationAction(ISD::OR, aType, Legal); + + //setOperationAction(ISD::SDIV, aType, Custom); + setOperationAction(ISD::SDIV, aType, Legal); + + //setOperationAction(ISD::SREM, aType, Custom); + setOperationAction(ISD::SREM, aType, Legal); + + setOperationAction(ISD::SHL, aType, Legal); + + setOperationAction(ISD::SRA, aType, Legal); + //setOperationAction(ISD::SRA, aType, Custom); + + setOperationAction(ISD::SRL, aType, Legal); + setOperationAction(ISD::SUB, aType, Legal); + + //setOperationAction(ISD::UDIV, aType, Custom); //Legal); + setOperationAction(ISD::UDIV, aType, Legal); + + //setOperationAction(ISD::UREM, aType, Custom); //Legal); + setOperationAction(ISD::UREM, aType, Legal); + + setOperationAction(ISD::VECTOR_SHUFFLE, 
aType, Custom); +#ifdef IMPLEMENT_VSELECT_WITH_PSEUDOINSTRS_BUNDLES + setOperationAction(ISD::VSELECT, aType, Custom); +#else + setOperationAction(ISD::VSELECT, aType, Legal); +#endif + setOperationAction(ISD::XOR, aType, Legal); + + /* + if (aType == MVT::v4i32 || aType == MVT::v2i64) { + setOperationAction(ISD::FP_TO_SINT, aType, Legal); + setOperationAction(ISD::FP_TO_UINT, aType, Legal); + setOperationAction(ISD::SINT_TO_FP, aType, Legal); + setOperationAction(ISD::UINT_TO_FP, aType, Legal); + } + */ + + // changed + setOperationAction(ISD::SETCC, aType, Legal); + /* + * Following advice Bruno Cardoso - see email Jun 7, 2016 from + * alex.susu@gmail.com + setOperationAction(ISD::SETCC, aType, Custom); // Expand, Promote or Legal + */ + + setCondCodeAction(ISD::SETEQ, aType, Legal); + setCondCodeAction(ISD::SETNE, aType, Expand); + setCondCodeAction(ISD::SETGE, aType, Expand); + setCondCodeAction(ISD::SETGT, aType, Expand); + setCondCodeAction(ISD::SETUGE, aType, Expand); + setCondCodeAction(ISD::SETUGT, aType, Expand); +} + + + +// Inspired from lib/Target/Mips/MipsSEISelLowering.cpp, addMSAFloatType() +// Enable support for the given floating-point vector type aType and register class RC: every builtin opcode is first set to Expand, then the supported operations are overridden as Legal or Custom. +void ConnexTargetLowering::addVectorFloatType(MVT::SimpleValueType aType, + const TargetRegisterClass *RC) { + LLVM_DEBUG(dbgs() << "Entered addVectorFloatType(aType = " + << aType << ")\n"); + addRegisterClass(aType, RC); + + // Expand all builtin opcodes.
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, aType, Expand); + + setOperationAction(ISD::LOAD, aType, Legal); + setOperationAction(ISD::STORE, aType, Legal); + setOperationAction(ISD::BITCAST, aType, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, aType, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, aType, Legal); + setOperationAction(ISD::BUILD_VECTOR, aType, Custom); + + setOperationAction(ISD::MGATHER, aType, Custom); + setOperationAction(ISD::MSCATTER, aType, Legal); + // TODO: only if we use f32, f64 I guess: setOperationAction(ISD::MSCATTER, aType, Custom); + + + //if (Ty != MVT::v8f16) { + setOperationAction(ISD::FABS, aType, Legal); + #ifdef DO_ADD_F16_EMULATION_IN_ISEL_LOWERING + // 2018_08_17: + setOperationAction(ISD::FADD, aType, Custom); + #else + setOperationAction(ISD::FADD, aType, Legal); + #endif + // + setOperationAction(ISD::FDIV, aType, Legal); + setOperationAction(ISD::FEXP2, aType, Legal); + setOperationAction(ISD::FLOG2, aType, Legal); + setOperationAction(ISD::FMA, aType, Legal); + #ifdef DO_MUL_F16_EMULATION_IN_ISEL_LOWERING + setOperationAction(ISD::FMUL, aType, Custom); + #else + // 2018_08_17: + setOperationAction(ISD::FMUL, aType, Legal); + #endif + setOperationAction(ISD::FRINT, aType, Legal); + setOperationAction(ISD::FSQRT, aType, Legal); + setOperationAction(ISD::FSUB, aType, Legal); + setOperationAction(ISD::VSELECT, aType, Legal); + + setOperationAction(ISD::SETCC, aType, Legal); + setCondCodeAction(ISD::SETOGE, aType, Expand); + setCondCodeAction(ISD::SETOGT, aType, Expand); + setCondCodeAction(ISD::SETUGE, aType, Expand); + setCondCodeAction(ISD::SETUGT, aType, Expand); + setCondCodeAction(ISD::SETGE, aType, Expand); + setCondCodeAction(ISD::SETGT, aType, Expand); + //} +} + +/* Reports every misaligned memory access as allowed, whatever the type VT, and sets *Fast to true when Fast is non-null; the two unnamed unsigned parameters are ignored. */ +// Inspired from llvm/lib/Target/Mips/MipsSEISelLowering.cpp +bool ConnexTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned, + unsigned, + bool *Fast) const { + //MVT::SimpleValueType SVT
= VT.getSimpleVT().SimpleTy; + + // if (Subtarget.systemSupportsUnalignedAccess()) { + // MIPS32r6/MIPS64r6 is required to support unaligned access. It's + // implementation defined whether this is handled by hardware, software, or + // a hybrid of the two but it's expected that most implementations will + // handle the majority of cases in hardware. + if (Fast) + *Fast = true; + return true; + // } + + /* + switch (SVT) { + case MVT::i64: + case MVT::i32: + if (Fast) + *Fast = true; + return true; + default: + return false; + } + */ +} + + +/* Constructor: registers the memmove and memset runtime library calls (name and C calling convention), then declares the scalar and vector register classes and the legalization actions of the target. */ +ConnexTargetLowering::ConnexTargetLowering(const TargetMachine &TM, + const ConnexSubtarget &STI) + : TargetLowering(TM) { + + LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::ConnexTargetLowering(): \n"); + + // Inspired from lib/Target/ARM/ARMISelLowering.cpp, ARMTargetLowering::ARMTargetLowering() + static const struct { + const RTLIB::Libcall Op; + const char *const Name; + const CallingConv::ID CC; + const ISD::CondCode Cond; + } MemOpsLibraryCalls[] = { + // Memory operations + // RTABI chapter 4.3.4 + /* + // NOTE: CallingConv::ARM_AAPCS is defined in http://llvm.org/docs/doxygen/html/namespacellvm_1_1CallingConv.html + { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + */ + { RTLIB::MEMMOVE, "memmove", CallingConv::C, ISD::SETCC_INVALID }, + { RTLIB::MEMSET, "memset", CallingConv::C, ISD::SETCC_INVALID }, + }; + + for (const auto &LC : MemOpsLibraryCalls) { + LLVM_DEBUG(dbgs() << "ConnexTargetLowering::ConnexTargetLowering(): " + "registering RT-Libcall LC.name = " + << LC.Name << "\n"); + + setLibcallName(LC.Op, LC.Name); + setLibcallCallingConv(LC.Op, LC.CC); + if (LC.Cond != ISD::SETCC_INVALID) + setCmpLibcallCC(LC.Op, LC.Cond); + } + // END + + + // Set up the register classes.
+ // TODO_CHANGE_BACKEND: + //addRegisterClass(MVT::i64, &Connex::GPRRegClass); + addRegisterClass(TYPE_SCALAR_ELEMENT, &Connex::GPRRegClass); + + // Taken from llvm/lib/Target/Mips/MipsSEISelLowering.cpp + //if (Subtarget.hasDSP() || Subtarget.hasMSA()) { + // Expand all truncating stores and extending loads. + for (MVT VT0 : MVT::vector_valuetypes()) { + for (MVT VT1 : MVT::vector_valuetypes()) { + #ifdef NOT_NOT_NOT + LLVM_DEBUG(dbgs() << "VT0.getSizeInBits() = " + << VT0.getSizeInBits() << "\n"); + LLVM_DEBUG(dbgs() << "VT1.getSizeInBits() = " + << VT1.getSizeInBits() << "\n"); + #endif + setTruncStoreAction(VT0, VT1, Expand); + //This is WRONG - it was added by me and caused llc to give core dump: setLoadExtAction(ISD::STORE, VT0, VT1, Expand); + + setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand); + setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand); + } + } + //} + + + // As said in [Pandey_2015], page 152: + // "The legalize phase can also instruct the kind of classes of registers + // supported for given data." 
+ + // Taken from llvm/lib/Target/Mips/MipsSEISelLowering.cpp + //if (Subtarget.hasMSA()) { + /* + addVectorIntType(MVT::v16i8, &Connex::MSA128BRegClass); + addVectorIntType(MVT::v8i16, &Connex::VectorHRegClass); + addVectorIntType(MVT::v4i32, &Connex::MSA128WRegClass); + addVectorIntType(MVT::v2i64, &Connex::VectorHRegClass); + */ + + /* + // TODO to add these reg classes in the end + addVectorIntType(MVT::v64i8, &Connex::MSA128BRegClass); + addVectorIntType(MVT::v32i16, &Connex::VectorHRegClass); + addVectorIntType(MVT::v16i32, &Connex::MSA128WRegClass); + */ + // TODO_CHANGE_BACKEND: + // This is to help instruction selection of masked_gather: + //addVectorIntType(MVT::v8i64, &Connex::VectorHRegClass); + // To prevent error: <getSize() && "Size mismatch!"' failed.>>: + addVectorIntType(MVT::v8i64, &Connex::VectorHRegClass); // 2019_03_30 + // + addVectorIntType(TYPE_VECTOR_I16, &Connex::VectorHRegClass); + + // NEW32 + //addVectorIntType(TYPE_VECTOR_I32, &Connex::MSA128WRegClass); + addVectorIntType(TYPE_VECTOR_I32, &Connex::VectorHRegClass); + // + + /* + // These are not useful since we already gave addVectorIntType(TYPE_VECTOR_I32) above + // NEW32 + LLVM_DEBUG(dbgs() << "Calling addRegisterClass(TYPE_VECTOR_I32, &Connex::MSA128WRegClass)\n"); + addRegisterClass(TYPE_VECTOR_I32, &Connex::MSA128WRegClass); + */ + + /* + LLVM_DEBUG(dbgs() << "Calling setOperationAction(ISD::ADD, Custom)\n"); + setOperationAction(ISD::ADD, TYPE_VECTOR_I32, Custom); + */ + + + /* + LLVM_DEBUG(dbgs() << "Calling setOperationAction(ISD::ADD, Expand)\n"); + setOperationAction(ISD::ADD, TYPE_VECTOR_I32, Expand); + AddPromotedToType(ISD::ADD, TYPE_VECTOR_I32, TYPE_VECTOR_I16); + */ + +#ifdef SPECIAL_BITCAST_PROMOTE_EXPAND + // NEW32 + /* This normally results in having at I-sel something like: + Legally typed node: t35: v64i32,ch = masked_gather)> t21, undef:v64i32, t37, Constant:i64<51>, t23 + Promote integer result: t535: i32 = extract_vector_elt t35, Constant:i64<0> + Legally typed 
node: t727: i64 = extract_vector_elt t35, Constant:i64<0> + Promote integer result: t538: i32 = extract_vector_elt t35, Constant:i64<1> + Legally typed node: t728: i64 = extract_vector_elt t35, Constant:i64<1> + */ + + // Inspired from book Cardoso_2014, page 152 + // + LLVM_DEBUG(dbgs() << "Calling setOperationAction(ISD::OR, Expand)\n"); + setOperationAction(ISD::OR, TYPE_VECTOR_I32, /*Promote*/ Expand); + AddPromotedToType(ISD::OR, /*src*/ TYPE_VECTOR_I32, /*dst*/ TYPE_VECTOR_I16); + + + LLVM_DEBUG(dbgs() << "ISD::BITCAST - we use setOperationAction(..., Expand).\n"); + setOperationAction(ISD::BITCAST, TYPE_VECTOR_I16, Expand /*Promote*/); + AddPromotedToType(ISD::BITCAST, /*src*/ TYPE_VECTOR_I16, /*dst*/ TYPE_VECTOR_I32); + setOperationAction(ISD::BITCAST, TYPE_VECTOR_I32, /*Promote*/ Expand); + AddPromotedToType(ISD::BITCAST, /*src*/ TYPE_VECTOR_I32, /*dst*/ TYPE_VECTOR_I16); + + LLVM_DEBUG(dbgs() << "ISD::ADD - we use setOperationAction(..., Expand).\n"); + setOperationAction(ISD::ADD, TYPE_VECTOR_I16, Expand /*Promote*/); + AddPromotedToType(ISD::ADD, /*src*/ TYPE_VECTOR_I16, /*dst*/ TYPE_VECTOR_I32); + setOperationAction(ISD::ADD, TYPE_VECTOR_I32, /*Promote*/ Expand); + AddPromotedToType(ISD::ADD, /*src*/ TYPE_VECTOR_I32, /*dst*/ TYPE_VECTOR_I16); +#endif + + //addVectorFloatType(MVT::v128f16, &Connex::VectorHRegClass); + addVectorFloatType(TYPE_VECTOR_F16, &Connex::VectorHRegClass); + + /* + addVectorFloatType(MVT::v8f16, &Mips::VectorHRegClass); + addVectorFloatType(MVT::v4f32, &Mips::MSA128WRegClass); + addVectorFloatType(MVT::v2f64, &Mips::VectorHRegClass); + */ + /* + From http://llvm.org/docs/doxygen/html/classllvm_1_1TargetLoweringBase.html: + void llvm::TargetLoweringBase::setTargetDAGCombine(ISD::NodeType NT) [inline, protected] + <> + */ + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::SRA); +#ifdef IMPLEMENT_VSELECT_WITH_PSEUDOINSTRS_BUNDLES + setTargetDAGCombine(ISD::VSELECT); +#endif + 
setTargetDAGCombine(ISD::XOR); + //} + + + /* VERY IMPORTANT: this is why I've spent ~5 days of debugging + * - the computeRegisterProperties() function is called at the end of the + * constructor in lib/Target/Mips/MipsSEISelLowering.cpp (or + * Mips16ISelLowering.cpp; note that ARM/ARMISelLowering.cpp is somewhat similar + * to our case - computeRegisterProperties() is called AFTER all + * addRegisterClass() calls). + * But here it is called in the "middle", after the types are being + * declared - i.e., addRegisterClass() has to be called BEFORE + * computeRegisterProperties() - THIS IS VERY IMPORTANT. + */ + // Compute derived properties from the register classes + computeRegisterProperties(STI.getRegisterInfo()); + + setStackPointerRegisterToSaveRestore(Connex::R11); + + #ifdef NEW_BIGGER_OPS + /* + setOperationAction(ISD::DIV, MVT::u16, Custom); + setOperationAction(ISD::DIV, MVT::i16, Custom); + */ + if (MVT::i16 != TYPE_SCALAR_ELEMENT) { + setOperationAction(ISD::MUL, MVT::i16, Custom); + } + if (MVT::i32 != TYPE_SCALAR_ELEMENT) { + setOperationAction(ISD::ADD, MVT::i32, Custom); + setOperationAction(ISD::SUB, MVT::i32, Custom); + setOperationAction(ISD::MUL, MVT::i32, Custom); + } + #endif + /* + setOperationAction(ISD::~~~~VLOAD, TYPE_VECTOR_I32, Custom); + setOperationAction(ISD::MGATHER, TYPE_VECTOR_I32, Custom); + */ + + /* + From http://llvm.org/docs/doxygen/html/classllvm_1_1TargetLoweringBase.html + void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) + Indicate that the specified operation does not work with the specified type and indicate what to do about it. + + // From http://llvm.org/docs/WritingAnLLVMBackend.html#the-selectiondag-legalize-phase + "For some operations, simple type promotion or operation expansion may be insufficient. + [...] + In the LowerOperation method, for each Custom operation, a case statement should be added to indicate what function to call.
+ " + */ +// TODO_CHANGE_BACKEND: + setOperationAction(ISD::BR_CC, TYPE_SCALAR_ELEMENT, Custom); + + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BRIND, MVT::Other, Expand); + setOperationAction(ISD::BRCOND, MVT::Other, Expand); + + setOperationAction(ISD::SETCC, TYPE_SCALAR_ELEMENT, Expand); + + setOperationAction(ISD::SELECT, TYPE_SCALAR_ELEMENT, Expand); + + setOperationAction(ISD::SELECT_CC, TYPE_SCALAR_ELEMENT, Custom); + + setOperationAction(ISD::GlobalAddress, TYPE_SCALAR_ELEMENT, Custom); + + setOperationAction(ISD::DYNAMIC_STACKALLOC, TYPE_SCALAR_ELEMENT, Custom); + + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + + setOperationAction(ISD::SDIVREM, TYPE_SCALAR_ELEMENT, Expand); + setOperationAction(ISD::UDIVREM, TYPE_SCALAR_ELEMENT, Expand); + + setOperationAction(ISD::SREM, TYPE_SCALAR_ELEMENT, Expand); + setOperationAction(ISD::UREM, TYPE_SCALAR_ELEMENT, Expand); + + setOperationAction(ISD::MULHU, TYPE_SCALAR_ELEMENT, Expand); + setOperationAction(ISD::MULHS, TYPE_SCALAR_ELEMENT, Expand); + + setOperationAction(ISD::UMUL_LOHI, TYPE_SCALAR_ELEMENT, Expand); + setOperationAction(ISD::SMUL_LOHI, TYPE_SCALAR_ELEMENT, Expand); + + setOperationAction(ISD::ADDC, TYPE_SCALAR_ELEMENT, Expand); + setOperationAction(ISD::ADDE, TYPE_SCALAR_ELEMENT, Expand); + + setOperationAction(ISD::SUBC, TYPE_SCALAR_ELEMENT, Expand); + setOperationAction(ISD::SUBE, TYPE_SCALAR_ELEMENT, Expand); + + + setOperationAction(ISD::ROTR, TYPE_SCALAR_ELEMENT, Expand); + setOperationAction(ISD::ROTL, TYPE_SCALAR_ELEMENT, Expand); + + setOperationAction(ISD::SHL_PARTS, TYPE_SCALAR_ELEMENT, Expand); + setOperationAction(ISD::SRL_PARTS, TYPE_SCALAR_ELEMENT, Expand); + setOperationAction(ISD::SRA_PARTS, TYPE_SCALAR_ELEMENT, Expand); + + setOperationAction(ISD::CTTZ, TYPE_SCALAR_ELEMENT, Custom); + setOperationAction(ISD::CTLZ, TYPE_SCALAR_ELEMENT, Custom); + // + 
setOperationAction(ISD::CTTZ_ZERO_UNDEF, TYPE_SCALAR_ELEMENT, Custom); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, TYPE_SCALAR_ELEMENT, Custom); + + setOperationAction(ISD::CTPOP, TYPE_SCALAR_ELEMENT, Expand); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand); + + /* + Inspired from llvm/lib/Target/X86/X86ISelLowering.cpp. + + IMPORTANT: From https://llvm.org/svn/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h + + /// Convenience method to set an operation to Promote and specify the type + /// in a single call. + void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { + setOperationAction(Opc, OrigVT, Promote); + AddPromotedToType(Opc, OrigVT, DestVT); + } + */ + setOperationPromotedToType(ISD::ConstantPool, MVT::i32, TYPE_SCALAR_ELEMENT); + setOperationPromotedToType(ISD::Constant, MVT::i32, TYPE_SCALAR_ELEMENT); + setOperationPromotedToType(ISD::ADD, MVT::i32, TYPE_SCALAR_ELEMENT); + + setOperationPromotedToType(ISD::ADD, MVT::i16, TYPE_SCALAR_ELEMENT); + setOperationPromotedToType(ISD::ADD, MVT::i64, MVT::i32); + + + // Inspired from AMDGPU/AMDGPUISelLowering.cpp + //Need DAG EVT LegalVT = getTypeToTransformTo(*DAG.getContext(), MVT::i32); + //LLVM_DEBUG(dbgs() << "addVectorIntType(): LegalVT " << LegalVT << "\n"); + + // Extended load operations for i1 types must be promoted + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); + + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand); + } + + setBooleanContents(ZeroOrOneBooleanContent); + + // Function alignments 
(log2) + setMinFunctionAlignment(3); + setPrefFunctionAlignment(3); + + // inline memcpy() for kernel to see explicit copy + MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CONNEX_VECTOR_LENGTH; + MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CONNEX_VECTOR_LENGTH; + MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CONNEX_VECTOR_LENGTH; + + + // Inspired from ARMISelLowering.cpp: + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, MVT::i64, Legal); + setIndexedLoadAction(im, MVT::i16, Promote); + setIndexedStoreAction(im, MVT::i64, Legal); + setIndexedStoreAction(im, MVT::i16, Promote); + } + +#ifdef NOTNOT + /* + LLVM_DEBUG(dbgs() << "addVectorIntType(): calling setTypeAction()\n"); + ValueTypeActionImpl::setTypeAction(MVT::i16, TypePromoteInteger); + ValueTypeActionImpl::setTypeAction(MVT::i32, TypePromoteInteger); + // + LLVM_DEBUG(dbgs() << "addVectorIntType(): calling setTypeAction()\n"); + ValueTypeActionImpl::setTypeAction(MVT::i16, TypeLegal); + ValueTypeActionImpl::setTypeAction(MVT::i32, TypeLegal); + */ + + /* + IMPORTANT: the whole reason I am using below setTypeAction() is that we get + error: + <> + when using setOperationAction(Intrinsic::connex_repeat_x_times...): + //setOperationAction(Intrinsic::connex_repeat_x_times, MVT::i16, Expand); //Legal); + //setOperationAction(Intrinsic::connex_repeat_x_times, MVT::i32, Legal); + //setOperationAction(Intrinsic::connex_repeat_x_times, MVT::i32, Promote); + + + IMPORTANT: This piece of code HAS to be put at the end of this method because + otherwise one or more of the above calls are rendering this + setTypeAction() below useless. + But then it gives error like: + <, BasicBlock:ch >> + because I made i16 a legal type and the instruction scheduler does NOT + have to promote it to i64, although br_cc requires it (see TableGen + definition). 
+ To fix this we should make sure we put setTypeAction(MVT::i16, TypeLegal) + before all setOperation...() that intefere with it. + + See http://llvm.org/docs/doxygen/html/TargetLowering_8h_source.html#l00096 + for enum LegalizeTypeAction. + */ + #ifdef DO_F16_EMULATION_IN_ISEL_LOWERING + // 2018_08_17 + setOperationAction(Intrinsic::connex_reduce_f16, MVT::f16, Custom); + #endif + + LLVM_DEBUG(dbgs() << "ConnexTargetLowering(): calling " + "setTypeAction(MVT::i16, ...)\n"); + LegalizeTypeAction ta = ValueTypeActions.getTypeAction(MVT::i16); + LLVM_DEBUG(dbgs() + << " Before setTypeAction(MVT::i16, ...), i16 has action " + << ta << "\n"); + + // Inspired from lib/Target/X86/X86ISelLowering.cpp: + // Gives error: "Do not know how to promote this operator's operand!" + ValueTypeActions.setTypeAction(MVT::i16, TypeLegal); + // Gives error: "Do not know how to promote this operator's operand!" + //ValueTypeActions.setTypeAction(MVT::i16, TypeExpandInteger); //TypePromoteInteger); + ta = ValueTypeActions.getTypeAction(/* DAG.getContext(), */ MVT::i16); //TypeLegal); + LLVM_DEBUG(dbgs() + << " setTypeAction(MVT::i16, ...) has set for i16 action to " + << ta << "\n"); +#endif // NOTNOT + + + // NEW_FP16: it seems these are very useful + // See http://llvm.org/doxygen/TargetLowering_8h_source.html#l00122 + // Quite GOOD: + ValueTypeActions.setTypeAction(MVT::f16, TypeLegal); + + /* "// Convert this float to a same size integer type, + if an operation is not supported in target HW." 
*/ + // ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat); //TypePromoteInteger); + + setOperationAction(ISD::MSCATTER, TYPE_VECTOR_F16, Legal); + + /* + // It seems it does not help: + + setOperationAction(ISD::LOAD, MVT::f16, Promote); + + // Gives: << UNREACHABLE executed at /home/asusu/LLVM/llvm38Nov2016/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:465!>> + //setOperationAction(ISD::STORE, MVT::f16, LibCall); + + setOperationAction(ISD::STORE, MVT::f16, Promote); + setOperationAction(ISD::FADD, MVT::f16, LibCall); + */ + + + AddPromotedToType(ISD::LOAD, MVT::f16, MVT::i16); + AddPromotedToType(ISD::STORE, MVT::f16, MVT::i16); + /* + // It seems it doesn't help: + AddPromotedToType(ISD::FADD, MVT::f16, MVT::i16); + */ + // END NEW_FP16 + + LLVM_DEBUG(dbgs() << "Exiting ConnexTargetLowering()\n"); +} // END ConnexTargetLowering::ConnexTargetLowering() + + +/* Dynamic stack allocation is not supported: reports an "unsupported dynamic alloca" diagnostic for the current function and returns the merged values {constant 0 of Op's value type, operand 0 of Op}. */ +// Inspired from lib/Target/AMDGPU/AMDGPUISelLowering.cpp +SDValue ConnexTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { + const Function &Fn = DAG.getMachineFunction().getFunction(); + + DiagnosticInfoUnsupported NoDynamicAlloca(Fn, "unsupported dynamic alloca", + SDLoc(Op).getDebugLoc()); + DAG.getContext()->diagnose(NoDynamicAlloca); + auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)}; + return DAG.getMergeValues(Ops, SDLoc()); +} + + +// Inspired from lib/Target/X86/X86ISelLowering.cpp +// Convert vector InOp to vector type NVT (adapted from a vector-widening helper): InOp is returned as-is when it already has type NVT, and an UNDEF of type NVT is returned when InOp is undef. +static SDValue ChangeVectorType(SDValue InOp, + MVT NVT, + SelectionDAG &DAG, + bool FillWithZeroes = false, + // This is meant for the index operand of MGATHER and MSCATTER + bool allowUnsafeChanges = false) { + LLVM_DEBUG(dbgs() << " ChangeVectorType(): InOp = "; + InOp.dump(); + dbgs() << "\n"); + + LLVM_DEBUG(dbgs() << " ChangeVectorType(): NVT = " + << ((EVT)NVT).getEVTString() << "\n"); + + // Check if InOp already has the right width.
+ MVT InVT = InOp.getSimpleValueType(); + if (InVT == NVT) + return InOp; + + if (InOp.isUndef()) + return DAG.getUNDEF(NVT); + + /* + assert(InVT.getVectorElementType() == NVT.getVectorElementType() && + "input and widen element type must match"); + */ + + unsigned InNumElts = InVT.getVectorNumElements(); + unsigned WidenNumElts = NVT.getVectorNumElements(); + LLVM_DEBUG(dbgs() << " ChangeVectorType(): InNumElts = " << InNumElts + << "\n WidenNumElts = " << WidenNumElts << "\n"); + /* + assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 && + "Unexpected request for vector widening"); + */ + if (allowUnsafeChanges == false) + assert(WidenNumElts == InNumElts && + "WidenNumElts == InNumElts failed"); + + EVT EltVT = NVT.getVectorElementType(); + + SDLoc dl(InOp); + if (InOp.getOpcode() == ISD::CONCAT_VECTORS && + InOp.getNumOperands() == 2) { + SDValue N1 = InOp.getOperand(1); + if ((ISD::isBuildVectorAllZeros(N1.getNode()) && FillWithZeroes) || + N1.isUndef()) { + InOp = InOp.getOperand(0); + InVT = InOp.getSimpleValueType(); + InNumElts = InVT.getVectorNumElements(); + } + } + + if (ISD::isBuildVectorOfConstantSDNodes(InOp.getNode()) || + ISD::isBuildVectorOfConstantFPSDNodes(InOp.getNode())) { + SmallVector Ops; + for (unsigned i = 0; i < InNumElts; ++i) + Ops.push_back(InOp.getOperand(i)); + + /* + SDValue FillVal = FillWithZeroes ? 
DAG.getConstant(0, dl, EltVT) : + DAG.getUNDEF(EltVT); + for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i) + Ops.push_back(FillVal); + */ + return DAG.getBuildVector(NVT, dl, Ops); + } + + LLVM_DEBUG(dbgs() << " ChangeVectorType(): InOp = "; + // << InOp.getNode() << "\n"); + InOp.dump(); + dbgs() << "\n"); + + if (allowUnsafeChanges == false) { + assert(0 && "ChangeVectorType(): I guess this case should not be reached"); + } + else { + assert(0 && "Finish implementation"); + // MEGA-TODO: implement well, although this case might NOT be required + SDValue Ops[] = { InOp.getOperand(0), InOp.getOperand(1) }; + + LLVM_DEBUG(dbgs() << " ChangeVectorType(): Ops[0] = "; + Ops[0].dump(); + dbgs() << "\n"); + + LLVM_DEBUG(dbgs() << " ChangeVectorType(): Ops[1] = "; + Ops[1].dump(); + dbgs() << "\n"); + + SDValue res = DAG.getNode(InOp->getOpcode(), dl, + NVT, + Ops); + + LLVM_DEBUG(dbgs() << "ChangeVectorType(): res = "; + res.dump(); + dbgs() << "\n"); + + return res; + } + /* NOTE(review): the code below is reached only when asserts are compiled out and allowUnsafeChanges is false; with asserts enabled both branches above stop at assert(0), and the else branch returns res otherwise. */ + SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, NVT) : + DAG.getUNDEF(NVT); + + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, + NVT, FillVal, + InOp, DAG.getIntPtrConstant(0, dl)); +} // END ChangeVectorType() + + + +/* For each user of Index's node whose opcode is ISD::ADD, creates a Connex::ADDV_H machine node of type aType from the first two operands of that ADD and replaces all uses of the ADD with the new node. */ +void ConnexTargetLowering::replaceAddI32UseWithADDVH(MVT &aType, + SDValue &Index, + SelectionDAG &DAG) const { + SDLoc dl(Index); + + LLVM_DEBUG(dbgs() << "Entered ReplaceAddI32UseWithADDVH()\n"); + + /* + We make an unsafe assumption that if the Index of the + MSCATTER/MGATHER instruction is used in an ADD, then this Index is an + induction variable and we can change it to i16 type + (we also assume this ind.var is NOT overflowing the i16 type). + MEGA-TODO: check if initializing this Index is safely done on i32 type or on i16. + + VERY IMPORTANT: + The Connex processor we target allows only + indirect Loads (and Stores) that work on lanes of ONLY 16-bits.
+ Therefore we need to make sure that the index/address register is not + used in i32 operations and if it is we change them to MachineNodes + here, in the ISelLowering phase (before ISelDAGToDAG), that have + actually type v128i16. + IMPORTANT-TODO: make at least a check that the BUILD_VECTOR with initial + index/address value is a short (i16) value AND LOWER the v64i32 to v128i16 + by doing a splat with the lower 16-bits value of element 0 + */ + + // Inspired from LegalizeTypes.cpp + SDNode *nodeIndex = Index.getNode(); + for (SDNode::use_iterator UI = nodeIndex->use_begin(), UE = nodeIndex->use_end(); + UI != UE; ++UI) { + SDNode *nUser = UI.getUse().getUser(); + + /* + if (UI.getUse().getResNo() == i) + assert(UI->getNodeId() == NewNode && + "Remapped value has non-trivial use!"); + */ + LLVM_DEBUG(dbgs() << "replaceAddI32UseWithADDVH(): nUser = "; + nUser->dump(); + //dbgs() << "\n" + ); + + if (nUser->getOpcode() == ISD::ADD) { + LLVM_DEBUG(dbgs() << "replaceAddI32UseWithADDVH(): Converting nUser " + "ISD::ADD to MachineSDNode Connex::ADDV_H\n"); + + /* IMPORTANT: We do here an unsafe type hack: we use ADDV_H which actually + * has TYPE_VECTOR_I16 and declare the type returned is TYPE_VECTOR_I32. + * It is a type mismatch at the level of semantics of the defined + * MachineSDNodes of Connex - I've actually done this before and + * it seems SelectionDAG doesn't complain. + * (Note that llc actually does TypeLegalization). 
+ */ + SDNode *nUserNew = DAG.getMachineNode(Connex::ADDV_H, + dl, + //TYPE_VECTOR_I16, + aType, + //Ops + nUser->getOperand(0), + nUser->getOperand(1)); + + // From http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html + DAG.ReplaceAllUsesWith(nUser, nUserNew); + } + } +} + + +/* Custom lowering of a masked_gather node: picks the index vector type (TYPE_VECTOR_I16 when the result type is TYPE_VECTOR_I16 or TYPE_VECTOR_F16, TYPE_VECTOR_I32 otherwise), passes the index through ChangeVectorType(), calls replaceAddI32UseWithADDVH() in the TYPE_VECTOR_I32 case, asserts that the node has 6 operands and returns Op; the operand update is compiled only when NOTNOT_2019_03_30 is defined. */ +// Inspired from [LLVM]/llvm/lib/Target/X86/X86ISelLowering.cpp +/* static */ +SDValue ConnexTargetLowering::LowerMGATHER(SDValue &Op, + SelectionDAG &DAG) const { + LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerMGATHER()\n"); + /* NOTE(review): cast() appears below without an explicit template argument; presumably cast<MaskedGatherSDNode> was intended - confirm against the original patch. */ + MaskedGatherSDNode *N = cast(Op.getNode()); + + SDLoc dl(Op); + EVT resVT = Op.getSimpleValueType(); + + LLVM_DEBUG(dbgs() << "LowerMGATHER(): " + << "resVT = " << resVT.getEVTString() + << "\n"); + + SDValue Index = N->getIndex(); + SDValue Mask = N->getMask(); + SDValue Src = N->getPassThru(); // this is actually passthru + MVT IndexVT = Index.getSimpleValueType(); + MVT MaskVT = Mask.getSimpleValueType(); + + //unsigned NumElts = VT.getVectorNumElements(); + //assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op"); + + /* + if (!Subtarget.hasVLX() && !VT.is512BitVector() && + !Index.getSimpleValueType().is512BitVector()) { + // AVX512F supports only 512-bit vectors. Or data or index should + // be 512 bit wide.
If now the both index and data are 256-bit, but + // the vector contains 8 elements, we just sign-extend the index + if (NumElts == 8) { + Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), Index }; + DAG.UpdateNodeOperands(N, Ops); + return Op; + } + + // Minimal number of elements in Gather + NumElts = 8; + // Index + MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), NumElts); + Index = ExtendToType(Index, NewIndexVT, DAG); + if (IndexVT.getScalarType() == MVT::i32) + Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index); + + // Mask + MVT MaskBitVT = MVT::getVectorVT(MVT::i1, NumElts); + // At this point we have promoted mask operand + assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type"); + MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts); + Mask = ExtendToType(Mask, ExtMaskVT, DAG, true); + Mask = DAG.getNode(ISD::TRUNCATE, dl, MaskBitVT, Mask); + + // The pass-thru value + MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts); + Src = ExtendToType(Src, NewVT, DAG); + + SDValue Ops[] = { N->getChain(), Src, Mask, N->getBasePtr(), Index }; + SDValue NewGather = DAG.getMaskedGather(DAG.getVTList(NewVT, MVT::Other), + N->getMemoryVT(), dl, Ops, + N->getMemOperand()); + SDValue Exract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, + NewGather.getValue(0), + DAG.getIntPtrConstant(0, dl)); + SDValue RetOps[] = {Exract, NewGather.getValue(1)}; + return DAG.getMergeValues(RetOps, dl); + } + */ + + LLVM_DEBUG(dbgs() << " LowerMGATHER(): Op.getNode() = " << Op.getNode() + << "\n Op = "; + Op->dump(&DAG)); + + //MVT NewVT = MVT::v128i16; + //SDValue Index2 = ExtendToType(Index, NewVT, DAG); + // The index value - is normally i32, and we have to lower it to i16 + MVT aType = (resVT == TYPE_VECTOR_I16 ? TYPE_VECTOR_I16 : TYPE_VECTOR_I32); + /* + // 2018_06_23 + MVT aType = (resVT == TYPE_VECTOR_I16 ? 
TYPE_VECTOR_I16 : TYPE_VECTOR_I16); + */ + // NEW_FP16 + if (resVT == TYPE_VECTOR_F16) + // I guess this case NEVER happens + aType = TYPE_VECTOR_I16; + + // 2019_03_30: SDValue Index2 = ChangeVectorType(Index, aType, DAG); + SDValue Index2 = ChangeVectorType(Index, aType, DAG, false, true); + /* + // 2018_06_23 + SDValue Index2 = ChangeVectorType(Index, aType, DAG, false, true); + + We get the following error: + <hasAnyUseOfValue(i) || + From->getValueType(i) == To->getValueType(i)) && "Cannot use this version of ReplaceAllUsesWith!"' failed.>> + The reason is that we change index to have type v128i16, while + masked_gather has type v64i32, and this type difference gives the + assertion error. + */ + + + + LLVM_DEBUG(dbgs() << " LowerMGATHER(): Index = "; + Index->dump();); + LLVM_DEBUG(dbgs() << " LowerMGATHER(): N->getNumOperands() = " + << N->getNumOperands() << "\n"); + LLVM_DEBUG(dbgs() << " ChangeVectorType(): N = "; + N->dump();); // << "\n"); + + // 2018_06_23 + if (aType == TYPE_VECTOR_I32) { + replaceAddI32UseWithADDVH(aType, Index, DAG); + } + + assert(N->getNumOperands() == 6); + /* The definition of the MaskedGatherSDNode class can be found at + * http://llvm.org/doxygen/SelectionDAGNodes_8h_source.html#l02324 + * IMPORTANT NOTE: we are treating here the machine-independent + * masked_gather, which has different parameters than the + * machine-SDNode masked_gather node defined in TableGen (with params specified by + * constraints defined in SDTMaskedGather). + * machine-independent masked_gather looks like: + * t21: v128i16,ch = masked_gather t0, t29, t35, Constant:i64<51>, t32 + * where: + * - 1st param (in this case t0) is chain (this case, EntryToken) + * - 2nd param (in this case t29) is passthru (vector) + * - 3rd param (in this case t35) is mask (vector) + * - 4th param (in this case Constant) is the base pointer (scalar) of the loads + * (the origin/reference for the index of the gather) + * (the base of GEP, also repeated in LD256[...] 
symbolically) + * NOTE: if it has value TargetConstant:i64<0> then we have LD256[] - this seems to always make llc crash. + * - 5th param (in this case t32) is index (vector). + * - 6th param is scale. + */ +#ifdef NOTNOT_2019_03_30 + // IMPORTANT: Here we avoid materializing the passthru operand + SDValue ct = DAG.getConstant(1, dl, MVT::i64); + SDValue Ops[] = { + N->getOperand(0), + + // passthru + N->getOperand(1), + // Cycles forever in I-selection: DAG.getUNDEF(TYPE_VECTOR_I16), + // See http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html + + // mask + N->getOperand(2), + + // base pointer + ct, + //N->getOperand(3), + + Index2 + //Index + }; + + DAG.UpdateNodeOperands(N, Ops); +#endif + + /* + */ + LLVM_DEBUG(dbgs() << " LowerMGATHER(), after update: Op.getNode() = " + << Op.getNode() //->dump(CurDAG); + << "\n Op = "; + Op->dump(&DAG); + dbgs() << "\n N = " << N; + dbgs() << "\n N = "; N->dump(&DAG); + //dbgs() << "\n Scale = "; Scale->dump(CurDAG); + dbgs() << "\n Index.getNode() = " << Index.getNode(); //<< ", Base.getNode() = " << Base.getNode(); + dbgs() << "\n Index = "; Index->dump(&DAG); + // + dbgs() << "\n N->getBasePtr() = "; N->getBasePtr()->dump(&DAG); + // + dbgs() << "\n Index2.getNode() = " << Index2.getNode(); //<< ", Base.getNode() = " << Base.getNode(); + dbgs() << "\n Index2 = "; Index2->dump(&DAG); + // + dbgs() << "\n Mask.getNode() = " << Mask.getNode(); //<< ", Base.getNode() = " << Base.getNode(); + dbgs() << "\n Mask = "; Mask->dump(&DAG); + // + dbgs() << "\n Src.getNode() = " << Src.getNode(); //<< ", Base.getNode() = " << Base.getNode(); + dbgs() << "\n Src = "; Src->dump(&DAG); + // + /* + // Not working + dbgs() << "\n resVT.SimpleTy = " << ((MVT)resVT).SimpleTy; + dbgs() << "\n IndexVT.SimpleTy = " << IndexVT.SimpleTy; + dbgs() << "\n MaskVT.SimpleTy = " << MaskVT.SimpleTy; + */ + // + dbgs() << "\n"); + + /* + // NOT working + if (N->getNumValues() > 1) { + LLVM_DEBUG(dbgs() << " LowerMGATHER(): calling 
getMergeValues()\n");
+    SDValue RetOps[] = {Op.getValue(0), Op.getValue(1)};
+    // NOT working: still gives assertion error after this:
+    //   <getNumValues() && "Custom lowering returned the wrong number of results!"' failed.>>
+    // (and modifying LowerOperationWrapper() also does NOT help).
+    return DAG.getMergeValues(RetOps, dl);
+  }
+  */
+
+  LLVM_DEBUG(dbgs() << "Exiting ConnexTargetLowering::LowerMGATHER()\n");
+
+  return Op;
+} // END ConnexTargetLowering::LowerMGATHER()
+
+
+// Custom lowering for ISD::MSCATTER.
+//
+// The scatter node itself is handed back untouched. The only rewrite done here
+// is on its index operand: unless the value operand has type TYPE_VECTOR_I16,
+// replaceAddI32UseWithADDVH(TYPE_VECTOR_I32, Index, DAG) is called on the
+// index.
+//
+// Op  - the MSCATTER node; it is cast to MaskedScatterSDNode.
+// DAG - the SelectionDAG passed on to replaceAddI32UseWithADDVH().
+// Returns Op itself.
+SDValue ConnexTargetLowering::LowerMSCATTER(SDValue &Op,
+                                //const ConnexSubtarget &Subtarget,
+                                SelectionDAG &DAG) const {
+  LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerMSCATTER()\n");
+
+  MaskedScatterSDNode *N = cast<MaskedScatterSDNode>(Op.getNode());
+
+  SDLoc dl(Op);
+  // This returns ch for the MSCATTER SDNode: EVT resVT = Op.getSimpleValueType();
+
+  SDValue Index = N->getIndex();
+  SDValue Mask = N->getMask();
+  // Value operand of the scatter.
+  // NOTE(review): the original comment called this "passthru"; presumably it is
+  // the data being stored - confirm against MaskedScatterSDNode::getValue().
+  SDValue Src = N->getValue();
+  MVT IndexVT = Index.getSimpleValueType();
+  MVT MaskVT = Mask.getSimpleValueType();
+  EVT SrcVT = Src.getSimpleValueType();
+  LLVM_DEBUG(dbgs() << "LowerMSCATTER(): "
+                    << "SrcVT = " << SrcVT.getEVTString()
+                    << "\n");
+
+  LLVM_DEBUG(dbgs() << " LowerMSCATTER(): Op.getNode() = " << Op.getNode();
+             dbgs() << "\n Op = "; Op->dump(&DAG));
+
+  // The index value - is normally i32, and we have to lower it to i16
+  MVT aType = (SrcVT == TYPE_VECTOR_I16 ? TYPE_VECTOR_I16 : TYPE_VECTOR_I32);
+
+
+  LLVM_DEBUG(dbgs() << " LowerMSCATTER(): Index = ";
+             Index->dump(););
+
+  // 2018_06_23
+  // Only the non-i16 case needs the index rewritten.
+  if (aType == TYPE_VECTOR_I32) {
+    replaceAddI32UseWithADDVH(aType, Index, DAG);
+  }
+
+  LLVM_DEBUG(dbgs() << "Exiting ConnexTargetLowering::LowerMSCATTER()\n");
+
+  return Op;
+} // END ConnexTargetLowering::LowerMSCATTER()
+
+
+
+
+#ifdef DO_F16_EMULATION_IN_ISEL_LOWERING
+
+  #define MARKER_FOR_EMULATION
+
+extern SDNode *CreateInlineAsmNode(SelectionDAG *CurDAG, std::string asmString,
+                                   SDNode *nodeSYM_IMM, SDLoc &DL,
+                                   bool specialCase=false);
+
+
+SDValue ConnexTargetLowering::LowerMUL_F16(SDValue &Op,
+                                           SelectionDAG *CurDAG) const {
+  SDNode *Node = Op.getNode();
+
+  LLVM_DEBUG(dbgs() << "Entered LowerMUL_F16(): [LATEST] Selecting Node = ";
+             Node->dump(CurDAG);
+             dbgs() << "\n");
+
+
+  SDLoc DL(Node);
+
+  EVT 
std::string exprStrBegin = "// Starting MUL.f16 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCast1, DL); + LLVM_DEBUG(dbgs() << "LowerMUL_F16(): inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); dbgs() << "\n"); +#endif + + SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + MVT::Other, + // IMPORTANT: this can give error: <getNodeId() == -1 && "Node already inserted!">> MVT::Glue, + nodeOpSrc2, + // chain + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeBegin, 0) + #else + SDValue(nodeOpSrcCast1, 1) + #endif + ); + + +/* +// Tested - works well, but a bit complicated and inefficient. +// BUT a GOOD test for the various issues that can appear in llc +// (COPY generated by TwoAddressInctruction in WHERE blocks and handled by me +// in ConnexTargetMachine.cpp, etc) +*/ +#include "Select_MULf16_OpincaaCodeGen.h" + + +#ifdef MARKER_FOR_EMULATION + std::string exprStrEnd = "// Finishing MUL.f16 emulation ;)"; + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + resF16, DL); + LLVM_DEBUG(dbgs() << "LowerMUL_F16(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); dbgs() << "\n"); +#endif + + // END of method - we convert resH (vector of short/i16) to resW (vector of i32) + SDNode *resW = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(resF16, 0), + // chain edge + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeEnd, 0) + #else + SDValue(resF16, 1) + #endif + ); + + LLVM_DEBUG(dbgs() << "LowerMUL_F16(): resW = "; + resW->dump(CurDAG); + dbgs() << "\n"); + + return SDValue(resW, 0); +} // END LowerMUL_F16() + + +SDValue ConnexTargetLowering::LowerADD_F16(SDValue &Op, + SelectionDAG *CurDAG) const { + SDNode *Node = Op.getNode(); + + LLVM_DEBUG(dbgs() << "Entered LowerADD_F16(): [LATEST] Selecting Node = "; + Node->dump(CurDAG); + dbgs() << "\n"); + + + SDLoc DL(Node); + + EVT 
ViaVecTy; + EVT typeVecNode; + + //EVT ResVecTy = Node->getValueType(1); // 0 is ch (chain) + + LLVM_DEBUG(dbgs() << "LowerADD_F16(): We are in the case TYPE_VECTOR_F16\n"); + typeVecNode = TYPE_VECTOR_F16; + + SDValue nodeOpSrc1 = Node->getOperand(0); + SDValue nodeOpSrc2 = Node->getOperand(1); + + LLVM_DEBUG(dbgs() << "LowerADD_F16(): nodeOpSrc1.getValueType() = " + << nodeOpSrc1.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "LowerADD_F16(): nodeOpSrc1 = "; + (nodeOpSrc1.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "LowerADD_F16(): nodeOpSrc2.getValueType() = " + << nodeOpSrc2.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "LowerADD_F16(): nodeOpSrc2 = "; + (nodeOpSrc2.getNode())->dump(); + dbgs() << "\n"); + //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_F16); + + SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + #ifdef MARKER_FOR_EMULATION + MVT::Other, + // This gives a serious error: MVT::Glue, + #else + MVT::Glue, + #endif + nodeOpSrc1); + +#ifdef MARKER_FOR_EMULATION + std::string exprStrBegin = "// Starting ADD.f16 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCast1, DL); + LLVM_DEBUG(dbgs() << "LowerADD_F16(): inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); + dbgs() << "\n"); +#endif + + SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + MVT::Other, + // IMPORTANT: this can give error: <getNodeId() == -1 && "Node already inserted!">> MVT::Glue, + nodeOpSrc2, + // chain + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeBegin, 0) + #else + SDValue(nodeOpSrcCast1, 1) + #endif + ); + + +/* +// Tested - works well, but a bit complicated and inefficient. 
+// BUT a GOOD test for the various issues that can appear in llc +// (COPY generated by TwoAddressInctruction in WHERE blocks and handled by me +// in ConnexTargetMachine.cpp, etc) +*/ +#include "Select_ADDf16_OpincaaCodeGen.h" + + +#ifdef MARKER_FOR_EMULATION + std::string exprStrEnd = "// Finishing ADD.f16 emulation ;)"; + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + resF16, DL); + LLVM_DEBUG(dbgs() << "LowerADD_F16(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); dbgs() << "\n"); +#endif + + // END of method - we convert resH (vector of short/i16) to resW (vector of i32) + SDNode *resW = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(resF16, 0), + // chain edge + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeEnd, 0) + #else + SDValue(resF16, 1) + #endif + ); + LLVM_DEBUG(dbgs() << "LowerADD_F16(): resW = "; + resW->dump(CurDAG); + dbgs() << "\n"); + + return SDValue(resW, 0); +} // END LowerADD_F16() + + +SDValue ConnexTargetLowering::LowerREDUCE_F16(SDValue &Op, + SelectionDAG *CurDAG) const { + SDNode *Node = Op.getNode(); + + LLVM_DEBUG(dbgs() << "Entered SelectReduceF16(): Selecting Node = "; + Node->dump(CurDAG); + dbgs() << "\n"); + + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + + //EVT ResVecTy = Node->getValueType(1); // 0 is ch (chain) + + LLVM_DEBUG(dbgs() + << "SelectReduceF16(): We are in the case TYPE_VECTOR_F16\n"); + typeVecNode = TYPE_VECTOR_F16; + + // NOTE: Opnd 1 is a ct + SDValue nodeOpSrc = Node->getOperand(2); + + // We need to preserve the node that was chained with Node to avoid it is removed + SDValue nodeOpChain = Node->getOperand(0); // Opnd 0 is ch (chain) + + LLVM_DEBUG(dbgs() << "SelectReduceF16(): nodeOpSrc.getValueType() = " + << nodeOpSrc.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectReduceF16(): nodeOpSrc = "; + (nodeOpSrc.getNode())->dump(); + dbgs() << "\n"); + //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_F16); + +#ifdef 
MARKER_FOR_EMULATION + SDNode *nodeOpSrcCastBogus1 = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HH, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // This gives a serious error: MVT::Glue, + nodeOpSrc, + // chain edge + nodeOpChain + ); + + std::string exprStrBegin = "// Starting RED.f16 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCastBogus1, DL); + LLVM_DEBUG(dbgs() << "SelectReduceF16: inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); + dbgs() << "\n"); + + /* This node is also bogus, only for the sake of "sandwhiching" the INLINE + assembly with 2 NOPs. + */ + SDNode *nodeOpSrcCast = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HH, // IMPORTANT: this is a BOGUS NOP_BITCONVERT - we just put it since it has a Glue result, while nodeOpSrcCast2 does NOT + DL, + TYPE_VECTOR_I16, + MVT::Other, + // IMPORTANT: this can give error: <getNodeId() == -1 && "Node already inserted!">> MVT::Glue, + SDValue(nodeOpSrcCastBogus1, 0), + // chain + SDValue(inlineAsmNodeBegin, 0) + ); +#else + SDNode *nodeOpSrcCast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HH, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + nodeOpSrc, + // chain edge + nodeOpChain + ); + +#endif + + return SDValue(); +} // END LowerREDUCE_F16() + + +#else // ! 
DO_F16_EMULATION_IN_ISEL_LOWERING +SDValue ConnexTargetLowering::LowerMUL_F16(SDValue &Op, + SelectionDAG *CurDAG) const { + return SDValue(); +} // END LowerMUL_F16() + + +SDValue ConnexTargetLowering::LowerADD_F16(SDValue &Op, + SelectionDAG *CurDAG) const { + return SDValue(); +} // END LowerADD_F16() + + +SDValue ConnexTargetLowering::LowerREDUCE_F16(SDValue &Op, + SelectionDAG *CurDAG) const { + return SDValue(); +} // END LowerREDUCE_F16() +#endif // #ifdef DO_F16_EMULATION_IN_ISEL_LOWERING + + +/* static */ SDValue ConnexTargetLowering::LowerVSELECT(SDValue &Op, + //const ConnexSubtarget &Subtarget, + SelectionDAG &DAG) const { + assert(0 && "This code is no longer executed."); + + /* + case ISD::VSELECT: + SDNode *N = Op.getNode(); + return performVSELECTCombine(N, DAG); + static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { + EVT Ty = N->getValueType(0); + + if (Ty.is128BitVector() && Ty.isInteger()) { + // Try the following combines: + // (vselect (setcc $a, $b, SETLT), $b, $a)) -> (vsmax $a, $b) + // (vselect (setcc $a, $b, SETLE), $b, $a)) -> (vsmax $a, $b) + // (vselect (setcc $a, $b, SETLT), $a, $b)) -> (vsmin $a, $b) + // (vselect (setcc $a, $b, SETLE), $a, $b)) -> (vsmin $a, $b) + // (vselect (setcc $a, $b, SETULT), $b, $a)) -> (vumax $a, $b) + // (vselect (setcc $a, $b, SETULE), $b, $a)) -> (vumax $a, $b) + // (vselect (setcc $a, $b, SETULT), $a, $b)) -> (vumin $a, $b) + // (vselect (setcc $a, $b, SETULE), $a, $b)) -> (vumin $a, $b) + // SETGT/SETGE/SETUGT/SETUGE variants of these will show up initially but + // will be expanded to equivalent SETLT/SETLE/SETULT/SETULE versions by the + // legalizer. 
+ SDValue Op0 = N->getOperand(0); + + if (Op0->getOpcode() != ISD::SETCC) + return SDValue(); + + ISD::CondCode CondCode = cast(Op0->getOperand(2))->get(); + bool Signed; + + if (CondCode == ISD::SETLT || CondCode == ISD::SETLE) + Signed = true; + else if (CondCode == ISD::SETULT || CondCode == ISD::SETULE) + Signed = false; + else + return SDValue(); + + SDValue Op1 = N->getOperand(1); + SDValue Op2 = N->getOperand(2); + SDValue Op0Op0 = Op0->getOperand(0); + SDValue Op0Op1 = Op0->getOperand(1); + + if (Op1 == Op0Op0 && Op2 == Op0Op1) + return DAG.getNode(Signed ? MipsISD::VSMIN : MipsISD::VUMIN, SDLoc(N), + Ty, Op1, Op2); + else if (Op1 == Op0Op1 && Op2 == Op0Op0) + return DAG.getNode(Signed ? MipsISD::VSMAX : MipsISD::VUMAX, SDLoc(N), + Ty, Op1, Op2); + } else if ((Ty == MVT::v2i16) || (Ty == MVT::v4i8)) { + SDValue SetCC = N->getOperand(0); + + if (SetCC.getOpcode() != MipsISD::SETCC_DSP) + return SDValue(); + + return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty, + SetCC.getOperand(0), SetCC.getOperand(1), + N->getOperand(1), N->getOperand(2), SetCC.getOperand(2)); + } + + return SDValue(); + } + */ + + + /* + VERY VERY IMPORTANT: I prefer to do VSELECT treatment here, in + ISelLowering::LowerOperation, not in ConnexISelDAGToDAG, + because, I only do lowering, and only where required I put Machine Nodes + (i.e. ORV_H nodes). + So I let ISelDAGToDAG to do proper selection after this lowering, and + in ISelDAGToDAG some of the TableGen patterns are being used for the + lowered VSELECT. + + Note that register allocation is performed after Instruction selection + (see [Cardoso_2014], Figure on page 134). + %So we have to replace VSELECT before Register allocation. 
+ + Note that although it is not required to create virtual registers for + the ORV_H machine instructions (since we failed to add a ch input port + to the setcc - see 50_IfConversion/Setcc_with_ch_input_port_NOT_working + - and I guess we would fail here also), we create it for the true + ORV_H because we need to make the associated predecessor CopyToRegister a + successor of WHEREEQ, otherwise the WHEREEQ would not have a successor. + TODO if we are extremely precious: + I guess we could make a succcessor of WHEREEQ the CopyToReg successor + of ORV_H and could get rid of all input virtual registers. + NOTE: we canNOT get rid of the virtual register that keeps the result of + both ORV_H, because we can replace it only with a VSELECT (reminds me + of dataflow machines and multiplexors :) ), BUT we want + to lower VSELECT in other components. + + Note that the nodes we create here have to have correct ordering, + otherwise instruction selection can fail or have wrong semantics. + */ + LLVM_DEBUG(dbgs() << "Treating LowerOperation() for ISD::VSELECT...\n"); + + // END_WHERE, etc are defined in anonymous enum in TableGen generated ConnexGenInstrInfo.inc + + /* From http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html: + * LLVMContext * getContext () const + */ + + + LLVMContext &theContext = *(DAG.getContext()); + EVT voidEVT = EVT::getEVT(Type::getVoidTy(theContext)); + LLVM_DEBUG(dbgs() << " LowerOperation(): voidEVT = " + << voidEVT.getEVTString() << "\n"); + + SDValue chain = DAG.getEntryNode(); + SDValue InFlag(nullptr, 0); // NO Glue - Null incoming flag value. 
+ + SDNode *vselectNode = Op.getNode(); + assert(vselectNode->getNumOperands() == 3); + LLVM_DEBUG(dbgs() << " LowerOperation(): Initially, vselectNode->use_size() = " + << vselectNode->use_size() + << "\n"); + + for (SDNode::use_iterator UI = vselectNode->use_begin(), UE = vselectNode->use_end(); + UI != UE; ++UI) { + // Note: UI is an SDNode * + LLVM_DEBUG(dbgs() << " LowerOperation(): Initially, one use of vselectNode is: "; + UI->print(dbgs()); + dbgs() << "\n"); + } + + //EVT nodeResType = vselectNode->getValueType(0); + SDValue vselectNodeOp0 = vselectNode->getOperand(0); + SDValue vselectNodeOp1 = vselectNode->getOperand(1); + SDValue vselectNodeOp2 = vselectNode->getOperand(2); + + SDValue setCC = vselectNode->getOperand(0); + SDNode *setCCNode = setCC.getNode(); + SDValue setCCPred = (vselectNode->getOperand(0)).getNode()->getOperand(2); + SDNode *setCCPredNode = setCCPred.getNode(); + + assert(setCCPredNode->isMachineOpcode() == false); + assert(setCCPredNode->getOpcode() == ISD::CONDCODE); + + // From http://llvm.org/docs/doxygen/html/classllvm_1_1SDLoc.html + //const SDLoc DL; + //const SDLoc DL(vselectNode); + const SDLoc DL(vselectNodeOp0); // trying to avoid problems when giving DeleteNode(vselectNode) + + // Inspired from ConnexISelLowering.cpp + MachineFunction &MF = DAG.getMachineFunction(); + //MachineRegisterInfo &RegInfo = MF.getRegInfo(); + // Inspiring from MipsSEISelLowering.cpp + //MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + + + unsigned whereOpcode; + switch (cast(setCCPredNode)->get()) { + case ISD::SETEQ: + whereOpcode = Connex::WHEREEQ_BUNDLE_H; + break; + case ISD::SETLT: + whereOpcode = Connex::WHERELT_BUNDLE_H; + break; + case ISD::SETULT: + whereOpcode = Connex::WHEREULT_BUNDLE_H; + break; + default: + assert(0 && "case not reachable"); + break; + } + +#define WORKING_WITH_PHYSICAL_REGISTER + +#ifdef WORKING_WITH_PHYSICAL_REGISTER + unsigned regDest = CONNEX_RESERVED_REGISTER_01; + + /* + unsigned virtRegRes = 
RegInfo.createVirtualRegister( + &Connex::VectorHRegClass); + */ +#else + /* IMPORTANT: In essence this is ONLY to allocate a virtual register to use + it later for the TargetMachine, PassPredicate. + */ + unsigned regDest = RegInfo->createVirtualRegister( + &Connex::VectorHRegClass); + + SDValue copyFromRegDest = DAG.getCopyFromReg( + chain, + //SDValue(endWhere, 0), + DL, + regDest, + TYPE_VECTOR_I16 // result type + //endWhere->getOperand(0) //RegTy + //SDValue(endWhere, 0) + ); +#endif + + // Signature: MachineSDNode *getMachineNode (unsigned Opcode, SDLoc dl, EVT VT, SDValue Op1, SDValue Op2, SDValue Op3) + SDNode *whereXY = DAG.getMachineNode(whereOpcode, + DL, + // Return type + //voidEVT //, + TYPE_VECTOR_I16, + //MVT::Glue, + //TYPE_VECTOR_I16, + //orNodeTrue->getValue(0) + + vselectNodeOp2, + vselectNodeOp1, + /* NOTE: if we move this value as 1st parameter it crashes with: Assertion `NumMIOperands >= II.getNumOperands() && NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() + NumImpUses && "#operands for dag node doesn't match .td file!"' failed., etc */ + //SDValue(copyToRegFalse.getNode(), 0), // the ch output port of CopyToReg + + //vselectNodeOp0 + #ifdef WORKING_WITH_PHYSICAL_REGISTER + SDValue(setCCNode, 0) + #else + copyFromRegDest + #endif + // The glue output port of CopyToReg. 
+                    //SDValue(copyToRegFalse.getNode(), 1)
+                    //setCCNode->getOperand(1),
+                    //copyToRegOp2
+                    );
+  LLVM_DEBUG(dbgs() << " LowerOperation(): whereXY (chained with setCC) = ";
+             whereXY->print(dbgs());
+             dbgs() << "\n");
+
+  // From http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html
+  //DAG.ReplaceAllUsesWith(vselectNode, whereXY);
+  //return SDValue(whereXY, 0);
+
+#ifdef WORKING_WITH_PHYSICAL_REGISTER
+  SDValue copyFromRegDest = DAG.getCopyFromReg(
+                    //chain,
+                    //SDValue(endWhere, 0),
+                    SDValue(whereXY, 0),
+                    DL,
+                    regDest,
+                    TYPE_VECTOR_I16 // result type
+                    //endWhere->getOperand(0)); //RegTy);
+                    //SDValue(endWhere, 0)
+                    );
+#endif
+
+  /* VERY IMPORTANT: the rest of codegen is performed in ConnexTargetMachine.cpp,
+       PassPredicate,
+     since we do NOT want the scheduler to do OoO or even DCE on the
+     instructions we add - this was the case when we were generating
+     everything here in lowering.
+  */
+
+#ifdef WORKING_WITH_PHYSICAL_REGISTER
+  // The SDValue overload of ReplaceAllUsesWith takes a pointer to the
+  // replacement value, hence the address-of.
+  DAG.ReplaceAllUsesWith(vselectNode, &copyFromRegDest);
+  return copyFromRegDest;
+
+  /*
+  DAG.ReplaceAllUsesWith(vselectNode, whereXY);
+  return SDValue(whereXY, 0);
+  */
+#else
+  DAG.ReplaceAllUsesWith(vselectNode, whereXY);
+  return copyFromRegDest;
+#endif
+} // END LowerVSELECT()
+
+
+/*
+From http://llvm.org/docs/doxygen/html/classllvm_1_1TargetLowering.html:
+  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const
+    <<This callback is invoked for operations that are unsupported by the
+      target, which are registered to use 'custom' lowering, and whose defined
+      values are all legal. If the target has no operations that require
+      custom lowering, it need not implement this. The default implementation
+      of this aborts.>>
+ */
+// From [LLVM]/llvm38Nov2016/llvm/include/llvm/CodeGen/ISDOpcodes.h
+//
+// Dispatches a node marked for custom lowering to the matching Lower*()
+// helper, keyed on Op.getOpcode().
+//
+// Returns whatever the selected helper returns. When the f16 emulation is
+// compiled in and an FMUL/FADD node is not taken over by it, an empty
+// SDValue() is returned (presumably the legalizer then falls back to its
+// default handling - confirm against LegalizeDAG). Any opcode without a
+// handler - including ISD::VSELECT, whose call is commented out - ends in
+// llvm_unreachable.
+SDValue ConnexTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+  // This will print the numeric (decimal) value of the Opcode.
+  LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerOperation(): "
+                    << "Op.getOpcode() = " << Op.getOpcode()
+                    << ", getTargetNodeName() = "
+                    << getTargetNodeName(Op.getOpcode()) << "\n");
+
+  /*
+  LLVM_DEBUG(dbgs() << "ConnexTargetLowering::LowerOperation(): ISD::VSELECT = "
+               << ISD::VSELECT << "\n");
+  if (Op.getOpcode() == ISD::VSELECT)
+    LLVM_DEBUG(dbgs() << "ConnexTargetLowering::LowerOperation() - ISD::VSELECT\n");
+  */
+
+
+  switch (Op.getOpcode()) {
+   #ifdef NEW_BIGGER_OPS
+      //!!!! TODO TODO: check for the type to be i32/u32
+            (Op.getOperand(0).getValueType() == MVT::i32) &&
+            (Op.getOperand(1).getValueType() == MVT::i32) {
+        /*
+         * % NOTE: reg alloc is NOT performed yet - but this is
+         *     dataflow mostly...
+         *
+        // Let's do an incorrect, but simpler version:
+        Dest_v32i16 = Src1_v32i16 ADD Src2_v32i16
+        //Reg_dest_low16 = Reg_src1_low16 ADD Reg_src2_low16
+        //Reg_dest_high16 = Reg_src1_high16 ADD Reg_src2_high16
+        Reg_tmp = 1
+        //WHERE INDEX & 1 == 0
+        LDIX (load index of the Processing Element) %to Reg_tmp2
+        AND 1
+        == 0
+        WHERE true
+          WHERE CARRY
+            Reg_dest_high16 = Reg_src1_high16 ADD Reg_tmp
+          END_WHERE
+        END_WHERE
+        */
+        return DAG.getNode(ConnexISD::ADD,
+                     DL,
+                     Op.getValueType(),
+                     Chain,
+                     LHS,
+                     RHS,
+                     // TODO_CHANGE_BACKEND:
+                     //DAG.getConstant(CC, DL, MVT::i64), Dest);
+                     DAG.getConstant(CC, DL, TYPE_SCALAR_ELEMENT), Dest);
+      }
+
+      /*
+       * The Op.getOperand(0).getValueType() == MVT::u32
+       * in this
+      return DAG.getNode(ConnexISD::ADD, DL, Op.getValueType(), Chain, LHS, RHS,
+                     // TODO_CHANGE_BACKEND:
+                     //DAG.getConstant(CC, DL, MVT::i64), Dest);
+                     DAG.getConstant(CC, DL, TYPE_SCALAR_ELEMENT), Dest);
+      */
+      return Lower(Op, DAG);
+   #endif
+
+ #ifdef DO_F16_EMULATION_IN_ISEL_LOWERING
+  // NEW_FP16
+  /*
+  case ISD::Intrinsic::connex_reduce_f16: {
+    LLVM_DEBUG(dbgs() << "LowerOperation() for Intrinsic::connex_reduce_f16\n");
+
+    SDLoc DL(Op);
+    SDNode *Node = Op.getNode();
+    EVT ResVecTy = Node->getValueType(0);
+    // MEGA-TODO: input opnd has to have type TYPE_VECTOR_F16
+    if (ResVecTy == MVT::f16) {
+      LLVM_DEBUG(dbgs() << "LowerOperation() for Intrinsic::connex_reduce_f16 for f16\n");
+      return LowerREDUCE_F16(Op, &DAG);
+    }
+
+    break;
+  }
+  */
+
+  // 2018_08_17_HANDLING_F16_IN_ISEL_LOWERING
+  case ISD::FMUL: {
+    LLVM_DEBUG(dbgs() << "LowerOperation() for FMUL\n");
+
+    SDLoc DL(Op);
+    SDNode *Node = Op.getNode();
+    EVT ResVecTy = Node->getValueType(0);
+
+   #ifdef DO_MUL_F16_EMULATION_IN_ISEL_LOWERING
+    //if (ResVecTy == MVT::f16)
+    if (ResVecTy == TYPE_VECTOR_F16) {
+      LLVM_DEBUG(dbgs() << "LowerOperation() for FMUL for f16\n");
+      return LowerMUL_F16(Op, &DAG);
+    }
+   #endif
+
+    // Not emulated here: leave the switch and return an empty value below.
+    break;
+  }
+  case ISD::FADD: {
+    LLVM_DEBUG(dbgs() << "LowerOperation() for FADD\n");
+
+    SDLoc DL(Op);
+    SDNode *Node = Op.getNode();
+    EVT ResVecTy = Node->getValueType(0);
+
+   #ifdef DO_ADD_F16_EMULATION_IN_ISEL_LOWERING
+    //if (ResVecTy == MVT::f16)
+    if (ResVecTy == TYPE_VECTOR_F16) {
+      LLVM_DEBUG(dbgs() << "LowerOperation() for FADD for f16\n");
+      return LowerADD_F16(Op, &DAG);
+      //return DAG.getNode(Connex::ADD_rr,
+      //                   DL,
+      //                   Op.getValueType(),
+      //                   Op.getOperand(1),
+      //                   Op.getOperand(2));
+    }
+   #endif
+
+    // Not emulated here: leave the switch and return an empty value below.
+    break;
+  }
+#endif // #ifdef DO_F16_EMULATION_IN_ISEL_LOWERING
+
+  case ISD::BR_CC:
+    return LowerBR_CC(Op, DAG);
+  case ISD::GlobalAddress:
+    return LowerGlobalAddress(Op, DAG);
+  case ISD::SELECT_CC:
+    return LowerSELECT_CC(Op, DAG);
+  case ISD::INSERT_VECTOR_ELT:
+    // Inspired from [LLVM]/llvm/lib/Target/ARM/ARMISelLowering.cpp
+    return LowerINSERT_VECTOR_ELT(Op, DAG);
+  case ISD::EXTRACT_VECTOR_ELT:
+    // From [LLVM]/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+    // return EXTRACT_VECTOR_ELT;
+  case ISD::BUILD_VECTOR:
+    // From [LLVM]/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+    return LowerBUILD_VECTOR(Op, DAG);
+  case ISD::VECTOR_SHUFFLE:
+    // From [LLVM]/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+    return LowerVECTOR_SHUFFLE(Op, DAG);
+
+  // Inspired from lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+
+  // From [LLVM]/llvm/lib/Target/Mips/MipsISelLowering.cpp
+  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+
+  case ISD::MGATHER:
+    // From [LLVM]/llvm/lib/Target/X86/X86ISelLowering.cpp
+    return LowerMGATHER(Op, DAG);
+
+  case ISD::MSCATTER:
+    // From [LLVM]/llvm/lib/Target/X86/X86ISelLowering.cpp
+    return LowerMSCATTER(Op, DAG);
+
+  #ifdef TREAT_SETCC_VSELECT
+    /*
+    // Inspired From lib/Target/Mips/MipsSEISelLowering.cpp
+    static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) {
+      bool IsV216 = (Ty == MVT::v2i16);
+
+      switch (CC) {
+      case ISD::SETEQ:
+      case ISD::SETNE:  return true;
+      case ISD::SETLT:
+      case ISD::SETLE:
+      case ISD::SETGT:
+      case ISD::SETGE:  return IsV216;
+      case ISD::SETULT:
+      case ISD::SETULE:
+      case ISD::SETUGT:
+      case ISD::SETUGE: return !IsV216;
+      default:          return false;
+      }
+    }
+
+    case ISD::SETCC:
+        //static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
+          SDNode *N = Op.getNode();
+
+          EVT Ty = N->getValueType(0);
+
+          if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
+            return SDValue();
+
+          if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get()))
+            return SDValue();
+
+          return DAG.getNode(MipsISD::SETCC_DSP,
+                             SDLoc(N),
+                             Ty,
+                             N->getOperand(0),
+                             N->getOperand(1),
+                             N->getOperand(2));
+        //}
+
+
+    */
+  #endif
+
+  // The VSELECT handler is disabled, so this case runs into the default
+  // label below.
+  case ISD::VSELECT: {
+    //return LowerVSELECT(Op, DAG);
+  } // END ISD::VSELECT
+
+  default:
+    llvm_unreachable("unimplemented operand");
+  }
+
+  // Only reached through the `break`s of the FMUL/FADD cases above. Without
+  // this return, control would run off the end of a value-returning function.
+  return SDValue();
+} // END ConnexTargetLowering::LowerOperation
+
+
+
+// Calling Convention Implementation
+#include "ConnexGenCallingConv.inc"
+
+
+
+// Taken from lib/Target/Mips/MipsISelLowering.cpp
+static SDValue UnpackFromArgumentSlot(SDValue Val, const CCValAssign &VA,
+                                      EVT ArgVT, const SDLoc &DL,
+                                      SelectionDAG &DAG) {
+  MVT LocVT = VA.getLocVT();
+  EVT ValVT = VA.getValVT();
+
+  // Shift into the upper bits if necessary.
+ switch (VA.getLocInfo()) { + default: + break; + case CCValAssign::AExtUpper: + case CCValAssign::SExtUpper: + case CCValAssign::ZExtUpper: { + unsigned ValSizeInBits = ArgVT.getSizeInBits(); + unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); + unsigned Opcode = + VA.getLocInfo() == CCValAssign::ZExtUpper ? ISD::SRL : ISD::SRA; + Val = DAG.getNode( + Opcode, DL, VA.getLocVT(), Val, + DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); + break; + } + } + + // If this is an value smaller than the argument slot size (32-bit for O32, + // 64-bit for N32/N64), it has been promoted in some way to the argument slot + // size. Extract the value and insert any appropriate assertions regarding + // sign/zero extension. + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: + break; + case CCValAssign::AExtUpper: + case CCValAssign::AExt: + Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); + break; + case CCValAssign::SExtUpper: + case CCValAssign::SExt: + Val = DAG.getNode(ISD::AssertSext, DL, LocVT, Val, DAG.getValueType(ValVT)); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); + break; + case CCValAssign::ZExtUpper: + case CCValAssign::ZExt: + Val = DAG.getNode(ISD::AssertZext, DL, LocVT, Val, DAG.getValueType(ValVT)); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); + break; + case CCValAssign::BCvt: + Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val); + break; + } + + return Val; +} + +#ifdef NOT_NOT_NOT +void ConnexTargetLowering::writeVarArgRegs(std::vector &OutChains, + SDValue Chain, const SDLoc &DL, + SelectionDAG &DAG, + CCState &State) const { + ArrayRef ArgRegs = ABI.GetVarArgRegs(); + unsigned Idx = State.getFirstUnallocated(ArgRegs); + unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); + MVT RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); + const TargetRegisterClass *RC = getRegClassFor(RegTy); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = 
MF.getFrameInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo(); + + // Offset of the first variable argument from stack pointer. + int VaArgOffset; + + if (ArgRegs.size() == Idx) + VaArgOffset = alignTo(State.getNextStackOffset(), RegSizeInBytes); + else { + VaArgOffset = + (int)ABI.GetCalleeAllocdArgSizeInBytes(State.getCallingConv()) - + (int)(RegSizeInBytes * (ArgRegs.size() - Idx)); + } + + // Record the frame index of the first variable argument + // which is a value necessary to VASTART. + int FI = MFI->CreateFixedObject(RegSizeInBytes, VaArgOffset, true); + MipsFI->setVarArgsFrameIndex(FI); + + // Copy the integer registers that have not been used for argument passing + // to the argument register save area. For O32, the save area is allocated + // in the caller's stack frame, while for N32/64, it is allocated in the + // callee's stack frame. + for (unsigned I = Idx; I < ArgRegs.size(); + ++I, VaArgOffset += RegSizeInBytes) { + unsigned Reg = addLiveIn(MF, ArgRegs[I], RC); + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy); + FI = MFI->CreateFixedObject(RegSizeInBytes, VaArgOffset, true); + SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); + SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, + MachinePointerInfo(), false, false, 0); + cast(Store.getNode())->getMemOperand()->setValue( + (Value *)nullptr); + OutChains.push_back(Store); + } +} +#endif + +SDValue ConnexTargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl &Ins, const SDLoc &DL, + SelectionDAG &DAG, SmallVectorImpl &InVals) const { + LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerFormalArguments()\n"); + + switch (CallConv) { + default: + llvm_unreachable("Unsupported calling convention"); + case CallingConv::C: + case CallingConv::Fast: + break; + } + + // Inspired from lib/Target/Mips/MipsISelLowering.cpp, MipsTargetLowering::LowerFormalArguments(): + // Used with vargs to 
acumulate store chains. + std::vector OutChains; + + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + + // Assign locations to all of the incoming arguments. + SmallVector ArgLocs; + CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + +// TODO_CHANGE_BACKEND: + //CCInfo.AnalyzeFormalArguments(Ins, CC_Connex64); + CCInfo.AnalyzeFormalArguments(Ins, CC_Connex64); + + unsigned i = 0; + for (auto &VA : ArgLocs) { + if (VA.isRegLoc()) { + LLVM_DEBUG(dbgs() << "LowerFormalArguments(): case VA.isRegLoc()\n"); + // Arguments passed in registers + EVT RegVT = VA.getLocVT(); + switch (RegVT.getSimpleVT().SimpleTy) { + default: { + errs() << "LowerFormalArguments Unhandled argument type: " + << RegVT.getEVTString() << '\n'; + llvm_unreachable(0); + } + // TODO_CHANGE_BACKEND: + case TYPE_SCALAR_ELEMENT: + unsigned VReg = RegInfo.createVirtualRegister(&Connex::GPRRegClass); + RegInfo.addLiveIn(VA.getLocReg(), VReg); + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT); + + // If this is an 8/16/32-bit value, it is really passed promoted to 64 + // bits. Insert an assert[sz]ext to capture this, then truncate to the + // right size. 
+ if (VA.getLocInfo() == CCValAssign::SExt) + ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + else if (VA.getLocInfo() == CCValAssign::ZExt) + ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + + if (VA.getLocInfo() != CCValAssign::Full) + ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue); + + InVals.push_back(ArgValue); + } + } // END VA.isRegLoc() + else { + /* + fail(DL, DAG, "defined with too many args"); + InVals.push_back(DAG.getConstant(0, DL, VA.getLocVT())); + */ + + LLVM_DEBUG(dbgs() << "LowerFormalArguments(): case NOT VA.isRegLoc()\n"); + + // Inspired from lib/Target/Mips/MipsISelLowering.cpp, MipsTargetLowering::LowerFormalArguments(): + MachineFrameInfo &MFI = MF.getFrameInfo(); + + MVT LocVT = VA.getLocVT(); + + /* + if (ABI.IsO32()) { + // We ought to be able to use LocVT directly but O32 sets it to i32 + // when allocating floating point values to integer registers. + // This shouldn't influence how we load the value into registers unless + // we are targeting softfloat. + if (VA.getValVT().isFloatingPoint() && !Subtarget.useSoftFloat()) + LocVT = VA.getValVT(); + } + */ + // sanity check + assert(VA.isMemLoc()); + + // The stack pointer offset is relative to the caller stack frame. 
+ int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, + VA.getLocMemOffset(), true); + + // Create load nodes to retrieve arguments from the stack + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); + SDValue ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, + MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FI)); + // 2019_03_30: false, false, false, 0); + OutChains.push_back(ArgValue.getValue(1)); + + ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG); + + InVals.push_back(ArgValue); + } + + i++; + } + + /* + if (IsVarArg || MF.getFunction()->hasStructRetAttr()) { + fail(DL, DAG, "functions with VarArgs or StructRet are not supported"); + } + */ + + // Inspired from lib/Target/Mips/MipsISelLowering.cpp, MipsTargetLowering::LowerFormalArguments(): +#ifdef NOT_NOT_NOT + ConnexFunctionInfo *MipsFI = MF.getInfo(); + unsigned e = ArgLocs.size(); + for (i = 0 ; i != e; ++i) { + // The mips ABIs for returning structs by value requires that we copy + // the sret argument into $v0 for the return. Save the argument into + // a virtual register so that we can access it from the return points. + if (Ins[i].Flags.isSRet()) { + unsigned Reg = MipsFI->getSRetReturnReg(); + if (!Reg) { + Reg = MF.getRegInfo().createVirtualRegister( + // TODO_CHANGE_BACKEND: + //getRegClassFor(ABI.IsN64() ? MVT::i64 : MVT::i32)); + getRegClassFor(ABI.IsN64() ? MVT::i64 : MVT::i32)); + MipsFI->setSRetReturnReg(Reg); + } + SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[i]); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain); + break; + } + } + + if (IsVarArg) + writeVarArgRegs(OutChains, Chain, DL, DAG, CCInfo); +#endif + + // All stores are grouped in one node to allow the matching between + // the size of Ins and InVals. 
This only happens when on varg functions + if (!OutChains.empty()) { + OutChains.push_back(Chain); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); + } + + return Chain; +} + + +const unsigned ConnexTargetLowering::MaxArgs = 5; + +SDValue ConnexTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const { + SelectionDAG &DAG = CLI.DAG; + auto &Outs = CLI.Outs; + auto &OutVals = CLI.OutVals; + auto &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &IsTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool IsVarArg = CLI.IsVarArg; + MachineFunction &MF = DAG.getMachineFunction(); + + // Connex target does not support tail call optimization. + IsTailCall = false; + + switch (CallConv) { + default: + report_fatal_error("Unsupported calling convention"); + case CallingConv::Fast: + case CallingConv::C: + break; + } + + // Analyze operands of the call, assigning locations to each operand. + SmallVector ArgLocs; + CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + + CCInfo.AnalyzeCallOperands(Outs, CC_Connex64); + + unsigned NumBytes = CCInfo.getNextStackOffset(); + + if (Outs.size() > MaxArgs) + fail(CLI.DL, DAG, "too many args to ", Callee); + + for (auto &Arg : Outs) { + ISD::ArgFlagsTy Flags = Arg.Flags; + if (!Flags.isByVal()) + continue; + + fail(CLI.DL, DAG, "pass by value not supported ", Callee); + } + + auto PtrVT = getPointerTy(MF.getDataLayout()); + Chain = DAG.getCALLSEQ_START(Chain, + // 2019_3_30: DAG.getConstant(NumBytes, CLI.DL, PtrVT, true), + NumBytes, 0, + CLI.DL); + + SmallVector, MaxArgs> RegsToPass; + + //LLVM_DEBUG(dbgs() << "DAG. 
= "; DAG.dump(); /* << "\n" */); + LLVM_DEBUG(dbgs() << "DAG = "; DAG.dump(); /* << "\n" */); + //LLVM_DEBUG(dbgs() << "CLI = " << CLI << "\n"); + LLVM_DEBUG(dbgs() << "InVals.size() = " << InVals.size() << "\n"); + + for (unsigned j = 0; j < InVals.size(); ++j) { + //LLVM_DEBUG(dbgs() << "InVals[j] = " << InVals[j] << "\n"); + LLVM_DEBUG(dbgs() << "InVals[" << j << "] = "; + InVals[j]->dump(); /* << "\n" */); + } + LLVM_DEBUG(dbgs() << "ArgLocs.size() = " << ArgLocs.size() << "\n"); + + // Walk arg assignments + for (unsigned i = 0, + e = std::min(static_cast(ArgLocs.size()), MaxArgs); + i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue Arg = OutVals[i]; + + //LLVM_DEBUG(dbgs() << "ArgLocs[i] = " << ArgLocs[i] << "\n"); + LLVM_DEBUG(dbgs() << "Arg = "; Arg->dump(); /* << "\n" */); + + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown loc info"); + case CCValAssign::Full: + break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, CLI.DL, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, CLI.DL, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, CLI.DL, VA.getLocVT(), Arg); + break; + } + + // Push arguments into RegsToPass vector + if (VA.isRegLoc()) + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + else { + if (VA.isMemLoc()) + LLVM_DEBUG(dbgs() << "VA.isMemLoc() == true\n"); + llvm_unreachable("call arg pass bug"); + } + } + + SDValue InFlag; + + // Build a sequence of copy-to-reg nodes chained together with token chain and + // flag operands which copy the outgoing args into registers. The InFlag in + // necessary since all emitted instructions must be stuck together. 
+ for (auto &Reg : RegsToPass) { + Chain = DAG.getCopyToReg(Chain, CLI.DL, Reg.first, Reg.second, InFlag); + InFlag = Chain.getValue(1); + } + + // If the callee is a GlobalAddress node (quite common, every direct call is) + // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. + // Likewise ExternalSymbol -> TargetExternalSymbol. + if (GlobalAddressSDNode *G = dyn_cast(Callee)) + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT, + G->getOffset(), 0); + else if (ExternalSymbolSDNode *E = dyn_cast(Callee)) + Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0); + + // Returns a chain & a flag for retval copy to use. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SmallVector Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are + // known live into the call. + for (auto &Reg : RegsToPass) + Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); + + if (InFlag.getNode()) + Ops.push_back(InFlag); + + Chain = DAG.getNode(ConnexISD::CALL, CLI.DL, NodeTys, Ops); + InFlag = Chain.getValue(1); + + // Create the CALLSEQ_END node. + Chain = DAG.getCALLSEQ_END( + Chain, DAG.getConstant(NumBytes, CLI.DL, PtrVT, true), + DAG.getConstant(0, CLI.DL, PtrVT, true), InFlag, CLI.DL); + InFlag = Chain.getValue(1); + + // Handle result values, copying them out of physregs into vregs that we + // return. 
+ return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, CLI.DL, DAG, + InVals); +} + + +SDValue ConnexTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, + bool IsVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SDLoc &DL, SelectionDAG &DAG) const { + unsigned Opc = ConnexISD::RET_FLAG; + + // CCValAssign - represent the assignment of the return value to a location + SmallVector RVLocs; + MachineFunction &MF = DAG.getMachineFunction(); + + // CCState - Info about the registers and stack slot. + CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); + + if (MF.getFunction().getReturnType()->isAggregateType()) { + fail(DL, DAG, "only integer returns supported"); + return DAG.getNode(Opc, DL, MVT::Other, Chain); + } + + // Analize return values. + CCInfo.AnalyzeReturn(Outs, RetCC_Connex64); + + SDValue Flag; + SmallVector RetOps(1, Chain); + + // Copy the result values into the output registers. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Flag); + + // Guarantee that all emitted copies are stuck together, + // avoiding something bad. + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } + + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. + if (Flag.getNode()) + RetOps.push_back(Flag); + + return DAG.getNode(Opc, DL, MVT::Other, RetOps); +} + + +SDValue ConnexTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, + bool IsVarArg, + const SmallVectorImpl &Ins, + const SDLoc &DL, + SelectionDAG &DAG, + SmallVectorImpl &InVals) const { + MachineFunction &MF = DAG.getMachineFunction(); + // Assign locations to each value returned by this call. 
+ SmallVector RVLocs; + CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); + + if (Ins.size() >= 2) { + fail(DL, DAG, "only small returns supported"); + for (unsigned i = 0, e = Ins.size(); i != e; ++i) + InVals.push_back(DAG.getConstant(0, DL, Ins[i].VT)); + return DAG.getCopyFromReg(Chain, DL, 1, Ins[0].VT, InFlag).getValue(1); + } + + CCInfo.AnalyzeCallResult(Ins, RetCC_Connex64); + + // Copy all of the result registers out of their specified physreg. + for (auto &Val : RVLocs) { + Chain = DAG.getCopyFromReg(Chain, DL, Val.getLocReg(), + Val.getValVT(), InFlag).getValue(1); + InFlag = Chain.getValue(2); + InVals.push_back(Chain.getValue(0)); + } + + return Chain; +} + +static void NegateCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) { + switch (CC) { + default: + break; + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETLT: + case ISD::SETLE: + CC = ISD::getSetCCSwappedOperands(CC); + std::swap(LHS, RHS); + break; + } +} + + +SDValue ConnexTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + ISD::CondCode CC = cast(Op.getOperand(1))->get(); + SDValue LHS = Op.getOperand(2); + SDValue RHS = Op.getOperand(3); + SDValue Dest = Op.getOperand(4); + SDLoc DL(Op); + + NegateCC(LHS, RHS, CC); + + return DAG.getNode(ConnexISD::BR_CC, DL, Op.getValueType(), Chain, LHS, RHS, + // TODO_CHANGE_BACKEND: + //DAG.getConstant(CC, DL, MVT::i64), Dest); + DAG.getConstant(CC, DL, TYPE_SCALAR_ELEMENT), Dest); +} + +SDValue ConnexTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue TrueV = Op.getOperand(2); + SDValue FalseV = Op.getOperand(3); + ISD::CondCode CC = cast(Op.getOperand(4))->get(); + SDLoc DL(Op); + + NegateCC(LHS, RHS, CC); + + // TODO_CHANGE_BACKEND: + //SDValue TargetCC = DAG.getConstant(CC, DL, MVT::i64); + SDValue TargetCC = DAG.getConstant(CC, DL, TYPE_SCALAR_ELEMENT); + + SDVTList VTs = 
DAG.getVTList(Op.getValueType(), MVT::Glue); + SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; + + return DAG.getNode(ConnexISD::SELECT_CC, DL, VTs, Ops); +} + +const char *ConnexTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch ((ConnexISD::NodeType)Opcode) { + case ConnexISD::FIRST_NUMBER: + break; + case ConnexISD::RET_FLAG: + return "ConnexISD::RET_FLAG"; + case ConnexISD::CALL: + return "ConnexISD::CALL"; + case ConnexISD::SELECT_CC: + return "ConnexISD::SELECT_CC"; + case ConnexISD::BR_CC: + return "ConnexISD::BR_CC"; + case ConnexISD::Wrapper: + return "ConnexISD::Wrapper"; + // Inspired from lib/Target/Mips/MipsISelLowering.cpp + case ConnexISD::VSHF: + return "ConnexISD::VSHF"; + /* We should IGNORE gcc -Wswitch when it gives: + <> + See definition of NodeType in ConnexISelLowering.h. + */ + case ISD::MGATHER: + return "ISD::MGATHER"; + // Probably not good + //case ConnexISD::VSELECT: + /* We should IGNORE gcc -Wswitch when it gives: + <> + See definition of NodeType in ConnexISelLowering.h. 
+ */ + case ISD::VSELECT: + return "ISD::VSELECT"; + /* + case ConnexISD::ConstantPool: + return "ConnexISD::ConstantPool"; + */ + default: + //return TargetLowering::NodeType; + /* See + http://llvm.org/docs/doxygen/html/TargetLowering_8cpp_source.html + - returns nullptr: return TargetLowering::getTargetNodeName(Opcode); + */ + return "NONAME (getTargetNodeName NOT supporting this Opcode)"; + } + return nullptr; +} + +SDValue ConnexTargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + const GlobalValue *GV = cast(Op)->getGlobal(); + + // TODO_CHANGE_BACKEND: + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, TYPE_SCALAR_ELEMENT); + + // TODO_CHANGE_BACKEND: + return DAG.getNode(ConnexISD::Wrapper, DL, TYPE_SCALAR_ELEMENT, GA); +} + +MachineBasicBlock * +ConnexTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + assert(MI.getOpcode() == Connex::Select && "Unexpected instr type to insert"); + + // To "insert" a SELECT instruction, we actually have to insert the diamond + // control-flow pattern. The incoming instruction knows the destination vreg + // to set, the condition code register to branch on, the true/false values to + // select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator I = ++BB->getIterator(); + + // ThisMBB: + // ... + // TrueVal = ... 
+ // jmp_XX r1, r2 goto Copy1MBB + // fallthrough --> Copy0MBB + MachineBasicBlock *ThisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *Copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *Copy1MBB = F->CreateMachineBasicBlock(LLVM_BB); + + F->insert(I, Copy0MBB); + F->insert(I, Copy1MBB); + // Update machine-CFG edges by transferring all successors of the current + // block to the new block which will contain the Phi node for the select. + Copy1MBB->splice(Copy1MBB->begin(), BB, + std::next(MachineBasicBlock::iterator(MI)), BB->end()); + Copy1MBB->transferSuccessorsAndUpdatePHIs(BB); + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(Copy0MBB); + BB->addSuccessor(Copy1MBB); + + // Insert Branch if Flag + unsigned LHS = MI.getOperand(1).getReg(); + unsigned RHS = MI.getOperand(2).getReg(); + int CC = MI.getOperand(3).getImm(); + switch (CC) { + case ISD::SETGT: + BuildMI(BB, DL, TII.get(Connex::JSGT_rr)) + .addReg(LHS) + .addReg(RHS) + .addMBB(Copy1MBB); + break; + case ISD::SETUGT: + BuildMI(BB, DL, TII.get(Connex::JUGT_rr)) + .addReg(LHS) + .addReg(RHS) + .addMBB(Copy1MBB); + break; + case ISD::SETGE: + BuildMI(BB, DL, TII.get(Connex::JSGE_rr)) + .addReg(LHS) + .addReg(RHS) + .addMBB(Copy1MBB); + break; + case ISD::SETUGE: + BuildMI(BB, DL, TII.get(Connex::JUGE_rr)) + .addReg(LHS) + .addReg(RHS) + .addMBB(Copy1MBB); + break; + case ISD::SETEQ: + BuildMI(BB, DL, TII.get(Connex::JEQ_rr)) + .addReg(LHS) + .addReg(RHS) + .addMBB(Copy1MBB); + break; + case ISD::SETNE: + BuildMI(BB, DL, TII.get(Connex::JNE_rr)) + .addReg(LHS) + .addReg(RHS) + .addMBB(Copy1MBB); + break; + default: + report_fatal_error("unimplemented select CondCode " + Twine(CC)); + } + + // Copy0MBB: + // %FalseValue = ... + // # fallthrough to Copy1MBB + BB = Copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(Copy1MBB); + + // Copy1MBB: + // %Result = phi [ %FalseValue, Copy0MBB ], [ %TrueValue, ThisMBB ] + // ... 
+ BB = Copy1MBB; + BuildMI(*BB, BB->begin(), DL, TII.get(Connex::PHI), MI.getOperand(0).getReg()) + .addReg(MI.getOperand(5).getReg()) + .addMBB(Copy0MBB) + .addReg(MI.getOperand(4).getReg()) + .addMBB(ThisMBB); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + + +static bool isIndexVectorVal(const BuildVectorSDNode *N) { + unsigned int nOps = N->getNumOperands(); + + //SDValue Operand0 = N->getOperand(0); + + for (unsigned int i = 0; i < nOps; ++i) { + //if (N->getOperand(i) != Operand0) + // See http://llvm.org/docs/ProgrammersManual.html#the-isa-cast-and-dyn-cast-templates + ConstantSDNode *ctNode = dyn_cast(N->getOperand(i)); + if (ctNode == NULL) + return false; + + LLVM_DEBUG(dbgs() << " ctNode = "; ctNode->dump()); + + if (N->getConstantOperandVal(i) != i) + return false; + } + /* + if (Op->getOpcode() == ISD::UNDEF) + return true; + if (isConstantOrUndef(Op->getOperand(i))) + return true; + */ + + return true; +} + + +// From llvm/lib/Target/Mips/MipsSEISelLowering.cpp +static bool isSplatVector(const BuildVectorSDNode *N) { + unsigned int nOps = N->getNumOperands(); + assert(nOps > 1 && "isSplatVector has 0 or 1 sized build vector"); + + SDValue Operand0 = N->getOperand(0); + + for (unsigned int i = 1; i < nOps; ++i) { + if (N->getOperand(i) != Operand0) + return false; + } + + return true; +} + + +// From llvm/lib/Target/Mips/MipsSEISelLowering.cpp +static bool isConstantOrUndef(const SDValue Op) { + if (Op->getOpcode() == ISD::UNDEF) + return true; + if (isa(Op)) + return true; + if (isa(Op)) + return true; + return false; +} + + +// From /home/asusu/LLVM/llvm38Nov2016/llvm/lib/Target/Mips/MipsSEISelLowering.cpp +static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { + for (unsigned i = 0; i < Op->getNumOperands(); ++i) + if (isConstantOrUndef(Op->getOperand(i))) + return true; + return false; +} + + +// Getting inspired from lib/Target/X86/X86ISelLowering.cpp +SDValue 
ConnexTargetLowering::LowerBITCAST(SDValue Op, + SelectionDAG &DAG) const { + EVT SrcVT = Op.getOperand(0).getSimpleValueType(); + EVT DstVT = Op.getSimpleValueType(); + + LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerBITCAST(): " + << "SrcVT = " << SrcVT.getEVTString() + << ", DstVT = " << DstVT.getEVTString() + << ". Returning SrcVT... \n"); + + //return SDValue(); + //return Op; + return Op.getOperand(0); +} + + +SDValue ConnexTargetLowering::LowerADD_I32(SDValue Op, + SelectionDAG &DAG) const { + // TODO TODO: build opnd0&1 that takes the same operands, but have type TYPE_VECTOR_I16 + LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerADD_I32()\n"); + + assert(Op.getOperand(0).getValueType() == TYPE_VECTOR_I32); + + SDValue opnd0 = Op.getOperand(0); + SDValue opnd1 = Op.getOperand(1); + + // I need to convert the v128i16 vector operand to v64i32. + + SDValue opnd1Native = DAG.getNode(ISD::BITCAST, SDLoc(Op), + TYPE_VECTOR_I16, opnd0); + SDValue opnd2Native = DAG.getNode(ISD::BITCAST, SDLoc(Op), + TYPE_VECTOR_I16, opnd1); + + SDValue Result = DAG.getNode(ISD::ADD, + //ConnexISD::ADDV_H, + SDLoc(Op), + TYPE_VECTOR_I16, + opnd1Native, + opnd2Native); + + LLVM_DEBUG(dbgs() << "LowerADD_I32: UNSPECIFIED case\n"); + return Result; //SDValue(); +} + + +// From [LLVM]/llvm/lib/Target/Mips/MipsSEISelLowering.cpp +// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the +// backend. +// +// Lowers according to the following rules: +// - Constant splats are legal as-is as long as the SplatBitSize is a power of +// 2 less than or equal to 64 and the value fits into a signed 10-bit +// immediate +// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize +// is a power of 2 less than or equal to 64 and the value does not fit into a +// signed 10-bit immediate +// - Non-constant splats are legal as-is. +// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT. 
+// - All others are illegal and must be expanded. +SDValue ConnexTargetLowering::LowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerBUILD_VECTOR()\n"); + + BuildVectorSDNode *BVN = cast(Op); + EVT ResTy = Op->getValueType(0); + SDLoc DL(Op); + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + /* + if (!Subtarget.hasMSA() || !ResTy.is128BitVector()) + return SDValue(); + */ + + /* + From http://llvm.org/docs/doxygen/html/classllvm_1_1BuildVectorSDNode.html: + bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, + unsigned &SplatBitSize, bool &HasAnyUndefs, + unsigned MinSplatBits=0, bool isBigEndian=false) const + Check if this is a constant splat, and if so, find the smallest element + size that splats the vector. + + By constant splat we understand a vector filled with the same + constant value in all elements. + */ + if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs, 8, false) //, true) + //!Subtarget.isLittle()) + && SplatBitSize <= 64) { + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR(): Case isConstantSplat(): " + << "SplatValue = " << SplatValue + << ", SplatUndef = " << SplatUndef + << ", SplatBitSize = " << SplatBitSize + << "\n" + ); + /* + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: SplatValue = " << SplatValue.toString(10, 1) << "\n"); + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: SplatUndef = " << SplatUndef.toString(10, 1) << "\n"); + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: SplatBitSize = " << SplatBitSize << "\n"); + */ + + // We can only cope with 8 or 16 (NOT 32 or 64) bit elements + if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32) { // && SplatBitSize != 32 && SplatBitSize != 64) + + /* !!!! 
TODO TODO: NOT sure this is correct for case vector register is + v*i32 or v*i16 */ + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: canNOT cope with " << SplatBitSize + << " bits.\n"); + return SDValue(); + } + + // If the value fits into a simm10 then we can use ldi.[bhwd] + // However, if it isn't an integer type we will have to bitcast from an + // integer type first. Also, if there are any undefs, we must lower them + // to defined values first. + if (ResTy.isInteger() && !HasAnyUndefs && SplatValue.isSignedIntN(10)) { + // See http://llvm.org/docs/doxygen/html/classllvm_1_1SDValue.html + //LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: return Op (Op = " << Op << ")\n"); + LLVM_DEBUG(dbgs() << " LowerBUILD_VECTOR(): Case SIMM10 taken. " + << "(Op = "; Op->dump(); dbgs() << ")\n"); + + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: return Op\n"); + return Op; + + // TODO TODO TODO We should return as selected instruction VLOAD + } + + EVT ViaVecTy; + + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: SplatBitSize = " + << SplatBitSize << "\n"); + + switch (SplatBitSize) { + default: + return SDValue(); + + // TODO_CHANGE_BACKEND: + case 8: + //ViaVecTy = MVT::v16i8; + ViaVecTy = TYPE_VECTOR_I16; + break; + case 16: + ViaVecTy = TYPE_VECTOR_I16; + break; + case 32: + ViaVecTy = TYPE_VECTOR_I32; + break; + case 64: + ViaVecTy = MVT::v8i64; + /* !!!! TODO TODO: NOT sure this is correct for case vector register is + v*i32 or v*i16 */ + break; + /* + // There's no fill.d to fall back on for 64-bit values + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: case 64 return SDValue.\n"); + return SDValue(); + */ + } + + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: Before DAG.getConstant()\n"); + // SelectionDAG::getConstant will promote SplatValue appropriately. 
+ SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: After DAG.getConstant()\n"); + + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR(): " + << "(Result = "; Result->dump(); dbgs() << ")\n"); + + // See http://llvm.org/docs/doxygen/html/structllvm_1_1EVT.html + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR(): " + << "(ResTy = " << ResTy.getEVTString() << ")\n"); + + /* + // Bitcast to the type we originally wanted + if (ViaVecTy != ResTy) + Result = DAG.getNode(ISD::BITCAST, SDLoc(BVN), ResTy, Result); + */ + + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: return Result\n"); + return Result; + } + else + if (isSplatVector(BVN)) { // This is used for splat vectors filled with the same variable + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: case isSplatVector(BVN)\n"); + return Op; + } + else + if (isIndexVectorVal(BVN)) { + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: case isIndexVectorVal(BVN)\n"); + + SDNode *Res = DAG.getMachineNode(Connex::LDIX_H, + DL, + TYPE_VECTOR_I16 + // We add a chain edge + //CurDAG->getEntryNode() + //opChain + ); + return SDValue(Res, 0); + + // LDIX_H + //return Op; + } +//#ifdef NOT_USEFUL_SO_FAR + else + // This case seems to not have been taken for BUILD_VECTOR from + // reduction pattern - + // see Tests/201_LoopVectorize/27_reduce_bugs/isConstantOrUndefBUILD_VECTOR + if (!isConstantOrUndefBUILD_VECTOR(BVN)) { + LLVM_DEBUG(dbgs() + << "LowerBUILD_VECTOR: case !isConstantOrUndefBUILD_VECTOR(BVN)\n"); + + // Use INSERT_VECTOR_ELT operations rather than expand to stores. 
+ // The resulting code is the same length as the expansion, but it doesn't + // use memory operations + EVT ResTy = BVN->getValueType(0); + + assert(ResTy.isVector()); + + return Op; // Not 100% sure it covers all cases + } +//#endif + + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: UNSPECIFIED case\n"); + return SDValue(); +} + + +// Inspired from [LLVM]/llvm/lib/Target/ARM/ARMISelLowering.cpp +SDValue ConnexTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { + LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerINSERT_VECTOR_ELT().\n"); + + /* + TODO TODO: + We need to implement INSERT_VECTOR_ELT with: + WHERE INDEX == lane(op2) + VLOAD Rdst, ct (op3) + END_WHERE + */ + // INSERT_VECTOR_ELT is legal only for immediate indexes. + SDValue Lane = Op.getOperand(2); + if (!isa(Lane)) + return SDValue(); + + LLVM_DEBUG(dbgs() << "ConnexTargetLowering::LowerINSERT_VECTOR_ELT(): 2nd opnd (lane) is ct.\n"); + + return Op; +} + +/* +ALEX_TO_PROCESS +From /lib/Target/AMDGPU/AMDGPUISelLowering.h +/// This node is for VLIW targets and it is used to represent a vector + /// that is stored in consecutive registers with the same channel. 
+ /// For example: + /// |X |Y|Z|W| + /// T0|v.x| | | | + /// T1|v.y| | | | + /// T2|v.z| | | | + /// T3|v.w| | | | + BUILD_VERTICAL_VECTOR, + + +From /home/asusu/LLVM/llvm38Nov2016/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Vector = Op.getOperand(0); + SDValue Value = Op.getOperand(1); + SDValue Index = Op.getOperand(2); + + if (isa(Index) || + Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR) + return Op; + + Vector = vectorToVerticalVector(DAG, Vector); + SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), + Vector, Value, Index); + return vectorToVerticalVector(DAG, Insert); +} +*/ + + + +// From [LLVM]/llvm/lib/Target/Mips/MipsSEISelLowering.cpp +// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT. +// +// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We +// choose to sign-extend but we could have equally chosen zero-extend. The +// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT +// result into this node later (possibly changing it to a zero-extend in the +// process). +SDValue ConnexTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + + SDLoc DL(Op); + EVT ResTy = Op->getValueType(0); + SDValue Op0 = Op->getOperand(0); + EVT VecTy = Op0->getValueType(0); + + /* TODO : See http://llvm.org/docs/doxygen/html/classllvm_1_1SDValue.html - requires + to print each components: Type, operation, etc. */ + LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerEXTRACT_VECTOR_ELT(): Op = "); + + return SDValue(); +} + + +// Inspired from llvm/lib/Target/X86/X86ISelLowering.cpp: +// +// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as +// their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is +// one of the above mentioned nodes. 
It has to be wrapped because otherwise +// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only +// be used to form addressing mode. These wrapped nodes will be selected +// into MOV32ri. +SDValue ConnexTargetLowering::LowerConstantPool(SDValue Op, + SelectionDAG &DAG) const { + LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerConstantPool().\n"); + + ConstantPoolSDNode *CP = cast(Op); + + // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the + // global base reg. + //unsigned char OpFlag = Subtarget.classifyLocalReference(nullptr); + + /* If we avoid using WrapperKind in DAG.getNode() below then + * we end up with an instruction selection error like + <> TargetConstantPool:i64<<8 x i64> > 0 + llc: /home/asusu/LLVM/llvm38Nov2016/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1466: llvm::SDValue {anonymous}::DAGCombiner::combine(llvm::SDNode*): Assertion `N->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned NULL!"' failed.>> + (see /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/201_LoopVectorize/25_GOOD_map/NEW/6/UF_1/NEW/STDerr31 ) + */ + unsigned WrapperKind = ConnexISD::Wrapper; + + //CodeModel::Model M = DAG.getTarget().getCodeModel(); + + auto PtrVT = getPointerTy(DAG.getDataLayout()); + // See http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html + SDValue Result = DAG.getTargetConstantPool( + CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), + //OpFlag); + 0); + SDLoc DL(CP); + Result = DAG.getNode(WrapperKind, + //0, + DL, PtrVT, Result); + + return Result; +} + +// From [LLVM]/llvm/lib/Target/Mips/MipsSEISelLowering.h +/* Lower VECTOR_SHUFFLE into one of a number of instructions depending on the + indices in the shuffle. + + VERY IMPORTANT: Here, in ISelLowering the DAG Combiner changes + (I think in all cases) the vector_shuffle SDNode into a BUILD_VECTOR. 
+   So we have to identify it here, before the DAG Combiner changes it and
+   replace it with the equivalent Connex instructions.
+   In fact, the DAG Combiner combines, if possible, a few vector_shuffles
+   into only one - I personally find it annoying, without any real benefit...
+*/
+// Custom lowering of ISD::VECTOR_SHUFFLE.
+//
+// The shuffle mask is inspected for a "shift by constant delta" pattern
+// (mask[i] == i + delta, with delta = mask[0]). When the pattern is found, a
+// glued chain of Connex machine nodes is built and every use of the shuffle
+// node is redirected to it with DAG.ReplaceAllUsesWith().
+//
+// The function itself always returns an empty SDValue; the replacement (if
+// any) is performed as a side effect on the DAG.
+SDValue ConnexTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerVECTOR_SHUFFLE()\n");
+  LLVM_DEBUG(dbgs() << " Op = ";
+             (Op.getNode())->dump());
+
+  //return SDValue();
+
+  // Only used by the disabled NOT_ORIGINAL_CODE section at the end.
+  EVT ResTy = Op->getValueType(0);
+  (void)ResTy;
+
+  // See http://llvm.org/doxygen/SelectionDAGNodes_8h_source.html#l01432
+  // cast<> asserts that Op really is a ShuffleVectorSDNode.
+  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+
+  unsigned int numElemsMask = SVN->getValueType(0).getVectorNumElements();
+  assert(numElemsMask == CONNEX_VECTOR_LENGTH);
+
+  int mask[CONNEX_VECTOR_LENGTH];
+  for (unsigned int i = 0; i < numElemsMask; ++i) {
+    mask[i] = SVN->getMaskElt(i);
+    LLVM_DEBUG(dbgs() << " mask[" << i << "] = " << mask[i] << "\n");
+  }
+
+
+  if (mask[0] == 0) {
+    // It seems we have no shifting
+  }
+  else {
+    // It seems we have shifting by constant delta
+    int delta = mask[0];
+
+    // Signed copy of the element count, so that the loop bounds below are
+    // computed without signed/unsigned mixing.
+    const int numElems = static_cast<int>(numElemsMask);
+
+    // mask[0] may be negative (undefined lane) or >= numElems (lane taken
+    // from the 2nd operand). In both cases "numElems - delta" is not a valid
+    // bound and the scans below would read mask[] out of range, so such
+    // shuffles are not handled here.
+    if (delta < 0 || delta >= numElems) {
+      LLVM_DEBUG(dbgs() << " delta = " << delta
+                        << " is out of range, not lowering\n");
+      return SDValue();
+    }
+
+    // First lane index whose source (i + delta) falls past the 1st operand.
+    const int tailStart = numElems - delta;
+
+    bool shiftByDelta = true;
+    // Checking if we really have shifting by delta
+    int i;
+    for (i = 0; i < tailStart; ++i) {
+      // MEGA-TODO: we should also check that we have delta-shift w.r.t. the 2nd data vector operand: if (mask[i] != CVL + i + delta)
+      if (mask[i] != i + delta) {
+        shiftByDelta = false;
+        break;
+      }
+    }
+    LLVM_DEBUG(dbgs() << " shiftByDelta = " << shiftByDelta << "\n");
+
+    bool circularShiftByDelta = false;
+    if (shiftByDelta == true) {
+      circularShiftByDelta = true;
+      for (i = tailStart; i < numElems; ++i) {
+        // MEGA-TODO: we should also check that we have circular-delta-shift w.r.t. the 2nd data vector operand
+        if (mask[i] != i + delta) {
+          circularShiftByDelta = false;
+          break;
+        }
+      }
+    }
+    LLVM_DEBUG(dbgs() << " circularShiftByDelta = "
+                      << circularShiftByDelta << "\n");
+
+    // NOTE(review): the flag starts as true and the scan only runs when it
+    // is false, so the scan is dead code and the flag is always true here.
+    // Left unchanged because the intended condition is not clear - confirm.
+    bool assignPartOf2ndOpnd = true;
+    if (assignPartOf2ndOpnd == false) {
+      for (i = tailStart; i < numElems; ++i) {
+        if (mask[i] == CONNEX_VECTOR_LENGTH + i + delta) {
+          assignPartOf2ndOpnd = false;
+          break;
+        }
+      }
+    }
+    LLVM_DEBUG(dbgs() << " assignPartOf2ndOpnd = "
+                      << assignPartOf2ndOpnd << "\n");
+
+    MachineFunction &MF = DAG.getMachineFunction();
+    MachineRegisterInfo &RegInfo = MF.getRegInfo();
+    (void)RegInfo; // Only used when BUGGY_DUE_TO_DAG_COMBINER is defined
+
+    SDLoc DL(Op);
+    SDValue svnOp0 = SVN->getOperand(0);
+    SDValue svnOp1 = SVN->getOperand(1);
+    SDNode *ldSh = nullptr; // Def required here
+    //
+
+    if (circularShiftByDelta || shiftByDelta) {
+      SDValue ctDelta = DAG.getConstant(delta,
+                                        DL, MVT::i16, true, false);
+      SDNode *vloadDelta = DAG.getMachineNode(Connex::VLOAD_H,
+                                  DL,
+                                  TYPE_VECTOR_I16,
+                                  MVT::Glue,
+                                  ctDelta
+                                  // Glue (or chain) input edge
+                                  // TODO maybe: SDValue(ldIx, 1)
+                                  );
+
+      SDNode *cellShl = DAG.getMachineNode(Connex::CELLSHL_H,
+                                  DL,
+                                  // NO return type
+                                  MVT::Glue,
+                                  svnOp0,
+                                  SDValue(vloadDelta, 0),
+                                  // The glue input edge
+                                  SDValue(vloadDelta, 1)
+                                  );
+
+      // MEGA-TODO: put delta NOPs
+      SDValue ct1 = DAG.getConstant(1 /* Num of cycles to NOP */,
+                                    DL, MVT::i16, true, false);
+      SDNode *nop = DAG.getMachineNode(Connex::NOP_BPF,
+                                  DL,
+                                  MVT::Glue,
+                                  ct1,
+                                  // Glue/chain edge
+                                  SDValue(cellShl, 0)
+                                  );
+
+      ldSh = DAG.getMachineNode(Connex::LDSH_H,
+                                  DL,
+                                  // Return type
+                                  TYPE_VECTOR_I16,
+                                  MVT::Glue,
+                                  // The glue output port of predecessor
+                                  SDValue(nop, 0)
+                                  );
+    } // END if (circularShiftByDelta || shiftByDelta)
+
+  #ifdef BUGGY_DUE_TO_DAG_COMBINER
+    unsigned virtReg = RegInfo.createVirtualRegister(&Connex::VectorHRegClass);
+    /*
+     * VERY IMPORTANT:
+     * From http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html:
+     *  SDValue getCopyToReg(SDValue Chain, SDLoc dl,
+                             unsigned Reg,
+                             SDValue N,
+                             SDValue Glue)
+     */
+    SDValue copyToReg = DAG.getCopyToReg(
+                          // VERY IMPORTANT: Chain input edge
+                          (circularShiftByDelta || shiftByDelta) ?
+                            SDValue(ldSh, 1) :
+                            DAG.getEntryNode(),
+
+                          DL,
+                          virtReg,
+
+                          // Value copied to register
+                          (circularShiftByDelta || shiftByDelta) ?
+                            SDValue(ldSh, 0) : svnOp1,
+                          // VERY IMPORTANT: Glue input edge
+                          (circularShiftByDelta || shiftByDelta) ?
+                            SDValue(ldSh, 1) :
+                            DAG.getEntryNode() // Hope this passes as a glue
+                          );
+    LLVM_DEBUG(dbgs() << " copyToReg = ";
+               (copyToReg.getNode())->dump());
+  #endif
+
+    SDNode *endWhere = nullptr; // Definition required
+
+    if (assignPartOf2ndOpnd) {
+      SDNode *ldIx = DAG.getMachineNode(Connex::LDIX_H,
+                                  DL,
+                                  TYPE_VECTOR_I16,
+                                  MVT::Glue,
+                                  // We add a chain edge
+                                  (circularShiftByDelta || shiftByDelta) ?
+                                    SDValue(ldSh, 1) :
+                                    DAG.getEntryNode()
+                                  );
+
+      SDValue ctCVLDelta = DAG.getConstant(CONNEX_VECTOR_LENGTH - delta,
+                                           DL, MVT::i16, true, false);
+      SDNode *vloadCVLDelta = DAG.getMachineNode(Connex::VLOAD_H,
+                                  DL,
+                                  TYPE_VECTOR_I16,
+                                  MVT::Glue,
+                                  ctCVLDelta,
+                                  // Glue (or chain) input edge
+                                  SDValue(ldIx, 1)
+                                  );
+
+      SDNode *lt = DAG.getMachineNode(Connex::LT_H,
+                                  DL,
+                                  TYPE_VECTOR_I16,
+                                  MVT::Glue,
+                                  SDValue(ldIx, 0),
+                                  SDValue(vloadCVLDelta, 0),
+                                  // Glue (or chain) input edge
+                                  SDValue(vloadCVLDelta, 1)
+                                  );
+
+      SDValue ct1 = DAG.getConstant(1 /* Num of cycles to NOP */,
+                                    DL, MVT::i16, true, false);
+      SDNode *nop = DAG.getMachineNode(Connex::NOP_BPF,
+                                  DL,
+                                  MVT::Glue,
+                                  ct1,
+                                  // Glue/chain edge
+                                  SDValue(lt, 1)
+                                  );
+
+      SDNode *whereLt = DAG.getMachineNode(Connex::WHERELT, //_BUNDLE_H,
+                                  DL,
+                                  // Return type
+                                  TYPE_VECTOR_I16,
+                                  MVT::Glue,
+                                  SDValue(lt, 0),
+                                  //svnOp1,
+                                  // The glue output port of CopyToReg.
+                                  SDValue(nop, 0)
+                                  );
+
+      SDValue ct0 = DAG.getConstant(0, DL, MVT::i16, true, false);
+      SDNode *ishl = DAG.getMachineNode(
+                                  Connex::ISHLV_SPECIAL_H,
+                                  DL,
+                                  TYPE_VECTOR_I16,
+                                  MVT::Glue,
+                                  svnOp1,
+                                  ct0,
+                                #ifdef BUGGY_DUE_TO_DAG_COMBINER
+                                  DAG.getRegister(virtReg, TYPE_VECTOR_I16),
+                                #else
+                                  (circularShiftByDelta || shiftByDelta) ?
+                                    SDValue(ldSh, 0) : svnOp1,
+                                #endif
+                                  // Glue (or chain) input edge
+                                  SDValue(whereLt, 1)
+                                  );
+      endWhere = DAG.getMachineNode(
+                                  Connex::END_WHERE,
+                                  DL,
+                                  TYPE_VECTOR_I16,
+                                  MVT::Glue,
+                                  SDValue(ishl, 0),
+                                  // Glue (or chain) input edge
+                                  SDValue(ishl, 1)
+                                  );
+    } // END if (assignPartOf2ndOpnd)
+
+    if (assignPartOf2ndOpnd)
+      DAG.ReplaceAllUsesWith(SVN, endWhere);
+    else
+    if (circularShiftByDelta || shiftByDelta)
+      DAG.ReplaceAllUsesWith(SVN, ldSh);
+  }
+
+  return SDValue();
+
+  /*
+  ShuffleVectorSDNode *N = SVN;
+  unsigned int nOps = N->getNumOperands();
+  for (unsigned int i = 0; i < nOps; ++i) {
+    // See http://llvm.org/docs/ProgrammersManual.html#the-isa-cast-and-dyn-cast-templates
+    ConstantSDNode *ctNode = dyn_cast<ConstantSDNode>(N->getOperand(i));
+    LLVM_DEBUG(dbgs() << " ctNode = " << ctNode << "\n");
+    if (ctNode == NULL)
+      continue; //return false;
+
+    LLVM_DEBUG(dbgs() << " *ctNode = "; ctNode->dump());
+
+    //if (N->getConstantOperandVal(i) != i)
+    //  return false;
+  }
+  */
+
+
+  //MEGA-TODO: check for delta..CVL-delta, 2CVL-delta.. 2CVL-1
+  // MEGA-TODO TODO TODO: else if BVN is 0..x x + CVL + 1 .. 2CVL-1
+
+  /* !!!! TODO: here it was cycling forever in reduction
+     loop code - see /home/asusu/LLVM/llvm38Nov2016/llvm/build30/bin/Tests/201_LoopVectorize/27_reduce_bugs/STDerr_old15
+     for exact details. */
+
+  // Note: HexagonISelLowering.cpp has also method LowerVECTOR_SHIFT()
+
+  #ifdef NOT_ORIGINAL_CODE
+  // From MipsISelLowering.cpp
+  ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op);
+
+  int ResTyNumElts = ResTy.getVectorNumElements();
+  SmallVector<int, 16> Indices;
+
+  for (int i = 0; i < ResTyNumElts; ++i)
+    Indices.push_back(Node->getMaskElt(i));
+
+  // splati.[bhwd] is preferable to the others but is matched from
+  // MipsISD::VSHF.
+  if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG))
+    return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
+  SDValue Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG);
+  if (Result.getNode())
+    return Result;
+  Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG);
+  if (Result.getNode())
+    return Result;
+  Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG);
+  if (Result.getNode())
+    return Result;
+  Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG);
+  if (Result.getNode())
+    return Result;
+  Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG);
+  if (Result.getNode())
+    return Result;
+  Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG);
+  if (Result.getNode())
+    return Result;
+  Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG);
+  if (Result.getNode())
+    return Result;
+  return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
+  #endif
+}
+
+
+/* From http://llvm.org/docs/doxygen/html/classllvm_1_1TargetLoweringBase.html:
+virtual EVT getSetCCResultType (const DataLayout &DL, LLVMContext &Context, EVT VT) const
+    Return the ValueType of the result of SETCC operations.
+
+See also https://github.com/llvm-mirror/llvm/blob/master/lib/CodeGen/TargetLoweringBase.cpp
+ */
+// This code fixes the issue with type legalization of vector type:
+// Reported in llvm-dev thread:
+//   http://lists.llvm.org/pipermail/llvm-dev/2016-June/100719.html
+//
+// Result type of a SETCC on operands of type VT:
+//   - vector VT: the same vector shape with integer elements;
+//   - scalar VT: the target pointer type for the given data layout.
+EVT ConnexTargetLowering::getSetCCResultType(const DataLayout &DL,
+                                             LLVMContext &Ctx,
+                                             EVT VT) const {
+  LLVM_DEBUG(dbgs()
+               << "Entered ConnexTargetLowering::getSetCCResultType().\n"
+               << " VT = "
+               // See http://llvm.org/docs/doxygen/html/structllvm_1_1EVT.html
+               << VT.getEVTString()
+               << " [END]\n");
+
+  // Scalar case first: same as the code in lib/CodeGen/TargetLoweringBase.cpp
+  if (!VT.isVector()) {
+    EVT scalarResTy = getPointerTy(DL).SimpleTy;
+
+    LLVM_DEBUG(dbgs() << "getSetCCResultType(): res = "
+                 // See http://llvm.org/docs/doxygen/html/structllvm_1_1EVT.html
+                 << scalarResTy.getEVTString()
+                 << " [END]\n");
+
+    return scalarResTy;
+  }
+
+  LLVM_DEBUG(dbgs() << "getSetCCResultType(): "
+                    << "VT.getVectorNumElements() = "
+                    << VT.getVectorNumElements()
+                    << "\n");
+
+  // From llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+  //res = EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
+
+  // From llvm/lib/Target/Mips/MipsISelLowering.h
+  EVT vectorResTy = VT.changeVectorElementTypeToInteger();
+
+  LLVM_DEBUG(dbgs()
+               << "getSetCCResultType(), case VT.isVector(): res = "
+               // See http://llvm.org/docs/doxygen/html/structllvm_1_1EVT.html
+               << vectorResTy.getEVTString()
+               << " [END]\n");
+
+  return vectorResTy;
+
+  /*
+  // This was the original code from llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+  Cycles forever - see !!!!
+  return MVT::i1;
+  */
+
+  /* Messes up 25_Map (for types i16 or i32), etc:
+    llc gives assertion error:
+      llc: lib/CodeGen/SelectionDAG/SelectionDAG.cpp:3116: llvm::SDValue llvm::SelectionDAG::getNode(unsigned int, const llvm::SDLoc&, llvm::EVT, llvm::SDValue): Assertion `VT.isInteger() && Operand.getValueType().isInteger() && "Invalid ZERO_EXTEND!"' failed.
+  //return VT;
+  */
+}
+/*
+lib/Target/PowerPC/PPCISelLowering.cpp
+EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
+                                          EVT VT) const {
+  if (!VT.isVector())
+    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
+
+  if (Subtarget.hasQPX())
+    return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
+
+  return VT.changeVectorElementTypeToInteger();
+}
+*/
+
Index: lib/Target/Connex/ConnexInstrInfo.h
===================================================================
--- lib/Target/Connex/ConnexInstrInfo.h
+++ lib/Target/Connex/ConnexInstrInfo.h
@@ -0,0 +1,96 @@
+//===-- ConnexInstrInfo.h - Connex Instruction Information ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Connex implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CONNEX_CONNEXINSTRINFO_H
+#define LLVM_LIB_TARGET_CONNEX_CONNEXINSTRINFO_H
+
+#include "Connex.h"
+#include "ConnexRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "ConnexGenInstrInfo.inc"
+
+namespace llvm {
+
+// Connex implementation of the TargetInstrInfo interface (through the
+// TableGen-generated ConnexGenInstrInfo base class). Owns the target's
+// ConnexRegisterInfo instance.
+class ConnexInstrInfo : public ConnexGenInstrInfo {
+  const ConnexRegisterInfo RI;
+
+public:
+  ConnexInstrInfo();
+
+  const ConnexRegisterInfo &getRegisterInfo() const { return RI; }
+
+
+  // Got a bit inspired from lib/Target/AMDGPU/SIInstrInfo.cpp
+  bool expandPostRAPseudo(MachineInstr &MI) const override;
+
+
+  // Note: we do not use Pre-RA hazard recognizer since it works on the
+  //   MachineInstr immediately after 1st scheduling pass, which is before the,
+  //   RA, TwoAddressInstructionPass, etc - so a lot of other instructions
+  //   will be added after 1st scheduling pass.
+  //  We would like our post-RA Hazard recognizer to be able to reschedule
+  //   instructions in a different order (with the ScoreBoardHazardRecognizer)
+  //   in order to avoid inserting useless NOPs.
+
+  // USE_POSTRA_SCHED
+  // Got inspired from llvm/lib/Target/PowerPC/PPCInstrInfo.h
+  ScheduleHazardRecognizer *
+  CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+                                     const ScheduleDAG *DAG) const override;
+
+
+  ScheduleHazardRecognizer *
+  CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
+                                 const ScheduleDAG *DAG) const override;
+
+  void insertNoop(MachineBasicBlock &MBB,
+                  MachineBasicBlock::iterator MI) const override;
+
+
+  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                   const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+                   bool KillSrc) const override;
+
+  void storeRegToStackSlot(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MBBI, unsigned SrcReg,
+                           bool isKill, int FrameIndex,
+                           const TargetRegisterClass *RC,
+                           const TargetRegisterInfo *TRI) const override;
+
+  void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MBBI, unsigned DestReg,
+                            int FrameIndex, const TargetRegisterClass *RC,
+                            const TargetRegisterInfo *TRI) const override;
+  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                     MachineBasicBlock *&FBB,
+                     SmallVectorImpl<MachineOperand> &Cond,
+                     bool AllowModify) const override;
+
+  unsigned removeBranch(MachineBasicBlock &MBB,
+                        int *BytesRemoved = nullptr) const override;
+
+  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
+                        const DebugLoc &DL,
+                        int *BytesAdded = nullptr) const override;
+
+  // NOTE(review): this takes a non-const MachineInstr&, so it presumably
+  // hides TargetInstrInfo::isPredicable(const MachineInstr &) instead of
+  // overriding it - confirm against the LLVM version this patch targets.
+  bool isPredicable(MachineInstr &MI) const;
+
+protected:
+  MachineMemOperand *GetMemOperand(MachineBasicBlock &MBB, int FI,
+                                   MachineMemOperand::Flags Flag) const;
+}; // end class ConnexInstrInfo
+} // end namespace llvm
+
+#endif
Index: lib/Target/Connex/ConnexInstrInfo.cpp
===================================================================
--- lib/Target/Connex/ConnexInstrInfo.cpp
+++ lib/Target/Connex/ConnexInstrInfo.cpp
@@ -0,0 +1,954 @@
+//===-- ConnexInstrInfo.cpp - Connex Instruction Information
----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Connex implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Connex.h"
+#include "ConnexHazardRecognizer.h" // USE_POSTRA_SCHED
+#include "ConnexHazardRecognizerPreRAScheduler.h"
+//#include "llvm/CodeGen/ScheduleDAG.h"
+#include "ConnexInstrInfo.h"
+#include "ConnexSubtarget.h"
+#include "ConnexTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Support/Debug.h"
+#define DEBUG_TYPE "connex-lower"
+
+#define GET_INSTRINFO_CTOR_DTOR
+#include "ConnexGenInstrInfo.inc"
+
+using namespace llvm;
+
+
+
+
+
+
+// Walks the basic block that contains MI, from its first instruction.
+//
+// Returns the instruction that immediately precedes MI in that block, or
+// nullptr when MI is the first instruction. *succMI receives the instruction
+// that immediately follows MI, or nullptr when MI is the last one (or is not
+// found).
+MachineInstr *getPredMachineInstr(MachineInstr *MI, MachineInstr **succMI) {
+  MachineBasicBlock *MBB = MI->getParent();
+
+  LLVM_DEBUG(dbgs() << "getPredMachineInstr(): MI.getOpcode() = "
+                    << MI->getOpcode() << "\n");
+
+  //switch (MI.getOpcode())
+
+  MachineInstr *predMI = nullptr;
+  /*
+  MachineInstr *succMI = NULL;
+  */
+  *succMI = nullptr;
+
+  for (MachineBasicBlock::iterator I = MBB->begin(),
+                                   IE = MBB->end(); I != IE; ++I) {
+    MachineInstr *IMI = &(*I);
+    if (IMI == MI) {
+      ++I;
+      // MI may be the last instruction of the block: the end() iterator
+      // must not be dereferenced, so *succMI stays nullptr in that case.
+      if (I != IE)
+        *succMI = &(*I);
+      break;
+    }
+    predMI = IMI;
+    LLVM_DEBUG(dbgs() << "getPredMachineInstr(): (I in MBB of MI) I->getOpcode() = "
+                      << I->getOpcode() << "\n");
+  }
+
+  LLVM_DEBUG(dbgs() << "getPredMachineInstr(): MI = "
+                    << MI
+                    << "(" << MI << ")"
+                    << "\n");
+  if (*succMI != nullptr) {
+    LLVM_DEBUG(dbgs() << "getPredMachineInstr(): succMI = "
+                    //We do not put this one because we can have issues with NULL/invalid MachineInstr (at least in case of llc -regalloc=fast) << **succMI
+                    << "[TO BE DONE]"
+                    << "(" << *succMI << ")"
+                    << "\n");
+  }
+  else {
+    LLVM_DEBUG(dbgs() << "getPredMachineInstr(): succMI = NULL\n");
+  }
+
+  if (predMI != nullptr) {
+    LLVM_DEBUG(dbgs() << "getPredMachineInstr(): predMI = "
+                      << *predMI
+                      << "(" << predMI << ")"
+                      << "\n");
+  }
+  else {
+    LLVM_DEBUG(dbgs() << "getPredMachineInstr(): predMI = NULL\n");
+  }
+
+  return predMI;
+}
+
+
+ConnexInstrInfo::ConnexInstrInfo()
+    : ConnexGenInstrInfo(Connex::ADJCALLSTACKDOWN, Connex::ADJCALLSTACKUP) {}
+
+
+// Inspired from lib/Target/Mips/MipsInstrInfo.cpp
+// Builds a MachineMemOperand for the fixed stack slot FI, using the size and
+// alignment the frame info records for that slot and the given flags.
+MachineMemOperand *ConnexInstrInfo::GetMemOperand(MachineBasicBlock &MBB,
+                                                  int FI,
+                                                  MachineMemOperand::Flags Flag
+                                                 ) const {
+  LLVM_DEBUG(dbgs() << "Entered ConnexInstrInfo::GetMemOperand()\n");
+
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  unsigned Align = MFI.getObjectAlignment(FI);
+
+  return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
+                                 Flag, MFI.getObjectSize(FI), Align);
+}
+
+
+/*
+From http://llvm.org/docs/doxygen/html/classllvm_1_1TargetInstrInfo.html:
+  virtual void copyPhysReg (MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const
+    Emit instructions to copy a pair of physical registers.
+  virtual void storeRegToStackSlot (MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const
+    Store the specified register of the given register class to the specified stack frame index.
+  virtual void loadRegFromStackSlot (MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const
+    Load the specified register of the given register class from the specified stack frame index.
+*/
+// Emits, before I, the instruction that copies SrcReg into DestReg:
+//   - both scalar (GPR): MOV_rr, with the kill state of SrcReg;
+//   - both vector (VectorH): ORV_H of SrcReg with itself;
+//   - either one a BoolMask register: nothing is emitted;
+//   - anything else: llvm_unreachable.
+void ConnexInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator I,
+                                  const DebugLoc &DL, unsigned DestReg,
+                                  unsigned SrcReg, bool KillSrc) const {
+  LLVM_DEBUG(dbgs()
+               << "Entered ConnexInstrInfo::copyPhysReg(I = " << *I
+               << ", DestReg = " << DestReg
+               << ", SrcReg = " << SrcReg
+               << ")\n");
+
+  // Scalar to scalar
+  if (Connex::GPRRegClass.contains(DestReg, SrcReg)) {
+    BuildMI(MBB, I, DL, get(Connex::MOV_rr), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
+
+  // Vector to vector
+  if (Connex::VectorHRegClass.contains(DestReg, SrcReg)) {
+    //llvm_unreachable("NOT implemented well!");
+
+    /*
+    // TODO_TODO
+    if (SrgReg == ct) {
+      BuildMI(MBB, I, DL, get(Connex::VLOAD_H), DestReg)
+        .addImm(ct) //, getKillRegState(KillSrc))
+        .addReg(SrcReg);
+    }
+    */
+
+    BuildMI(MBB, I, DL, get(Connex::ORV_H), DestReg)
+        .addReg(SrcReg) //, getKillRegState(KillSrc))
+        .addReg(SrcReg);
+    return;
+  }
+
+  // A BoolMask register on either side.
+  // NOTE(review): no instruction is emitted here, so no copy takes place.
+  //if (Connex::BoolMaskRegClass.contains(DestReg, SrcReg))
+  const bool involvesBoolMask = Connex::BoolMaskRegClass.contains(DestReg) ||
+                                Connex::BoolMaskRegClass.contains(SrcReg);
+  if (involvesBoolMask) {
+    LLVM_DEBUG(dbgs()
+      << "ConnexInstrInfo::copyPhysReg(): DestReg or SrcReg are in BoolMask\n");
+    /*
+    // IMPORTANT-TODO: what if register Wh31, also called R(31), is already in use for some other var?
+    BuildMI(MBB, I, DL, get(Connex::VLOAD_H), Connex::Wh31)
+      .addImm(0);
+
+    BuildMI(MBB, I, DL, get(Connex::ORV_H), DestReg)
+      .addReg(SrcReg) //, getKillRegState(KillSrc))
+      .addReg(Connex::Wh31, getKillRegState(KillSrc));
+    */
+    return;
+  }
+
+  /*
+  // PREFERABLY_NOT_2019_03_21
+  else
+  if ( (Connex::MSA128WRegClass.contains(DestReg) &&
+        Connex::VectorHRegClass.contains(SrcReg)) ||
+       //
+       (Connex::MSA128WRegClass.contains(SrcReg) &&
+        Connex::VectorHRegClass.contains(DestReg)) ) {
+
+    if (Connex::MSA128WRegClass.contains(DestReg)) {
+      LLVM_DEBUG(dbgs()
+        << "ConnexInstrInfo::copyPhysReg(): DestReg is TYPE_VECTOR_I32 and SrcReg is TYPE_VECTOR_I16\n");
+    }
+    else
+    if (Connex::MSA128WRegClass.contains(DestReg)) {
+      LLVM_DEBUG(dbgs()
+        << "ConnexInstrInfo::copyPhysReg(): DestReg is TYPE_VECTOR_I16 and SrcReg is TYPE_VECTOR_I32\n");
+    }
+
+    // BuildMI(MBB, I, DL, get(Connex::INLINEASM)); // This makes llc give error: <>
+    // This works surprisingly: BuildMI(MBB, I, DL, get(Connex::NOP_BITCONVERT_HW));
+
+   #ifdef COPY_REGISTER_IMPLEMENTED_WITH_ORV_H
+    //BuildMI(MBB, I, DL, get(Connex::NOP_BOGUS));
+    BuildMI(MBB, I, DL, get(Connex::ORV_H), DestReg)
+      .addReg(SrcReg) //, getKillRegState(KillSrc))
+      .addReg(SrcReg);
+   #endif
+  }
+  */
+
+  llvm_unreachable("Impossible reg-to-reg copy");
+}
+
+
+// storeRegToStackSlot() and loadRegFromStackSlot() use
+//  the FI argument (frame index, the index within the current frame)
+//
+// This implements spilling of registers (both scalar, and vector).
+//
+// Scalar (GPR) registers are stored with STD to frame index FI.
+// Vector (VectorH) registers are stored with ST_SPILL_H to the LS memory row
+// (CONNEX_MEM_NUM_ROWS + 1) - FI. Nothing is emitted for BoolMask registers.
+// Any other register class is a fatal error.
+void ConnexInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator I,
+                                          unsigned SrcReg, bool IsKill, int FI,
+                                          const TargetRegisterClass *RC,
+                                          const TargetRegisterInfo *TRI) const {
+  DebugLoc DL;
+
+  assert(FI >= 2 && "It seems I assumed wrong that frame index >= 2");
+
+  /* MEGA-TODO: the FI is only 1 variable, and we basically have 2 stack frames:
+      - 1 for the scalar CPU
+      - normally 1 for the separate address-space LS memory Connex vector processor,
+        although Connex does NOT allow calls inside vector kernels,
+        BUT the CPU does although a good case is not simple.
+
+     Think of a case where this mildly-viciated solution is NOT good for
+       programs (remember we output Opincaa programs and NO CPU assembly code,
+       and Connex does NOT allow calls inside vector kernels).
+
+     Also, understand well why FI >= 2 always holds - it seems there is some prologue.
+  */
+  unsigned ConnexLSOffsetSpillLoad = (CONNEX_MEM_NUM_ROWS + 1) - FI;
+
+  if (I != MBB.end())
+    DL = I->getDebugLoc();
+
+  if (RC == &Connex::GPRRegClass) {
+    BuildMI(MBB, I, DL, get(Connex::STD))
+        .addReg(SrcReg, getKillRegState(IsKill))
+        .addFrameIndex(FI)
+        .addImm(0);
+  }
+  else
+  if (RC == &Connex::VectorHRegClass) {
+    LLVM_DEBUG(dbgs() << " ConnexInstrInfo::storeRegToStackSlot(): Spilling Wh"
+                      << SrcReg
+                      << " to ConnexLSOffsetSpillLoad = "
+                      << ConnexLSOffsetSpillLoad
+                      << " (FI = "
+                      << FI
+                      << "), "
+                      << "I == MBB.end() is " << (I == MBB.end())
+                      << ", MBB = " << MBB.getFullName()
+                      << ", &MBB.front() = " << &(MBB.front()) << "\n"
+                      << "MBB = " << MBB
+                      //<< ", MBB.front() = " << MBB.front()
+                      );
+
+    /* VERY IMPORTANT: after experimenting (see
+       /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/DawnCC/91_SAD_f16/FEATURE_LENGTH_128/A/STDerr_llc_01)
+       if we have INLINEASM at the beginning of the MBB, the MBB.front() is
+       the 1st instruction AFTER these INLINEASM - this is why we can end up
+       adding more NOPs...
+       IMPORTANT-TODO: we should take into consideration that vector.body has
+         INLINEASM with host-side for loop here normally.
+    */
+
+    // Note: this method is spilling the destination register of the instruction *(I-1)
+    /*
+    // I got a strange error in LLVM when printing in certain cases *I - see e.g. /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/DawnCC/90_SSD_f16/3/STDerr_llc_01_old03
+    LLVM_DEBUG(dbgs() << " ConnexInstrInfo::storeRegToStackSlot(): *I = "
+                      << *I);
+    */
+
+    /*
+      Important-TODO: maybe we can avoid inserting the NOP now by making the
+       post-RA (maybe even the pre-RA) scheduler reschedule instructions
+       to insert a useful instruction in this delay slot.
+
+      Adding the NOP is mandatory if the previous instruction updates the
+       spilled register, since all (i)write instructions require the
+       insertion of a delay slot between them and the instructions that
+       generates their operands
+       - in this case the register to be written to the LS memory.
+
+      It prints something like:
+       <<
+       *(I--) = %vreg538 = XORV_H %vreg106, %vreg105; VectorH:%vreg538,%vreg106,%vreg105 dbg:test.c:48:36
+       *I = %vreg175 = ADDV_H %vreg149, %vreg164; VectorH:%vreg175,%vreg149,%vreg164 dbg:test.c:48:36>>
+
+      In this case it spills %vreg538 to LS memory
+       - with an instruction like LS[1020] = R...
+    */
+
+    MachineBasicBlock::iterator Iprev; // = I;
+
+#ifdef EXPERIMENTAL_2019_05
+    bool IFront = (I == MBB.front());
+#endif
+
+    MachineInstr *IMI;
+    if (I == MBB.end())
+      IMI = NULL;
+    else
+      IMI = (MachineInstr *)(&(*I));
+
+    LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): IMI = "
+                      << IMI
+                      << "\n");
+    LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): IMI == &MBB.front() = "
+                      << (IMI == (&MBB.front()) )
+                      << "\n");
+
+    if ( (I != MBB.end()) &&
+         (IMI != NULL) &&
+         (IMI != (&MBB.front())) ) {
+      Iprev = I;
+      Iprev--;
+      MachineInstr *IprevMI = (MachineInstr *)(&(*Iprev));
+
+      LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): IprevMI = "
+                        << *IprevMI
+                        << "\n");
+      LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): IprevMI->getNumOperands() = "
+                        << IprevMI->getNumOperands()
+                        << "\n");
+      LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): IprevMI->getOpcode() == Connex::INLINEASM = "
+                        << (IprevMI->getOpcode() == Connex::INLINEASM)
+                        << "\n");
+      LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): IprevMI->getOpcode() == Connex::VLOAD_H_SYM_IMM = "
+                        << (IprevMI->getOpcode() == Connex::VLOAD_H_SYM_IMM)
+                        << "\n");
+      // The case where I screw up is LS[1013] = ...
+      //   because the INLINEASM before it is the MBB.front() and is INLINEASM.
+
+      if (IprevMI != NULL &&
+          // NOT necessary: (IprevMI != (&MBB.front())) &&
+          //(IMI != (&MBB.front())) &&
+          (IprevMI->getNumOperands() > 0 || // MEGA-TODO: understand why I give this
+           IprevMI->getOpcode() == Connex::INLINEASM ||
+           IprevMI->getOpcode() == Connex::VLOAD_H_SYM_IMM) ) {
+
+        LLVM_DEBUG(dbgs()
+          << " storeRegToStackSlot(): Handling special cases.\n");
+
+        MachineOperand &I0Opnd = IprevMI->getOperand(0);
+
+        if (IprevMI->getOpcode() == Connex::VLOAD_H_SYM_IMM) {
+          // Treating Symbolic immediate operands
+          // MEGA-TODO: check
+          /*
+          MachineBasicBlock::iterator I2 = I;
+          MachineInstr *I2MI;
+          I2++;
+          I2MI = I2;
+          LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): I2MI = "
+                            << *I2MI
+                            << "\n");
+          assert(I2MI->getOpcode() == Connex::INLINEASM);
+          */
+          //assert(0 && "Bogus");
+          assert(IprevMI->getNumOperands() > 0); // Just checking
+          assert(IMI->getOpcode() == Connex::INLINEASM &&
+                 "The INLINEASM with the immediate operand should be next "
+                 "for VLOAD_H_SYM_IMM.");
+
+          LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): Treating "
+                               "VLOAD_H_SYM_IMM case.\n");
+          // Gives error: <>
+          // Skip the INLINEASM, so the spill is inserted after it.
+          I++;
+          //Iprev++;
+        }
+
+        if ( (//IprevMI->getNumOperands() > 0 &&
+              /*
+              (IprevMI->getOperand(0)).isReg() &&
+              (IprevMI->getOperand(0)).isDef() &&
+              (IprevMI->getOperand(0)).getReg() == SrcReg
+              */
+              I0Opnd.isReg() &&
+              I0Opnd.isDef() &&
+              I0Opnd.getReg() == SrcReg
+             ) ||
+             (IprevMI->getOpcode() == Connex::INLINEASM)) {
+          /* Important-TODO: check better: first, for SAD.f16 we have a COPY
+             between the host-for and the spill - so we should do these checks
+             after the hoisting of spills, etc - IMPORTANT: either in
+             ConnexAsmPrinter.cpp or PostRAHazardRecognizer which I'm afraid to
+             run for programs using bigger types like f16 - e.g., SSD.f16.
+             It is possible that the instruction IprevMI be a
+             VLOAD or a for loop that has an instruction with dst register
+             the one that is spilled. */
+          LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): Adding NOP_BPF to "
+                               "avoid data hazards...[Explain better...]\n");
+#ifdef EXPERIMENTAL_2019_05
+          BuildMI(MBB, I, DL, get(Connex::NOP_BPF)).addImm(1);
+#endif
+        }
+        else {
+          LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): Not putting NOP "
+                               "after IprevMI = "
+                            << *IprevMI
+                            //<< " before I = " << *I << "\n");
+                            << " before: IMI = " << IMI << ",\n"
+                            << " IMI->getOpcode() = "
+                            << IMI->getOpcode() << "\n");
+          /* I get some error here, from MachineInstr.cpp:1695:
+             "I = #0 0x00007faf1da72700" and then it
+             crashes without any warning:
+                            << " I = " << *IMI << "\n"); */
+        }
+      }
+      else {
+        LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): else case for "
+                             "if (IprevMI != NULL && ...)\n");
+      }
+    }
+    else {
+      LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): else case for "
+                           "if (IMI != NULL && Iprev != &MBB.front())\n");
+
+      if (IMI == (&MBB.front())) {
+        LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): case IMI == &MBB.front()\n");
+        // We conservatively put a NOP before the spill (Store)
+#ifdef EXPERIMENTAL_2019_05
+        // MEGA MEGA-TODO: see /home/asusu/LLVM/Tests/DawnCC/35l_MatMul_f16/SIZE_256/H_CVL8_LLVMnew/A/STDerr_llc_01 - gives error: <>
+        BuildMI(MBB, I, DL, get(Connex::NOP_BPF)).addImm(1);
+#endif
+      }
+    }
+    //BuildMI(MBB, I, DL, get(Connex::NOP_BOGUS));
+
+#ifdef EXPERIMENTAL_2019_05
+    if (IFront == false) {
+#endif
+      BuildMI(MBB, I, DL, get(Connex::ST_SPILL_H))
+          .addReg(SrcReg, getKillRegState(IsKill))
+          /*
+          // Gives error I guess because it is a vector instruction, not eBPF one:
+          // void llvm::MachineInstr::addOperand(llvm::MachineFunction&,
+          // const llvm::MachineOperand&): Assertion `(isImpReg || Op.isRegMask() ||
+          // MCID->isVariadic() || OpNo < MCID->getNumOperands() || isMetaDataOp) &&
+          // "Trying to add an operand to a machine instr that is already done!"'
+          // failed.
+          .addFrameIndex(FI)
+          // Even if Connex does NOT have a stack, we can use LS mem to easily
+          // simulate it.
+          */
+          .addImm(ConnexLSOffsetSpillLoad);
+// The closing brace must be guarded by the same macro as the opening one,
+// otherwise the braces are unbalanced when only one macro is defined.
+#ifdef EXPERIMENTAL_2019_05
+    }
+#endif
+
+    LLVM_DEBUG(dbgs() <<
+      " storeRegToStackSlot(): Added ST_SPILL_H instruction.\n");
+    LLVM_DEBUG(dbgs() <<
+      " storeRegToStackSlot(): MBB = " << MBB << "\n");
+  }
+  else
+  if (RC == &Connex::BoolMaskRegClass) {
+    /*
+    BuildMI(MBB, I, DL, get(Connex::ST_H))
+        .addReg(SrcReg, getKillRegState(IsKill))
+        .addImm(CONNEX_MEM_NUM_ROWS - 100);
+    // TODO: this is just bogus I guess, no need to spill v8i1 register
+    */
+  }
+  else {
+    llvm_unreachable("Connex back end: Can't store this register to stack slot");
+  }
+}
+
+
+
+// This implements filling/reloading - i.e., load for spilled registers
+// (both scalar, and vector).
+void ConnexInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator I,
+                                           unsigned DestReg,
+                                           int FI,
+                                           const TargetRegisterClass *RC,
+                                           const TargetRegisterInfo *TRI) const {
+  DebugLoc DL;
+
+  assert(FI >= 2 && "I assumed wrong that frame index >= 2");
+
+  unsigned ConnexLSOffsetFillLoad = (CONNEX_MEM_NUM_ROWS + 1) - FI;
+
+  if (I != MBB.end())
+    DL = I->getDebugLoc();
+
+  if (RC == &Connex::GPRRegClass) {
+    BuildMI(MBB, I, DL, get(Connex::LDD), DestReg)
+        .addFrameIndex(FI)
+        .addImm(0);
+  }
+  else
+  if (RC == &Connex::VectorHRegClass) {
+    /*
+    // This actually generates a malformed scalar instruction with
+    // vector register
+    BuildMI(MBB, I, DL, get(Connex::LDD), DestReg)
+        .addFrameIndex(FI)
+        .addImm(0);
+    */
+    /*
+    // It is NOT correct since LLVM assumes it uses a stack and the
+    // operations are sort of PUSH/POP. Even if Connex does NOT have
+    // a stack, we can use LS to easily simulate it.
+    BuildMI(MBB, I, DL, get(Connex::LD_H), DestReg)
+        .addImm(CONNEX_MEM_NUM_ROWS - 1 - DestReg);
+    */
+
+    LLVM_DEBUG(dbgs() << " ConnexInstrInfo::loadRegFromStackSlot(): Filling Wh"
+                      << DestReg
+                      << " from ConnexLSOffsetFillLoad = "
+                      << ConnexLSOffsetFillLoad
+                      << " (FI = "
+                      << FI
+                      << ")\n");
+
+    /*
+      IMPORTANT: Adding the NOP is NOT required, since the iread Connex
+       instruction does NOT require the insertion of a delay slot between
+       them and the instruction that uses the register read from the LS memory.
+    */
+    BuildMI(MBB, I, DL, get(Connex::LD_FILL_H), DestReg)
+        .addImm(ConnexLSOffsetFillLoad);
+    // TODO TODO TODO: get num vector registers from ConnexRegisterInfo.td: def VectorH: RegisterClass<"Connex", [v128i16], 32,
+  }
+  else {
+    llvm_unreachable("Connex back end: Can't load this register from stack slot");
+  }
+}
+
+// Examines the terminators of MBB, from the bottom up.
+//
+// Returns false when the block ends in at most unconditional JMP branches (or
+// falls through); TBB then holds the unconditional destination, if any.
+// Returns true when a terminator cannot be analyzed: a terminator that is not
+// a branch, or any branch other than JMP (conditional branches are not
+// handled).
+// With AllowModify set, instructions following a JMP are deleted, and a JMP
+// to the layout successor is removed.
+bool ConnexInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
+                                    MachineBasicBlock *&TBB,
+                                    MachineBasicBlock *&FBB,
+                                    SmallVectorImpl<MachineOperand> &Cond,
+                                    bool AllowModify) const {
+  // Start from the bottom of the block and work up, examining the
+  // terminator instructions.
+  MachineBasicBlock::iterator I = MBB.end();
+  while (I != MBB.begin()) {
+    --I;
+    if (I->isDebugValue())
+      continue;
+
+    // Working from the bottom, when we see a non-terminator
+    // instruction, we're done.
+    if (!isUnpredicatedTerminator(*I))
+      break;
+
+    // A terminator that isn't a branch can't easily be handled
+    // by this analysis.
+    if (!I->isBranch())
+      return true;
+
+    // Handle unconditional branches.
+    if (I->getOpcode() == Connex::JMP) {
+      if (!AllowModify) {
+        TBB = I->getOperand(0).getMBB();
+        continue;
+      }
+
+      // If the block has any instructions after a J, delete them.
+      while (std::next(I) != MBB.end())
+        std::next(I)->eraseFromParent();
+      Cond.clear();
+      FBB = nullptr;
+
+      // Delete the J if it's equivalent to a fall-through.
+      if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+        TBB = nullptr;
+        I->eraseFromParent();
+        I = MBB.end();
+        continue;
+      }
+
+      // TBB is used to indicate the unconditional destination.
+      TBB = I->getOperand(0).getMBB();
+      continue;
+    }
+    // Cannot handle conditional branches
+    return true;
+  }
+
+  return false;
+}
+
+// Appends an unconditional JMP to TBB at the end of MBB and returns 1 (the
+// number of instructions inserted). Conditional branches (non-empty Cond)
+// are not supported and are a fatal error. BytesAdded is not written.
+unsigned ConnexInstrInfo::insertBranch(MachineBasicBlock &MBB,
+                                       MachineBasicBlock *TBB,
+                                       MachineBasicBlock *FBB,
+                                       ArrayRef<MachineOperand> Cond,
+                                       const DebugLoc &DL,
+                                       int *BytesAdded) const {
+  // Shouldn't be a fall through.
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+  if (Cond.empty()) {
+    // Unconditional branch
+    assert(!FBB && "Unconditional branch with multiple successors!");
+    BuildMI(&MBB, DL, get(Connex::JMP)).addMBB(TBB);
+    return 1;
+  }
+
+  llvm_unreachable("Unexpected conditional branch");
+}
+
+// Removes the JMP instructions found at the end of MBB (debug values are
+// skipped) and returns how many were removed. BytesRemoved is not written.
+unsigned ConnexInstrInfo::removeBranch(MachineBasicBlock &MBB,
+                                       int *BytesRemoved) const {
+  MachineBasicBlock::iterator I = MBB.end();
+  unsigned Count = 0;
+
+  while (I != MBB.begin()) {
+    --I;
+    if (I->isDebugValue())
+      continue;
+    if (I->getOpcode() != Connex::JMP)
+      break;
+    // Remove the branch.
+    I->eraseFromParent();
+    // The erased iterator is invalid: restart the scan from the end.
+    I = MBB.end();
+    ++Count;
+  }
+
+  return Count;
+}
+
+/*
+TODO TODO: better implement it in ConnexTargetMachine::addPreRegAlloc(), in
+    order to avoid any spills the register allocator might create.
+
+Creating in ConnexInstrInfo::expandPostRAPseudo() bundle instructions
+    with VLOAD_H_SYM_IMM + INLINEASM.
+  This is a decent compromise although I do NOT use pseudo-instructions,
+    using this after Register Allocation (PostRA) works because:
+      - IMPORTANT: INLINEASM is considered a pseudo-instruction (NOTE that
+          VLOAD_H_SYM_IMM is NOT considered a pseudo-instruction);
+      - pre-RA scheduler does NOT break the VLOAD_H_SYM_IMM from its associated
+          INLINEASM;
+      - register allocator does NOT break either the VLOAD_H_SYM_IMM from its
+          associated INLINEASM, more exactly it doesn't insert spills or fills
+          between the two instructions as far as I can see. IMPORTANT: however I
+          am NOT sure if this is always going to hold.
+As of Feb 2017, class TargetInstrInfo
+   (see http://llvm.org/docs/doxygen/html/classllvm_1_1TargetInstrInfo.html)
+   has a few methods called on MachineInstr, but expandPostRAPseudo() seems
+   to be a very good candidate (also it has no method with MachineSDNode).
+ Anyhow, we could create and register our own pass working on MachineInstr in
+   order to bundle instructions together (or on MachineSDNode, before pre-RA
+   scheduler, although I guess it might be DIFFICULT to bundle from
+   MachineSDNode to MachineInstr, since we have to perform a simple scheduling).
+
+From http://llvm.org/docs/doxygen/html/classllvm_1_1TargetInstrInfo.html
+  expandPostRAPseudo() is called for the pseudo instructions that remain after
+  register allocation; the target may rewrite MI in place or insert new
+  instructions and erase MI, and returns true if anything was expanded.
+*/
+
+/// Post-RA pseudo-expansion hook. Currently a deliberate no-op.
+///
+/// The bundling scheme sketched in the comment above (gluing VLOAD_H_SYM_IMM
+/// to the INLINEASM that follows it with finalizeBundle()) is not enabled:
+/// for every instruction this hook reports that nothing was expanded.
+///
+/// The earlier experimental body sat behind an unconditional `return false;`
+/// and was therefore unreachable. It has been dropped instead of being kept
+/// as dead code; it also printed *succMI without a null check and compared
+/// pointers against NULL. Revive it from version control if bundling is
+/// implemented (the comment above suggests doing so before register
+/// allocation instead of in this hook).
+///
+/// \param MI instruction offered for expansion (not inspected).
+/// \returns false, i.e. "MI was not expanded".
+bool ConnexInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+  return false;
+} // END ConnexInstrInfo::expandPostRAPseudo()
+
+
+// USE_POSTRA_SCHED
+// Inspired from llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+// See http://llvm.org/docs/doxygen/html/classllvm_1_1TargetInstrInfo.html
+//
+/// Creates the hazard recognizer used by the post-RA scheduler.
+///
+/// \param II  instruction itineraries, forwarded to the recognizer.
+/// \param DAG scheduling DAG, forwarded to the recognizer.
+/// \returns a newly allocated ConnexDispatchGroupSBHazardRecognizer; the
+///          object is allocated with `new` here and handed to the caller.
+ScheduleHazardRecognizer *ConnexInstrInfo::CreateTargetPostRAHazardRecognizer(
+    const InstrItineraryData *II, const ScheduleDAG *DAG) const {
+  LLVM_DEBUG(dbgs() << "Entered ConnexInstrInfo::CreateTargetPostRAHazardRecognizer()\n");
+
+  return new ConnexDispatchGroupSBHazardRecognizer(II, DAG);
+}
+
+
+// TODO: the CreateTargetPostRAHazardRecognizer(const MachineFunction &MF)
+// overload is not implemented. The original note suggests looking at how
+// AMDGPU added its separate post-RA hazard recognizer.
+// See http://llvm.org/doxygen/classllvm_1_1MachineFunction.html
+
+// Pre-RA MI-scheduler - used if I give llc -enable-misched ...
+// See http://llvm.org/docs/doxygen/html/classllvm_1_1TargetInstrInfo.html
+//
+// Builds the hazard recognizer for the machine-instruction scheduler. The
+// recognizer is allocated with `new` here and handed to the caller.
+ScheduleHazardRecognizer *ConnexInstrInfo::CreateTargetMIHazardRecognizer(
+    const InstrItineraryData *II, const ScheduleDAG *DAG) const {
+  LLVM_DEBUG(
+      dbgs() << "Entered ConnexInstrInfo::CreateTargetMIHazardRecognizer()\n");
+
+  ScheduleHazardRecognizer *Recognizer =
+      new ConnexDispatchGroupSBHazardRecognizerPreRAScheduler(II, DAG);
+  return Recognizer;
+}
+
+
+/*
+// USE_PRERA_HAZARD_RECOGNIZER
+
+// Pre-RA scheduler - default scheduler (no special param given to llc)
+// See http://llvm.org/docs/doxygen/html/classllvm_1_1TargetInstrInfo.html
+ScheduleHazardRecognizer *ConnexInstrInfo::CreateTargetHazardRecognizer(
+    const TargetSubtargetInfo *STI,
+    const ScheduleDAG *DAG) const {
+  LLVM_DEBUG(dbgs() << "Entered ConnexInstrInfo::CreateTargetHazardRecognizer()\n");
+
+  return new ConnexDispatchGroupSBHazardRecognizerPreRAScheduler(
+      // See http://llvm.org/docs/doxygen/html/TargetSubtargetInfo_8h_source.html#l00100
+      STI->getInstrItineraryData(),
+      DAG);
+}
+*/
+
+// Inspired from llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+//
+// Emits a Connex::NOP with an empty debug location at position MI of MBB.
+void ConnexInstrInfo::insertNoop(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI) const {
+  LLVM_DEBUG(dbgs() << "Entered ConnexInstrInfo::insertNoop()\n");
+
+  const DebugLoc NoLocation;
+  BuildMI(MBB, MI, NoLocation, get(Connex::NOP));
+}
+
+
+// See http://llvm.org/docs/doxygen/html/classllvm_1_1TargetInstrInfo.html
+// and http://llvm.org/docs/doxygen/html/classllvm_1_1MachineInstr.html
+// Inspired from ARMBaseInstrInfo::isPredicable
+//
+// Reports whether MI may be predicated: true for VLOAD_H, false for every
+// other opcode.
+bool ConnexInstrInfo::isPredicable(MachineInstr &MI) const {
+  //if (!MI.isPredicable())
+  //  return false;
+  LLVM_DEBUG(dbgs() << "ConnexInstrInfo::isPredicable(): MI.getOpcode() = "
+                    << MI.getOpcode() << "\n");
+
+  return MI.getOpcode() == Connex::VLOAD_H;
+}
+
Index: lib/Target/Connex/ConnexMCInstLower.h
===================================================================
--- lib/Target/Connex/ConnexMCInstLower.h
+++
lib/Target/Connex/ConnexMCInstLower.h
@@ -0,0 +1,42 @@
+//===-- ConnexMCInstLower.h - Lower MachineInstr to MCInst ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CONNEX_CONNEXMCINSTLOWER_H
+#define LLVM_LIB_TARGET_CONNEX_CONNEXMCINSTLOWER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+class AsmPrinter;
+class MCContext;
+class MCInst;
+class MCOperand;
+class MCSymbol;
+class MachineInstr;
+class MachineOperand;
+
+/// ConnexMCInstLower - Lowers a Connex MachineInstr into an MCInst.
+class LLVM_LIBRARY_VISIBILITY ConnexMCInstLower {
+  MCContext &Ctx;
+  AsmPrinter &Printer;
+
+public:
+  ConnexMCInstLower(MCContext &ctx, AsmPrinter &printer)
+      : Ctx(ctx), Printer(printer) {}
+  /// Copies MI's opcode into OutMI and appends MI's lowered operands to it.
+  void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+  /// Wraps a reference to Sym in an expression operand.
+  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+  /// Asks the AsmPrinter for the symbol of the global referenced by MO.
+  MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+};
+} // end namespace llvm
+
+#endif
Index: lib/Target/Connex/ConnexMCInstLower.cpp
===================================================================
--- lib/Target/Connex/ConnexMCInstLower.cpp
+++ lib/Target/Connex/ConnexMCInstLower.cpp
@@ -0,0 +1,116 @@
+//=-- ConnexMCInstLower.cpp - Convert Connex MachineInstr to an MCInst ------------=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower Connex MachineInstrs to their corresponding
+// MCInst records.
+// +//===----------------------------------------------------------------------===// + +#include "ConnexMCInstLower.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/SmallString.h" + +#include "llvm/Support/Debug.h" // for dbgs and LLVM_DEBUG() macro +#define DEBUG_TYPE "mc-inst-lower" + + +using namespace llvm; + +MCSymbol * +ConnexMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { + return Printer.getSymbol(MO.getGlobal()); +} + +MCOperand ConnexMCInstLower::LowerSymbolOperand(const MachineOperand &MO, + MCSymbol *Sym) const { + + const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); + + if (!MO.isJTI() && MO.getOffset()) + llvm_unreachable("unknown symbol op"); + + return MCOperand::createExpr(Expr); +} + +void ConnexMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { + LLVM_DEBUG(dbgs() << "Entered ConnexMCInstLower::Lower(*MI = " + << *MI << ")...\n"); + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + LLVM_DEBUG(dbgs() << "ConnexMCInstLower::Lower(): MO = " + << MO << "\n"); + LLVM_DEBUG(dbgs() << " ConnexMCInstLower::Lower(): MO.getType() = " + << MO.getType() << "\n"); + + MCOperand MCOp; + + switch (MO.getType()) { + + default: + MI->dump(); + /* + LLVM_DEBUG(dbgs() << "ConnexMCInstLower::Lower(): MO.getType() = " + << MO.getType() << "\n"); + */ + + llvm_unreachable("unknown operand type"); + + + + case MachineOperand::MO_ExternalSymbol: { + const MCSymbol *Symbol = Printer.GetExternalSymbolSymbol(MO.getSymbolName()); + MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; + const MCExpr *Expr = 
MCSymbolRefExpr::create(Symbol, Kind, Ctx); + MCOp = MCOperand::createExpr(Expr); + //Offset += MO.getOffset(); + break; + } + + //case MachineOperand::MO_MetaData: { + case MachineOperand::MO_Metadata: { + continue; + //break; + } + + case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) + continue; + MCOp = MCOperand::createReg(MO.getReg()); + break; + + case MachineOperand::MO_Immediate: + MCOp = MCOperand::createImm(MO.getImm()); + break; + + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::createExpr( + MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx)); + break; + + case MachineOperand::MO_RegisterMask: + continue; + case MachineOperand::MO_GlobalAddress: + MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); + break; + } + + OutMI.addOperand(MCOp); + } +} + Index: lib/Target/Connex/ConnexRegisterInfo.h =================================================================== --- lib/Target/Connex/ConnexRegisterInfo.h +++ lib/Target/Connex/ConnexRegisterInfo.h @@ -0,0 +1,76 @@ +//===-- ConnexRegisterInfo.h - Connex Register Information Impl -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the Connex implementation of the TargetRegisterInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_CONNEX_CONNEXREGISTERINFO_H +#define LLVM_LIB_TARGET_CONNEX_CONNEXREGISTERINFO_H + +#include "llvm/CodeGen/TargetRegisterInfo.h" + +#define GET_REGINFO_HEADER +#include "ConnexGenRegisterInfo.inc" + +namespace llvm { + +struct ConnexRegisterInfo : public ConnexGenRegisterInfo { + + ConnexRegisterInfo(); + + // Inspired from lib/Target/Mips/MipsRegisterInfo.cpp + const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, + unsigned Kind) const; + + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + + /* + From http://llvm.org/doxygen/classllvm_1_1TargetRegisterInfo.html: + <> + */ + BitVector getReservedRegs(const MachineFunction &MF) const override; + + void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, + unsigned FIOperandNum, + RegScavenger *RS = nullptr) const override; + + unsigned getFrameRegister(const MachineFunction &MF) const override; + + + /* Addressing bug + (llc -O0, at pass: "********** FAST REGISTER ALLOCATION **********") + <> + + (Using suggestion from at https://groups.google.com/forum/#!topic/llvm-dev/fEyD9YREi5M). + */ + // See http://llvm.org/docs/doxygen/html/classllvm_1_1TargetRegisterInfo.html + // Returns true if the target requires (and can make use of) the register scavenger. 
+ virtual bool requiresRegisterScavenging (const MachineFunction &MF) const { + //return true; + return false; + } + + virtual bool requiresFrameIndexScavenging (const MachineFunction &MF) const { + //return true; + return false; + } +}; +} + +#endif Index: lib/Target/Connex/ConnexRegisterInfo.cpp =================================================================== --- lib/Target/Connex/ConnexRegisterInfo.cpp +++ lib/Target/Connex/ConnexRegisterInfo.cpp @@ -0,0 +1,152 @@ +//===-- ConnexRegisterInfo.cpp - Connex Register Information ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the Connex implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#include "Connex.h" +#include "ConnexRegisterInfo.h" +#include "ConnexSubtarget.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + +#define GET_REGINFO_TARGET_DESC +#include "ConnexGenRegisterInfo.inc" +using namespace llvm; + +#include "llvm/Support/Debug.h" // for dbgs and LLVM_DEBUG() macro +#define DEBUG_TYPE "mc-inst-lower" + + + +ConnexRegisterInfo::ConnexRegisterInfo() + : ConnexGenRegisterInfo(Connex::R0) {} + +// Inspired from lib/Target/Mips/MipsRegisterInfo.cpp +const TargetRegisterClass *ConnexRegisterInfo::getPointerRegClass( + const MachineFunction &MF, + unsigned Kind) const { + return &Connex::GPRRegClass; +} + +const MCPhysReg *ConnexRegisterInfo::getCalleeSavedRegs(const 
MachineFunction *MF) const { + return CSR_SaveList; +} + +BitVector ConnexRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + int numRegs = getNumRegs(); + + LLVM_DEBUG(dbgs() << "getReservedRegs(): numRegs = " + << numRegs << "\n"); + + BitVector Reserved(numRegs); + Reserved.set(Connex::R10); // R10 is read only frame pointer + Reserved.set(Connex::R11); // R11 is pseudo stack pointer + + /* Wh30, vector register R(30), is used by me to codegen: + - LLVM's VSELECT on Connex in ConnexTargetMachine.cpp - PassAfterPostRAScheduler + (NO longer: in ConnexISelLowering::Lower() for VSELECT to be + lowered to WHERE*). + Doing so we avoid errors like: + <<*** Bad machine code: Using an undefined physical register *** + - function: IfConversion + - basic block: BB#6 vector.body (0x1501fd8) + - instruction: %vreg47 = COPY + - operand 1: %Wh31>> + + - in ConnexInstrInfo::copyPhysReg() . + */ + Reserved.set(CONNEX_RESERVED_REGISTER_01); + Reserved.set(CONNEX_RESERVED_REGISTER_02); + Reserved.set(CONNEX_RESERVED_REGISTER_03); + + return Reserved; +} + +// From book Lopes_2014: +// "implements this replacement by converting each frame index to a real stack offset +// for all machine instructions that contain stack references (usually loads and stores). +// Extra instructions are also generated whenever additional stack offset arithmetic is +// necessary". 
+/// Rewrites the frame-index operand of *II into "frame register + offset".
+///
+/// Three shapes are handled:
+///  - MOV_rr: the frame index becomes the frame register and an ADD_ri of the
+///    object offset onto the destination register is inserted after it;
+///  - FI_ri: replaced by MOV_rr dst, frame_reg followed by ADD_ri dst, offset,
+///    after which the FI_ri itself is erased;
+///  - anything else: the frame index becomes the frame register and the next
+///    operand (an immediate) becomes object offset + old immediate.
+///
+/// \param II           instruction holding the frame index.
+/// \param SPAdj        must be 0.
+/// \param FIOperandNum not consulted; the first frame-index operand is located
+///                     by scanning the operand list.
+/// \param RS           unused.
+void ConnexRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                             int SPAdj, unsigned FIOperandNum,
+                                             RegScavenger *RS) const {
+  assert(SPAdj == 0 && "Unexpected");
+
+  MachineInstr &MI = *II;
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  DebugLoc DL = MI.getDebugLoc();
+
+  // Bounded scan for the frame-index operand: never reads past the operand
+  // list, even when assertions are compiled out.
+  const unsigned NumOperands = MI.getNumOperands();
+  unsigned i = 0;
+  while (i < NumOperands && !MI.getOperand(i).isFI())
+    ++i;
+  assert(i < NumOperands && "Instr doesn't have FrameIndex operand!");
+
+  unsigned FrameReg = getFrameRegister(MF);
+  int FrameIndex = MI.getOperand(i).getIndex();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+
+  // Keep offsets 64-bit wide until they have been range-checked. Narrowing to
+  // int first (as before) made the isInt<32> test below vacuous.
+  const int64_t ObjectOffset = MF.getFrameInfo().getObjectOffset(FrameIndex);
+
+  if (MI.getOpcode() == Connex::MOV_rr) {
+    if (!isInt<32>(ObjectOffset))
+      llvm_unreachable("bug in frame offset");
+
+    MI.getOperand(i).ChangeToRegister(FrameReg, false);
+
+    // !!!!TODO MAYBE: we took out the scalar ADD and therefore we have to
+    // comment this
+    unsigned reg = MI.getOperand(i - 1).getReg();
+
+    // ++II: insert after MI.
+    BuildMI(MBB, ++II, DL, TII.get(Connex::ADD_ri), reg)
+        .addReg(reg)
+        .addImm(ObjectOffset);
+
+    return;
+  }
+
+  const int64_t Offset = ObjectOffset + MI.getOperand(i + 1).getImm();
+
+  if (!isInt<32>(Offset))
+    llvm_unreachable("bug in frame offset");
+
+  if (MI.getOpcode() == Connex::FI_ri) {
+    // architecture does not really support FI_ri, replace it with
+    //    MOV_rr dst, frame_reg
+    //    ADD_ri dst, imm
+    unsigned reg = MI.getOperand(i - 1).getReg();
+
+    // ++II moves past MI, so both new instructions land after it, in order.
+    BuildMI(MBB, ++II, DL, TII.get(Connex::MOV_rr), reg)
+        .addReg(FrameReg);
+
+    // !!!!TODO MAYBE: we took out the scalar ADD and therefore we have to
+    // comment this
+    BuildMI(MBB, II, DL, TII.get(Connex::ADD_ri), reg)
+        .addReg(reg)
+        .addImm(Offset);
+
+    // Remove FI_ri instruction
+    MI.eraseFromParent();
+  } else {
+    MI.getOperand(i).ChangeToRegister(FrameReg, false);
+    MI.getOperand(i + 1).ChangeToImmediate(Offset);
+  }
+}
+
+/// Returns the frame base register, which is always the scalar R10.
+unsigned ConnexRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+  // MEGA-TODO: in principle we should also return, for the Connex vector
+  // processor, a vector register like Connex::Wh28.
+  return Connex::R10;
+}
Index: lib/Target/Connex/ConnexSelectionDAGInfo.h
===================================================================
--- lib/Target/Connex/ConnexSelectionDAGInfo.h
+++ lib/Target/Connex/ConnexSelectionDAGInfo.h
@@ -0,0 +1,74 @@
+//===-- ConnexSelectionDAGInfo.h - Connex SelectionDAG Info -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the Connex subclass for SelectionDAGTargetInfo.
+///
+//===----------------------------------------------------------------------===//
+
+// Inspired from ARM/ARMSelectionDAGInfo.cpp
+
+
+#ifndef LLVM_LIB_TARGET_CONNEX_CONNEXSELECTIONDAGINFO_H
+#define LLVM_LIB_TARGET_CONNEX_CONNEXSELECTIONDAGINFO_H
+
+//#include "MCTargetDesc/ConnexAddressingModes.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+
+namespace llvm {
+
+/*
+namespace Connex_AM {
+  static inline ShiftOpc getShiftOpcForNode(unsigned Opcode) {
+    switch (Opcode) {
+    default: return Connex_AM::no_shift;
+    case ISD::SHL: return Connex_AM::lsl;
+    case ISD::SRL: return Connex_AM::lsr;
+    case ISD::SRA: return Connex_AM::asr;
+    case ISD::ROTR: return Connex_AM::ror;
+    //case ISD::ROTL: // Only if imm -> turn into ROTR.
+    // Can't handle RRX here, because it would require folding a flag into
+    // the addressing mode. :( This causes us to miss certain things.
+    //case ConnexISD::RRX: return Connex_AM::rrx;
+    }
+  }
+} // end namespace Connex_AM
+*/
+
+/// Connex hooks for lowering memcpy/memmove/memset in the SelectionDAG.
+/// All three EmitTargetCodeFor* overrides forward to EmitSpecializedLibcall().
+class ConnexSelectionDAGInfo : public SelectionDAGTargetInfo {
+public:
+  /// Forwards to EmitSpecializedLibcall() with RTLIB::MEMCPY. isVolatile,
+  /// AlwaysInline and the pointer infos are not passed on.
+  SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
+                                  SDValue Chain, SDValue Dst, SDValue Src,
+                                  SDValue Size, unsigned Align, bool isVolatile,
+                                  bool AlwaysInline,
+                                  MachinePointerInfo DstPtrInfo,
+                                  MachinePointerInfo SrcPtrInfo) const override;
+
+  /// Forwards to EmitSpecializedLibcall() with RTLIB::MEMMOVE. isVolatile and
+  /// the pointer infos are not passed on.
+  SDValue
+  EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,
+                           SDValue Dst, SDValue Src, SDValue Size,
+                           unsigned Align, bool isVolatile,
+                           MachinePointerInfo DstPtrInfo,
+                           MachinePointerInfo SrcPtrInfo) const override;
+
+  // Adjust parameters for memset, see RTABI section 4.3.4
+  /// Forwards to EmitSpecializedLibcall() with RTLIB::MEMSET; Op1/Op2/Op3 are
+  /// passed as destination, value and size respectively.
+  SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl,
+                                  SDValue Chain, SDValue Op1, SDValue Op2,
+                                  SDValue Op3, unsigned Align, bool isVolatile,
+                                  MachinePointerInfo DstPtrInfo) const override;
+
+  /// Shared helper behind the three hooks above: lowers the operation to a
+  /// runtime library call through TargetLowering::LowerCallTo() and returns
+  /// the chain of the emitted call. LC selects the calling convention; see the
+  /// definition for the routine that is actually called.
+  SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &dl,
+                                 SDValue Chain, SDValue Dst, SDValue Src,
+                                 SDValue Size, unsigned Align,
+                                 RTLIB::Libcall LC) const;
+};
+
+} // end namespace llvm
+
+#endif
Index: lib/Target/Connex/ConnexSelectionDAGInfo.cpp
===================================================================
--- lib/Target/Connex/ConnexSelectionDAGInfo.cpp
+++ lib/Target/Connex/ConnexSelectionDAGInfo.cpp
@@ -0,0 +1,131 @@
+//===-- ConnexSelectionDAGInfo.cpp - Connex SelectionDAG Info -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ConnexSelectionDAGInfo class.
+// +//===----------------------------------------------------------------------===// + +#include "ConnexTargetMachine.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/IR/DerivedTypes.h" +#include "ConnexSelectionDAGInfo.h" + + +// Inspired from ARM/ARMSelectionDAGInfo.cpp + +using namespace llvm; + +#define DEBUG_TYPE "connex-selectiondag-info" + +// Emit, if possible, a specialized version of the given Libcall. Typically this +// means selecting the appropriately aligned version, but we also convert memset +// of 0 into memclr. +SDValue ConnexSelectionDAGInfo::EmitSpecializedLibcall( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, RTLIB::Libcall LC) const { + + const ConnexSubtarget &Subtarget = + DAG.getMachineFunction().getSubtarget(); + const ConnexTargetLowering *TLI = Subtarget.getTargetLowering(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + /* + if (AEABILibcall == AEABI_MEMCLR) { + Entry.Node = Size; + Args.push_back(Entry); + } else if (AEABILibcall == AEABI_MEMSET) { + */ + // Adjust parameters for memset, EABI uses format (ptr, size, value), + // GNU library uses (ptr, value, size) + // See RTABI section 4.3.4 + Entry.Node = Size; + Args.push_back(Entry); + + // Extend or truncate the argument to be an i32 value for the call. 
+ if (Src.getValueType().bitsGT(MVT::i32)) + Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src); + else if (Src.getValueType().bitsLT(MVT::i32)) + Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); + + Entry.Node = Src; + Entry.Ty = Type::getInt32Ty(*DAG.getContext()); + Entry.IsSExt = false; + Args.push_back(Entry); + /* + } else { + Entry.Node = Src; + Args.push_back(Entry); + + Entry.Node = Size; + Args.push_back(Entry); + } + */ + + static char const *FunctionNames[4][3] = { + { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" }, + { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" }, + //{ "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" }, + { "memset", "memset", "memset" }, + { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" } + }; + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee( + TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(FunctionNames[2][2], + TLI->getPointerTy(DAG.getDataLayout())), + std::move(Args)) + .setDiscardResult(); + std::pair CallResult = TLI->LowerCallTo(CLI); + + return CallResult.second; +} + +SDValue ConnexSelectionDAGInfo::EmitTargetCodeForMemcpy( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, + RTLIB::MEMCPY); +} + +SDValue ConnexSelectionDAGInfo::EmitTargetCodeForMemmove(SelectionDAG &DAG, + const SDLoc &dl, + SDValue Chain, + SDValue Dst, + SDValue Src, + SDValue Size, + unsigned Align, + bool isVolatile, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { + return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, + RTLIB::MEMMOVE); +} + +SDValue ConnexSelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, + const SDLoc &dl, + 
SDValue Chain, + SDValue Dst, + SDValue Src, + SDValue Size, + unsigned Align, + bool isVolatile, + MachinePointerInfo DstPtrInfo) const { + LLVM_DEBUG(dbgs() << "Entered ConnexSelectionDAGInfo::EmitTargetCodeForMemset()" + << "\n"); + + return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, + RTLIB::MEMSET); +} Index: lib/Target/Connex/ConnexSubtarget.h =================================================================== --- lib/Target/Connex/ConnexSubtarget.h +++ lib/Target/Connex/ConnexSubtarget.h @@ -0,0 +1,70 @@ +//===-- ConnexSubtarget.h - Define Subtarget for the Connex -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the Connex specific subclass of TargetSubtargetInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_CONNEX_CONNEXSUBTARGET_H +#define LLVM_LIB_TARGET_CONNEX_CONNEXSUBTARGET_H + +#include "ConnexFrameLowering.h" +#include "ConnexISelLowering.h" +#include "ConnexInstrInfo.h" +#include "ConnexSelectionDAGInfo.h" + +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetMachine.h" + +#define GET_SUBTARGETINFO_HEADER +#include "ConnexGenSubtargetInfo.inc" + +namespace llvm { +class StringRef; + +class ConnexSubtarget : public ConnexGenSubtargetInfo { + virtual void anchor(); + ConnexInstrInfo InstrInfo; + ConnexFrameLowering FrameLowering; + ConnexTargetLowering TLInfo; + + SelectionDAGTargetInfo TSInfo; + ConnexSelectionDAGInfo TSInfo2; + +public: + // This constructor initializes the data members to match that + // of the specified triple. 
+ ConnexSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, + const TargetMachine &TM); + + // ParseSubtargetFeatures - Parses features string setting specified + // subtarget options. Definition of function is auto generated by tblgen. + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + + const ConnexInstrInfo *getInstrInfo() const override { return &InstrInfo; } + const ConnexFrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + const ConnexTargetLowering *getTargetLowering() const override { + return &TLInfo; + } + + const TargetRegisterInfo *getRegisterInfo() const override { + return &InstrInfo.getRegisterInfo(); + } + + // Inspired from ARM/ARMSubtarget.cpp + const ConnexSelectionDAGInfo *getSelectionDAGInfo() const override { + return &TSInfo2; + } +}; +} // End llvm namespace + +#endif Index: lib/Target/Connex/ConnexSubtarget.cpp =================================================================== --- lib/Target/Connex/ConnexSubtarget.cpp +++ lib/Target/Connex/ConnexSubtarget.cpp @@ -0,0 +1,30 @@ +//===-- ConnexSubtarget.cpp - Connex Subtarget Information ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Connex specific subclass of TargetSubtargetInfo. 
+// +//===----------------------------------------------------------------------===// + +#include "ConnexSubtarget.h" +#include "Connex.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +#define DEBUG_TYPE "connex-subtarget" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "ConnexGenSubtargetInfo.inc" +// Empty out-of-line definition of the virtual anchor() declared in ConnexSubtarget (presumably the usual LLVM vtable anchor - confirm). +void ConnexSubtarget::anchor() {} +// Passes TT/CPU/FS to the ConnexGenSubtargetInfo base and constructs InstrInfo, FrameLowering and TLInfo; members not listed here are default-initialized. +ConnexSubtarget::ConnexSubtarget(const Triple &TT, const std::string &CPU, + const std::string &FS, const TargetMachine &TM) + : ConnexGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(*this), + TLInfo(TM, *this) {} Index: lib/Target/Connex/ConnexTargetMachine.h =================================================================== --- lib/Target/Connex/ConnexTargetMachine.h +++ lib/Target/Connex/ConnexTargetMachine.h @@ -0,0 +1,51 @@ +//===-- ConnexTargetMachine.h - Define TargetMachine for Connex --- C++ ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the Connex specific subclass of TargetMachine. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_CONNEX_CONNEXTARGETMACHINE_H +#define LLVM_LIB_TARGET_CONNEX_CONNEXTARGETMACHINE_H + +#include "ConnexSubtarget.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Target/TargetMachine.h" // This was before +#include <memory> +// ConnexTargetMachine: LLVMTargetMachine subclass that owns the object-file lowering (TLOF) and one ConnexSubtarget used for every function. +namespace llvm { +class ConnexTargetMachine : public LLVMTargetMachine { + std::unique_ptr<TargetLoweringObjectFile> TLOF; + ConnexSubtarget Subtarget; + +public: + ConnexTargetMachine(const Target &T, const Triple &TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM, + CodeGenOpt::Level OL, bool JIT); + // Both overloads return the single Subtarget member; the Function argument is ignored. + const ConnexSubtarget *getSubtargetImpl() const { return &Subtarget; } + const ConnexSubtarget *getSubtargetImpl(const Function &) const override { + return &Subtarget; + } + + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + + // Inspired from ARC/ARCTargetMachine.h + TargetTransformInfo getTargetTransformInfo(const Function &F) override; + // Returns the lowering object held by TLOF; ownership stays with this target machine. + TargetLoweringObjectFile *getObjFileLowering() const override { + return TLOF.get(); + } +}; +} // End llvm namespace + +#endif Index: lib/Target/Connex/ConnexTargetMachine.cpp =================================================================== --- lib/Target/Connex/ConnexTargetMachine.cpp +++ lib/Target/Connex/ConnexTargetMachine.cpp @@ -0,0 +1,1580 @@ +// TargetMachine.cpp - Define TargetMachine for Connex ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements the info about Connex target spec. 
+// NOTE: I (partly) documented what the passes PassCreateBundles and +// PassFinalizeBundles do and my design decisions at +// http://lists.llvm.org/pipermail/llvm-dev/2017-March/110990.html +//===----------------------------------------------------------------------===// + +#include "Connex.h" +#include "ConnexTargetMachine.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" // For MIBundleBuilder +// +#include "llvm/CodeGen/MachineRegisterInfo.h" + +#include "llvm/Support/Debug.h" +#define DEBUG_TYPE "connex-target-config" + +#include "ConnexTargetTransformInfo.h" + + + + +using namespace llvm; +// Hidden command-line flag "dont-treat-copy-instructions"; defaults to false. +static cl::opt DontTreatCopyInstructions("dont-treat-copy-instructions", + cl::Hidden, + cl::init(false), + cl::desc("Don't treat copy instructions")); + +// Destination register used when a WHERE block is split; aliases CONNEX_RESERVED_REGISTER_02. +#define CONNEX_RESERVED_REGISTER_DST_FOR_SPLIT CONNEX_RESERVED_REGISTER_02 +// NOT compiling - <>: #define CONNEX_RESERVED_REGISTER_DST_FOR_SPLIT Connex::Wh3000 +// Gives strange results, but sort of helps for reading output.cpp: #define CONNEX_RESERVED_REGISTER_DST_FOR_SPLIT 3000 +// Registers ConnexTargetMachine for TheConnexTarget by constructing a local RegisterTargetMachine object. +extern "C" void LLVMInitializeConnexTarget() { + // Register the target - Force static initialization. + RegisterTargetMachine Z(TheConnexTarget); +} +// Returns the fixed data-layout string used for Connex; the TT argument is not consulted. +static StringRef computeDataLayout(const Triple &TT) { + /* + See http://llvm.org/docs/LangRef.html#data-layout for all details regarding layout declaration. + - e + Specifies that the target lays out data in little-endian form. + - S<size> + Specifies the natural alignment of the stack in bits. + Alignment promotion of stack variables is limited to the natural stack alignment to avoid dynamic stack realignment. + The stack alignment must be a multiple of 8-bits. 
+ If omitted, the natural stack alignment defaults to “unspecified”, which does not prevent any alignment promotions. + - p[n]:<size>:<abi>:<pref> + This specifies the size of a pointer and its <abi> and <pref>erred alignments for address space n. All sizes are in bits. The address space, n, is optional, and if not specified, denotes the default address space 0. The value of n must be in the range [1,2^23). + - i<size>:<abi>:<pref> + This specifies the alignment for an integer type of a given bit <size>. The value of <size> must be in the range [1,2^23). + - n<size1>:<size2>:<size3>... + This specifies a set of native integer widths for the target CPU in bits. + - v<size>:<abi>:<pref> + This specifies the alignment for a vector type of a given bit <size>. + + See also http://llvm.org/docs/WritingAnLLVMBackend.html + An upper-case “E” in the string indicates a big-endian target data model. + A lower-case “e” indicates little-endian. + “p:” is followed by pointer information: size, ABI alignment, and preferred alignment. + If only two figures follow “p:”, then the first value is pointer size, and the second value is both ABI and preferred alignment. + Then a letter for numeric type alignment: “i”, “f”, “v”, or “a” (corresponding to integer, floating point, vector, or aggregate). + “i”, “v”, or “a” are followed by ABI alignment and preferred alignment. + “f” is followed by three values: the first indicates the size of a long double, then ABI alignment, and then ABI preferred alignment. + */ + + // We specify here the data-layout: + // - of the CPU, eBPF - actually ABI properties + // - only a few alignment properties for the vector types + // - see at the end of the string. Note that we can't + // specify any other properties for the Connex vector processor. + // VERY IMPORTANT: The pointer size is 64 (that of the eBPF CPU), because the + // masked.gather/scatter instructions normally use such pointers in LLVM IR, + // even if we translate them to writeDataTo/readDataFromConnex() and + // Connex vector assembly instructions with indirect memory accesses. 
+ // + // We really need to specify p:64 (not p:16), otherwise we get an error like: + // "Do not know how to promote this operator!" + // (GlobalAddress 0") + // IMPORTANT: the string is the one from the (e)BPF back end, + // concatenated with the spec for the vector alignment for Connex. + return "e-m:e-p:64:64-i64:64-n32:64-S128-v128:128:128-v2048:2048:2048"; +} + +// Returns the requested relocation model, or Reloc::PIC_ when none was given. +static Reloc::Model getEffectiveRelocModel(Optional RM) { + if (!RM.hasValue()) + return Reloc::PIC_; + return *RM; +} + +// Returns CM if it is Small or Large (any other value is a fatal error) and CodeModel::Small when CM is empty. NOTE(review): the constructor below calls getEffectiveCodeModel() instead, so this helper looks unused in the visible part of the file - confirm. +// Inspired from XCore/XCoreTargetMachine.cpp +static CodeModel::Model getEffectiveXCoreCodeModel( + Optional CM) { + if (CM) { + if (*CM != CodeModel::Small && *CM != CodeModel::Large) + report_fatal_error("Target only supports CodeModel Small or Large"); + return *CM; + } + return CodeModel::Small; +} + +// Builds the target machine from the fixed data layout, the effective relocation model and a code model defaulting to Small, then creates TLOF and Subtarget and calls initAsmInfo(). The JIT argument is not used here. +ConnexTargetMachine::ConnexTargetMachine(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Optional RM, + Optional CM, + CodeGenOpt::Level OL, + bool JIT) + : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, + getEffectiveRelocModel(RM), + getEffectiveCodeModel(CM, CodeModel::Small), OL), + TLOF(make_unique()), + Subtarget(TT, CPU, FS, *this) { + initAsmInfo(); +} + + + + + +namespace { + + +/* I made sure that the iterators don't become invalid by using + another iterator, e.g. I2succ, which stores the next pointer in the + data structures. + +small-TODO: it might be safer to do a change by moving (maybe also + erasing) COPY instrs one per WHERE block (or even per MBB) and then get out of + the MBB::iterator loop and restart the loop from the beginning again until + NO more changes are performed - this in order to avoid any (eventual) issue with + iterator invalidation. 
+*/ +class PassHandleMisplacedInstr : public MachineFunctionPass { + public: + PassHandleMisplacedInstr() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "PassHandleMisplacedInstr"; + } + + /* // GMS said he doesn't like having arithmetic or logic instruction between predicate and WHERE* instruction: + #ifdef ALLOW_COPY_BETWEEN_PREDICATE_AND_WHERE_INSTRUCTIONS + - this case needs to be implemented carefully - I only sketched it a bit, so + it isn't tested either + */ + + void updateUsesOfRegUntilCOPY(MachineBasicBlock::iterator &Ipredicate, + // We start replacing uses from Ipredicate + 1 + MachineBasicBlock::iterator &I2, // COPY + MachineBasicBlock::iterator &IE, + unsigned regCrt, + unsigned regNew) { + LLVM_DEBUG(dbgs() << " I2 = " << *I2); + + /* We update all following occurences of the dest register + of COPY instr (which was also the dest register of the + predicate) + - for both uses and def, until 1st def. */ + MachineBasicBlock::iterator Iupdate; + Iupdate = Ipredicate; + Iupdate++; + + for (; Iupdate != I2 && Iupdate != IE; Iupdate++) { + LLVM_DEBUG(dbgs() << " Iupdate = " << *Iupdate); + + /* IMPORTANT: we go in reverse order to make the def last since we + break at def. */ + for (int idOpnd = Iupdate->getNumOperands() - 1; idOpnd >= 0; idOpnd--) { + MachineOperand &IOpnd = Iupdate->getOperand((unsigned)idOpnd); + + if (IOpnd.isReg() && IOpnd.getReg() == regCrt) { + LLVM_DEBUG(dbgs() << "updateUsesOfRegUntilCOPY(): Updating to " + "regNew the register of Iupdate. 
" + " Iupdate = " + << *Iupdate); + + /* + // This does NOT hold because we can have uses of a COPY instr dest + // register before the COPY - see the big WHERE block of ADD.f16 + assert( (Iupdate->getOpcode() == Connex::WHEREEQ || + Iupdate->getOpcode() == Connex::WHERELT || + Iupdate->getOpcode() == Connex::WHERECRY) && + "We should NOT be arriving here otherwise."); + */ + + if (IOpnd.isDef()) { + // We break + Iupdate = IE; Iupdate--; // We make it break out of outermost loop + break; + } + + IOpnd.setReg(regNew); + } + } + } + } + + + void putCOPYBeforeWhereBlock(MachineBasicBlock &MBB, + const TargetInstrInfo *TII, + //MachineBasicBlock::iterator &I, + MachineInstr *IMI, // The WHERE* instruction + MachineBasicBlock::iterator &I2, // COPY + MachineBasicBlock::iterator &I2plus1, + MachineBasicBlock::iterator &IE, + bool &changedMF, + int &destRegisterPredicateOfSplitWhere) { + /* NOTE: I2 is the COPY instruction + if (I2.getOperand(0) == Ipredicate.getOperand(0)) + for each instruction from Ipredicate to I2 - 1 replace defs and uses of + I2.getOperand(0) with CONNEX_RESERVED_REGISTER_01 + */ + + /* + Moving COPY before the WHERE block. + + Normally we move the COPY instructions and put them + in the same order before the predicate. + + important-Note: If we have 2 COPY with the same dest register, + the WHERE block will be surely split at least for + the 2nd COPY. For example, from MatMul-256.f16: + + R(11) = R(23) == R(1); + NOP; + ); + EXECUTE_WHERE_EQ( + R(19) = ISHL(R(21), 10); + // Assume it's not here: R(19) = R(10) | R(19); + // Assume it's not here: R(25) = R(1) & R(10); + R(10) = R(0) | R(0); // COPY + R(10) = R(26) - R(1); + R(11) = R(1) << R(11); + R(10) = R(0) | R(0); // COPY + R(10) = R(11) & R(20); + The 2nd COPY forces the WHERE to be split + - it's actually a different variable. + + Note: although not important, in principle we could + have non-SPECIALV_H instrs inside WHERE blocks if + the register is NOT initialized. 
*/ + LLVM_DEBUG(dbgs() << " moving I2 immediately before the " + "predicate instruction linked to the " + "WHERE block\n"); + + MachineBasicBlock::iterator Ipredicate = IMI; + LLVM_DEBUG(dbgs() << " IMI = " + << *IMI << "\n"); + Ipredicate--; + LLVM_DEBUG(dbgs() << " Ipredicate = " + << *Ipredicate << "\n"); + + /* + if (Ipredicate->getOpcode() != Connex::NOP_BPF) + LLVM_DEBUG(dbgs() << "PassHandleMisplacedInstr: Warning: " + "Ipredicate->getOpcode() != Connex::NOP_BPF\n"); + */ + assert(Ipredicate->getOpcode() == Connex::NOP_BPF + //|| Ipredicate->getOpcode() == Connex::NOP + ); + + /* Ipredicate is pointing at 2 instructions before the + WHERE* instruction, normally at the predicate + instruction.*/ + Ipredicate--; + + LLVM_DEBUG(dbgs() << " Ipredicate = " + << *Ipredicate << "\n"); + + // IMPORTANT-TODO: check better: check for right (w.r.t. WHERE) predicate instruction before NOP + assert(Ipredicate->getOpcode() == Connex::EQ_H || + Ipredicate->getOpcode() == Connex::LT_H || + Ipredicate->getOpcode() == Connex::ULT_H //); + || + // This is for the case of using lane gating instructions (DISABLE_CELL, ENABLE_ALL_CELLS) + Ipredicate->getOpcode() == Connex::EQ_SPECIAL_H || + Ipredicate->getOpcode() == Connex::LT_SPECIAL_H || + Ipredicate->getOpcode() == Connex::ULT_SPECIAL_H); + + + assert(Ipredicate->getOperand(0).isReg() && + Ipredicate->getOperand(0).isDef()); + assert(I2->getOperand(0).isReg() && + I2->getOperand(0).isDef()); + + + /* + // This case can be handled (ONLY) by splitting WHERE block: + #ifndef ALLOW_COPY_BETWEEN_PREDICATE_AND_WHERE_INSTRUCTIONS + assert(I2->getOperand(1).getReg() != Ipredicate->getOperand(0).getReg() && + "We reached a case that's not treatable by to implement this case!"); + #endif + */ + + /* Checking for WAR/anti-dependence between predicate and COPY instruction + - if so, then changing order (moving COPY before predicate) compromises + correctness so we make a copy of the respective predicate input. 
*/ + // I2 is the COPY instruction + assert( I2->getOperand(0).isReg() && I2->getOperand(0).isDef() ); + // + // Ipredicate is the predicate instruction + assert( Ipredicate->getOperand(1).isReg() && + Ipredicate->getOperand(1).isUse() ); + assert( Ipredicate->getOperand(2).isReg() && + Ipredicate->getOperand(2).isUse() ); + // + bool sameOpnd1 = + Ipredicate->getOperand(1).getReg() == I2->getOperand(0).getReg(); + bool sameOpnd2 = + Ipredicate->getOperand(2).getReg() == I2->getOperand(0).getReg(); + // + if (sameOpnd1 || sameOpnd2) { + LLVM_DEBUG(dbgs() << + "Moving COPY before WHERE predicate breaks WAR/anti-dependence " + "relation between COPY and predicate. " + "--> fixing the problem by making copy of predicate input.\n"); + + /* TODO???: if Ipredicate has a use of the dest register of EQ???????????? + then add: a) an instr before COPY with + CONNEX_RESERVED_REGISTER_01 = Rinput_EQ | Rinput_EQ + */ + + /* We preserve the input register of the predicate instruction since it + will be overwritten by the moved (before the predicate) + COPY instruction: + we make a copy: + CONNEX_RESERVED_REGISTER_01 = Rdst_COPY | Rdst_COPY + */ + #ifndef ALLOW_COPY_BETWEEN_PREDICATE_AND_WHERE_INSTRUCTIONS + #ifdef COPY_REGISTER_IMPLEMENTED_WITH_ORV_H + BuildMI(MBB, + Ipredicate, + /* We insert this MachineInstr before Ipredicate. + Also the COPY I2 we move after this, after Ipredicate, + so I2 will be moved after this new copy */ + IMI->getDebugLoc(), + TII->get(Connex::ORV_H), + CONNEX_RESERVED_REGISTER_01). + addReg(I2->getOperand(0).getReg()). + /* Note: I2 (COPY) does NOT necessarily have the + same dest register as Ipredicate. */ + addReg(I2->getOperand(0).getReg()); + #else + #error "This case is NOT implemented. Implement it!" + #endif + #endif + /* This really helps a lot since the COPY moved before + Ipredicate should be visible inside the WHERE block, + so then we need to make the Ipredicate destination a reserved reg. 
+ Chances are big (but it's not necessary to be so I think) that since + sameOpnd1 || sameOpnd2, then we can have Ipredicate with + Ipredicate->getOperand(0) == I2->getOperand(0); + and if we leave it like that then we shadow the COPY. + . */ + if (Ipredicate->getOperand(0).getReg() == I2->getOperand(0).getReg()) + Ipredicate->getOperand(0).setReg(CONNEX_RESERVED_REGISTER_01); + + // Note: Ipredicate is the predicate instruction + /* These checks handle also the case both input operands of Ipredicate + are the same. + */ + if (sameOpnd1) + Ipredicate->getOperand(1).setReg(CONNEX_RESERVED_REGISTER_01); + if (sameOpnd2) + Ipredicate->getOperand(2).setReg(CONNEX_RESERVED_REGISTER_01); + + /* We now normally have to update the uses of modified input of + Ipredicate for the following instructions between the predicate + and the place where the COPY was. + However, the instructions using the input after predicate are + only the ones in the WHERE block basically. + */ + updateUsesOfRegUntilCOPY(Ipredicate, + I2, // COPY + IE, + I2->getOperand(0).getReg(), + CONNEX_RESERVED_REGISTER_01); + } + else // MEGA-TODO: think if OK + if (Ipredicate->getOperand(0).getReg() == I2->getOperand(0).getReg()) { + // If we have a WAW (output) dependendce + // Note: Ipredicate is the predicate, I2 is the COPY + LLVM_DEBUG(dbgs() << + " Found that the COPY to be moved " + "immediately before the predicate of the " + "WHERE block has the same destination register as the predicate. " + "This forces us to handle specially " + "the predicate instr dest register, " + "since this dest " + "register is the same as the one of the " + "COPY (hence, a WAW dependence is broken " + "and the program would become incorrect " + "otherwise).\n"); + + /* We update dest register of of Ipredicate (predicate) + due to conflict with I2, which we move before it. 
*/ + /* + if (destRegisterPredicateOfSplitWhere != -1) + Ipredicate->getOperand(0).setReg(destRegisterPredicateOfSplitWhere); + else + Ipredicate->getOperand(0).setReg(CONNEX_RESERVED_REGISTER_01); + */ + Ipredicate->getOperand(0).setReg(CONNEX_RESERVED_REGISTER_02); + // + updateUsesOfRegUntilCOPY(Ipredicate, + I2, // COPY + IE, + I2->getOperand(0).getReg(), + CONNEX_RESERVED_REGISTER_02); + } + + // We move the COPY instruction before the predicate + MBB.remove((&(*I2))); + //MBB.insert(IMI, I2); // It inserts before IMI + #ifdef ALLOW_COPY_BETWEEN_PREDICATE_AND_WHERE_INSTRUCTIONS + MBB.insert(Ipredicate, IMI); // It inserts immediately before the WHERE instr + #else + MBB.insert(Ipredicate, (&(*I2))); // It inserts before Ipredicate + #endif + changedMF = true; + + // We handle the case of more than 1 COPY instr in the WHERE block +// I2plus1 represents the next instr after the COPY (before move) + I2 = I2plus1; + } // END putCOPYBeforeWhereBlock() + + + void splitWhereBlock(MachineBasicBlock &MBB, + const TargetInstrInfo *TII, + MachineBasicBlock::iterator &I, + MachineInstr *&IMI, + MachineBasicBlock::iterator &I2, // COPY instr + MachineBasicBlock::iterator &IE, + bool &changedMF, + int &destRegisterPredicateOfSplitWhere) { + /* This case handles only the cases we ran so far. + See MEGA-TODO for limitation of this case. */ + changedMF = true; + + LLVM_DEBUG(dbgs() << " splitWhereBlock(): IMI = " + << *IMI); + LLVM_DEBUG(dbgs() << " splitWhereBlock(): I2 = " + << *I2 << "\n"); + + /* TODO TODO: handle case + where we have COPY between 2 instr like ADD and + ADDC, which is incorrect because the COPY messes + up the Connex flags. 
*/ + MachineBasicBlock::iterator I2plus1 = I2; + I2plus1++; + // I think this does NOT cover all cases but most of them + assert(I2plus1->getOpcode() != Connex::ADDCV_H && + I2plus1->getOpcode() != Connex::SUBCV_H && + I2plus1->getOpcode() != Connex::ADDCV_SPECIAL_H && + I2plus1->getOpcode() != Connex::SUBCV_SPECIAL_H && + "We do NOT handle yet ADDCV/SUBCV instructions immediately after COPY " + "for this case (and the corresponding ADD/SUB before the COPY)"); + + LLVM_DEBUG(dbgs() << " splitting WHERE block in 2 s.t. we put I2 immediately " + "after new END_WHERE resulting from split.\n"); + // I = beginning of new WHERE block + //const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + + MachineBasicBlock::iterator Ipredicate = IMI; + // We make Ipredicate point to the predicate of this WHERE + // block + Ipredicate--; + LLVM_DEBUG(dbgs() << " splitWhereBlock(): Ipredicate = " + << *Ipredicate << "\n"); + assert(Ipredicate->getOpcode() == Connex::NOP_BPF); + Ipredicate--; + LLVM_DEBUG(dbgs() << " splitWhereBlock(): Ipredicate (2 instr before) = " + << *Ipredicate << "\n"); + + unsigned regDest = CONNEX_RESERVED_REGISTER_02; + int changedPredicateOpnd = -1; + + // We check Ipredicate, the predicate, is 3-opcode + assert( + ( + ( + // For the standard case: + (Ipredicate->getOpcode() == Connex::EQ_H || + Ipredicate->getOpcode() == Connex::LT_H || + Ipredicate->getOpcode() == Connex::ULT_H + ) && + Ipredicate->getNumOperands() == 3 + ) + || + ( + // For disabled lane gating regions + ( + Ipredicate->getOpcode() == Connex::EQ_SPECIAL_H || + Ipredicate->getOpcode() == Connex::LT_SPECIAL_H || + Ipredicate->getOpcode() == Connex::ULT_SPECIAL_H + ) && + Ipredicate->getNumOperands() == 4 + ) + ) + && + Ipredicate->getOperand(0).isReg() && + Ipredicate->getOperand(0).isDef() && + Ipredicate->getOperand(1).isReg() && + Ipredicate->getOperand(1).isUse() && + Ipredicate->getOperand(2).isReg() && + Ipredicate->getOperand(2).isUse() + ); + + unsigned 
predicateInstrOpnd[2]; + predicateInstrOpnd[0] = Ipredicate->getOperand(1).getReg(); + predicateInstrOpnd[1] = Ipredicate->getOperand(2).getReg(); + + destRegisterPredicateOfSplitWhere = Ipredicate->getOperand(0).getReg(); + LLVM_DEBUG(dbgs() + << "PassHandleMisplacedInstr: destRegisterPredicateOfSplitWhere = " + << destRegisterPredicateOfSplitWhere + << "\n"); + + /* + assert( (predicateInstrOpnd[0] != CONNEX_RESERVED_REGISTER_02) && + (predicateInstrOpnd[1] != CONNEX_RESERVED_REGISTER_02) && + // MEGA-MEGA-TODO: implement this - it happens for ADD/MUL.f16 + "We currently can't handle these cases because we have only 1 reserved register."); + */ + unsigned predicateInstrOpcode = Ipredicate->getOpcode(); + unsigned predicateInstrOpndAux[2]; + + /* We look if predicateInstrOpnd[*] is updated/redefined + either in the predicate instruction or in the + instructions of the + associated WHERE block before the COPY instr. + - i.e., if predicateInstrOpnd[1] changes then + use it as predicateInstrOpnd[0]. + If NO change happens we do NOT need to save the + value of predicateInstrOpnd[*], i.e., to create + ORV_H below. + + We check this from Ipredicate(+1) (next instr after predicate) to I2(-1) + (COPY instr, exclusive). + We check if any of the operands of the predicate change. + NOTE: assert (if both change - we don't want to waste by reserving 2 + Connex registers - maybe we can change the Connex ASM code by hand + to avoid this). 
+ */ + /* + if (Ipredicate->getOperand(0).getReg() == + Ipredicate->getOperand(1).getReg()) { + // We changed the 1st input operand of the predicate + changedPredicateOpnd = 0; + } + else + if (Ipredicate->getOperand(0).getReg() == + Ipredicate->getOperand(2).getReg()) { + // We changed the 2nd input operand of the predicate + changedPredicateOpnd = 1; + } + */ + + MachineBasicBlock::iterator Iaux = Ipredicate; + //Iaux++; + MachineBasicBlock::iterator IauxEnd = I2; // I2 is COPY + + #define TREAT_ONLY_ONCE_CHANGE_PREDICATE_OPERANDS + #ifdef TREAT_ONLY_ONCE_CHANGE_PREDICATE_OPERANDS + IauxEnd++; + #endif + //IauxEnd--; + /* IMPORTANT: for the NEW predicate we don't care what we use for the + destination register. + + We now check for the NEW predicate we create for the split if its input + operands are updated between the + original_predicate..COPY_instr */ + for (; Iaux != IauxEnd && Iaux != IE; Iaux++) { + LLVM_DEBUG(dbgs() << " splitWhereBlock(): Iaux = " + << *Iaux << "\n"); + if (Iaux->getNumOperands() >= 1 && Iaux->getOperand(0).isReg() && + Iaux->getOperand(0).isDef()) { + if (Iaux->getOperand(0).getReg() == predicateInstrOpnd[0]) { + assert((changedPredicateOpnd == -1 || changedPredicateOpnd == 0) && + // MEGA-TODO: handle this assert violation case + "It seems both input operands of the " + "predicate get updated so we would need to " + "reserve 2 Connex registers to handle well " + "this case."); + // We find that we subsequently change the 1st input operand of the predicate + changedPredicateOpnd = 0; + } + else + if (Iaux->getOperand(0).getReg() == predicateInstrOpnd[1]) { + /* We find that we subsequently change + the 2nd input operand of the predicate */ + assert((changedPredicateOpnd == -1 || changedPredicateOpnd == 1) && + // MEGA-TODO: handle this assert violation case + "It seems both input operands of the " + "predicate get updated so we would need " + "to reserve 2 Connex registers to handle " + "well this case."); + changedPredicateOpnd = 1; + 
} + } + } + + LLVM_DEBUG(dbgs() << " changedPredicateOpnd = " + << changedPredicateOpnd + << " (for the input operands of the predicate)\n"); + + if (changedPredicateOpnd == -1) { + //regDest = predicateInstrOpnd[0]; + predicateInstrOpndAux[0] = predicateInstrOpnd[0]; + predicateInstrOpndAux[1] = predicateInstrOpnd[1]; + } + else { + /* Put a copy of the changed input register of the predicate instruction + before Ipredicate, the initial predicate of this WHERE block. */ + #ifdef COPY_REGISTER_IMPLEMENTED_WITH_ORV_H + if (regDest != predicateInstrOpnd[changedPredicateOpnd]) { + BuildMI(MBB, + Ipredicate, + IMI->getDebugLoc(), + TII->get(Connex::ORV_H), + regDest). // The reserved register, CONNEX_RESERVED_REGISTER_02 + addReg(predicateInstrOpnd[changedPredicateOpnd]). + addReg(predicateInstrOpnd[changedPredicateOpnd]); + } + #else + #error "This case is NOT implemented. Implement it!" + #endif + + /* + predicateInstrOpndAux[0] = regDest; // Reserved register + predicateInstrOpndAux[1] = predicateInstrOpnd[1 - changedPredicateOpnd]; + */ + predicateInstrOpndAux[changedPredicateOpnd] = CONNEX_RESERVED_REGISTER_02; // regDest + predicateInstrOpndAux[1 - changedPredicateOpnd] = + predicateInstrOpnd[1 - changedPredicateOpnd]; + } + + LLVM_DEBUG(dbgs() << " predicateInstrOpndAux[0] = " + << predicateInstrOpndAux[0] + << "\n"); + LLVM_DEBUG(dbgs() << " predicateInstrOpndAux[1] = " + << predicateInstrOpndAux[1] + << "\n"); + + MachineBasicBlock::iterator I2succ = I2; + I2succ++; + BuildMI(MBB, + I2, // Immediately before the COPY instr + IMI->getDebugLoc(), + TII->get(Connex::END_WHERE) + //, I2->getOperand(0).getReg() + ); + LLVM_DEBUG(dbgs() << " Finished creating the END_WHERE\n"); + + #ifndef TREAT_ONLY_ONCE_CHANGE_PREDICATE_OPERANDS + /* + // Ipredicate is predicate + // Unnecessary check: + assert(Ipredicate->getOperand(0).getReg() != + I2->getOperand(0).getReg()); + */ + /* + This check is actually VAGUELY different from the one above because + the one above inserts 
a register save (copy) instruction before the original WHERE, + while this new one after the new END_WHERE resulting from the split. + VERY IMPORTANT Note: the new predicate WHERE can have the result stored in RESERVED_REGISTER. + * We now check for conflicts between: + - destination register operand of COPY and + - input registers of predicate instruction. + * + * Note: I2 is the COPY instruction that triggered the split of WHERE block. + * + * Addressing the case, where after the split of WHERE* block we have something + * like this immediately after the 1st new WHERE* block, before the 2nd + * WHERE* block, where the repeated predicate instruction (repeated by us) + * happens to use the register defined in the COPY instruction, which makes + * the computation incorrect: + * END_WHERE; + * R(26) = R(10) | R(10); // This COPY instruction is the reason of the split + * R(30) = R(26) < R(3); + * NOP + * WHERE* + * + * Note: R(30) (CONNEX_RESERVED_REGISTER_01) is a reserved register. + * + * To correct the problem in this example we have to copy the value of R(26) + * in R(30): + * END_WHERE; + * R(30) = R(26) | R(26); + * R(26) = R(10) | R(10); // This COPY instruction is the reason of the split + * R(30) = R(30) < R(3); + * NOP + * WHERE* + */ + int changeInputPredicateOperandsDueToCOPY = 0; + if (predicateInstrOpnd[0] == I2->getOperand(0).getReg()) { + changeInputPredicateOperandsDueToCOPY |= 1; + } + if (predicateInstrOpnd[1] == I2->getOperand(0).getReg()) { + changeInputPredicateOperandsDueToCOPY |= 2; + } + // + assert(changeInputPredicateOperandsDueToCOPY != 3 && + // important-TODO: handle this assert violation case + "We shouldn't have such a case - doesn't really make sense for a " + "conditional to have both operands equal."); + + LLVM_DEBUG(dbgs() << " changeInputPredicateOperandsDueToCOPY = " + << changeInputPredicateOperandsDueToCOPYMBB << "\n"); + /* + assert(! 
(changedPredicateOpnd != -1 && changeInputPredicateOperandsDueToCOPY != 0) && + // TODO: if not merging the 2 cases together, handle this assert violation case, + "We currently can't handle both cases simultaneously."); + */ + // + if (changeInputPredicateOperandsDueToCOPY != 0) { + LLVM_DEBUG(dbgs() << " PassHandleMisplacedInstr::runOnMachineFunction(): correcting " + "the conflicting register (due to the COPY) in the " + "predicate instruction\n"); + MachineBasicBlock::iterator Icorrect = I2succ; + //Icorrect++; + #ifdef COPY_REGISTER_IMPLEMENTED_WITH_ORV_H + BuildMI(MBB, + Icorrect, // We insert this MachineInstr after the new END_WHERE, before the COPY instr + IMI->getDebugLoc(), + TII->get(Connex::ORV_H), + CONNEX_RESERVED_REGISTER_02). + addReg(I2->getOperand(0).getReg()). + addReg(I2->getOperand(0).getReg()); + #else + #error "This case is NOT implemented. Implement it!" + #endif + + + /* Note: Ipredicate is the predicate for the 1st (part) WHERE* block. + //Ipredicate->getOperand(1).setReg(CONNEX_RESERVED_REGISTER_02); */ + + LLVM_DEBUG(dbgs() << "PassHandleMisplacedInstr: after WHERE block processed: MBB = "; + MBB.dump()); + // We check that we don't mess up the program - TODO we should also check that the iterators are not messed up + /* + for (MachineBasicBlock::iterator Inew = MBB.begin(), + IEnew = MBB.end(); Inew != IEnew; ++Inew) { + //MachineInstr *IMI = I; + LLVM_DEBUG(dbgs() << " runOnMachineFunction(): Inew = " + << *Inew << "\n"); + } + */ + } + #endif // END ifndef TREAT_ONLY_ONCE_CHANGE_PREDICATE_OPERANDS + + + // I2succ++; + LLVM_DEBUG(dbgs() << " moving I2 immediately after END_WHERE of " + "split WHERE block\n"); + + /* VERY IMPORTANT: We create another predicate, a NOP and a new WHERE* + instructions, identical with the (previous) one associated to the + WHERE block, EXCEPT the destination register is + CONNEX_RESERVED_REGISTER_02 - this is safe. 
*/ + BuildMI(MBB, + I2succ, // We insert new instr immediately before I2succ + IMI->getDebugLoc(), + TII->get(predicateInstrOpcode), + #define NEW2018_08_11 + #ifdef NEW2018_08_11 + CONNEX_RESERVED_REGISTER_03 + #else + /* destRegisterPredicateOfSplitWhere is made -1 only after + iterating over END_WHERE, below + */ + destRegisterPredicateOfSplitWhere != -1 ? + destRegisterPredicateOfSplitWhere : + regDest // It is CONNEX_RESERVED_REGISTER_02 + #endif + ). + /* We now change the conflicting register in the predicate + * instruction. + */ + #ifdef TREAT_ONLY_ONCE_CHANGE_PREDICATE_OPERANDS + addReg((changedPredicateOpnd == 0) ? + #else + addReg(((changeInputPredicateOperandsDueToCOPY & 1) == 1) ? + #endif + (unsigned)CONNEX_RESERVED_REGISTER_02 : + predicateInstrOpndAux[0]). //predicateInstrOpnd1). + #ifdef TREAT_ONLY_ONCE_CHANGE_PREDICATE_OPERANDS + addReg((changedPredicateOpnd == 1) ? + #else + addReg(((changeInputPredicateOperandsDueToCOPY & 2) == 2) ? + #endif + (unsigned)CONNEX_RESERVED_REGISTER_02 : + predicateInstrOpndAux[1]); + + BuildMI(MBB, + I2succ, + IMI->getDebugLoc(), + TII->get(Connex::NOP_BPF)); + // TODO: maybe add an addImm(0)?, although it works without + + // We add the same WHERE instr as the one for this block + /* This gives the following error: + <getParent() && "machine instruction already in a basic block"' failed.>> + MBB.insert(I2succ, IMI); // before I2succ + */ + LLVM_DEBUG(dbgs() << " splitWhereBlock(): IMI (for split) = " + << *IMI << "\n"); + /* From http://llvm.org/doxygen/MachineInstrBuilder_8h_source.html#l00312: + "inserts the newly-built instruction before the given position". 
*/ + /* + IMI = I2succ; + LLVM_DEBUG(dbgs() << " IMI = I2succ = " + << *IMI << "\n"); + IMI--; // IMPORTANT: This makes IMI NULL since IMI is a MachineInstr - see /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/DawnCC/35l_MatMul_f16/SIZE_128/L/STDerr_llc_01_old17 + */ + // See good comments on iterator invalidation: http://llvm.1065342.n5.nabble.com/deleting-or-replacing-a-MachineInst-td77723.html + I = BuildMI(MBB, + I2succ, // We insert new instr immediately before I2succ + IMI->getDebugLoc(), + TII->get(IMI->getOpcode()), + regDest + ); + + // TODO TODO TODO TODO: understand if it generates (due to iterator invalidation??) another END_WHERE - see /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/DawnCC/25k_map/MUL_i32/!!/5_GOOD/output_old06.cpp + + // NOTE: I is the new WHERE* instruction just created + // We update I2 to check for more COPY instrs after the new created WHERE + I2 = I; I2++; + + // We update IMI since we insert COPY before predicate of WHERE using IMI + IMI = (&(*I)); + + //MachineBasicBlock::iterator Iaux10 = I2succ; Iaux10--; + LLVM_DEBUG(dbgs() << " I2succ = " + << *I2succ << "\n"); + LLVM_DEBUG(dbgs() << " IMI = " + << *IMI << "\n"); + LLVM_DEBUG(dbgs() << " I = " + << *I << "\n"); + LLVM_DEBUG(dbgs() << " I2 = " + << *I2 << "\n"); + + //break; + //assert(); + LLVM_DEBUG(dbgs() << " To check: IMI = " + << *IMI << "\n"); + + LLVM_DEBUG(dbgs() + << "splitWhereBlock(): after splitting WHERE block in 2: MBB = "; + MBB.dump()); + } // END splitWhereBlock() + + + /// \brief Loop over all of the basic blocks + bool runOnMachineFunction(MachineFunction &MF) { + bool changedMF = false; + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineFunction.html + LLVM_DEBUG(dbgs() << "Entered PassHandleMisplacedInstr::runOnMachineFunction(MF = " + //; MF.dump(); + << MF.getName() + //dbgs() + << ")\n"); + //bool Changed = false; + + // Process all basic blocks. 
+ for (auto &MBB : MF) { + //int anotherReservedRegister = -1; + int destRegisterPredicateOfSplitWhere = -1; + + // For the current MBB: + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineBasicBlock.html + LLVM_DEBUG(dbgs() + << "PassHandleMisplacedInstr::runOnMachineFunction(): a new MBB = " + << MBB + << "\n"); + + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineBasicBlock.html + LLVM_DEBUG(dbgs() + << "PassHandleMisplacedInstr::runOnMachineFunction(): again MBB = " + << MBB + << "\n"); + + for (MachineBasicBlock::iterator I = MBB.begin(), + IE = MBB.end(); I != IE; ++I) { + MachineInstr *IMI = (&(*I)); + /* + if (IMI == &MI) + I++; + // predMI contains normally instruction VLOAD_H_SYM_IMM + break; + */ + // predMI = I; + LLVM_DEBUG(dbgs() << " runOnMachineFunction(): I = " + << *I << "\n"); + LLVM_DEBUG(dbgs() << " runOnMachineFunction(): DontTreatCopyInstructions = " + << DontTreatCopyInstructions << "\n"); + + if (DontTreatCopyInstructions == false) { + /*IMPORTANT: we move the COPY instructions outside + the WHERE block, just like the ARM/Thumb2ITBlockPass.cpp + does (the ARM pass is also registered in addPreSched2()). + Note that moving COPY instrs before WHERE (ARM IT) blocks + (as it seems ARM surprisingly is doing, since + MBB::insert(iterator, MI) does "Insert MI into the + instruction list before I, possibly inside a bundle.") + can change semantics in most cases. + + IMPORTANT: This is where we remove any COPY instructions + generated by the TwoAddressInstructionPass and not erased + by RegisterCoalescer (transformed + into ORV_H) instructions inside WHERE* blocks. + This is to handle cases like sequences of manually + selected instructions in ConnexISelDAGToDAG for MULi32, DIVi16, etc. 
+ */ + if (IMI->getOpcode() == Connex::WHEREEQ || + IMI->getOpcode() == Connex::WHERELT || + IMI->getOpcode() == Connex::WHERECRY) { + LLVM_DEBUG(dbgs() << "runOnMachineFunction(): found WHERE* block\n"); + + /* Removing useless COPY immediately before WHERE* block + * (between NOP and WHERE*, where it should normally be put). + * It is useless - we eye-balled seriously on a few + * programs, most notably SSD.f16 on Jul 29-30 2018 + * (I guess - MEGA-TODO: check if so) always because it is + * generated by the WHERE* instruction and, + * therefore, it's NOT required. + * important-TODO: we should take care of COPY + * instructions being moved by the post-RA scheduler. */ + MachineBasicBlock::iterator ItmpToErase = IMI; + ItmpToErase--; + if (ItmpToErase->getOpcode() != Connex::NOP_BPF + //|| ItmpToErase->getOpcode() == Connex::NOP + ) { + #ifdef COPY_REGISTER_IMPLEMENTED_WITH_ORV_H + if (ItmpToErase->getOpcode() == Connex::ORV_H) { + #else + #error "This case is NOT implemented. Implement it!" + #endif + MachineInstr *Iremove = (&(*ItmpToErase)); + //ItmpToErase--; + + /* We assert this COPY is related to the WHERE* + instruction - if NOT, then the COPY was moved + probably by the post-RA scheduler here. + */ + assert(Iremove->getOperand(0).isReg() && + Iremove->getOperand(0).isDef() && + Iremove->getOperand(0).getReg() == IMI->getOperand(0).getReg() + ); + + + /* Checking that it is really safe to remove this COPY + since it is not used by any instruction after it. 
+ */ + MachineBasicBlock::iterator Icheck = I; + // We jump over the WHERE* instruction found + Icheck++; + LLVM_DEBUG(dbgs() << " runOnMachineFunction(): Icheck = " + << *Icheck << "\n"); + // Iterating over all remaining instructions of the BB + for (; Icheck != IE; Icheck++) { + LLVM_DEBUG(dbgs() << " Icheck = " << *Icheck); + if (Icheck->getNumOperands() > 0 && + Icheck->getOperand(0).isReg() && + Icheck->getOperand(0).getReg() == + Iremove->getOperand(0).getReg()) { + // It normally has to be a def - if it's a use it's bad + assert(Icheck->getOperand(0).isDef() && + "PassHandleMisplacedInstr: Found a 'useless' COPY " + "that is not useless since it is used after... - " + "this is not good --> change ConnexTargetMachine.cpp"); + break; + } + } + + LLVM_DEBUG(dbgs() << " Removing useless COPY immediately " + "before the WHERE block.\n"); + + MBB.remove(Iremove); + } + } + + + MachineBasicBlock::iterator I2 = I; // + 1; + // We jump over the WHERE* instruction found + I2++; + LLVM_DEBUG(dbgs() << " runOnMachineFunction(): I2 = " + << *I2 << "\n"); + + //continue; + + // Iterating over all remaining instructions of the BB + for (; I2 != IE; /* I2++ */) { + LLVM_DEBUG(dbgs() << " I2 = " << *I2); + + // TO_ADAPT: currently copyPhysReg() is implemented with ORV_H + /* IMPORTANT: NORMALLY, inside WHERE blocks generated + with Opincaa lib's Kernel::genLLVMISelManualCode(), + we are guaranteed to have only ORV_SPECIAL_H Connex + instructions, so meeting an ORV_H is only when a COPY + was generated by the TwoAddressInstructionPass. */ + if ( + #ifdef COPY_REGISTER_IMPLEMENTED_WITH_ORV_H + I2->getOpcode() == Connex::ORV_H + #else + #error "This case is NOT implemented. Implement it!" + #endif + || I2->getOpcode() == Connex::LD_FILL_H) { + // MEGA-TODO: || I2->getOpcode() == Connex::ST_FILL_H + /* The ORV_H instruction implemented in copyPhysReg() + has both input operands equal. + NOTE: the destination register of any instruction + I is I->getOperand(0). 
+ */ + + #ifdef COPY_REGISTER_IMPLEMENTED_WITH_ORV_H + if (I2->getOpcode() == Connex::ORV_H) + assert(I2->getOperand(1).getReg() == + I2->getOperand(2).getReg() && + "I2 is an ORV_H with different input operands. " + "Maybe too paranoid check: We do not " + "recommend to have emulation Opincaa kernels " + "generated by Kernel::genLLVMISelManualCode() " + "with ORV_H inside WHERE blocks (if these " + "instructions come from there). But you " + "can comment this assert and issue a simple " + "warning."); + /* + if (I2->getOperand(1).getReg() != + I2->getOperand(2).getReg()) + LLVM_DEBUG(dbgs() << "PassHandleMisplacedInstr: Warning: " + "I2->getOperand(1).getReg() != " + "I2->getOperand(2).getReg()\n\n"); + */ + #endif + + + /* From http://llvm.org/doxygen/MachineBasicBlock_8h_source.html: + MBB::insert(iterator, MI) + "Insert MI into the instruction list before I, possibly inside a bundle. + */ + LLVM_DEBUG(dbgs() << " found COPY/LD_FILL at I2 = " << *I2 + << " --> moving it out of the WHERE block to " + "preserve correct program semantics.\n"); + + /* We should move I2 before or after the WHERE block, + * or split the WHERE block in 2. */ + /* The algo is (a sketch that MIGHT NOT reflect + totally the implementation): + NOTE: this is the case that allows having COPY between + predicate and WHERE instr. 
+ If the COPY doesn't use (doesn't have as source) + a register defined in the WHERE block + BEFORE the COPY (NO RAW/flow dependence relation to be broken) + and also the COPY doesn't define a register + that is used by an instruction before + (NO WAR/anti-dependence relation to be broken): + We move the COPY exactly before the + WHERE instruction starting the block + Else + If the COPY doesn't use (doesn't have as source) + a register defined in the WHERE block, + after the COPY (NO WAR dep broken) + and also the COPY doesn't define a register + used by an instruction after it (NO RAW dep broken): + We move the COPY exactly after the END_WHERE + instruction ending the block + Else + Moving the COPY immediately before/after + the WHERE block is UNsafe and + would change semantics program + The solution is to split the WHERE block in + two and for the 2nd WHERE block to copy the + predicate (together with a NOP) just + before it. + */ + + #ifdef ALLOW_COPY_BETWEEN_PREDICATE_AND_WHERE_INSTRUCTIONS + MachineBasicBlock::iterator I3 = IMI; // IMI is WHERE instr + LLVM_DEBUG(dbgs() << " I3 = " + << *I3 << "\n"); + + I3--; + LLVM_DEBUG(dbgs() << " I3 (after 1 -)= " + << *I3 << "\n"); + + assert(I3->getOpcode() == Connex::NOP || + I3->getOpcode() == Connex::NOP_BPF); + + I3--; + LLVM_DEBUG(dbgs() << " I3 (after 2 -)= " + << *I3 << "\n"); + assert(I3->getOpcode() == Connex::EQ_H || + I3->getOpcode() == Connex::LT_H || + I3->getOpcode() == Connex::ULT_H); + #else + MachineBasicBlock::iterator I3 = IMI; // IMI is WHERE instr + I3++; + #endif + + #define SAFE_SINCE_NO_CONSTRAINT 0 + #define NOT_SAFE_TO_PUT_COPY_BEFORE_WHERE_BLOCK 1 + #define NOT_SAFE_TO_PUT_COPY_AFTER_WHERE_BLOCK 2 + #define SAFE_TO_PUT_COPY_IN_SPLIT_WHERE_BLOCK 3 + int whatToDo = SAFE_SINCE_NO_CONSTRAINT; + + //bool I2afterIsInsideWhereBlock = true; + bool I3IsBeforeI2 = true; + + // Remember: I2 points to the COPY instruction + for (; I3 != IE; I3++) { + if (I3->getOpcode() == Connex::END_WHERE) { + break; + 
} + + LLVM_DEBUG(dbgs() << " I3 = " + << *I3); + + if (I3 == I2) { + I3IsBeforeI2 = false; + continue; + } + LLVM_DEBUG(dbgs() << " I3IsBeforeI2 = " + << I3IsBeforeI2 << "\n"); + + // We look at all operands of instruction I3 + for (unsigned idOpnd = 0; idOpnd < I3->getNumOperands(); + idOpnd++) { + MachineOperand &I3Opnd = I3->getOperand(idOpnd); + + LLVM_DEBUG(dbgs() << " I3Opnd (index = " << idOpnd + << ") = " << I3Opnd << "\n"); + + if (I3Opnd.isReg() && I3Opnd.isUse()) { + // Remember: I2 points to the COPY instruction + if (I3Opnd.getReg() == I2->getOperand(0).getReg()) { + if (I3IsBeforeI2) { + // RBW dependence w.r.t. COPY (I2), which writes + // I3 uses the dst-register of I2 (the COPY instr) + LLVM_DEBUG(dbgs() << " I3, which is before I2, " + "uses the dst-register of I2 " + "--> moving I2 before the " + "WHERE block is NOT safe\n"); + + whatToDo |= NOT_SAFE_TO_PUT_COPY_BEFORE_WHERE_BLOCK; + /* + LLVM_DEBUG(dbgs() << " changing I2afterOpnd's reg to = " + << I2->getOperand(0).getReg() << "\n"); + I2afterOpnd.setReg(I2->getOperand(1).getReg()); + */ + } + else { // NOT I3IsBeforeI2 + // RAW dependence w.r.t. COPY (I2), which writes + // I3 uses the dst-register of I2 (the COPY instr) + LLVM_DEBUG(dbgs() << " I3, which is after I2, " + "uses the dst-register of I2 " + "--> moving I2 after the " + "WHERE block is NOT safe\n"); + + whatToDo |= NOT_SAFE_TO_PUT_COPY_AFTER_WHERE_BLOCK; + } + } + else + /* Although we are safe on the else branch, + we put this code here for "completness". 
+ */ + if ( + #ifdef COPY_REGISTER_IMPLEMENTED_WITH_ORV_H + I2->getOpcode() == Connex::ORV_H && + #endif + I3Opnd.getReg() == I2->getOperand(1).getReg()) { + // RAR dependence - NONE actually :) + if (I3IsBeforeI2) { + // I3 uses the dst-register of I2 (the COPY instr) + LLVM_DEBUG(dbgs() << " I3, which is before I2, " + "uses the src-register of I2 " + "--> everything is safe\n"); + + //whatToDo |= NOT_SAFE_TO_PUT_COPY_BEFORE_WHERE_BLOCK; + } + else { + // I3 uses the dst-register of I2 (the COPY instr) + LLVM_DEBUG(dbgs() << " I3, which is after I2, " + "uses the src-register of I2 " + "--> everything is safe\n"); + + //whatToDo |= NOT_SAFE_TO_PUT_COPY_AFTER_WHERE_BLOCK; + } + } + } // END I3Opnd.isUse() + else + if (I3Opnd.isReg() && I3Opnd.isDef()) { + // Remember: I2 points to the COPY instruction + if (I3Opnd.getReg() == I2->getOperand(0).getReg()) { + if (I3IsBeforeI2) { + // WAW dependence w.r.t. COPY (I2), which writes + // I3 defs the dst-register of I2 (the COPY instr) + LLVM_DEBUG(dbgs() << " I3, which is before I2, " + "defs the dst-register of I2 --> " + "moving I2 before the " + "WHERE block is NOT safe\n"); + + whatToDo |= NOT_SAFE_TO_PUT_COPY_BEFORE_WHERE_BLOCK; + } + else { + // WAW dependence w.r.t. COPY (I2), which writes + // I3 defs the dst-register of I2 (the COPY instr) + LLVM_DEBUG(dbgs() << " I3, which is after I2, " + "defs the dst-register of I2 --> " + "moving I2 after the " + "WHERE block is NOT safe\n"); + + whatToDo |= NOT_SAFE_TO_PUT_COPY_AFTER_WHERE_BLOCK; + } + } + else + if ( + #ifdef COPY_REGISTER_IMPLEMENTED_WITH_ORV_H + I2->getOpcode() == Connex::ORV_H && + #endif + I3Opnd.getReg() == I2->getOperand(1).getReg()) { + if (I3IsBeforeI2) { + // RAW dependence w.r.t. 
I3, which writes + // I3 defs the dst-register of I2 (the COPY instr) + LLVM_DEBUG(dbgs() << " I3, which is before I2, " + "defs the src-register of I2 --> " + "moving I2 before the " + "WHERE block is NOT safe\n"); + + whatToDo |= NOT_SAFE_TO_PUT_COPY_BEFORE_WHERE_BLOCK; + } + else { + // RBW dependence w.r.t. I3, which writes + // I3 defs the dst-register of I2 (the COPY instr) + LLVM_DEBUG(dbgs() << " I3, which is after I2, " + "defs the src-register of I2 --> " + "moving I2 after the " + "WHERE block is NOT safe\n"); + + whatToDo |= NOT_SAFE_TO_PUT_COPY_AFTER_WHERE_BLOCK; + } + } + } // END I3Opnd.isDef() + } // END for loop idOpnd + } // END for loop with ind-var I3 + + /* + * Note: + * I = main loop iterating over all instr of the MBB + * IMI = I; + * I2 + * if IMI == WHERE* + * I2 = I + 1; + * for (;; I2++) + * if I2 == ORV_H (or whatever is used to implement the COPY primitive) + * for (I3 = IMI + 1; ; I3++) // used to compute whatToDo; + if I3 == END_WHERE + break; + compute whatToDo; + */ + MachineBasicBlock::iterator I2plus1 = I2; + /* We need to increment it, otherwise it looks that + * I2 and I2plus1 are identical after remove() + * and insert() + */ + I2plus1++; + LLVM_DEBUG(dbgs() << " runOnMachineFunction(): I2plus1 = " + << *I2plus1 << "\n"); + LLVM_DEBUG(dbgs() << " runOnMachineFunction(): I2 (before moving I2) = " + << *I2 << "\n"); + LLVM_DEBUG(dbgs() << " whatToDo = " << whatToDo << "\n"); + + if (//whatToDo == SAFE_SINCE_NO_CONSTRAINT || + whatToDo == NOT_SAFE_TO_PUT_COPY_AFTER_WHERE_BLOCK) { + // Moving COPY before the WHERE block. 
+ putCOPYBeforeWhereBlock(MBB, TII, IMI, I2, + I2plus1, IE, changedMF, + destRegisterPredicateOfSplitWhere); + // break; + + } // END moving I2 immediately before the logical instruction linked to the WHERE block + else + if ( + // We treat here SAFE_SINCE_NO_CONSTRAINT because moving after WHERE block doesn't add any auxiliary instruction + whatToDo == SAFE_SINCE_NO_CONSTRAINT || + whatToDo == NOT_SAFE_TO_PUT_COPY_BEFORE_WHERE_BLOCK) { + // TODO TODO: we should put multiple COPY instructions from this WHERE block in the SAME order after END_WHERE. See if such cases happen. + LLVM_DEBUG(dbgs() << " moving I2 immediately after WHERE block\n"); + assert(I3 != IE); + + LLVM_DEBUG(dbgs() << " runOnMachineFunction(): I2 = " + << *I2 << "\n"); + + // I3 is pointing to END_WHERE (see code above) + LLVM_DEBUG(dbgs() << " runOnMachineFunction(): I3 = " + << *I3 << "\n"); + + assert( (I3->getOpcode() == Connex::END_WHERE) && + "I3 should point to END_WHERE (see code above)."); + /* + assert( (I3->getOpcode() == Connex::WHEREEQ || + I3->getOpcode() == Connex::WHERELT || + I3->getOpcode() == Connex::WHERECRY) && + "We should NOT be arriving here otherwise."); + */ + + I3++; // Jump over END_WHERE (normally) + LLVM_DEBUG(dbgs() << " runOnMachineFunction(): I3 (after I3++) = " + << *I3 << "\n"); + + LLVM_DEBUG(dbgs() << " runOnMachineFunction(): Preparing to remove I2 = " + << *I2 + << " and moving it before I3 = " + << *I3 << "\n"); + MBB.remove((&(*I2))); + MBB.insert(I3, (&(*I2))); // It inserts before I3 + + /* This is NOT good for case where we have 2+ COPY + instrs in the WHERE block: I = I3; */ + //I2++; + //I = I2; + LLVM_DEBUG(dbgs() << " runOnMachineFunction(): I2 (after moving I2) = " + << *I2 << "\n"); + // I2plus1++; + LLVM_DEBUG(dbgs() << " runOnMachineFunction(): I2plus1 = " + << *I2plus1 << "\n"); + + /* Here we handle the case of more than 1 COPY + instr in the WHERE block (I2plus1 represents the next + instr after the COPY (before move)) */ + I2 = I2plus1; + + 
MachineBasicBlock::iterator I2plus2 = I2plus1; + I2plus2++; + LLVM_DEBUG(dbgs() << " runOnMachineFunction(): I2plus2 = " + << *I2plus2 << "\n"); + + changedMF = true; + /* This is NOT good for case where we have 2+ COPY + instrs in the WHERE block: break; + We keep searching with I2 for loop in this WHERE block + for more COPY instrs. */ + } // END if (whatToDo == NOT_SAFE_TO_PUT_COPY_BEFORE_WHERE_BLOCK) + else + if (whatToDo == SAFE_TO_PUT_COPY_IN_SPLIT_WHERE_BLOCK) { + splitWhereBlock(MBB, TII, I, IMI, I2, IE, + changedMF, + destRegisterPredicateOfSplitWhere); + LLVM_DEBUG(dbgs() << " After calling splitWhereBlock(): IMI = " + << *IMI << "\n"); + } // END if SPLIT WHERE block + else + // IMPORTANT: we increment here the iterator over instruction in WHERE block + I2++; + } // END if (I2->getOpcode() == Connex::ORV_H) + else { + // IMPORTANT: we increment here the iterator over instruction in WHERE block + I2++; + // else + } + + // Note that the END_WHERE takes input node and has a value output + if (I2->getOpcode() == Connex::END_WHERE) { + LLVM_DEBUG(dbgs() << " found END_WHERE --> breaking I2 loop\n"); + I2++; + I = I2; + + // MEGA-TODO: think if OK here + destRegisterPredicateOfSplitWhere = -1; + + LLVM_DEBUG(dbgs() << " Making destRegisterPredicateOfSplitWhere = -1\n"); + + break; + } + + LLVM_DEBUG(dbgs() << "PassHandleMisplacedInstr: at end of for loop I2, I2 = " + << *I2 + << " and IMI = " + << *IMI); + } // END for loop with ind-var I2 + + LLVM_DEBUG(dbgs() << "PassHandleMisplacedInstr: after WHERE block processed: MBB = "; + MBB.dump()); + LLVM_DEBUG(dbgs() << "PassHandleMisplacedInstr: IMI = " + << *IMI); + } // END if WHERE* + } // END if (DontTreatCopyInstructions == false) + } // END for (MachineBasicBlock::iterator I + + } // END for (auto &MBB : MF) + + LLVM_DEBUG(dbgs() << " runOnMachineFunction(): changedMF = " + << changedMF << "\n"); + + return changedMF; // indicates if we changed MF + } // end runOnMachineFunction(MachineFunction &MF) + + 
private:
+  MachineRegisterInfo *MRI;
+
+  static char ID;
+}; // END class PassHandleMisplacedInstr
+char PassHandleMisplacedInstr::ID = 0;
+
+} // END namespace
+
+
+
+// We currently don't use anymore bundles, since we avoid using the post-RA scheduler
+//#define CREATE_BUNDLES
+#ifdef CREATE_BUNDLES
+  #include "ConnexTargetMachine_NotUsed_Important.h"
+#endif
+
+// Gives error: should have been declared inside ‘llvm’: FunctionPass *llvm::createPreRAPassFinalizeBundles() { return new PreRAPassFinalizeBundles(); }
+namespace llvm {
+#ifdef CREATE_BUNDLES
+  FunctionPass *createPassCreateBundles() {
+    return new PassCreateBundles();
+  }
+
+  FunctionPass *createPassFinalizeBundles() {
+    return new PassFinalizeBundles();
+  }
+#endif
+
+  FunctionPass *createPassHandleMisplacedInstr() {
+    return new PassHandleMisplacedInstr();
+  }
+}
+
+
+namespace {
+
+/// Connex Code Generator Pass Configuration Options.
+///
+/// Adds the Connex passes to the codegen pipeline:
+///  - addInstSelector(): the Connex ISel DAG pass (plus the bundle-creation
+///    pass when CREATE_BUNDLES is defined);
+///  - addPreEmitPass(): PassHandleMisplacedInstr followed by the stand-alone
+///    post-RA hazard recognizer.
+/// addPreRegAlloc() and addPostRegAlloc() are deliberately empty.
+class ConnexPassConfig : public TargetPassConfig {
+public:
+  ConnexPassConfig(ConnexTargetMachine *TM, PassManagerBase &PM)
+      : TargetPassConfig(*TM, PM) {}
+
+  /// The target machine this configuration was created for.
+  ConnexTargetMachine &getConnexTargetMachine() const {
+    // getTM() is a template: the concrete target machine type must be named.
+    return getTM<ConnexTargetMachine>();
+  }
+
+  // Install an instruction selector pass using
+  // the ISelDag to gen Connex code; also register extra passes.
+  // VERY IMPORTANT: commenting this method out results in an error.
+  bool addInstSelector() override {
+    addPass(createConnexISelDag(getConnexTargetMachine()));
+
+    /* The registered pass is run immediately after the 1st List
+     * scheduling, after the ISel pass registered above.
+     * The reason it is NOT directly after the ISel pass is that it seems
+     * that the 1st scheduling
+     * pass is considered to be linked together with ISel.
+     */
+    #ifdef CREATE_BUNDLES
+    addPass(createPassCreateBundles());
+    #endif
+
+    return false;
+  }
+
+  /* From http://llvm.org/docs/doxygen/html/classllvm_1_1TargetPassConfig.html
+     This method may be implemented by targets that want to run passes immediately before register allocation.
+
+     Intentionally empty. IMPORTANT: as of Mar 2017, adding
+     createPassFinalizeBundles() here (as inspired from
+     llvm/lib/Target/X86/X86TargetMachine.cpp and X86OptimizeLEAs.cpp)
+     gives an error.
+  */
+  void addPreRegAlloc() override {}
+
+  /* Intentionally empty: adding createPassFinalizeBundles() here does NOT
+     help for the llc -O1 bug it was tried against. */
+  void addPostRegAlloc() override {}
+
+  #ifdef CREATE_BUNDLES
+  /* IMPORTANT:
+     From http://llvm.org/docs/doxygen/html/classllvm_1_1TargetPassConfig.html
+     (This runs after register allocation, before 2nd (post-RA) scheduler) */
+  void addPreSched2() override {
+    LLVM_DEBUG(dbgs() << "Entered ConnexPassConfig::addPreSched2().\n");
+
+    // Inspired from llvm/lib/Target/ARM/ARMTargetMachine.cpp
+    //if (getOptLevel() != CodeGenOpt::None)
+    addPass(createPassFinalizeBundles());
+  }
+  #endif
+
+  /*
+     From http://llvm.org/doxygen/classllvm_1_1TargetPassConfig.html
+  */
+  void addPreEmitPass() override {
+    LLVM_DEBUG(dbgs() << "Entered ConnexPassConfig::addPreEmitPass().\n");
+
+    addPass(createPassHandleMisplacedInstr());
+
+    // Here we add a stand-alone hazard recognizer pass
+    addPass(&PostRAHazardRecognizerID);
+  }
+};
+
+} // end namespace
+
+TargetPassConfig
*ConnexTargetMachine::createPassConfig(PassManagerBase &PM) {
+  return new ConnexPassConfig(this, PM);
+}
+
+/*
+*/
+// Inspired from ARCTargetMachine.cpp
+TargetTransformInfo ConnexTargetMachine::getTargetTransformInfo(const Function &F) {
+  return TargetTransformInfo(ConnexTTIImpl(this, F));
+}
+
Index: lib/Target/Connex/ConnexTargetTransformInfo.h
===================================================================
--- lib/Target/Connex/ConnexTargetTransformInfo.h
+++ lib/Target/Connex/ConnexTargetTransformInfo.h
@@ -0,0 +1,89 @@
+//===-- ConnexTargetTransformInfo.h - Connex specific TTI ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file contains a TargetTransformInfo::Concept conforming object specific to the
+/// Connex target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+// Inspired from XCore/XCoreTargetTransformInfo.h
+
+#ifndef LLVM_LIB_TARGET_CONNEX_CONNEXTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_CONNEX_CONNEXTARGETTRANSFORMINFO_H
+
+#include "Connex.h"
+#include "ConnexTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/CodeGen/TargetLowering.h"
+
+namespace llvm {
+
+/// Connex answers to the TTI queries. BasicTTIImplBase takes the deriving
+/// class as its template argument; the queries not defined below are left to
+/// the base implementations (see the file comment above).
+class ConnexTTIImpl : public BasicTTIImplBase<ConnexTTIImpl> {
+  using BaseT = BasicTTIImplBase<ConnexTTIImpl>;
+  using TTI = TargetTransformInfo;
+  friend BaseT;
+
+  const ConnexSubtarget *ST;
+  const ConnexTargetLowering *TLI;
+
+  // Private accessors; BaseT is a friend (see above) and can reach them.
+  const ConnexSubtarget *getST() const {
+    LLVM_DEBUG(dbgs() << "Entered getST()\n");
+    return ST;
+  }
+
+  const ConnexTargetLowering *getTLI() const {
+    LLVM_DEBUG(dbgs() << "Entered getTLI()\n");
+    return TLI;
+  }
+
+public:
+  explicit ConnexTTIImpl(const ConnexTargetMachine *TM, const Function &F)
+      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()),
+        // Doesn't help (inspired from X86 backend): ST(TM->getSubtargetImpl(F)),
+        TLI(ST->getTargetLowering()) {
+    LLVM_DEBUG(dbgs() << "Entered constructor ConnexTTIImpl()\n");
+  }
+
+  /// Reports a masked gather as legal when the scalar element type of
+  /// \p DataTy is half, or an integer that is 16 or 32 bits wide.
+  /// Inspired from X86TargetTransformInfo.cpp.
+  bool isLegalMaskedGather(Type *DataTy) {
+    LLVM_DEBUG(dbgs() << "Entered isLegalMaskedGather()\n");
+
+    Type *ScalarTy = DataTy->getScalarType();
+
+    if (ScalarTy->isHalfTy())
+      return true;
+
+    if (!ScalarTy->isIntegerTy())
+      return false;
+
+    unsigned IntWidth = ScalarTy->getIntegerBitWidth();
+    LLVM_DEBUG(dbgs() << "isLegalMaskedGather(): IntWidth = "
+                      << IntWidth << "\n");
+    return (IntWidth == 16) || (IntWidth == 32);
+  }
+
+  /// A masked scatter is legal exactly when isLegalMaskedGather() accepts
+  /// \p DataType. Inspired from X86TargetTransformInfo.cpp.
+  bool isLegalMaskedScatter(Type *DataType) {
+    LLVM_DEBUG(dbgs() << "Entered isLegalMaskedScatter()\n");
+    return isLegalMaskedGather(DataType);
+  }
+};
+
+} // end namespace llvm
+
+#endif
Index: lib/Target/Connex/InstPrinter/CMakeLists.txt
===================================================================
--- lib/Target/Connex/InstPrinter/CMakeLists.txt
+++ lib/Target/Connex/InstPrinter/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMConnexAsmPrinter
+  ConnexInstPrinter.cpp
+  )
Index: lib/Target/Connex/InstPrinter/ConnexInstPrinter.h
===================================================================
--- lib/Target/Connex/InstPrinter/ConnexInstPrinter.h
+++ lib/Target/Connex/InstPrinter/ConnexInstPrinter.h
@@ -0,0 +1,70 @@
+//===-- ConnexInstPrinter.h - Convert Connex MCInst to asm syntax -------*- C++ -*--//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Connex MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CONNEX_INSTPRINTER_CONNEXINSTPRINTER_H
+#define LLVM_LIB_TARGET_CONNEX_INSTPRINTER_CONNEXINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+class MCOperand;
+
+/// Prints a Connex MCInst as assembly text (see the file comment above).
+class ConnexInstPrinter : public MCInstPrinter {
+public:
+  ConnexInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                    const MCRegisterInfo &MRI)
+      : MCInstPrinter(MAI, MII, MRI) {}
+
+  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+
+  // IMPORTANT Note: printOperand() etc are not methods of the
+  // MCInstPrinter class, but they are methods called from the
+  // TableGen generated code from ConnexGenAsmWriter.inc.
+  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+                    const char *Modifier = nullptr);
+
+  // NOTE(review): the template parameter list was missing here, which does
+  // not compile. It is restored as in Mips/InstPrinter/MipsInstPrinter.h,
+  // whose printUImm has this exact signature - confirm against the
+  // definition in ConnexInstPrinter.cpp.
+  template <unsigned Bits, unsigned Offset = 0>
+  void printUImm(const MCInst *MI, int opNum, raw_ostream &O);
+
+  void printMemOperand(const MCInst *MI, int OpNo, raw_ostream &O,
+                       const char *Modifier = nullptr);
+
+  // Taken from MSP430InstPrinter.h
+  void printSrcMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+                          const char *Modifier = nullptr);
+
+  void printImm64Operand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  // Inspired from printi256mem() from [LLVM]/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+  void printScatterGatherMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  // Autogenerated by tblgen.
+  void printInstruction(const MCInst *MI, raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo);
+
+private:
+  // Taken from [LLVM]/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+  void printUnsignedImm8(const MCInst *MI, int opNum, raw_ostream &O);
+
+  // Required by ConnexGenAsmWriter.inc
+  // Taken from Mips/InstPrinter/MipsInstPrinter.h
+  void printUnsignedImm(const MCInst *MI, int opNum, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
Index: lib/Target/Connex/InstPrinter/ConnexInstPrinter.cpp
===================================================================
--- lib/Target/Connex/InstPrinter/ConnexInstPrinter.cpp
+++ lib/Target/Connex/InstPrinter/ConnexInstPrinter.cpp
@@ -0,0 +1,527 @@
+//===-- ConnexInstPrinter.cpp - Convert Connex MCInst to asm syntax -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Connex MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "Connex.h"
+#include "ConnexInstPrinter.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+
+#include "llvm/Support/Debug.h" // for dbgs() and the LLVM_DEBUG() macro
+#include "ConnexConfig.h"
+
+using namespace llvm;
+
+
+//#define DEBUG_TYPE "asm-printer"
+#define DEBUG_TYPE "asm-inst-printer"
+
+// Include the auto-generated portion of the assembly writer.
+#include "ConnexGenAsmWriter.inc"
+
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include <unordered_map>
+
+/*
+NOTE: as of Nov 2016, the LLVM APIs allow printing customized code only
+here (and NOT in ConnexAsmPrinter.cpp, which around a year ago had some APIs).
+*/
+
+/*
+We define these variables here and use them as extern in
+  ConnexAsmPrinter.cpp (and NOT the other way around - which gives an ld error)
+  because of the way these modules are being linked by the build scripts
+  of LLVM.
+
+Note that the flow of the operations is
+  ConnexAsmPrinter::EmitInstruction() gets called first and then
+  ConnexInstPrinter::printUnsignedImm() gets called immediately after.
+  (look at the stdout files generated by llc with the DEBUG prints)
+  Also, if we look at ConnexAsmPrinter::EmitInstruction(), we have an
+  automatic var MCInst TmpInst; .
+  So, the MCInst only exists while it is being output to the stream and is then
+  automatically destroyed - so it does NOT make any sense to keep track in a
+  map of the associated MachineInstr for the MCInst in "flight" (TmpInst).
+*/
+const MachineInstr *crtMI = nullptr;
+#ifdef NOTNOTNOT
+std::unordered_map<const MachineInstr *, const MCInst *> mapMachineMCInst;
+#endif
+// A map associating: first is LD_H, ST_H or REPEAT, second is the associated INLINEASM
+std::unordered_map<const MachineInstr *, const MachineInstr *> mapLD_ST_REPEAT_InlineAsm;
+
+#ifdef NOTNOTNOT
+const MachineInstr *retrieveAssociatedMachineInstr(const MCInst *mci) {
+  DEBUG(dbgs() << "Entered retrieveAssociatedMachineInstr()\n");
+
+  const MachineInstr *res = nullptr;
+
+  //for (auto : mapMachineMCInst)
+  // See http://www.cplusplus.com/reference/unordered_map/unordered_map/begin/
+  for (auto it = mapMachineMCInst.begin();
+       it != mapMachineMCInst.end(); ++it) {
+    //std::cout << " " << it->first << ":" << it->second;
+    if (it->second == mci) {
+      const MachineInstr *mi = &(*(it->first));
+      DEBUG(dbgs() << "retrieveAssociatedMachineInstr(): "
+                   << "mci = " << *mci
+                   << ", mci = " << mci
+                   //<< ", it->second = " << it->second
+                   << ", MachineInstr = " << mi
+                   //<< " " << *mi
+                   << "\n");
+
+      res = it->first;
+      /*const MachineInstr *res = it->first;
+      return res;
+      */
+    }
+  }
+  return res;
+  /*
+  assert(0 && "MCInst not found!");
+  return NULL;
+  */
+}
+#endif
+
+// Prints MI to O followed by the annotation Annot. INLINEASM is special-cased
+// below; every other opcode goes to the TableGen-generated printInstruction().
+void ConnexInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                                  StringRef Annot, const MCSubtargetInfo &STI) {
+  LLVM_DEBUG(dbgs() << "Entered ConnexInstPrinter::printInst()...\n");
+  //O << "Entered ConnexInstPrinter::printInst()\n";
+  LLVM_DEBUG(dbgs() << "printInst(): MI = " << *MI << "\n");
+  LLVM_DEBUG(dbgs() << "printInst(): MI->getOpcode() = "
+                    << MI->getOpcode() << "\n");
+
+  /* For some reason, ConnexGenAsmWriter.inc cannot print INLINEASM from the
+     MachineInstr bundles I create in ConnexInstrInfo.cpp, expandPostRAPseudo(),
+     and then unpack in [Target]AsmPrinter::EmitInstruction(),
+     because of this definition they have:
+       static const uint32_t OpInfo0[] = {
+         0U, // PHI
+         0U, // INLINEASM
+         ...
+     etc.
+     So I handle these INLINEASMs myself here.
+     TODO: maybe explain better.
+  */
+  if (MI->getOpcode() == TargetOpcode::INLINEASM) {
+    O << " ";
+    printOperand(MI, 0, O); //getOperand(0));
+    O << " // custom code in ConnexInstPrinter::printInst() for INLINEASM";
+  }
+  /*
+  else
+  if (MI->getOpcode() == 13) { // Handling bundle for VSELECT, more exactly instruction COPY
+    // TODO TODO TODO: I should maybe implement this in printInstruction() and check for Bits != 0 and act accordingly
+    O << " ";
+    printOperand(MI, 0, O); //getOperand(0));
+    O << " = ";
+    printOperand(MI, 1, O);
+    O << " | ";
+    printOperand(MI, 1, O);
+    O << " ; // custom code in ConnexInstPrinter::printInst() for VSELECT";
+  }
+  */
+  else {
+    printInstruction(MI, O);
+  }
+
+  printAnnotation(O, Annot);
+}
+
+// Prints Expr; asserts-enabled builds also check it is a VK_None symbol reference.
+static void printExpr(const MCExpr *Expr, raw_ostream &O) {
+#ifndef NDEBUG
+  const MCSymbolRefExpr *SRE;
+
+  if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr))
+    SRE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
+  else
+    SRE = dyn_cast<MCSymbolRefExpr>(Expr);
+  assert(SRE && "Unexpected MCExpr type.");
+
+  MCSymbolRefExpr::VariantKind Kind = SRE->getKind();
+
+  assert(Kind == MCSymbolRefExpr::VK_None);
+#endif
+
+  O << *Expr;
+}
+
+void ConnexInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                     raw_ostream &O, const char *Modifier) {
+  LLVM_DEBUG(dbgs() << "Entered ConnexInstPrinter::printOperand(OpNo = "
+                    << OpNo << ")...\n");
+  LLVM_DEBUG(dbgs() << "ConnexInstPrinter::printOperand(): *MI = "
+                    << *MI << "\n");
+  LLVM_DEBUG(dbgs() << "ConnexInstPrinter::printOperand(): MI->getNumOperands() = "
+                    << MI->getNumOperands() << "\n");
+
+  // See http://llvm.org/docs/doxygen/html/classllvm_1_1MCInst.html
+
+  /* Simple fallback, useful just for NOP -
+   * TODO: I could take care of it in printInstruction(), which calls
+   *   printOperand()
+   */
+  if (MI->getNumOperands() <= OpNo)
+    return;
+
+  LLVM_DEBUG(dbgs() << "ConnexInstPrinter::printOperand(): MI->getOperand(OpNo) = "
+                    << MI->getOperand(OpNo) << "\n");
+
+  assert((Modifier == nullptr || Modifier[0] == 0) && "No modifiers supported");
+
+  const MCOperand &Op = MI->getOperand(OpNo);
+
+  if (Op.isReg()) {
+    // This handles registers, such as scalar r0 or vector R(0)
+    O << getRegisterName(Op.getReg());
+  }
+  else
+  if (Op.isImm()) {
+    /* Normally we do NOT get here because this case is treated in
+       printUnsignedImm(). */
+    LLVM_DEBUG(dbgs() << "ConnexInstPrinter::printOperand(): Op.getImm() = "
+                      << Op.getImm() << "\n");
+    O << (int32_t)Op.getImm();
+  }
+  else {
+    assert(Op.isExpr() && "Expected an expression");
+    printExpr(Op.getExpr(), O);
+  }
+}
+
+template <unsigned Bits, unsigned Offset>
+void ConnexInstPrinter::printUImm(const MCInst *MI, int opNum, raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(opNum);
+  if (MO.isImm()) {
+    uint64_t Imm = MO.getImm();
+    Imm -= Offset;
+    Imm &= (uint64_t(1) << Bits) - 1; // 64-bit mask: plain 1 << Bits overflows int for Bits >= 31
+    Imm += Offset;
+    O << formatImm(Imm);
+    return;
+  }
+
+  printOperand(MI, opNum, O);
+}
+
+void ConnexInstPrinter::printMemOperand(const MCInst *MI, int OpNo, raw_ostream &O,
+                                        const char *Modifier) {
+  // We arrive here for instructions like: sth 0(r12), r14
+
+  LLVM_DEBUG(dbgs() << "Entered ConnexInstPrinter::printMemOperand()\n");
+
+  const MCOperand &RegOp = MI->getOperand(OpNo);
+  const MCOperand &OffsetOp = MI->getOperand(OpNo + 1);
+
+  // offset
+  if (OffsetOp.isImm())
+    O << formatDec(OffsetOp.getImm());
+  else
+    assert(0 && "Expected an immediate");
+
+  // register
+  assert(RegOp.isReg() && "Register operand not a register");
+  //#ifdef USE_ORIGINAL_PRINT_CODE
+  O << '(' << getRegisterName(RegOp.getReg()) << ')';
+  /*
+  #else
+  // See http://llvm.org/docs/doxygen/html/classllvm_1_1MCOperand.html
+  O << getRegisterName(RegOp.getReg()); //print something like r1, r2, etc
+  //O << RegOp.getImm(); // Gives error: /home/asusu/LLVM/llvm38Nov2016/llvm/include/llvm/MC/MCInst.h:75: int64_t llvm::MCOperand::getImm() const: Assertion `isImm() && "This is not an immediate"' failed.
+  //O << RegOp; // Outputs something like <MCOperand Reg:N>, etc
+  #endif
+  */
+}
+
+// Taken from MSP430InstPrinter.h
+void ConnexInstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
+                                           raw_ostream &O,
+                                           const char *Modifier) {
+  LLVM_DEBUG(dbgs() << "Entered ConnexInstPrinter::printSrcMemOperand()\n");
+
+  const MCOperand &Base = MI->getOperand(0); // NOTE(review): OpNo is ignored here;
+  const MCOperand &Disp = MI->getOperand(1); // MSP430 reads OpNo / OpNo + 1 - confirm
+
+  // Print displacement first
+
+  // If the global address expression is a part of displacement field with a
+  // register base, we should not emit any prefix symbol here, e.g.
+  //   mov.w &foo, r1
+  // vs
+  //   mov.w glb(r1), r2
+  // Otherwise (!) msp430-as will silently miscompile the output :(
+  if (!Base.getReg())
+    O << '&';
+
+  if (Disp.isExpr())
+    Disp.getExpr()->print(O, &MAI);
+  else {
+    assert(Disp.isImm() && "Expected immediate in displacement field");
+    O << Disp.getImm();
+  }
+
+  // Print register base field
+  if (Base.getReg())
+    O << '(' << getRegisterName(Base.getReg()) << ')';
+}
+
+void ConnexInstPrinter::printImm64Operand(const MCInst *MI, unsigned OpNo,
+                                          raw_ostream &O) {
+  LLVM_DEBUG(dbgs() << "Entered ConnexInstPrinter::printImm64Operand()\n");
+
+  const MCOperand &Op = MI->getOperand(OpNo);
+
+  if (Op.isImm()) {
+    // This is for instructions like: ld_64 r3, 4294967296
+    O << (uint64_t)Op.getImm();
+  }
+  else {
+    // Non-immediate operands are printed with MCOperand's operator<<
+    O << Op;
+  }
+}
+
+void ConnexInstPrinter::printScatterGatherMemOperand(const MCInst *MI,
+                                                     unsigned OpNo,
+                                                     raw_ostream &O) {
+  LLVM_DEBUG(dbgs() <<
+    "Entered ConnexInstPrinter::printScatterGatherMemOperand() - "
+    "NOTE that we discard the BasePtr of the TableGen MemOperand\n");
+  /*
+  IMPORTANT: Here, for the MCInst, the parameters do NOT follow the order from the .td file.
+  Following include/llvm/Target/TargetSelectionDAG.td we have:
+
+  // SDTypeProfile - This profile describes the type requirements of a Selection
+  // DAG node.
+  class SDTypeProfile<int numresults, int numoperands,
+                      list<SDTypeConstraint> constraints> {
+    int NumResults = numresults;
+    int NumOperands = numoperands;
+    list<SDTypeConstraint> Constraints = constraints;
+  }
+
+  // So: 3 input operands, 2 results.
+  // Params are: passthru, mask, index; results are: vector of i1, vector of ptr (actual result)
+  // Params are 0, 1, 2 and results are 3, 4.
+  // Operands 0 and 1 have vector type, with same number of elements.
+  // Operands 0 and 2 have identical types.
+  // Operands 1 and 3 have identical types.
+  //   --> Opnd 3 (result 0?) is i1 vector
+  // Operand 4 (result 1?) has pointer type.
+  // Operand 1 is vector type with element type of i1.
+  def SDTMaskedGather: SDTypeProfile<2, 3, [ // masked gather
+    SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<1, 3>,
+    SDTCisPtrTy<4>, SDTCVecEltisVT<1, i1>, SDTCisSameNumEltsAs<0, 1>
+  ]>;
+
+  def masked_gather : SDNode<"ISD::MGATHER", SDTMaskedGather,
+                             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+  */
+
+  if (MI->getNumOperands() > 4) {
+    // We have an MGATHER operation
+    /*
+    // THIS is WRONG:
+    const MCOperand &index = MI->getOperand(0);
+    const MCOperand &maskIn = MI->getOperand(1);
+    const MCOperand &passthru = MI->getOperand(2);
+    const MCOperand &maskOut = MI->getOperand(3);
+    const MCOperand &res = MI->getOperand(4);
+    */
+    const MCOperand &res = MI->getOperand(0);
+    const MCOperand &index = MI->getOperand(4);
+    const MCOperand &maskIn = MI->getOperand(1);
+    const MCOperand &passthru = MI->getOperand(2);
+    const MCOperand &maskOut = MI->getOperand(3);
+
+    assert(index.isReg() && "index not a register");
+    assert(passthru.isReg() && "passthru not a register");
+
+    LLVM_DEBUG(dbgs() << "MI = " << *MI
+                      << "\n index = " << index
+                      << "\n maskIn (bool vector register, which we actually do NOT use) = " << maskIn
+                      << "\n passthru = " << passthru
+                      << "\n maskOut = " << maskOut
+                      << "\n res = " << res << "\n");
+
+    LLVM_DEBUG(dbgs() << "\n res = " << res << "\n");
+
+    assert(res.isReg() && "res not a register");
+    O << getRegisterName(index.getReg());
+  }
+  else {
+    // We have an MSCATTER operation
+    const MCOperand &value = MI->getOperand(1);
+    const MCOperand &maskIn = MI->getOperand(0);
+    const MCOperand &mask2 = MI->getOperand(2);
+    const MCOperand &index = MI->getOperand(3);
+
+    LLVM_DEBUG(dbgs() << "MI = " << *MI
+                      << "\n value (src) = " << value
+                      << "\n maskIn (bool vector register, "
+                         "which we actually do NOT use) = " << maskIn
+                      << "\n index = " << index
+                      << "\n mask2 = " << mask2
+                      << "\n");
+    O << getRegisterName(index.getReg());
+  }
+
+
+  /*
+  O << "MI = " << *MI << "\n";
+  O << "index = (" << getRegisterName(index.getReg()) << ")\n";
+  O << "passthru = (" << getRegisterName(passthru.getReg()) << ")\n";
+  O << "res = (" << getRegisterName(res.getReg()) << ")\n";
+  //O << " = (" << getRegisterName(BaseReg.getReg()) << ")\n";
+  */
+
+  //printMemReference(MI, OpNo, O);
+  LLVM_DEBUG(dbgs() << "Exiting ConnexInstPrinter::printScatterGatherMemOperand()\n");
+}
+
+// Returns the text that follows strToSearch in the INLINEASM string mapped to assocMI.
+char *getStringFromAssociatedInlineAsm(const MachineInstr *assocMI,
+                                       char *strToSearch) {
+  char *res = nullptr;
+
+  assert(0 &&
+    "getStringFromAssociatedInlineAsm() should NOT be executed since we don't "
+    "use symbolic LD_H or ST_H anymore");
+
+  LLVM_DEBUG(dbgs() << "getStringFromAssociatedInlineAsm(): assocMI = "
+                    //; assocMI->dump();
+                    //dbgs() <<
+                    << "(" << assocMI << ")" << "\n");
+
+  const MachineInstr *miInlineasm = mapLD_ST_REPEAT_InlineAsm[assocMI];
+  LLVM_DEBUG(dbgs() << "getStringFromAssociatedInlineAsm(): miInlineasm = "
+                    << miInlineasm << "\n");
+
+  if (miInlineasm == nullptr) {
+    res = strdup("[NO_VALUE - since miInlineasm == NULL!!!!]"); // NOTE(review): no visible caller frees this
+    return res;
+  }
+  assert(miInlineasm->isInlineAsm());
+
+  const MachineOperand &inlineAsmStrMO0 = miInlineasm->getOperand(0);
+
+  // LLVM_DEBUG(dbgs() << "ConnexInstPrinter::printUnsignedImm(): inlineAsmStrMO = "
+  //              << inlineAsmStrMO0 << "\n");
+  // Inspiring from http://llvm.org/docs/doxygen/html/MachineInstr_8cpp_source.html#l00306
+  assert(inlineAsmStrMO0.getType() == MachineOperand::MO_ExternalSymbol);
+
+  LLVM_DEBUG(dbgs() << "getStringFromAssociatedInlineAsm(): "
+                       "inlineAsmStrMO0.getSymbolName() = "
+                    << inlineAsmStrMO0.getSymbolName() << "\n");
+
+  // From http://llvm.org/docs/doxygen/html/classllvm_1_1MachineOperand.html
+  //   const char *getSymbolName () const
+  res = strstr(const_cast<char *>(inlineAsmStrMO0.getSymbolName()),
+               strToSearch);
+
+  assert(res != nullptr && "Did not find strToSearch marker in INLINEASM");
+
+  res += strlen(strToSearch);
+
+  assert(res != nullptr);
+
+  return res;
+}
+
+
+// Taken from MipsInstPrinter.cpp
+// (required by ConnexGenAsmWriter.inc)
+void ConnexInstPrinter::printUnsignedImm(const MCInst *MI, int opNum,
+                                         raw_ostream &O) {
+  char *res = nullptr;
+  //int offsetLS;
+
+  LLVM_DEBUG(dbgs() << "Entered ConnexInstPrinter::printUnsignedImm()...\n");
+
+  const MCOperand &MO = MI->getOperand(opNum);
+
+  if (MO.isImm()) {
+    // Printing 16-bits unsigned int
+    //O << (unsigned short)MO.getImm();
+    // Printing unsigned int
+    unsigned imm = MO.getImm();
+
+    LLVM_DEBUG(dbgs() << "ConnexInstPrinter::printUnsignedImm(): imm = "
+                      << imm
+                      << ", MI (ptr) = " << MI
+                      << ", MI = " << *MI
+                      << "\n");
+
+    #ifdef GENERATE_ASSOCIATED_INLINEASM_FROM_LOOPVECTORIZE_PASS
+    if (imm == VALUE_BOGUS_REPEAT_X_TIMES) {
+      assert(MI->getOpcode() == Connex::REPEAT);
+
+      res = getStringFromAssociatedInlineAsm(crtMI,
+                                             const_cast<char *>("/*value*/"));
+
+      O << res;
+    }
+    else
+    #endif
+
+    if (imm == CONNEX_MEM_NUM_ROWS + 10) {
+      #ifdef NOTNOTNOT
+      // This was too complicated
+
+      //MCInst *assocMC = mapMachineMCInst[MI];
+      const MachineInstr *assocMI =
+        retrieveAssociatedMachineInstr(MI);
+      #endif
+
+      const MachineInstr *assocMI = crtMI;
+
+      assert((MI->getOpcode() == Connex::LD_H) ||
+             (MI->getOpcode() == Connex::ST_H));
+
+      res = getStringFromAssociatedInlineAsm(assocMI, const_cast<char *>("/*offset*/"));
+
+      //sscanf(res, "%d", &offsetLS);
+
+      //LLVM_DEBUG(dbgs() << "assocMI = " << *assocMC << "\n");
+
+      O << STR_LOOP_SYMBOLIC_INDEX
+        << " + " << res; // offsetLS
+    }
+    else {
+      O << (unsigned int)MO.getImm();
+    }
+  }
+  else
+    printOperand(MI, opNum, O);
+}
+
+
+// Taken from [LLVM]/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+void ConnexInstPrinter::printUnsignedImm8(const MCInst *MI, int opNum,
+                                          raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(opNum);
+
+  if (MO.isImm())
+    O << (unsigned short int)(unsigned char)MO.getImm();
+  else
+    printOperand(MI, opNum, O);
+}
Index: lib/Target/Connex/InstPrinter/LLVMBuild.txt
===================================================================
--- lib/Target/Connex/InstPrinter/LLVMBuild.txt
+++ lib/Target/Connex/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Connex/InstPrinter/LLVMBuild.txt ---------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ConnexAsmPrinter
+parent = Connex
+required_libraries = MC Support
+add_to_library_groups = Connex
Index: lib/Target/Connex/LLVMBuild.txt
===================================================================
--- lib/Target/Connex/LLVMBuild.txt
+++ lib/Target/Connex/LLVMBuild.txt
@@ -0,0 +1,42 @@
+;===- ./lib/Target/Connex/LLVMBuild.txt ---------------------------*- Conf -*--===;
+;
+; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+; See https://llvm.org/LICENSE.txt for license information.
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = InstPrinter MCTargetDesc TargetInfo + +[component_0] +type = TargetGroup +name = Connex +parent = Target +has_asmprinter = 1 + +[component_1] +type = Library +name = ConnexCodeGen +parent = Connex +required_libraries = + Analysis + AsmPrinter + CodeGen + Core + MC + ConnexAsmPrinter + ConnexDesc + ConnexInfo + SelectionDAG + Support + Target +add_to_library_groups = Connex Index: lib/Target/Connex/MCTargetDesc/CMakeLists.txt =================================================================== --- lib/Target/Connex/MCTargetDesc/CMakeLists.txt +++ lib/Target/Connex/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,6 @@ +add_llvm_library(LLVMConnexDesc + ConnexMCTargetDesc.cpp + ConnexAsmBackend.cpp + ConnexMCCodeEmitter.cpp + ConnexELFObjectWriter.cpp + ) Index: lib/Target/Connex/MCTargetDesc/ConnexAsmBackend.cpp =================================================================== --- lib/Target/Connex/MCTargetDesc/ConnexAsmBackend.cpp +++ lib/Target/Connex/MCTargetDesc/ConnexAsmBackend.cpp @@ -0,0 +1,138 @@ +//===-- ConnexAsmBackend.cpp - Connex Assembler Backend -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ConnexMCTargetDesc.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCFixup.h"
+/*
+// 2019_03_30
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+*/
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/EndianStream.h"
+/*
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCExpr.h"
+*/
+#include <cassert>
+#include <cstdint>
+/*
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+*/
+
+using namespace llvm;
+
+namespace {
+
+class ConnexAsmBackend : public MCAsmBackend {
+public:
+  ConnexAsmBackend(support::endianness Endian) : MCAsmBackend(Endian) {}
+
+  ~ConnexAsmBackend() override = default;
+
+  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+                  const MCValue &Target, MutableArrayRef<char> Data,
+                  uint64_t Value, bool IsResolved,
+                  const MCSubtargetInfo *STI) const override;
+
+  std::unique_ptr<MCObjectTargetWriter> createObjectTargetWriter()
+      const override;
+
+
+  // No instruction requires relaxation
+  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+                            const MCRelaxableFragment *DF,
+                            const MCAsmLayout &Layout) const override {
+    return false;
+  }
+
+
+  unsigned getNumFixupKinds() const override { return 1; }
+
+
+  bool mayNeedRelaxation(const MCInst &Inst,
+                         const MCSubtargetInfo &STI) const override {
+    return false;
+  }
+
+
+  void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+                        MCInst &Res) const override {}
+
+
+  bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+};
+
+} // end anonymous namespace
+
+// Pads with 8-byte 0x15000000 words; fails unless Count is a multiple of 8.
+bool ConnexAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+  if ((Count % 8) != 0)
+    return false;
+
+  for (uint64_t i = 0; i < Count; i += 8)
+    support::endian::write<uint64_t>(OS, 0x15000000, Endian);
+
+  return true;
+}
+
+// Patches the fixup Value into the already-encoded bytes in Data.
+void ConnexAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+                                  const MCValue &Target,
+                                  MutableArrayRef<char> Data, uint64_t Value,
+                                  bool IsResolved,
+                                  const MCSubtargetInfo *STI) const {
+  if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) {
+    // The Value is 0 for global variables, and the in-section offset
+    // for static variables. Write to the immediate field of the inst.
+    assert(Value <= UINT32_MAX);
+    support::endian::write<uint32_t>(&Data[Fixup.getOffset() + 4],
+                                     static_cast<uint32_t>(Value),
+                                     Endian);
+  } else if (Fixup.getKind() == FK_Data_4) {
+    support::endian::write<uint32_t>(&Data[Fixup.getOffset()], Value, Endian);
+  } else if (Fixup.getKind() == FK_Data_8) {
+    support::endian::write<uint64_t>(&Data[Fixup.getOffset()], Value, Endian);
+  } else if (Fixup.getKind() == FK_PCRel_4) {
+    Value = (uint32_t)((Value - 8) / 8);
+    if (Endian == support::little) {
+      Data[Fixup.getOffset() + 1] = 0x10;
+      support::endian::write32le(&Data[Fixup.getOffset() + 4], Value);
+    } else {
+      Data[Fixup.getOffset() + 1] = 0x1;
+      support::endian::write32be(&Data[Fixup.getOffset() + 4], Value);
+    }
+  } else {
+    assert(Fixup.getKind() == FK_PCRel_2);
+    Value = (uint16_t)((Value - 8) / 8);
+    support::endian::write<uint16_t>(&Data[Fixup.getOffset() + 2], Value,
+                                     Endian);
+  }
+}
+
+// Creates the ELF object target writer, passing 0 as the OS ABI.
+std::unique_ptr<MCObjectTargetWriter>
+    ConnexAsmBackend::createObjectTargetWriter() const {
+  return createConnexELFObjectWriter(0);
+}
+
+// Factory for the Connex assembler backend; it is always created little-endian.
+MCAsmBackend *llvm::createConnexAsmBackend(const Target &T,
+                                           const MCSubtargetInfo &STI,
+                                           const MCRegisterInfo &MRI,
+                                           const MCTargetOptions &) {
+  return new ConnexAsmBackend(support::little);
+}
+
Index: lib/Target/Connex/MCTargetDesc/ConnexELFObjectWriter.cpp
===================================================================
--- lib/Target/Connex/MCTargetDesc/ConnexELFObjectWriter.cpp
+++ lib/Target/Connex/MCTargetDesc/ConnexELFObjectWriter.cpp
@@ -0,0 +1,84 @@
+//===--
ConnexELFObjectWriter.cpp - Connex ELF Writer ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ConnexMCTargetDesc.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstdint>
+
+using namespace llvm;
+
+namespace {
+
+class ConnexELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+  ConnexELFObjectWriter(uint8_t OSABI);
+
+  ~ConnexELFObjectWriter() override;
+
+protected:
+  unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+                        const MCFixup &Fixup, bool IsPCRel) const override;
+};
+
+} // end anonymous namespace
+
+// Connex objects are 64-bit ELF, machine type EM_NONE, without relocation addends.
+ConnexELFObjectWriter::ConnexELFObjectWriter(uint8_t OSABI)
+    : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_NONE,
+                              /*HasRelocationAddend*/ false) {}
+
+
+ConnexELFObjectWriter::~ConnexELFObjectWriter() = default;
+
+// Maps a generic fixup kind to an ELF relocation type (the R_BPF_* numbers are reused).
+unsigned ConnexELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
+                                             const MCFixup &Fixup,
+                                             bool IsPCRel) const {
+  // determine the type of the relocation
+  switch ((unsigned)Fixup.getKind()) {
+  default:
+    llvm_unreachable("invalid fixup kind!");
+  case FK_SecRel_8:
+    return ELF::R_BPF_64_64;
+  case FK_PCRel_4:
+  case FK_SecRel_4:
+    return ELF::R_BPF_64_32;
+  case FK_Data_8:
+    return ELF::R_BPF_64_64;
+  case FK_Data_4:
+    // .BTF.ext generates FK_Data_4 relocations for
+    // insn offset by creating temporary labels.
+    // The insn offset is within the code section and
+    // already been fulfilled by applyFixup(). No
+    // further relocation is needed.
+    if (const MCSymbolRefExpr *A = Target.getSymA()) {
+      if (A->getSymbol().isTemporary()) {
+        MCSection &Section = A->getSymbol().getSection();
+        const MCSectionELF *SectionELF = dyn_cast<MCSectionELF>(&Section);
+        assert(SectionELF && "Null section for reloc symbol");
+
+        // The reloc symbol should be in text section.
+        unsigned Flags = SectionELF->getFlags();
+        if ((Flags & ELF::SHF_ALLOC) && (Flags & ELF::SHF_EXECINSTR))
+          return ELF::R_BPF_NONE;
+      }
+    }
+    return ELF::R_BPF_64_32;
+  }
+}
+
+// Factory used by ConnexAsmBackend to obtain the ELF object target writer.
+std::unique_ptr<MCObjectTargetWriter>
+    llvm::createConnexELFObjectWriter(uint8_t OSABI) {
+  return llvm::make_unique<ConnexELFObjectWriter>(OSABI);
+}
Index: lib/Target/Connex/MCTargetDesc/ConnexMCAsmInfo.h
===================================================================
--- lib/Target/Connex/MCTargetDesc/ConnexMCAsmInfo.h
+++ lib/Target/Connex/MCTargetDesc/ConnexMCAsmInfo.h
@@ -0,0 +1,50 @@
+//===-- ConnexMCAsmInfo.h - Connex asm properties ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the ConnexMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CONNEX_MCTARGETDESC_CONNEXMCASMINFO_H
+#define LLVM_LIB_TARGET_CONNEX_MCTARGETDESC_CONNEXMCASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
+namespace llvm {
+class Target;
+class Triple;
+
+class ConnexMCAsmInfo : public MCAsmInfo {
+public:
+  explicit ConnexMCAsmInfo(const Triple &TT) {
+#ifdef NOT_NOT_NOT
+    if (TT.getArch() == Triple::bpfeb)
+      IsLittleEndian = false;
+#endif
+
+    // Comment leader, plus empty inline-asm markers so that no APP / NO_APP
+    // delimiters are added around inline asm (idea from NVPTXMCAsmInfo.cpp).
+    CommentString = "//";
+    InlineAsmStart = "";
+    InlineAsmEnd = "";
+
+    PrivateGlobalPrefix = ".L";
+    WeakRefDirective = "\t.weak\t";
+
+    SupportsDebugInformation = true;
+
+    HasDotTypeDotSizeDirective = false;
+    HasSingleParameterDotFile = false;
+    UsesELFSectionDirectiveForBSS = true;
+  }
+};
+} // end namespace llvm
+
+#endif
Index: lib/Target/Connex/MCTargetDesc/ConnexMCCodeEmitter.cpp
===================================================================
--- lib/Target/Connex/MCTargetDesc/ConnexMCCodeEmitter.cpp
+++ lib/Target/Connex/MCTargetDesc/ConnexMCCodeEmitter.cpp
@@ -0,0 +1,177 @@
+//===-- ConnexMCCodeEmitter.cpp - Convert Connex code to machine code -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ConnexMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ConnexMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/EndianStream.h"
+#include <cassert>
+#include <cstdint>
+
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mccodeemitter"
+
+namespace {
+
+class ConnexMCCodeEmitter : public MCCodeEmitter {
+  const MCInstrInfo &MCII;
+  const MCRegisterInfo &MRI;
+  bool IsLittleEndian;
+
+public:
+  ConnexMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
+                      bool IsLittleEndian)
+      : MCII(mcii), MRI(mri), IsLittleEndian(IsLittleEndian) {}
+
+  ConnexMCCodeEmitter(const ConnexMCCodeEmitter &) = delete;
+
+  void operator=(const ConnexMCCodeEmitter &) = delete;
+
+  ~ConnexMCCodeEmitter() override = default;
+
+  // getBinaryCodeForInstr - TableGen'erated function for getting the
+  // binary encoding for an instruction.
+  uint64_t getBinaryCodeForInstr(const MCInst &MI,
+                                 SmallVectorImpl<MCFixup> &Fixups,
+                                 const MCSubtargetInfo &STI) const;
+
+  // getMachineOpValue - Return binary encoding of operand. If the machine
+  // operand requires relocation, record the relocation and return zero.
+  unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+                             SmallVectorImpl<MCFixup> &Fixups,
+                             const MCSubtargetInfo &STI) const;
+
+  uint64_t getMemoryOpValue(const MCInst &MI, unsigned Op,
+                            SmallVectorImpl<MCFixup> &Fixups,
+                            const MCSubtargetInfo &STI) const;
+
+  void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+                         SmallVectorImpl<MCFixup> &Fixups,
+                         const MCSubtargetInfo &STI) const override;
+};
+
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createConnexMCCodeEmitter(const MCInstrInfo &MCII,
+                                               const MCRegisterInfo &MRI,
+                                               MCContext &Ctx) {
+  return new ConnexMCCodeEmitter(MCII, MRI, true);
+}
+
+#ifdef NOT_NOT_NOT
+MCCodeEmitter *llvm::createBPFbeMCCodeEmitter(const MCInstrInfo &MCII,
+                                              const MCRegisterInfo &MRI,
+                                              MCContext &Ctx) {
+  return new BPFMCCodeEmitter(MRI, false);
+}
+#endif
+
+// Encodes a register or immediate operand directly; for a symbol-reference
+// expression it records a fixup (kind chosen by opcode) and returns 0.
+unsigned ConnexMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+                                                const MCOperand &MO,
+                                                SmallVectorImpl<MCFixup> &Fixups,
+                                                const MCSubtargetInfo &STI) const {
+  if (MO.isReg())
+    return MRI.getEncodingValue(MO.getReg());
+  if (MO.isImm())
+    return static_cast<unsigned>(MO.getImm());
+
+  assert(MO.isExpr());
+
+  const MCExpr *Expr = MO.getExpr();
+
+  assert(Expr->getKind() == MCExpr::SymbolRef);
+
+  if (MI.getOpcode() == Connex::JAL)
+    // func call name
+    Fixups.push_back(MCFixup::create(0, Expr, FK_SecRel_4));
+  else if (MI.getOpcode() == Connex::LD_imm64)
+    Fixups.push_back(MCFixup::create(0, Expr, FK_SecRel_8));
+  else
+    // bb label
+    Fixups.push_back(MCFixup::create(0, Expr, FK_PCRel_2));
+
+  return 0;
+}
+
+static uint8_t SwapBits(uint8_t Val) { // swaps the two 4-bit halves of Val
+  return (Val & 0x0F) << 4 | (Val & 0xF0) >> 4;
+}
+
+void ConnexMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+                                            SmallVectorImpl<MCFixup> &Fixups,
+                                            const MCSubtargetInfo &STI) const {
+  /*
+  // 2019_03_30_TODO
+  verifyInstructionPredicates(MI,
+                              computeAvailableFeatures(STI.getFeatureBits()));
+  */
+
+  unsigned Opcode = MI.getOpcode();
+  support::endian::Writer OSE(OS,
+                              IsLittleEndian ? support::little : support::big);
+
+  if (Opcode == Connex::LD_imm64 || Opcode == Connex::LD_pseudo) {
+    uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI);
+    OS << char(Value >> 56);
+    if (IsLittleEndian)
+      OS << char((Value >> 48) & 0xff);
+    else
+      OS << char(SwapBits((Value >> 48) & 0xff));
+    OSE.write<uint16_t>(0);
+    OSE.write<uint32_t>(Value & 0xffffFFFF);
+
+    const MCOperand &MO = MI.getOperand(1);
+    uint64_t Imm = MO.isImm() ? MO.getImm() : 0;
+    OSE.write<uint8_t>(0);
+    OSE.write<uint8_t>(0);
+    OSE.write<uint16_t>(0);
+    OSE.write<uint32_t>(Imm >> 32);
+  } else {
+    // Get instruction encoding and emit it
+    uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI);
+    OS << char(Value >> 56);
+    if (IsLittleEndian)
+      OS << char((Value >> 48) & 0xff);
+    else
+      OS << char(SwapBits((Value >> 48) & 0xff));
+    OSE.write<uint16_t>((Value >> 32) & 0xffff);
+    OSE.write<uint32_t>(Value & 0xffffFFFF);
+  }
+}
+
+// Encode Connex Memory Operand: (register encoding << 16) | low 16 bits of the immediate
+uint64_t ConnexMCCodeEmitter::getMemoryOpValue(const MCInst &MI, unsigned Op,
+                                               SmallVectorImpl<MCFixup> &Fixups,
+                                               const MCSubtargetInfo &STI) const {
+  uint64_t Encoding;
+  const MCOperand Op1 = MI.getOperand(1); // NOTE(review): Op is ignored; operands 1 and 2 are hard-coded - confirm
+  assert(Op1.isReg() && "First operand is not register.");
+  Encoding = MRI.getEncodingValue(Op1.getReg());
+  Encoding <<= 16;
+  MCOperand Op2 = MI.getOperand(2);
+  assert(Op2.isImm() && "Second operand is not immediate.");
+  Encoding |= Op2.getImm() & 0xffff;
+  return Encoding;
+}
+
+// 2019_03_30_TODO #define ENABLE_INSTR_PREDICATE_VERIFIER
+#include "ConnexGenMCCodeEmitter.inc"
Index: lib/Target/Connex/MCTargetDesc/ConnexMCTargetDesc.h
===================================================================
--- lib/Target/Connex/MCTargetDesc/ConnexMCTargetDesc.h
+++ lib/Target/Connex/MCTargetDesc/ConnexMCTargetDesc.h
@@ -0,0 +1,64 @@
+//===-- ConnexMCTargetDesc.h - Connex Target Descriptions -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides Connex specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_CONNEX_MCTARGETDESC_CONNEXMCTARGETDESC_H +#define LLVM_LIB_TARGET_CONNEX_MCTARGETDESC_CONNEXMCTARGETDESC_H + +#include "llvm/Config/config.h" +#include "llvm/Support/DataTypes.h" + +#include + +namespace llvm { +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectTargetWriter; +class MCRegisterInfo; +class MCSubtargetInfo; +class MCTargetOptions; +class StringRef; +class Target; +class Triple; +class raw_ostream; +class raw_pwrite_stream; + +extern Target TheConnexTarget; + + +MCCodeEmitter *createConnexMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx); + +MCAsmBackend *createConnexAsmBackend(const Target &T, const MCSubtargetInfo &STI, + const MCRegisterInfo &MRI, + const MCTargetOptions &Options); + +std::unique_ptr createConnexELFObjectWriter(uint8_t OSABI); +} + +// Defines symbolic names for Connex registers. This defines a mapping from +// register name to register number. +// +#define GET_REGINFO_ENUM +#include "ConnexGenRegisterInfo.inc" + +// Defines symbolic names for the Connex instructions. 
+// +#define GET_INSTRINFO_ENUM +#include "ConnexGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "ConnexGenSubtargetInfo.inc" + +#endif Index: lib/Target/Connex/MCTargetDesc/ConnexMCTargetDesc.cpp =================================================================== --- lib/Target/Connex/MCTargetDesc/ConnexMCTargetDesc.cpp +++ lib/Target/Connex/MCTargetDesc/ConnexMCTargetDesc.cpp @@ -0,0 +1,109 @@ +//===-- ConnexMCTargetDesc.cpp - Connex Target Descriptions ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides Connex specific target descriptions. +// +//===----------------------------------------------------------------------===// + + +#include "Connex.h" +#include "ConnexMCTargetDesc.h" +#include "ConnexMCAsmInfo.h" +#include "InstPrinter/ConnexInstPrinter.h" +//#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "ConnexGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "ConnexGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "ConnexGenRegisterInfo.inc" + +using namespace llvm; + + +static MCInstrInfo *createConnexMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitConnexMCInstrInfo(X); + return X; +} + + +static MCRegisterInfo *createConnexMCRegisterInfo(const Triple &TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitConnexMCRegisterInfo(X, Connex::R11 /* RAReg doesn't exist */); + return X; +} + + +static MCSubtargetInfo *createConnexMCSubtargetInfo(const Triple &TT, + 
StringRef CPU, StringRef FS) { + return createConnexMCSubtargetInfoImpl(TT, CPU, FS); +} + + +static MCStreamer *createConnexMCStreamer(const Triple &T, MCContext &Ctx, + std::unique_ptr &&MAB, + std::unique_ptr &&OW, + std::unique_ptr &&Emitter, + bool RelaxAll) { + return createELFStreamer(Ctx, std::move(MAB), std::move(OW), + std::move(Emitter), + RelaxAll); +} + + +static MCInstPrinter *createConnexMCInstPrinter(const Triple &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { + if (SyntaxVariant == 0) + return new ConnexInstPrinter(MAI, MII, MRI); + return nullptr; +} + + +extern "C" void LLVMInitializeConnexTargetMC() { + for (Target *T : {&TheConnexTarget}) { + // Register the MC asm info. + RegisterMCAsmInfo X(*T); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createConnexMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createConnexMCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, + createConnexMCSubtargetInfo); + + // Register the object streamer + TargetRegistry::RegisterELFStreamer(*T, createConnexMCStreamer); + + // Register the MCInstPrinter. + TargetRegistry::RegisterMCInstPrinter(*T, createConnexMCInstPrinter); + } + + // Register the MC code emitter + TargetRegistry::RegisterMCCodeEmitter(TheConnexTarget, + createConnexMCCodeEmitter); + + // Register the ASM Backend + TargetRegistry::RegisterMCAsmBackend(TheConnexTarget, + createConnexAsmBackend); +} Index: lib/Target/Connex/MCTargetDesc/LLVMBuild.txt =================================================================== --- lib/Target/Connex/MCTargetDesc/LLVMBuild.txt +++ lib/Target/Connex/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/Target/Connex/MCTargetDesc/LLVMBuild.txt --------------*- Conf -*--===; +; +; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = ConnexDesc +parent = Connex +required_libraries = MC ConnexAsmPrinter ConnexInfo Support +add_to_library_groups = Connex Index: lib/Target/Connex/Misc.h =================================================================== --- lib/Target/Connex/Misc.h +++ lib/Target/Connex/Misc.h @@ -0,0 +1,78 @@ +#ifndef INCLUDED_SUNIT_DUMP +#define INCLUDED_SUNIT_DUMP + +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +// Inspired from SystemZHazardRecognizer.cpp + +#ifndef NDEBUG // Debug output + + +// The SUnit (Scheduling Unit) class no longer has the dump() method, +// so we create a helper method for it here. +// Inspired from SystemZHazardRecognizer.h + +/// Resolves and cache a resolved scheduling class for an SUnit. 
+static const MCSchedClassDesc *getSchedClass(SUnit *SU) { + if (!SU->SchedClass // && SchedModel->hasInstrSchedModel() + ) { + return NULL; + // TODO: SU->SchedClass = SchedModel->resolveSchedClass(SU->getInstr()); + } + + return SU->SchedClass; +} + +static void dumpSU(llvm::SUnit *SU, raw_ostream &OS) { + OS << "SU(" << SU->NodeNum << "):"; + //OS << TII->getName(SU->getInstr()->getOpcode()); + OS << SU->getInstr()->getOpcode(); + + const MCSchedClassDesc *SC = getSchedClass(SU); + if (!SC->isValid()) + return; + + /* + // TODO: make this compile + + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + const MCProcResourceDesc &PRD = + *SchedModel->getProcResource(PI->ProcResourceIdx); + std::string FU(PRD.Name); + // trim e.g. Z13_FXaUnit -> FXa + FU = FU.substr(FU.find("_") + 1); + size_t Pos = FU.find("Unit"); + if (Pos != std::string::npos) + FU.resize(Pos); + if (FU == "LS") // LSUnit -> LSU + FU = "LSU"; + OS << "/" << FU; + + if (PI->Cycles > 1) + OS << "(" << PI->Cycles << "cyc)"; + } + */ + + if (SC->NumMicroOps > 1) + OS << "/" << SC->NumMicroOps << "uops"; + if (SC->BeginGroup && SC->EndGroup) + OS << "/GroupsAlone"; + else if (SC->BeginGroup) + OS << "/BeginsGroup"; + else if (SC->EndGroup) + OS << "/EndsGroup"; + if (SU->isUnbuffered) + OS << "/Unbuffered"; + /* + // TODO: make this compile + if (has4RegOps(SU->getInstr())) + OS << "/4RegOps"; + */ +} +#endif + +#endif // INCLUDED_SUNIT_DUMP Index: lib/Target/Connex/RecoverFromLlvmIR.h =================================================================== --- lib/Target/Connex/RecoverFromLlvmIR.h +++ lib/Target/Connex/RecoverFromLlvmIR.h @@ -0,0 +1,2022 @@ +#ifndef RECOVER_FROM_LLVM_IR +#define RECOVER_FROM_LLVM_IR + +//#include +//#include + +// Alex: new code +//#include +#include +//#include +#include +#include // std::pair +// Alex: END new code + +#include "llvm/IR/DebugInfo.h" + +// See 
// Truncates str in place at the first occurrence of ch; the string is left
// untouched when ch does not occur.
// Normally used to drop a variable-name suffix such as ".034".
void rStripStringAfterChar(char *str, char ch) {
  if (char *cut = strchr(str, ch))
    *cut = '\0';
}
+ */ +std::string getLLVMValueName(Value *val) { + /* Somewhat important: it is possible that, if the API + changes a bit the name will NOT be printed + here anymore */ + std::string printStr; + raw_string_ostream OS(printStr); + + // bci->printAsOperand(OS, true); // Does NOT write anything (false neither) + + // See http://llvm.org/docs/doxygen/html/Value_8h_source.html#l00202 + /* NOTE: IsForDebug false can print: + - the SAME as true or + - the complete instruction, not just the value */ + val->print(OS, /*IsForDebug*/ true); + LLVM_DEBUG(dbgs() << "getLLVMValueName(): printStr = " + << printStr << "\n"); + + char strValName[MAXLEN_STR]; + char strValName2[MAXLEN_STR]; + + if (llvm::dyn_cast(val) != NULL) { + LLVM_DEBUG(dbgs() << "getLLVMValueName(): val is Constant\n"); + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1Constant.html + sscanf(printStr.c_str(), "%s %s", strValName2, strValName); + + /* Normally printStr is of form "type_ct val_ct". + * But we can also have something like + * @dataT = common local_unnamed_addr global [128 x [150 x half]] zeroinitializer + */ + if (strValName2[0] == '@') + strcpy(strValName, strValName2 + 1); + } + else { + const char *ptr = printStr.c_str(); + for (; *ptr != 0; ptr++) { + if (*ptr == '%') + break; + } + LLVM_DEBUG(dbgs() << "getLLVMValueName(): ptr = " << ptr << "\n"); + + if (*ptr == 0) { + // This is NOT a variable Value - probably just a constant + return ""; //std::to_string(""); + } + + sscanf(ptr, "%s ", strValName); + //sscanf(valTypeAndName.c_str(), "%s %s", strValName, strValName); + } + + std::string res = strValName; + LLVM_DEBUG(dbgs() << "getLLVMValueName(): res = " + << res << "\n"); + + return res; +} + +// Used by getAllMetadata() (and getExpr()) +bool ranGetAllMetadata; +//DenseMap varNameMap; +// Map with +std::unordered_map varNameMap; +// +void getAllMetadata(Function *F) { + ranGetAllMetadata = true; + + LLVM_DEBUG(dbgs() << "Entered getAllMetadata()\n"); + + // Some info about 
metadata: http://llvm.org/docs/SourceLevelDebugging.html#llvm-dbg-value + + // Inspired from + // https://weaponshot.wordpress.com/2012/05/06/extract-all-the-metadata-nodes-in-llvm/ + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + for (BasicBlock::iterator I = BB->begin(), + E = BB->end(); I != E; ++I) { + /* Get the Metadata declared in the llvm intrinsic functions + such as llvm.dbg.declare() */ + if (CallInst *CI = dyn_cast(I)) { + if (Function *F = CI->getCalledFunction()) { + // We look at the llvm.dbg.value metadata which associates Value (LLVM IR values) with names in the original program + if (F->getName().startswith("llvm.dbg.value")) { + //if (F->getName().startswith("llvm.dbg")) + LLVM_DEBUG(dbgs() << "getAllMetadata(): CI = " << *CI << "\n"); + + /* It seems that the association between LLVM IR + Value and names in the original source program + is always like this: + - opnd 0 contains the Value, + - opnd 1 is always a (useless?) 0, + - opnd 2 contains the DILocalVariable, + */ + // Error: <>: DILocalVariable *srcVar = llvm::dyn_cast_or_null(I->getOperand(2)); + // Error: <>: MDNode *srcVar = llvm::dyn_cast_or_null(I->getOperand(2)); + /* See http://llvm.org/docs/doxygen/html/classllvm_1_1MetadataAsValue.html + (see maybe http://llvm.org/docs/doxygen/html/namespacellvm_1_1mdconst.html: + "Now that Value and Metadata are in separate hierarchies" */ + MetadataAsValue *srcVarMDV = llvm::dyn_cast_or_null(I->getOperand(2)); + + //Value *val = I->getOperand(0); + MetadataAsValue *val = llvm::dyn_cast_or_null(I->getOperand(0)); + assert(val != NULL); + + if (srcVarMDV != NULL) { + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MDNode.html + //MDNode *srcVar = llvm::dyn_cast_or_null(srcVarMDV->getMetadata()); + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1DILocalVariable.html + // and http://llvm.org/docs/doxygen/html/classllvm_1_1DIVariable.html + DILocalVariable *srcVar = 
llvm::dyn_cast_or_null(srcVarMDV->getMetadata()); + + assert(srcVar != NULL); + + // Gives compiler-error: const MDOperand srcVarOpnd0 = srcVar->getOperand(0); + //const MDOperand *srcVarOpnd0 = & (srcVar->getOperand(0)); + + std::string valueName = getLLVMValueName(val); + if (valueName.size() == 0) { + /* We can have metadata which has for 1st + operand a constant e.g. 0. + For ex + call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !32, metadata !21), !dbg !33 + */ + continue; + } + + //varNameMap[valTypeAndName] = (srcVar->getName()).str(); + varNameMap[valueName] = (srcVar->getName()).str(); + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1DILocalVariable.html + LLVM_DEBUG(dbgs() << "getAllMetadata(): val = " + << *val << "\n"); + LLVM_DEBUG(dbgs() << " val = " + << val << "\n"); + LLVM_DEBUG(dbgs() << " val->getValueName() = " + << val->getValueName() << "\n"); + LLVM_DEBUG(dbgs() << " val->getName() = " + << val->getName() << "\n"); + LLVM_DEBUG(dbgs() << " srcVar = " + << *srcVar << "\n"); + //LLVM_DEBUG(dbgs() << " srcVar->getOperand(0) = " + LLVM_DEBUG(dbgs() << " srcVarName = " + << varNameMap[valueName] /* srcVar->getName() */ + << "\n"); + } + } + } + } + } + } +} // end getAllMetadata() + + +std::string printCTypeFromLLVMType(Type *aType, LLVMContext *aContext) { + std::string res; + + // See http://llvm.org/doxygen/classllvm_1_1Type.html + if (aType == Type::getInt16Ty(*aContext)) + res = "short"; + else + if (aType == Type::getInt32Ty(*aContext)) //Builder.getInt32Ty()) + res = "int"; + else + if (aType == Type::getHalfTy(*aContext)) + res = "half"; + else + assert(0 && "printCTypeFromLLVMType(): Type NOT supported"); + + return res; +} + + +// TODO: probably we will need to treat struct/record +// (category theory coproduct data type), +// union/variants (category theory cartesian product data type) +Type *getElementTypeOfDerivedType(Type *valType) { + int sizeofElem; + + LLVM_DEBUG(dbgs() << "getElementTypeOfDerivedType(): valType 
= " + << *valType << "\n"); + + // Helps for vector type. + // So it does NOT help for pointer type, as it is the case for val (normally). + Type *scalarType = valType->getScalarType(); + LLVM_DEBUG(dbgs() << "getElementTypeOfDerivedType(): scalarType = " + << *scalarType << "\n"); + + sizeofElem = scalarType->getScalarSizeInBits() / 8; + LLVM_DEBUG(dbgs() << "getElementTypeOfDerivedType(): sizeof(scalarType) = " + << sizeofElem << "\n"); + if (sizeofElem != 0) + return scalarType; + + /* + // Does NOT help: both return 0... + LLVM_DEBUG(dbgs() << "GetSize(): bitsizeof(type of val) = " + //<< valType->getPrimitiveSizeInBits() / 8 << "\n"); + << valType->getScalarSizeInBits() << "\n"); + */ + ArrayType *arrType = llvm::dyn_cast(valType); + + if (arrType != NULL) { + Type *elemArrType = arrType->getElementType(); + sizeofElem = elemArrType->getScalarSizeInBits() / 8; + LLVM_DEBUG(dbgs() + << "getElementTypeOfDerivedType(): (arrType != NULL): elemArrType = " + << *elemArrType << "\n"); + LLVM_DEBUG(dbgs() + << "getElementTypeOfDerivedType(): (arrType != NULL): sizeofElem = " + << sizeofElem << "\n"); + + if (sizeofElem == 0) { + return getElementTypeOfDerivedType(elemArrType); + } + else { + return elemArrType; + } + } + + /* See http://llvm.org/docs/doxygen/html/classllvm_1_1SequentialType.html + and http://llvm.org/docs/doxygen/html/classllvm_1_1PointerType.html */ + PointerType *ptrType = llvm::dyn_cast(valType); + if (ptrType != NULL) { + Type *elemPtrType = ptrType->getElementType(); + + sizeofElem = elemPtrType->getScalarSizeInBits() / 8; + LLVM_DEBUG(dbgs() << "getElementTypeOfDerivedType(): elemPtrType = " + << *elemPtrType << "\n"); + LLVM_DEBUG(dbgs() << "getElementTypeOfDerivedType(): sizeof(elemPtrType) = " + << sizeofElem << "\n"); + + if (sizeofElem == 0) { + return getElementTypeOfDerivedType(elemPtrType); + } + else + return elemPtrType; + } + + /* + ArrayType *elemPtrTypeArr = llvm::dyn_cast(elemPtrType); + if (elemPtrTypeArr != NULL) { + Type 
*elemPtr2TypeArr = elemPtrTypeArr->getElementType(); + sizeofElem = elemPtr2TypeArr->getScalarSizeInBits() / 8; + + LLVM_DEBUG(dbgs() + << "getElementTypeOfDerivedType(): (elemPtr2TypeArr != NULL) elemPtr2TypeArr = " + << *elemPtr2TypeArr << "\n"); + LLVM_DEBUG(dbgs() + << "getElementTypeOfDerivedType(): (elemPtr2TypeArr != NULL) sizeofElem = " + << sizeofElem << "\n"); + LLVM_DEBUG(dbgs() + << "getElementTypeOfDerivedType(): elemPtrTypeArr->getNumElements() = " + << elemPtrTypeArr->getNumElements() << "\n"); + if (sizeofElem == 0) { + return getElementTypeOfDerivedType(elemPtrType); + } + } + */ + + return NULL; +} + + +bool testEquivalence(Instruction *it, PHINode *phi) { + Value *op0 = NULL; + + LLVM_DEBUG(dbgs() << "Entered testEquivalence(): it = " + << * it + << ", phi = " + << *phi << "\n"); + + if (phi == it) + return true; + + if (it->getNumOperands() > 0) { + op0 = it->getOperand(0); + } + + switch (it->getOpcode()) { + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::Trunc: + //case Instruction::ShuffleVector: + //case Instruction::InsertElement: + //case Instruction::PHI: + //case Instruction::ExtractElement: + //res = ""; + break; + + /*case Instruction::GetElementPtr: + break; */ + + default: + return false; + //assert(0 && "testEquivalence(): we do not deal with these cases"); + } + + /* + // IMPORTANT-TODO: need to do this for the case we have an access like + // B[j + 1][0] + switch (it->getOpcode()) { + case Instruction::Add: + res += " + "; + break; + } + */ + + return testEquivalence((Instruction *)op0, phi); +} + + +inline bool isGlobalArray(GetElementPtrInst *GEPPtr) { + return llvm::dyn_cast(GEPPtr->getOperand(0)) != NULL; +} + +inline Value *GetIndexOpndFromGEPInst(GetElementPtrInst *GEPPtr) { + int startIndex; + if (isGlobalArray(GEPPtr)) { + /* Following also + http://llvm.org/docs/GetElementPtr.html#why-is-the-extra-0-index-required + we see that for global arrays, the 1st index + in GEP is redundant - it has value 0 
/*
Currently this function ONLY does this: it gets rid of duplicated spaces,
i.e. every run of consecutive ' ' characters is reduced to a single ' '.
Single spaces are kept, and leading/trailing runs are reduced to one space
rather than trimmed. Only the space character is affected.

NOTE(review): single spaces must survive because getExpr() compares the
result against strings such as "(i + 1)".

IMPORTANT-TODO: get rid of unnecessary parentheses
  - for this normally the expression has to be parsed and pretty-printed.
Algebraic simplification and constant folding are not attempted here.
*/
std::string canonicalizeExpression(std::string aStr) {
  // Single in-place compaction pass: `out` is the length of the result built
  // so far at the front of aStr.
  std::string::size_type out = 0;
  for (std::string::size_type in = 0; in < aStr.size(); ++in) {
    const bool duplicatedSpace =
        aStr[in] == ' ' && out > 0 && aStr[out - 1] == ' ';
    if (!duplicatedSpace)
      aStr[out++] = aStr[in];
  }
  aStr.resize(out);

  return aStr;
}
LLVM_DEBUG(dbgs() << " (printInfo(): str0 = " + << str0 << ")\n"); + LLVM_DEBUG(dbgs() << " (printInfo(): str1 = " + << str1 << ")\n"); + + if (op0 == NULL) { + LLVM_DEBUG(dbgs() << " (printInfo(): op0 = NULL\n"); + } + else { + LLVM_DEBUG(dbgs() << " (printInfo(): op0 = " + << *op0 << ")\n"); + } + + if (op1 == NULL) { + LLVM_DEBUG(dbgs() << " (printInfo(): op1 = NULL\n"); + } + else { + LLVM_DEBUG(dbgs() << " (printInfo(): op1 = " + << *op1 << ")\n"); + } +} + + +/* Alex: + * - we get a C expression + * by walking on the use-def-chains (more exactly the only reaching definition + * for the SSA it instruction) in order to get the most complete definition + * for the it instruction. + * + * - doing some sort of partial evaluation + + NOTE: SCEV also pretty prints - display expressions related to tripcounts + (zext i16 (-1 + %N) to i32) + (see code below: + BackedgeTakenCount->dump(); + ExitCount->dump(); ) + See, more exactly, http://llvm.org/docs/doxygen/html/ScalarEvolution_8cpp_source.html + void SCEV::print(raw_ostream &OS) const {} + + IMPORTANT NOTE: We use ((int *)&x) instead of &x because the & for an array + (global at least) is a pointer to array and this affects/reflects on + the pointer arithmetic. + Concrete example on ARM 32 on zedboard.arh.pub.ro: + /home/alarm/OpincaaLLVM/opincaa_standalone_app/35_MatMul/SIZE_256/STDout_003a + Before 1st write: &A = 405912 + Before 1st write: &A + 20 = 3027352 + Before 1st write: &A + 131072 = 405912 + Before 1st write: ((char *)&A) + 131072 = 536984 + when running on ARM (32 bits processor) it is possible that &A + x == &A + (where x is e.g. 131072) (probably because of overflow or because the + VM did not map memory there or...) + So, again, we need to use when doing arithmetic instead of &A --> (int *)(&A) + or (short/char *)(&A) . + NOTE: [TODO TODO CHECK WELL]: It seems for pointer type we print just the var e.g. A + without &A. 
+ */ +bool usePaddingForNestedLoops_more = false; +bool getExprVarSpecial = false; +//bool getExprForTripCount = false; +bool getExprForDMATransfer = true; +std::unordered_map cacheExpr; +Value *basePtrGetExprIt; // This is the base pointer (GetElementPtr, 1st operand; ) +// IMPORTANT-TODO: make getExpr(Value *it) and check if it is instruction or not +// +std::string getExpr(Instruction *it) { + if (it == NULL) { + LLVM_DEBUG(dbgs() << "Entered getExpr(): it = NULL\n"); + return std::string(""); + } + else { + LLVM_DEBUG(dbgs() << "Entered getExpr(): it = " + << *it << "\n"); + } + + char *str0 = const_cast(""); + char *str1 = const_cast(""); + + char strCopy[MAXLEN_STR]; + //static char res[MAXLEN_STR]; + std::string res; + + #define STR_VEC_IND "vec.ind" + #define STR_STEP_ADD "step.add" + /* Note that if I recall correctly, the var names ending in splatinsert are + automatically generated */ + #define STR_BROADCAST_SPLATINSERT "broadcast.splatinsert" + #define STR_SPLATINSERT ".splatinsert" + #define STR_BROADCAST_SPLAT "broadcast.splat" + #define STR_SPLAT ".splat" + // + #define STR_INDUCTION "induction" + #define STR_UNDEF "undef" + #define STR_INDEX "index" + #define STR_INDEX_NEXT "index.next" + + /* NOTE: It is possible that the names have a suffix when we have 2+ + vars starting with the same name - this happens when more + vector.body BBs are created (more loops are vectorized). + For this, we use strncmp(), not strcmp(). */ + + Value *op0 = NULL; + Value *op1 = NULL; + + if (it->getNumOperands() > 0) { + op0 = it->getOperand(0); + str0 = const_cast(op0->getName().data()); + if (it->getNumOperands() > 1) { + op1 = it->getOperand(1); + str1 = const_cast(op1->getName().data()); + } + } + + /* + * NOTE: it points to an Instruction (or just a Value). + getOperand() returns type Value. + * From http://llvm.org/docs/doxygen/html/classllvm_1_1Value.html + << StringRef getName () const + Return a constant reference to the value's name. 
>> + */ + + const char *iGetNameData = it->getName().data(); + + res.clear(); + + + /* + LLVM_DEBUG(dbgs() << "getExpr(): getExprForTripCount = " + << getExprForTripCount << "\n"); + */ + printInfo(it, str0, str1, iGetNameData, op0, op1); + + // See http://www.cplusplus.com/reference/unordered_map/unordered_map/find/ + std::unordered_map::const_iterator got = + cacheExpr.find(it); + #define INVALID_VALUE_CACHEEXPR "\\@@INVALID_STR@@" + if (got == cacheExpr.end()) { + //cacheExpr.insert(it); + + /* We insert an empty string res, just to keep track we visited this + * node and we update the entry with the correct value at the end of + * the function */ + cacheExpr[it] = INVALID_VALUE_CACHEEXPR; //res; + } + else { + if (cacheExpr[it] != INVALID_VALUE_CACHEEXPR) { + // This case can be quite easily reached if the expression it has + // several times as constituent atoms the same expression. + res = got->second; + LLVM_DEBUG(dbgs() + << "getExpr(): We already visited this node so we stop here.\n"); + goto GetExpr_end; + } + else + /* We have already cached something for this node, + * either an INVALID_VALUE_CACHEEXPR or a valid value we can return + * directly. + */ + if (it->getOpcode() == Instruction::PHI) { + /* If we visited this phi we do NOT revisit it since it can easily + * result in infinite cycles... It's not very fundamented, + * but it's OK :) */ + /* + We should keep the unstripped name, although it is possible that if + we visited the variable node before it might be already stripped. 
+ */ + + if (strlen(str0) == 0) { + std::string exprOp0 = getExpr((Instruction *)op0); + LLVM_DEBUG(dbgs() << "getExpr(): Checking PHI's exprOp0 = " + << exprOp0 << " (should be a constant).\n"); + + // 2018_12_15: MEGA-TODO: test well, also regressive tests + if (strlen(iGetNameData) == 0) { + if (exprOp0.size() > 4) + res = exprOp0; + } + + //assert(strcmp(exprOp0.c_str(), "0") == 0); + } + else + if (strlen(str1) == 0) { + std::string exprOp1 = getExpr((Instruction *)op1); + LLVM_DEBUG(dbgs() << "getExpr(): Checking PHI's exprOp1 = " + << exprOp1 << " (should be a constant).\n"); + + // 2018_12_15: MEGA-TODO: test well, also regressive tests + if (strlen(iGetNameData) == 0) { + if (exprOp1.size() > 4) + res = exprOp1; + } + + //assert(strcmp(exprOp1.c_str(), "0") == 0); + } + else { + LLVM_DEBUG(dbgs() << "getExpr(): Setting res to empty string.\n"); + res = ""; + goto GetExpr_end; + } + + LLVM_DEBUG(dbgs() + << "getExpr(): We visited part of this PHI node " + "so we approximate it... This should be avoided if possible.\n"); + + if (getExprVarSpecial) { + //res += ""; + } + + LLVM_DEBUG(dbgs() << "getExpr(): res = " << res << "\n"); + //res = rStripStringAfterChar(iGetNameData, '.'); + strcpy(strCopy, iGetNameData); + rStripStringAfterChar(strCopy, '.'); + res += strCopy; + LLVM_DEBUG(dbgs() << "getExpr(): after, res = " << res << "\n"); + + if (getExprVarSpecial) { + char strTmp[MAXLEN_STR]; + sprintf(strTmp, "__%p", (void *)it); + res += strTmp; + //res += ""; + } + + goto GetExpr_end; + } + } // END else if (got == cacheExpr.end()) + + /* Global var (values, not arrays) in LLVM language are already pointers to + the global address space. This is why we need to use & for them. 
+ We check that *it is a GlobalValue like: + @colsK = common local_unnamed_addr global i32 0, align 4 + // See http://llvm.org/docs/doxygen/html/classllvm_1_1GlobalValue.html + // (also http://llvm.org/docs/LangRef.html#global-variables) + */ + //if (GlobalValue *gv = llvm::dyn_cast(it)) + if (llvm::dyn_cast(it) != NULL) { + if (usePaddingForNestedLoops_more == true) + res = "("; + else + res = "((int *)&"; + + if (getExprVarSpecial) { + //res += ""; + } + + res += iGetNameData; + + if (getExprVarSpecial) { + char strTmp[MAXLEN_STR]; + sprintf(strTmp, "__%p", (void *)it); + res += strTmp; + //res += ""; + } + + res += ")"; + if (basePtrGetExprIt == NULL) + basePtrGetExprIt = it; + + goto GetExpr_end; + } + + #ifdef NOT_TREAT_NMODVF + /* When computing trip count, I don't want it to be multiple of VF, + but I want the original expression. + Note: n.mod.vf is a name given by the program below (this module) in + getOrCreateVectorTripCount(). */ + /* It is possible that the names to have a suffix since the names + exist, since a different vector.body was created before. */ + if (strncmp(iGetNameData, STR_REMAINDER_VF, + strlen(STR_REMAINDER_VF)) == 0) { + LLVM_DEBUG(dbgs() << "getExpr(): NOT following remainder var " + << iGetNameData << ".\n"); + + /* A simple hack, since I already have the - operator and am lazy to + get rid of it: */ + res = "0"; + + goto GetExpr_end; + } + #endif + + if ((strncmp(iGetNameData, STR_INDUCTION, strlen(STR_INDUCTION)) == 0) && + (it->getOpcode() == Instruction::Add)) { + LLVM_DEBUG(dbgs() << "getExpr(): NOT following induction var " + << iGetNameData << ".\n"); + res = getExpr((Instruction *) (it->getOperand(0)) ); + + /* Indeed, induction is a vector of consecutive indices - let's call it + a vector index. + VERY IMPORTANT: To understand things better, we distinguish: + - the scalar index, indexLLVM_LV, or LV's index (and index.next) + - the vector index, vec.ind, used for loading from array (well, + sortof scalar, but...) 
*/ + + /* + // We do NOT process this: + res += " + "; + // TODO TODO: check that op1 == <0, 1, ..., VF-1> + res += "indexLLVM_LV"; + */ + goto GetExpr_end; + } + + + if ((strncmp(iGetNameData, STR_INDEX, + strlen(STR_INDEX)) == 0) && + (it->getOpcode() == Instruction::PHI) && + (strncmp(it->getOperand(1)->getName().data(), STR_INDEX_NEXT, + strlen(STR_INDEX_NEXT)) == 0) + ) { + // TODO TODO Check that op0 is constant 0. + // Coping with %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] + //LLVM_DEBUG(dbgs() << "getExpr(): NOT following index induction var.\n"); + LLVM_DEBUG(dbgs() << "getExpr(): Treating special case index = phi(0, index.next).\n"); + + /* A simple hack, since I already have the - operator and am lazy to + get rid of it: */ + #ifdef AGGREGATED_DMA_TRANSFERS // IMPORTANT note: we include this file from the back end also now (not only LoopVectorize.cpp) + if (getExprForDMATransfer) + res = "0"; + else + res = "indexLLVM_LV"; + #else + if (getExprForDMATransfer) + res = "0"; + else + res = "indexLLVM_LV"; + #endif + + goto GetExpr_end; + } + + // Note: constants like i64 0 don't have name --> str0 is empty + /* Here we try to solve a recurrence equation with any PHI node related to + the C source variables: */ + if ((it->getOpcode() == Instruction::PHI) && + strncmp(iGetNameData, STR_VEC_IND, strlen(STR_VEC_IND)) != 0 && + strncmp(iGetNameData, STR_STEP_ADD, strlen(STR_STEP_ADD)) != 0 && + strncmp(iGetNameData, STR_INDUCTION, strlen(STR_INDUCTION)) != 0) { + LLVM_DEBUG(dbgs() << + "getExpr(): it is Phi, phi node with no special vector vars...\n"); + + assert(it->getNumOperands() > 0); + + // 2018_12_15: MEGA-TODO: test well + if (((Instruction *)op0)->getOpcode() == Instruction::PHI) { + // MEGA-TODO: && strncmp(exprOp1.c_str(), STR_UNDEF, strlen(STR_UNDEF)) + LLVM_DEBUG(dbgs() << + "getExpr(): op0 is Phi --> res = getExpr(op0)\n"); + res = getExpr((Instruction *)op0); + goto GetExpr_end; + } + else + // 2018_12_15: MEGA-TODO: test 
well + if (((Instruction *)op1)->getOpcode() == Instruction::PHI) { + // MEGA-TODO: && strncmp(exprOp1.c_str(), STR_UNDEF, strlen(STR_UNDEF)) + LLVM_DEBUG(dbgs() << + "getExpr(): op1 is Phi --> res = getExpr(op1)\n"); + res = getExpr((Instruction *)op1); + goto GetExpr_end; + } + + //#ifdef NEW_STUFF_DANGER + if (strlen(str0) == 0) { + LLVM_DEBUG(dbgs() << + " getExpr(): strlen(str0) == 0 --> exchanging operands\n"); + + long tmp; + tmp = (long)str0; + str0 = str1; + str1 = (char *)tmp; + + tmp = (long)op0; + op0 = op1; + op1 = (Value *)tmp; + + //EXCHANGE(str0, str1); + //EXCHANGE((int)op0, (int)op1); + + printInfo(it, str0, str1, iGetNameData, op0, op1); + } + //#endif + + if (strlen(str0) != 0) { + //assert(str0 ==(symbolically, after more recovery) iGetNameData + 1); + LLVM_DEBUG(dbgs() << " ... Entering getExpr() for op0\n"); + std::string exprOp0 = canonicalizeExpression(getExpr((Instruction *)op0)); + LLVM_DEBUG(dbgs() << " exprOp0 = " << exprOp0 << "\n"); + + std::string tmp = "("; + + strcpy(strCopy, iGetNameData); + rStripStringAfterChar(strCopy, '.'); + + //tmp = tmp + iGetNameData; + tmp = tmp + strCopy; + tmp = tmp + " + 1)"; + + LLVM_DEBUG(dbgs() << " tmp = " << tmp << "\n"); + + /* IMPORTANT-TODO: in some cases like + /home/asusu/LLVM/Tests/NEW_v128i16/32_MatAdd/STDerr_clang_opt_01 + we will have (i + 1) instead of (i.047.us + 1) + */ + if (strcmp(exprOp0.c_str(), tmp.c_str()) != 0) { + // IMPORTANT-TODO: take from the other if case below + LLVM_DEBUG(dbgs() + << " VERY BAD case encountered: " + << "Phi node is NOT like x = Phi(x + 1, 0) --> return 'main' part of exprOp0\n"); + /* IMPORTANT-TODO: this case is indeed bad - to + compute a solution to the phi node we normally require more + intelligent analysis. 
+ + + For example, for test 32_MatAdd we have: + %conv48.us = phi i32 [ %conv.us, %for.cond3.for.inc12_crit_edge.us ], + [ 0, %for.cond3.preheader.us.preheader ] + %i.047.us = phi i16 [ %inc13.us, %for.cond3.for.inc12_crit_edge.us ], + [ 0, %for.cond3.preheader.us.preheader ] + + While the 2nd phi has an easy to find solution (by seeing that + %inc13.us = add i16 %i.047.us, 1, !dbg !27) + which means the closed-form solution of Phi is %i.047.us = i, + for the 1st phi node the situation is VERY complicated. + But we see that: + %conv.us = sext i16 %inc13.us to i32, !dbg !28 + which makes the Phi expression of %conv48.us the same as + for %i.047.us . + + Also for SSD: + %conv48.us = phi(i.047.us + 1, 0) + + getExpr(): it = %conv327 = phi i32 [ 0, %for.cond2.preheader ], [ %conv3, %for.inc44 ] + getExpr(): op1 = %conv3 = sext i16 %inc45 to i32, !dbg !41 + getExpr(): updated op1 = %inc45 = add i16 %counter.026, 1, !dbg !40 + Alhough %conv.327 does NOT appear in the final .ll file, if we look in: + NEW_v128i16/90_CV/SSD/STDerr_clang_opt_01 + we have a similar case: + for.cond7.preheader: ; preds = %for.cond2.preheader, %for.inc44 + %conv327 = phi i32 [ 0, %for.cond2.preheader ], [ %conv3, %for.inc44 ] + %counter.026 = phi i16 [ 0, %for.cond2.preheader ], [ %inc45, %for.inc44 ] + */ + + /* IMPORTANT-TODO: think if possible to do better like + having getExpr return a parse tree where it is clear that a + node is a var or constant in order to avoid using substr. */ + res += exprOp0.substr(1, exprOp0.size() - 6); + goto GetExpr_end; + } + else { //if (strcmp(exprOp0.c_str(), tmp.c_str()) == 0) + // Case: *it is: x == phi(x + 1, 0); + // Check getExpr(op0) == str0 + 1; + + LLVM_DEBUG(dbgs() << " ... 
Entering getExpr() for op1\n"); + std::string exprOp1 = canonicalizeExpression(getExpr((Instruction *)op1)); + LLVM_DEBUG(dbgs() << " exprOp1 = " << exprOp1 << "\n"); + assert(strcmp(exprOp1.c_str(), "0") == 0); + + //assert(op0->getOpcode() == Instruction::ADD); + /* assert that: + - op1 is ct 0 and + - op0 == iGetNameData + 1 (but this normally leads to + a cyclic dependency) + i.e., check that (str0 == iGetNameData) && (str1 == ct 0) */ + /* This next condition is VERY important + * - e.g., for i phi node, for ...: because TODO + */ + LLVM_DEBUG(dbgs() << + "getExpr(): ...and str0 not empty, --> res = name of it\n"); + + /* We don't modify iGetNameData - otherwise we get errors + (assertion failures, etc) for modifying the LLVM variable names + */ + strcpy(strCopy, iGetNameData); + + /* Alex: We might have a newly created temp LLVM var and keep the original + (source file) variable name + */ + rStripStringAfterChar(strCopy, '.'); + res += strCopy; + goto GetExpr_end; + } + } + #ifdef NOTNOTNOT + else + if (strlen(str1) != 0) { + LLVM_DEBUG(dbgs() << "getExpr(): op1 = " << *op1 << "\n"); + + bool goodPhi = false; + if (((Instruction *)op1)->getOpcode() == Instruction::Add) { + goodPhi = true; + } + else + /* IMPORTANT-TODO: make it more generic (maybe + getExpr can itself say if we have a chain of SExt, Trunc, etc + before an Add) */ + if (((Instruction *)op1)->getOpcode() == Instruction::SExt) { + op1 = ((Instruction *)op1)->getOperand(0); + + if (((Instruction *)op1)->getOpcode() == Instruction::Add) { + goodPhi = true; + LLVM_DEBUG(dbgs() << "getExpr(): updated op1 = " + << *op1 << "\n"); + } + } + + if (goodPhi) { + LLVM_DEBUG(dbgs() << "getExpr(): ...and str1 not empty...\n"); + + // TODO TODO: check for Add to be + 1, etc + Value *op10 = ((Instruction *)op1)->getOperand(0); + Value *op11 = ((Instruction *)op1)->getOperand(1); + const char *op10Name = op10->getName().data(); + + LLVM_DEBUG(dbgs() << "getExpr(): op10 = " << * op10 << "...\n"); + 
LLVM_DEBUG(dbgs() << "getExpr(): op10Name = " + << op10Name << "...\n"); + LLVM_DEBUG(dbgs() << "getExpr(): op11 = " << * op11 << "...\n"); + + std::string res11 = getExpr((Instruction *)op11); + LLVM_DEBUG(dbgs() << "getExpr(): ...getExpr(op11) = " + << res11 << "...\n"); + + //((Instruction *)op1)->getOperand(0)->getName().data() + if (strcmp(op10->getName().data(), iGetNameData) == 0 && + strcmp(res11.c_str(), "1") == 0) { + LLVM_DEBUG(dbgs() + << "getExpr(): ...it->op1->op0 == it --> res = name of it\n"); + + /* We have instruction (recurrent equation): + x = phi(0, x + 1) with solution x . */ + strcpy(strCopy, iGetNameData); + rStripStringAfterChar(strCopy, '.'); + res += strCopy; + goto GetExpr_end; + } + else { + /* UNFORTUNATELY, we have an equation like: + y = phi(0, f(x)). + It is difficult to give a solution for general f(x). + BUT for case f(x) = x + 1, if we have also an instruction + x = phi(0, x + 1), with the same phi-labels as the + y = phi(...) instruction then it is obvious that y = x. + Fortunately, this happens quite often, acutally. 
+ */ + LLVM_DEBUG(dbgs() + << "getExpr(): ...it->op1->op0 != it --> ...\n"); + LLVM_DEBUG(dbgs() << "getExpr(): op0 = " + << *op0 << "\n"); + LLVM_DEBUG(dbgs() << "getExpr(): op1 = " + << *op1 << "\n"); + + std::string res0Aux = getExpr((Instruction *)op0); + assert(res0Aux == "0"); + + if ( (((Instruction *)op1)->getOpcode() == Instruction::Add) && + (getExpr((Instruction *)op11) == "1") && + (((Instruction *)op10)->getOpcode() == Instruction::PHI) ) { + std::string res10Aux = getExpr((Instruction *)op10); + LLVM_DEBUG(dbgs() << "res10Aux = " << res10Aux.c_str() << "\n"); + + if (cacheExpr[(Instruction *)op10] != INVALID_VALUE_CACHEEXPR) { + LLVM_DEBUG(dbgs() + << "getExpr() - Special PHI case " + "encountered: y = phi(0, x + 1), where x is also PHI\n"); + /* + if (strncmp(cacheExpr[op10].c_str(), + op10->getName().data(), + cacheExpr[op10].size()) == 0) + */ + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1PHINode.html + //PHINode *op10It = (PHINode *)op10; + PHINode *op10It = llvm::dyn_cast(op10); + + int itNumOpnds = it->getNumOperands(); + LLVM_DEBUG(dbgs() << "getExpr(): itNumOpnds = " + << itNumOpnds + << "; op10It->getNumOperands() = " + << op10It->getNumOperands() << "\n"); + assert(itNumOpnds == 2 && + op10It->getNumOperands() == 2); + + int iOpnd; + for (iOpnd = 0; iOpnd < itNumOpnds; iOpnd++) { + LLVM_DEBUG(dbgs() << "getExpr(): it->getIncomingBlock(" + << iOpnd << ") = " + << ((PHINode *)it)->getIncomingBlock(iOpnd) + << "\n"); + LLVM_DEBUG(dbgs() << "getExpr(): op10It->getIncomingBlock(" + << iOpnd << ") = " + << op10It->getIncomingBlock(iOpnd) << "\n"); + + if (((PHINode *)it)->getIncomingBlock(iOpnd) != + op10It->getIncomingBlock(iOpnd)) + break; + } + + LLVM_DEBUG(dbgs() << " getExpr() - ... 
and " + "these 2 PHIs are basically equivalent " + "(except the 'it' node does not have recursive eq " + "as the other - 'it' has a different name than op10)\n"); + + if (iOpnd == itNumOpnds) { + res += res10Aux; + goto GetExpr_end; + } + } + } + } + } + res += "!!!! [DO NOT KNOW HOW TO SOLVE]!!!!"; + } // end strlen(str1 != 0) + #endif // NOTNOTNOT + } // end if ((it->getOpcode() == Instruction::PHI) + + + // TODO TODO: NOT sure if it's OK to only choose it->getOperand(0) + // Normally this makes it a pointer to Value + if (it->getNumOperands() == 0) { + Type *itType = ((Value *)it)->getType(); + + LLVM_DEBUG(dbgs() << " (getExpr(): it->getType() = " + << *itType << " )\n"); + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1Type.html + if (itType->isVectorTy()) { + int64_t resVal = 0; + char strAux[MAXLEN_STR]; + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1ConstantVector.html + // Surprisingly NOT working: ConstantVector *ctVec = llvm::dyn_cast((Value *)it); + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1ConstantDataVector.html + ConstantDataVector *ctVec = llvm::dyn_cast((Value *)it); + + LLVM_DEBUG(dbgs() << "getExpr(): ctVec =" + << ctVec << "\n"); + + if (ctVec != NULL) { + Constant *ctSplat = ctVec->getSplatValue(); + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1Constant.html + const APInt ctAPInt = ctSplat->getUniqueInteger(); + // TODO TODO: Use instead Constant::getAggregateElement() - see http://lists.llvm.org/pipermail/llvm-dev/2016-November/106954.html + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1APInt.html + resVal = ctAPInt.getSExtValue(); + } + + /* This was meant for the %induction vector var; + but it's NOT good for %(broadcast).splatinsert - but we take + care of this below ...TODO [SAY WHERE] + */ + sprintf(strAux, "(int)%ld", resVal); + res += strAux; + goto GetExpr_end; + } + + // We print the constant or input variable: + std::string Result; + raw_string_ostream OS(Result); + ((Value 
*)it)->printAsOperand(OS, /* bool PrintType = */ false); + OS.flush(); + LLVM_DEBUG(dbgs() << " (getExpr(): it->printAsOperand() = " + << Result << ")\n"); + + // We erase the leading % char if it exists - for name of var + if (Result.c_str()[0] == '%') + Result.erase(0, 1); + + /* + Result.clear(); + ((Value *)it)->print(OS); + OS.flush(); + LLVM_DEBUG(dbgs() << " (getExpr(): it->print() = " + << Result << ")\n"); + */ + /* + switch (it->getOpcode()) { + case Instruction::Constant: + LLVM_DEBUG(dbgs() << " (getExpr(): it is Constant))\n"); + res = "ct!!!!"; + break; + } + */ + if (strncmp(Result.c_str(), STR_UNDEF, strlen(STR_UNDEF)) != 0) { + /* Note: + We can also have as parent %broadcast.splatinsert = insertelement <32 x i64> undef, i64 %mul.us, i32 0 + For this case, operand 0 is printed as: "<32 x i64> undef". + But we avoid to reach this case by specially treating + a %broadcast.splatinsert node. + */ + res += Result; + } + goto GetExpr_end; + } // END of if (it->getNumOperands() == 0) + + + bool putParantheses; + + switch (it->getOpcode()) { + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::Trunc: + case Instruction::ShuffleVector: + case Instruction::InsertElement: + case Instruction::PHI: + case Instruction::ExtractElement: + //res = ""; + putParantheses = false; + break; + + case Instruction::GetElementPtr: { + /* + putParantheses = false; + res = "(int *)&"; + */ + + /* IMPORTANT: + From http://en.cppreference.com/w/c/language/operator_precedence: + - operator [] (Array subscripting) has bigger priority + than & (Address-of). + So we need to put parantheses here + in case [] follows. 
+ */ + putParantheses = true; + + //res = "(int *)&("; + + // By doing so we treat case like (&ls[index])[0] (see SSD benchmark) + res = "((int *)&"; + + GetElementPtrInst *GEPInstr = llvm::dyn_cast(it); + assert(GEPInstr != NULL); + if (basePtrGetExprIt == NULL) + basePtrGetExprIt = GEPInstr->getPointerOperand(); + + break; + } + default: + putParantheses = true; + res = "("; + } + /* + if (putParantheses) + res = "("; + // + if (it->getOpcode() == Instruction::GetElementPtr) { } + */ + + LLVM_DEBUG(dbgs() << "getExpr(): putParantheses = " + << putParantheses << "\n"); + + if (it->getNumOperands() > 1) { + LLVM_DEBUG(dbgs() << "getExpr(): it->getOperand(1) = " + << *op1 << "; " + << "(str1 = " + << str1 << ")[END]\n"); + + // We prevent pretty-printing constant vectors + //if (getExprForTripCount == false) + /* TODO: maybe step.add is not operand 1, but 0 or 2, etc; check that + op0 is constant */ + if (strncmp(iGetNameData, STR_VEC_IND, strlen(STR_VEC_IND)) == 0 && + strncmp(str1, STR_STEP_ADD, strlen(STR_STEP_ADD)) == 0 && + strncmp(str0, STR_INDUCTION, strlen(STR_INDUCTION)) != 0) { + /* + This prevents further processing of: + %vec.ind = phi <32 x i64> [ , %vector.ph ], [ %step.add, %vector.body ] + BUT NOT of: %vec.ind = phi <32 x i32> [ %induction, %vector.ph ], [ %step.add, %vector.body ] + */ + LLVM_DEBUG(dbgs() << "getExpr(): treating vec.ind = phi ct_vec, step.add case\n"); + + #ifdef AGGREGATED_DMA_TRANSFERS // IMPORTANT note: we include this file from the back end also now (not only LoopVectorize.cpp) + if (getExprForDMATransfer) + res = "0"; + else + res = "indexLLVM_LV"; + #else + if (getExprForDMATransfer) + res = "0"; + else + res = "indexLLVM_LV"; + #endif + goto GetExpr_end; + } + + if (strncmp(iGetNameData, STR_VEC_IND, strlen(STR_VEC_IND)) == 0 && + strncmp(str0, STR_INDUCTION, strlen(STR_INDUCTION)) == 0 && + strncmp(str1, STR_STEP_ADD, strlen(STR_STEP_ADD)) == 0 + ) { + /* + This prevents further processing of: + %vec.ind = phi <32 x i64> [ , 
%vector.ph ], [ %step.add, %vector.body ] + BUT NOT of: %vec.ind = phi <32 x i32> [ %induction, %vector.ph ], [ %step.add, %vector.body ] + */ + LLVM_DEBUG(dbgs() + << "getExpr(): treating vec.ind = phi induction, step.add case\n"); + res = getExpr((Instruction *)op0); + res += " + indexLLVM_LV"; + goto GetExpr_end; + } + + if (it->getOpcode() == Instruction::PHI) { + //assert(0 && "We should not get here... since we already treated it"); + LLVM_DEBUG(dbgs() + << "getExpr(): it is Phi. (normally should not be here)\n"); + LLVM_DEBUG(dbgs() << " getExpr(): *it = " << *it << "\n"); + + // IMPORTANT-TODO : follow I guess the loopexit value + /* This is for cases like the one encountered in 50_SpMV, where we + cycle over temporary created vars: + %1 = phi i16 [ %2, %for.cond.loopexit ], [ %.pre, %for.body.preheader ] + %2 = load i16, i16* %arrayidx5, align 2, !dbg !64, !tbaa !46 + %arrayidx5 = getelementptr inbounds i16, i16* %row_ptr, i64 %idxprom4, !dbg !64 + %idxprom4 = sext i32 %add to i64, !dbg !64 + %add = add nsw i32 %i.026, 1, !dbg !63 + %i.026 = phi i32 [ %add, %for.cond.loopexit ], [ 0, %for.body.preheader ] + */ + res = getExpr((Instruction *)op0); + LLVM_DEBUG(dbgs() << "getExpr(): it is Phi, res = " << res << "\n"); + + + /* Noname like in the case of 50_SpMV testcase: + %1 = phi(%2, row_ptr[0]) + TODO TODO But I guess I should check iGetName != str0 + 1... + */ + // Note: constants like i64 0 don't have name --> str0 is empty + if (strlen(str0) == 0) { + LLVM_DEBUG(dbgs() << "getExpr(): it is Phi, str0 is empty.\n"); + + // assert getNumOperands() > 1 + std::string res2 = getExpr((Instruction *)op1); + //res += " phi "; + + LLVM_DEBUG(dbgs() << "getExpr(): res2 = " << res2 << "\n"); + + /* Here we compute the solution of phi - a 1st simple and ~bad + * attempt. + MEGA MEGA-TODO: compute the + closed-form solution from these recursive equations. 
+ */ + #define STR_TO_LOOK_FOR " + 1" + std::size_t found = canonicalizeExpression(res).find(STR_TO_LOOK_FOR); + if (found != std::string::npos) { + LLVM_DEBUG(dbgs() << "getExpr(): calling res.erase(found, " + "strlen(STR_TO_LOOK_FOR))\n"); + res.erase(found, strlen(STR_TO_LOOK_FOR)); + } + /* + //BUGS: because of modifying the internal char * of a std::strng + // and I guess string::size() needs to be updated + // also(??) + const char *resCStr = res.c_str(); + char *resCStrFound = (char *)strstr(resCStr, STR_TO_LOOK_FOR); + if (resCStrFound != NULL) { + LLVM_DEBUG(dbgs() << "InstrumentVectorStore(): resCStrFound = " + << resCStrFound << "\n"); + // NOT correct - strings do overlap: strcpy(resCStrFound, resCStrFound + 4); + memmove(resCStrFound, resCStrFound + strlen(STR_TO_LOOK_FOR), + strlen(resCStrFound + strlen(STR_TO_LOOK_FOR)) + 1); + } + */ + } + else { + /* IMPORTANT-TODO: think if it is correct to be empty + - try it out - note there is also another case treating + phi nodes above. 
+ */ + } + + goto GetExpr_end; + } + + /* + // NOT necessary anymore - treat below this case by simply jumping to + // meaningful values + if (strncmp(iGetNameData, STR_BROADCAST_SPLATINSERT, + strlen(STR_BROADCAST_SPLATINSERT)) == 0 || + strncmp(iGetNameData, STR_SPLATINSERT, + strlen(STR_SPLATINSERT)) == 0) { + LLVM_DEBUG(dbgs() + << "getExpr(): treating (broadcast).splat(insert) case\n"); + + // op0 should be vector undef + res = getExpr((Instruction *)op1); + goto GetExpr_end; + } + */ + if (strncmp(iGetNameData, STR_BROADCAST_SPLAT, + strlen(STR_BROADCAST_SPLAT)) == 0 || + /* // I guess it's not necessary to do this test: + && (strncmp(iGetNameData, STR_BROADCAST_SPLATINSERT, + strlen(STR_BROADCAST_SPLATINSERT)) != 0) */ + (strncmp(iGetNameData, STR_SPLAT, + strlen(STR_SPLAT)) == 0) + /* // I guess it's not necessary to do this test: + && (strncmp(iGetNameData, STR_SPLATINSERT, + strlen(STR_SPLATINSERT)) != 0) */ + ) { + LLVM_DEBUG(dbgs() << "getExpr(): treating (broadcast).splat case\n"); + + //if (((Instruction *)op0) + /* This is for the SSD test: + %broadcast.splat33 = shufflevector <128 x i16> %broadcast.splatinsert32, + <128 x i16> undef, <128 x i32> zeroinitializer + where it = + %broadcast.splatinsert32 = insertelement <128 x i16> undef, i16 %0, i32 0 + and op0 = <128 x i16> undef + */ + if (llvm::dyn_cast(op0) == NULL) { + res = getExpr((Instruction *) op1); + goto GetExpr_end; + + /*it->getOpcode() == Instruction::InsertElement) + if (strncmp(iGetNameData, STR_BROADCAST_SPLAT, + strlen(STR_BROADCAST_SPLAT)) == 0 || + */ + } + else { + /// TODO TODO: maybe I should do some checks + // op1 should be vector undef, op2 should be zeroinitializer + //res = getExpr((Instruction *)op0); + res = getExpr((Instruction *) (((Instruction *)op0)->getOperand(1)) ); + goto GetExpr_end; + } + } + } + + // We now pretty print op0; + + if ((strlen(str0) == 0) + /* || + (strncmp(str0, STR_BROADCAST_SPLATINSERT, + strlen(STR_BROADCAST_SPLATINSERT)) == 0)) { */ + ) { + /* 
If the name of the variable is empty it means it is an automatically + * generated name (like %0, etc), NOT a name from the original (C,C++) + * program. Therefore we look also at the def of this var. + */ + + /* + TODO TODO + - ~BAD: recursively test str0 until we reach a + variable name that is input to the function?? + */ + /* TODO TODO (THIS IS MAYBE BADLY DESIGNED - might require more or fewer steps): + * Coping with type conversions like i32 to i64 (ex: + * ~/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/201_LoopVectorize/25_GOOD_map/NEW/7_v16i32/3better_opt.ll) + * in which case we have the following: + for.body.preheader: ; preds = %entry + %0 = add i32 %N, -1 + %1 = zext i32 %0 to i64 + %2 = add nuw nsw i64 %1, 1 + %min.iters.check = icmp ult i64 %2, 16 + [...] + min.iters.checked: ; preds = %for.body.preheader + %n.vec = and i64 %2, 8589934576 + */ + + /* + LLVM_DEBUG(dbgs() << "getExpr(): (it->getOperand(0) = " + << * (it->getOperand(0)) << ")\n"); + */ + LLVM_DEBUG(dbgs() + << "getExpr(): str0 empty (or so) --> calling getExpr(op0)\n"); + LLVM_DEBUG(dbgs() << " (getExpr(): current it = " << *it << ").\n"); + + //strcpy(res, tmp); + res += getExpr((Instruction *)op0); + } + else { // str0 is NOT empty + /* + // NOTNOTNOTNONOTNO + if (getExprForTripCount == false) { + LLVM_DEBUG(dbgs() << "getExpr(): returning str0 = " + << str0 << "\n"); + //strcpy(res, str0); + // Gives <> + // * (char *)strchr(str0, '.') = 0; + + // IMPORTANT-TODO: this + // transformation I guess is NOT 100% safe, because a named var + // can be a C var or an auxiliary LLVM var created in the LLVM pass + // - think how to make it safe + + if (strncmp(str0, STR_VEC_IND, strlen(STR_VEC_IND)) != 0) { + // We don't modify str0 - otherwise we get errors + //(assertion failures, etc) for modifying the LLVM variable names + strcpy(strCopy, str0); + + // Alex: We might have a newly created temp LLVM var and keep the original + // (source file) variable name + rStripStringAfterChar(strCopy, '.'); 
+ res += strCopy; + + // Maybe put here operation pretty-print TODO TODO + } + else { + // vec.ind is the widened induction variable + //res += str0; + } + } + else + */ + { //getExprForTripCount == true and str0 not empty + /* + // This SOMETIMES introduces infinite cycles, which can be avoided + // if we keep track of the instructions already visited + Example of cycle: + - these 2 simple instructions: + %indvars.iv29 = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next30, %for.cond.loopexit ]. + %indvars.iv.next30 = add nuw nsw i64 %indvars.iv29, 1, !dbg !9 + */ + + if ((it->getOpcode() == Instruction::GetElementPtr) && + (it->getNumOperands() >= 3)) { + res += ((Instruction *)op0)->getName().data(); + } + else { + LLVM_DEBUG(dbgs() + << "getExpr(): str0 not empty --> calling getExpr(op0)\n"); + // This introduces useless parantheses: res += "("; + res += getExpr((Instruction *)op0); + // This introduces useless parantheses: res += ")"; + } + } + } + + + // We now pretty print operation associated to *it; + + /* We generate C code for the operation associated to the it + LLVM instruction. + See http://llvm.org/docs/doxygen/html/Instruction_8cpp_source.html + for all/various possible opcodes - see method + 00194 const char *Instruction::getOpcodeName(unsigned OpCode) . 
*/ + // NOTE: vec.ind is a PHI node + //if (strncmp(str0, STR_VEC_IND, strlen(STR_VEC_IND)) != 0) + //{ + //if (!(getExprForTripCount == false && strcmp(str0, "vec.ind") == 0)) + switch (it->getOpcode()) { + case Instruction::Call: { + // IMPORTANT-TODO: this works well for the case 31c_dotprod_RaduH, BUT not sure if it's general + res = "((int *)&("; + res += iGetNameData; + res += "))"; + + const char *strFuncName; + strFuncName = dyn_cast(it)->getCalledFunction()->getName().data(); + assert( (strcmp(strFuncName, "malloc") == 0) || + (strcmp(strFuncName, "calloc") == 0) ); + + // Inspired from http://llvm.org/docs/ProgrammersManual.html#iterating-over-def-use-use-def-chains + for (Value::user_iterator i = it->user_begin(), + e = it->user_end(); + i != e; ++i) { + if (Instruction *inst = dyn_cast(*i)) { + LLVM_DEBUG(dbgs() << "getExpr(): it is used in instruction: " + << *inst << "\n"); + if (BitCastInst *bci = dyn_cast(*i)) { + if (strlen(bci->getName().data()) != 0) { + LLVM_DEBUG(dbgs() + << "getExpr(): it is used in BitCast instruction --> we use " + "its name instead\n"); + res = "((int *)&("; + res += bci->getName().data(); + res += "))"; + } + else { + if (ranGetAllMetadata == false) { + LLVM_DEBUG(dbgs() << "getExpr(): Before, varNameMap.size() = " + << varNameMap.size() << "\n"); + getAllMetadata(bci->getParent()->getParent()); + LLVM_DEBUG(dbgs() << "getExpr(): varNameMap.size() = " + << varNameMap.size() << "\n"); + } + + + std::string valueName = getLLVMValueName(bci); + + // Normally the value name is a number when getName() is empty + LLVM_DEBUG(dbgs() << "getExpr(): bci has empty name\n"); + LLVM_DEBUG(dbgs() << "getExpr(): bci = " << *bci << "\n"); + LLVM_DEBUG(dbgs() << " bci = " << bci << "\n"); + LLVM_DEBUG(dbgs() << " bci->getValueName() = " + << bci->getValueName() << "\n"); + LLVM_DEBUG(dbgs() << " bci->getName() = " + << bci->getName() << "\n"); + LLVM_DEBUG(dbgs() << "getExpr(): it = " << *it << "\n"); + // + LLVM_DEBUG(dbgs() << 
"getExpr(): varNameMap[bci] = " + << varNameMap[valueName] << "\n"); + + //res = varNameMap[valTypeAndName]; + res = varNameMap[valueName]; + + goto GetExpr_end; + + /* + for (Value::user_iterator i2 = bci->user_begin(), + e2 = bci->user_end(); + i2 != e2; ++i2) { + if (Instruction *inst2 = dyn_cast(*i2)) { + LLVM_DEBUG(dbgs() << "getExpr(): bci is used in instruction: " + << *inst2 << "\n"); + if (StoreInst *si = dyn_cast(*i2)) { + LLVM_DEBUG(dbgs() + << "getExpr(): bci is used in StoreInst instruction " + "--> we use its name instead\n"); + res = "((int *)&("; + res += si->getName().data(); + res += "))"; + goto GetExpr_end; + } + } + } + */ + } + } + } + else { + LLVM_DEBUG(dbgs() << "getExpr(): it is used in val: " + << *i << "\n"); + } + } + + goto GetExpr_end; + } + case Instruction::Add: + res += " + "; + break; + //case Instruction::FAdd: + case Instruction::Sub: + res += " - "; + break; + //case Instruction::FSub: + case Instruction::Mul: + res += " * "; + break; + //case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + res += " / "; + break; + case Instruction::URem: + case Instruction::SRem: + //case Instruction::FRem: + res += " % "; + break; + case Instruction::Shl: + res += " << "; + break; + case Instruction::LShr: + res += " >> "; + break; + // IMPORTANT-TODO: think better + case Instruction::AShr: + /* From https://en.wikipedia.org/wiki/Arithmetic_shift#cite_ref-1 : + "The >> operator in C and C++ is + not necessarily an arithmetic shift. Usually it is only an + arithmetic shift if used with a signed integer type on its + left-hand side. + If it is used on an unsigned integer type instead, it will be a + logical shift." 
+ */ + res += " >> "; + break; + case Instruction::And: + res += " & "; + break; + case Instruction::Or: + res += " | "; + break; + case Instruction::Xor: + res += " ^ "; + break; + case Instruction::PHI: + res += " phi "; + break; + case Instruction::Load: + //res += " load "; + res += "[0]"; + break; + case Instruction::Store: + res += " store "; + break; + case Instruction::GetElementPtr: + //res += " getelementptr "; + /* + if (it->getNumOperands() < 3) { + res += " + "; + } + */ + break; + case Instruction::ZExt: + case Instruction::SExt: + //res += " ext "; // NOTE: this is unary operator + break; + //case Instruction::FPTrunc: + case Instruction::Trunc: { + //res += " trunc "; + break; + } + case Instruction::ICmp: + case Instruction::FCmp: { + /* TODO TODO: check type of cmp + CmpInst *Cmp = dyn_cast(it); + Cmp->getPredicate() + */ + res += " > "; + break; + } + case Instruction::Select: { + // TODO TODO: add : and 3rd operand + res += " ? "; + break; + } + case Instruction::ShuffleVector: { + //res += " shufflevector "; + break; + } + case Instruction::InsertElement: { + //res += " insertelement "; + break; + } + case Instruction::ExtractElement: { + //res += " extractelement "; + + std::string op1Expr = getExpr((Instruction *)op1); //((Instruction *)op1)->getName().data(); + if (op1Expr == "0") { + LLVM_DEBUG(dbgs() + << "getExpr(): Neutralizing ExtractElement, since index is 0\n"); + if (putParantheses) + res += ")"; + + goto GetExpr_end; + } + + + // TODO TODO: check that op0 is vec.ind or sext vec.ind + res = "((int *)&" + res; + res += "))"; // One ')' for the '(' added at beginning getExpr, + // 1 to close the '(' before '&' + res += "["; + res += op1Expr; + res += "]"; + + //basePtr = NULL; + + goto GetExpr_end; + //break; + } + // See e.g. 
http://llvm.org/docs/doxygen/html/Instructions_8h_source.html#l04703 + case Instruction::PtrToInt: + case Instruction::IntToPtr: { + /* This is normally encountered when using the LLVM-SRA library and + I give SCEVRangeBuilder->getUpperBound(AccessFunction) */ + // We don't do a thing + break; + } + case Instruction::Alloca: { + //res += "(int *)&("; + // TODO TODO: this works well for the case 31c_dotprod_RaduH + res = "((int *)&("; + res += iGetNameData; + res += "))"; + goto GetExpr_end; + //break; + } + default: + /* See llvm.org/docs/doxygen/html/Core_8h_source.html#l00100 and + http://llvm.org/docs/doxygen/html/Instruction_8cpp_source.html#l00194 + for all supported opcodes. + In fact, we can have more valid opcodes than these + See http://llvm.org/docs/doxygen/html/Core_8h_source.html#l00100 + - the enums with typedef enum LLVMOpcode - e.g., LLVMAdd, etc + seem to be related to values of Instruction::getOpcode(). + I think Instruction:Add == LLVMAdd + InstructionVal (use gdb to see exactly); + note also that getOpcode() returns getValueID() - InstructionVal. + http://llvm.org/docs/doxygen/html/Value_8h_source.html + see enum ValueTy - better see http://llvm.org/test-doxygen/api/Value_8h_source.html, + since the Value.h source file uses TableGen macros inside. 
+ */ + LLVM_DEBUG(dbgs() << "getExpr(): !!!!Special case: it = " + << *it + << "\n"); + const Constant *C = llvm::dyn_cast(it); + + LLVM_DEBUG(dbgs() << "getExpr(): C = " + << C + << "\n"); + + if (C != NULL) { + LLVM_DEBUG(dbgs() << " getExpr(): It is Constant.\n"); + //res += "Constant-->"; + + if (const ConstantInt *CI = llvm::dyn_cast(C)) { + LLVM_DEBUG(dbgs() << " getExpr(): CI->getValue() = " + << CI->getValue() + << ".\n"); + } + /* + // Maybe useful in the future, but little likely: + if (const ConstantDataArray *CA = llvm::dyn_cast(C)) { + LLVM_DEBUG(dbgs() << " getExpr(): It is ConstantDataArray.\n"); + } + if (const ConstantArray *CA = llvm::dyn_cast(C)) { + LLVM_DEBUG(dbgs() << " getExpr(): It is ConstantArray.\n"); + } + */ + + /* Inspired from http://llvm.org/docs/doxygen/html/AsmWriter_8cpp_source.html#l01304, + method WriteConstantInternal() . + */ + if (const ConstantExpr *CE = llvm::dyn_cast(C)) { + LLVM_DEBUG(dbgs() << " getExpr(): It is ConstantExpr.\n"); + + // From http://llvm.org/test-doxygen/api/Constants_8cpp_source.html#l01937 + // res += CE->getOpcodeName(); + switch (CE->getOpcode()) { + // small-TODO: this code is similar to the one for the switch above - maybe we should reuse code although it will make things more complicated... 
+ case Instruction::Add: + res += " + "; + break; + case Instruction::Sub: + res += " - "; + break; + case Instruction::Mul: + res += " * "; + break; + case Instruction::UDiv: + case Instruction::SDiv: + res += " / "; + break; + case Instruction::SRem: + case Instruction::URem: + res += " % "; + break; + case Instruction::Shl: + res += " << "; + break; + case Instruction::LShr: + res += " >> "; + break; + case Instruction::AShr: + res += " >> "; + break; + case Instruction::ICmp: + case Instruction::FCmp: + res += " > "; + break; + case Instruction::ZExt: + case Instruction::SExt: + //res += " ext "; // NOTE: this is unary operator + break; + case Instruction::Trunc: + //res += " trunc "; + break; + + case Instruction::PtrToInt: + case Instruction::IntToPtr: { + break; + } + default: + res += " [Unsupported_C_CtExpr_operator]"; + break; + } + res += " "; + } + else { + res += " [Unsupported_C_operator]"; + res += it->getOpcodeName(); + res += " "; + } + break; + } + else { + //res += " [Unsupported_C_operator] [Constant_C_is_NULL]"; + res += iGetNameData; + } + } // end switch + + /* + if (it->getOpcode() == Instruction::PHI) { + // TODO TODO: check that op0 is associated to predecessor BB + // different than itself - e.g., preheader, vector.ph, etc + + // This results in incorrect paranthesis - missing a few ')' + goto GetExpr_end; + } + */ + + // Pretty print op1: + + /* + if ((it->getNumOperands() > 1) && + (it->getOpcode() != Instruction::PHI)) { + */ + if (it->getNumOperands() > 1) { + //strcat(res, " "); + res += " "; + + bool specialCase = false; + bool str1NotEmpty = (strlen(str1) != 0); + + if (str1NotEmpty) { + LLVM_DEBUG(dbgs() << "getExpr(): str1 NOT empty: str1 = " + << str1 << "\n"); + + + /* IMPORTANT NOTE: some operands have names and are also + instructions */ + + /* + The following can also introduce cycles: + - an example + getExpr(): str0 empty (or so) --> calling getExpr(op0) + (getExpr(): current it = %vec.ind = phi <32 x i64> [ , %vector.ph 
], [ %step.add, %vector.body ]). + getExpr(): getExprForTripCount = 1 + getExpr(): it = <32 x i64> + (getExpr(): it->getOpcodeName() = ) + (getExpr(): it->getName() = ) + (getExpr(): it->printAsOperand() == ) + getExpr(): calling getExpr(op1). + getExpr(): it = %vec.ind = phi <32 x i64> [ , %vector.ph ], [ %step.add, %vector.body ]. + getExpr(): getExprForTripCount = 1 + getExpr(): it = %step.add = add <32 x i64> %vec.ind, + , !dbg !38 + + - another example: + getExpr(): it = %row.020.us = phi i64 [ %inc16.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %for.cond1.preheader.us.preheader ] + (getExpr(): it->getOpcodeName() = phi) + (getExpr(): it->getName() = row.020.us) + getExpr(): it->getOperand(1) = i64 0; str1 = [END] + getExpr(): getExprForTripCount = 1 + getExpr(): it = %inc16.us = add nuw nsw i64 %row.020.us, 1, !dbg !58 + (getExpr(): it->getOpcodeName() = add) + (getExpr(): it->getName() = inc16.us) + getExpr(): it->getOperand(1) = i64 1; str1 = [END] + getExpr(): getExprForTripCount = 1 + getExpr(): it = %row.020.us = phi i64 [ %inc16.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %for.cond1.preheader.us.preheader ] + */ + //if (strcmp(str1, "broadcast.splat") == 0) + if (((Instruction *)op1)->getNumOperands() != 0 && + // This prevents pretty-printing constant vectors, etc + !(strncmp(iGetNameData, STR_VEC_IND, strlen(STR_VEC_IND)) == 0 && + strncmp(str1, STR_STEP_ADD, strlen(STR_STEP_ADD)) == 0) + ) { + //strcat(res, getExpr((Instruction *)op1)); + + // We defer pretty printing below - see immediately below + /* + LLVM_DEBUG(dbgs() << "getExpr(): calling getExpr(op1).\n"); + LLVM_DEBUG(dbgs() << " getExpr(): it = " << *it << ".\n"); + res += getExpr((Instruction *)op1); + */ + } + else { + //strcat(res, str1); + res += str1; + specialCase = true; + } + } // End str1 NOT empty + + if (specialCase == false) { + LLVM_DEBUG(dbgs() << "getExpr(): specialCase = false, " + << "str1NotEmpty = " << str1NotEmpty << ".\n"); + LLVM_DEBUG(dbgs() << " 
getExpr(): it = " << *it << ".\n"); + + if (it->getOpcode() == Instruction::GetElementPtr) { + int numOpnds = it->getNumOperands(); + + //GetIndexOpndFromGEPInst(GEPPtr); + int startIndex; + + if (llvm::dyn_cast(it->getOperand(0)) != NULL) { + /* We empirically saw that for global arrays, the 1st index + in GEP is redundant - it has value 0 invariably, + so we skip it. + */ + startIndex = 2; + } + else { + startIndex = 1; + } + + for (int i = startIndex; i < numOpnds; i++) { + res += "["; + + Value *op_i = it->getOperand(i); + res += getExpr((Instruction *)op_i); + + res += "]"; + } + } + else { + //strcat(res, getExpr((Instruction *)op1)); + res += getExpr((Instruction *)op1); + } + } + } + + + // IMPORTANT-TODO : treat also Phi, which can have arbitrary num of arguments: if (it->getOpcode() == Instruction::Phi) { + if (it->getOpcode() == Instruction::Select) { + res += " : "; + + Value *op2; + op2 = it->getOperand(2); + res += getExpr((Instruction *)op2); + } + + + if (putParantheses) + res += ")"; + + GetExpr_end: + /* + // Don't really understand why it fails at compile-time at make_pair + // std::unordered_map cacheExpr; + typedef Instruction *InstructionPtr; + //cacheExpr.insert(std::make_pair(it, res)); + cacheExpr.insert(std::make_pair(it, res)); + But this does NOT fail: + // Inspired from example http://www.cplusplus.com/reference/utility/make_pair/ + std::pair tmp; + tmp = std::make_pair(it, res); + cacheExpr.insert(tmp); + */ + /* + if ((res.size() == 2) && (res.c_str()[0] == '(') && + (res.c_str()[1] == ')')) { + */ + if (res == "()") { + // This is redundant so we drop it. 
+ res.clear(); + } + + LLVM_DEBUG(dbgs() << "getExpr(): Inserting in cacheExpr it = " << it + << " (*it = " << *it + << ") and res = " << res << "\n"); + cacheExpr[it] = res; + return res; +} + +} // end namespace + +#endif // RECOVER_FROM_LLVM_IR + Index: lib/Target/Connex/Select_ADDf16_OpincaaCodeGen.h =================================================================== --- lib/Target/Connex/Select_ADDf16_OpincaaCodeGen.h +++ lib/Target/Connex/Select_ADDf16_OpincaaCodeGen.h @@ -0,0 +1,3633 @@ +//===-- Select_ADDf16_OpincaaCodeGen.h --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +// Code auto-generated by method Kernel::genLLVMISelManualCode() +// from the OPINCAA lib, from kernel add_or_sub.f16. +// You should put this code in the Select() method of the SelectionDAGISel +// class of your back end. +// Number of instructions generated: 279. 
+// +//===----------------------------------------------------------------------===// + +// From /home/asusu/LLVM/Tests/opincaa_standalone_apps/Emulate_f16/ADD_SUB_f16_manual/DumpISel_OpincaaCodeGen_old36_C00_ADDf16.cpp + + + + + +SDValue ct0 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R14 = 1; +// Instr #0 +SDNode *vload0 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct0, + // glue (or chain) input edge + SDValue(nodeOpSrcCast1, 1) + ); + +SDValue ct1 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R30 = 1; +// Instr #1 +SDNode *vload1 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct1, + // glue (or chain) input edge + SDValue(vload0, 1) + ); + +SDValue ct2 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R31 = 0; +// Instr #2 +SDNode *vload2 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct2, + // glue (or chain) input edge + SDValue(vload1, 1) + ); + +SDValue ct3 = CurDAG->getConstant(16, DL, MVT::i16, true, false); +// R29 = 16; +// Instr #3 +SDNode *vload3 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct3, + // glue (or chain) input edge + SDValue(vload2, 1) + ); + +SDValue ct4 = CurDAG->getConstant(31, DL, MVT::i16, true, false); +// R28 = 31; +// Instr #4 +SDNode *vload4 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct4, + // glue (or chain) input edge + SDValue(vload3, 1) + ); + +SDValue ct5 = CurDAG->getConstant(1023, DL, MVT::i16, true, false); +// R13 = 1023; +// Instr #5 +SDNode *vload5 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct5, + // glue (or chain) input edge + SDValue(vload4, 1) + ); + +SDValue ct6 = CurDAG->getConstant(31744, DL, MVT::i16, true, false); +// R12 = 31744; +// Instr #6 +SDNode *vload6 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + 
ct6, + // glue (or chain) input edge + SDValue(vload5, 1) + ); + +SDValue ct7 = CurDAG->getConstant(-32768, DL, MVT::i16, true, false); +// R11 = -32768; +// Instr #7 +SDNode *vload7 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct7, + // glue (or chain) input edge + SDValue(vload6, 1) + ); + +SDValue ct8 = CurDAG->getConstant(1024, DL, MVT::i16, true, false); +// R10 = 1024; +// Instr #8 +SDNode *vload8 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct8, + // glue (or chain) input edge + SDValue(vload7, 1) + ); + +SDValue ct9 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R62 = 0; +// Instr #9 +SDNode *vload9 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct9, + // glue (or chain) input edge + SDValue(vload8, 1) + ); + +SDValue ct10 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R19 = 0; +// Instr #10 +SDNode *vload10 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct10, + // glue (or chain) input edge + SDValue(vload9, 1) + ); + +SDValue ct11 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R62 = 0; +// Instr #11 +SDNode *vload11 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct11, + // glue (or chain) input edge + SDValue(vload10, 1) + ); + +SDValue ct12 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R61 = 0; +// Instr #12 +SDNode *vload12 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct12, + // glue (or chain) input edge + SDValue(vload11, 1) + ); + +SDValue ct13 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R07 = 0; +// Instr #13 +SDNode *vload13 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct13, + // glue (or chain) input edge + SDValue(vload12, 1) + ); + +SDValue ct14 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R04 = 0; +// Instr #14 
+SDNode *vload14 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct14, + // glue (or chain) input edge + SDValue(vload13, 1) + ); + +SDValue ct15 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R03 = 0; +// Instr #15 +SDNode *vload15 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct15, + // glue (or chain) input edge + SDValue(vload14, 1) + ); + +SDValue ct16 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R02 = 0; +// Instr #16 +SDNode *vload16 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct16, + // glue (or chain) input edge + SDValue(vload15, 1) + ); + +SDValue ct17 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R01 = 0; +// Instr #17 +SDNode *vload17 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct17, + // glue (or chain) input edge + SDValue(vload16, 1) + ); + +SDValue ct18 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R00 = 0; +// Instr #18 +SDNode *vload18 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct18, + // glue (or chain) input edge + SDValue(vload17, 1) + ); + +// R24 = R27 & R11; +// Instr #19 +SDNode *and0 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(vload18, 1) + ); + +// R25 = R27 & R12; +// Instr #20 +SDNode *and1 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload6, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(and0, 1) + ); + +SDValue ct19 = CurDAG->getConstant(10, DL, MVT::i16, true, false); +// R25 = R25 >> 10; +// Instr #21 +SDNode *ishr0 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and1, 0), + ct19, + // glue (or chain) input edge + SDValue(and1, 1) + ); + +// 
R26 = R27 & R13; +// Instr #22 +SDNode *and2 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(ishr0, 1) + ); + +// R18 = R31 < R26; +// Instr #23 +SDNode *lt0 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(and2, 0), + // glue (or chain) input edge + SDValue(and2, 1) + ); + +// R17 = R25 == R31; +// Instr #24 +SDNode *eq0 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr0, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(lt0, 1) + ); + +// R09 = R17 & R18; +// Instr #25 +SDNode *and3 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt0, 0), + SDValue(eq0, 0), + // glue (or chain) input edge + SDValue(eq0, 1) + ); + +// R09 = R09 == R30; +// Instr #26 +SDNode *eq1 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and3, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and3, 1) + ); + +SDValue ct20 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #27 +SDNode *nop0 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct20, + // glue (or chain) input edge + SDValue(eq1, 1) + ); + +// WHERE_EQ; +// Instr #28 +SDNode *whereeq0 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq1, 0), + // glue (or chain) input edge + SDValue(nop0, 0) + ); + +SDValue ct21 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R25 = 1; +// Instr #29 +SDNode *vload19 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct21, + SDValue(ishr0, 0), + // glue (or chain) input edge + SDValue(whereeq0, 1) + ); + +// END_WHERE; +// Instr #30 +SDNode *endwhere0 = CurDAG->getMachineNode( + 
Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload19, 1) + ); + +// R18 = R25 == R28; +// Instr #31 +SDNode *eq2 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload19, 0), + SDValue(vload4, 0), + // glue (or chain) input edge + SDValue(endwhere0, 0) + ); + +// R18 = R18 | R17; +// Instr #32 +SDNode *or0 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq0, 0), + SDValue(eq2, 0), + // glue (or chain) input edge + SDValue(eq2, 1) + ); + +// R18 = R18 == R31; +// Instr #33 +SDNode *eq3 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or0, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(or0, 1) + ); + +SDValue ct22 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #34 +SDNode *nop1 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct22, + // glue (or chain) input edge + SDValue(eq3, 1) + ); + +// WHERE_EQ; +// Instr #35 +SDNode *whereeq1 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq3, 0), + // glue (or chain) input edge + SDValue(nop1, 0) + ); + +// R26 = R26 | R10; +// Instr #36 +SDNode *or1 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload8, 0), + SDValue(and2, 0), + SDValue(and2, 0), + // glue (or chain) input edge + SDValue(whereeq1, 1) + ); + +// END_WHERE; +// Instr #37 +SDNode *endwhere1 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(or1, 1) + ); + +// R20 = R23 & R11; +// Instr #38 +SDNode *and4 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(endwhere1, 0) + ); + +// R21 = R23 & R12; +// Instr #39 +SDNode *and5 = 
CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload6, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(and4, 1) + ); + +SDValue ct23 = CurDAG->getConstant(10, DL, MVT::i16, true, false); +// R21 = R21 >> 10; +// Instr #40 +SDNode *ishr1 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and5, 0), + ct23, + // glue (or chain) input edge + SDValue(and5, 1) + ); + +// R22 = R23 & R13; +// Instr #41 +SDNode *and6 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(ishr1, 1) + ); + +// R16 = R31 < R22; +// Instr #42 +SDNode *lt1 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(and6, 0), + // glue (or chain) input edge + SDValue(and6, 1) + ); + +// R15 = R21 == R31; +// Instr #43 +SDNode *eq4 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr1, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(lt1, 1) + ); + +// R09 = R15 & R16; +// Instr #44 +SDNode *and7 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt1, 0), + SDValue(eq4, 0), + // glue (or chain) input edge + SDValue(eq4, 1) + ); + +// R09 = R09 == R30; +// Instr #45 +SDNode *eq5 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and7, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and7, 1) + ); + +SDValue ct24 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #46 +SDNode *nop2 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct24, + // glue (or chain) input edge + SDValue(eq5, 1) + ); + +// WHERE_EQ; +// Instr #47 +SDNode *whereeq2 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + 
TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq5, 0), + // glue (or chain) input edge + SDValue(nop2, 0) + ); + +SDValue ct25 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R21 = 1; +// Instr #48 +SDNode *vload20 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct25, + SDValue(ishr1, 0), + // glue (or chain) input edge + SDValue(whereeq2, 1) + ); + +// END_WHERE; +// Instr #49 +SDNode *endwhere2 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload20, 1) + ); + +// R16 = R21 == R28; +// Instr #50 +SDNode *eq6 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload20, 0), + SDValue(vload4, 0), + // glue (or chain) input edge + SDValue(endwhere2, 0) + ); + +// R16 = R16 | R15; +// Instr #51 +SDNode *or2 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq4, 0), + SDValue(eq6, 0), + // glue (or chain) input edge + SDValue(eq6, 1) + ); + +// R16 = R16 == R31; +// Instr #52 +SDNode *eq7 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or2, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(or2, 1) + ); + +SDValue ct26 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #53 +SDNode *nop3 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct26, + // glue (or chain) input edge + SDValue(eq7, 1) + ); + +// WHERE_EQ; +// Instr #54 +SDNode *whereeq3 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq7, 0), + // glue (or chain) input edge + SDValue(nop3, 0) + ); + +// R22 = R22 | R10; +// Instr #55 +SDNode *or3 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload8, 0), + SDValue(and6, 0), + SDValue(and6, 0), + // glue (or chain) input edge + SDValue(whereeq3, 1) + ); + +// 
END_WHERE; +// Instr #56 +SDNode *endwhere3 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(or3, 1) + ); + +// R50 = R24 == R11; +// Instr #57 +SDNode *eq8 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and0, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(endwhere3, 0) + ); + +// R49 = R25 == R28; +// Instr #58 +SDNode *eq9 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload19, 0), + SDValue(vload4, 0), + // glue (or chain) input edge + SDValue(eq8, 1) + ); + +// R48 = R26 == R31; +// Instr #59 +SDNode *eq10 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or1, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(eq9, 1) + ); + +// R47 = R20 == R11; +// Instr #60 +SDNode *eq11 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and4, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(eq10, 1) + ); + +// R46 = R21 == R28; +// Instr #61 +SDNode *eq12 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload20, 0), + SDValue(vload4, 0), + // glue (or chain) input edge + SDValue(eq11, 1) + ); + +// R45 = R22 == R31; +// Instr #62 +SDNode *eq13 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or3, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(eq12, 1) + ); + +// R07 = R49 & R46; +// Instr #63 +SDNode *and8 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq12, 0), + SDValue(eq9, 0), + // glue (or chain) input edge + SDValue(eq13, 1) + ); + +// R08 = R07 & R50; +// Instr #64 +SDNode *and9 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq8, 0), + SDValue(and8, 0), + // glue (or chain) input edge + SDValue(and8, 
1) + ); + +// R44 = ~R47; +// Instr #65 +SDNode *not0 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq11, 0), + // glue (or chain) input edge + SDValue(and9, 1) + ); + +// R08 = R08 & R44; +// Instr #66 +SDNode *and10 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(not0, 0), + SDValue(and9, 0), + // glue (or chain) input edge + SDValue(not0, 1) + ); + +// R44 = ~R50; +// Instr #67 +SDNode *not1 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq8, 0), + // glue (or chain) input edge + SDValue(and10, 1) + ); + +// R44 = R44 & R07; +// Instr #68 +SDNode *and11 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and8, 0), + SDValue(not1, 0), + // glue (or chain) input edge + SDValue(not1, 1) + ); + +// R44 = R44 & R47; +// Instr #69 +SDNode *and12 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq11, 0), + SDValue(and11, 0), + // glue (or chain) input edge + SDValue(and11, 1) + ); + +// R08 = R08 | R44; +// Instr #70 +SDNode *or4 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and12, 0), + SDValue(and10, 0), + // glue (or chain) input edge + SDValue(and12, 1) + ); + +// R07 = ~R45; +// Instr #71 +SDNode *not2 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq13, 0), + // glue (or chain) input edge + SDValue(or4, 1) + ); + +// R07 = R07 & R46; +// Instr #72 +SDNode *and13 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq12, 0), + SDValue(not2, 0), + // glue (or chain) input edge + SDValue(not2, 1) + ); + +// R08 = R08 | R07; +// Instr #73 +SDNode *or5 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and13, 0), + SDValue(or4, 0), + // glue (or chain) input edge + SDValue(and13, 1) + ); + 
+// R07 = ~R48; +// Instr #74 +SDNode *not3 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq10, 0), + // glue (or chain) input edge + SDValue(or5, 1) + ); + +// R07 = R07 & R49; +// Instr #75 +SDNode *and14 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq9, 0), + SDValue(not3, 0), + // glue (or chain) input edge + SDValue(not3, 1) + ); + +// R08 = R08 | R07; +// Instr #76 +SDNode *or6 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and14, 0), + SDValue(or5, 0), + // glue (or chain) input edge + SDValue(and14, 1) + ); + +// R09 = R08 == R30; +// Instr #77 +SDNode *eq14 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or6, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(or6, 1) + ); + +SDValue ct27 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #78 +SDNode *nop4 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct27, + // glue (or chain) input edge + SDValue(eq14, 1) + ); + +// WHERE_EQ; +// Instr #79 +SDNode *whereeq4 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq14, 0), + // glue (or chain) input edge + SDValue(nop4, 0) + ); + +SDValue ct28 = CurDAG->getConstant(31745, DL, MVT::i16, true, false); +// R19 = 31745; +// Instr #80 +SDNode *vload21 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct28, + SDValue(vload10, 0), + // glue (or chain) input edge + SDValue(whereeq4, 1) + ); + +SDValue ct29 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R14 = 0; +// Instr #81 +SDNode *vload22 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct29, + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(vload21, 1) + ); + +// END_WHERE; +// Instr #82 +SDNode *endwhere4 
= CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload22, 1) + ); + +// R08 = R49 | R46; +// Instr #83 +SDNode *or7 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq12, 0), + SDValue(eq9, 0), + // glue (or chain) input edge + SDValue(endwhere4, 0) + ); + +// R09 = R08 & R14; +// Instr #84 +SDNode *and15 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload22, 0), + SDValue(or7, 0), + // glue (or chain) input edge + SDValue(or7, 1) + ); + +// R09 = R09 == R30; +// Instr #85 +SDNode *eq15 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and15, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and15, 1) + ); + +SDValue ct30 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #86 +SDNode *nop5 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct30, + // glue (or chain) input edge + SDValue(eq15, 1) + ); + +// WHERE_EQ; +// Instr #87 +SDNode *whereeq5 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq15, 0), + // glue (or chain) input edge + SDValue(nop5, 0) + ); + +SDValue ct31 = CurDAG->getConstant(31744, DL, MVT::i16, true, false); +// R19 = 31744; +// Instr #88 +SDNode *vload23 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct31, + SDValue(vload21, 0), + // glue (or chain) input edge + SDValue(whereeq5, 1) + ); + +// R08 = R50 & R49; +// Instr #89 +SDNode *and16 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq9, 0), + SDValue(eq8, 0), + SDValue(or7, 0), + // glue (or chain) input edge + SDValue(vload23, 1) + ); + +// R07 = R47 & R46; +// Instr #90 +SDNode *and17 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + 
SDValue(eq12, 0), + SDValue(eq11, 0), + SDValue(and14, 0), + // glue (or chain) input edge + SDValue(and16, 1) + ); + +// R08 = R08 | R07; +// Instr #91 +SDNode *or8 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and17, 0), + SDValue(and16, 0), + SDValue(and16, 0), + // glue (or chain) input edge + SDValue(and17, 1) + ); + +SDValue ct32 = CurDAG->getConstant(15, DL, MVT::i16, true, false); +// R08 = R08 << 15; +// Instr #92 +SDNode *ishl0 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or8, 0), + ct32, + SDValue(or8, 0), + // glue (or chain) input edge + SDValue(or8, 1) + ); + +// R19 = R19 ^ R08; +// Instr #93 +SDNode *xor0 = CurDAG->getMachineNode( + Connex::XORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl0, 0), + SDValue(vload23, 0), + SDValue(vload23, 0), + // glue (or chain) input edge + SDValue(ishl0, 1) + ); + +SDValue ct33 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R14 = 0; +// Instr #94 +SDNode *vload24 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct33, + SDValue(vload22, 0), + // glue (or chain) input edge + SDValue(xor0, 1) + ); + +// END_WHERE; +// Instr #95 +SDNode *endwhere5 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload24, 1) + ); + +// R15 = R25 - R21; +// Instr #96 +SDNode *sub0 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload19, 0), + SDValue(vload20, 0), + // glue (or chain) input edge + SDValue(endwhere5, 0) + ); + +SDValue ct34 = CurDAG->getConstant(-15, DL, MVT::i16, true, false); +// R08 = -15; +// Instr #97 +SDNode *vload25 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct34, + // glue (or chain) input edge + SDValue(sub0, 1) + ); + +// R09 = R15 < R08; +// Instr #98 +SDNode *lt2 = CurDAG->getMachineNode( + 
Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub0, 0), + SDValue(vload25, 0), + // glue (or chain) input edge + SDValue(vload25, 1) + ); + +// R09 = R09 & R14; +// Instr #99 +SDNode *and18 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt2, 0), + // glue (or chain) input edge + SDValue(lt2, 1) + ); + +// R09 = R09 == R30; +// Instr #100 +SDNode *eq16 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and18, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and18, 1) + ); + +SDValue ct35 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #101 +SDNode *nop6 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct35, + // glue (or chain) input edge + SDValue(eq16, 1) + ); + +// WHERE_EQ; +// Instr #102 +SDNode *whereeq6 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq16, 0), + // glue (or chain) input edge + SDValue(nop6, 0) + ); + +// R15 = R31 - R15; +// Instr #103 +SDNode *sub1 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(sub0, 0), + SDValue(sub0, 0), + // glue (or chain) input edge + SDValue(whereeq6, 1) + ); + +SDValue ct36 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R26 = 0; +// Instr #104 +SDNode *vload26 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct36, + SDValue(or1, 0), + // glue (or chain) input edge + SDValue(sub1, 1) + ); + +SDValue ct37 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R25 = R21 << 0; +// Instr #105 +SDNode *ishl1 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload20, 0), + ct37, + SDValue(vload19, 0), + // glue (or chain) input edge + SDValue(vload26, 1) + ); + +SDValue ct38 = 
CurDAG->getConstant(32000, DL, MVT::i16, true, false); +// R15 = 32000; +// Instr #106 +SDNode *vload27 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct38, + SDValue(sub1, 0), + // glue (or chain) input edge + SDValue(ishl1, 1) + ); + +// END_WHERE; +// Instr #107 +SDNode *endwhere6 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload27, 1) + ); + +SDValue ct39 = CurDAG->getConstant(-3, DL, MVT::i16, true, false); +// R08 = -3; +// Instr #108 +SDNode *vload28 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct39, + // glue (or chain) input edge + SDValue(endwhere6, 0) + ); + +// R09 = R15 < R08; +// Instr #109 +SDNode *lt3 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload27, 0), + SDValue(vload28, 0), + // glue (or chain) input edge + SDValue(vload28, 1) + ); + +// R09 = R09 & R14; +// Instr #110 +SDNode *and19 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt3, 0), + // glue (or chain) input edge + SDValue(lt3, 1) + ); + +// R09 = R09 == R30; +// Instr #111 +SDNode *eq17 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and19, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and19, 1) + ); + +SDValue ct40 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #112 +SDNode *nop7 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct40, + // glue (or chain) input edge + SDValue(eq17, 1) + ); + +// WHERE_EQ; +// Instr #113 +SDNode *whereeq7 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq17, 0), + // glue (or chain) input edge + SDValue(nop7, 0) + ); + +// R15 = R31 - R15; +// Instr #114 +SDNode *sub2 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, 
+ DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(vload27, 0), + SDValue(vload27, 0), + // glue (or chain) input edge + SDValue(whereeq7, 1) + ); + +// R26 = R26 >> R15; +// Instr #115 +SDNode *shr0 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload26, 0), + SDValue(sub2, 0), + SDValue(vload26, 0), + // glue (or chain) input edge + SDValue(sub2, 1) + ); + +SDValue ct41 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R25 = R21 << 0; +// Instr #116 +SDNode *ishl2 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload20, 0), + ct41, + SDValue(ishl1, 0), + // glue (or chain) input edge + SDValue(shr0, 1) + ); + +SDValue ct42 = CurDAG->getConstant(32000, DL, MVT::i16, true, false); +// R15 = 32000; +// Instr #117 +SDNode *vload29 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct42, + SDValue(sub2, 0), + // glue (or chain) input edge + SDValue(ishl2, 1) + ); + +// END_WHERE; +// Instr #118 +SDNode *endwhere7 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload29, 1) + ); + +// R09 = R15 < R31; +// Instr #119 +SDNode *lt4 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload29, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(endwhere7, 0) + ); + +// R09 = R09 & R14; +// Instr #120 +SDNode *and20 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt4, 0), + // glue (or chain) input edge + SDValue(lt4, 1) + ); + +// R09 = R09 == R30; +// Instr #121 +SDNode *eq18 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and20, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and20, 1) + ); + +SDValue ct43 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, 
MVT::i16, true, false); +// NOP; +// Instr #122 +SDNode *nop8 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct43, + // glue (or chain) input edge + SDValue(eq18, 1) + ); + +// WHERE_EQ; +// Instr #123 +SDNode *whereeq8 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq18, 0), + // glue (or chain) input edge + SDValue(nop8, 0) + ); + +// R15 = R31 - R15; +// Instr #124 +SDNode *sub3 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(vload29, 0), + SDValue(vload29, 0), + // glue (or chain) input edge + SDValue(whereeq8, 1) + ); + +// R22 = R22 << R15; +// Instr #125 +SDNode *shl0 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or3, 0), + SDValue(sub3, 0), + SDValue(or3, 0), + // glue (or chain) input edge + SDValue(sub3, 1) + ); + +SDValue ct44 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R21 = R25 << 0; +// Instr #126 +SDNode *ishl3 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl2, 0), + ct44, + SDValue(vload20, 0), + // glue (or chain) input edge + SDValue(shl0, 1) + ); + +SDValue ct45 = CurDAG->getConstant(32000, DL, MVT::i16, true, false); +// R15 = 32000; +// Instr #127 +SDNode *vload30 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct45, + SDValue(sub3, 0), + // glue (or chain) input edge + SDValue(ishl3, 1) + ); + +// END_WHERE; +// Instr #128 +SDNode *endwhere8 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload30, 1) + ); + +SDValue ct46 = CurDAG->getConstant(4, DL, MVT::i16, true, false); +// R08 = 4; +// Instr #129 +SDNode *vload31 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct46, + // glue (or chain) input edge + SDValue(endwhere8, 0) + ); + +// R09 = R15 
< R08; +// Instr #130 +SDNode *lt5 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload30, 0), + SDValue(vload31, 0), + // glue (or chain) input edge + SDValue(vload31, 1) + ); + +// R09 = R09 & R14; +// Instr #131 +SDNode *and21 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt5, 0), + // glue (or chain) input edge + SDValue(lt5, 1) + ); + +// R09 = R09 == R30; +// Instr #132 +SDNode *eq19 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and21, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and21, 1) + ); + +SDValue ct47 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #133 +SDNode *nop9 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct47, + // glue (or chain) input edge + SDValue(eq19, 1) + ); + +// WHERE_EQ; +// Instr #134 +SDNode *whereeq9 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq19, 0), + // glue (or chain) input edge + SDValue(nop9, 0) + ); + +// R26 = R26 << R15; +// Instr #135 +SDNode *shl1 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shr0, 0), + SDValue(vload30, 0), + SDValue(shr0, 0), + // glue (or chain) input edge + SDValue(whereeq9, 1) + ); + +SDValue ct48 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R25 = R21 << 0; +// Instr #136 +SDNode *ishl4 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl3, 0), + ct48, + SDValue(ishl2, 0), + // glue (or chain) input edge + SDValue(shl1, 1) + ); + +SDValue ct49 = CurDAG->getConstant(32000, DL, MVT::i16, true, false); +// R15 = 32000; +// Instr #137 +SDNode *vload32 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct49, + SDValue(vload30, 0), + // glue (or 
chain) input edge + SDValue(ishl4, 1) + ); + +// END_WHERE; +// Instr #138 +SDNode *endwhere9 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload32, 1) + ); + +// R09 = R15 < R29; +// Instr #139 +SDNode *lt6 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload32, 0), + SDValue(vload3, 0), + // glue (or chain) input edge + SDValue(endwhere9, 0) + ); + +// R09 = R09 & R14; +// Instr #140 +SDNode *and22 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt6, 0), + // glue (or chain) input edge + SDValue(lt6, 1) + ); + +// R09 = R09 == R30; +// Instr #141 +SDNode *eq20 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and22, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and22, 1) + ); + +SDValue ct50 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #142 +SDNode *nop10 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct50, + // glue (or chain) input edge + SDValue(eq20, 1) + ); + +// WHERE_EQ; +// Instr #143 +SDNode *whereeq10 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq20, 0), + // glue (or chain) input edge + SDValue(nop10, 0) + ); + +// R22 = R22 >> R15; +// Instr #144 +SDNode *shr1 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl0, 0), + SDValue(vload32, 0), + SDValue(shl0, 0), + // glue (or chain) input edge + SDValue(whereeq10, 1) + ); + +SDValue ct51 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R21 = R25 << 0; +// Instr #145 +SDNode *ishl5 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl4, 0), + ct51, + SDValue(ishl3, 0), + // glue (or chain) input edge + SDValue(shr1, 1) + ); + +SDValue ct52 = 
CurDAG->getConstant(32000, DL, MVT::i16, true, false); +// R15 = 32000; +// Instr #146 +SDNode *vload33 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct52, + SDValue(vload32, 0), + // glue (or chain) input edge + SDValue(ishl5, 1) + ); + +// END_WHERE; +// Instr #147 +SDNode *endwhere10 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload33, 1) + ); + +SDValue ct53 = CurDAG->getConstant(32, DL, MVT::i16, true, false); +// R08 = 32; +// Instr #148 +SDNode *vload34 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct53, + // glue (or chain) input edge + SDValue(endwhere10, 0) + ); + +// R09 = R15 < R08; +// Instr #149 +SDNode *lt7 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload33, 0), + SDValue(vload34, 0), + // glue (or chain) input edge + SDValue(vload34, 1) + ); + +// R09 = R09 & R14; +// Instr #150 +SDNode *and23 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt7, 0), + // glue (or chain) input edge + SDValue(lt7, 1) + ); + +// R09 = R09 == R30; +// Instr #151 +SDNode *eq21 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and23, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and23, 1) + ); + +SDValue ct54 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #152 +SDNode *nop11 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct54, + // glue (or chain) input edge + SDValue(eq21, 1) + ); + +// WHERE_EQ; +// Instr #153 +SDNode *whereeq11 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq21, 0), + // glue (or chain) input edge + SDValue(nop11, 0) + ); + +SDValue ct55 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R22 = 0; +// Instr #154 
+SDNode *vload35 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct55, + SDValue(shr1, 0), + // glue (or chain) input edge + SDValue(whereeq11, 1) + ); + +SDValue ct56 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R21 = R25 << 0; +// Instr #155 +SDNode *ishl6 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl4, 0), + ct56, + SDValue(ishl5, 0), + // glue (or chain) input edge + SDValue(vload35, 1) + ); + +// END_WHERE; +// Instr #156 +SDNode *endwhere11 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(ishl6, 1) + ); + +// R09 = R24 == R11; +// Instr #157 +SDNode *eq22 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and0, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(endwhere11, 0) + ); + +// R09 = R09 & R14; +// Instr #158 +SDNode *and24 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(eq22, 0), + // glue (or chain) input edge + SDValue(eq22, 1) + ); + +// R09 = R09 == R30; +// Instr #159 +SDNode *eq23 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and24, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and24, 1) + ); + +SDValue ct57 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #160 +SDNode *nop12 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct57, + // glue (or chain) input edge + SDValue(eq23, 1) + ); + +// WHERE_EQ; +// Instr #161 +SDNode *whereeq12 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq23, 0), + // glue (or chain) input edge + SDValue(nop12, 0) + ); + +// R26 = R31 - R26; +// Instr #162 +SDNode *sub4 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + 
MVT::Glue, + SDValue(vload2, 0), + SDValue(shl1, 0), + SDValue(shl1, 0), + // glue (or chain) input edge + SDValue(whereeq12, 1) + ); + +// END_WHERE; +// Instr #163 +SDNode *endwhere12 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sub4, 1) + ); + +// R09 = R20 == R11; +// Instr #164 +SDNode *eq24 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and4, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(endwhere12, 0) + ); + +// R09 = R09 & R14; +// Instr #165 +SDNode *and25 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(eq24, 0), + // glue (or chain) input edge + SDValue(eq24, 1) + ); + +// R09 = R09 == R30; +// Instr #166 +SDNode *eq25 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and25, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and25, 1) + ); + +SDValue ct58 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #167 +SDNode *nop13 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct58, + // glue (or chain) input edge + SDValue(eq25, 1) + ); + +// WHERE_EQ; +// Instr #168 +SDNode *whereeq13 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq25, 0), + // glue (or chain) input edge + SDValue(nop13, 0) + ); + +// R22 = R31 - R22; +// Instr #169 +SDNode *sub5 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(vload35, 0), + SDValue(vload35, 0), + // glue (or chain) input edge + SDValue(whereeq13, 1) + ); + +// END_WHERE; +// Instr #170 +SDNode *endwhere13 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sub5, 1) + ); + +// R09 = R14 == R30; +// Instr #171 +SDNode *eq26 = 
CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere13, 0) + ); + +SDValue ct59 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #172 +SDNode *nop14 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct59, + // glue (or chain) input edge + SDValue(eq26, 1) + ); + +// WHERE_EQ; +// Instr #173 +SDNode *whereeq14 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq26, 0), + // glue (or chain) input edge + SDValue(nop14, 0) + ); + +// R26 = R22 + R26; +// Instr #174 +SDNode *add0 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub4, 0), + SDValue(sub5, 0), + SDValue(sub4, 0), + // glue (or chain) input edge + SDValue(whereeq14, 1) + ); + +// END_WHERE; +// Instr #175 +SDNode *endwhere14 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(add0, 1) + ); + +// R24 = R26 & R11; +// Instr #176 +SDNode *and26 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(add0, 0), + // glue (or chain) input edge + SDValue(endwhere14, 0) + ); + +// R09 = R24 == R11; +// Instr #177 +SDNode *eq27 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and26, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(and26, 1) + ); + +// R09 = R09 & R14; +// Instr #178 +SDNode *and27 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(eq27, 0), + // glue (or chain) input edge + SDValue(eq27, 1) + ); + +// R09 = R09 == R30; +// Instr #179 +SDNode *eq28 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and27, 0), + SDValue(vload1, 0), + // glue (or chain) 
input edge + SDValue(and27, 1) + ); + +SDValue ct60 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #180 +SDNode *nop15 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct60, + // glue (or chain) input edge + SDValue(eq28, 1) + ); + +// WHERE_EQ; +// Instr #181 +SDNode *whereeq15 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq28, 0), + // glue (or chain) input edge + SDValue(nop15, 0) + ); + +// R26 = R31 - R26; +// Instr #182 +SDNode *sub6 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(add0, 0), + SDValue(add0, 0), + // glue (or chain) input edge + SDValue(whereeq15, 1) + ); + +// END_WHERE; +// Instr #183 +SDNode *endwhere15 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sub6, 1) + ); + +SDValue ct61 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R05 = R26 << 0; +// Instr #184 +SDNode *ishl7 = CurDAG->getMachineNode( + Connex::ISHLV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub6, 0), + ct61, + // glue (or chain) input edge + SDValue(endwhere15, 0) + ); + +SDValue ct62 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R08 = R05 >> 1; +// Instr #185 +SDNode *ishr2 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl7, 0), + ct62, + // glue (or chain) input edge + SDValue(ishl7, 1) + ); + +// R05 = R05 | R08; +// Instr #186 +SDNode *or9 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr2, 0), + SDValue(ishl7, 0), + // glue (or chain) input edge + SDValue(ishr2, 1) + ); + +SDValue ct63 = CurDAG->getConstant(2, DL, MVT::i16, true, false); +// R08 = R05 >> 2; +// Instr #187 +SDNode *ishr3 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or9, 0), + ct63, + // glue 
(or chain) input edge + SDValue(or9, 1) + ); + +// R05 = R05 | R08; +// Instr #188 +SDNode *or10 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr3, 0), + SDValue(or9, 0), + // glue (or chain) input edge + SDValue(ishr3, 1) + ); + +SDValue ct64 = CurDAG->getConstant(4, DL, MVT::i16, true, false); +// R08 = R05 >> 4; +// Instr #189 +SDNode *ishr4 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or10, 0), + ct64, + // glue (or chain) input edge + SDValue(or10, 1) + ); + +// R05 = R05 | R08; +// Instr #190 +SDNode *or11 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr4, 0), + SDValue(or10, 0), + // glue (or chain) input edge + SDValue(ishr4, 1) + ); + +SDValue ct65 = CurDAG->getConstant(8, DL, MVT::i16, true, false); +// R08 = R05 >> 8; +// Instr #191 +SDNode *ishr5 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or11, 0), + ct65, + // glue (or chain) input edge + SDValue(or11, 1) + ); + +// R05 = R05 | R08; +// Instr #192 +SDNode *or12 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr5, 0), + SDValue(or11, 0), + // glue (or chain) input edge + SDValue(ishr5, 1) + ); + +// R05 = ~R05; +// Instr #193 +SDNode *not4 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or12, 0), + // glue (or chain) input edge + SDValue(or12, 1) + ); + +// R06 = POPCNT(R05); +// Instr #194 +SDNode *popcnt0 = CurDAG->getMachineNode( + Connex::POPCNT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(not4, 0), + // glue (or chain) input edge + SDValue(not4, 1) + ); + +// R06 = R29 - R06; +// Instr #195 +SDNode *sub7 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload3, 0), + SDValue(popcnt0, 0), + // glue (or chain) input edge + SDValue(popcnt0, 1) + ); + +SDValue ct66 = 
CurDAG->getConstant(11, DL, MVT::i16, true, false); +// R08 = 11; +// Instr #196 +SDNode *vload36 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct66, + // glue (or chain) input edge + SDValue(sub7, 1) + ); + +// R08 = R06 - R08; +// Instr #197 +SDNode *sub8 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub7, 0), + SDValue(vload36, 0), + // glue (or chain) input edge + SDValue(vload36, 1) + ); + +// R09 = R31 < R08; +// Instr #198 +SDNode *lt8 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(sub8, 0), + // glue (or chain) input edge + SDValue(sub8, 1) + ); + +// R09 = R09 & R14; +// Instr #199 +SDNode *and28 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt8, 0), + // glue (or chain) input edge + SDValue(lt8, 1) + ); + +// R09 = R09 == R30; +// Instr #200 +SDNode *eq29 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and28, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and28, 1) + ); + +SDValue ct67 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #201 +SDNode *nop16 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct67, + // glue (or chain) input edge + SDValue(eq29, 1) + ); + +// WHERE_EQ; +// Instr #202 +SDNode *whereeq16 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq29, 0), + // glue (or chain) input edge + SDValue(nop16, 0) + ); + +SDValue ct68 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R62 = R26 << 0; +// Instr #203 +SDNode *ishl8 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub6, 0), + ct68, + SDValue(vload11, 0), + // glue (or chain) input edge + SDValue(whereeq16, 1) + ); + +// R09 = R29 - R08; +// 
Instr #204 +SDNode *sub9 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload3, 0), + SDValue(sub8, 0), + SDValue(eq29, 0), + // glue (or chain) input edge + SDValue(ishl8, 1) + ); + +// R62 = R62 << R09; +// Instr #205 +SDNode *shl2 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl8, 0), + SDValue(sub9, 0), + SDValue(ishl8, 0), + // glue (or chain) input edge + SDValue(sub9, 1) + ); + +// R62 = R62 >> R09; +// Instr #206 +SDNode *shr2 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl2, 0), + SDValue(sub9, 0), + SDValue(shl2, 0), + // glue (or chain) input edge + SDValue(shl2, 1) + ); + +SDValue ct69 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R61 = R08 << 0; +// Instr #207 +SDNode *ishl9 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub8, 0), + ct69, + SDValue(vload12, 0), + // glue (or chain) input edge + SDValue(shr2, 1) + ); + +// R26 = R26 >> R08; +// Instr #208 +SDNode *shr3 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub6, 0), + SDValue(sub8, 0), + SDValue(sub6, 0), + // glue (or chain) input edge + SDValue(ishl9, 1) + ); + +// R25 = R08 + R25; +// Instr #209 +SDNode *add1 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl4, 0), + SDValue(sub8, 0), + SDValue(ishl4, 0), + // glue (or chain) input edge + SDValue(shr3, 1) + ); + +// END_WHERE; +// Instr #210 +SDNode *endwhere16 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(add1, 1) + ); + +// R09 = R08 < R31; +// Instr #211 +SDNode *lt9 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub8, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(endwhere16, 0) + ); 
+ +// R09 = R09 & R14; +// Instr #212 +SDNode *and29 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt9, 0), + // glue (or chain) input edge + SDValue(lt9, 1) + ); + +// R09 = R09 == R30; +// Instr #213 +SDNode *eq30 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and29, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and29, 1) + ); + +SDValue ct70 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #214 +SDNode *nop17 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct70, + // glue (or chain) input edge + SDValue(eq30, 1) + ); + +// WHERE_EQ; +// Instr #215 +SDNode *whereeq17 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq30, 0), + // glue (or chain) input edge + SDValue(nop17, 0) + ); + +// R08 = R31 - R08; +// Instr #216 +SDNode *sub10 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(sub8, 0), + SDValue(sub8, 0), + // glue (or chain) input edge + SDValue(whereeq17, 1) + ); + +// R26 = R26 << R08; +// Instr #217 +SDNode *shl3 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shr3, 0), + SDValue(sub10, 0), + SDValue(shr3, 0), + // glue (or chain) input edge + SDValue(sub10, 1) + ); + +// R25 = R25 - R08; +// Instr #218 +SDNode *sub11 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(add1, 0), + SDValue(sub10, 0), + SDValue(add1, 0), + // glue (or chain) input edge + SDValue(shl3, 1) + ); + +// END_WHERE; +// Instr #219 +SDNode *endwhere17 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sub11, 1) + ); + +// R09 = R25 < R30; +// Instr #220 +SDNode *lt10 = CurDAG->getMachineNode( + Connex::LT_H, + DL, 
+ TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub11, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere17, 0) + ); + +// R09 = R09 & R14; +// Instr #221 +SDNode *and30 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt10, 0), + // glue (or chain) input edge + SDValue(lt10, 1) + ); + +// R09 = R09 == R30; +// Instr #222 +SDNode *eq31 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and30, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and30, 1) + ); + +SDValue ct71 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #223 +SDNode *nop18 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct71, + // glue (or chain) input edge + SDValue(eq31, 1) + ); + +// WHERE_EQ; +// Instr #224 +SDNode *whereeq18 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq31, 0), + // glue (or chain) input edge + SDValue(nop18, 0) + ); + +// R61 = R30 - R25; +// Instr #225 +SDNode *sub12 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(sub11, 0), + SDValue(ishl9, 0), + // glue (or chain) input edge + SDValue(whereeq18, 1) + ); + +SDValue ct72 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R25 = 1; +// Instr #226 +SDNode *vload37 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct72, + SDValue(sub11, 0), + // glue (or chain) input edge + SDValue(sub12, 1) + ); + +SDValue ct73 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R62 = R26 << 0; +// Instr #227 +SDNode *ishl10 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl3, 0), + ct73, + SDValue(shr2, 0), + // glue (or chain) input edge + SDValue(vload37, 1) + ); + +// R09 = R29 - R61; +// Instr #228 +SDNode 
*sub13 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload3, 0), + SDValue(sub12, 0), + SDValue(eq31, 0), + // glue (or chain) input edge + SDValue(ishl10, 1) + ); + +// R62 = R62 << R09; +// Instr #229 +SDNode *shl4 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl10, 0), + SDValue(sub13, 0), + SDValue(ishl10, 0), + // glue (or chain) input edge + SDValue(sub13, 1) + ); + +// R62 = R62 >> R09; +// Instr #230 +SDNode *shr4 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl4, 0), + SDValue(sub13, 0), + SDValue(shl4, 0), + // glue (or chain) input edge + SDValue(shl4, 1) + ); + +// R26 = R26 >> R61; +// Instr #231 +SDNode *shr5 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl3, 0), + SDValue(sub12, 0), + SDValue(shl3, 0), + // glue (or chain) input edge + SDValue(shr4, 1) + ); + +// END_WHERE; +// Instr #232 +SDNode *endwhere18 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(shr5, 1) + ); + +SDValue ct74 = CurDAG->getConstant(1024, DL, MVT::i16, true, false); +// R08 = 1024; +// Instr #233 +SDNode *vload38 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct74, + // glue (or chain) input edge + SDValue(endwhere18, 0) + ); + +// R08 = R26 < R08; +// Instr #234 +SDNode *lt11 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shr5, 0), + SDValue(vload38, 0), + // glue (or chain) input edge + SDValue(vload38, 1) + ); + +// R09 = R25 == R30; +// Instr #235 +SDNode *eq32 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload37, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(lt11, 1) + ); + +// R09 = R09 & R14; +// Instr #236 +SDNode *and31 = CurDAG->getMachineNode( + 
Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(eq32, 0), + // glue (or chain) input edge + SDValue(eq32, 1) + ); + +// R09 = R09 & R08; +// Instr #237 +SDNode *and32 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt11, 0), + SDValue(and31, 0), + // glue (or chain) input edge + SDValue(and31, 1) + ); + +// R09 = R09 == R30; +// Instr #238 +SDNode *eq33 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and32, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and32, 1) + ); + +SDValue ct75 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #239 +SDNode *nop19 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct75, + // glue (or chain) input edge + SDValue(eq33, 1) + ); + +// WHERE_EQ; +// Instr #240 +SDNode *whereeq19 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq33, 0), + // glue (or chain) input edge + SDValue(nop19, 0) + ); + +SDValue ct76 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R25 = 0; +// Instr #241 +SDNode *vload39 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct76, + SDValue(vload37, 0), + // glue (or chain) input edge + SDValue(whereeq19, 1) + ); + +// END_WHERE; +// Instr #242 +SDNode *endwhere19 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload39, 1) + ); + +// R26 = R26 & R13; +// Instr #243 +SDNode *and33 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(shr5, 0), + // glue (or chain) input edge + SDValue(endwhere19, 0) + ); + +SDValue ct77 = CurDAG->getConstant(30, DL, MVT::i16, true, false); +// R09 = 30; +// Instr #244 +SDNode *vload40 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + 
MVT::Glue, + ct77, + // glue (or chain) input edge + SDValue(and33, 1) + ); + +// R09 = R09 < R25; +// Instr #245 +SDNode *lt12 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload40, 0), + SDValue(vload39, 0), + // glue (or chain) input edge + SDValue(vload40, 1) + ); + +// R09 = R09 & R14; +// Instr #246 +SDNode *and34 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt12, 0), + // glue (or chain) input edge + SDValue(lt12, 1) + ); + +// R09 = R09 == R30; +// Instr #247 +SDNode *eq34 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and34, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and34, 1) + ); + +SDValue ct78 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #248 +SDNode *nop20 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct78, + // glue (or chain) input edge + SDValue(eq34, 1) + ); + +// WHERE_EQ; +// Instr #249 +SDNode *whereeq20 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq34, 0), + // glue (or chain) input edge + SDValue(nop20, 0) + ); + +SDValue ct79 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R14 = 0; +// Instr #250 +SDNode *vload41 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct79, + SDValue(vload24, 0), + // glue (or chain) input edge + SDValue(whereeq20, 1) + ); + +SDValue ct80 = CurDAG->getConstant(31744, DL, MVT::i16, true, false); +// R19 = 31744; +// Instr #251 +SDNode *vload42 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct80, + SDValue(xor0, 0), + // glue (or chain) input edge + SDValue(vload41, 1) + ); + +// R19 = R19 | R24; +// Instr #252 +SDNode *or13 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + 
SDValue(and26, 0), + SDValue(vload42, 0), + SDValue(vload42, 0), + // glue (or chain) input edge + SDValue(vload42, 1) + ); + +// END_WHERE; +// Instr #253 +SDNode *endwhere20 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(or13, 1) + ); + +// R08 = R14 == R30; +// Instr #254 +SDNode *eq35 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload41, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere20, 0) + ); + +SDValue ct81 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #255 +SDNode *nop21 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct81, + // glue (or chain) input edge + SDValue(eq35, 1) + ); + +// WHERE_EQ; +// Instr #256 +SDNode *whereeq21 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq35, 0), + // glue (or chain) input edge + SDValue(nop21, 0) + ); + +SDValue ct82 = CurDAG->getConstant(10, DL, MVT::i16, true, false); +// R19 = R25 << 10; +// Instr #257 +SDNode *ishl11 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload39, 0), + ct82, + SDValue(or13, 0), + // glue (or chain) input edge + SDValue(whereeq21, 1) + ); + +// R19 = R19 | R26; +// Instr #258 +SDNode *or14 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and33, 0), + SDValue(ishl11, 0), + SDValue(ishl11, 0), + // glue (or chain) input edge + SDValue(ishl11, 1) + ); + +// R04 = R26 & R30; +// Instr #259 +SDNode *and35 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(and33, 0), + SDValue(vload14, 0), + // glue (or chain) input edge + SDValue(or14, 1) + ); + +// R07 = R61 - R30; +// Instr #260 +SDNode *sub14 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + 
MVT::Glue, + SDValue(sub12, 0), + SDValue(vload1, 0), + SDValue(and17, 0), + // glue (or chain) input edge + SDValue(and35, 1) + ); + +// R08 = R30 << R08; +// Instr #261 +SDNode *shl5 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq35, 0), + SDValue(eq35, 0), + // glue (or chain) input edge + SDValue(sub14, 1) + ); + +// R03 = R62 & R08; +// Instr #262 +SDNode *and36 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl5, 0), + SDValue(shr4, 0), + SDValue(vload15, 0), + // glue (or chain) input edge + SDValue(shl5, 1) + ); + +// R62 = R62 ^ R03; +// Instr #263 +SDNode *xor1 = CurDAG->getMachineNode( + Connex::XORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and36, 0), + SDValue(shr4, 0), + SDValue(shr4, 0), + // glue (or chain) input edge + SDValue(and36, 1) + ); + +// R03 = R03 == R31; +// Instr #264 +SDNode *eq36 = CurDAG->getMachineNode( + Connex::EQ_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and36, 0), + SDValue(vload2, 0), + SDValue(and36, 0), + // glue (or chain) input edge + SDValue(xor1, 1) + ); + +// R03 = R30 - R03; +// Instr #265 +SDNode *sub15 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq36, 0), + SDValue(eq36, 0), + // glue (or chain) input edge + SDValue(eq36, 1) + ); + +SDValue ct83 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R08 = R08 >> 1; +// Instr #266 +SDNode *ishr6 = CurDAG->getMachineNode( + Connex::ISHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl5, 0), + ct83, + SDValue(shl5, 0), + // glue (or chain) input edge + SDValue(sub15, 1) + ); + +// R02 = R62 & R08; +// Instr #267 +SDNode *and37 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr6, 0), + SDValue(xor1, 0), + SDValue(vload16, 0), + // glue (or chain) input edge + 
SDValue(ishr6, 1) + ); + +// R62 = R62 ^ R02; +// Instr #268 +SDNode *xor2 = CurDAG->getMachineNode( + Connex::XORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and37, 0), + SDValue(xor1, 0), + SDValue(xor1, 0), + // glue (or chain) input edge + SDValue(and37, 1) + ); + +// R02 = R02 == R31; +// Instr #269 +SDNode *eq37 = CurDAG->getMachineNode( + Connex::EQ_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and37, 0), + SDValue(vload2, 0), + SDValue(and37, 0), + // glue (or chain) input edge + SDValue(xor2, 1) + ); + +// R02 = R30 - R02; +// Instr #270 +SDNode *sub16 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq37, 0), + SDValue(eq37, 0), + // glue (or chain) input edge + SDValue(eq37, 1) + ); + +// R01 = R62 == R31; +// Instr #271 +SDNode *eq38 = CurDAG->getMachineNode( + Connex::EQ_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(xor2, 0), + SDValue(vload2, 0), + SDValue(vload17, 0), + // glue (or chain) input edge + SDValue(sub16, 1) + ); + +// R01 = R30 - R01; +// Instr #272 +SDNode *sub17 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq38, 0), + SDValue(eq38, 0), + // glue (or chain) input edge + SDValue(eq38, 1) + ); + +// R00 = R04 | R02; +// Instr #273 +SDNode *or15 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub16, 0), + SDValue(and35, 0), + SDValue(vload18, 0), + // glue (or chain) input edge + SDValue(sub17, 1) + ); + +// R00 = R00 | R01; +// Instr #274 +SDNode *or16 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub17, 0), + SDValue(or15, 0), + SDValue(or15, 0), + // glue (or chain) input edge + SDValue(or15, 1) + ); + +// R00 = R00 & R03; +// Instr #275 +SDNode *and38 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + 
SDValue(sub15, 0), + SDValue(or16, 0), + SDValue(or16, 0), + // glue (or chain) input edge + SDValue(or16, 1) + ); + +// R19 = R00 + R19; +// Instr #276 +SDNode *add2 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or14, 0), + SDValue(and38, 0), + SDValue(or14, 0), + // glue (or chain) input edge + SDValue(and38, 1) + ); + +// R19 = R19 | R24; +// Instr #277 +SDNode *resF16 /*or17*/ = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and26, 0), + SDValue(add2, 0), + SDValue(add2, 0), + // glue (or chain) input edge + SDValue(add2, 1) + ); + +// END_WHERE; +// Instr #278 +SDNode *lastNode /*endwhere21*/ = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, +// Alex: MVT::Glue, + MVT::Other, + // glue (or chain) input edge +// Alex: SDValue(or17, 1) + SDValue(resF16, 1) + ); + Index: lib/Target/Connex/Select_ADDi32_OpincaaCodeGen.h =================================================================== --- lib/Target/Connex/Select_ADDi32_OpincaaCodeGen.h +++ lib/Target/Connex/Select_ADDi32_OpincaaCodeGen.h @@ -0,0 +1,213 @@ +//===-- Select_ADDi32_OpincaaCodeGen.h --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Code auto-generated by method Kernel::genLLVMISelManualCode() +// from the OPINCAA lib, from kernel add.i32. +// You should put this code in the Select() method of the SelectionDAGISel +// class of your back end. +// Number of instructions generated: 15.
+// +//===----------------------------------------------------------------------===// + +// From /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/opincaa_standalone_apps/Emulate_i32/ADD_i32_manual/DumpISel_OpincaaCodeGen_old05_011.cpp + +// R27 is REG_SRC1. It is represented by result of nodeOpSrcCast1. +// R28 is REG_SRC2. It is represented by result of nodeOpSrcCast2. + + + + + + + +SDValue ct0 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R31 = 0; +// Instr #0 +SDNode *vload0 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct0, + // glue (or chain) input edge + SDValue(nodeOpSrcCast2, 1) + ); + +SDValue ct1 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R30 = 1; +// Instr #1 +SDNode *vload1 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct1, + // glue (or chain) input edge + SDValue(vload0, 1) + ); + +// R29 = R27 + R28; +// Instr #2 +SDNode *add0 = CurDAG->getMachineNode( + Connex::ADDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(nodeOpSrcCast2, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(vload1, 1) + ); + +// R23 = ADDC(R31, R31); +// Instr #3 +SDNode *addc0 = CurDAG->getMachineNode( + Connex::ADDCV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(vload0, 0), + SDValue(add0, 0) + // no need for glue or chain input (since it normally consumes the output of the predecessor) + ); + +// R26 = INDEX; +// Instr #4 +SDNode *ldix0 = CurDAG->getMachineNode( + Connex::LDIX_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + // glue (or chain) input edge + SDValue(addc0, 1) + ); + +// R25 = R26 & R30; +// Instr #5 +SDNode *and0 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(ldix0, 0), + // glue (or chain) input edge + SDValue(ldix0, 1) + ); + +// R24 = R25 == R30; +// Instr #6 +SDNode *eq0 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + 
TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and0, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and0, 1) + ); + +SDValue ct2 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #7 +SDNode *nop0 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct2, + // glue (or chain) input edge + SDValue(eq0, 1) + ); + +// WHERE_EQ; +// Instr #8 +SDNode *whereeq0 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq0, 0), + // glue (or chain) input edge + SDValue(nop0, 0) + ); + +SDValue ct3 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R23 = 0; +// Instr #9 +SDNode *vload2 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct3, + SDValue(addc0, 0), + // glue (or chain) input edge + SDValue(whereeq0, 1) + ); + +// END_WHERE; +// Instr #10 +SDNode *endwhere0 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload2, 1) + ); + +// CELL_SHR(R23, R30); +// Instr #11 +SDNode *cellshr0 = CurDAG->getMachineNode( + Connex::CELLSHR_H, + DL, + MVT::Glue, + SDValue(vload2, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere0, 0) + ); + +SDValue ct4 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #12 +SDNode *nop1 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct4, + // glue (or chain) input edge + SDValue(cellshr0, 0) + ); + +// R21 = SHIFT_REG; +// Instr #13 +SDNode *ldsh0 = CurDAG->getMachineNode( + Connex::LDSH_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + // glue (or chain) input edge + SDValue(nop1, 0) + ); + +// R22 = R21 + R29; +// Instr #14 +SDNode *resH /*add1*/ = CurDAG->getMachineNode( + Connex::ADDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(add0, 0), + SDValue(ldsh0, 0), + // glue (or chain) input edge + SDValue(ldsh0, 1) + ); + +SDNode *lastNode = 
resH; Index: lib/Target/Connex/Select_LTf16_OpincaaCodeGen.h =================================================================== --- lib/Target/Connex/Select_LTf16_OpincaaCodeGen.h +++ lib/Target/Connex/Select_LTf16_OpincaaCodeGen.h @@ -0,0 +1,705 @@ +//===-- Select_LTf16_OpincaaCodeGen.h ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +// Code auto-generated by method Kernel::genLLVMISelManualCode() +// from the OPINCAA lib, from kernel lt.f16. +// You should put this code in the Select() method of the SelectionDAGISel +// class of your back end. +// Number of instructions generated: 53. +// +//===----------------------------------------------------------------------===// + +// From /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/opincaa_standalone_apps/Emulate_f16/LT_f16_manual/DumpISel_OpincaaCodeGen_old05_050.cpp + +// Code auto-generated by method Kernel::genLLVMISelManualCode() +// from Opincaa lib from kernel: lt.f16. +// It is important to put this code in the Select() method of the +// SelectionDAGISel class of your back end, after the ISelLowering pass, +// which contains the DAG Combiner, because the DAG Combiner can remove +// the getCopyToReg() we create, which can lead to the following error: +// <> assertion failed. +// Number of instructions generated: 53.
+ + + + +SDValue ct0 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R31 = 0; +// Instr #0 +SDNode *vload0 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct0, + // glue (or chain) input edge + SDValue(nodeOpSrcCast1, 1) + ); + +SDValue ct1 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R30 = 1; +// Instr #1 +SDNode *vload1 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct1, + // glue (or chain) input edge + SDValue(vload0, 1) + ); + +SDValue ct2 = CurDAG->getConstant(5, DL, MVT::i16, true, false); +// R29 = 5; +// Instr #2 +SDNode *vload2 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct2, + // glue (or chain) input edge + SDValue(vload1, 1) + ); + +SDValue ct3 = CurDAG->getConstant(1023, DL, MVT::i16, true, false); +// R13 = 1023; +// Instr #3 +SDNode *vload3 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct3, + // glue (or chain) input edge + SDValue(vload2, 1) + ); + +SDValue ct4 = CurDAG->getConstant(31744, DL, MVT::i16, true, false); +// R12 = 31744; +// Instr #4 +SDNode *vload4 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct4, + // glue (or chain) input edge + SDValue(vload3, 1) + ); + +SDValue ct5 = CurDAG->getConstant(-32768, DL, MVT::i16, true, false); +// R11 = -32768; +// Instr #5 +SDNode *vload5 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct5, + // glue (or chain) input edge + SDValue(vload4, 1) + ); + +SDValue ct6 = CurDAG->getConstant(1024, DL, MVT::i16, true, false); +// R10 = 1024; +// Instr #6 +SDNode *vload6 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct6, + // glue (or chain) input edge + SDValue(vload5, 1) + ); + +SDValue ct7 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R19 = 0; +// Instr #7 +SDNode *vload7 = CurDAG->getMachineNode( + 
Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct7, + // glue (or chain) input edge + SDValue(vload6, 1) + ); + +SDValue ct8 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R14 = 1; +// Instr #8 +SDNode *vload8 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct8, + // glue (or chain) input edge + SDValue(vload7, 1) + ); + +// R25 = R27 & R12; +// Instr #9 +SDNode *and0 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload4, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(vload8, 1) + ); + +// R26 = R27 & R13; +// Instr #10 +SDNode *and1 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload3, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(and0, 1) + ); + +// R21 = R23 & R12; +// Instr #11 +SDNode *and2 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload4, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(and1, 1) + ); + +// R22 = R23 & R13; +// Instr #12 +SDNode *and3 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload3, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(and2, 1) + ); + +// R17 = POPCNT(R25); +// Instr #13 +SDNode *popcnt0 = CurDAG->getMachineNode( + Connex::POPCNT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and0, 0), + // glue (or chain) input edge + SDValue(and3, 1) + ); + +// R17 = R17 == R29; +// Instr #14 +SDNode *eq0 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(popcnt0, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(popcnt0, 1) + ); + +// R18 = R26 == R31; +// Instr #15 +SDNode *eq1 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and1, 0), + SDValue(vload0, 0), + // glue (or chain) input edge 
+ SDValue(eq0, 1) + ); + +// R18 = R30 - R18; +// Instr #16 +SDNode *sub0 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq1, 0), + // glue (or chain) input edge + SDValue(eq1, 1) + ); + +// R18 = R18 & R17; +// Instr #17 +SDNode *and4 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq0, 0), + SDValue(sub0, 0), + // glue (or chain) input edge + SDValue(sub0, 1) + ); + +// R18 = R18 == R30; +// Instr #18 +SDNode *eq2 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and4, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and4, 1) + ); + +SDValue ct9 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #19 +SDNode *nop0 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct9, + // glue (or chain) input edge + SDValue(eq2, 1) + ); + +// WHERE_EQ; +// Instr #20 +SDNode *whereeq0 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq2, 0), + // glue (or chain) input edge + SDValue(nop0, 0) + ); + +SDValue ct10 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R14 = 0; +// Instr #21 +SDNode *vload9 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct10, + SDValue(vload8, 0), + // glue (or chain) input edge + SDValue(whereeq0, 1) + ); + +// END_WHERE; +// Instr #22 +SDNode *endwhere0 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload9, 1) + ); + +// R15 = POPCNT(R21); +// Instr #23 +SDNode *popcnt1 = CurDAG->getMachineNode( + Connex::POPCNT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and2, 0), + // glue (or chain) input edge + SDValue(endwhere0, 0) + ); + +// R15 = R15 == R29; +// Instr #24 +SDNode *eq3 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + 
SDValue(popcnt1, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(popcnt1, 1) + ); + +// R16 = R22 == R31; +// Instr #25 +SDNode *eq4 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and3, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(eq3, 1) + ); + +// R16 = R30 - R16; +// Instr #26 +SDNode *sub1 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq4, 0), + // glue (or chain) input edge + SDValue(eq4, 1) + ); + +// R16 = R16 & R15; +// Instr #27 +SDNode *and5 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq3, 0), + SDValue(sub1, 0), + // glue (or chain) input edge + SDValue(sub1, 1) + ); + +// R16 = R16 == R30; +// Instr #28 +SDNode *eq5 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and5, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and5, 1) + ); + +SDValue ct11 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #29 +SDNode *nop1 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct11, + // glue (or chain) input edge + SDValue(eq5, 1) + ); + +// WHERE_EQ; +// Instr #30 +SDNode *whereeq1 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq5, 0), + // glue (or chain) input edge + SDValue(nop1, 0) + ); + +SDValue ct12 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R14 = 0; +// Instr #31 +SDNode *vload10 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct12, + SDValue(vload9, 0), + // glue (or chain) input edge + SDValue(whereeq1, 1) + ); + +// END_WHERE; +// Instr #32 +SDNode *endwhere1 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload10, 1) + ); + +// R16 = R27 == R23; +// Instr #33 +SDNode 
*eq6 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(nodeOpSrcCast1, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(endwhere1, 0) + ); + +// R14 = R14 ^ R16; +// Instr #34 +SDNode *xor0 = CurDAG->getMachineNode( + Connex::XORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq6, 0), + SDValue(vload10, 0), + // glue (or chain) input edge + SDValue(eq6, 1) + ); + +// R16 = R27 & R23; +// Instr #35 +SDNode *and6 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(nodeOpSrcCast2, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(xor0, 1) + ); + +// R16 = R16 & R11; +// Instr #36 +SDNode *and7 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(and6, 0), + // glue (or chain) input edge + SDValue(and6, 1) + ); + +// R16 = R16 == R11; +// Instr #37 +SDNode *eq7 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and7, 0), + SDValue(vload5, 0), + // glue (or chain) input edge + SDValue(and7, 1) + ); + +// R16 = R16 & R14; +// Instr #38 +SDNode *and8 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(xor0, 0), + SDValue(eq7, 0), + // glue (or chain) input edge + SDValue(eq7, 1) + ); + +// R16 = R16 == R30; +// Instr #39 +SDNode *eq8 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and8, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and8, 1) + ); + +SDValue ct13 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #40 +SDNode *nop2 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct13, + // glue (or chain) input edge + SDValue(eq8, 1) + ); + +// WHERE_EQ; +// Instr #41 +SDNode *whereeq2 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + 
SDValue(eq8, 0), + // glue (or chain) input edge + SDValue(nop2, 0) + ); + +// R27 = R27 ^ R11; +// Instr #42 +SDNode *xor1 = CurDAG->getMachineNode( + Connex::XORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(nodeOpSrcCast1, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(whereeq2, 1) + ); + +// R23 = R23 ^ R11; +// Instr #43 +SDNode *xor2 = CurDAG->getMachineNode( + Connex::XORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(nodeOpSrcCast2, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(xor1, 1) + ); + +SDValue ct14 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R19 = 1; +// Instr #44 +SDNode *vload11 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct14, + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(xor2, 1) + ); + +// END_WHERE; +// Instr #45 +SDNode *endwhere2 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload11, 1) + ); + +// R16 = R27 < R23; +// Instr #46 +SDNode *lt0 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(xor1, 0), + SDValue(xor2, 0), + // glue (or chain) input edge + SDValue(endwhere2, 0) + ); + +// R16 = R16 & R14; +// Instr #47 +SDNode *and9 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(xor0, 0), + SDValue(lt0, 0), + // glue (or chain) input edge + SDValue(lt0, 1) + ); + +// R16 = R16 == R30; +// Instr #48 +SDNode *eq9 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and9, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and9, 1) + ); + +SDValue ct15 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #49 +SDNode *nop3 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct15, + // 
glue (or chain) input edge + SDValue(eq9, 1) + ); + +// WHERE_EQ; +// Instr #50 +SDNode *whereeq3 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq9, 0), + // glue (or chain) input edge + SDValue(nop3, 0) + ); + +// R19 = R19 ^ R30; +// Instr #51 +SDNode *resF16 /*xor3*/ = CurDAG->getMachineNode( + Connex::XORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(vload11, 0), + SDValue(vload11, 0), + // glue (or chain) input edge + SDValue(whereeq3, 1) + ); + +// END_WHERE; +// Instr #52 +SDNode *lastNode /*endwhere3*/ = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, +// Alex: MVT::Glue, + MVT::Other, + // glue (or chain) input edge +// Alex: SDValue(xor3, 1) + SDValue(resF16, 1) + ); + Index: lib/Target/Connex/Select_MULTf16_OpincaaCodeGen.h =================================================================== --- lib/Target/Connex/Select_MULTf16_OpincaaCodeGen.h +++ lib/Target/Connex/Select_MULTf16_OpincaaCodeGen.h @@ -0,0 +1,3266 @@ +//===-- Select_MULTf16_OpincaaCodeGen.h --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +// Code auto-generated by method Kernel::genLLVMISelManualCode() +// from the OPINCAA lib, from kernel mul.f16. +// You should put this code in the Select() method of the SelectionDAGISel +// class of your back end. +// Number of instructions generated: 249. 
+// +//===----------------------------------------------------------------------===// + +// From /home/asusu/LLVM/Tests/opincaa_standalone_apps/Emulate_f16/MULTf16_manual/DumpISel_OpincaaCodeGen.cpp + + + + + +SDValue ct0 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R31 = 0; +// Instr #0 +SDNode *vload0 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct0, + // glue (or chain) input edge + SDValue(nodeOpSrcCast1, 1) + ); + +SDValue ct1 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R30 = 1; +// Instr #1 +SDNode *vload1 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct1, + // glue (or chain) input edge + SDValue(vload0, 1) + ); + +SDValue ct2 = CurDAG->getConstant(16, DL, MVT::i16, true, false); +// R29 = 16; +// Instr #2 +SDNode *vload2 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct2, + // glue (or chain) input edge + SDValue(vload1, 1) + ); + +SDValue ct3 = CurDAG->getConstant(31, DL, MVT::i16, true, false); +// R28 = 31; +// Instr #3 +SDNode *vload3 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct3, + // glue (or chain) input edge + SDValue(vload2, 1) + ); + +SDValue ct4 = CurDAG->getConstant(1023, DL, MVT::i16, true, false); +// R10 = 1023; +// Instr #4 +SDNode *vload4 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct4, + // glue (or chain) input edge + SDValue(vload3, 1) + ); + +SDValue ct5 = CurDAG->getConstant(31744, DL, MVT::i16, true, false); +// R09 = 31744; +// Instr #5 +SDNode *vload5 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct5, + // glue (or chain) input edge + SDValue(vload4, 1) + ); + +SDValue ct6 = CurDAG->getConstant(-32768, DL, MVT::i16, true, false); +// R08 = -32768; +// Instr #6 +SDNode *vload6 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct6, + // 
glue (or chain) input edge + SDValue(vload5, 1) + ); + +SDValue ct7 = CurDAG->getConstant(1024, DL, MVT::i16, true, false); +// R07 = 1024; +// Instr #7 +SDNode *vload7 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct7, + // glue (or chain) input edge + SDValue(vload6, 1) + ); + +SDValue ct8 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R55 = 0; +// Instr #8 +SDNode *vload8 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct8, + // glue (or chain) input edge + SDValue(vload7, 1) + ); + +SDValue ct9 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R00 = 1; +// Instr #9 +SDNode *vload9 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct9, + // glue (or chain) input edge + SDValue(vload8, 1) + ); + +// R24 = R27 & R08; +// Instr #10 +SDNode *and0 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload6, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(vload9, 1) + ); + +// R25 = R27 & R09; +// Instr #11 +SDNode *and1 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(and0, 1) + ); + +SDValue ct10 = CurDAG->getConstant(10, DL, MVT::i16, true, false); +// R25 = R25 >> 10; +// Instr #12 +SDNode *ishr0 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and1, 0), + ct10, + // glue (or chain) input edge + SDValue(and1, 1) + ); + +// R26 = R27 & R10; +// Instr #13 +SDNode *and2 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload4, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(ishr0, 1) + ); + +// R14 = R31 < R26; +// Instr #14 +SDNode *lt0 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + 
SDValue(and2, 0), + // glue (or chain) input edge + SDValue(and2, 1) + ); + +// R13 = R25 == R31; +// Instr #15 +SDNode *eq0 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr0, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(lt0, 1) + ); + +// R12 = R13 & R14; +// Instr #16 +SDNode *and3 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt0, 0), + SDValue(eq0, 0), + // glue (or chain) input edge + SDValue(eq0, 1) + ); + +// R12 = R12 == R30; +// Instr #17 +SDNode *eq1 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and3, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and3, 1) + ); + +SDValue ct11 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #18 +SDNode *nop0 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct11, + // glue (or chain) input edge + SDValue(eq1, 1) + ); + +// WHERE_EQ; +// Instr #19 +SDNode *whereeq0 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq1, 0), + // glue (or chain) input edge + SDValue(nop0, 0) + ); + +SDValue ct12 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R25 = 1; +// Instr #20 +SDNode *vload10 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct12, + SDValue(ishr0, 0), + // glue (or chain) input edge + SDValue(whereeq0, 1) + ); + +// END_WHERE; +// Instr #21 +SDNode *endwhere0 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload10, 1) + ); + +// R14 = R25 == R28; +// Instr #22 +SDNode *eq2 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload10, 0), + SDValue(vload3, 0), + // glue (or chain) input edge + SDValue(endwhere0, 0) + ); + +// R14 = R14 | R13; +// Instr #23 +SDNode *or0 = 
CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq0, 0), + SDValue(eq2, 0), + // glue (or chain) input edge + SDValue(eq2, 1) + ); + +// R14 = R14 == R31; +// Instr #24 +SDNode *eq3 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or0, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(or0, 1) + ); + +SDValue ct13 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #25 +SDNode *nop1 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct13, + // glue (or chain) input edge + SDValue(eq3, 1) + ); + +// WHERE_EQ; +// Instr #26 +SDNode *whereeq1 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq3, 0), + // glue (or chain) input edge + SDValue(nop1, 0) + ); + +// R26 = R26 | R07; +// Instr #27 +SDNode *or1 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(and2, 0), + SDValue(and2, 0), + // glue (or chain) input edge + SDValue(whereeq1, 1) + ); + +// END_WHERE; +// Instr #28 +SDNode *endwhere1 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(or1, 1) + ); + +// R20 = R23 & R08; +// Instr #29 +SDNode *and4 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload6, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(endwhere1, 0) + ); + +// R21 = R23 & R09; +// Instr #30 +SDNode *and5 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(and4, 1) + ); + +SDValue ct14 = CurDAG->getConstant(10, DL, MVT::i16, true, false); +// R21 = R21 >> 10; +// Instr #31 +SDNode *ishr1 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + 
SDValue(and5, 0), + ct14, + // glue (or chain) input edge + SDValue(and5, 1) + ); + +// R22 = R23 & R10; +// Instr #32 +SDNode *and6 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload4, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(ishr1, 1) + ); + +// R14 = R31 < R22; +// Instr #33 +SDNode *lt1 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(and6, 0), + // glue (or chain) input edge + SDValue(and6, 1) + ); + +// R13 = R21 == R31; +// Instr #34 +SDNode *eq4 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr1, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(lt1, 1) + ); + +// R12 = R13 & R14; +// Instr #35 +SDNode *and7 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt1, 0), + SDValue(eq4, 0), + // glue (or chain) input edge + SDValue(eq4, 1) + ); + +// R12 = R12 == R30; +// Instr #36 +SDNode *eq5 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and7, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and7, 1) + ); + +SDValue ct15 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #37 +SDNode *nop2 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct15, + // glue (or chain) input edge + SDValue(eq5, 1) + ); + +// WHERE_EQ; +// Instr #38 +SDNode *whereeq2 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq5, 0), + // glue (or chain) input edge + SDValue(nop2, 0) + ); + +SDValue ct16 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R21 = 1; +// Instr #39 +SDNode *vload11 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct16, + SDValue(ishr1, 0), + // glue (or chain) input edge + SDValue(whereeq2, 1) + ); + 
+// END_WHERE; +// Instr #40 +SDNode *endwhere2 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload11, 1) + ); + +// R14 = R21 == R28; +// Instr #41 +SDNode *eq6 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload11, 0), + SDValue(vload3, 0), + // glue (or chain) input edge + SDValue(endwhere2, 0) + ); + +// R14 = R14 | R13; +// Instr #42 +SDNode *or2 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq4, 0), + SDValue(eq6, 0), + // glue (or chain) input edge + SDValue(eq6, 1) + ); + +// R14 = R14 == R31; +// Instr #43 +SDNode *eq7 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or2, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(or2, 1) + ); + +SDValue ct17 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #44 +SDNode *nop3 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct17, + // glue (or chain) input edge + SDValue(eq7, 1) + ); + +// WHERE_EQ; +// Instr #45 +SDNode *whereeq3 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq7, 0), + // glue (or chain) input edge + SDValue(nop3, 0) + ); + +// R22 = R22 | R07; +// Instr #46 +SDNode *or3 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(and6, 0), + SDValue(and6, 0), + // glue (or chain) input edge + SDValue(whereeq3, 1) + ); + +// END_WHERE; +// Instr #47 +SDNode *endwhere3 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(or3, 1) + ); + +SDValue ct18 = CurDAG->getConstant(31744, DL, MVT::i16, true, false); +// R19 = 31744; +// Instr #48 +SDNode *vload12 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct18, + // glue (or chain) input 
edge + SDValue(endwhere3, 0) + ); + +// R15 = R24 ^ R20; +// Instr #49 +SDNode *xor0 = CurDAG->getMachineNode( + Connex::XORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and4, 0), + SDValue(and0, 0), + // glue (or chain) input edge + SDValue(vload12, 1) + ); + +SDValue ct19 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R16 = 0; +// Instr #50 +SDNode *vload13 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct19, + // glue (or chain) input edge + SDValue(xor0, 1) + ); + +SDValue ct20 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R18 = 0; +// Instr #51 +SDNode *vload14 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct20, + // glue (or chain) input edge + SDValue(vload13, 1) + ); + +SDValue ct21 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R17 = 0; +// Instr #52 +SDNode *vload15 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct21, + // glue (or chain) input edge + SDValue(vload14, 1) + ); + +SDValue ct22 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R54 = 0; +// Instr #53 +SDNode *vload16 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct22, + // glue (or chain) input edge + SDValue(vload15, 1) + ); + +SDValue ct23 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R60 = 0; +// Instr #54 +SDNode *vload17 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct23, + // glue (or chain) input edge + SDValue(vload16, 1) + ); + +SDValue ct24 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R59 = 0; +// Instr #55 +SDNode *vload18 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct24, + // glue (or chain) input edge + SDValue(vload17, 1) + ); + +SDValue ct25 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R58 = 0; +// Instr #56 +SDNode *vload19 = CurDAG->getMachineNode( + Connex::VLOAD_H, 
+ DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct25, + // glue (or chain) input edge + SDValue(vload18, 1) + ); + +SDValue ct26 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R57 = 0; +// Instr #57 +SDNode *vload20 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct26, + // glue (or chain) input edge + SDValue(vload19, 1) + ); + +// R02 = R27 == R24; +// Instr #58 +SDNode *eq8 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(nodeOpSrcCast1, 0), + SDValue(and0, 0), + // glue (or chain) input edge + SDValue(vload20, 1) + ); + +// R01 = R23 == R20; +// Instr #59 +SDNode *eq9 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(nodeOpSrcCast2, 0), + SDValue(and4, 0), + // glue (or chain) input edge + SDValue(eq8, 1) + ); + +// R36 = R25 == R28; +// Instr #60 +SDNode *eq10 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload10, 0), + SDValue(vload3, 0), + // glue (or chain) input edge + SDValue(eq9, 1) + ); + +// R38 = R31 < R26; +// Instr #61 +SDNode *lt2 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(or1, 0), + // glue (or chain) input edge + SDValue(eq10, 1) + ); + +// R35 = R21 == R28; +// Instr #62 +SDNode *eq11 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload11, 0), + SDValue(vload3, 0), + // glue (or chain) input edge + SDValue(lt2, 1) + ); + +// R37 = R31 < R22; +// Instr #63 +SDNode *lt3 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(or3, 0), + // glue (or chain) input edge + SDValue(eq11, 1) + ); + +// R62 = R36 & R38; +// Instr #64 +SDNode *and8 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt2, 0), + SDValue(eq10, 0), + // glue (or chain) input edge + SDValue(lt3, 1) + ); + +// R63 
= R35 & R37; +// Instr #65 +SDNode *and9 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt3, 0), + SDValue(eq11, 0), + // glue (or chain) input edge + SDValue(and8, 1) + ); + +// R61 = R62 | R63; +// Instr #66 +SDNode *or4 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and9, 0), + SDValue(and8, 0), + // glue (or chain) input edge + SDValue(and9, 1) + ); + +// R62 = R36 & R01; +// Instr #67 +SDNode *and10 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq9, 0), + SDValue(eq10, 0), + // glue (or chain) input edge + SDValue(or4, 1) + ); + +// R63 = R35 & R02; +// Instr #68 +SDNode *and11 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq8, 0), + SDValue(eq11, 0), + // glue (or chain) input edge + SDValue(and10, 1) + ); + +// R61 = R61 | R62; +// Instr #69 +SDNode *or5 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and10, 0), + SDValue(or4, 0), + // glue (or chain) input edge + SDValue(and11, 1) + ); + +// R61 = R61 | R63; +// Instr #70 +SDNode *or6 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and11, 0), + SDValue(or5, 0), + // glue (or chain) input edge + SDValue(or5, 1) + ); + +// R03 = R61 == R30; +// Instr #71 +SDNode *eq12 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or6, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(or6, 1) + ); + +SDValue ct27 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #72 +SDNode *nop4 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct27, + // glue (or chain) input edge + SDValue(eq12, 1) + ); + +// WHERE_EQ; +// Instr #73 +SDNode *whereeq4 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq12, 0), + 
// glue (or chain) input edge + SDValue(nop4, 0) + ); + +SDValue ct28 = CurDAG->getConstant(31745, DL, MVT::i16, true, false); +// R19 = 31745; +// Instr #74 +SDNode *vload21 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct28, + SDValue(vload12, 0), + // glue (or chain) input edge + SDValue(whereeq4, 1) + ); + +SDValue ct29 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R00 = 0; +// Instr #75 +SDNode *vload22 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct29, + SDValue(vload9, 0), + // glue (or chain) input edge + SDValue(vload21, 1) + ); + +// END_WHERE; +// Instr #76 +SDNode *endwhere4 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload22, 1) + ); + +// R39 = R36 | R35; +// Instr #77 +SDNode *or7 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq11, 0), + SDValue(eq10, 0), + // glue (or chain) input edge + SDValue(endwhere4, 0) + ); + +// R04 = R61 == R31; +// Instr #78 +SDNode *eq13 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or6, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(or7, 1) + ); + +// R03 = R39 == R30; +// Instr #79 +SDNode *eq14 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or7, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(eq13, 1) + ); + +// R03 = R03 & R04; +// Instr #80 +SDNode *and12 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq13, 0), + SDValue(eq14, 0), + // glue (or chain) input edge + SDValue(eq14, 1) + ); + +// R03 = R03 == R30; +// Instr #81 +SDNode *eq15 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and12, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and12, 1) + ); + +SDValue ct30 = 
CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #82 +SDNode *nop5 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct30, + // glue (or chain) input edge + SDValue(eq15, 1) + ); + +// WHERE_EQ; +// Instr #83 +SDNode *whereeq5 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq15, 0), + // glue (or chain) input edge + SDValue(nop5, 0) + ); + +// R19 = R19 | R15; +// Instr #84 +SDNode *or8 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(xor0, 0), + SDValue(vload21, 0), + SDValue(vload21, 0), + // glue (or chain) input edge + SDValue(whereeq5, 1) + ); + +SDValue ct31 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R00 = 0; +// Instr #85 +SDNode *vload23 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct31, + SDValue(vload22, 0), + // glue (or chain) input edge + SDValue(or8, 1) + ); + +// END_WHERE; +// Instr #86 +SDNode *endwhere5 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload23, 1) + ); + +// R52 = R00 == R30; +// Instr #87 +SDNode *eq16 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload23, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere5, 0) + ); + +SDValue ct32 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #88 +SDNode *nop6 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct32, + // glue (or chain) input edge + SDValue(eq16, 1) + ); + +// WHERE_EQ; +// Instr #89 +SDNode *whereeq6 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq16, 0), + // glue (or chain) input edge + SDValue(nop6, 0) + ); + +// R16 = R21 + R25; +// Instr #90 +SDNode *add0 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + 
TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload10, 0), + SDValue(vload11, 0), + SDValue(vload13, 0), + // glue (or chain) input edge + SDValue(whereeq6, 1) + ); + +SDValue ct33 = CurDAG->getConstant(15, DL, MVT::i16, true, false); +// R04 = 15; +// Instr #91 +SDNode *vload24 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct33, + SDValue(eq13, 0), + // glue (or chain) input edge + SDValue(add0, 1) + ); + +// R16 = R16 - R04; +// Instr #92 +SDNode *sub0 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(add0, 0), + SDValue(vload24, 0), + SDValue(add0, 0), + // glue (or chain) input edge + SDValue(vload24, 1) + ); + +// R26 * R22; +// Instr #93 +SDNode *mult0 = CurDAG->getMachineNode( + Connex::MULT_H, + DL, + MVT::Glue, + SDValue(or1, 0), + SDValue(or3, 0), + // glue (or chain) input edge + SDValue(sub0, 1) + ); + +// R18 = MULT_LOW(); +// Instr #94 +SDNode *multlo0 = CurDAG->getMachineNode( + Connex::MULTLO_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload14, 0), + // glue (or chain) input edge + SDValue(mult0, 0) + ); + +// R17 = MULT_HIGH(); +// Instr #95 +SDNode *multhi0 = CurDAG->getMachineNode( + Connex::MULTHI_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload15, 0), + // glue (or chain) input edge + SDValue(multlo0, 1) + ); + +// END_WHERE; +// Instr #96 +SDNode *endwhere6 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(multhi0, 1) + ); + +SDValue ct34 = CurDAG->getConstant(16, DL, MVT::i16, true, false); +// R03 = 16; +// Instr #97 +SDNode *vload25 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct34, + // glue (or chain) input edge + SDValue(endwhere6, 0) + ); + +SDValue ct35 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R06 = R18 << 0; +// Instr #98 +SDNode *ishl0 = CurDAG->getMachineNode( + Connex::ISHLV_H, + DL, + 
TYPE_VECTOR_I16, + MVT::Glue, + SDValue(multlo0, 0), + ct35, + // glue (or chain) input edge + SDValue(vload25, 1) + ); + +// R04 = R31 < R17; +// Instr #99 +SDNode *lt4 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(multhi0, 0), + // glue (or chain) input edge + SDValue(ishl0, 1) + ); + +// R52 = R04 & R00; +// Instr #100 +SDNode *and13 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload23, 0), + SDValue(lt4, 0), + // glue (or chain) input edge + SDValue(lt4, 1) + ); + +// R52 = R52 == R30; +// Instr #101 +SDNode *eq17 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and13, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and13, 1) + ); + +SDValue ct36 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #102 +SDNode *nop7 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct36, + // glue (or chain) input edge + SDValue(eq17, 1) + ); + +// WHERE_EQ; +// Instr #103 +SDNode *whereeq7 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq17, 0), + // glue (or chain) input edge + SDValue(nop7, 0) + ); + +SDValue ct37 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R06 = R17 << 0; +// Instr #104 +SDNode *ishl1 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(multhi0, 0), + ct37, + SDValue(ishl0, 0), + // glue (or chain) input edge + SDValue(whereeq7, 1) + ); + +SDValue ct38 = CurDAG->getConstant(32, DL, MVT::i16, true, false); +// R03 = 32; +// Instr #105 +SDNode *vload26 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct38, + SDValue(vload25, 0), + // glue (or chain) input edge + SDValue(ishl1, 1) + ); + +// END_WHERE; +// Instr #106 +SDNode *endwhere7 = CurDAG->getMachineNode( + 
Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload26, 1) + ); + +SDValue ct39 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R04 = R06 >> 1; +// Instr #107 +SDNode *ishr2 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl1, 0), + ct39, + // glue (or chain) input edge + SDValue(endwhere7, 0) + ); + +// R06 = R06 | R04; +// Instr #108 +SDNode *or9 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr2, 0), + SDValue(ishl1, 0), + // glue (or chain) input edge + SDValue(ishr2, 1) + ); + +SDValue ct40 = CurDAG->getConstant(2, DL, MVT::i16, true, false); +// R04 = R06 >> 2; +// Instr #109 +SDNode *ishr3 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or9, 0), + ct40, + // glue (or chain) input edge + SDValue(or9, 1) + ); + +// R06 = R06 | R04; +// Instr #110 +SDNode *or10 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr3, 0), + SDValue(or9, 0), + // glue (or chain) input edge + SDValue(ishr3, 1) + ); + +SDValue ct41 = CurDAG->getConstant(4, DL, MVT::i16, true, false); +// R04 = R06 >> 4; +// Instr #111 +SDNode *ishr4 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or10, 0), + ct41, + // glue (or chain) input edge + SDValue(or10, 1) + ); + +// R06 = R06 | R04; +// Instr #112 +SDNode *or11 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr4, 0), + SDValue(or10, 0), + // glue (or chain) input edge + SDValue(ishr4, 1) + ); + +SDValue ct42 = CurDAG->getConstant(8, DL, MVT::i16, true, false); +// R04 = R06 >> 8; +// Instr #113 +SDNode *ishr5 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or11, 0), + ct42, + // glue (or chain) input edge + SDValue(or11, 1) + ); + +// R06 = R06 | R04; +// Instr #114 +SDNode *or12 = 
CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr5, 0), + SDValue(or11, 0), + // glue (or chain) input edge + SDValue(ishr5, 1) + ); + +// R06 = ~R06; +// Instr #115 +SDNode *not0 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or12, 0), + // glue (or chain) input edge + SDValue(or12, 1) + ); + +// R05 = POPCNT(R06); +// Instr #116 +SDNode *popcnt0 = CurDAG->getMachineNode( + Connex::POPCNT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(not0, 0), + // glue (or chain) input edge + SDValue(not0, 1) + ); + +// R05 = R03 - R05; +// Instr #117 +SDNode *sub1 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload26, 0), + SDValue(popcnt0, 0), + // glue (or chain) input edge + SDValue(popcnt0, 1) + ); + +SDValue ct43 = CurDAG->getConstant(11, DL, MVT::i16, true, false); +// R04 = 11; +// Instr #118 +SDNode *vload27 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct43, + // glue (or chain) input edge + SDValue(sub1, 1) + ); + +// R04 = R05 - R04; +// Instr #119 +SDNode *sub2 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub1, 0), + SDValue(vload27, 0), + // glue (or chain) input edge + SDValue(vload27, 1) + ); + +// R12 = R04 < R31; +// Instr #120 +SDNode *lt5 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub2, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(sub2, 1) + ); + +// R52 = R12 & R00; +// Instr #121 +SDNode *and14 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload23, 0), + SDValue(lt5, 0), + // glue (or chain) input edge + SDValue(lt5, 1) + ); + +// R52 = R52 == R30; +// Instr #122 +SDNode *eq18 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and14, 0), + SDValue(vload1, 0), + // glue (or chain) input 
edge + SDValue(and14, 1) + ); + +SDValue ct44 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #123 +SDNode *nop8 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct44, + // glue (or chain) input edge + SDValue(eq18, 1) + ); + +// WHERE_EQ; +// Instr #124 +SDNode *whereeq8 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq18, 0), + // glue (or chain) input edge + SDValue(nop8, 0) + ); + +// R03 = R31 - R04; +// Instr #125 +SDNode *sub3 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(sub2, 0), + SDValue(vload26, 0), + // glue (or chain) input edge + SDValue(whereeq8, 1) + ); + +// R18 = R18 << R03; +// Instr #126 +SDNode *shl0 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(multlo0, 0), + SDValue(sub3, 0), + SDValue(multlo0, 0), + // glue (or chain) input edge + SDValue(sub3, 1) + ); + +// END_WHERE; +// Instr #127 +SDNode *endwhere8 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(shl0, 1) + ); + +// R12 = R31 < R04; +// Instr #128 +SDNode *lt6 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(sub2, 0), + // glue (or chain) input edge + SDValue(endwhere8, 0) + ); + +// R52 = R12 & R00; +// Instr #129 +SDNode *and15 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload23, 0), + SDValue(lt6, 0), + // glue (or chain) input edge + SDValue(lt6, 1) + ); + +// R52 = R52 == R30; +// Instr #130 +SDNode *eq19 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and15, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and15, 1) + ); + +SDValue ct45 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); 
+// NOP; +// Instr #131 +SDNode *nop9 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct45, + // glue (or chain) input edge + SDValue(eq19, 1) + ); + +// WHERE_EQ; +// Instr #132 +SDNode *whereeq9 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq19, 0), + // glue (or chain) input edge + SDValue(nop9, 0) + ); + +SDValue ct46 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R03 = R17 << 0; +// Instr #133 +SDNode *ishl2 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(multhi0, 0), + ct46, + SDValue(sub3, 0), + // glue (or chain) input edge + SDValue(whereeq9, 1) + ); + +// R17 = R17 >> R04; +// Instr #134 +SDNode *shr0 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(multhi0, 0), + SDValue(sub2, 0), + SDValue(multhi0, 0), + // glue (or chain) input edge + SDValue(ishl2, 1) + ); + +SDValue ct47 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R12 = R04 << 0; +// Instr #135 +SDNode *ishl3 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub2, 0), + ct47, + SDValue(lt6, 0), + // glue (or chain) input edge + SDValue(shr0, 1) + ); + +// R04 = R29 - R04; +// Instr #136 +SDNode *sub4 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(sub2, 0), + SDValue(sub2, 0), + // glue (or chain) input edge + SDValue(ishl3, 1) + ); + +SDValue ct48 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R55 = R18 << 0; +// Instr #137 +SDNode *ishl4 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl0, 0), + ct48, + SDValue(vload8, 0), + // glue (or chain) input edge + SDValue(sub4, 1) + ); + +// R55 = R55 << R04; +// Instr #138 +SDNode *shl1 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + 
SDValue(ishl4, 0), + SDValue(sub4, 0), + SDValue(ishl4, 0), + // glue (or chain) input edge + SDValue(ishl4, 1) + ); + +// R55 = R55 >> R04; +// Instr #139 +SDNode *shr1 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl1, 0), + SDValue(sub4, 0), + SDValue(shl1, 0), + // glue (or chain) input edge + SDValue(shl1, 1) + ); + +SDValue ct49 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R54 = R12 << 0; +// Instr #140 +SDNode *ishl5 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl3, 0), + ct49, + SDValue(vload16, 0), + // glue (or chain) input edge + SDValue(shr1, 1) + ); + +// R18 = R18 >> R12; +// Instr #141 +SDNode *shr2 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl0, 0), + SDValue(ishl3, 0), + SDValue(shl0, 0), + // glue (or chain) input edge + SDValue(ishl5, 1) + ); + +// R03 = R03 << R04; +// Instr #142 +SDNode *shl2 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl2, 0), + SDValue(sub4, 0), + SDValue(ishl2, 0), + // glue (or chain) input edge + SDValue(shr2, 1) + ); + +// R18 = R18 | R03; +// Instr #143 +SDNode *or13 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl2, 0), + SDValue(shr2, 0), + SDValue(shr2, 0), + // glue (or chain) input edge + SDValue(shl2, 1) + ); + +// END_WHERE; +// Instr #144 +SDNode *endwhere9 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(or13, 1) + ); + +// R52 = R00 == R30; +// Instr #145 +SDNode *eq20 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload23, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere9, 0) + ); + +SDValue ct50 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr 
#146 +SDNode *nop10 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct50, + // glue (or chain) input edge + SDValue(eq20, 1) + ); + +// WHERE_EQ; +// Instr #147 +SDNode *whereeq10 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq20, 0), + // glue (or chain) input edge + SDValue(nop10, 0) + ); + +SDValue ct51 = CurDAG->getConstant(21, DL, MVT::i16, true, false); +// R04 = 21; +// Instr #148 +SDNode *vload28 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct51, + SDValue(sub4, 0), + // glue (or chain) input edge + SDValue(whereeq10, 1) + ); + +// R04 = R04 - R05; +// Instr #149 +SDNode *sub5 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload28, 0), + SDValue(sub1, 0), + SDValue(vload28, 0), + // glue (or chain) input edge + SDValue(vload28, 1) + ); + +// R16 = R16 - R04; +// Instr #150 +SDNode *sub6 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub0, 0), + SDValue(sub5, 0), + SDValue(sub0, 0), + // glue (or chain) input edge + SDValue(sub5, 1) + ); + +// END_WHERE; +// Instr #151 +SDNode *endwhere10 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sub6, 1) + ); + +SDValue ct52 = CurDAG->getConstant(15, DL, MVT::i16, true, false); +// R03 = 15; +// Instr #152 +SDNode *vload29 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct52, + // glue (or chain) input edge + SDValue(endwhere10, 0) + ); + +// R04 = R30 - R16; +// Instr #153 +SDNode *sub7 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(sub6, 0), + // glue (or chain) input edge + SDValue(vload29, 1) + ); + +// R12 = R03 < R04; +// Instr #154 +SDNode *lt7 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + 
SDValue(vload29, 0), + SDValue(sub7, 0), + // glue (or chain) input edge + SDValue(sub7, 1) + ); + +// R52 = R12 & R00; +// Instr #155 +SDNode *and16 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload23, 0), + SDValue(lt7, 0), + // glue (or chain) input edge + SDValue(lt7, 1) + ); + +// R52 = R52 == R30; +// Instr #156 +SDNode *eq21 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and16, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and16, 1) + ); + +SDValue ct53 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #157 +SDNode *nop11 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct53, + // glue (or chain) input edge + SDValue(eq21, 1) + ); + +// WHERE_EQ; +// Instr #158 +SDNode *whereeq11 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq21, 0), + // glue (or chain) input edge + SDValue(nop11, 0) + ); + +// R54 = R55 == R31; +// Instr #159 +SDNode *eq22 = CurDAG->getMachineNode( + Connex::EQ_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shr1, 0), + SDValue(vload0, 0), + SDValue(ishl5, 0), + // glue (or chain) input edge + SDValue(whereeq11, 1) + ); + +// R54 = R30 - R54; +// Instr #160 +SDNode *sub8 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq22, 0), + SDValue(eq22, 0), + // glue (or chain) input edge + SDValue(eq22, 1) + ); + +SDValue ct54 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R55 = R18 << 0; +// Instr #161 +SDNode *ishl6 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or13, 0), + ct54, + SDValue(shr1, 0), + // glue (or chain) input edge + SDValue(sub8, 1) + ); + +// R55 = R55 | R54; +// Instr #162 +SDNode *or14 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + 
TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub8, 0), + SDValue(ishl6, 0), + SDValue(ishl6, 0), + // glue (or chain) input edge + SDValue(ishl6, 1) + ); + +SDValue ct55 = CurDAG->getConstant(16, DL, MVT::i16, true, false); +// R54 = 16; +// Instr #163 +SDNode *vload30 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct55, + SDValue(sub8, 0), + // glue (or chain) input edge + SDValue(or14, 1) + ); + +SDValue ct56 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R18 = R17 << 0; +// Instr #164 +SDNode *ishl7 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shr0, 0), + ct56, + SDValue(or13, 0), + // glue (or chain) input edge + SDValue(vload30, 1) + ); + +SDValue ct57 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R17 = 0; +// Instr #165 +SDNode *vload31 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct57, + SDValue(shr0, 0), + // glue (or chain) input edge + SDValue(ishl7, 1) + ); + +// R04 = R04 - R29; +// Instr #166 +SDNode *sub9 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub7, 0), + SDValue(vload2, 0), + SDValue(sub7, 0), + // glue (or chain) input edge + SDValue(vload31, 1) + ); + +// R16 = R29 + R16; +// Instr #167 +SDNode *add1 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub6, 0), + SDValue(vload2, 0), + SDValue(sub6, 0), + // glue (or chain) input edge + SDValue(sub9, 1) + ); + +// END_WHERE; +// Instr #168 +SDNode *endwhere11 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(add1, 1) + ); + +// R12 = R03 < R04; +// Instr #169 +SDNode *lt8 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload29, 0), + SDValue(sub9, 0), + // glue (or chain) input edge + SDValue(endwhere11, 0) + ); + +// R52 = R12 & 
R00; +// Instr #170 +SDNode *and17 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload23, 0), + SDValue(lt8, 0), + // glue (or chain) input edge + SDValue(lt8, 1) + ); + +// R52 = R52 == R30; +// Instr #171 +SDNode *eq23 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and17, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and17, 1) + ); + +SDValue ct58 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #172 +SDNode *nop12 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct58, + // glue (or chain) input edge + SDValue(eq23, 1) + ); + +// WHERE_EQ; +// Instr #173 +SDNode *whereeq12 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq23, 0), + // glue (or chain) input edge + SDValue(nop12, 0) + ); + +// R54 = R55 == R31; +// Instr #174 +SDNode *eq24 = CurDAG->getMachineNode( + Connex::EQ_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or14, 0), + SDValue(vload0, 0), + SDValue(vload30, 0), + // glue (or chain) input edge + SDValue(whereeq12, 1) + ); + +// R54 = R30 - R54; +// Instr #175 +SDNode *sub10 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq24, 0), + SDValue(eq24, 0), + // glue (or chain) input edge + SDValue(eq24, 1) + ); + +SDValue ct59 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R55 = R18 << 0; +// Instr #176 +SDNode *ishl8 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl7, 0), + ct59, + SDValue(or14, 0), + // glue (or chain) input edge + SDValue(sub10, 1) + ); + +// R55 = R55 | R54; +// Instr #177 +SDNode *or15 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub10, 0), + SDValue(ishl8, 0), + SDValue(ishl8, 0), + // glue (or chain) input edge 
+ SDValue(ishl8, 1) + ); + +SDValue ct60 = CurDAG->getConstant(16, DL, MVT::i16, true, false); +// R54 = 16; +// Instr #178 +SDNode *vload32 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct60, + SDValue(sub10, 0), + // glue (or chain) input edge + SDValue(or15, 1) + ); + +SDValue ct61 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R18 = 0; +// Instr #179 +SDNode *vload33 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct61, + SDValue(ishl7, 0), + // glue (or chain) input edge + SDValue(vload32, 1) + ); + +// R04 = R04 - R29; +// Instr #180 +SDNode *sub11 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub9, 0), + SDValue(vload2, 0), + SDValue(sub9, 0), + // glue (or chain) input edge + SDValue(vload33, 1) + ); + +// R16 = R29 + R16; +// Instr #181 +SDNode *add2 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(add1, 0), + SDValue(vload2, 0), + SDValue(add1, 0), + // glue (or chain) input edge + SDValue(sub11, 1) + ); + +// END_WHERE; +// Instr #182 +SDNode *endwhere12 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(add2, 1) + ); + +// R12 = R16 < R30; +// Instr #183 +SDNode *lt9 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(add2, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere12, 0) + ); + +// R52 = R12 & R00; +// Instr #184 +SDNode *and18 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload23, 0), + SDValue(lt9, 0), + // glue (or chain) input edge + SDValue(lt9, 1) + ); + +// R52 = R52 == R30; +// Instr #185 +SDNode *eq25 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and18, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + 
SDValue(and18, 1) + ); + +SDValue ct62 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #186 +SDNode *nop13 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct62, + // glue (or chain) input edge + SDValue(eq25, 1) + ); + +// WHERE_EQ; +// Instr #187 +SDNode *whereeq13 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq25, 0), + // glue (or chain) input edge + SDValue(nop13, 0) + ); + +// R04 = R30 - R16; +// Instr #188 +SDNode *sub12 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(add2, 0), + SDValue(sub11, 0), + // glue (or chain) input edge + SDValue(whereeq13, 1) + ); + +// R54 = R55 == R31; +// Instr #189 +SDNode *eq26 = CurDAG->getMachineNode( + Connex::EQ_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or15, 0), + SDValue(vload0, 0), + SDValue(vload32, 0), + // glue (or chain) input edge + SDValue(sub12, 1) + ); + +// R54 = R30 - R54; +// Instr #190 +SDNode *sub13 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq26, 0), + SDValue(eq26, 0), + // glue (or chain) input edge + SDValue(eq26, 1) + ); + +SDValue ct63 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R55 = R18 << 0; +// Instr #191 +SDNode *ishl9 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload33, 0), + ct63, + SDValue(or15, 0), + // glue (or chain) input edge + SDValue(sub13, 1) + ); + +// R55 = R55 | R54; +// Instr #192 +SDNode *or16 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub13, 0), + SDValue(ishl9, 0), + SDValue(ishl9, 0), + // glue (or chain) input edge + SDValue(ishl9, 1) + ); + +// R03 = R29 - R04; +// Instr #193 +SDNode *sub14 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + 
MVT::Glue, + SDValue(vload2, 0), + SDValue(sub12, 0), + SDValue(vload29, 0), + // glue (or chain) input edge + SDValue(or16, 1) + ); + +// R55 = R55 << R03; +// Instr #194 +SDNode *shl3 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or16, 0), + SDValue(sub14, 0), + SDValue(or16, 0), + // glue (or chain) input edge + SDValue(sub14, 1) + ); + +// R55 = R55 >> R03; +// Instr #195 +SDNode *shr3 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl3, 0), + SDValue(sub14, 0), + SDValue(shl3, 0), + // glue (or chain) input edge + SDValue(shl3, 1) + ); + +SDValue ct64 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R54 = R04 << 0; +// Instr #196 +SDNode *ishl10 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub12, 0), + ct64, + SDValue(sub13, 0), + // glue (or chain) input edge + SDValue(shr3, 1) + ); + +// R18 = R18 >> R04; +// Instr #197 +SDNode *shr4 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload33, 0), + SDValue(sub12, 0), + SDValue(vload33, 0), + // glue (or chain) input edge + SDValue(ishl10, 1) + ); + +SDValue ct65 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R03 = R17 << 0; +// Instr #198 +SDNode *ishl11 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload31, 0), + ct65, + SDValue(sub14, 0), + // glue (or chain) input edge + SDValue(shr4, 1) + ); + +// R17 = R17 >> R04; +// Instr #199 +SDNode *shr5 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload31, 0), + SDValue(sub12, 0), + SDValue(vload31, 0), + // glue (or chain) input edge + SDValue(ishl11, 1) + ); + +// R04 = R29 - R04; +// Instr #200 +SDNode *sub15 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + 
SDValue(sub12, 0), + SDValue(sub12, 0), + // glue (or chain) input edge + SDValue(shr5, 1) + ); + +// R03 = R03 << R04; +// Instr #201 +SDNode *shl4 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl11, 0), + SDValue(sub15, 0), + SDValue(ishl11, 0), + // glue (or chain) input edge + SDValue(sub15, 1) + ); + +// R18 = R18 | R03; +// Instr #202 +SDNode *or17 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl4, 0), + SDValue(shr4, 0), + SDValue(shr4, 0), + // glue (or chain) input edge + SDValue(shl4, 1) + ); + +SDValue ct66 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R16 = 1; +// Instr #203 +SDNode *vload34 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct66, + SDValue(add2, 0), + // glue (or chain) input edge + SDValue(or17, 1) + ); + +// END_WHERE; +// Instr #204 +SDNode *endwhere13 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload34, 1) + ); + +SDValue ct67 = CurDAG->getConstant(30, DL, MVT::i16, true, false); +// R12 = 30; +// Instr #205 +SDNode *vload35 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct67, + // glue (or chain) input edge + SDValue(endwhere13, 0) + ); + +// R12 = R12 < R16; +// Instr #206 +SDNode *lt10 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload35, 0), + SDValue(vload34, 0), + // glue (or chain) input edge + SDValue(vload35, 1) + ); + +// R52 = R12 & R00; +// Instr #207 +SDNode *and19 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload23, 0), + SDValue(lt10, 0), + // glue (or chain) input edge + SDValue(lt10, 1) + ); + +// R52 = R52 == R30; +// Instr #208 +SDNode *eq27 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and19, 0), + SDValue(vload1, 0), 
+ // glue (or chain) input edge + SDValue(and19, 1) + ); + +SDValue ct68 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #209 +SDNode *nop14 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct68, + // glue (or chain) input edge + SDValue(eq27, 1) + ); + +// WHERE_EQ; +// Instr #210 +SDNode *whereeq14 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq27, 0), + // glue (or chain) input edge + SDValue(nop14, 0) + ); + +// R19 = R19 | R15; +// Instr #211 +SDNode *or18 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(xor0, 0), + SDValue(or8, 0), + SDValue(or8, 0), + // glue (or chain) input edge + SDValue(whereeq14, 1) + ); + +SDValue ct69 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R00 = 0; +// Instr #212 +SDNode *vload36 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct69, + SDValue(vload23, 0), + // glue (or chain) input edge + SDValue(or18, 1) + ); + +// END_WHERE; +// Instr #213 +SDNode *endwhere14 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload36, 1) + ); + +SDValue ct70 = CurDAG->getConstant(1024, DL, MVT::i16, true, false); +// R04 = 1024; +// Instr #214 +SDNode *vload37 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct70, + // glue (or chain) input edge + SDValue(endwhere14, 0) + ); + +// R04 = R18 < R04; +// Instr #215 +SDNode *lt11 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or17, 0), + SDValue(vload37, 0), + // glue (or chain) input edge + SDValue(vload37, 1) + ); + +// R12 = R16 == R30; +// Instr #216 +SDNode *eq28 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload34, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(lt11, 1) + ); 
+ +// R12 = R12 & R04; +// Instr #217 +SDNode *and20 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt11, 0), + SDValue(eq28, 0), + // glue (or chain) input edge + SDValue(eq28, 1) + ); + +// R12 = R12 == R30; +// Instr #218 +SDNode *eq29 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and20, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and20, 1) + ); + +// R52 = R12 & R00; +// Instr #219 +SDNode *and21 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload36, 0), + SDValue(eq29, 0), + // glue (or chain) input edge + SDValue(eq29, 1) + ); + +// R52 = R52 == R30; +// Instr #220 +SDNode *eq30 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and21, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and21, 1) + ); + +SDValue ct71 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #221 +SDNode *nop15 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct71, + // glue (or chain) input edge + SDValue(eq30, 1) + ); + +// WHERE_EQ; +// Instr #222 +SDNode *whereeq15 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq30, 0), + // glue (or chain) input edge + SDValue(nop15, 0) + ); + +SDValue ct72 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R16 = 0; +// Instr #223 +SDNode *vload38 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct72, + SDValue(vload34, 0), + // glue (or chain) input edge + SDValue(whereeq15, 1) + ); + +// END_WHERE; +// Instr #224 +SDNode *endwhere15 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload38, 1) + ); + +// R52 = R00 == R30; +// Instr #225 +SDNode *eq31 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + 
MVT::Glue, + SDValue(vload36, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere15, 0) + ); + +SDValue ct73 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #226 +SDNode *nop16 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct73, + // glue (or chain) input edge + SDValue(eq31, 1) + ); + +// WHERE_EQ; +// Instr #227 +SDNode *whereeq16 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq31, 0), + // glue (or chain) input edge + SDValue(nop16, 0) + ); + +// R60 = R18 & R30; +// Instr #228 +SDNode *and22 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(or17, 0), + SDValue(vload17, 0), + // glue (or chain) input edge + SDValue(whereeq16, 1) + ); + +// R54 = R54 - R30; +// Instr #229 +SDNode *sub16 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl10, 0), + SDValue(vload1, 0), + SDValue(ishl10, 0), + // glue (or chain) input edge + SDValue(and22, 1) + ); + +// R54 = R30 << R54; +// Instr #230 +SDNode *shl5 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(sub16, 0), + SDValue(sub16, 0), + // glue (or chain) input edge + SDValue(sub16, 1) + ); + +// R59 = R55 & R54; +// Instr #231 +SDNode *and23 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl5, 0), + SDValue(shr3, 0), + SDValue(vload18, 0), + // glue (or chain) input edge + SDValue(shl5, 1) + ); + +// R55 = R55 ^ R59; +// Instr #232 +SDNode *xor1 = CurDAG->getMachineNode( + Connex::XORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and23, 0), + SDValue(shr3, 0), + SDValue(shr3, 0), + // glue (or chain) input edge + SDValue(and23, 1) + ); + +// R59 = R59 == R31; +// Instr #233 +SDNode *eq32 = CurDAG->getMachineNode( + 
Connex::EQ_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and23, 0), + SDValue(vload0, 0), + SDValue(and23, 0), + // glue (or chain) input edge + SDValue(xor1, 1) + ); + +// R59 = R30 - R59; +// Instr #234 +SDNode *sub17 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq32, 0), + SDValue(eq32, 0), + // glue (or chain) input edge + SDValue(eq32, 1) + ); + +// R58 = R55 == R31; +// Instr #235 +SDNode *eq33 = CurDAG->getMachineNode( + Connex::EQ_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(xor1, 0), + SDValue(vload0, 0), + SDValue(vload19, 0), + // glue (or chain) input edge + SDValue(sub17, 1) + ); + +// R58 = R30 - R58; +// Instr #236 +SDNode *sub18 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq33, 0), + SDValue(eq33, 0), + // glue (or chain) input edge + SDValue(eq33, 1) + ); + +// R57 = R58 | R60; +// Instr #237 +SDNode *or19 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and22, 0), + SDValue(sub18, 0), + SDValue(vload20, 0), + // glue (or chain) input edge + SDValue(sub18, 1) + ); + +// R57 = R57 & R59; +// Instr #238 +SDNode *and24 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub17, 0), + SDValue(or19, 0), + SDValue(or19, 0), + // glue (or chain) input edge + SDValue(or19, 1) + ); + +// END_WHERE; +// Instr #239 +SDNode *endwhere16 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(and24, 1) + ); + +// R52 = R00 == R30; +// Instr #240 +SDNode *eq34 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload36, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere16, 0) + ); + +SDValue ct74 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); 
+// NOP; +// Instr #241 +SDNode *nop17 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct74, + // glue (or chain) input edge + SDValue(eq34, 1) + ); + +// WHERE_EQ; +// Instr #242 +SDNode *whereeq17 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq34, 0), + // glue (or chain) input edge + SDValue(nop17, 0) + ); + +SDValue ct75 = CurDAG->getConstant(10, DL, MVT::i16, true, false); +// R19 = R16 << 10; +// Instr #243 +SDNode *ishl12 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload38, 0), + ct75, + SDValue(or18, 0), + // glue (or chain) input edge + SDValue(whereeq17, 1) + ); + +// R18 = R18 & R10; +// Instr #244 +SDNode *and25 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload4, 0), + SDValue(or17, 0), + SDValue(or17, 0), + // glue (or chain) input edge + SDValue(ishl12, 1) + ); + +// R19 = R19 | R18; +// Instr #245 +SDNode *or20 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and25, 0), + SDValue(ishl12, 0), + SDValue(ishl12, 0), + // glue (or chain) input edge + SDValue(and25, 1) + ); + +// R19 = R57 + R19; +// Instr #246 +SDNode *add3 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or20, 0), + SDValue(and24, 0), + SDValue(or20, 0), + // glue (or chain) input edge + SDValue(or20, 1) + ); + +// R19 = R19 | R15; +// Instr #247 +SDNode *resF16 /*or21*/ = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(xor0, 0), + SDValue(add3, 0), + SDValue(add3, 0), + // glue (or chain) input edge + SDValue(add3, 1) + ); + +// END_WHERE; +// Instr #248 +SDNode *lastNode /*endwhere17*/ = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, +// Alex: MVT::Glue, + MVT::Other, + // glue (or chain) input edge +// Alex: SDValue(or21, 1) + SDValue(resF16, 1) + ); + 
Index: lib/Target/Connex/Select_MULTi32_ComplementedRepresentation_OpincaaCodeGen.h =================================================================== --- lib/Target/Connex/Select_MULTi32_ComplementedRepresentation_OpincaaCodeGen.h +++ lib/Target/Connex/Select_MULTi32_ComplementedRepresentation_OpincaaCodeGen.h @@ -0,0 +1,354 @@ +//===-- Select_MULTi32_OpincaaCodeGen.h --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// Code auto-generated by method Kernel::genLLVMISelManualCode() +/// from the OPINCAA lib, from kernel mul.f16. +/// You should include this code in the Select() method of the SelectionDAGISel +/// class of your back end. +/// Number of instructions generated: 27. +/// +//===----------------------------------------------------------------------===// + + +// Copied from /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/opincaa_standalone_apps/Emulate_i32/MULTi32_manual_Complemented_radix_216_representation/DumpISel_OpincaaCodeGen_old27_220.cpp + + +// R27 is REG_SRC1. It is represented by the result of nodeOpSrcCast1. +// R28 is REG_SRC2. It is represented by the result of nodeOpSrcCast2.
+ + + + + + +SDValue ct0 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R31 = 0; +// Instr #0 +SDNode *vload0 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + ct0, + // glue (or chain) input edge + SDValue(nodeOpSrcCast2, 1) + ); + +SDValue ct1 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R30 = 1; +// Instr #1 +SDNode *vload1 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + ct1, + // glue (or chain) input edge + SDValue(vload0, 1) + ); + +// MULT_U(R28, R27); +// Instr #2 +SDNode *mult_u0 = CurDAG->getMachineNode( + Connex::MULT_U_H, + DL, + MVT::Other, + SDValue(nodeOpSrcCast2, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(vload1, 1) + ); + +// R26 = MULT_LOW(); +// Instr #3 +SDNode *multlo0 = CurDAG->getMachineNode( + Connex::MULTLO_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // glue (or chain) input edge + SDValue(mult_u0, 0) + ); + +// R25 = MULT_HIGH(); +// Instr #4 +SDNode *multhi0 = CurDAG->getMachineNode( + Connex::MULTHI_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // glue (or chain) input edge + SDValue(multlo0, 1) + ); + +// CELL_SHR(R27, R30); +// Instr #5 +SDNode *cellshr0 = CurDAG->getMachineNode( + Connex::CELLSHR_H, + DL, + MVT::Other, + SDValue(nodeOpSrcCast1, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(multhi0, 1) + ); + +SDValue ct2 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #6 +SDNode *nop0 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Other, + ct2, + // glue (or chain) input edge + SDValue(cellshr0, 0) + ); + +// R24 = SHIFT_REG; +// Instr #7 +SDNode *ldsh0 = CurDAG->getMachineNode( + Connex::LDSH_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // glue (or chain) input edge + SDValue(nop0, 0) + ); + +// MULT_U(R24, R28); +// Instr #8 +SDNode *mult_u1 = CurDAG->getMachineNode( + Connex::MULT_U_H, + DL, + MVT::Other, + SDValue(ldsh0, 0), 
+ SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(ldsh0, 1) + ); + +// R24 = MULT_LOW(); +// Instr #9 +SDNode *multlo1 = CurDAG->getMachineNode( + Connex::MULTLO_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // glue (or chain) input edge + SDValue(mult_u1, 0) + ); + +// CELL_SHR(R28, R30); +// Instr #10 +SDNode *cellshr1 = CurDAG->getMachineNode( + Connex::CELLSHR_H, + DL, + MVT::Other, + SDValue(nodeOpSrcCast2, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(multlo1, 1) + ); + +SDValue ct3 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #11 +SDNode *nop1 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Other, + ct3, + // glue (or chain) input edge + SDValue(cellshr1, 0) + ); + +// R23 = SHIFT_REG; +// Instr #12 +SDNode *ldsh1 = CurDAG->getMachineNode( + Connex::LDSH_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // glue (or chain) input edge + SDValue(nop1, 0) + ); + +// MULT_U(R23, R27); +// Instr #13 +SDNode *mult_u2 = CurDAG->getMachineNode( + Connex::MULT_U_H, + DL, + MVT::Other, + SDValue(ldsh1, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(ldsh1, 1) + ); + +// R23 = MULT_LOW(); +// Instr #14 +SDNode *multlo2 = CurDAG->getMachineNode( + Connex::MULTLO_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // glue (or chain) input edge + SDValue(mult_u2, 0) + ); + +// CELL_SHR(R25, R30); +// Instr #15 +SDNode *cellshr2 = CurDAG->getMachineNode( + Connex::CELLSHR_H, + DL, + MVT::Other, + SDValue(multhi0, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(multlo2, 1) + ); + +SDValue ct4 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #16 +SDNode *nop2 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Other, + ct4, + // glue (or chain) input edge + SDValue(cellshr2, 0) + ); + +// R21 = SHIFT_REG; +// Instr #17 +SDNode *ldsh2 = CurDAG->getMachineNode( + Connex::LDSH_H, + DL, 
+ TYPE_VECTOR_I16, + MVT::Other, + // glue (or chain) input edge + SDValue(nop2, 0) + ); + +// R14 = INDEX; +// Instr #18 +SDNode *ldix0 = CurDAG->getMachineNode( + Connex::LDIX_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // glue (or chain) input edge + SDValue(ldsh2, 1) + ); + +// R13 = R14 & R30; +// Instr #19 +SDNode *and0 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + SDValue(vload1, 0), + SDValue(ldix0, 0), + // glue (or chain) input edge + SDValue(ldix0, 1) + ); + +// R12 = R13 == R30; +// Instr #20 +SDNode *eq0 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + SDValue(and0, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and0, 1) + ); + +SDValue ct5 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #21 +SDNode *nop3 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Other, + ct5, + // glue (or chain) input edge + SDValue(eq0, 1) + ); + +// WHERE_EQ; +// Instr #22 +SDNode *whereeq0 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Other, + SDValue(eq0, 0), + // glue (or chain) input edge + SDValue(nop3, 0) + ); + +// R26 = R21 | R21; +// Instr #23 +SDNode *or0 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + SDValue(ldsh2, 0), + SDValue(ldsh2, 0), + SDValue(multlo0, 0), + // glue (or chain) input edge + SDValue(whereeq0, 1) + ); + +// R26 = R24 + R26; +// Instr #24 +SDNode *add0 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + SDValue(or0, 0), + SDValue(multlo1, 0), + SDValue(or0, 0), + // glue (or chain) input edge + SDValue(or0, 1) + ); + +// R26 = R23 + R26; +// Instr #25 +SDNode *resH /*add1*/ = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + SDValue(add0, 0), + SDValue(multlo2, 0), + SDValue(add0, 0), + // glue (or chain) input edge + SDValue(add0, 1) + ); 
+ +// END_WHERE; +// Instr #26 +SDNode *lastNode /*endwhere0*/ = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Other, + // input edge: result #1 of resH, i.e. its MVT::Other (chain) output + SDValue(resH /*add1*/, 1) + ); + + +//SDNode *lastNode = resF16; Index: lib/Target/Connex/Select_REDf16_OpincaaCodeGen.h =================================================================== --- lib/Target/Connex/Select_REDf16_OpincaaCodeGen.h +++ lib/Target/Connex/Select_REDf16_OpincaaCodeGen.h @@ -0,0 +1,1562 @@ +//===-- Select_REDf16_OpincaaCodeGen.h --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +// Code auto-generated by method Kernel::genLLVMISelManualCode() +// from the OPINCAA lib, from kernel red.f16. +// You should include this code in the Select() method of the SelectionDAGISel +// class of your back end. +// Number of instructions generated: 122. 
+// +//===----------------------------------------------------------------------===// + + +// From /home/asusu/LLVM/Tests/opincaa_standalone_apps/Emulate_f16/REDf16_manual/DumpISel_OpincaaCodeGen.cpp + +// Expects CurDAG, DL, TYPE_VECTOR_I16 and nodeOpSrcCast in the including scope. +// Nodes are glued in emission order; the last one is reduceH (MVT::Other). +SDValue ct0 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R31 = 1; +// Instr #0 +SDNode *vload0 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct0, + // glue (or chain) input edge + SDValue(nodeOpSrcCast, 1) + ); + +SDValue ct1 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R30 = 0; +// Instr #1 +SDNode *vload1 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct1, + // glue (or chain) input edge + SDValue(vload0, 1) + ); + +SDValue ct2 = CurDAG->getConstant(31, DL, MVT::i16, true, false); +// R29 = 31; +// Instr #2 +SDNode *vload2 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct2, + // glue (or chain) input edge + SDValue(vload1, 1) + ); + +SDValue ct3 = CurDAG->getConstant(1023, DL, MVT::i16, true, false); +// R13 = 1023; +// Instr #3 +SDNode *vload3 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct3, + // glue (or chain) input edge + SDValue(vload2, 1) + ); + +SDValue ct4 = CurDAG->getConstant(31744, DL, MVT::i16, true, false); +// R12 = 31744; +// Instr #4 +SDNode *vload4 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct4, + // glue (or chain) input edge + SDValue(vload3, 1) + ); + +SDValue ct5 = CurDAG->getConstant(-32768, DL, MVT::i16, true, false); +// R11 = -32768; +// Instr #5 +SDNode *vload5 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct5, + // glue (or chain) input edge + SDValue(vload4, 1) + ); + +SDValue ct6 = CurDAG->getConstant(1024, DL, MVT::i16, true, false); +// R10 = 1024; +// Instr #6 +SDNode *vload6 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct6, + // 
glue (or chain) input edge + SDValue(vload5, 1) + ); + +// R25 = R28 & R11; +// Instr #7 +SDNode *and0 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(nodeOpSrcCast, 0), + // glue (or chain) input edge + SDValue(vload6, 1) + ); + +// R26 = R28 & R12; +// Instr #8 +SDNode *and1 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload4, 0), + SDValue(nodeOpSrcCast, 0), + // glue (or chain) input edge + SDValue(and0, 1) + ); + +SDValue ct7 = CurDAG->getConstant(10, DL, MVT::i16, true, false); +// R26 = R26 >> 10; +// Instr #9 +SDNode *ishr0 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and1, 0), + ct7, + // glue (or chain) input edge + SDValue(and1, 1) + ); + +// R27 = R28 & R13; +// Instr #10 +SDNode *and2 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload3, 0), + SDValue(nodeOpSrcCast, 0), + // glue (or chain) input edge + SDValue(ishr0, 1) + ); + +// R17 = R30 < R27; +// Instr #11 +SDNode *lt0 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(and2, 0), + // glue (or chain) input edge + SDValue(and2, 1) + ); + +// R16 = R26 == R30; +// Instr #12 +SDNode *eq0 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr0, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(lt0, 1) + ); + +// R09 = R16 & R17; +// Instr #13 +SDNode *and3 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt0, 0), + SDValue(eq0, 0), + // glue (or chain) input edge + SDValue(eq0, 1) + ); + +// R09 = R09 == R31; +// Instr #14 +SDNode *eq1 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and3, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(and3, 1) + ); + +SDValue ct8 = 
CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #15 +SDNode *nop0 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct8, + // glue (or chain) input edge + SDValue(eq1, 1) + ); + +// WHERE_EQ; +// Instr #16 +SDNode *whereeq0 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq1, 0), + // glue (or chain) input edge + SDValue(nop0, 0) + ); + +SDValue ct9 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R26 = 1; +// Instr #17 +SDNode *vload7 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct9, + SDValue(ishr0, 0), + // glue (or chain) input edge + SDValue(whereeq0, 1) + ); + +// END_WHERE; +// Instr #18 +SDNode *endwhere0 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload7, 1) + ); + +// R17 = R26 == R29; +// Instr #19 +SDNode *eq2 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(endwhere0, 0) + ); + +// R17 = R17 | R16; +// Instr #20 +SDNode *or0 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq0, 0), + SDValue(eq2, 0), + // glue (or chain) input edge + SDValue(eq2, 1) + ); + +// R17 = R17 == R30; +// Instr #21 +SDNode *eq3 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or0, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(or0, 1) + ); + +SDValue ct10 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #22 +SDNode *nop1 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct10, + // glue (or chain) input edge + SDValue(eq3, 1) + ); + +// WHERE_EQ; +// Instr #23 +SDNode *whereeq1 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + 
SDValue(eq3, 0), + // glue (or chain) input edge + SDValue(nop1, 0) + ); + +// R27 = R27 | R10; +// Instr #24 +SDNode *or1 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload6, 0), + SDValue(and2, 0), + SDValue(and2, 0), + // glue (or chain) input edge + SDValue(whereeq1, 1) + ); + +// END_WHERE; +// Instr #25 +SDNode *endwhere1 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(or1, 1) + ); + +// R18 = R26 == R29; +// Instr #26 +SDNode *eq4 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(endwhere1, 0) + ); + +// R17 = R27 == R30; +// Instr #27 +SDNode *eq5 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or1, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(eq4, 1) + ); + +// R09 = R31 - R17; +// Instr #28 +SDNode *sub0 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(eq5, 0), + // glue (or chain) input edge + SDValue(eq5, 1) + ); + +// R09 = R09 & R18; +// Instr #29 +SDNode *and4 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq4, 0), + SDValue(sub0, 0), + // glue (or chain) input edge + SDValue(sub0, 1) + ); + +// REDUCE(R09); +// Instr #30 +SDNode *sumRed0 = CurDAG->getMachineNode( + Connex::RED_H, + DL, + MVT::Glue, + SDValue(and4, 0), + // glue (or chain) input edge + SDValue(and4, 1) + ); + +// R24 = R18 & R17; +// Instr #31 +SDNode *and5 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq5, 0), + SDValue(eq4, 0), + // glue (or chain) input edge + SDValue(sumRed0, 0) + ); + +// R09 = R25 == R30; +// Instr #32 +SDNode *eq6 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and0, 0), + 
SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and5, 1) + ); + +// R16 = R24 & R09; +// Instr #33 +SDNode *and6 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq6, 0), + SDValue(and5, 0), + // glue (or chain) input edge + SDValue(eq6, 1) + ); + +// REDUCE(R16); +// Instr #34 +SDNode *sumRed1 = CurDAG->getMachineNode( + Connex::RED_H, + DL, + MVT::Glue, + SDValue(and6, 0), + // glue (or chain) input edge + SDValue(and6, 1) + ); + +// R09 = R31 - R09; +// Instr #35 +SDNode *sub1 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(eq6, 0), + // glue (or chain) input edge + SDValue(sumRed1, 0) + ); + +// R16 = R24 & R09; +// Instr #36 +SDNode *and7 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub1, 0), + SDValue(and5, 0), + // glue (or chain) input edge + SDValue(sub1, 1) + ); + +// REDUCE(R16); +// Instr #37 +SDNode *sumRed2 = CurDAG->getMachineNode( + Connex::RED_H, + DL, + MVT::Glue, + SDValue(and7, 0), + // glue (or chain) input edge + SDValue(and7, 1) + ); + +// R09 = R25 == R11; +// Instr #38 +SDNode *eq7 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and0, 0), + SDValue(vload5, 0), + // glue (or chain) input edge + SDValue(sumRed2, 0) + ); + +SDValue ct11 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #39 +SDNode *nop2 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct11, + // glue (or chain) input edge + SDValue(eq7, 1) + ); + +// WHERE_EQ; +// Instr #40 +SDNode *whereeq2 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq7, 0), + // glue (or chain) input edge + SDValue(nop2, 0) + ); + +// R27 = R30 - R27; +// Instr #41 +SDNode *sub2 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 
0), + SDValue(or1, 0), + SDValue(or1, 0), + // glue (or chain) input edge + SDValue(whereeq2, 1) + ); + +// END_WHERE; +// Instr #42 +SDNode *endwhere2 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sub2, 1) + ); + +SDValue ct12 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R02 = R26 << 0; +// Instr #43 +SDNode *ishl0 = CurDAG->getMachineNode( + Connex::ISHLV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + ct12, + // glue (or chain) input edge + SDValue(endwhere2, 0) + ); + +SDValue ct13 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R02 = 0; +// Instr #44 +SDNode *vload8 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct13, + // glue (or chain) input edge + SDValue(ishl0, 1) + ); + +SDValue ct14 = CurDAG->getConstant(6, DL, MVT::i16, true, false); +// R24 = 6; +// Instr #45 +SDNode *vload9 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct14, + // glue (or chain) input edge + SDValue(vload8, 1) + ); + +// R19 = R26 < R24; +// Instr #46 +SDNode *lt1 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(vload9, 0), + // glue (or chain) input edge + SDValue(vload9, 1) + ); + +// R17 = R02 < R26; +// Instr #47 +SDNode *lt2 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload8, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(lt1, 1) + ); + +// R02 = R31 + R02; +// Instr #48 +SDNode *add0 = CurDAG->getMachineNode( + Connex::ADDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload8, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(lt2, 1) + ); + +// R09 = R19 & R17; +// Instr #49 +SDNode *and8 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt2, 0), + SDValue(lt1, 0), + // glue (or chain) input edge + SDValue(add0, 
1) + ); + +// R09 = R09 == R31; +// Instr #50 +SDNode *eq8 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and8, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(and8, 1) + ); + +SDValue ct15 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #51 +SDNode *nop3 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct15, + // glue (or chain) input edge + SDValue(eq8, 1) + ); + +// WHERE_EQ; +// Instr #52 +SDNode *whereeq3 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq8, 0), + // glue (or chain) input edge + SDValue(nop3, 0) + ); + +// R19 = R26 - R02; +// Instr #53 +SDNode *sub3 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(add0, 0), + SDValue(lt1, 0), + // glue (or chain) input edge + SDValue(whereeq3, 1) + ); + +// R27 = R27 << R19; +// Instr #54 +SDNode *shl0 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub2, 0), + SDValue(sub3, 0), + SDValue(sub2, 0), + // glue (or chain) input edge + SDValue(sub3, 1) + ); + +// REDUCE(R27); +// Instr #55 +SDNode *sumRed3 = CurDAG->getMachineNode( + Connex::RED_H, + DL, + MVT::Glue, + SDValue(shl0, 0), + // glue (or chain) input edge + SDValue(shl0, 1) + ); + +// END_WHERE; +// Instr #56 +SDNode *endwhere3 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sumRed3, 0) + ); + +SDValue ct16 = CurDAG->getConstant(5, DL, MVT::i16, true, false); +// R02 = 5; +// Instr #57 +SDNode *vload10 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct16, + // glue (or chain) input edge + SDValue(endwhere3, 0) + ); + +SDValue ct17 = CurDAG->getConstant(11, DL, MVT::i16, true, false); +// R24 = 11; +// Instr #58 +SDNode *vload11 = CurDAG->getMachineNode( + 
Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct17, + // glue (or chain) input edge + SDValue(vload10, 1) + ); + +// R19 = R26 < R24; +// Instr #59 +SDNode *lt3 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(vload11, 0), + // glue (or chain) input edge + SDValue(vload11, 1) + ); + +// R17 = R02 < R26; +// Instr #60 +SDNode *lt4 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload10, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(lt3, 1) + ); + +// R02 = R31 + R02; +// Instr #61 +SDNode *add1 = CurDAG->getMachineNode( + Connex::ADDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload10, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(lt4, 1) + ); + +// R09 = R19 & R17; +// Instr #62 +SDNode *and9 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt4, 0), + SDValue(lt3, 0), + // glue (or chain) input edge + SDValue(add1, 1) + ); + +// R09 = R09 == R31; +// Instr #63 +SDNode *eq9 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and9, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(and9, 1) + ); + +SDValue ct18 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #64 +SDNode *nop4 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct18, + // glue (or chain) input edge + SDValue(eq9, 1) + ); + +// WHERE_EQ; +// Instr #65 +SDNode *whereeq4 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq9, 0), + // glue (or chain) input edge + SDValue(nop4, 0) + ); + +// R19 = R26 - R02; +// Instr #66 +SDNode *sub4 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(add1, 0), + SDValue(lt3, 0), + // glue (or chain) input edge + SDValue(whereeq4, 1) + 
); + +// R27 = R27 << R19; +// Instr #67 +SDNode *shl1 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl0, 0), + SDValue(sub4, 0), + SDValue(shl0, 0), + // glue (or chain) input edge + SDValue(sub4, 1) + ); + +// REDUCE(R27); +// Instr #68 +SDNode *sumRed4 = CurDAG->getMachineNode( + Connex::RED_H, + DL, + MVT::Glue, + SDValue(shl1, 0), + // glue (or chain) input edge + SDValue(shl1, 1) + ); + +// END_WHERE; +// Instr #69 +SDNode *endwhere4 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sumRed4, 0) + ); + +SDValue ct19 = CurDAG->getConstant(10, DL, MVT::i16, true, false); +// R02 = 10; +// Instr #70 +SDNode *vload12 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct19, + // glue (or chain) input edge + SDValue(endwhere4, 0) + ); + +SDValue ct20 = CurDAG->getConstant(16, DL, MVT::i16, true, false); +// R24 = 16; +// Instr #71 +SDNode *vload13 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct20, + // glue (or chain) input edge + SDValue(vload12, 1) + ); + +// R19 = R26 < R24; +// Instr #72 +SDNode *lt5 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(vload13, 0), + // glue (or chain) input edge + SDValue(vload13, 1) + ); + +// R17 = R02 < R26; +// Instr #73 +SDNode *lt6 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload12, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(lt5, 1) + ); + +// R02 = R31 + R02; +// Instr #74 +SDNode *add2 = CurDAG->getMachineNode( + Connex::ADDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload12, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(lt6, 1) + ); + +// R09 = R19 & R17; +// Instr #75 +SDNode *and10 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + 
SDValue(lt6, 0), + SDValue(lt5, 0), + // glue (or chain) input edge + SDValue(add2, 1) + ); + +// R09 = R09 == R31; +// Instr #76 +SDNode *eq10 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and10, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(and10, 1) + ); + +SDValue ct21 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #77 +SDNode *nop5 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct21, + // glue (or chain) input edge + SDValue(eq10, 1) + ); + +// WHERE_EQ; +// Instr #78 +SDNode *whereeq5 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq10, 0), + // glue (or chain) input edge + SDValue(nop5, 0) + ); + +// R19 = R26 - R02; +// Instr #79 +SDNode *sub5 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(add2, 0), + SDValue(lt5, 0), + // glue (or chain) input edge + SDValue(whereeq5, 1) + ); + +// R27 = R27 << R19; +// Instr #80 +SDNode *shl2 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl1, 0), + SDValue(sub5, 0), + SDValue(shl1, 0), + // glue (or chain) input edge + SDValue(sub5, 1) + ); + +// REDUCE(R27); +// Instr #81 +SDNode *sumRed5 = CurDAG->getMachineNode( + Connex::RED_H, + DL, + MVT::Glue, + SDValue(shl2, 0), + // glue (or chain) input edge + SDValue(shl2, 1) + ); + +// END_WHERE; +// Instr #82 +SDNode *endwhere5 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sumRed5, 0) + ); + +SDValue ct22 = CurDAG->getConstant(15, DL, MVT::i16, true, false); +// R02 = 15; +// Instr #83 +SDNode *vload14 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct22, + // glue (or chain) input edge + SDValue(endwhere5, 0) + ); + +SDValue ct23 = CurDAG->getConstant(21, DL, MVT::i16, 
true, false); +// R24 = 21; +// Instr #84 +SDNode *vload15 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct23, + // glue (or chain) input edge + SDValue(vload14, 1) + ); + +// R19 = R26 < R24; +// Instr #85 +SDNode *lt7 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(vload15, 0), + // glue (or chain) input edge + SDValue(vload15, 1) + ); + +// R17 = R02 < R26; +// Instr #86 +SDNode *lt8 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload14, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(lt7, 1) + ); + +// R02 = R31 + R02; +// Instr #87 +SDNode *add3 = CurDAG->getMachineNode( + Connex::ADDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload14, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(lt8, 1) + ); + +// R09 = R19 & R17; +// Instr #88 +SDNode *and11 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt8, 0), + SDValue(lt7, 0), + // glue (or chain) input edge + SDValue(add3, 1) + ); + +// R09 = R09 == R31; +// Instr #89 +SDNode *eq11 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and11, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(and11, 1) + ); + +SDValue ct24 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #90 +SDNode *nop6 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct24, + // glue (or chain) input edge + SDValue(eq11, 1) + ); + +// WHERE_EQ; +// Instr #91 +SDNode *whereeq6 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq11, 0), + // glue (or chain) input edge + SDValue(nop6, 0) + ); + +// R19 = R26 - R02; +// Instr #92 +SDNode *sub6 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + 
SDValue(add3, 0), + SDValue(lt7, 0), + // glue (or chain) input edge + SDValue(whereeq6, 1) + ); + +// R27 = R27 << R19; +// Instr #93 +SDNode *shl3 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl2, 0), + SDValue(sub6, 0), + SDValue(shl2, 0), + // glue (or chain) input edge + SDValue(sub6, 1) + ); + +// REDUCE(R27); +// Instr #94 +SDNode *sumRed6 = CurDAG->getMachineNode( + Connex::RED_H, + DL, + MVT::Glue, + SDValue(shl3, 0), + // glue (or chain) input edge + SDValue(shl3, 1) + ); + +// END_WHERE; +// Instr #95 +SDNode *endwhere6 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sumRed6, 0) + ); + +SDValue ct25 = CurDAG->getConstant(20, DL, MVT::i16, true, false); +// R02 = 20; +// Instr #96 +SDNode *vload16 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct25, + // glue (or chain) input edge + SDValue(endwhere6, 0) + ); + +SDValue ct26 = CurDAG->getConstant(26, DL, MVT::i16, true, false); +// R24 = 26; +// Instr #97 +SDNode *vload17 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct26, + // glue (or chain) input edge + SDValue(vload16, 1) + ); + +// R19 = R26 < R24; +// Instr #98 +SDNode *lt9 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(vload17, 0), + // glue (or chain) input edge + SDValue(vload17, 1) + ); + +// R17 = R02 < R26; +// Instr #99 +SDNode *lt10 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload16, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(lt9, 1) + ); + +// R02 = R31 + R02; +// Instr #100 +SDNode *add4 = CurDAG->getMachineNode( + Connex::ADDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload16, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(lt10, 1) + ); + +// R09 = R19 & R17; +// Instr #101 +SDNode 
*and12 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt10, 0), + SDValue(lt9, 0), + // glue (or chain) input edge + SDValue(add4, 1) + ); + +// R09 = R09 == R31; +// Instr #102 +SDNode *eq12 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and12, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(and12, 1) + ); + +SDValue ct27 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #103 +SDNode *nop7 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct27, + // glue (or chain) input edge + SDValue(eq12, 1) + ); + +// WHERE_EQ; +// Instr #104 +SDNode *whereeq7 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq12, 0), + // glue (or chain) input edge + SDValue(nop7, 0) + ); + +// R19 = R26 - R02; +// Instr #105 +SDNode *sub7 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(add4, 0), + SDValue(lt9, 0), + // glue (or chain) input edge + SDValue(whereeq7, 1) + ); + +// R27 = R27 << R19; +// Instr #106 +SDNode *shl4 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl3, 0), + SDValue(sub7, 0), + SDValue(shl3, 0), + // glue (or chain) input edge + SDValue(sub7, 1) + ); + +// REDUCE(R27); +// Instr #107 +SDNode *sumRed7 = CurDAG->getMachineNode( + Connex::RED_H, + DL, + MVT::Glue, + SDValue(shl4, 0), + // glue (or chain) input edge + SDValue(shl4, 1) + ); + +// END_WHERE; +// Instr #108 +SDNode *endwhere7 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sumRed7, 0) + ); + +SDValue ct28 = CurDAG->getConstant(25, DL, MVT::i16, true, false); +// R02 = 25; +// Instr #109 +SDNode *vload18 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct28, + // glue (or 
chain) input edge + SDValue(endwhere7, 0) + ); + +SDValue ct29 = CurDAG->getConstant(31, DL, MVT::i16, true, false); +// R24 = 31; +// Instr #110 +SDNode *vload19 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct29, + // glue (or chain) input edge + SDValue(vload18, 1) + ); + +// R19 = R26 < R24; +// Instr #111 +SDNode *lt11 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(vload19, 0), + // glue (or chain) input edge + SDValue(vload19, 1) + ); + +// R17 = R02 < R26; +// Instr #112 +SDNode *lt12 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload18, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(lt11, 1) + ); + +// R02 = R31 + R02; +// Instr #113 +SDNode *add5 = CurDAG->getMachineNode( + Connex::ADDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload18, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(lt12, 1) + ); + +// R09 = R19 & R17; +// Instr #114 +SDNode *and13 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt12, 0), + SDValue(lt11, 0), + // glue (or chain) input edge + SDValue(add5, 1) + ); + +// R09 = R09 == R31; +// Instr #115 +SDNode *eq13 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and13, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(and13, 1) + ); + +SDValue ct30 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #116 +SDNode *nop8 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct30, + // glue (or chain) input edge + SDValue(eq13, 1) + ); + +// WHERE_EQ; +// Instr #117 +SDNode *whereeq8 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq13, 0), + // glue (or chain) input edge + SDValue(nop8, 0) + ); + +// R19 = R26 - R02; +// Instr #118 +SDNode 
*sub8 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(add5, 0), + SDValue(lt11, 0), + // glue (or chain) input edge + SDValue(whereeq8, 1) + ); + +// R27 = R27 << R19; +// Instr #119 +SDNode *shl5 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl4, 0), + SDValue(sub8, 0), + SDValue(shl4, 0), + // glue (or chain) input edge + SDValue(sub8, 1) + ); + +// REDUCE(R27); +// Instr #120 +SDNode *sumRed8 = CurDAG->getMachineNode( + Connex::RED_H, + DL, + MVT::Glue, + SDValue(shl5, 0), + // glue (or chain) input edge + SDValue(shl5, 1) + ); + +// END_WHERE; +// Instr #121 +SDNode *reduceH /* endwhere8 */ = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, +// Alex: MVT::Glue here asserts: getNodeId() == -1 && "Node already inserted!" + MVT::Other, + // glue (or chain) input edge + SDValue(sumRed8, 0) + ); + Index: lib/Target/Connex/Select_REDi32_OpincaaCodeGen.h =================================================================== --- lib/Target/Connex/Select_REDi32_OpincaaCodeGen.h +++ lib/Target/Connex/Select_REDi32_OpincaaCodeGen.h @@ -0,0 +1,191 @@ +//===-- Select_REDi32_OpincaaCodeGen.h --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +// Code auto-generated by method Kernel::genLLVMISelManualCode() +// from the OPINCAA lib, from kernel red.i32. +// You should include this code in the Select() method of the SelectionDAGISel +// class of your back end. +// Number of instructions generated: 14. 
+// +//===----------------------------------------------------------------------===// + + +// From /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/opincaa_standalone_apps/Emulate_i32/RED_i32_manual/DumpISel_OpincaaCodeGen_old04_300.cpp + +// Expects CurDAG, DL, TYPE_VECTOR_I16 and nodeOpSrcCast in the including scope. +// Nodes are glued in emission order; the last one is reduceHigh16 (MVT::Other). +SDValue ct0 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R29 = 1; +// Instr #0 +SDNode *vload0 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct0, + // glue (or chain) input edge + SDValue(nodeOpSrcCast, 1) + ); + +// CELL_SHR(R28, R29); +// Instr #1 +SDNode *cellshr0 = CurDAG->getMachineNode( + Connex::CELLSHR_H, + DL, + MVT::Glue, + SDValue(nodeOpSrcCast, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(vload0, 1) + ); + +SDValue ct1 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #2 +SDNode *nop0 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct1, + // glue (or chain) input edge + SDValue(cellshr0, 0) + ); + +// R27 = SHIFT_REG; +// Instr #3 +SDNode *ldsh0 = CurDAG->getMachineNode( + Connex::LDSH_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + // glue (or chain) input edge + SDValue(nop0, 0) + ); + +// R26 = INDEX; +// Instr #4 +SDNode *ldix0 = CurDAG->getMachineNode( + Connex::LDIX_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + // glue (or chain) input edge + SDValue(ldsh0, 1) + ); + +// R25 = R26 & R29; +// Instr #5 +SDNode *and0 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(ldix0, 0), + // glue (or chain) input edge + SDValue(ldix0, 1) + ); + +// R24 = R25 == R29; +// Instr #6 +SDNode *eq0 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and0, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(and0, 1) + ); + +SDValue ct2 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #7 +SDNode *nop1 = 
CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct2, + // glue (or chain) input edge + SDValue(eq0, 1) + ); + +// WHERE_EQ; +// Instr #8 +SDNode *whereeq0 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq0, 0), + // glue (or chain) input edge + SDValue(nop1, 0) + ); + +SDValue ct3 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R28 = 0; +// Instr #9 +SDNode *vload1 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct3, + SDValue(nodeOpSrcCast, 0), + // glue (or chain) input edge + SDValue(whereeq0, 1) + ); + +SDValue ct4 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R27 = 0; +// Instr #10 +SDNode *vload2 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct4, + SDValue(ldsh0, 0), + // glue (or chain) input edge + SDValue(vload1, 1) + ); + +// END_WHERE; +// Instr #11 +SDNode *endwhere0 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload2, 1) + ); + +// REDUCE_U(R28); +// Instr #12 +SDNode *sumRedU0 = CurDAG->getMachineNode( + Connex::RED_U_H, + DL, + MVT::Glue, + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere0, 0) + ); + +// REDUCE_U(R27); +// Instr #13 +SDNode *reduceHigh16 /*sumRedU1*/ = CurDAG->getMachineNode( + Connex::RED_U_H, + DL, +// Alex: was MVT::Glue; this final node returns MVT::Other instead. + MVT::Other, + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(sumRedU0, 0) + ); + Index: lib/Target/Connex/Select_SHRAi32_OpincaaCodeGen.h =================================================================== --- lib/Target/Connex/Select_SHRAi32_OpincaaCodeGen.h +++ lib/Target/Connex/Select_SHRAi32_OpincaaCodeGen.h @@ -0,0 +1,464 @@ +//===-- Select_SHRAi32_OpincaaCodeGen.h --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +// Code auto-generated by method Kernel::genLLVMISelManualCode() +// from the OPINCAA lib, from kernel shra.i32. +// You should include this code in the Select() method of the SelectionDAGISel +// class of your back end. +// Number of instructions generated: 33. +// +//===----------------------------------------------------------------------===// + +// From /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/opincaa_standalone_apps/Emulate_i32/SHRA_i32_manual/DumpISel_OpincaaCodeGen_old13_927.cpp + + + + +/* Alex: added manually to have predicated instructions refer to tied-to + constraints to these nodes (destination registers of predicated instr) + without initializing the respective dest registers, since it's not necessary. +*/ +SDValue ct21Node = CurDAG->getConstant(21, DL, MVT::i16, true, false); +SDNode *r21Node = CurDAG->getMachineNode( + Connex::VLOAD_BOGUS_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct21Node, + // glue (or chain) input edge + SDValue(nodeOpSrcCast1, 1) + ); + +SDValue ct22Node = CurDAG->getConstant(22, DL, MVT::i16, true, false); +SDNode *r22Node = CurDAG->getMachineNode( + Connex::VLOAD_BOGUS_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct22Node, + // glue (or chain) input edge + SDValue(r21Node, 1) + ); + + +SDValue ct0 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R31 = 0; +// Instr #0 +SDNode *vload0 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct0, + // glue (or chain) input edge + SDValue(r22Node, 1) + ); + +SDValue ct1 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R30 = 1; +// Instr #1 +SDNode *vload1 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct1, + // glue (or chain) input edge + SDValue(vload0, 1) + 
); + +SDValue ct2 = CurDAG->getConstant(16, DL, MVT::i16, true, false); +// R10 = 16; +// Instr #2 +SDNode *vload2 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct2, + // glue (or chain) input edge + SDValue(vload1, 1) + ); + +SDValue ct3 = CurDAG->getConstant(31, DL, MVT::i16, true, false); +// R08 = 31; +// Instr #3 +SDNode *vload3 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct3, + // glue (or chain) input edge + SDValue(vload2, 1) + ); + +// R27 = R27 & R08; +// Instr #4 +SDNode *and0 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload3, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(vload3, 1) + ); + +// R25 = INDEX; +// Instr #5 +SDNode *ldix0 = CurDAG->getMachineNode( + Connex::LDIX_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + // glue (or chain) input edge + SDValue(and0, 1) + ); + +// R25 = R25 & R30; +// Instr #6 +SDNode *and1 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(ldix0, 0), + // glue (or chain) input edge + SDValue(ldix0, 1) + ); + +// CELL_SHR(R27, R25); +// Instr #7 +SDNode *cellshr0 = CurDAG->getMachineNode( + Connex::CELLSHR_H, + DL, + MVT::Glue, + SDValue(and0, 0), + SDValue(and1, 0), + // glue (or chain) input edge + SDValue(and1, 1) + ); + +SDValue ct4 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #8 +SDNode *nop0 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct4, + // glue (or chain) input edge + SDValue(cellshr0, 0) + ); + +// R27 = SHIFT_REG; +// Instr #9 +SDNode *ldsh0 = CurDAG->getMachineNode( + Connex::LDSH_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + // glue (or chain) input edge + SDValue(nop0, 0) + ); + +// R20 = R10 < R27; +// Instr #10 +SDNode *lt0 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 
0), + SDValue(ldsh0, 0), + // glue (or chain) input edge + SDValue(ldsh0, 1) + ); + +// R29 = SHRA(R28, R27); +// Instr #11 +SDNode *shra0 = CurDAG->getMachineNode( + Connex::SHRAV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(nodeOpSrcCast1, 0), + SDValue(ldsh0, 0), + // glue (or chain) input edge + SDValue(lt0, 1) + ); + +// CELL_SHL(R28, R30); +// Instr #12 +SDNode *cellshl0 = CurDAG->getMachineNode( + Connex::CELLSHL_H, + DL, + MVT::Glue, + SDValue(nodeOpSrcCast1, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(shra0, 1) + ); + +SDValue ct5 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #13 +SDNode *nop1 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct5, + // glue (or chain) input edge + SDValue(cellshl0, 0) + ); + +// R23 = SHIFT_REG; +// Instr #14 +SDNode *ldsh1 = CurDAG->getMachineNode( + Connex::LDSH_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + // glue (or chain) input edge + SDValue(nop1, 0) + ); + +// R25 = R25 == R31; +// Instr #15 +SDNode *eq0 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and1, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(ldsh1, 1) + ); + +// R24 = R20 & R25; +// Instr #16 +SDNode *and2 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq0, 0), + SDValue(lt0, 0), + // glue (or chain) input edge + SDValue(eq0, 1) + ); + +// R19 = R24 == R30; +// Instr #17 +SDNode *eq1 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and2, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and2, 1) + ); + +SDValue ct6 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #18 +SDNode *nop2 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct6, + // glue (or chain) input edge + SDValue(eq1, 1) + ); + +// WHERE_EQ; +// Instr #19 +SDNode 
*whereeq0 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq1, 0), + // glue (or chain) input edge + SDValue(nop2, 0) + ); + +// R21 = R27 - R10; +// Instr #20 +SDNode *sub0 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ldsh0, 0), + SDValue(vload2, 0), + SDValue(r21Node, 0), + // glue (or chain) input edge + SDValue(whereeq0, 1) + ); + +// R29 = SHRA(R23, R21); +// Instr #21 +SDNode *shra1 = CurDAG->getMachineNode( + Connex::SHRAV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ldsh1, 0), + SDValue(sub0, 0), + SDValue(shra0, 0), + // glue (or chain) input edge + SDValue(sub0, 1) + ); + +// END_WHERE; +// Instr #22 +SDNode *endwhere0 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(shra1, 1) + ); + +// R20 = R30 - R20; +// Instr #23 +SDNode *sub1 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(lt0, 0), + // glue (or chain) input edge + SDValue(endwhere0, 0) + ); + +// R24 = R20 & R25; +// Instr #24 +SDNode *and3 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq0, 0), + SDValue(sub1, 0), + // glue (or chain) input edge + SDValue(sub1, 1) + ); + +// R19 = R24 == R30; +// Instr #25 +SDNode *eq2 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and3, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and3, 1) + ); + +SDValue ct7 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #26 +SDNode *nop3 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct7, + // glue (or chain) input edge + SDValue(eq2, 1) + ); + +// WHERE_EQ; +// Instr #27 +SDNode *whereeq1 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq2, 0), + // glue (or chain) 
input edge + SDValue(nop3, 0) + ); + +// R21 = R10 - R27; +// Instr #28 +SDNode *sub2 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(ldsh0, 0), + SDValue(sub0, 0), + // glue (or chain) input edge + SDValue(whereeq1, 1) + ); + +// R22 = R23 << R21; +// Instr #29 +SDNode *shl0 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ldsh1, 0), + SDValue(sub2, 0), + SDValue(r22Node, 0), + // glue (or chain) input edge + SDValue(sub2, 1) + ); + +// R29 = R28 >> R27; +// Instr #30 +SDNode *shr0 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(nodeOpSrcCast1, 0), + SDValue(ldsh0, 0), + SDValue(shra1, 0), + // glue (or chain) input edge + SDValue(shl0, 1) + ); + +// R29 = R29 | R22; +// Instr #31 +SDNode *resH /*or0*/ = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl0, 0), + SDValue(shr0, 0), + SDValue(shr0, 0), + // glue (or chain) input edge + SDValue(shr0, 1) + ); + +// END_WHERE; +// Instr #32 +SDNode *lastNode /*endwhere1*/ = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, +// MVT::Glue, + MVT::Other, + // glue (or chain) input edge + SDValue(resH /*or0*/, 1) + ); + Index: lib/Target/Connex/Select_SUBf16_OpincaaCodeGen.h =================================================================== --- lib/Target/Connex/Select_SUBf16_OpincaaCodeGen.h +++ lib/Target/Connex/Select_SUBf16_OpincaaCodeGen.h @@ -0,0 +1,3651 @@ +//===-- Select_SUB_f16_OpincaaCodeGen.h --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +// Code auto-generated by method Kernel::genLLVMISelManualCode() +// from the OPINCAA lib, from kernel sub.f16. +// You should include this code in the Select() method of the SelectionDAGISel +// class of your back end. +// It is important to put this code in the Select() method of the +// SelectionDAGISel class of your back end, after the ISelLowering pass, +// which contains the DAG Combiner, because the DAG Combiner can remove +// the getCopyToReg() we create, which can lead to the following error: +// <> assertion failed. +// Number of instructions generated: 280. +// +//===----------------------------------------------------------------------===// + + +// From /home/asusu/LLVM/Tests/opincaa_standalone_apps/Emulate_f16/ADD_SUB_f16_manual/DumpISel_OpincaaCodeGen_old37_C10_SUBf16.cpp + + + + + +SDValue ct0 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R14 = 1; +// Instr #0 +SDNode *vload0 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct0, + // glue (or chain) input edge + SDValue(nodeOpSrcCast1, 1) + ); + +SDValue ct1 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R30 = 1; +// Instr #1 +SDNode *vload1 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct1, + // glue (or chain) input edge + SDValue(vload0, 1) + ); + +SDValue ct2 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R31 = 0; +// Instr #2 +SDNode *vload2 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct2, + // glue (or chain) input edge + SDValue(vload1, 1) + ); + +SDValue ct3 = CurDAG->getConstant(16, DL, MVT::i16, true, false); +// R29 = 16; +// Instr #3 +SDNode *vload3 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct3, + // glue (or chain) input edge + SDValue(vload2, 1) 
+ ); + +SDValue ct4 = CurDAG->getConstant(31, DL, MVT::i16, true, false); +// R28 = 31; +// Instr #4 +SDNode *vload4 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct4, + // glue (or chain) input edge + SDValue(vload3, 1) + ); + +SDValue ct5 = CurDAG->getConstant(1023, DL, MVT::i16, true, false); +// R13 = 1023; +// Instr #5 +SDNode *vload5 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct5, + // glue (or chain) input edge + SDValue(vload4, 1) + ); + +SDValue ct6 = CurDAG->getConstant(31744, DL, MVT::i16, true, false); +// R12 = 31744; +// Instr #6 +SDNode *vload6 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct6, + // glue (or chain) input edge + SDValue(vload5, 1) + ); + +SDValue ct7 = CurDAG->getConstant(-32768, DL, MVT::i16, true, false); +// R11 = -32768; +// Instr #7 +SDNode *vload7 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct7, + // glue (or chain) input edge + SDValue(vload6, 1) + ); + +SDValue ct8 = CurDAG->getConstant(1024, DL, MVT::i16, true, false); +// R10 = 1024; +// Instr #8 +SDNode *vload8 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct8, + // glue (or chain) input edge + SDValue(vload7, 1) + ); + +SDValue ct9 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R62 = 0; +// Instr #9 +SDNode *vload9 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct9, + // glue (or chain) input edge + SDValue(vload8, 1) + ); + +SDValue ct10 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R19 = 0; +// Instr #10 +SDNode *vload10 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct10, + // glue (or chain) input edge + SDValue(vload9, 1) + ); + +SDValue ct11 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R62 = 0; +// Instr #11 +SDNode *vload11 = CurDAG->getMachineNode( + 
Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct11, + // glue (or chain) input edge + SDValue(vload10, 1) + ); + +SDValue ct12 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R61 = 0; +// Instr #12 +SDNode *vload12 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct12, + // glue (or chain) input edge + SDValue(vload11, 1) + ); + +SDValue ct13 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R07 = 0; +// Instr #13 +SDNode *vload13 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct13, + // glue (or chain) input edge + SDValue(vload12, 1) + ); + +SDValue ct14 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R04 = 0; +// Instr #14 +SDNode *vload14 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct14, + // glue (or chain) input edge + SDValue(vload13, 1) + ); + +SDValue ct15 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R03 = 0; +// Instr #15 +SDNode *vload15 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct15, + // glue (or chain) input edge + SDValue(vload14, 1) + ); + +SDValue ct16 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R02 = 0; +// Instr #16 +SDNode *vload16 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct16, + // glue (or chain) input edge + SDValue(vload15, 1) + ); + +SDValue ct17 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R01 = 0; +// Instr #17 +SDNode *vload17 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct17, + // glue (or chain) input edge + SDValue(vload16, 1) + ); + +SDValue ct18 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R00 = 0; +// Instr #18 +SDNode *vload18 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct18, + // glue (or chain) input edge + SDValue(vload17, 1) + ); + +// R24 = R27 & R11; +// Instr #19 
+SDNode *and0 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(vload18, 1) + ); + +// R25 = R27 & R12; +// Instr #20 +SDNode *and1 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload6, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(and0, 1) + ); + +SDValue ct19 = CurDAG->getConstant(10, DL, MVT::i16, true, false); +// R25 = R25 >> 10; +// Instr #21 +SDNode *ishr0 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and1, 0), + ct19, + // glue (or chain) input edge + SDValue(and1, 1) + ); + +// R26 = R27 & R13; +// Instr #22 +SDNode *and2 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(ishr0, 1) + ); + +// R18 = R31 < R26; +// Instr #23 +SDNode *lt0 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(and2, 0), + // glue (or chain) input edge + SDValue(and2, 1) + ); + +// R17 = R25 == R31; +// Instr #24 +SDNode *eq0 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr0, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(lt0, 1) + ); + +// R09 = R17 & R18; +// Instr #25 +SDNode *and3 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt0, 0), + SDValue(eq0, 0), + // glue (or chain) input edge + SDValue(eq0, 1) + ); + +// R09 = R09 == R30; +// Instr #26 +SDNode *eq1 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and3, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and3, 1) + ); + +SDValue ct20 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// 
Instr #27 +SDNode *nop0 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct20, + // glue (or chain) input edge + SDValue(eq1, 1) + ); + +// WHERE_EQ; +// Instr #28 +SDNode *whereeq0 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq1, 0), + // glue (or chain) input edge + SDValue(nop0, 0) + ); + +SDValue ct21 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R25 = 1; +// Instr #29 +SDNode *vload19 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct21, + SDValue(ishr0, 0), + // glue (or chain) input edge + SDValue(whereeq0, 1) + ); + +// END_WHERE; +// Instr #30 +SDNode *endwhere0 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload19, 1) + ); + +// R18 = R25 == R28; +// Instr #31 +SDNode *eq2 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload19, 0), + SDValue(vload4, 0), + // glue (or chain) input edge + SDValue(endwhere0, 0) + ); + +// R18 = R18 | R17; +// Instr #32 +SDNode *or0 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq0, 0), + SDValue(eq2, 0), + // glue (or chain) input edge + SDValue(eq2, 1) + ); + +// R18 = R18 == R31; +// Instr #33 +SDNode *eq3 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or0, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(or0, 1) + ); + +SDValue ct22 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #34 +SDNode *nop1 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct22, + // glue (or chain) input edge + SDValue(eq3, 1) + ); + +// WHERE_EQ; +// Instr #35 +SDNode *whereeq1 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq3, 0), + // glue (or chain) input edge + SDValue(nop1, 0) + ); + +// R26 = R26 | 
R10; +// Instr #36 +SDNode *or1 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload8, 0), + SDValue(and2, 0), + SDValue(and2, 0), + // glue (or chain) input edge + SDValue(whereeq1, 1) + ); + +// END_WHERE; +// Instr #37 +SDNode *endwhere1 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(or1, 1) + ); + +// R20 = R23 & R11; +// Instr #38 +SDNode *and4 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(endwhere1, 0) + ); + +// R21 = R23 & R12; +// Instr #39 +SDNode *and5 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload6, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(and4, 1) + ); + +SDValue ct23 = CurDAG->getConstant(10, DL, MVT::i16, true, false); +// R21 = R21 >> 10; +// Instr #40 +SDNode *ishr1 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and5, 0), + ct23, + // glue (or chain) input edge + SDValue(and5, 1) + ); + +// R22 = R23 & R13; +// Instr #41 +SDNode *and6 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(ishr1, 1) + ); + +// R16 = R31 < R22; +// Instr #42 +SDNode *lt1 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(and6, 0), + // glue (or chain) input edge + SDValue(and6, 1) + ); + +// R15 = R21 == R31; +// Instr #43 +SDNode *eq4 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr1, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(lt1, 1) + ); + +// R09 = R15 & R16; +// Instr #44 +SDNode *and7 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + 
TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt1, 0), + SDValue(eq4, 0), + // glue (or chain) input edge + SDValue(eq4, 1) + ); + +// R09 = R09 == R30; +// Instr #45 +SDNode *eq5 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and7, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and7, 1) + ); + +SDValue ct24 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #46 +SDNode *nop2 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct24, + // glue (or chain) input edge + SDValue(eq5, 1) + ); + +// WHERE_EQ; +// Instr #47 +SDNode *whereeq2 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq5, 0), + // glue (or chain) input edge + SDValue(nop2, 0) + ); + +SDValue ct25 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R21 = 1; +// Instr #48 +SDNode *vload20 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct25, + SDValue(ishr1, 0), + // glue (or chain) input edge + SDValue(whereeq2, 1) + ); + +// END_WHERE; +// Instr #49 +SDNode *endwhere2 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload20, 1) + ); + +// R16 = R21 == R28; +// Instr #50 +SDNode *eq6 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload20, 0), + SDValue(vload4, 0), + // glue (or chain) input edge + SDValue(endwhere2, 0) + ); + +// R16 = R16 | R15; +// Instr #51 +SDNode *or2 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq4, 0), + SDValue(eq6, 0), + // glue (or chain) input edge + SDValue(eq6, 1) + ); + +// R16 = R16 == R31; +// Instr #52 +SDNode *eq7 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or2, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(or2, 1) + ); + +SDValue ct26 = 
CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #53 +SDNode *nop3 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct26, + // glue (or chain) input edge + SDValue(eq7, 1) + ); + +// WHERE_EQ; +// Instr #54 +SDNode *whereeq3 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq7, 0), + // glue (or chain) input edge + SDValue(nop3, 0) + ); + +// R22 = R22 | R10; +// Instr #55 +SDNode *or3 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload8, 0), + SDValue(and6, 0), + SDValue(and6, 0), + // glue (or chain) input edge + SDValue(whereeq3, 1) + ); + +// END_WHERE; +// Instr #56 +SDNode *endwhere3 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(or3, 1) + ); + +// R50 = R24 == R11; +// Instr #57 +SDNode *eq8 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and0, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(endwhere3, 0) + ); + +// R49 = R25 == R28; +// Instr #58 +SDNode *eq9 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload19, 0), + SDValue(vload4, 0), + // glue (or chain) input edge + SDValue(eq8, 1) + ); + +// R48 = R26 == R31; +// Instr #59 +SDNode *eq10 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or1, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(eq9, 1) + ); + +// R47 = R20 == R11; +// Instr #60 +SDNode *eq11 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and4, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(eq10, 1) + ); + +// R47 = R47 == R31; +// Instr #61 +SDNode *eq12 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq11, 0), + SDValue(vload2, 0), + // glue (or chain) 
input edge + SDValue(eq11, 1) + ); + +// R46 = R21 == R28; +// Instr #62 +SDNode *eq13 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload20, 0), + SDValue(vload4, 0), + // glue (or chain) input edge + SDValue(eq12, 1) + ); + +// R45 = R22 == R31; +// Instr #63 +SDNode *eq14 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or3, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(eq13, 1) + ); + +// R07 = R49 & R46; +// Instr #64 +SDNode *and8 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq13, 0), + SDValue(eq9, 0), + // glue (or chain) input edge + SDValue(eq14, 1) + ); + +// R08 = R07 & R50; +// Instr #65 +SDNode *and9 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq8, 0), + SDValue(and8, 0), + // glue (or chain) input edge + SDValue(and8, 1) + ); + +// R44 = ~R47; +// Instr #66 +SDNode *not0 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq12, 0), + // glue (or chain) input edge + SDValue(and9, 1) + ); + +// R08 = R08 & R44; +// Instr #67 +SDNode *and10 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(not0, 0), + SDValue(and9, 0), + // glue (or chain) input edge + SDValue(not0, 1) + ); + +// R44 = ~R50; +// Instr #68 +SDNode *not1 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq8, 0), + // glue (or chain) input edge + SDValue(and10, 1) + ); + +// R44 = R44 & R07; +// Instr #69 +SDNode *and11 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and8, 0), + SDValue(not1, 0), + // glue (or chain) input edge + SDValue(not1, 1) + ); + +// R44 = R44 & R47; +// Instr #70 +SDNode *and12 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq12, 0), + SDValue(and11, 0), + // 
glue (or chain) input edge + SDValue(and11, 1) + ); + +// R08 = R08 | R44; +// Instr #71 +SDNode *or4 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and12, 0), + SDValue(and10, 0), + // glue (or chain) input edge + SDValue(and12, 1) + ); + +// R07 = ~R45; +// Instr #72 +SDNode *not2 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq14, 0), + // glue (or chain) input edge + SDValue(or4, 1) + ); + +// R07 = R07 & R46; +// Instr #73 +SDNode *and13 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq13, 0), + SDValue(not2, 0), + // glue (or chain) input edge + SDValue(not2, 1) + ); + +// R08 = R08 | R07; +// Instr #74 +SDNode *or5 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and13, 0), + SDValue(or4, 0), + // glue (or chain) input edge + SDValue(and13, 1) + ); + +// R07 = ~R48; +// Instr #75 +SDNode *not3 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq10, 0), + // glue (or chain) input edge + SDValue(or5, 1) + ); + +// R07 = R07 & R49; +// Instr #76 +SDNode *and14 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq9, 0), + SDValue(not3, 0), + // glue (or chain) input edge + SDValue(not3, 1) + ); + +// R08 = R08 | R07; +// Instr #77 +SDNode *or6 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and14, 0), + SDValue(or5, 0), + // glue (or chain) input edge + SDValue(and14, 1) + ); + +// R09 = R08 == R30; +// Instr #78 +SDNode *eq15 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or6, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(or6, 1) + ); + +SDValue ct27 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #79 +SDNode *nop4 = CurDAG->getMachineNode( + 
Connex::NOP_BPF, + DL, + MVT::Glue, + ct27, + // glue (or chain) input edge + SDValue(eq15, 1) + ); + +// WHERE_EQ; +// Instr #80 +SDNode *whereeq4 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq15, 0), + // glue (or chain) input edge + SDValue(nop4, 0) + ); + +SDValue ct28 = CurDAG->getConstant(31745, DL, MVT::i16, true, false); +// R19 = 31745; +// Instr #81 +SDNode *vload21 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct28, + SDValue(vload10, 0), + // glue (or chain) input edge + SDValue(whereeq4, 1) + ); + +SDValue ct29 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R14 = 0; +// Instr #82 +SDNode *vload22 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct29, + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(vload21, 1) + ); + +// END_WHERE; +// Instr #83 +SDNode *endwhere4 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload22, 1) + ); + +// R08 = R49 | R46; +// Instr #84 +SDNode *or7 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq13, 0), + SDValue(eq9, 0), + // glue (or chain) input edge + SDValue(endwhere4, 0) + ); + +// R09 = R08 & R14; +// Instr #85 +SDNode *and15 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload22, 0), + SDValue(or7, 0), + // glue (or chain) input edge + SDValue(or7, 1) + ); + +// R09 = R09 == R30; +// Instr #86 +SDNode *eq16 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and15, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and15, 1) + ); + +SDValue ct30 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #87 +SDNode *nop5 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct30, + // glue (or chain) 
input edge + SDValue(eq16, 1) + ); + +// WHERE_EQ; +// Instr #88 +SDNode *whereeq5 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq16, 0), + // glue (or chain) input edge + SDValue(nop5, 0) + ); + +SDValue ct31 = CurDAG->getConstant(31744, DL, MVT::i16, true, false); +// R19 = 31744; +// Instr #89 +SDNode *vload23 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct31, + SDValue(vload21, 0), + // glue (or chain) input edge + SDValue(whereeq5, 1) + ); + +// R08 = R50 & R49; +// Instr #90 +SDNode *and16 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq9, 0), + SDValue(eq8, 0), + SDValue(or7, 0), + // glue (or chain) input edge + SDValue(vload23, 1) + ); + +// R07 = R47 & R46; +// Instr #91 +SDNode *and17 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq13, 0), + SDValue(eq12, 0), + SDValue(and14, 0), + // glue (or chain) input edge + SDValue(and16, 1) + ); + +// R08 = R08 | R07; +// Instr #92 +SDNode *or8 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and17, 0), + SDValue(and16, 0), + SDValue(and16, 0), + // glue (or chain) input edge + SDValue(and17, 1) + ); + +SDValue ct32 = CurDAG->getConstant(15, DL, MVT::i16, true, false); +// R08 = R08 << 15; +// Instr #93 +SDNode *ishl0 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or8, 0), + ct32, + SDValue(or8, 0), + // glue (or chain) input edge + SDValue(or8, 1) + ); + +// R19 = R19 ^ R08; +// Instr #94 +SDNode *xor0 = CurDAG->getMachineNode( + Connex::XORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl0, 0), + SDValue(vload23, 0), + SDValue(vload23, 0), + // glue (or chain) input edge + SDValue(ishl0, 1) + ); + +SDValue ct33 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R14 = 0; +// Instr #95 
+SDNode *vload24 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct33, + SDValue(vload22, 0), + // glue (or chain) input edge + SDValue(xor0, 1) + ); + +// END_WHERE; +// Instr #96 +SDNode *endwhere5 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload24, 1) + ); + +// R15 = R25 - R21; +// Instr #97 +SDNode *sub0 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload19, 0), + SDValue(vload20, 0), + // glue (or chain) input edge + SDValue(endwhere5, 0) + ); + +SDValue ct34 = CurDAG->getConstant(-15, DL, MVT::i16, true, false); +// R08 = -15; +// Instr #98 +SDNode *vload25 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct34, + // glue (or chain) input edge + SDValue(sub0, 1) + ); + +// R09 = R15 < R08; +// Instr #99 +SDNode *lt2 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub0, 0), + SDValue(vload25, 0), + // glue (or chain) input edge + SDValue(vload25, 1) + ); + +// R09 = R09 & R14; +// Instr #100 +SDNode *and18 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt2, 0), + // glue (or chain) input edge + SDValue(lt2, 1) + ); + +// R09 = R09 == R30; +// Instr #101 +SDNode *eq17 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and18, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and18, 1) + ); + +SDValue ct35 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #102 +SDNode *nop6 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct35, + // glue (or chain) input edge + SDValue(eq17, 1) + ); + +// WHERE_EQ; +// Instr #103 +SDNode *whereeq6 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq17, 0), + // glue (or 
chain) input edge + SDValue(nop6, 0) + ); + +// R15 = R31 - R15; +// Instr #104 +SDNode *sub1 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(sub0, 0), + SDValue(sub0, 0), + // glue (or chain) input edge + SDValue(whereeq6, 1) + ); + +SDValue ct36 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R26 = 0; +// Instr #105 +SDNode *vload26 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct36, + SDValue(or1, 0), + // glue (or chain) input edge + SDValue(sub1, 1) + ); + +SDValue ct37 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R25 = R21 << 0; +// Instr #106 +SDNode *ishl1 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload20, 0), + ct37, + SDValue(vload19, 0), + // glue (or chain) input edge + SDValue(vload26, 1) + ); + +SDValue ct38 = CurDAG->getConstant(32000, DL, MVT::i16, true, false); +// R15 = 32000; +// Instr #107 +SDNode *vload27 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct38, + SDValue(sub1, 0), + // glue (or chain) input edge + SDValue(ishl1, 1) + ); + +// END_WHERE; +// Instr #108 +SDNode *endwhere6 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload27, 1) + ); + +SDValue ct39 = CurDAG->getConstant(-3, DL, MVT::i16, true, false); +// R08 = -3; +// Instr #109 +SDNode *vload28 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct39, + // glue (or chain) input edge + SDValue(endwhere6, 0) + ); + +// R09 = R15 < R08; +// Instr #110 +SDNode *lt3 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload27, 0), + SDValue(vload28, 0), + // glue (or chain) input edge + SDValue(vload28, 1) + ); + +// R09 = R09 & R14; +// Instr #111 +SDNode *and19 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + 
TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt3, 0), + // glue (or chain) input edge + SDValue(lt3, 1) + ); + +// R09 = R09 == R30; +// Instr #112 +SDNode *eq18 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and19, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and19, 1) + ); + +SDValue ct40 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #113 +SDNode *nop7 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct40, + // glue (or chain) input edge + SDValue(eq18, 1) + ); + +// WHERE_EQ; +// Instr #114 +SDNode *whereeq7 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq18, 0), + // glue (or chain) input edge + SDValue(nop7, 0) + ); + +// R15 = R31 - R15; +// Instr #115 +SDNode *sub2 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(vload27, 0), + SDValue(vload27, 0), + // glue (or chain) input edge + SDValue(whereeq7, 1) + ); + +// R26 = R26 >> R15; +// Instr #116 +SDNode *shr0 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload26, 0), + SDValue(sub2, 0), + SDValue(vload26, 0), + // glue (or chain) input edge + SDValue(sub2, 1) + ); + +SDValue ct41 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R25 = R21 << 0; +// Instr #117 +SDNode *ishl2 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload20, 0), + ct41, + SDValue(ishl1, 0), + // glue (or chain) input edge + SDValue(shr0, 1) + ); + +SDValue ct42 = CurDAG->getConstant(32000, DL, MVT::i16, true, false); +// R15 = 32000; +// Instr #118 +SDNode *vload29 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct42, + SDValue(sub2, 0), + // glue (or chain) input edge + SDValue(ishl2, 1) + ); + +// END_WHERE; 
+// Instr #119 +SDNode *endwhere7 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload29, 1) + ); + +// R09 = R15 < R31; +// Instr #120 +SDNode *lt4 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload29, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(endwhere7, 0) + ); + +// R09 = R09 & R14; +// Instr #121 +SDNode *and20 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt4, 0), + // glue (or chain) input edge + SDValue(lt4, 1) + ); + +// R09 = R09 == R30; +// Instr #122 +SDNode *eq19 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and20, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and20, 1) + ); + +SDValue ct43 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #123 +SDNode *nop8 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct43, + // glue (or chain) input edge + SDValue(eq19, 1) + ); + +// WHERE_EQ; +// Instr #124 +SDNode *whereeq8 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq19, 0), + // glue (or chain) input edge + SDValue(nop8, 0) + ); + +// R15 = R31 - R15; +// Instr #125 +SDNode *sub3 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(vload29, 0), + SDValue(vload29, 0), + // glue (or chain) input edge + SDValue(whereeq8, 1) + ); + +// R22 = R22 << R15; +// Instr #126 +SDNode *shl0 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or3, 0), + SDValue(sub3, 0), + SDValue(or3, 0), + // glue (or chain) input edge + SDValue(sub3, 1) + ); + +SDValue ct44 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R21 = R25 << 0; +// Instr #127 +SDNode *ishl3 = 
CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl2, 0), + ct44, + SDValue(vload20, 0), + // glue (or chain) input edge + SDValue(shl0, 1) + ); + +SDValue ct45 = CurDAG->getConstant(32000, DL, MVT::i16, true, false); +// R15 = 32000; +// Instr #128 +SDNode *vload30 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct45, + SDValue(sub3, 0), + // glue (or chain) input edge + SDValue(ishl3, 1) + ); + +// END_WHERE; +// Instr #129 +SDNode *endwhere8 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload30, 1) + ); + +SDValue ct46 = CurDAG->getConstant(4, DL, MVT::i16, true, false); +// R08 = 4; +// Instr #130 +SDNode *vload31 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct46, + // glue (or chain) input edge + SDValue(endwhere8, 0) + ); + +// R09 = R15 < R08; +// Instr #131 +SDNode *lt5 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload30, 0), + SDValue(vload31, 0), + // glue (or chain) input edge + SDValue(vload31, 1) + ); + +// R09 = R09 & R14; +// Instr #132 +SDNode *and21 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt5, 0), + // glue (or chain) input edge + SDValue(lt5, 1) + ); + +// R09 = R09 == R30; +// Instr #133 +SDNode *eq20 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and21, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and21, 1) + ); + +SDValue ct47 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #134 +SDNode *nop9 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct47, + // glue (or chain) input edge + SDValue(eq20, 1) + ); + +// WHERE_EQ; +// Instr #135 +SDNode *whereeq9 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, 
+ TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq20, 0), + // glue (or chain) input edge + SDValue(nop9, 0) + ); + +// R26 = R26 << R15; +// Instr #136 +SDNode *shl1 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shr0, 0), + SDValue(vload30, 0), + SDValue(shr0, 0), + // glue (or chain) input edge + SDValue(whereeq9, 1) + ); + +SDValue ct48 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R25 = R21 << 0; +// Instr #137 +SDNode *ishl4 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl3, 0), + ct48, + SDValue(ishl2, 0), + // glue (or chain) input edge + SDValue(shl1, 1) + ); + +SDValue ct49 = CurDAG->getConstant(32000, DL, MVT::i16, true, false); +// R15 = 32000; +// Instr #138 +SDNode *vload32 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct49, + SDValue(vload30, 0), + // glue (or chain) input edge + SDValue(ishl4, 1) + ); + +// END_WHERE; +// Instr #139 +SDNode *endwhere9 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload32, 1) + ); + +// R09 = R15 < R29; +// Instr #140 +SDNode *lt6 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload32, 0), + SDValue(vload3, 0), + // glue (or chain) input edge + SDValue(endwhere9, 0) + ); + +// R09 = R09 & R14; +// Instr #141 +SDNode *and22 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt6, 0), + // glue (or chain) input edge + SDValue(lt6, 1) + ); + +// R09 = R09 == R30; +// Instr #142 +SDNode *eq21 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and22, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and22, 1) + ); + +SDValue ct50 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #143 +SDNode 
*nop10 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct50, + // glue (or chain) input edge + SDValue(eq21, 1) + ); + +// WHERE_EQ; +// Instr #144 +SDNode *whereeq10 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq21, 0), + // glue (or chain) input edge + SDValue(nop10, 0) + ); + +// R22 = R22 >> R15; +// Instr #145 +SDNode *shr1 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl0, 0), + SDValue(vload32, 0), + SDValue(shl0, 0), + // glue (or chain) input edge + SDValue(whereeq10, 1) + ); + +SDValue ct51 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R21 = R25 << 0; +// Instr #146 +SDNode *ishl5 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl4, 0), + ct51, + SDValue(ishl3, 0), + // glue (or chain) input edge + SDValue(shr1, 1) + ); + +SDValue ct52 = CurDAG->getConstant(32000, DL, MVT::i16, true, false); +// R15 = 32000; +// Instr #147 +SDNode *vload33 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct52, + SDValue(vload32, 0), + // glue (or chain) input edge + SDValue(ishl5, 1) + ); + +// END_WHERE; +// Instr #148 +SDNode *endwhere10 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload33, 1) + ); + +SDValue ct53 = CurDAG->getConstant(32, DL, MVT::i16, true, false); +// R08 = 32; +// Instr #149 +SDNode *vload34 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct53, + // glue (or chain) input edge + SDValue(endwhere10, 0) + ); + +// R09 = R15 < R08; +// Instr #150 +SDNode *lt7 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload33, 0), + SDValue(vload34, 0), + // glue (or chain) input edge + SDValue(vload34, 1) + ); + +// R09 = R09 & R14; +// Instr #151 +SDNode *and23 = CurDAG->getMachineNode( + 
Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt7, 0), + // glue (or chain) input edge + SDValue(lt7, 1) + ); + +// R09 = R09 == R30; +// Instr #152 +SDNode *eq22 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and23, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and23, 1) + ); + +SDValue ct54 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #153 +SDNode *nop11 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct54, + // glue (or chain) input edge + SDValue(eq22, 1) + ); + +// WHERE_EQ; +// Instr #154 +SDNode *whereeq11 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq22, 0), + // glue (or chain) input edge + SDValue(nop11, 0) + ); + +SDValue ct55 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R22 = 0; +// Instr #155 +SDNode *vload35 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct55, + SDValue(shr1, 0), + // glue (or chain) input edge + SDValue(whereeq11, 1) + ); + +SDValue ct56 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R21 = R25 << 0; +// Instr #156 +SDNode *ishl6 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl4, 0), + ct56, + SDValue(ishl5, 0), + // glue (or chain) input edge + SDValue(vload35, 1) + ); + +// END_WHERE; +// Instr #157 +SDNode *endwhere11 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(ishl6, 1) + ); + +// R09 = R24 == R11; +// Instr #158 +SDNode *eq23 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and0, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(endwhere11, 0) + ); + +// R09 = R09 & R14; +// Instr #159 +SDNode *and24 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + 
TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(eq23, 0), + // glue (or chain) input edge + SDValue(eq23, 1) + ); + +// R09 = R09 == R30; +// Instr #160 +SDNode *eq24 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and24, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and24, 1) + ); + +SDValue ct57 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #161 +SDNode *nop12 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct57, + // glue (or chain) input edge + SDValue(eq24, 1) + ); + +// WHERE_EQ; +// Instr #162 +SDNode *whereeq12 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq24, 0), + // glue (or chain) input edge + SDValue(nop12, 0) + ); + +// R26 = R31 - R26; +// Instr #163 +SDNode *sub4 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(shl1, 0), + SDValue(shl1, 0), + // glue (or chain) input edge + SDValue(whereeq12, 1) + ); + +// END_WHERE; +// Instr #164 +SDNode *endwhere12 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sub4, 1) + ); + +// R09 = R20 == R31; +// Instr #165 +SDNode *eq25 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and4, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(endwhere12, 0) + ); + +// R09 = R09 & R14; +// Instr #166 +SDNode *and25 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(eq25, 0), + // glue (or chain) input edge + SDValue(eq25, 1) + ); + +// R09 = R09 == R30; +// Instr #167 +SDNode *eq26 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and25, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and25, 1) + ); + +SDValue ct58 = 
CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #168 +SDNode *nop13 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct58, + // glue (or chain) input edge + SDValue(eq26, 1) + ); + +// WHERE_EQ; +// Instr #169 +SDNode *whereeq13 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq26, 0), + // glue (or chain) input edge + SDValue(nop13, 0) + ); + +// R22 = R31 - R22; +// Instr #170 +SDNode *sub5 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(vload35, 0), + SDValue(vload35, 0), + // glue (or chain) input edge + SDValue(whereeq13, 1) + ); + +// END_WHERE; +// Instr #171 +SDNode *endwhere13 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sub5, 1) + ); + +// R09 = R14 == R30; +// Instr #172 +SDNode *eq27 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere13, 0) + ); + +SDValue ct59 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #173 +SDNode *nop14 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct59, + // glue (or chain) input edge + SDValue(eq27, 1) + ); + +// WHERE_EQ; +// Instr #174 +SDNode *whereeq14 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq27, 0), + // glue (or chain) input edge + SDValue(nop14, 0) + ); + +// R26 = R22 + R26; +// Instr #175 +SDNode *add0 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub4, 0), + SDValue(sub5, 0), + SDValue(sub4, 0), + // glue (or chain) input edge + SDValue(whereeq14, 1) + ); + +// END_WHERE; +// Instr #176 +SDNode *endwhere14 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // 
glue (or chain) input edge + SDValue(add0, 1) + ); + +// R24 = R26 & R11; +// Instr #177 +SDNode *and26 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(add0, 0), + // glue (or chain) input edge + SDValue(endwhere14, 0) + ); + +// R09 = R24 == R11; +// Instr #178 +SDNode *eq28 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and26, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(and26, 1) + ); + +// R09 = R09 & R14; +// Instr #179 +SDNode *and27 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(eq28, 0), + // glue (or chain) input edge + SDValue(eq28, 1) + ); + +// R09 = R09 == R30; +// Instr #180 +SDNode *eq29 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and27, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and27, 1) + ); + +SDValue ct60 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #181 +SDNode *nop15 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct60, + // glue (or chain) input edge + SDValue(eq29, 1) + ); + +// WHERE_EQ; +// Instr #182 +SDNode *whereeq15 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq29, 0), + // glue (or chain) input edge + SDValue(nop15, 0) + ); + +// R26 = R31 - R26; +// Instr #183 +SDNode *sub6 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(add0, 0), + SDValue(add0, 0), + // glue (or chain) input edge + SDValue(whereeq15, 1) + ); + +// END_WHERE; +// Instr #184 +SDNode *endwhere15 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sub6, 1) + ); + +SDValue ct61 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R05 = R26 << 0; +// 
Instr #185 +SDNode *ishl7 = CurDAG->getMachineNode( + Connex::ISHLV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub6, 0), + ct61, + // glue (or chain) input edge + SDValue(endwhere15, 0) + ); + +SDValue ct62 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R08 = R05 >> 1; +// Instr #186 +SDNode *ishr2 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl7, 0), + ct62, + // glue (or chain) input edge + SDValue(ishl7, 1) + ); + +// R05 = R05 | R08; +// Instr #187 +SDNode *or9 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr2, 0), + SDValue(ishl7, 0), + // glue (or chain) input edge + SDValue(ishr2, 1) + ); + +SDValue ct63 = CurDAG->getConstant(2, DL, MVT::i16, true, false); +// R08 = R05 >> 2; +// Instr #188 +SDNode *ishr3 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or9, 0), + ct63, + // glue (or chain) input edge + SDValue(or9, 1) + ); + +// R05 = R05 | R08; +// Instr #189 +SDNode *or10 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr3, 0), + SDValue(or9, 0), + // glue (or chain) input edge + SDValue(ishr3, 1) + ); + +SDValue ct64 = CurDAG->getConstant(4, DL, MVT::i16, true, false); +// R08 = R05 >> 4; +// Instr #190 +SDNode *ishr4 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or10, 0), + ct64, + // glue (or chain) input edge + SDValue(or10, 1) + ); + +// R05 = R05 | R08; +// Instr #191 +SDNode *or11 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr4, 0), + SDValue(or10, 0), + // glue (or chain) input edge + SDValue(ishr4, 1) + ); + +SDValue ct65 = CurDAG->getConstant(8, DL, MVT::i16, true, false); +// R08 = R05 >> 8; +// Instr #192 +SDNode *ishr5 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or11, 0), + ct65, + // glue (or chain) 
input edge + SDValue(or11, 1) + ); + +// R05 = R05 | R08; +// Instr #193 +SDNode *or12 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr5, 0), + SDValue(or11, 0), + // glue (or chain) input edge + SDValue(ishr5, 1) + ); + +// R05 = ~R05; +// Instr #194 +SDNode *not4 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or12, 0), + // glue (or chain) input edge + SDValue(or12, 1) + ); + +// R06 = POPCNT(R05); +// Instr #195 +SDNode *popcnt0 = CurDAG->getMachineNode( + Connex::POPCNT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(not4, 0), + // glue (or chain) input edge + SDValue(not4, 1) + ); + +// R06 = R29 - R06; +// Instr #196 +SDNode *sub7 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload3, 0), + SDValue(popcnt0, 0), + // glue (or chain) input edge + SDValue(popcnt0, 1) + ); + +SDValue ct66 = CurDAG->getConstant(11, DL, MVT::i16, true, false); +// R08 = 11; +// Instr #197 +SDNode *vload36 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct66, + // glue (or chain) input edge + SDValue(sub7, 1) + ); + +// R08 = R06 - R08; +// Instr #198 +SDNode *sub8 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub7, 0), + SDValue(vload36, 0), + // glue (or chain) input edge + SDValue(vload36, 1) + ); + +// R09 = R31 < R08; +// Instr #199 +SDNode *lt8 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(sub8, 0), + // glue (or chain) input edge + SDValue(sub8, 1) + ); + +// R09 = R09 & R14; +// Instr #200 +SDNode *and28 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt8, 0), + // glue (or chain) input edge + SDValue(lt8, 1) + ); + +// R09 = R09 == R30; +// Instr #201 +SDNode *eq30 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + 
TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and28, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and28, 1) + ); + +SDValue ct67 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #202 +SDNode *nop16 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct67, + // glue (or chain) input edge + SDValue(eq30, 1) + ); + +// WHERE_EQ; +// Instr #203 +SDNode *whereeq16 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq30, 0), + // glue (or chain) input edge + SDValue(nop16, 0) + ); + +SDValue ct68 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R62 = R26 << 0; +// Instr #204 +SDNode *ishl8 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub6, 0), + ct68, + SDValue(vload11, 0), + // glue (or chain) input edge + SDValue(whereeq16, 1) + ); + +// R09 = R29 - R08; +// Instr #205 +SDNode *sub9 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload3, 0), + SDValue(sub8, 0), + SDValue(eq30, 0), + // glue (or chain) input edge + SDValue(ishl8, 1) + ); + +// R62 = R62 << R09; +// Instr #206 +SDNode *shl2 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl8, 0), + SDValue(sub9, 0), + SDValue(ishl8, 0), + // glue (or chain) input edge + SDValue(sub9, 1) + ); + +// R62 = R62 >> R09; +// Instr #207 +SDNode *shr2 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl2, 0), + SDValue(sub9, 0), + SDValue(shl2, 0), + // glue (or chain) input edge + SDValue(shl2, 1) + ); + +SDValue ct69 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R61 = R08 << 0; +// Instr #208 +SDNode *ishl9 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub8, 0), + ct69, + SDValue(vload12, 0), + // glue (or chain) input edge 
+ SDValue(shr2, 1) + ); + +// R26 = R26 >> R08; +// Instr #209 +SDNode *shr3 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub6, 0), + SDValue(sub8, 0), + SDValue(sub6, 0), + // glue (or chain) input edge + SDValue(ishl9, 1) + ); + +// R25 = R08 + R25; +// Instr #210 +SDNode *add1 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl4, 0), + SDValue(sub8, 0), + SDValue(ishl4, 0), + // glue (or chain) input edge + SDValue(shr3, 1) + ); + +// END_WHERE; +// Instr #211 +SDNode *endwhere16 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(add1, 1) + ); + +// R09 = R08 < R31; +// Instr #212 +SDNode *lt9 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub8, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(endwhere16, 0) + ); + +// R09 = R09 & R14; +// Instr #213 +SDNode *and29 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt9, 0), + // glue (or chain) input edge + SDValue(lt9, 1) + ); + +// R09 = R09 == R30; +// Instr #214 +SDNode *eq31 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and29, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and29, 1) + ); + +SDValue ct70 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #215 +SDNode *nop17 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct70, + // glue (or chain) input edge + SDValue(eq31, 1) + ); + +// WHERE_EQ; +// Instr #216 +SDNode *whereeq17 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq31, 0), + // glue (or chain) input edge + SDValue(nop17, 0) + ); + +// R08 = R31 - R08; +// Instr #217 +SDNode *sub10 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + 
DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(sub8, 0), + SDValue(sub8, 0), + // glue (or chain) input edge + SDValue(whereeq17, 1) + ); + +// R26 = R26 << R08; +// Instr #218 +SDNode *shl3 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shr3, 0), + SDValue(sub10, 0), + SDValue(shr3, 0), + // glue (or chain) input edge + SDValue(sub10, 1) + ); + +// R25 = R25 - R08; +// Instr #219 +SDNode *sub11 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(add1, 0), + SDValue(sub10, 0), + SDValue(add1, 0), + // glue (or chain) input edge + SDValue(shl3, 1) + ); + +// END_WHERE; +// Instr #220 +SDNode *endwhere17 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sub11, 1) + ); + +// R09 = R25 < R30; +// Instr #221 +SDNode *lt10 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub11, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere17, 0) + ); + +// R09 = R09 & R14; +// Instr #222 +SDNode *and30 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt10, 0), + // glue (or chain) input edge + SDValue(lt10, 1) + ); + +// R09 = R09 == R30; +// Instr #223 +SDNode *eq32 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and30, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and30, 1) + ); + +SDValue ct71 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #224 +SDNode *nop18 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct71, + // glue (or chain) input edge + SDValue(eq32, 1) + ); + +// WHERE_EQ; +// Instr #225 +SDNode *whereeq18 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq32, 0), + // glue (or chain) 
input edge + SDValue(nop18, 0) + ); + +// R61 = R30 - R25; +// Instr #226 +SDNode *sub12 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(sub11, 0), + SDValue(ishl9, 0), + // glue (or chain) input edge + SDValue(whereeq18, 1) + ); + +SDValue ct72 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R25 = 1; +// Instr #227 +SDNode *vload37 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct72, + SDValue(sub11, 0), + // glue (or chain) input edge + SDValue(sub12, 1) + ); + +SDValue ct73 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R62 = R26 << 0; +// Instr #228 +SDNode *ishl10 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl3, 0), + ct73, + SDValue(shr2, 0), + // glue (or chain) input edge + SDValue(vload37, 1) + ); + +// R09 = R29 - R61; +// Instr #229 +SDNode *sub13 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload3, 0), + SDValue(sub12, 0), + SDValue(eq32, 0), + // glue (or chain) input edge + SDValue(ishl10, 1) + ); + +// R62 = R62 << R09; +// Instr #230 +SDNode *shl4 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl10, 0), + SDValue(sub13, 0), + SDValue(ishl10, 0), + // glue (or chain) input edge + SDValue(sub13, 1) + ); + +// R62 = R62 >> R09; +// Instr #231 +SDNode *shr4 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl4, 0), + SDValue(sub13, 0), + SDValue(shl4, 0), + // glue (or chain) input edge + SDValue(shl4, 1) + ); + +// R26 = R26 >> R61; +// Instr #232 +SDNode *shr5 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl3, 0), + SDValue(sub12, 0), + SDValue(shl3, 0), + // glue (or chain) input edge + SDValue(shr4, 1) + ); + +// END_WHERE; +// Instr #233 
+SDNode *endwhere18 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(shr5, 1) + ); + +SDValue ct74 = CurDAG->getConstant(1024, DL, MVT::i16, true, false); +// R08 = 1024; +// Instr #234 +SDNode *vload38 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct74, + // glue (or chain) input edge + SDValue(endwhere18, 0) + ); + +// R08 = R26 < R08; +// Instr #235 +SDNode *lt11 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shr5, 0), + SDValue(vload38, 0), + // glue (or chain) input edge + SDValue(vload38, 1) + ); + +// R09 = R25 == R30; +// Instr #236 +SDNode *eq33 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload37, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(lt11, 1) + ); + +// R09 = R09 & R14; +// Instr #237 +SDNode *and31 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(eq33, 0), + // glue (or chain) input edge + SDValue(eq33, 1) + ); + +// R09 = R09 & R08; +// Instr #238 +SDNode *and32 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt11, 0), + SDValue(and31, 0), + // glue (or chain) input edge + SDValue(and31, 1) + ); + +// R09 = R09 == R30; +// Instr #239 +SDNode *eq34 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and32, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and32, 1) + ); + +SDValue ct75 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #240 +SDNode *nop19 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct75, + // glue (or chain) input edge + SDValue(eq34, 1) + ); + +// WHERE_EQ; +// Instr #241 +SDNode *whereeq19 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq34, 0), 
+ // glue (or chain) input edge + SDValue(nop19, 0) + ); + +SDValue ct76 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R25 = 0; +// Instr #242 +SDNode *vload39 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct76, + SDValue(vload37, 0), + // glue (or chain) input edge + SDValue(whereeq19, 1) + ); + +// END_WHERE; +// Instr #243 +SDNode *endwhere19 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload39, 1) + ); + +// R26 = R26 & R13; +// Instr #244 +SDNode *and33 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(shr5, 0), + // glue (or chain) input edge + SDValue(endwhere19, 0) + ); + +SDValue ct77 = CurDAG->getConstant(30, DL, MVT::i16, true, false); +// R09 = 30; +// Instr #245 +SDNode *vload40 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct77, + // glue (or chain) input edge + SDValue(and33, 1) + ); + +// R09 = R09 < R25; +// Instr #246 +SDNode *lt12 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload40, 0), + SDValue(vload39, 0), + // glue (or chain) input edge + SDValue(vload40, 1) + ); + +// R09 = R09 & R14; +// Instr #247 +SDNode *and34 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt12, 0), + // glue (or chain) input edge + SDValue(lt12, 1) + ); + +// R09 = R09 == R30; +// Instr #248 +SDNode *eq35 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and34, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and34, 1) + ); + +SDValue ct78 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #249 +SDNode *nop20 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct78, + // glue (or chain) input edge + SDValue(eq35, 1) + 
); + +// WHERE_EQ; +// Instr #250 +SDNode *whereeq20 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq35, 0), + // glue (or chain) input edge + SDValue(nop20, 0) + ); + +SDValue ct79 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R14 = 0; +// Instr #251 +SDNode *vload41 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct79, + SDValue(vload24, 0), + // glue (or chain) input edge + SDValue(whereeq20, 1) + ); + +SDValue ct80 = CurDAG->getConstant(31744, DL, MVT::i16, true, false); +// R19 = 31744; +// Instr #252 +SDNode *vload42 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct80, + SDValue(xor0, 0), + // glue (or chain) input edge + SDValue(vload41, 1) + ); + +// R19 = R19 | R24; +// Instr #253 +SDNode *or13 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and26, 0), + SDValue(vload42, 0), + SDValue(vload42, 0), + // glue (or chain) input edge + SDValue(vload42, 1) + ); + +// END_WHERE; +// Instr #254 +SDNode *endwhere20 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(or13, 1) + ); + +// R08 = R14 == R30; +// Instr #255 +SDNode *eq36 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload41, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere20, 0) + ); + +SDValue ct81 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #256 +SDNode *nop21 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct81, + // glue (or chain) input edge + SDValue(eq36, 1) + ); + +// WHERE_EQ; +// Instr #257 +SDNode *whereeq21 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq36, 0), + // glue (or chain) input edge + SDValue(nop21, 0) + ); + +SDValue ct82 = 
CurDAG->getConstant(10, DL, MVT::i16, true, false); +// R19 = R25 << 10; +// Instr #258 +SDNode *ishl11 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload39, 0), + ct82, + SDValue(or13, 0), + // glue (or chain) input edge + SDValue(whereeq21, 1) + ); + +// R19 = R19 | R26; +// Instr #259 +SDNode *or14 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and33, 0), + SDValue(ishl11, 0), + SDValue(ishl11, 0), + // glue (or chain) input edge + SDValue(ishl11, 1) + ); + +// R04 = R26 & R30; +// Instr #260 +SDNode *and35 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(and33, 0), + SDValue(vload14, 0), + // glue (or chain) input edge + SDValue(or14, 1) + ); + +// R07 = R61 - R30; +// Instr #261 +SDNode *sub14 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub12, 0), + SDValue(vload1, 0), + SDValue(and17, 0), + // glue (or chain) input edge + SDValue(and35, 1) + ); + +// R08 = R30 << R08; +// Instr #262 +SDNode *shl5 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq36, 0), + SDValue(eq36, 0), + // glue (or chain) input edge + SDValue(sub14, 1) + ); + +// R03 = R62 & R08; +// Instr #263 +SDNode *and36 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl5, 0), + SDValue(shr4, 0), + SDValue(vload15, 0), + // glue (or chain) input edge + SDValue(shl5, 1) + ); + +// R62 = R62 ^ R03; +// Instr #264 +SDNode *xor1 = CurDAG->getMachineNode( + Connex::XORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and36, 0), + SDValue(shr4, 0), + SDValue(shr4, 0), + // glue (or chain) input edge + SDValue(and36, 1) + ); + +// R03 = R03 == R31; +// Instr #265 +SDNode *eq37 = CurDAG->getMachineNode( + Connex::EQ_SPECIAL_H, + DL, + 
TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and36, 0), + SDValue(vload2, 0), + SDValue(and36, 0), + // glue (or chain) input edge + SDValue(xor1, 1) + ); + +// R03 = R30 - R03; +// Instr #266 +SDNode *sub15 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq37, 0), + SDValue(eq37, 0), + // glue (or chain) input edge + SDValue(eq37, 1) + ); + +SDValue ct83 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R08 = R08 >> 1; +// Instr #267 +SDNode *ishr6 = CurDAG->getMachineNode( + Connex::ISHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl5, 0), + ct83, + SDValue(shl5, 0), + // glue (or chain) input edge + SDValue(sub15, 1) + ); + +// R02 = R62 & R08; +// Instr #268 +SDNode *and37 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr6, 0), + SDValue(xor1, 0), + SDValue(vload16, 0), + // glue (or chain) input edge + SDValue(ishr6, 1) + ); + +// R62 = R62 ^ R02; +// Instr #269 +SDNode *xor2 = CurDAG->getMachineNode( + Connex::XORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and37, 0), + SDValue(xor1, 0), + SDValue(xor1, 0), + // glue (or chain) input edge + SDValue(and37, 1) + ); + +// R02 = R02 == R31; +// Instr #270 +SDNode *eq38 = CurDAG->getMachineNode( + Connex::EQ_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and37, 0), + SDValue(vload2, 0), + SDValue(and37, 0), + // glue (or chain) input edge + SDValue(xor2, 1) + ); + +// R02 = R30 - R02; +// Instr #271 +SDNode *sub16 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq38, 0), + SDValue(eq38, 0), + // glue (or chain) input edge + SDValue(eq38, 1) + ); + +// R01 = R62 == R31; +// Instr #272 +SDNode *eq39 = CurDAG->getMachineNode( + Connex::EQ_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(xor2, 0), + SDValue(vload2, 0), + SDValue(vload17, 0), + // glue (or chain) 
input edge + SDValue(sub16, 1) + ); + +// R01 = R30 - R01; +// Instr #273 +SDNode *sub17 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq39, 0), + SDValue(eq39, 0), + // glue (or chain) input edge + SDValue(eq39, 1) + ); + +// R00 = R04 | R02; +// Instr #274 +SDNode *or15 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub16, 0), + SDValue(and35, 0), + SDValue(vload18, 0), + // glue (or chain) input edge + SDValue(sub17, 1) + ); + +// R00 = R00 | R01; +// Instr #275 +SDNode *or16 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub17, 0), + SDValue(or15, 0), + SDValue(or15, 0), + // glue (or chain) input edge + SDValue(or15, 1) + ); + +// R00 = R00 & R03; +// Instr #276 +SDNode *and38 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub15, 0), + SDValue(or16, 0), + SDValue(or16, 0), + // glue (or chain) input edge + SDValue(or16, 1) + ); + +// R19 = R00 + R19; +// Instr #277 +SDNode *add2 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or14, 0), + SDValue(and38, 0), + SDValue(or14, 0), + // glue (or chain) input edge + SDValue(and38, 1) + ); + +// R19 = R19 | R24; +// Instr #278 +SDNode *resF16 /*or17*/ = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and26, 0), + SDValue(add2, 0), + SDValue(add2, 0), + // glue (or chain) input edge + SDValue(add2, 1) + ); + +// END_WHERE; +// Instr #279 +SDNode *lastNode /*endwhere21*/ = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, +// Alex: MVT::Glue, + MVT::Other, + // glue (or chain) input edge + SDValue(resF16 /*or17*/, 1) + ); + Index: lib/Target/Connex/Select_SUBi32_OpincaaCodeGen.h =================================================================== --- 
lib/Target/Connex/Select_SUBi32_OpincaaCodeGen.h +++ lib/Target/Connex/Select_SUBi32_OpincaaCodeGen.h @@ -0,0 +1,212 @@ +//===-- Select_SUBi32_OpincaaCodeGen.h --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +// Code auto-generated by method Kernel::genLLVMISelManualCode() +// from the OPINCAA lib, from kernel sub.i32. +// You should include this code in the Select() method of the SelectionDAGISel +// class of your back end. +// Number of instructions generated: 15. +// +//===----------------------------------------------------------------------===// + + +// From /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/opincaa_standalone_apps/Emulate_i32/SUB_i32_manual/DumpISel_OpincaaCodeGen_old110_400.cpp + +// R27 is REG_SRC1. It is represented by result of nodeOpSrcCast1. +// R28 is REG_SRC2. It is represented by result of nodeOpSrcCast2. 
+ + + + +SDValue ct0 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R31 = 0; +// Instr #0: VLOAD_H of the immediate ct0 (0), glued after nodeOpSrcCast2. First of the 15 glue-linked machine nodes (Instr #0..#14) built for the sub.i32 kernel. +SDNode *vload0 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct0, + // glue (or chain) input edge + SDValue(nodeOpSrcCast2, 1) + ); + +SDValue ct1 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R30 = 1; +// Instr #1: VLOAD_H of the immediate ct1 (1), glued after vload0. +SDNode *vload1 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct1, + // glue (or chain) input edge + SDValue(vload0, 1) + ); + +// R29 = R27 - R28; +// Instr #2: SUBV_H on the value results of nodeOpSrcCast1 and nodeOpSrcCast2, glued after vload1. +SDNode *sub0 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(nodeOpSrcCast1, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(vload1, 1) + ); + +// R23 = ADDC(R31, R31); +// Instr #3: ADDCV_H of vload0 with itself (0 and 0). NOTE(review): presumably this captures the carry/borrow left by Instr #2 - confirm ADDCV_H semantics. +SDNode *addc0 = CurDAG->getMachineNode( + Connex::ADDCV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(vload0, 0), + SDValue(sub0, 0) + // no need for glue or chain input (since it normally consumes the output of the predecessor); note the third operand above is the value result (index 0) of sub0, not its glue result + ); + +// R26 = INDEX; +// Instr #4: LDIX_H takes no data operands here, only the glue result of addc0. +SDNode *ldix0 = CurDAG->getMachineNode( + Connex::LDIX_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + // glue (or chain) input edge + SDValue(addc0, 1) + ); + +// R25 = R26 & R30; +// Instr #5: ANDV_H of vload1 (the constant 1) and ldix0, i.e. INDEX & 1. +SDNode *and0 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(ldix0, 0), + // glue (or chain) input edge + SDValue(ldix0, 1) + ); + +// R24 = R25 == R30; +// Instr #6: EQ_H comparing and0 against vload1 (the constant 1). +SDNode *eq0 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and0, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and0, 1) + ); + +SDValue ct2 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #7: NOP_BPF with cycle count ct2 (1). Its only result type is MVT::Glue, so consumers reference result index 0. +SDNode *nop0 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct2, + // glue (or chain) input edge + SDValue(eq0, 1) + ); + +//
WHERE_EQ; +// Instr #8: WHEREEQ consumes the value of eq0 and the glue of nop0 (result 0 of the glue-only NOP). NOTE(review): presumably opens a predicated region for the lanes where Instr #6 compared equal - confirm. +SDNode *whereeq0 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq0, 0), + // glue (or chain) input edge + SDValue(nop0, 0) + ); + +SDValue ct3 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R23 = 0; +// Instr #9: VLOAD_SPECIAL_H of the immediate ct3 (0) with the value of addc0 as an extra operand. NOTE(review): presumably the prior R23 contents kept for non-selected lanes - confirm. +SDNode *vload2 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct3, + SDValue(addc0, 0), + // glue (or chain) input edge + SDValue(whereeq0, 1) + ); + +// END_WHERE; +// Instr #10: END_WHERE produces only glue; glued after vload2. +SDNode *endwhere0 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload2, 1) + ); + +// CELL_SHR(R23, R30); +// Instr #11: CELLSHR_H on vload2 and vload1 (the constant 1); produces only glue. +SDNode *cellshr0 = CurDAG->getMachineNode( + Connex::CELLSHR_H, + DL, + MVT::Glue, + SDValue(vload2, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere0, 0) + ); + +SDValue ct4 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #12: NOP_BPF with cycle count ct4 (1), placed between CELLSHR_H and LDSH_H. +SDNode *nop1 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct4, + // glue (or chain) input edge + SDValue(cellshr0, 0) + ); + +// R23 = SHIFT_REG; +// Instr #13: LDSH_H takes no data operands here, only the glue of nop1. +SDNode *ldsh0 = CurDAG->getMachineNode( + Connex::LDSH_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + // glue (or chain) input edge + SDValue(nop1, 0) + ); + +// R29 = R29 - R23; +// Instr #14: SUBV_H of sub0 and ldsh0; the node is exposed as resH and aliased as lastNode below. +SDNode *resH /*sub1*/ = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub0, 0), + SDValue(ldsh0, 0), + // glue (or chain) input edge + SDValue(ldsh0, 1) + ); + +SDNode *lastNode = resH; Index: lib/Target/Connex/TargetInfo/CMakeLists.txt =================================================================== --- lib/Target/Connex/TargetInfo/CMakeLists.txt +++ lib/Target/Connex/TargetInfo/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_library(LLVMConnexInfo + ConnexTargetInfo.cpp + ) Index: lib/Target/Connex/TargetInfo/ConnexTargetInfo.cpp
=================================================================== --- lib/Target/Connex/TargetInfo/ConnexTargetInfo.cpp +++ lib/Target/Connex/TargetInfo/ConnexTargetInfo.cpp @@ -0,0 +1,25 @@ +//===-- ConnexTargetInfo.cpp - Connex Target Implementation ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Connex.h" +#include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +namespace llvm { +Target TheConnexTarget; +} + +/// Registers TheConnexTarget with the TargetRegistry under the name "connex". +/// The arch-match predicate accepts Triple::connex, so the target can be found +/// from a target triple and not only by its registered name. +extern "C" void LLVMInitializeConnexTargetInfo() { + TargetRegistry::RegisterTarget( + TheConnexTarget, "connex", "Connex", "Connex", + [](Triple::ArchType Arch) { return Arch == Triple::connex; }, + /*HasJIT=*/true); +} Index: lib/Target/Connex/TargetInfo/LLVMBuild.txt =================================================================== --- lib/Target/Connex/TargetInfo/LLVMBuild.txt +++ lib/Target/Connex/TargetInfo/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/Target/Connex/TargetInfo/LLVMBuild.txt ----------------*- Conf -*--===; +; +; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory.
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = ConnexInfo +parent = Connex +required_libraries = Support +add_to_library_groups = Connex Index: lib/Target/LLVMBuild.txt =================================================================== --- lib/Target/LLVMBuild.txt +++ lib/Target/LLVMBuild.txt @@ -24,6 +24,7 @@ AArch64 AVR BPF + Connex Lanai Hexagon MSP430 Index: test/CodeGen/Connex/basictest.ll =================================================================== --- test/CodeGen/Connex/basictest.ll +++ test/CodeGen/Connex/basictest.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -march=connex | FileCheck %s +; Basic scalar code generation checks; run against the Connex backend, which is the target this directory is gated on in lit.local.cfg. +; NOTE(review): the CHECK patterns below were previously run with -march=bpfel; confirm they match the assembly printed for -march=connex. + +define i32 @test0(i32 %X) { + %tmp.1 = add i32 %X, 1 + ret i32 %tmp.1 +; CHECK-LABEL: test0: +; CHECK: addi r1, 1 +} + +; CHECK-LABEL: store_imm: +; CHECK: stw 0(r1), r{{[03]}} +; CHECK: stw 4(r2), r{{[03]}} +define i32 @store_imm(i32* %a, i32* %b) { +entry: + store i32 0, i32* %a, align 4 + %0 = getelementptr inbounds i32, i32* %b, i32 1 + store i32 0, i32* %0, align 4 + ret i32 0 +} + +@G = external global i8 +define zeroext i8 @loadG() { + %tmp = load i8, i8* @G + ret i8 %tmp +; CHECK-LABEL: loadG: +; CHECK: ld_64 r1 +; CHECK: ldb r0, 0(r1) +} Index: test/CodeGen/Connex/lit.local.cfg =================================================================== --- test/CodeGen/Connex/lit.local.cfg +++ test/CodeGen/Connex/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'Connex' in config.root.targets: + config.unsupported = True