Index: CMakeLists.txt =================================================================== --- CMakeLists.txt +++ CMakeLists.txt @@ -321,6 +321,7 @@ AMDGPU ARM BPF + Connex Hexagon Lanai Mips Index: CODE_OWNERS.TXT =================================================================== --- CODE_OWNERS.TXT +++ CODE_OWNERS.TXT @@ -194,6 +194,10 @@ E: alexei.starovoitov@gmail.com D: BPF backend +N: Alex Susu +E: alex.susu@gmail.com +D: Connex backend + N: Tom Stellard E: tstellar@redhat.com D: Stable release management (x.y.[1-9] releases), AMDGPU Backend, libclc Index: include/llvm/ADT/Triple.h =================================================================== --- include/llvm/ADT/Triple.h +++ include/llvm/ADT/Triple.h @@ -53,6 +53,7 @@ avr, // AVR: Atmel AVR microcontroller bpfel, // eBPF or extended BPF or 64-bit BPF (little endian) bpfeb, // eBPF or extended BPF or 64-bit BPF (big endian) + connex, // Connex vector processor hexagon, // Hexagon: hexagon mips, // MIPS: mips, mipsallegrex, mipsr6 mipsel, // MIPSEL: mipsel, mipsallegrexe, mipsr6el Index: include/llvm/CodeGen/SelectionDAG.h =================================================================== --- include/llvm/CodeGen/SelectionDAG.h +++ include/llvm/CodeGen/SelectionDAG.h @@ -270,6 +270,12 @@ uint16_t NextPersistentId = 0; public: + DenseMap *crtNodeMapPtr; + + void SetNodeMap(DenseMap *aCrtNodeMapPtr); + + void UpdateNodeMapSDValue(SDNode *oldSDN, SDValue &newSDV); + /// Clients of various APIs that cause global effects on /// the DAG can optionally implement this interface. This allows the clients /// to handle the various sorts of updates that happen. 
@@ -1217,6 +1223,12 @@ MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT, SDValue Op1, SDValue Op2, SDValue Op3); MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT, + SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4); + MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1, + EVT VT2, SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4); + MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT, ArrayRef Ops); MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1, EVT VT2, SDValue Op1, SDValue Op2); Index: include/llvm/CodeGen/SelectionDAGISel.h =================================================================== --- include/llvm/CodeGen/SelectionDAGISel.h +++ include/llvm/CodeGen/SelectionDAGISel.h @@ -56,6 +56,7 @@ const TargetLowering *TLI; bool FastISelFailed; SmallPtrSet ElidedArgCopyInstrs; + DenseMap crtNodeMap; /// Current optimization remark emitter. /// Used to report things like combines and FastISel failures. Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -1182,6 +1182,7 @@ include "llvm/IR/IntrinsicsMips.td" include "llvm/IR/IntrinsicsAMDGPU.td" include "llvm/IR/IntrinsicsBPF.td" +include "llvm/IR/IntrinsicsConnex.td" include "llvm/IR/IntrinsicsSystemZ.td" include "llvm/IR/IntrinsicsWebAssembly.td" include "llvm/IR/IntrinsicsRISCV.td" Index: include/llvm/IR/IntrinsicsConnex.td =================================================================== --- include/llvm/IR/IntrinsicsConnex.td +++ include/llvm/IR/IntrinsicsConnex.td @@ -0,0 +1,106 @@ +//===- IntrinsicsConnex.td - Defines Connex-S intrinsics ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines all of the Connex-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +// All Connex-S vector processor intrinsics start with "llvm.connex." +// +let TargetPrefix = "connex" in { + + /* + * Note: all intrinsics defined in these .td files start with + * the int_ prefix (from intrinsic). For this file they start with + * int_connex prefix - otherwise we get the following TableGen error + * <> + * + * The LLVM IR intrinsics extend the LLVM language s.t. we can use + * these instructions in an LLVM IR program. We also need to define the + * corresponding assembly instructions in the back end TableGen files. + */ + + /* Following Intrinsics.td: + class Intrinsic ret_types, + list param_types = [], + list properties = [], + string name = ""> + */ + + + /* Small-note: + llvm_i64_ty makes simpler my LLVM IR generation in the LoopVectorize.cpp + module: + def int_connex_repeat_x_times : Intrinsic<[], [llvm_i64_ty], []>; + But llvm_i32_ty is in accordance to the original i32 type of n.vec in the + LoopVectorize.cpp module: + def int_connex_repeat_x_times : Intrinsic<[], [llvm_i32_ty], []>; + + Small-note: We get inspired from include/llvm/IR/IntrinsicsPowerPC.td: + // Intrinsics used to generate ctrl-based loops. + def int_ppc_mtctr : Intrinsic<[], [llvm_anyint_ty], []>; + + Small-note: Trying to use a polymorphic definition, which requires + specifying the actual type in Function::Create(FunctionType::get(), ...) 
+ is: + def int_connex_repeat_x_times : Intrinsic<[], [llvm_anyint_ty], []>; + When instantiating it in LoopVectorize.cpp like this: + Value *instrinsicFunc = Intrinsic::getDeclaration(M, + Intrinsic::connex_repeat_x_times); + it gives error at runtime: + llvm::ArrayRef::operator[](size_t) const [with T = llvm::Type*; + size_t = long unsigned int]: Assertion `Index < Length && + "Invalid index!"' failed. + */ + def int_connex_repeat_x_times : Intrinsic<[], [llvm_i64_ty], []>; + def int_connex_end_repeat : Intrinsic<[], [], []>; + + /* Note: Possibly useful in the future. + Connex Opincaa's END_REPEAT does not have a relative offset, + as the standard Connex assembly ijmpnzdec instruction, + since it falls on Opincaa to compute the jump back relative offset. + We can also use a setlc to position it outside the loop created by the + ijmpnzdec instruction by using it inside a delay-slot instruction. + + def int_connex_setlc : Intrinsic<[], [llvm_i16_ty], []>; + def int_connex_ijmpnzdec : Intrinsic<[], [], []>; + */ + + + + /* IMPORTANT: REDUCE cannot return a value. It is the duty of the host (CPU) + to read the result itself from the REDUCE issued by Connex-S. + Therefore this definition is incorrect: + def int_connex_reduce : Intrinsic<[llvm_i32_ty], [llvm_v128i16_ty], []>; + */ + /* GOOD: + def int_connex_reduce : Intrinsic<[], [llvm_v128i16_ty], []>; + def int_connex_reduce_i32 : Intrinsic<[], [llvm_v64i32_ty], []>; + def int_connex_reduce_f16 : Intrinsic<[], [llvm_v128f16_ty], []>; + */ + def int_connex_reduce : Intrinsic<[], [llvm_anyvector_ty], []>; + + /* Note: ctpop is already defined in Intrinsics.td. 
+ So the below definition is not required: + def int_connex_ctpop : Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty], []>; + */ + + + // Inherited BPF scalar intrinsics: Specialized loads from packet + def int_connex_load_byte : GCCBuiltin<"__builtin_connex_load_byte">, + Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>; + def int_connex_load_half : GCCBuiltin<"__builtin_connex_load_half">, + Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>; + def int_connex_load_word : GCCBuiltin<"__builtin_connex_load_word">, + Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem]>; + def int_connex_pseudo : GCCBuiltin<"__builtin_connex_pseudo">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty]>; +} + Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1469,6 +1469,9 @@ LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG)); + // Replacing SDNode N with RV in crtNodeMap + DAG.UpdateNodeMapSDValue(N, RV); + if (N->getNumValues() == RV.getNode()->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode()); else { Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -82,6 +82,33 @@ return Res; } +void SelectionDAG::SetNodeMap(DenseMap *aCrtNodeMapPtr) { + crtNodeMapPtr = aCrtNodeMapPtr; +} + +void SelectionDAG::UpdateNodeMapSDValue(SDNode *oldSDN, SDValue &newSDV) { + /* NOTE: SelectionDAGBuilder defines DenseMap NodeMap. + * I added in SelectionDAGISel a copy of it, crtNodeMap. + * The pointer crtNodeMapPtr here is the pointer of crtNodeMap + * initialized in SelectionDAGISel::CodeGenAndEmitDAG(). 
+ */ + for (auto iterNodeMap = crtNodeMapPtr->begin(); + iterNodeMap != crtNodeMapPtr->end(); iterNodeMap++) { + auto tmp1 = (*iterNodeMap); + + const Value *crtValue = (const Value *)(tmp1.first); + + SDValue crtSDValue = tmp1.second; + SDNode *crtSDNode = crtSDValue.getNode(); + + if (crtSDNode == oldSDN) { + (*crtNodeMapPtr)[crtValue] = newSDV; + break; + } + } +} + + // Default null implementations of the callbacks. void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {} void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} @@ -7803,6 +7830,24 @@ } MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT, + SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3, Op4 }; + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, + EVT VT1, EVT VT2, + SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3, Op4 }; + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1, EVT VT2, EVT VT3, ArrayRef Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3); Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -38,6 +38,7 @@ #include #include #include +#include "llvm/Support/Debug.h" namespace llvm { @@ -125,6 +126,11 @@ MapVector DanglingDebugInfoMap; public: + // Add a getter for NodeMap + DenseMap &getNodeMap() { + return NodeMap; + } + /// Loads are not emitted to the program immediately. We bunch them up and /// then emit token factor nodes when possible. This allows us to get simple /// disambiguation between loads without worrying about alias analysis. 
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -9090,8 +9090,15 @@ void TargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { - if (SDValue Res = LowerOperation(SDValue(N, 0), DAG)) - Results.push_back(Res); + SDValue Res1 = LowerOperation(SDValue(N, 0), DAG); + if (Res1.getNode()) + Results.push_back(Res1); + + if (N->getNumValues() > 1) { + SDValue Res2 = LowerOperation(SDValue(N, 1), DAG); + if (Res2.getNode()) + Results.push_back(Res2); + } } SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -692,6 +692,9 @@ CurDAG->setRoot(SDB->getControlRoot()); HadTailCall = SDB->HasTailCall; SDB->resolveOrClearDbgInfo(); + + crtNodeMap = SDB->getNodeMap(); + SDB->clear(); // Final step, emit the lowered DAG as machine code. @@ -778,6 +781,9 @@ // Run the DAG combiner in pre-legalize mode. { + // We should do this only once + CurDAG->SetNodeMap(&crtNodeMap); + NamedRegionTimer T("combine1", "DAG Combining 1", GroupName, GroupDescription, TimePassesIsEnabled); CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel); Index: lib/Target/Connex/Connex.h =================================================================== --- lib/Target/Connex/Connex.h +++ lib/Target/Connex/Connex.h @@ -0,0 +1,35 @@ +//===-- Connex.h - Top-level interface for Connex representation ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_CONNEX_CONNEX_H +#define LLVM_LIB_TARGET_CONNEX_CONNEX_H + +#include "MCTargetDesc/ConnexMCTargetDesc.h" +#include "llvm/Target/TargetMachine.h" + + +// We define reserved register(s) of Connex to use for: +// - handling COPY instructions in WHERE blocks +// (see ConnexTargetMachine.cpp and ConnexISelLowering.cpp), etc +#define CONNEX_RESERVED_REGISTER_01 Connex::Wh30 +#define CONNEX_RESERVED_REGISTER_02 Connex::Wh31 +#define CONNEX_RESERVED_REGISTER_03 Connex::Wh29 + +#define COPY_REGISTER_IMPLEMENTED_WITH_ORV_H + +namespace llvm { +class ConnexTargetMachine; + +FunctionPass *createConnexISelDag(ConnexTargetMachine &TM); +} + +#endif Index: lib/Target/Connex/ConnexAsmPrinter.cpp =================================================================== --- lib/Target/Connex/ConnexAsmPrinter.cpp +++ lib/Target/Connex/ConnexAsmPrinter.cpp @@ -0,0 +1,1284 @@ +//===-- ConnexAsmPrinter.cpp - Connex LLVM assembly writer ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to the Connex assembly language. 
+// +//===----------------------------------------------------------------------===// + +#include "Connex.h" +#include "ConnexInstrInfo.h" +#include "ConnexMCInstLower.h" +#include "ConnexTargetMachine.h" +// 2019_03_30_TODO: #include "BTFDebug.h" +#include "InstPrinter/ConnexInstPrinter.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/CommandLine.h" //See http://llvm.org/docs/CommandLine.html + +using namespace llvm; + +// Inspired from llvm/lib/CodeGen/TargetPassConfig.cpp +static cl::opt EnableASMPrint("enable-asm-print", + cl::Hidden, + cl::init(false), + cl::desc("Enable special instrumentation in ConnexASMPrinter")); + +static cl::opt TreatRepeat2ndInnerLoopGlobal("treat-repeat-2nd-inner-loop", + cl::Hidden, + cl::init(true), + cl::desc("Treat well 2nd inner loop in kernel and use REPEAT for it and host-side Opincaa for at the inner loop")); + + +#define DEBUG_TYPE "asm-printer" + +#include + +#include "ConnexAsmPrinter_more.h" + + + +// We need to store the correspondence between MachineInstr and the lowered +// MCInst, since MCInst does not. +// This is used in ConnexInstPrinter.cpp. 
+const MachineInstr *crtMI; +extern std::unordered_map mapLD_ST_REPEAT_InlineAsm; + + + + +namespace { +class ConnexAsmPrinter : public AsmPrinter { +public: + explicit ConnexAsmPrinter(TargetMachine &TM, + std::unique_ptr Streamer) + : AsmPrinter(TM, std::move(Streamer)) {} + + StringRef getPassName() const override { return "Connex Assembly Printer"; } + + /* + (From http://llvm.org/docs/doxygen/html/classllvm_1_1MachineFunctionPass.html + we see SelectionDAGISel and AsmPrinter were the only passes that inherit + MachineFunctionPass, from this back end.) + From http://llvm.org/docs/doxygen/html/AsmPrinter_8h_source.html: + /// Set up the AsmPrinter when we are working on a new module. If your pass + /// overrides this, it must make sure to explicitly call this implementation. + */ + + bool IsVectorBody(const char *str) { + #define STR_VECTOR_BODY "vector.body" + #define STR_VECTOR_BODY_PREHEADER ".preheader" + + // We can have several BBs with name vector.bodyXYZT (but we do NOT + // search for STR_VECTOR_BODY_PREHEADER, which can be e.g., + // vector.body40.preheader) + if ((strncmp(str, STR_VECTOR_BODY, + strlen(STR_VECTOR_BODY)) == 0) && + (strncmp(str + strlen(str) - strlen(STR_VECTOR_BODY_PREHEADER), + STR_VECTOR_BODY_PREHEADER, + strlen(STR_VECTOR_BODY_PREHEADER)) == 0)) + return false; + if (strncmp(str, STR_VECTOR_BODY, strlen(STR_VECTOR_BODY)) != 0) + return false; + + return true; + } + + int IfImmSpecialUpdateMap(const MachineInstr *MI, const MachineInstr *MI2) { + unsigned imm; + if (MI2->getOpcode() == Connex::REPEAT) { + const MachineOperand &MI2MO0 = MI2->getOperand(0); + LLVM_DEBUG(dbgs() << "IfImmSpecialUpdateMap(): MI2MO0 = " + << MI2MO0 << "\n"); + + imm = MI2MO0.getImm(); + } + else { + const MachineOperand &MI2MO0 = MI2->getOperand(0); + LLVM_DEBUG(dbgs() << "IfImmSpecialUpdateMap(): MI2MO0 = " + << MI2MO0 << "\n"); + + const MachineOperand &MI2MO1 = MI2->getOperand(1); + LLVM_DEBUG(dbgs() << "IfImmSpecialUpdateMap(): MI2MO1 = " + << MI2MO1 
<< "\n"); + + imm = MI2MO1.getImm(); + } + + LLVM_DEBUG(dbgs() << "IfImmSpecialUpdateMap(): imm = " + << imm << "\n"); + + if ((imm == CONNEX_MEM_NUM_ROWS + 10) || + (imm == VALUE_BOGUS_REPEAT_X_TIMES)) { + LLVM_DEBUG(dbgs() << "IfImmSpecialUpdateMap(): MI2 = " + << *MI2 << "\n"); + LLVM_DEBUG(dbgs() << "IfImmSpecialUpdateMap(): MI->getOperand(0) = " + << MI->getOperand(0) << "\n"); + LLVM_DEBUG(dbgs() << "IfImmSpecialUpdateMap(): MI = " + << MI + << ", MI2 (ptr) = " << MI2 << "\n"); + + mapLD_ST_REPEAT_InlineAsm[MI2] = MI; + return 1; + } + + return -1; + } + + + void MoveToFrontRepeat(MachineBasicBlock *MBB) { + LLVM_DEBUG(dbgs() << "Entered MoveToFrontRepeat(MBB = " + << MBB << ")\n"); + + // Moving the REPEAT and it's symbolic operand in INLINEASM at the + // front of the MBB. + for (auto MIItr = MBB->begin(); MIItr != MBB->end(); ++MIItr) { + MachineInstr *MI = &(*MIItr); + + if (MI->getOpcode() == Connex::REPEAT_SYM_IMM) { + LLVM_DEBUG(dbgs() << "MoveToFrontRepeat(): Found Connex::REPEAT_SYM_IMM\n"); + MIItr++; + + MachineInstr *MI2 = &(*MIItr); + + //assert(MI2->getOpcode() == TargetOpcode::INLINEASM); + if (MI2->getOpcode() == TargetOpcode::INLINEASM) { + LLVM_DEBUG(dbgs() << "MoveToFrontRepeat(): Moving the successor " + "INLINEASM together with the Connex::REPEAT_SYM_IMM\n"); + + MBB->remove(MI2); + MBB->insert(MBB->front(), MI2); + } + else { + MIItr++; + MI2 = &(*MIItr); + + LLVM_DEBUG(dbgs() << "MoveToFrontRepeat(): Moving the following " + "(not successor) INLINEASM together with the " + "Connex::REPEAT_SYM_IMM\n"); + //MIItr++; + + if (MI2->getOpcode() == TargetOpcode::INLINEASM) { + MBB->remove(MI2); + MBB->insert(MBB->front(), MI2); + } + else { + assert(0 && "Can't find INLINEASM associated to REPEAT_SYM_IMM"); + } + } + + LLVM_DEBUG(dbgs() << "MoveToFrontRepeat(): Moving Connex::REPEAT_SYM_IMM\n"); + + MBB->remove(MI); + MBB->insert(MBB->front(), MI); + + break; + } + } + } + + + void MoveToFrontInlineAsm(MachineBasicBlock *MBB, char 
*strToSearch) { + LLVM_DEBUG(dbgs() << "Entered MoveToFrontInlineAsm(MBB = " + << MBB + << ", strToSearch = " << strToSearch << ")\n"); + + // Moving every INLINEASM whose symbol operand contains strToSearch to the + // front of the MBB. + for (auto MIItr = MBB->begin(); MIItr != MBB->end(); /* ++MIItr */) { + MachineInstr *MI = &(*MIItr); + + // 2018_10_05: We avoid iterator invalidation: + // See some comments on iterator invalidation (when doing remove) at + // http://llvm.1065342.n5.nabble.com/deleting-or-replacing-a-MachineInst-td77723.html + MachineBasicBlock::iterator MIsucc = MIItr; + MIsucc++; + + if (MI->getOpcode() == TargetOpcode::INLINEASM) { + LLVM_DEBUG(dbgs() << " MoveToFrontInlineAsm(): found INLINEASM MI = " + << *MI << "\n"); + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineInstr.html + for (unsigned index = 0; index < MI->getNumOperands(); index++) { + MachineOperand *miOpnd; + miOpnd = & (MI->getOperand(index)); + + LLVM_DEBUG(dbgs() << " MI->getOperand(" << index << ") = " + << *miOpnd << "\n"); + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineOperand.html + if (miOpnd->isSymbol()) { + const char *symStr = miOpnd->getSymbolName(); + LLVM_DEBUG(dbgs() << " MoveToFrontInlineAsm(): symStr = " + << symStr << "\n"); + + if (strstr(symStr, strToSearch) != NULL) { + //LLVM_DEBUG(dbgs() << " MoveToFrontInlineAsm(): Found INLINEASM with host-side for loop\n"); + LLVM_DEBUG(dbgs() << " MoveToFrontInlineAsm(): Found " + "INLINEASM with strToSearch in the symbol " + "operand\n"); + //break; + + MBB->remove(MI); + MBB->insert(MBB->front(), MI); + } + } + } + } + + // 2018_10_05: We avoid iterator invalidation + MIItr = MIsucc; + } + } + + + /* + This moves to the front of the MBB either 3 (if justOne == false) + or 1 (if justOne == true) inline ASM expression(s) if the first inline + expression has the Opincaa kernel begin. + + This function must be run first with justOne == false and then + with justOne == true. 
+ + More precisely, in LoopVectorize.cpp we added, among others, the following + 3 ASM inline expressions (consecutively): + - 1 BEGIN_KERNEL INLINEASM instruction used as loop prologue + - 1 END_KERNEL INLINEASM instruction used as + loop prologue (END_KERNEL part) + - 1 BEGIN_KERNEL INLINEASM instruction for + the loop. + We move these 3 instructions to the front of + MBB when justOne == false. This ensures that, in the + less likely case of having a VLOAD_H_SYM_IMM (and its associated inline ASM, + containing the symbolic operand) manually generated + in ConnexISelDAGToDAG.cpp, it does not become the first instruction, before + the Opincaa loop header ASM inline expression. + We also make sure that any loads from spills are put inside the loop + prologue. + + We move 1 instruction to the front since in runOnMachineFunction() we put + all instructions of the predecessor (has to be only 1 predecessor) of + vector.body at the front of MBB, so we have to move the BEGIN_KERNEL of + the loop prologue. + */ + void MoveToFront(MachineBasicBlock *MBB, bool justOne) { + MachineInstr *tmp1, *tmp2, *tmp3; //, *tmp4; + int counter = 0; + + LLVM_DEBUG(dbgs() << "Entered MoveToFront(justOne = " + << justOne << ")\n"); + + + /* We compute MIItrLastLoadAssociatedToSpill, an iterator (pointer) to + the first instruction after the loads (fills) from spills at the + beginning of the BB. + */ + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineBasicBlock.html + /* IMPORTANT: make sure we put this initialization after any other MBB mutation + so that it can be used correctly to move the 3 INLINEASM instructions. 
+ */ + MachineBasicBlock::iterator MIItrLastLoadAssociatedToSpill = MBB->front(); + + if (justOne == false) { + for (auto MIItr2 = MBB->begin(); MIItr2 != MBB->end(); ++MIItr2) { + MachineInstr *MI = &(*MIItr2); + + LLVM_DEBUG(dbgs() << " MoveToFront(): MI = " + << *MI + << ", MI->getOpcode() = " + << MI->getOpcode() + << "\n"); + + unsigned imm = -1; + if (MI->getOpcode() == Connex::LD_H) { + /* Inspired from + http://llvm.org/docs/doxygen/html/MachineInstr_8cpp_source.html, + method MachineInstr::isIdenticalTo() + */ + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + if (MO.isImm()) { + imm = MO.getImm(); + LLVM_DEBUG(dbgs() << " MoveToFront(): imm = " + << imm << "\n"); + break; + } + } + + //if (MI == is a vector load (LD_H), with offset address + /* If the imm operand > CONNEX_MEM_NUM_ROWS - 32 it (normally) + * means that the operation is generated in + * ConnexInstrInfo::storeRegToStackSlot() and + * ConnexInstrInfo::loadRegFromStackSlot(), + * part of a spill or load from spill operation. + * Note that on Connex we do not have a stack per se, + * but we emulate it at the end of the LS memory. + */ + if ((imm >= CONNEX_MEM_NUM_ROWS - 32) && + (imm < CONNEX_MEM_NUM_ROWS)) { + //MIItr2++; + MIItrLastLoadAssociatedToSpill = MIItr2; + MIItrLastLoadAssociatedToSpill++; + } + } + } // end for + } // if (justOne == false) + + /* Moving the ISD::INLINEASM instruction containing the opincaa kernel + begin at the very front of this BB. 
*/ + for (auto MIItr = MBB->begin(); MIItr != MBB->end(); + ++MIItr, ++counter) { + MachineInstr *MI = &(*MIItr); + + if (MI->getOpcode() == TargetOpcode::INLINEASM) { + LLVM_DEBUG(dbgs() << " MoveToFront() found INLINEASM MI = " + << *MI << "\n"); + + bool isOpincaaCodeBegin = false; + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineInstr.html + for (unsigned index = 0; index < MI->getNumOperands(); index++) { + MachineOperand *miOpndOpincaaCodeBegin; // = NULL; + miOpndOpincaaCodeBegin = & (MI->getOperand(index)); + + LLVM_DEBUG(dbgs() << " MI->getOperand(" << index << ") = " + << *miOpndOpincaaCodeBegin << "\n"); + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineOperand.html + if (miOpndOpincaaCodeBegin->isSymbol()) { + const char *symStr = miOpndOpincaaCodeBegin->getSymbolName(); + LLVM_DEBUG(dbgs() << " MoveToFront(): symStr = " + << symStr << "\n"); + if (strstr(symStr, STR_OPINCAA_CODE_BEGIN) != NULL) { + isOpincaaCodeBegin = true; + break; + } + } + } + + if (isOpincaaCodeBegin) { + if (counter != 0) { + // We move only if not at the beginning of MBB + tmp1 = MI; + LLVM_DEBUG(dbgs() << " MoveToFront(): moving INLINEASM to the front (counter = " + << counter << ", justOne = " + << justOne << ")\n"); + + if (justOne == true) { + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineBasicBlock.html + MBB->remove(tmp1); + MBB->insert(MBB->front(), tmp1); + } + else { + /* We move the next 3 instructions to the front of + MBB, namely: + - 1 BEGIN_KERNEL INLINEASM instruction used as + loop prologue + - 1 END_KERNEL INLINEASM instruction used as + loop prologue (END_KERNEL part) + - 1 BEGIN_KERNEL INLINEASM instruction for + the loop. 
+ + TODO TODO TODO TODO: check tmp3 and tmp2 are + also INLINEASM */ + + MIItr++; + tmp2 = &(*MIItr); + + MIItr++; + tmp3 = &(*MIItr); + + LLVM_DEBUG(dbgs() << " MoveToFront(): tmp1 = " + << *tmp1 << "\n"); + LLVM_DEBUG(dbgs() << " MoveToFront(): tmp2 = " + << *tmp2 << "\n"); + LLVM_DEBUG(dbgs() << " MoveToFront(): tmp3 = " + << *tmp3 << "\n"); + /* + MBB->remove(tmp4); + //MBB->insert(MBB->front(), tmp3); + */ + + MBB->remove(tmp3); + + MBB->remove(tmp2); + + MBB->remove(tmp1); + + /* TODO TODO TODO TODO TODO: check that the iterator + MIItrLastLoadAssociatedToSpill does NOT get + invalidated - it seems it is not invalidated even if we + change MBB, which is so because the instruction + to which the iterator points to is NOT changed. */ + MBB->insert(MIItrLastLoadAssociatedToSpill, tmp1); + MBB->insert(MIItrLastLoadAssociatedToSpill, tmp2); + MBB->insert(MIItrLastLoadAssociatedToSpill, tmp3); + } + } // END if (counter != 0) + break; + } // END if (isOpincaaCodeBegin) + } + //counter++; + } + } // END MoveToFront() + + + // Moving the last ISD::INLINEASM instruction of MBB at the very back of MBB + void MoveToBackLastInlineAsm(MachineBasicBlock *MBB) { + MachineInstr *tmp1; //, *tmp2, *tmp3; + int counter = 0; + + LLVM_DEBUG(dbgs() << " MoveToBackLastInlineAsm(): MBB = " + << *MBB << "\n"); + + for (auto MIItr = MBB->rbegin(); MIItr != MBB->rend(); + ++MIItr, ++counter) { + MachineInstr *MI = &(*MIItr); + + if (MI->getOpcode() == TargetOpcode::INLINEASM) { + LLVM_DEBUG(dbgs() << " MoveToBackLastInlineAsm() found INLINEASM MI = " + << *MI << "\n"); + + bool isOpincaaCodeEnd = false; + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineInstr.html + for (unsigned index = 0; index < MI->getNumOperands(); index++) { + MachineOperand *miOpndOpincaaCodeEnd; // = NULL; + miOpndOpincaaCodeEnd = & (MI->getOperand(index)); + + LLVM_DEBUG(dbgs() << " MI->getOperand(" << index << ") = " + << *miOpndOpincaaCodeEnd << "\n"); + + // See 
http://llvm.org/docs/doxygen/html/classllvm_1_1MachineOperand.html + if (miOpndOpincaaCodeEnd->isSymbol()) { + const char *symStr = miOpndOpincaaCodeEnd->getSymbolName(); + LLVM_DEBUG(dbgs() << " MoveToBackLastInlineAsm(): symStr = " + << symStr << "\n"); + if (strstr(symStr, STR_OPINCAA_CODE_END) != NULL) { + isOpincaaCodeEnd = true; + break; + } + } + } + + if (isOpincaaCodeEnd) { + //if (counter != 0) { // We move only if not at the beginning of MBB + tmp1 = MI; + LLVM_DEBUG(dbgs() << " MoveToBackLastInlineAsm(): moving INLINEASM to the front (counter = " + << counter << ")\n"); + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineBasicBlock.html + MBB->remove(tmp1); + MBB->insert(MBB->end(), tmp1); + //} + break; + } + } + //counter++; + } + } // END MoveToBackLastInlineAsm() + + + void ReplaceWithSymbolicIndex(MachineBasicBlock *MBB) { + assert(0 && "ReplaceWithSymbolicIndex() does NOT do anything anymore"); + + LLVM_DEBUG(dbgs() << "Entered ReplaceWithSymbolicIndex()\n"); + + unsigned imm = -1; + + for (auto &MI : *MBB) { + if ((MI.getOpcode() == Connex::LD_H) || + (MI.getOpcode() == Connex::ST_H)) { + /* Inspired from + http://llvm.org/docs/doxygen/html/MachineInstr_8cpp_source.html, + method MachineInstr::isIdenticalTo() + */ + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (MO.isImm()) { + imm = MO.getImm(); + LLVM_DEBUG(dbgs() << " ReplaceWithSymbolicIndex(): imm = " + << imm << "\n"); + /* + if (imm == CONNEX_MEM_NUM_ROWS - 32 - 10) { + MO.setImm((int64_t)-1); + } + */ + break; + } + } + } + } + } + + + // We add at the front of vector.body the instructions + // of the predecessor basic block of vector.body that is DIFFERENT from + // vector.body itself (normally vector.ph). 
+ void CopyInstructionsFromPred(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock * &predMBBGood) { + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineBasicBlock.html + /* (See also https://fossies.org/linux/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp + * method DeadMachineInstructionElim::runOnMachineFunction() for + * an example of iteration backwards). + */ + //for (auto &predMI : (*predMBB)) + unsigned counterPredMBB = 0; + + // rbegin() is a reverse_iterator + for (auto predMIItr = predMBBGood->rbegin(); + predMIItr != predMBBGood->rend(); + predMIItr++, counterPredMBB++) { + MachineInstr *predMI = &(*predMIItr); + + LLVM_DEBUG(dbgs() << " CopyInstructionsFromPred(): predMI = " + << *predMI << "\n"); + + // Need to insert them in different order + if (predMI->isBundle()) { + LLVM_DEBUG(dbgs() << " CopyInstructionsFromPred(): handling bundle\n"); + + const MachineBasicBlock *MBBBundle = predMI->getParent(); + //MachineBasicBlock::const_instr_iterator I = ++MI->getIterator(); + MachineBasicBlock::const_instr_iterator I = predMI->getIterator(); + + // IMPORTANT: We assume we work with finalized bundles + I++; + + // THIS cycles ~forever... EmitInstruction(& (*I) ); + + assert(I != MBBBundle->instr_end()); + const MachineInstr *I1 = & (*I); + LLVM_DEBUG(dbgs() << " CopyInstructionsFromPredConnexAsmPrinter::runOnMachineFunction(): I1 = " + << *I1 << "\n"); + // + I++; + + + // IMPORTANT: We assume we work with bundles with only 2 instructions + + /* + // From http://llvm.org/docs/doxygen/html/classllvm_1_1MachineInstr.html + bool isInsideBundle () const + Return true if MI is in a bundle (but not the first MI in a bundle). + bool isBundled () const + Return true if this instruction part of a bundle. 
+ */ + /* + // TODO: this fails if bundle created in addPreSched2() + // (before post-RA scheduler): + assert(I->isInsideBundle()); + assert(I->isBundled()); + */ + // + /* + // TODO: this fails if bundle created in addPreSched2() + // (before post-RA scheduler): + assert(I->isInsideBundle()); + assert(I->isBundled()); + */ + assert(I != MBBBundle->instr_end()); + const MachineInstr *I2 = & (*I); + + MachineInstr *newPredMI2 = MF.CloneMachineInstr(I2); + LLVM_DEBUG(dbgs() << " CopyInstructionsFromPred(): newPredMI2 = " + << *newPredMI2 << "\n"); + MBB.insert(MBB.front(), newPredMI2); + + MachineInstr *newPredMI1 = MF.CloneMachineInstr(I1); + LLVM_DEBUG(dbgs() << " CopyInstructionsFromPred(): newPredMI1 = " + << *newPredMI1 << "\n"); + MBB.insert(MBB.front(), newPredMI1); + + /* + while (I != MBBBundle->instr_end() && I->isInsideBundle()) { + MachineInstr *newPredMI = + MF.CloneMachineInstr(& (*I)); + MBB.insert(MBB.front(), newPredMI); + + //EmitInstruction(& (*I) ); + + ++I; + } + */ + + LLVM_DEBUG(dbgs() << " CopyInstructionsFromPred(): END handling bundle\n"); + + continue; + } + + + /* + * We avoid the last instruction of predMBBGood, since it is an + * unconditional JMP + */ + if (counterPredMBB == 0 && + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineInstr.html + predMI->isUnconditionalBranch()) { // predMBBGood->size()) + /* For llc -O3 it removes the JMP at the end of + vector.ph, hence it merges it with vector.body, + even if it leaves the entry label of vector.body. + So we need to check if predMI is JMP with + isUnconditionalBranch(). 
*/ + LLVM_DEBUG(dbgs() << " CopyInstructionsFromPred(): found a JMP, " + "so not copying it in vector.body\n"); + continue; + } + + /* IMPORTANT note: EmitInstruction() fails for ISD::INLINEASM + EmitInstruction(&predMI); + */ + + /* See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineFunction.html + MachineInstr *CloneMachineInstr(const MachineInstr *Orig); + CloneMachineInstr - Create a new MachineInstr which is a + copy of the 'Orig' instruction, identical in all ways except + the instruction has no parent, prev, or next. + */ + MachineInstr *newPredMI = MF.CloneMachineInstr(predMI); + + //MBB.insert(MBB.front(), &predMI); + // Gives error: "Assertion `!N->getParent() && + // "machine instruction already in a basic block"' failed." + MBB.insert(MBB.front(), newPredMI); + } + + #ifdef NNNNO + /* + * I guess normally we should have 2 predecessors, but since I mess + * up in LoopVectorize.cpp the vector.body block in some cases + * (e.g., with a few iterations, in the order of magnitude of the + * vector unit width) it can remain with only 1 predecessor. 
+ */ + assert(numPredecessors <= 2 && + "vector.body should have at most 2 predecessors: itself and one more"); + #endif + } + + + // IMPORTANT: We copy from successor BB (middle.block) to vector.body BB + void CopyInstructionsFromSucc(MachineFunction &MF, MachineBasicBlock &MBB) { + LLVM_DEBUG(dbgs() << " CopyInstructionsFromSucc(): Move code from succ of block " + << MBB.getName().data() << "\n"); + + int numSuccessors = 0; + + for (auto succMBB : MBB.successors()) { + numSuccessors++; + + const char *strSuccMBB = succMBB->getName().data(); + + /* + if (IsVectorBody(strPredMBB) == true) + continue; + */ + + LLVM_DEBUG(dbgs() << " CopyInstructionsFromSucc(): strSuccMBB = " + << strSuccMBB << "\n"); + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineBasicBlock.html + /* (See also https://fossies.org/linux/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp + * method DeadMachineInstructionElim::runOnMachineFunction() for + * an example of iteration backwards). + */ + //for (auto &predMI : (*predMBB)) + unsigned counterSuccMBB = 0; + + // rbegin() is a reverse_iterator + for (auto succMIItr = succMBB->begin(); + succMIItr != succMBB->end(); + succMIItr++, counterSuccMBB++) { + MachineInstr *succMI = &(*succMIItr); + + LLVM_DEBUG(dbgs() << " CopyInstructionsFromSucc(): succMI = " + << *succMI << "\n"); + + /* + * We avoid the last instruction of predMBB, since it is an + * unconditional JMP + */ + if ( + // counterSuccMBB == 0 && + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineInstr.html + (succMI->isUnconditionalBranch() || + succMI->isConditionalBranch()) ) { // predMBB->size()) + /* For llc -O3 it removes the JMP at the end of + vector.ph, hence it merges it with vector.body, + even if it leaves the entry label of vector.body. + So we need to check if predMI is JMP with + isUnconditionalBranch(). 
*/ + LLVM_DEBUG(dbgs() << "CopyInstructionsFromSucc(): found a JMP, " + "so not copying it in vector.body\n"); + continue; + } + + /* IMPORTANT note: EmitInstruction() fails for ISD::INLINEASM + EmitInstruction(&predMI); + */ + + /* See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineFunction.html + MachineInstr *CloneMachineInstr(const MachineInstr *Orig); + CloneMachineInstr - Create a new MachineInstr which is a + copy of the 'Orig' instruction, identical in all ways except + the instruction has no parent, prev, or next. + */ + MachineInstr *newSuccMI = MF.CloneMachineInstr(succMI); + + // Gives error: "Assertion `!N->getParent() && "machine instruction already in a basic block"' failed." + //MBB.insert(MBB.front(), &predMI); + MBB.insert(MBB.back(), newSuccMI); + } + + // Instead of break we should check if predMBB is the BB "just" + // above predMBBGood or below + break; + } + + assert(numSuccessors == 1); + } // END CopyInstructionsFromSucc() + + +//#define TRY_DFS +#ifdef TRY_DFS +#define RPO + std::map visitedMBB; + std::vector sortedListMBB; + + void DFS(MachineBasicBlock *n) { + // See http://www.cplusplus.com/reference/map/map/count/ + if (visitedMBB.count(n) != 0) + return; + + // See http://www.cplusplus.com/reference/map/map/insert/ + visitedMBB.insert(std::pair(n, true)); + #ifndef RPO + sortedListMBB.push_back(n); + #endif + + const char *strN = n->getName().data(); + LLVM_DEBUG(dbgs() << "DFS(): BB name: = " << strN + << ", n = " << n << "\n"); + + #ifdef NNNO + // If in the successors we have vector.ph, vector.body, etc we choose those + // first. 
+ for (auto MBB : n->successors()) { + const char *strMBB = MBB->getName().data(); + /* + LLVM_DEBUG(dbgs() << "DFS(): BB name: = " << strMBB + << ", MBB = " << MBB << "\n"); + */ + if (strcmp(strMBB, "min.iters.checked") == 0 || + // somewhat-IMPORTANT-TODO: check only for "vector.*" not for all these below + strcmp(strMBB, "vector.memcheck") == 0 || + strcmp(strMBB, "vector.ph") == 0 || + strcmp(strMBB, "vector.body.preheader") == 0 || + strcmp(strMBB, "vector.body") == 0) { + DFS(MBB); // This will update visitedMBB to avoid further visits + } + } + #endif + + //for (auto i = n->succ_begin(); i != + //for (auto &MBB : n->successors()) + for (auto MBB : n->successors()) { + /* + const char *strMBB = MBB->getName().data(); + LLVM_DEBUG(dbgs() << "DFS(): BB name: = " << strMBB + << ", MBB = " << MBB << "\n"); + */ + DFS(MBB); + } + + #ifdef RPO + sortedListMBB.push_back(n); + #endif + } +#endif // TRY_DFS + + + /// Emit the specified function out to the OutStreamer. + bool runOnMachineFunction(MachineFunction &MF) override { + LLVM_DEBUG(dbgs() + << "Entered ConnexAsmPrinter::runOnMachineFunction()...\n"); + LLVM_DEBUG(dbgs() << " EnableASMPrint = " << EnableASMPrint << "\n"); + + MachineBasicBlock *entryMBB = NULL; + + #ifdef TRY_DFS + LLVM_DEBUG(dbgs() << "Printing the MBBs, as they are ordered now:\n"); + /* Looking at http://llvm.org/doxygen/classllvm_1_1MachineFunction.html + * it seems it's not possible to obtain the root(s) of the MB otherwise. 
+ */ + for (auto &MBB : MF) { + if (entryMBB == NULL) + entryMBB = &MBB; + const char *strMBB = MBB.getName().data(); + LLVM_DEBUG(dbgs() << " BB name: = " << strMBB << "\n"); + } + // + visitedMBB.clear(); + sortedListMBB.clear(); + DFS(entryMBB); + // + #ifdef RPO + LLVM_DEBUG(dbgs() << "ConnexAsmPrinter: (RPO) sortedListMBB = \n"); + for (int idxSListMBB = sortedListMBB.size() - 1; + idxSListMBB >= 0; idxSListMBB--) { + MachineBasicBlock *MBB = sortedListMBB[idxSListMBB]; + const char *strMBB = MBB->getName().data(); + LLVM_DEBUG(dbgs() << " BB name: = " << strMBB + << ", MBB = " << MBB << "\n"); + } + #else + LLVM_DEBUG(dbgs() << "ConnexAsmPrinter: sortedListMBB = \n"); + for (auto &MBB : sortedListMBB) { + const char *strMBB = MBB->getName().data(); + LLVM_DEBUG(dbgs() << " BB name: = " << strMBB + << ", MBB = " << MBB << "\n"); + } + #endif + + /* + LLVM_DEBUG(dbgs() << "Printing the MBBs, as they are ordered after MF.sort():\n"); + for (auto &MBB : MF) { + const char *strMBB = MBB.getName().data(); + LLVM_DEBUG(dbgs() << " BB name: = " << strMBB << "\n"); + } + */ + #endif // TRY_DFS + + int numVectorizedLoops = 0; + // Note: the ASCIIZ string given as argument below is const char * + ReadStartLocFile(const_cast("startLoc.txt"), true); + LLVM_DEBUG(dbgs() + << "runOnMachineFunction(): treatRepeat2ndInnerLoop.size() = " + << treatRepeat2ndInnerLoop.size() << "\n"); + + if (EnableASMPrint) { + // ProcessFunction() just updates mapLD_ST_REPEAT_InlineAsm for the + // given function. 
+ ProcessFunction(&MF); + + this->MF = &MF; + + // Inspired from ConnexRegisterInfo.cpp: + //const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + + // Inspired from http://llvm.org/docs/doxygen/html/AsmPrinter_8cpp_source.html: + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineFunction.html + for (auto &MBB : MF) { + const char *strMBB = MBB.getName().data(); + LLVM_DEBUG(dbgs() << " BB name: strMBB = " << strMBB << "\n"); + + /* + // This is for debugging purposes only - we check that MBB are + // (indeed?) in 1-to-1 correspondence to the BB in LLVM IR. + if (IsVectorBody(strMBB)) { + LLVM_DEBUG(dbgs() << " TreatRepeat2ndInnerLoopGlobal == true\n"); + LLVM_DEBUG(dbgs() << " Enumerating pred of vector.body\n"); + for (auto predMBB : MBB.predecessors()) { + const char *strPredMBB = predMBB->getName().data(); + + if (IsVectorBody(strPredMBB) == true) + continue; + + LLVM_DEBUG(dbgs() << " Enumerating pred of pred of vector.body\n"); + for (auto predPredMBB : predMBB->predecessors()) { + const char *strPredPredMBB = predPredMBB->getName().data(); + + LLVM_DEBUG(dbgs() << " strPredPredMBB = " + << strPredPredMBB << "\n"); + } + } + } + */ + + if (numVectorizedLoops >= (int)treatRepeat2ndInnerLoop.size()) + TreatRepeat2ndInnerLoopGlobal = false; + else + TreatRepeat2ndInnerLoopGlobal = treatRepeat2ndInnerLoop[numVectorizedLoops]; + + LLVM_DEBUG(dbgs() << "runOnMachineFunction(): TreatRepeat2ndInnerLoopGlobal = " + << TreatRepeat2ndInnerLoopGlobal << "\n"); + LLVM_DEBUG(dbgs() << "runOnMachineFunction(): numVectorizedLoops = " + << numVectorizedLoops << "\n"); + + if (TreatRepeat2ndInnerLoopGlobal == true) { +// TODO: think a bit: we should always call MoveToFrontRepeat() - we complicate a bit, BUT it is highly unlikely to have a REPEAT() after the last vector.body + // A bit inefficient - we try all MBB + MoveToFrontRepeat(&MBB); + } + else { + // If we do this we risk to have comments like "Map/Reduction part" after the REPEAT Opincaa 
instruction + MoveToFrontRepeat(&MBB); + } + + + // We take care to put the beginning marker for Opincaa kernel at the very front of its basic block, MBB - we try all MBBs + LLVM_DEBUG(dbgs() << "Calling MoveToFrontInlineAsm(STR_OPINCAA_CODE_BEGIN)\n"); + MoveToFrontInlineAsm(&MBB, STR_OPINCAA_CODE_BEGIN); + LLVM_DEBUG(dbgs() << "Finished calling MoveToFrontInlineAsm(STR_OPINCAA_CODE_BEGIN)\n"); + + if (IsVectorBody(strMBB) == false) + continue; + + numVectorizedLoops++; + + //MoveToFrontRepeat(MBB); + // + //ReplaceWithSymbolicIndex(&MBB); + /* IMPORTANT: + * We move the Inline ASM expressions to the beginning of the BB, + * by using MoveToFront(), + * such that, immediately after (see code below) we put the + * instructions of the predecessor of the vector.body BB + * at the top and then call MoveToFront(&MBB, true) again + * to make the code OK. + */ + //MoveToFront(&MBB, false); + + MachineBasicBlock *predMBBGood; + int numPredecessors = 0; + for (auto predMBB : MBB.predecessors()) { + numPredecessors++; + + const char *strPredMBB = predMBB->getName().data(); + + if (IsVectorBody(strPredMBB) == true) + continue; + else + predMBBGood = predMBB; + + LLVM_DEBUG(dbgs() << " runOnMachineFunction(): strPredMBB = " + << strPredMBB << "\n"); + } + // I guess normally we should have 2 predecessors, but since I mess + // up in LoopVectorize.cpp the vector.body block in some cases + // (e.g., with a few iterations, in the order of magnitude of the + // vector unit width) it can remain with only 1 predecessor. 
+ assert(numPredecessors <= 2 && "vector.body should have at most " + "2 predecessors: itself and one more"); + + if (TreatRepeat2ndInnerLoopGlobal == false) { + //CopyInstructionsFromPred(MF, MBB, predMBBGood); + + // We move the header of the Opincaa kernel + MoveToFront(predMBBGood, true); + } + + /* + if (TreatRepeat2ndInnerLoopGlobal == false) { + CopyInstructionsFromSucc(MF, MBB); + } + */ + + // Does NOT help: MoveToFront(&MBB, true); + LLVM_DEBUG(dbgs() << + " runOnMachineFunction(): calling MoveToFrontInlineAsm(&MBB)\n"); + //MoveToFront(&MBB, false); + MoveToFrontInlineAsm(&MBB, "for ("); + + if (TreatRepeat2ndInnerLoopGlobal == true) { + MoveToBackLastInlineAsm(&MBB); + } + } // END for (auto &MBB : MF) + } // end if EnableASMPrint + + SetupMachineFunction(MF); + EmitFunctionBody(); + + return false; + } // end bool runOnMachineFunction(MachineFunction &MF) + + + void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, + const char *Modifier = nullptr); + + + void EmitInstruction(const MachineInstr *MI) override; + + + // AsmPrinter::EmitInlineAsm() implemented in lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp + /* + void EmitInlineAsm(const MachineInstr *MI) const { + LLVM_DEBUG(dbgs() << "Entered ConnexAsmPrinter::EmitInlineAsm()\n"); + } + */ + // Taken from MSP430 back end + void printSrcMemOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O); + + + // ProcessFunction() just updates mapLD_ST_REPEAT_InlineAsm. 
+ void ProcessFunction(const MachineFunction *MF) { + LLVM_DEBUG(dbgs() << "Entered ProcessFunction()\n"); + + for (auto &MBB : *MF) { + for (auto MIItr = MBB.begin(); MIItr != MBB.end(); ++MIItr) { + const MachineInstr *MI = &(*MIItr); + + LLVM_DEBUG(dbgs() << "ProcessFunction(): MI = " + << *MI << "\n"); + + //NOT working: if (MI->getOpcode() == ISD::INLINEASM) + if (MI->getOpcode() == TargetOpcode::INLINEASM) { +// TODO TODO TODO TODO: check also that the InlineAsm contains the substring "note that this line is normally NOT printed in the final .cpp" + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineBasicBlock.html + MachineBasicBlock::const_iterator MIItr2 = MIItr; +// TODO TODO TODO TODO: check for more instr, not just the next... it should help... + MIItr2++; + + const MachineInstr *MI2 = &(*MIItr2); + LLVM_DEBUG(dbgs() << "ProcessFunction(): MI2->getOpcode() = " + << MI2->getOpcode() << "\n"); + LLVM_DEBUG(dbgs() << "ProcessFunction(): MI2 = " + << MI2 << "\n"); + + if (MI2->getOpcode() == 0) { + /* It crashes when giving dbgs << *MI2, unfortunately... + This case happens since I changed how I treat the + writeDataToArray...() primitives in LoopVectorize.cpp + because now I don't put them at the beginning of + vector.body. */ + } + else { + LLVM_DEBUG(dbgs() << "ProcessFunction(): MI2 = " + << *MI2 << "\n"); + } + + bool validCase = false; + if ((MI2->getOpcode() == Connex::LD_H) || + (MI2->getOpcode() == Connex::ST_H) || + (MI2->getOpcode() == Connex::REPEAT)) { + validCase = true; + } + else + if (MI2->getOpcode() == Connex::VLOAD_H) { + MIItr2++; + MI2 = &(*MIItr2); + + if (MI2->getOpcode() == Connex::ST_H) { + // TODO TODO TODO TODO: verify ALSO that dest vector register of MI2 (VLOAD_H) is used in ST_H instruction + validCase = true; + } + } + + if (validCase) { + if (IfImmSpecialUpdateMap(MI, MI2) == -1) { + /* For test 300_Opincaa_BUG_Connex/STDerr_llc_01 + we require to look 1 more instruction. 
+ */ + MIItr2++; + MI2 = &(*MIItr2); + + if ((MI2->getOpcode() == Connex::LD_H) || + (MI2->getOpcode() == Connex::ST_H)) { + //validCase = true; + IfImmSpecialUpdateMap(MI, MI2); + } + } + } + } + } + } + } // END ProcessFunction() + + + bool /*ConnexAsmPrinter::*/ PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + LLVM_DEBUG(dbgs() << "Entered PrintAsmMemoryOperand()\n"); + return false; + } + + + bool /* ConnexAsmPrinter:: */ PrintAsmOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + LLVM_DEBUG(dbgs() << "Entered PrintAsmOperand()\n"); + return false; + } + + + void PrintSpecial(const MachineInstr *MI, raw_ostream &OS, + const char *Code) const { + LLVM_DEBUG(dbgs() << "Entered PrintSpecial()\n"); + } + + + void printOffset(int64_t Offset, raw_ostream &OS) const { + LLVM_DEBUG(dbgs() << "Entered printOffset()\n"); + } + + + // Note: NOT called + void EmitInt32(int Value) const { + LLVM_DEBUG(dbgs() << "Entered EmitInt32()\n"); + } + + /* + // From [LLVM]/llvm38Nov2016/llvm/lib/Target/Mips/MipsAsmPrinter.cpp + void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O); + void printUnsignedImm8(const MachineInstr *MI, int opNum, raw_ostream &O); + */ +}; // END class ConnexAsmPrinter + +} // END namespace + + +/* +// From [LLVM]/llvm38Nov2016/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +void ConnexAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.isImm()) + O << (unsigned short int)MO.getImm(); + else + printOperand(MI, opNum, O); +} + +// From [LLVM]/llvm38Nov2016/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +void ConnexAsmPrinter::printUnsignedImm8(const MachineInstr *MI, int opNum, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.isImm()) + O << (unsigned short int)(unsigned 
char)MO.getImm(); + else + printOperand(MI, opNum, O); +} +*/ + + +// NOT Called - TODO TODO TODO TODO: remove +void ConnexAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O, const char *Modifier) { + LLVM_DEBUG(dbgs() << "Entered ConnexAsmPrinter::printOperand()\n"); + const MachineOperand &MO = MI->getOperand(OpNum); + + switch (MO.getType()) { + case MachineOperand::MO_Register: + O << ConnexInstPrinter::getRegisterName(MO.getReg()); + break; + + case MachineOperand::MO_Immediate: { + unsigned imm = MO.getImm(); + LLVM_DEBUG(dbgs() << "printOperand(): imm = " << imm << "\n"); + + if (imm == CONNEX_MEM_NUM_ROWS + 10) { + O << STR_LOOP_SYMBOLIC_INDEX; + } + else { + O << MO.getImm(); + } + //O << MO.getImm(); + break; + } + + case MachineOperand::MO_MachineBasicBlock: + O << *MO.getMBB()->getSymbol(); + break; + + case MachineOperand::MO_GlobalAddress: + O << *getSymbol(MO.getGlobal()); + break; + + default: + llvm_unreachable(""); + } +} + +void ConnexAsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O) { + const MachineOperand &Base = MI->getOperand(OpNum); + const MachineOperand &Disp = MI->getOperand(OpNum+1); + + // Print displacement first + + // Imm here is in fact global address - print extra modifier. 
+ if (Disp.isImm() && !Base.getReg()) + O << '&'; + + printOperand(MI, OpNum+1, O, "nohash"); + + // Print register base field + if (Base.getReg()) { + O << '('; + printOperand(MI, OpNum, O); + O << ')'; + } +} + +void ConnexAsmPrinter::EmitInstruction(const MachineInstr *MI) { + LLVM_DEBUG(dbgs() << "Entered ConnexAsmPrinter::EmitInstruction()...\n"); + + /* Inspired from lib/Target/AMDGPU/AMDGPUMCInstLower.cpp + (actually it's class AMDGPUAsmPrinter) + */ + if (MI->isBundle()) { + LLVM_DEBUG(dbgs() << " EmitInstruction(): handling bundle\n"); + const MachineBasicBlock *MBB = MI->getParent(); + //MachineBasicBlock::const_instr_iterator I = ++MI->getIterator(); + MachineBasicBlock::const_instr_iterator I = MI->getIterator(); + I++; + // THIS cycles ~forever... EmitInstruction(& (*I) ); + + /* + // From http://llvm.org/docs/doxygen/html/classllvm_1_1MachineInstr.html + bool isInsideBundle () const + Return true if MI is in a bundle (but not the first MI in a bundle). + */ + while (I != MBB->instr_end() && I->isInsideBundle()) { + EmitInstruction(& (*I) ); + ++I; + } + + // Prints wrong instructions: EmitInstruction(& (*I) ); + return; + } + + //#ifdef ORIGINAL_CODE + ConnexMCInstLower MCInstLowering(OutContext, *this); + + MCInst TmpInst; + MCInstLowering.Lower(MI, TmpInst); + + crtMI = MI; + + EmitToStreamer(*OutStreamer, TmpInst); + + //OutStreamer->EmitInstruction(MIPred, getSubtargetInfo()); + //#endif + + //AsmPrinter::EmitInstruction(MI); +} // END ConnexAsmPrinter::EmitInstruction() + + + +// Force static initialization. 
+extern "C" void LLVMInitializeConnexAsmPrinter() { + RegisterAsmPrinter Z(TheConnexTarget); +} + Index: lib/Target/Connex/ConnexAsmPrinter_more.h =================================================================== --- lib/Target/Connex/ConnexAsmPrinter_more.h +++ lib/Target/Connex/ConnexAsmPrinter_more.h @@ -0,0 +1,98 @@ +#ifndef CONNEX_ASM_PRINTER_MORE_H +#define CONNEX_ASM_PRINTER_MORE_H + +// Used by ReplaceLoopsWithOpincaaKernels.cpp and ConnexAsmPrinter.cpp + +std::vector treatRepeat2ndInnerLoop; +// The start and end of the innermost (or 2nd innermost) loop +std::vector linStart, colStart, linEnd, colEnd; +// +std::vector linStartLoopNest, colStartLoopNest, linEndLoopNest, colEndLoopNest; + +/* + We read the lines and columns from the startLoc.txt file. + We keep the numbering from 1 throughout the ENTIRE program, + BUT in FindEndLoop() we decrement the value. +*/ +void ReadStartLocFile(char *fileNameSrc, bool silentFail=false) { + int index; + char str[MAXLEN_STR]; + + int linStartTmp, colStartTmp; + int linEndTmp, colEndTmp; + + FILE *fin = fopen(fileNameSrc, "rt"); + /* We need to process each loop, from the last in the file to the first, + therefore preserving the line & column numbers of the loops that + remain to be replaces. + */ + if (silentFail) { + if (fin == NULL) { + printf("startLoc.txt file NOT found (maybe NO loop was vectorized)"); + return; + } + } + assert(fin != NULL && + "ReadStartLocFile(): fileNameSrc (e.g., startLoc.txt) file NOT found (maybe NO loop was vectorized). 
" + "Anyhow cannot automatically replace in source file vectorized loops with Opincaa kernels."); + + //for (index = 0; index < replaceString.size(); index++) + for (index = 0; ; index++) { + if (fgets(str, MAXLEN_STR - 1, fin) == NULL) + break; + + printf("str = %s\n", str); + fflush(stdout); + + fscanf(fin, "%d %d %d %d\r\n", &linStartTmp, &colStartTmp, + &linEndTmp, &colEndTmp); + // + printf("ReadStartLocFile(): (linStart = %d, colStart = %d) -> " + "(linEndTmp = %d, colEndTmp = %d)\n", + linStartTmp, colStartTmp, linEndTmp, colEndTmp); + fflush(stdout); + // + linStart.push_back(linStartTmp); + colStart.push_back(colStartTmp); + linEnd.push_back(linEndTmp); + colEnd.push_back(colEndTmp); + assert(linStartTmp <= linEndTmp); + + int ch = getc(fin); + ungetc(ch, fin); + + printf("ReadStartLocFile(): ch = %d\n", (int)ch); + fflush(stdout); + + if ((ch == '/') || (ch == -1)) { + treatRepeat2ndInnerLoop.push_back(false); + + linStartLoopNest.push_back(-1); + colStartLoopNest.push_back(-1); + linEndLoopNest.push_back(-1); + colEndLoopNest.push_back(-1); + } + else { + treatRepeat2ndInnerLoop.push_back(true); + + fscanf(fin, "%d %d %d %d\r\n", &linStartTmp, &colStartTmp, + &linEndTmp, &colEndTmp); + printf("ReadStartLocFile(): (linStart = %d, colStart = %d) -> " + "(linEndTmp = %d, colEndTmp = %d)\n", + linStartTmp, colStartTmp, linEndTmp, colEndTmp); + fflush(stdout); + + linStartLoopNest.push_back(linStartTmp); + colStartLoopNest.push_back(colStartTmp); + linEndLoopNest.push_back(linEndTmp); + colEndLoopNest.push_back(colEndTmp); + } + + printf("ReadStartLocFile(): treatRepeat2ndInnerLoop[%d] = %d\n", index, (int)treatRepeat2ndInnerLoop[index]); + fflush(stdout); + } + + fclose(fin); +} // END ReadStartLocFile() + +#endif Index: lib/Target/Connex/ConnexConfig.h =================================================================== --- lib/Target/Connex/ConnexConfig.h +++ lib/Target/Connex/ConnexConfig.h @@ -0,0 +1,68 @@ +#ifndef CONNEX_CONFIG_ALEX +#define 
CONNEX_CONFIG_ALEX + +// This file is used by... TODO TODO TODO + +// These 2 types are defined also in Opincaa lib, in include/Architecture.h +typedef short TypeElement; +typedef unsigned short UnsignedTypeElement; + + +#define CONNEX_VECTOR_LENGTH 8 +#define TYPE_SIZEOF 2 +#define CONNEX_LINE_SIZE (CONNEX_VECTOR_LENGTH * TYPE_SIZEOF) + +//#define STR_LOOP_SYMBOLIC_INDEX "indexLLVM_LV / CONNEX_VECTOR_LENGTH" +// NOTE: make sure it is equiavlent to the above commented macro +// NOTE: keep the paranthesis since >> has low operator priority +#define STR_LOOP_SYMBOLIC_INDEX "(indexLLVM_LV >> 7)" + +// This is the type of the scalar processor (basically the BPF processor) operand +// TODO_CHANGE_BACKEND: +#define TYPE_SCALAR_ELEMENT MVT::i64 +//#define TYPE_ELEMENT MVT::i32 + +//#define TYPE_VECTOR MVT::v8i64 +//#define TYPE_VECTOR MVT::v16i32 +//#define TYPE_VECTOR MVT::v32i16 +//#define TYPE_VECTOR_I16 MVT::v128i16 +#define TYPE_VECTOR_I16 MVT::v8i16 +//#define TYPE_VECTOR_ELEMENT MVT::i64 +#define TYPE_VECTOR_I16_ELEMENT MVT::i16 + +//#define TYPE_VECTOR_I32 MVT::v64i32 +#define TYPE_VECTOR_I32 MVT::v4i32 +#define TYPE_VECTOR_I32_ELEMENT MVT::i32 + +//#define TYPE_VECTOR_F16 MVT::v128f16 +#define TYPE_VECTOR_F16 MVT::v8f16 +#define TYPE_VECTOR_F16_ELEMENT MVT::f16 + + +#define TYPE_VECTOR_I16_ELEMENT_BITSIZE 16 +#define TYPE_VECTOR_I32_ELEMENT_BITSIZE 32 +#define TYPE_VECTOR_F16_ELEMENT_BITSIZE 16 + + +//#define CONNEX_MEM_SIZE 1024 +#define CONNEX_MEM_NUM_ROWS 1024 +// Extra LS memory for spills and LUTs for div/sqrt.f16, etc +#define CONNEX_MEM_NUM_ROWS_EXTRA 200 +// For 64 lanes: #define CONNEX_MEM_NUM_ROWS 2048 + +// NOTE: normally REPEAT accepts immediates in interval 0..1023 +#define VALUE_BOGUS_REPEAT_X_TIMES 32761 + + +//#ifndef MAXLEN_STR +#define MAXLEN_STR 8192 +//#endif + +// Used in ConnexAsmPrinter.cpp and LoopVectorize.cpp +#define STR_OPINCAA_CODE_BEGIN "// START_OPINCAA_HOST_DEVICE_CODE" +#define STR_OPINCAA_CODE_END "// 
END_OPINCAA_HOST_DEVICE_CODE" + +#define STR_OPINCAA_KERNEL_REDUCE_BEFORE_END "REDUCE R(0); // We add a 'bogus' REDUCE to wait for it" + +#endif + Index: lib/Target/Connex/ConnexFrameLowering.h =================================================================== --- lib/Target/Connex/ConnexFrameLowering.h +++ lib/Target/Connex/ConnexFrameLowering.h @@ -0,0 +1,49 @@ +//===-- ConnexFrameLowering.h - Define frame lowering for Connex -----*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +// This class implements Connex-specific bits of TargetFrameLowering class. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_CONNEX_CONNEXFRAMELOWERING_H +#define LLVM_LIB_TARGET_CONNEX_CONNEXFRAMELOWERING_H + +// 2019_03_30: #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" + +namespace llvm { +class ConnexSubtarget; + +class ConnexFrameLowering : public TargetFrameLowering { +public: + explicit ConnexFrameLowering(const ConnexSubtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0) {} + + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + + bool hasFP(const MachineFunction &MF) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; + + /* + void + eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const override { + MBB.erase(MI); + } + */ + MachineBasicBlock::iterator + eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + 
MachineBasicBlock::iterator MI) const override { + return MBB.erase(MI); + } +}; +} +#endif Index: lib/Target/Connex/ConnexFrameLowering.cpp =================================================================== --- lib/Target/Connex/ConnexFrameLowering.cpp +++ lib/Target/Connex/ConnexFrameLowering.cpp @@ -0,0 +1,40 @@ +//===-- ConnexFrameLowering.cpp - Connex Frame Information ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Connex implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "ConnexFrameLowering.h" +#include "ConnexInstrInfo.h" +#include "ConnexSubtarget.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +bool ConnexFrameLowering::hasFP(const MachineFunction &MF) const { return true; } + +void ConnexFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const {} + +void ConnexFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const {} + +void ConnexFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + SavedRegs.reset(Connex::R6); + SavedRegs.reset(Connex::R7); + SavedRegs.reset(Connex::R8); + SavedRegs.reset(Connex::R9); +} Index: lib/Target/Connex/ConnexHazardRecognizers.h =================================================================== --- lib/Target/Connex/ConnexHazardRecognizers.h +++ lib/Target/Connex/ConnexHazardRecognizers.h @@ -0,0 +1,66 @@ +/* Inspired from 
llvm/lib/Target/PowerPC/PPCHazardRecognizers.h: + /// PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based + /// hazard recognizer for PPC ooo processors with dispatch-group hazards. +*/ + + +#ifndef LLVM_LIB_TARGET_CONNEX_HAZARDRECOGNIZERS_H +#define LLVM_LIB_TARGET_CONNEX_HAZARDRECOGNIZERS_H + +#include "ConnexInstrInfo.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/ScoreboardHazardRecognizer.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" + +namespace llvm { + +/* NOTE: ScheduleHazardRecognizer is basically an "interface" + * (almost abstract, i.e. almost no functionality implemented)class, so better + * stick with ScoreboardHazardRecognizer if its functionality is OK for me: +class ConnexDispatchGroupSBHazardRecognizer : public ScheduleHazardRecognizer { +*/ + +/* We choose to inherit the ScoreboardHazardRecognizer because only this + * performs out-of-order scheduling, and NOT ScheduleHazardRecognizer. + */ +class ConnexDispatchGroupSBHazardRecognizer : public ScoreboardHazardRecognizer { + const ScheduleDAG *DAG; + bool isDataHazard(SUnit *SU); + + /* + SmallVector CurGroup; + unsigned CurSlots, CurBranches; + + bool isLoadAfterStore(SUnit *SU); + bool isBCTRAfterSet(SUnit *SU); + bool mustComeFirst(const MCInstrDesc *MCID, unsigned &NSlots); + */ + +public: + ConnexDispatchGroupSBHazardRecognizer(const InstrItineraryData *ItinData, + const ScheduleDAG *DAG_) : + ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_) + //, CurSlots(0), CurBranches(0) + { + //DEBUG(dbgs() << "Entered ConnexDispatchGroupSBHazardRecognizer()\n"); + } + + HazardType getHazardType(SUnit *SU, int Stalls) override; + + unsigned PreEmitNoops(SUnit *SU) override; + /* + bool ShouldPreferAnother(SUnit* SU) override; + */ + void EmitInstruction(SUnit *SU) override; + /* + void AdvanceCycle() override; + void RecedeCycle() override; + void Reset() override; + void EmitNoop() override; + */ +}; + +} + +#endif + Index: 
lib/Target/Connex/ConnexHazardRecognizers.cpp =================================================================== --- lib/Target/Connex/ConnexHazardRecognizers.cpp +++ lib/Target/Connex/ConnexHazardRecognizers.cpp @@ -0,0 +1,538 @@ +/* +The delay slot issues that need to be handled are for: + - where normally; but NOW ([!!!!THINK BETTER - we added support for i32]) we only generate WHERE for the VSELECT LLVM IR instruction + a bundle of 4 instructions (in ConnexTargetMachine.cpp, passes PassCreateWhereBlocks and PassFinalizeBundles). + Basically we expand the following pseudo-machine instruction: + dst = VSELECT pred, true_assignment, false_assignment: + to the following Connex machine instr: + (note the comparison is excluded from the bundle - + it's scheduled before it) + // For pred == false + dst = false_assignment + WHERExy + // For pred == true: + dst = true_assignment + END_WHERE + + // The comparison is excluded from the bundle (SHOULD be scheduled before it) + predicate-false register assignment + WHERExy + predicate-true register assignment + END_WHERE + Note: I tried to use TII->PredicateInstruction() but it didn't work - see http://lists.llvm.org/pipermail/llvm-dev/2017-March/111026.html + - read, write + - iwrite + for each operation updating the register used by these instructions just before, which can be: + iread, vload, ldix, multlo/hi, ldsh, add/c, sub/c, eq/ult/lt, (i)shl, (i)shr, (i)shra, popcount, not/or/and/xor. + +Similarly with the wherexx Connex instruction. + +The point is that we should try NOT to focus on the delay slots of the producer instructions (in number of 24), but focus on these delays at the consumer side because there are only 6 consumer instructions (read/write, iwrite, wherecr/eq/lt). + +Not only that, but we should try to fill the delay slots with instructions in out-of-order fashion. 
+ +Hal Finkel pointed me to lib/Target/PowerPC/PPCHazardRecognizers.cpp: + On 2/3/2017 10:25 PM, Hal Finkel wrote: + > Hi Alex, + > You can program a post-RA scheduler which will return NoopHazard in the appropriate + > circumstances. You can look at the PowerPC target (e.g. + > lib/Target/PowerPC/PPCHazardRecognizers.cpp) as an example. + +I guess Hal recommends customizing the post-RA scheduler because after RA we have finished all(?) instruction selection steps and we handle MachineInstr, which makes life simpler for us to see if we have ST_H or ST_INDIRECT, etc. +See the Figure with passes in \cite{Cardoso_Lopes2014}, page 134. + +\cite{Cardoso_Lopes2014} + "There are three distinct scheduler executions in the code generator: + two prior and one post register allocation. The first works on + SelectionDAG nodes while the other two work on machine + instructions" + + "The scheduler runs before and after register allocation. However, the SDNode + instruction representation is only available in the former while the latter uses the + MachineInstr class. To cope with both SDNodes and MachineInstrs, the SUnit class + (see the file /include/llvm/CodeGen/ScheduleDAG.h) abstracts the + underlying instruction representation as the unit used during instruction scheduling." + +See also http://llvm.org/docs/doxygen/html/classllvm_1_1ScheduleHazardRecognizer.html#details + <> +*/ + + + +// Inspired from llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp + +//===-- ConnexHazardRecognizers.cpp - Connex Hazard Recognizer Impls --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements hazard recognizers for scheduling on Connex processors. 
+// +//===----------------------------------------------------------------------===// + +#include "ConnexHazardRecognizers.h" +#include "Connex.h" +#include "ConnexInstrInfo.h" +#include "ConnexTargetMachine.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "SUnit_dump.h" // 2019_03_30 + +using namespace llvm; + + + +#define DEBUG_TYPE "post-RA-sched" + +// getPredMachineInstr() is declared in ConnexInstrInfo.cpp +extern MachineInstr *getPredMachineInstr(MachineInstr *MI, MachineInstr **succMI); + + + + + + +/* + From http://llvm.org/docs/doxygen/html/ScheduleHazardRecognizer_8h_source.html#l00078: + 00073 /// PreEmitNoops - This callback is invoked prior to emitting an instruction. + 00074 /// It should return the number of noops to emit prior to the provided + 00075 /// instruction. + 00076 /// Note: This is only used during PostRA scheduling. EmitNoop is not called + 00077 /// for these noops. 
+ * + */ +unsigned ConnexDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) { + assert(SU->isInstr() == true); + + /* + MachineInstr *MI = SU->getInstr(); + int MIOpcode = MI->getOpcode(); + if (MIOpcode == Connex::LD_INDIRECT_H) + */ + if (isDataHazard(SU)) + return 1; + + return ScoreboardHazardRecognizer::PreEmitNoops(SU); +} + + +bool ConnexDispatchGroupSBHazardRecognizer::isDataHazard(SUnit *SU) { + // From http://llvm.org/docs/doxygen/html/classllvm_1_1MCInstrDesc.html + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + if (MCID == NULL) + return false; + + /* + // Note: MCPhysReg is an integer - see http://llvm.org/docs/doxygen/html/namespacellvm.html: "typedef uint16_t llvm::MCPhysReg" + const MCPhysReg *MCIDArray = MCID->getImplicitUses(); + unsigned numUses = MCID->getNumImplicitUses(); seems it is always 0 + */ + //const MCOperandInfo *MCIDArray = MCID->OpInfo; + unsigned numUses = MCID->getNumOperands() - MCID->getNumDefs(); + //LLVM_DEBUG(dbgs() << " isDataHazard(): SU = " << numUses << "\n"); + LLVM_DEBUG(dbgs() << " isDataHazard(): numUses = " << numUses << "\n"); + LLVM_DEBUG(dbgs() << " isDataHazard(): MCID->getNumOperands() = " + << MCID->getNumOperands() << "\n"); + LLVM_DEBUG(dbgs() << " isDataHazard(): MCID->getNumDefs() = " + << MCID->getNumDefs() << "\n"); + + + assert(SU->isInstr() == true); + + MachineInstr *MI = SU->getInstr(); + LLVM_DEBUG(dbgs() << " isDataHazard(): MI ="; + MI->dump(); + ); + + int MIOpcode = MI->getOpcode(); + LLVM_DEBUG(dbgs() << " isDataHazard(): MI->getOpcode() = " + << MI->getOpcode() << "\n"); + + + if (MIOpcode == Connex::ST_INDIRECT_H || + MIOpcode == Connex::ST_INDIRECT_W || + MIOpcode == Connex::ST_INDIRECT_MASKED_H || + MIOpcode == Connex::ST_H) { + /* NOTE: END_REPEAT returns, to my surprise, also mayStore(). + But we should not worry about this since END_REPEAT takes no + parameter. 
*/ + /* + if (MCID->mayStore()) + if (MCID->mayLoad()) + */ + LLVM_DEBUG(dbgs() << " isDataHazard(): SU is Store\n"); + } + else + if (MIOpcode == Connex::LD_INDIRECT_H || + MIOpcode == Connex::LD_INDIRECT_W || + MIOpcode == Connex::LD_INDIRECT_MASKED_H) { + LLVM_DEBUG(dbgs() << " isDataHazard(): SU is Load\n"); + } + else + if ( + //assert(MIOpcode != Connex::WHERECRY); + //MIOpcode == Connex::WHERECRY || + MIOpcode == Connex::WHEREEQ_BUNDLE_H || + MIOpcode == Connex::WHERELT_BUNDLE_H || + MIOpcode == Connex::WHEREULT_BUNDLE_H) { + LLVM_DEBUG(dbgs() << " isDataHazard(): SU is Where\n"); + } + else { + LLVM_DEBUG(dbgs() << " isDataHazard(): SU NOT producing data hazard\n"); + + // VERY IMPORTANT + return false; + } + + LLVM_DEBUG(dbgs() << " isDataHazard(): MI->getNumOperands() = " + << MI->getNumOperands() << "\n"); + + /* + Why does getHazardType() find 3 Loads - because I was considering pred in DAG (SDNode), not in MachineInstr list, where it should be only 1? + +BB#14: derived from LLVM BB %vector.ph + Live Ins: %R0 %R1 %R2 %R3 %R4 %R5 %R12 %R13 + Predecessors according to CFG: BB#13 +<------>BUNDLE %Wh0, %SR16b_0_0, %SR16b_0_1, %SR16b_0_2, %SR16b_0_3, %SR16b_0_4, %SR16b_0_5, %SR16b_0_6, %SR16b_0_7, %SR16b_0_8, %SR16b_0_9, %SR16b_0_10, +<------> * %Wh0 = VLOAD_H_SYM_IMM; dbg:test.c:14:7 +<------> * INLINEASM > 7); // MSA_I10> [sideeffect] [attdialect], ; dbg:test.c:14:7 +<------>BUNDLE %Wh1, %SR16b_1_0, %SR16b_1_1, %SR16b_1_2, %SR16b_1_3, %SR16b_1_4, %SR16b_1_5, %SR16b_1_6, %SR16b_1_7, %SR16b_1_8, %SR16b_1_9, %SR16b_1_10, +<------> * %Wh1 = VLOAD_H_SYM_IMM; dbg:test.c:14:7 +<------> * INLINEASM > 7) << 1); // MSA_I10> [sideeffect] [attdialect], ; dbg:test.c:14:7 +<------>BUNDLE %Wh2, %SR16b_2_0, %SR16b_2_1, %SR16b_2_2, %SR16b_2_3, %SR16b_2_4, %SR16b_2_5, %SR16b_2_6, %SR16b_2_7, %SR16b_2_8, %SR16b_2_9, %SR16b_2_10, +<------> * %Wh2 = VLOAD_H_SYM_IMM; dbg:test.c:14:7 +<------> * INLINEASM > 7) * 3); // MSA_I10> [sideeffect] [attdialect], ; dbg:test.c:14:7 + 
+BB#15: derived from LLVM BB %vector.body + Live Ins: %R0 %R1 %R2 %R3 %R4 %R5 %R12 %R13 %R14 %Wh0 %Wh1 %Wh2 + Predecessors according to CFG: BB#14 BB#15 +<------>INLINEASM writeDataToArrayPartial( + connexGlobal->writeDataToArrayPartial( + _BEGIN_KERNEL(BatchNumberGlobal); // Generated in vectorizeLoop() + EXECUTE_IN_ALL( + // Handling spills (from predecessors) and fills +> [sideeffect] [attdialect]; dbg:test.c:14:7 +<------>INLINEASM [sideeffect] [attdialect]; dbg:test.c:14:7 +<------>INLINEASM [sideeffect] [attdialect]; dbg:test.c:14:7 +<------>REPEAT_H_SYM_IMM; dbg:test.c:14:7 +<------>INLINEASM > 7));> [sideeffect] [attdialect], ; dbg:test.c:14:7 +<------>%Wh3 = VLOAD_H 1; dbg:test.c:14:7 +<------>%Wh4 = VLOAD_H 0 +<------>%Wh5, %BoolMask1 = LD_INDIRECT_H %Wh4, %BoolMask0, %Wh0; mem:LD256[inttoptr (i16 51 to i16*)](tbaa=!12)(alias.scope=!16) dbg:test.c:14:7 +<------>%Wh4, %BoolMask1 = LD_INDIRECT_H %Wh4, %BoolMask0, %Wh1; mem:LD256[inttoptr (i16 51 to i16*)](tbaa=!12)(alias.scope=!19) dbg:test.c:14:7 +<------>%Wh4 = ADDV_H %Wh4, %Wh5; dbg:test.c:15:43 +<------>%BoolMask0 = ST_INDIRECT_H %Wh4, %BoolMask0, %Wh2; mem:ST256[inttoptr (i16 52 to i16*)](tbaa=!12)(alias.scope=!22)(noalias=!16,!19) dbg:test.c:14:7 +<------>%Wh2 = ADDV_H %Wh2, %Wh3; dbg:test.c:14:7 +<------>%Wh1 = ADDV_H %Wh1, %Wh3; dbg:test.c:14:7 +<------>%Wh0 = ADDV_H %Wh0, %Wh3; dbg:test.c:14:7 +<------>%R14 = ADD_ri %R14, 128; dbg:test.c:14:7 +<------>END_REPEAT_H; dbg:test.c:14:7 +<------>INLINEASM R(0) = // (fake but necessary ; ) VLOAD_H_SYM_IMM MSA_I10 // (numRowsCols >> 7); // MSA_I10 +<------>R(1) = // (fake but necessary ; ) VLOAD_H_SYM_IMM MSA_I10 // ((numRowsCols >> 7) << 1); // MSA_I10 +<------>R(2) = // (fake but necessary ; ) VLOAD_H_SYM_IMM MSA_I10 // ((numRowsCols >> 7) * 3); // MSA_I10 +<------>R(4) = 0 ; // MSA_I10 // REPEAT_X_TIMES( // (fake but necessary ; ) REPEAT_DESC_BASE_SYM_IMM // +<------> (numRowsCols >> 7)); +<------>R(3) = 1 ; // MSA_I10 // nop<---> ; // scalar or vector 
NOP // +<------>R(5) = LS[R(0)]; // READ (gather) // R(0) = R(0) + R(3) ; // MSA_3R generic instruction // R(4) = LS[R(1)]; // READ (gather) // R(1) = R(1) + R(3) ; // MSA_3R generic instruction // R(4) = R(4) + R(5) ; // MSA_3R generic instruction // nop<---> ; // scalar or vector NOP // +<------>LS[R(2)] = R(4) ; // WRITE (scatter) // R(2) = R(2) + R(3) ; // MSA_3R generic instruction // END_REPEAT; // END_REPEAT // +<------>REDUCE R(0); // We add a 'bogus' REDUCE to wait for it + ); + This should cover these cases described in ConnexISA.docx: + - (i)write using register defined in the previous instruction: + LS[R1] = R4 + LS[5] = R1 + and also this slightly different case: + LS[R10] = R1 + + - read using register defined in the previous instruction + R4 = LS[R1] + + - wherexx using the flag defined in the previous instruction + R1 = (R2 == R3) + WHERE_EQUAL + */ + + /* small-TODO: understand conceptually what PPC was doing with dispatch group. + + IMPORTANT: We keep this search for predecessors of SU in the DAG and not for + THE only predecessor of the MachineInstr (we are at Post-RA scheduler) + contained in SU because MAYBE/it is possible that when doing + ScoreboardHazardRecognizer (out-of-order scheduling to fill delay slots) + we could benefit from the DAG predecessors - QUITE UNLIKELY, but maybe + so. Otherwise, we should ONLY look at the + getPredMachineInstr(MachineInstr *MI). + + For any predecessors of SU with which we + have an ordering dependency, return true. */ + for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { + const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); + + if (PredMCID == NULL) // || !PredMCID->mayStore()) + continue; + + /* SU->Preds is SmallVector of SDep. 
+ * - see http://llvm.org/docs/doxygen/html/classllvm_1_1SUnit.html + * - see http://llvm.org/docs/doxygen/html/classllvm_1_1SDep.html + */ + MachineInstr *PredMI = (SU->Preds[i].getSUnit())->getInstr(); + MachineInstr *tmpNotUsed; + if (PredMI != getPredMachineInstr(MI, &tmpNotUsed)) { + LLVM_DEBUG(dbgs() << " isDataHazard(): jumping DAG predecessor that is " + "NOT MachineInstr predecessor: PredMI ="; + PredMI->dump(); + dbgs() << " for MI ="; + MI->dump(); + ); + continue; + } + + LLVM_DEBUG(dbgs() << " isDataHazard(): Found DAG predecessor that is " + "MachineInstr predecessor: PredMI ="; + PredMI->dump(); + dbgs() << " for MI ="; + MI->dump(); + ); + + LLVM_DEBUG(dbgs() << " isDataHazard(SU->Preds[" + << i << "] = "; + PredMI->dump(); + //(SU->Preds[i].getSUnit())->dump(DAG); + //PredMCID->dump(DAG); + dbgs() << ")\n"); + + /* + * // TODO: check BETTER we have to check SU->Preds[i] is THE prev + instruction in the list of MachineInstr - .getParent() + * TODO TODO TODO: we have to check for + * LD_INDIRECT_H for the memory (offset) register, + * not the passthrough (or mask). 
+ */ + + /* + const MCPhysReg *PredMCIDArray = PredMCID->getImplicitDefs(); + unsigned numDefs = PredMCID->getNumImplicitDefs(); seems it is always 0 + */ + unsigned numDefs = PredMCID->getNumDefs(); + //const MCOperandInfo *PredMCIDArray = PredMCID->OpInfo; + LLVM_DEBUG(dbgs() << " isDataHazard(): numDefs = " << numDefs << "\n"); + LLVM_DEBUG(dbgs() << " isDataHazard(): PredMI->getNumOperands() = " + << PredMI->getNumOperands() << "\n"); + LLVM_DEBUG(dbgs() << " isDataHazard(): PredMCID->getNumOperands() = " + << PredMCID->getNumOperands() << "\n"); + LLVM_DEBUG(dbgs() << " isDataHazard(): PredMCID->getNumDefs() = " + << PredMCID->getNumDefs() << "\n"); + + int idUseStart; + if (MIOpcode == Connex::LD_INDIRECT_H || MIOpcode == Connex::LD_INDIRECT_W || + MIOpcode == Connex::LD_INDIRECT_MASKED_H) { + LLVM_DEBUG(dbgs() << " isDataHazard(): PredMI->getOpcode() = " + << PredMI->getOpcode() << "\n"); + if (PredMI->getOpcode() == TargetOpcode::INLINEASM) { + LLVM_DEBUG(dbgs() + << " isDataHazard(): PredMI is INLINEASM so return true" + << "\n"); + /* We assume that the PredMI INLINEAASM is NOT a Connex + * instruction, but a host-side Opincaa C++ for loop. + * In such case, we can have 2 data hazards with MI: + * - one with the instruction above this C++ for statement + * - one with the instruction at the end of this for loop + * when we unroll (if the trip-count of the loop is >1) + * this for loop + * + * TODO TODO TODO TODO TODO TODO TODO: make full checks and + * return true only if it + * is the case, to be more efficient. 
+ */ + #ifdef TODO_TODO_TODO_TODO_TODO_TODO_TODO_MORE + return true; + #endif + } + + /* %Wh5, %BoolMask1 = LD_INDIRECT_MASKED_H %Wh4, %BoolMask0, %Wh0; mem:LD256[inttoptr (i16 51 to i16*)](tbaa=!12)(alias.scope=!16) + The arguments ("uses") of LD_INDIRECT_MASKED_H are: + %Wh4 - I think it is the passthrough register + (if mask bit is 0 we use passthrough) + %BoolMask0 - is the mask + %Wh0 - the offset register (if mask bit is 0 we use passthrough) + Note that Connex does NOT support masked gather just with read + (it requires WHERE also and things become more complex than + just masked gather, in principle) + */ + + if (MIOpcode == Connex::LD_INDIRECT_MASKED_H) { + idUseStart = MCID->getNumDefs() + 2; // 1 for passthrough, 1 for bool mask + } + else + if (MIOpcode == Connex::LD_INDIRECT_H || MIOpcode == Connex::LD_INDIRECT_W) { + idUseStart = MCID->getNumDefs(); // 1 for passthrough, 1 for bool mask + } + } + else { + idUseStart = MCID->getNumDefs(); + } + + for (unsigned idUse = idUseStart; idUse < numUses; idUse++) { + /* + LLVM_DEBUG(dbgs() << " isDataHazard(): MCIDArray[" << idUse + << "] = " << MCIDArray[idUse] << "\n"); + */ + LLVM_DEBUG(dbgs() << " isDataHazard(): MI->getOperand(" << idUse + << ") = " << MI->getOperand(idUse) << "\n"); + for (unsigned idDef = 0; idDef < numDefs; idDef++) { + /* + LLVM_DEBUG(dbgs() << " isDataHazard(): PredMCIDArray[" << idDef + << "] = " << PredMCIDArray[idDef] << "\n"); + if (PredMCIDArray[idDef] == MCIDArray[idUse]) { + LLVM_DEBUG(dbgs() << " isDataHazard(): found an instr sequence that has to be separated by NOP to avoid true dependency hazard\n"); + return true; + } + */ + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineOperand.html + const MachineOperand &PredMIMO = PredMI->getOperand(idDef); + const MachineOperand &MIMO = MI->getOperand(idUse); + LLVM_DEBUG(dbgs() << " isDataHazard(): PredMI->getOperand(" << idDef + << ") = " << PredMI->getOperand(idDef) << "\n"); + + if ((PredMI->getOpcode() != 
Connex::END_WHERE) && + (PredMI->getOpcode() != Connex::WHEREEQ) && + (PredMI->getOpcode() != Connex::WHERELT) && + (PredMI->getOpcode() != Connex::WHERECRY) && + PredMIMO.isReg() && MIMO.isReg() && + PredMIMO.getReg() == MIMO.getReg()) { + LLVM_DEBUG(dbgs() + << " isDataHazard(): found an instr sequence " + "(defReg = PredOpcode; write/read/Where useReg;) and " + "defReg == useReg. " + "This sequence has to be separated by NOP to avoid " + "true dependency hazard\n"); + return true; + } + } + } + /* + if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier()) + continue; + */ + //return true; + } + + return false; +} + + +ScheduleHazardRecognizer::HazardType +ConnexDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + #ifdef USE_GETHAZARDTYPE + static bool emittedNoop = false; + + // From http://llvm.org/docs/doxygen/html/classllvm_1_1SUnit.html + LLVM_DEBUG(dbgs() << "ConnexDispatchGroupSBHazardRecognizer::getHazardType(SU = "; + SU->dump(DAG); + dbgs() << ", Stalls = " << Stalls << ") and " + << "emittedNoop = " << emittedNoop << "\n"); + + //if (Stalls == 0 && isLoadAfterStore(SU)) + if (Stalls == 0 && // no (pipeline?) stalls + emittedNoop == false && // TODO This is a ~lousy solution, but can generate several NOPs in a function, etc + /* TODO TODO: the problem I have is due to wrong instr + itineraries??? */ + isDataHazard(SU)) { + LLVM_DEBUG(dbgs() << " getHazardType(): return NoopHazard\n"); + + emittedNoop = true; + + return NoopHazard; + /* TODO TODO TODO TODO TODO TODO TODO: figure out how to make this work. 
+ Does NOT help at all (no change in code - not NOP, + nor other useful instr in the delay slot): + return Hazard; + */ + } + else { + emittedNoop = false; + } + + return NoHazard; + #endif + return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); +} + +void ConnexDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) { + unsigned i, ie; + + LLVM_DEBUG(dbgs() << "Entered Connex's ConnexDispatchGroupSBHazardRecognizer::EmitInstruction("; + dumpSU(SU, dbgs()); + dbgs() << ")\n"); + // + assert(SU->isInstr() == true); + MachineInstr *MI = SU->getInstr(); + MachineBasicBlock *MBB = MI->getParent(); + LLVM_DEBUG(dbgs() << " EmitInstruction(): MBB = " + << MBB->getFullName() << "\n" + //MBB->dump(); + ); + + LLVM_DEBUG(dbgs() << " SU->Succs.size() = " + << SU->Succs.size() << "\n"); + LLVM_DEBUG(dbgs() << " SU->Preds.size() = " + << SU->Preds.size() << "\n"); + + for (i = 0, ie = (unsigned) SU->Succs.size(); i != ie; ++i) { + MachineInstr *SuccMI = (SU->Succs[i].getSUnit())->getInstr(); + if (SuccMI == NULL) { + LLVM_DEBUG(dbgs() << " SU->Succs[" + << i << "] = NULL\n"); + } + else { + LLVM_DEBUG(dbgs() << " SU->Succs[" + << i << "] = "; + SuccMI->dump(); + dbgs() << "\n"); + } + } + for (i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { + MachineInstr *PredMI = (SU->Preds[i].getSUnit())->getInstr(); + if (PredMI == NULL) { + LLVM_DEBUG(dbgs() << " SU->Preds[" + << i << "] = NULL\n"); + } + else { + LLVM_DEBUG(dbgs() << " SU->Preds[" + << i << "] = "; + PredMI->dump(); + dbgs() << "\n"); + } + } + + return ScoreboardHazardRecognizer::EmitInstruction(SU); +} + Index: lib/Target/Connex/ConnexHazardRecognizersPreRAScheduler.h =================================================================== --- lib/Target/Connex/ConnexHazardRecognizersPreRAScheduler.h +++ lib/Target/Connex/ConnexHazardRecognizersPreRAScheduler.h @@ -0,0 +1,58 @@ +/* Inspired from llvm/lib/Target/PowerPC/PPCHazardRecognizers.h: + /// PPCDispatchGroupSBHazardRecognizer - This class 
implements a scoreboard-based + /// hazard recognizer for PPC ooo processors with dispatch-group hazards. +*/ + + +#ifndef LLVM_LIB_TARGET_CONNEX_HAZARDRECOGNIZERS_PRE_RA_SCHEDULER_H +#define LLVM_LIB_TARGET_CONNEX_HAZARDRECOGNIZERS_PRE_RA_SCHEDULER_H + +#include "ConnexInstrInfo.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/ScoreboardHazardRecognizer.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" + +namespace llvm { + +/* We choose to inherit the ScoreboardHazardRecognizer because only this + * performs out-of-order scheduling, and NOT ScheduleHazardRecognizer. + */ +class ConnexDispatchGroupSBHazardRecognizerPreRAScheduler : public ScoreboardHazardRecognizer { + const ScheduleDAG *DAG; + bool isReadAfterWrite(SUnit *SU); + + /* + SmallVector CurGroup; + unsigned CurSlots, CurBranches; + + bool isLoadAfterStore(SUnit *SU); + bool isBCTRAfterSet(SUnit *SU); + bool mustComeFirst(const MCInstrDesc *MCID, unsigned &NSlots); + */ + +public: + ConnexDispatchGroupSBHazardRecognizerPreRAScheduler(const InstrItineraryData *ItinData, + const ScheduleDAG *DAG_) : + ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_) + //, CurSlots(0), CurBranches(0) + { + //DEBUG(dbgs() << "Entered ConnexDispatchGroupSBHazardRecognizerPreRAScheduler()\n"); + } + + HazardType getHazardType(SUnit *SU, int Stalls) override; + /* + bool ShouldPreferAnother(SUnit* SU) override; + */ + unsigned PreEmitNoops(SUnit *SU) override; + void EmitInstruction(SUnit *SU) override; + /* + void AdvanceCycle() override; + void RecedeCycle() override; + void Reset() override; + */ + void EmitNoop() override; +}; + +} + +#endif Index: lib/Target/Connex/ConnexHazardRecognizersPreRAScheduler.cpp =================================================================== --- lib/Target/Connex/ConnexHazardRecognizersPreRAScheduler.cpp +++ lib/Target/Connex/ConnexHazardRecognizersPreRAScheduler.cpp @@ -0,0 +1,336 @@ +// Inspired from llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp + 
+//===-- ConnexHazardRecognizers.cpp - Connex Hazard Recognizer Impls --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements hazard recognizers for scheduling on PowerPC processors. +// +//===----------------------------------------------------------------------===// + +#include "ConnexHazardRecognizersPreRAScheduler.h" +#include "Connex.h" +#include "ConnexInstrInfo.h" +#include "ConnexTargetMachine.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "SUnit_dump.h" // 2019_03_30 + +using namespace llvm; + + + +#define DEBUG_TYPE "pre-RA-sched" + +/* +SUnit is meant for both types of schedulers: + - pre-RA, which deals with MachineSDNode and SDNode. + - post-RA, which deal with MachineInstr +But note that here we are a pre-RA scheduler. +So, as expected here an SUnit contains ONLY MachineSDNode and SDNode. +*/ +bool ConnexDispatchGroupSBHazardRecognizerPreRAScheduler::isReadAfterWrite(SUnit *SU) { + /* From http://llvm.org/docs/doxygen/html/classllvm_1_1MCInstrDesc.html + NOTE: although SU->isInstr() == false, we can use DAG->getInstrDesc(SU). + */ + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + + if (MCID == NULL) + return false; + + + LLVM_DEBUG(dbgs() << "isReadAfterWrite(SU = "; + dumpSU(SU, dbgs()); + dbgs() << ")\n"); + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): SU->Succs.size() = " + << SU->Succs.size() << "\n"); + /* See http://llvm.org/docs/doxygen/html/SelectionDAGNodes_8h_source.html#l00481 + /// Test if this node has a post-isel opcode, directly + /// corresponding to a MachineInstr opcode. 
+ */ + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): (SU->getNode())->isMachineOpcode() = " + << (SU->getNode())->isMachineOpcode() << "\n"); + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): (SU->getNode())->getOpcode() = " + << (SU->getNode())->getOpcode() << "\n"); + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): (SU->getNode())->getMachineOpcode() = " + << (SU->getNode())->getMachineOpcode() << "\n"); + +#ifdef USE_FOUNDINLINEASM + bool foundINLINEASM = false; +#endif + //MachineInstr *SUpred_INLINEASM = NULL; + for (unsigned int i = 0; i < SU->Succs.size(); ++i) { + SUnit *SUsucc = SU->Succs[i].getSUnit(); + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): SU->Succs[" << i << "] = "; + dumpSU(SUsucc, dbgs()); + dbgs() << ")\n"); + + if ((SUsucc->getNode())->isMachineOpcode() == false) + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): (SUsucc->getNode())->getOpcode() = " + << (SUsucc->getNode())->getOpcode() << "\n"); + else + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): (SUsucc->getNode())->getMachineOpcode() = " + << (SUsucc->getNode())->getMachineOpcode() << "\n"); + + if ( ((SUsucc->getNode())->isMachineOpcode() == false) && + ((SUsucc->getNode())->getOpcode() == ISD::INLINEASM) ) { + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): Found SDNode ISD::INLINEASM\n"); + +#ifdef USE_FOUNDINLINEASM + foundINLINEASM = true; +#endif + /* + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineInstr.html + // This fails with: llvm::MachineInstr* llvm::SUnit::getInstr() const: Assertion `!Node && "Reading MachineInstr of SUnit with SDNode!"' failed. 
+ SUpred_INLINEASM = SUsucc->getInstr(); + assert(SUpred_INLINEASM != NULL); + if ( ((SU->getNode())->isMachineOpcode() == true) && + ((SU->getNode())->getMachineOpcode() == Connex::VLOAD_H_SYM_IMM) ) { + SUpred_INLINEASM->bundleWithPred(); + } + */ + } + } + // See http://llvm.org/docs/doxygen/html/SelectionDAGNodes_8h_source.html#l00486 + if ( ((SU->getNode())->isMachineOpcode() == true) && + ((SU->getNode())->getMachineOpcode() == Connex::VLOAD_H_SYM_IMM) ) { + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): Found Connex::VLOAD_H_SYM_IMM\n"); + + /* + if (foundINLINEASM == true) { + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineInstr.html + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): before getInstr()\n"); + // Gives error: <> + (SU->getInstr())->bundleWithSucc(); + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): after getInstr()\n"); + } + */ + /* + LLVM_DEBUG(dbgs() << "isReadAfterWrite(): SU->Preds[0] = "; + (SU->Preds[0].getSUnit())->dump(DAG); + dbgs() << ")\n"); + */ + } + + /* + // Note: MCPhysReg is an integer - see http://llvm.org/docs/doxygen/html/namespacellvm.html: "typedef uint16_t llvm::MCPhysReg" + const MCPhysReg *MCIDArray = MCID->getImplicitUses(); + unsigned numUses = MCID->getNumImplicitUses(); seems it is always 0 + */ + //const MCOperandInfo *MCIDArray = MCID->OpInfo; + unsigned numUses = MCID->getNumOperands() - MCID->getNumDefs(); + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): numUses = " << numUses << "\n"); + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): MCID->getNumOperands() = " + << MCID->getNumOperands() << "\n"); + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): MCID->getNumDefs() = " + << MCID->getNumDefs() << "\n"); + + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): SU->Preds.size() = " + << SU->Preds.size() << "\n"); + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): SU->Succs.size() = " + << SU->Succs.size() << "\n"); + + /* + if (!MCID->mayLoad()) + return false; + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): SU can load\n"); + */ + /* TODO: NOTE: 
END_REPEAT returns also mayStore(). But we should not worry + about this since END_REPEAT takes no parameter. */ + if (!MCID->mayStore()) + return false; + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): SU can store\n"); + + // IMPORTANT: In the standard pre-RA, END_REPEAT has isInstr() == false + assert(SU->isInstr() == false); + /* + // TODO TODO TODO TODO: try to treat this since REPEAT is also intrinsic and can have conditional hazards + if (SU->isInstr() == false) { + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): SU->isInstr() == false\n"); + return false; + } + */ + + SDNode *SDN = SU->getNode(); + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): SDN->getNumOperands() = " + << SDN->getNumOperands() << "\n"); + + // SU is a load; for any predecessors in this dispatch group, that are stores, + // and with which we have an ordering dependency, return true. + for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { + /* + */ + const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit()); + + if (PredMCID == NULL) // || !PredMCID->mayStore()) + continue; + + /* SU->Preds is SmallVector of SDep. 
+ * - see http://llvm.org/docs/doxygen/html/classllvm_1_1SUnit.html + * - see http://llvm.org/docs/doxygen/html/classllvm_1_1SDep.html + */ + SDNode *PredSDN = (SU->Preds[i].getSUnit())->getNode(); + LLVM_DEBUG(dbgs() << " isReadAfterWrite(SU->Preds[" + << i << "] = "; + PredSDN->dump(); + //(SU->Preds[i].getSUnit())->dump(DAG); + //PredMCID->dump(DAG); + dbgs() << ")\n"); + + + /* + const MCPhysReg *PredMCIDArray = PredMCID->getImplicitDefs(); + unsigned numDefs = PredMCID->getNumImplicitDefs(); seems it is always 0 + */ + unsigned numDefs = PredMCID->getNumDefs(); + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): numDefs = " << numDefs << "\n"); + //const MCOperandInfo *PredMCIDArray = PredMCID->OpInfo; + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): PredSDN->getNumOperands() = " + << PredSDN->getNumOperands() << "\n"); + /* + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): PredSDN->getNumDefs() = " + << PredMCID->getNumDefs() << "\n"); + */ + + + //for (unsigned idUse = MCID->getNumDefs(); idUse < numUses; idUse++) { + for (unsigned idUse = 0; idUse < numUses; idUse++) { + /* + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): MCIDArray[" << idUse + << "] = " << MCIDArray[idUse] << "\n"); + */ + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): SDN->getOperand(" << idUse + << ") = "; + SDN->getOperand(idUse)->dump(); + dbgs() << "\n"); + //for (unsigned idDef = 0; idDef < PredSDN->getNumOperands(); idDef++) { + for (unsigned idDef = 0; idDef < numDefs; idDef++) { + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): PredSDN->getOperand(" << idUse + << ") = "; + PredSDN->getOperand(idDef)->dump(); + dbgs() << "\n"); + //if (PredSDN->getOperand(idDef) == SDN->getOperand(idUse)) { + if (PredSDN == SDN->getOperand(idUse).getNode()) { + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): Found PredSDN == SDN->getOperand(idUse)\n"); + return true; + } + /* + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): PredMCIDArray[" << idDef + << "] = " << PredMCIDArray[idDef] << "\n"); + if (PredMCIDArray[idDef] == 
MCIDArray[idUse]) { + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): found an instr sequence that has to be separated by NOP to avoid true dependency hazard\n"); + return true; + } + */ + + /* + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MachineOperand.html + const MachineOperand &PredMIMO = PredMI->getOperand(idDef); + const MachineOperand &MIMO = MI->getOperand(idUse); + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): PredMI->getOperand(" << idDef + << ") = " << PredMI->getOperand(idDef) << "\n"); + + if (PredMIMO.isReg() && MIMO.isReg() && + PredMIMO.getReg() == MIMO.getReg()) { + LLVM_DEBUG(dbgs() << " isReadAfterWrite(): found an instr sequence that has to be separated by NOP to avoid true dependency hazard\n"); + return true; + } + */ + } + } + /* + if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier()) + continue; + */ + //return true; + } + + return false; +} + +ScheduleHazardRecognizer::HazardType +ConnexDispatchGroupSBHazardRecognizerPreRAScheduler::getHazardType(SUnit *SU, int Stalls) { + static bool emittedNoop = false; + + // From http://llvm.org/docs/doxygen/html/classllvm_1_1SUnit.html + LLVM_DEBUG(dbgs() << "ConnexDispatchGroupSBHazardRecognizerPreRAScheduler::getHazardType(SU = "; + dumpSU(SU, dbgs()); + dbgs() << ", Stalls = " << Stalls << ")\n"); + + //if (Stalls == 0 && isLoadAfterStore(SU)) + if (Stalls == 0 && // no (pipeline?) 
stalls + emittedNoop == false && // TODO TODO TODO This is a very louzy tmp solution + isReadAfterWrite(SU)) { + LLVM_DEBUG(dbgs() << " Pre-RA: getHazardType(): return NoopHazard\n"); + + emittedNoop = true; + + return NoopHazard; + } + + return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); +} + +void ConnexDispatchGroupSBHazardRecognizerPreRAScheduler::EmitInstruction(SUnit *SU) { + unsigned i, ie; + + LLVM_DEBUG(dbgs() << "Entered Connex's PreRA EmitInstruction("; + dumpSU(SU, dbgs()); + dbgs() << ")\n"); + LLVM_DEBUG(dbgs() << " SU->Succs.size() = " + << SU->Succs.size() << "\n"); + LLVM_DEBUG(dbgs() << " SU->Preds.size() = " + << SU->Preds.size() << "\n"); + + for (i = 0, ie = (unsigned) SU->Succs.size(); i != ie; ++i) { + MachineInstr *SuccMI = (SU->Succs[i].getSUnit())->getInstr(); + if (SuccMI == NULL) { + LLVM_DEBUG(dbgs() << " SU->Succs[" + << i << "] = NULL\n"); + } + else { + LLVM_DEBUG(dbgs() << " SU->Succs[" + << i << "] = "; + SuccMI->dump(); + dbgs() << "\n"); + } + } + for (i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) { + MachineInstr *PredMI = (SU->Preds[i].getSUnit())->getInstr(); + if (PredMI == NULL) { + LLVM_DEBUG(dbgs() << " SU->Preds[" + << i << "] = NULL\n"); + } + else { + LLVM_DEBUG(dbgs() << " SU->Preds[" + << i << "] = "; + PredMI->dump(); + dbgs() << "\n"); + } + } + + return ScoreboardHazardRecognizer::EmitInstruction(SU); +} + +/* See also http://llvm.org/docs/doxygen/html/classllvm_1_1ScheduleHazardRecognizer.html +PreEmitNoops - This callback is invoked prior to emitting an instruction. +*/ +unsigned ConnexDispatchGroupSBHazardRecognizerPreRAScheduler::PreEmitNoops(SUnit *SU) { + LLVM_DEBUG(dbgs() << "Entered Connex's PreRA PreEmitNoops()\n"); + return 0; +} + +/* See also http://llvm.org/docs/doxygen/html/classllvm_1_1ScheduleHazardRecognizer.html +EmitNoop - This callback is invoked when a noop was added to the instruction stream. 
+*/ +void ConnexDispatchGroupSBHazardRecognizerPreRAScheduler::EmitNoop() { + LLVM_DEBUG(dbgs() << "Entered Connex's PreRA EmitNoops()\n"); +} + Index: lib/Target/Connex/ConnexISelDAGToDAG.cpp =================================================================== --- lib/Target/Connex/ConnexISelDAGToDAG.cpp +++ lib/Target/Connex/ConnexISelDAGToDAG.cpp @@ -0,0 +1,5106 @@ +//===-- ConnexISelDAGToDAG.cpp - A dag to dag inst selector for Connex ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a DAG pattern matching instruction selector for Connex, +// converting from a legalized dag to a Connex dag. +// +//===----------------------------------------------------------------------===// + +#include "Connex.h" +#include "ConnexRegisterInfo.h" +#include "ConnexSubtarget.h" +#include "ConnexTargetMachine.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" + + +// Gives error (we are NOT in the right directory): #include "SelectionDAGBuilder.h" +//#include "../lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h" + +#define DEBUG_TYPE "connex-isel" + +#include "../lib/Transforms/Vectorize/LoopVectorize_more.h" + + +using namespace llvm; + + + +#include "ConnexConfig.h" + +/* To help reading ASM code we put some useful comments (INLINE Asm nodes) + with where the emulation of unsupported operatio of type i32/f16/etc + starts and ends. 
+*/ +#define MARKER_FOR_EMULATION + + +/* IMPORTANT: these macros with BITCAST can add hazards due to delay slots. + We recommend disabling these macros. + */ +#define BITCAST_MAY2017_05_28 +//#define BITCAST_2018_06_F16 + + +/* + ConnexISelDAGToDAG is a subclass of SelectionDAGISel: + class ConnexDAGToDAGISel : public SelectionDAGISel + + From SelectionDAGBuilder.h: // the key is Value *, SDValue is the value + DenseMap NodeMap; + void setValue(const Value *V, SDValue NewN) { + SDValue &N = NodeMap[V]; + assert(!N.getNode() && "Already set a value for this node!"); + N = NewN; + } + (See http://llvm.org/docs/doxygen/html/classllvm_1_1DenseMap.html) + (http://llvm.org/docs/doxygen/html/classllvm_1_1DenseMapBase.html and + http://llvm.org/docs/doxygen/html/DenseMap_8h_source.html) + + From include/llvm/CodeGen/SelectionDAGISel.h: + /// SelectionDAGISel - This is the common base class used for SelectionDAG-based + /// pattern-matching instruction selectors. + class SelectionDAGISel : public MachineFunctionPass { + public: + TargetMachine &TM; + const TargetLibraryInfo *LibInfo; + FunctionLoweringInfo *FuncInfo; + MachineFunction *MF; + MachineRegisterInfo *RegInfo; + SelectionDAG *CurDAG; + SelectionDAGBuilder *SDB; +*/ + + +static bool isUnitSteppedZeroStartingVector(const BuildVectorSDNode *N) { + unsigned int nOps = N->getNumOperands(); + + assert(nOps > 1 && "isSplatVector has 0 or 1 sized build vector"); + + LLVM_DEBUG(dbgs() << "Entered isUnitStridedZeroStartingVector()\n"); + //SDValue Operand0 = N->getOperand(0); + + /* + assert(N->getOperand(0) == N->getOperand(1)); + assert(N->getOperand(0) == N->getOperand(2)); + */ + + for (unsigned int i = 0; i < nOps; ++i) { + LLVM_DEBUG(dbgs() << "N->getOperand(" << i << ") = "; + N->getOperand(i)->dump(); dbgs() << "\n"); + //return false; + } + +#ifdef NOTNOTNOT + SDNode *Nop0 = (N->getOperand(0)).getNode(); + LLVM_DEBUG(dbgs() << "Nop0->getOperand(0) = "; + Nop0->getOperand(0)->dump(); dbgs() << "\n"); + + SDNode 
*Nop00 = (Nop0->getOperand(0)).getNode(); + LLVM_DEBUG(dbgs() << "Nop00->getOperand(0) = "; + Nop00->getOperand(0)->dump(); dbgs() << "\n"); + + + SDNode *Nop000 = (Nop00->getOperand(0)).getNode(); + LLVM_DEBUG(dbgs() << "Nop000->getOperand(0) = "; + Nop000->getOperand(0)->dump(); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Nop000->getOperand(1) = "; + Nop000->getOperand(1)->dump(); dbgs() << "\n"); + + /* For: + Nop000->getOperand(1) = t1: i64 = Register %vreg3 + it does not have any operands. + */ + SDNode *Nop0001 = (Nop000->getOperand(1)).getNode(); + LLVM_DEBUG(dbgs() << "Nop0001->getOperand(0) = "; + Nop0001->getOperand(0)->dump(); dbgs() << "\n"); +#endif + + LLVM_DEBUG(dbgs() << "Exiting isUnitStridedZeroStartingVector()\n"); + + return true; +} + + + +// Instruction Selector Implementation +namespace { + +class ConnexDAGToDAGISel : public SelectionDAGISel { +public: + explicit ConnexDAGToDAGISel(ConnexTargetMachine &TM) : SelectionDAGISel(TM) {} + + StringRef getPassName() const override { + return "Connex DAG->DAG Pattern Instruction Selection"; + } + +private: + // Include the pieces autogenerated from the target description. + #include "ConnexGenDAGISel.inc" + + bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base); + + void SelectBUILD_VECTOR(SDNode *Node); + void SelectVECTOR_SHUFFLE(SDNode *Node); + + SDNode *SelectVSELECT(SDNode *Node); + + SDNode *SelectReduceI32(SDNode *Node); + SDNode *SelectAddI32(SDNode *Node); + SDNode *SelectSubI32(SDNode *Node); + SDNode *SelectMulI32(SDNode *Node); + SDNode *SelectSraI32(SDNode *Node); + // + SDNode *SelectDivI16(SDNode *Node); + // + SDNode *SelectReduceF16(SDNode *Node); + SDNode *SelectAddF16(SDNode *Node); + SDNode *SelectSubF16(SDNode *Node); + SDNode *SelectMulF16(SDNode *Node); + SDNode *SelectDivF16(SDNode *Node); + SDNode *SelectLtF16(SDNode *Node); + + void Select(SDNode *N) override; + + // Complex Pattern for address selection. 
+ bool SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset); + bool SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset); + + // Added from MipsSEISelDAGToDAG.cpp + bool selectAddrFrameIndex(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, + SDValue &Offset, unsigned OffsetBits) const; + bool selectAddrRegImm10(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const; + bool selectIntAddrMSA(SDValue Addr, SDValue &Base, SDValue &Offset) const; + + + + // In Mips we have MipsSEIselDAGToDAG inheriting MipsIselDAGToDAG, but + // in Connex we do NOT, so we comment the override qualifier + /// \brief Select constant vector splats. + bool selectVSplat(SDNode *N, APInt &Imm, + unsigned MinSizeInBits) const; //override; + /// \brief Select constant vector splats whose value fits in a given integer. + bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed, + unsigned ImmBitSize) const; + /// \brief Select constant vector splats whose value fits in a uimm1. + bool selectVSplatUimm1(SDValue N, SDValue &Imm) const; // override; + /// \brief Select constant vector splats whose value fits in a uimm2. + bool selectVSplatUimm2(SDValue N, SDValue &Imm) const; //override; + /// \brief Select constant vector splats whose value fits in a uimm3. + bool selectVSplatUimm3(SDValue N, SDValue &Imm) const; //override; + /// \brief Select constant vector splats whose value fits in a uimm4. + bool selectVSplatUimm4(SDValue N, SDValue &Imm) const; //override; + /// \brief Select constant vector splats whose value fits in a uimm5. + bool selectVSplatUimm5(SDValue N, SDValue &Imm) const; //override; + /// \brief Select constant vector splats whose value fits in a uimm6. + bool selectVSplatUimm6(SDValue N, SDValue &Imm) const; //override; + /// \brief Select constant vector splats whose value fits in a uimm8. 
+ bool selectVSplatUimm8(SDValue N, SDValue &Imm) const; //override; + /// \brief Select constant vector splats whose value fits in a simm5. + bool selectVSplatSimm5(SDValue N, SDValue &Imm) const; //override; + /// \brief Select constant vector splats whose value is a power of 2. + bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const; //override; + /// \brief Select constant vector splats whose value is the inverse of a + /// power of 2. + bool selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const; //override; + /// \brief Select constant vector splats whose value is a run of set bits + /// ending at the most significant bit + bool selectVSplatMaskL(SDValue N, SDValue &Imm) const; //override; + /// \brief Select constant vector splats whose value is a run of set bits + /// starting at bit zero. + bool selectVSplatMaskR(SDValue N, SDValue &Imm) const; //override; +}; // end class ConnexDAGToDAGISel +} // end namespace + + +// ComplexPattern used on Connex Load/Store instructions +bool ConnexDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { + // if Address is FI, get the TargetFrameIndex. 
+ SDLoc DL(Addr); + if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { + // TODO_CHANGE_BACKEND: + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), TYPE_SCALAR_ELEMENT); + + Offset = CurDAG->getTargetConstant(0, DL, TYPE_SCALAR_ELEMENT); + return true; + } + + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) + return false; + + // Addresses of the form Addr+const or Addr|const + if (CurDAG->isBaseWithConstantOffset(Addr)) { + ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); + if (isInt<32>(CN->getSExtValue())) { + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = + dyn_cast(Addr.getOperand(0))) + // TODO_CHANGE_BACKEND: + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), TYPE_SCALAR_ELEMENT); + else + Base = Addr.getOperand(0); + + // TODO_CHANGE_BACKEND: + Offset = CurDAG->getTargetConstant(CN->getSExtValue(), DL, TYPE_SCALAR_ELEMENT); + + return true; + } + } + + Base = Addr; + // TODO_CHANGE_BACKEND: + Offset = CurDAG->getTargetConstant(0, DL, TYPE_SCALAR_ELEMENT); + + return true; +} + + +// ComplexPattern used on Connex FI instruction +bool ConnexDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset) { + SDLoc DL(Addr); + + if (!CurDAG->isBaseWithConstantOffset(Addr)) + return false; + + // Addresses of the form Addr+const or Addr|const + ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); + if (isInt<32>(CN->getSExtValue())) { + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = dyn_cast(Addr.getOperand(0))) + // TODO_CHANGE_BACKEND: + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), TYPE_SCALAR_ELEMENT); + else + return false; + + // TODO_CHANGE_BACKEND: + Offset = CurDAG->getTargetConstant(CN->getSExtValue(), DL, TYPE_SCALAR_ELEMENT); + return true; + } + + return false; +} + + +// IMPORTANT: Note that RecoverCExpressionFromSDNode() is used only for +// REPEAT and BUILD_VECTOR nodes, in method Select(). 
+std::string RecoverCExpressionFromSDNode(SDNode *theSDNode, + DenseMap &SDBNodeMap, + bool failOver) { + /* + NOTE: the SelectionDAGISel::crtNodeMap, defined in + lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp, + discussed at http://lists.llvm.org/pipermail/llvm-dev/2016-November/107361.html + + getNodeMap() (method defined by me) returns the NodeMap object from + SelectionDAGBuilder.h with this definition: + DenseMap NodeMap; + + Note however that this info is not enough since some SDNodes get generated + in the following phases of the back end, namely: + - DAG combining - see lib/CodeGen/SelectionDAG/DAGCombiner.cpp + + This class gets invoked much later, after all the ones mentioned above have + finished. + */ + LLVM_DEBUG(dbgs() + << "Entered RecoverCExpressionFromSDNode() (ConnexISelDAGToDAG.cpp)\n"); + + std::string res; + + // Important note: class SelectionDAGBuilder is forward declared. + //assert(SDB != NULL); + //assert(SDB->NodeMap[(const Value *)NULL]); // NodeMap is private + //DenseMapBase<> + //auto iterNodeMap = SDB->NodeMap.begin(); + + //bool res = SDB->HasTailCall; + + //DenseMap &SDBNodeMap = crtNodeMap; //SDB->getNodeMap(); + + //unsigned size = SDB->NodeMap.size(); + LLVM_DEBUG(dbgs() << "RecoverCExpressionFromSDNode(): SDB->NodeMap.size() = " + << SDBNodeMap.size() + << ", theSDNode = "; + theSDNode->dump(); + dbgs() + << ", theSDNode (ptr) = " + << theSDNode + << "\n"); + + /* + We retrieve from the SDBNodeMap the associated LLVM IR Instruction for + theSDNode (SDNode created by SelectionDAGBuilder). 
+ */ + + int counter = 0; + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1DenseMapBase.html + for (auto iterNodeMap = SDBNodeMap.begin(); + iterNodeMap != SDBNodeMap.end(); iterNodeMap++, counter++) { + + // Type (in error of g++) is: llvm::detail::DenseMapPair + auto tmp1 = (*iterNodeMap); + //SDValue tmp1N = (*iterNodeMap); + //auto tmp2 = (*iterNodeMap); //->second; + + //Value *crtValue = tmp1.first; + const Instruction *crtValue = (const Instruction *)(tmp1.first); + SDNode *crtSDNode = tmp1.second.getNode(); + + LLVM_DEBUG(dbgs() << "RecoverCExpressionFromSDNode(): [#" << counter + << "] tmp1.first = " + << *crtValue + /* + << ", tmp1.second = "; + tmp1.second.dump(); + dbgs() << "\n" + */ + << "\n" + ); + + //assert(crtNode != nullptr); + if (crtSDNode != nullptr) { + LLVM_DEBUG(dbgs() << "RecoverCExpressionFromSDNode(): tmp1.second.getNode() = "; + crtSDNode->dump(); + dbgs() << "\n"); + //<< *crtNode << "\n"); + + if (crtSDNode == theSDNode) { + LLVM_DEBUG(dbgs() << "RecoverCExpressionFromSDNode(): Found a match:...\n"); + + /* + This corresponds to cases like: + From 300_Opincaa/sSub/STDerr_llc_01 + RecoverCExpressionFromSDNode(): tmp1.first = %broadcast.splatinsert10 = insertelement <128 x i16> undef, i16 %sub, i32 0, !dbg !8 + RecoverCExpressionFromSDNode(): tmp1.second.getNode() = t33: v128i16 = BUILD_VECTOR t35, t35, t35, ... t35 + + We can see here that the machine-independent back end instruction BUILD_VECTOR + is more complex (abstract) than the LLVM IR insertelement. + The equivalent to BUILD_VECTOR LLVM IR program uses also a shufflevector instruction: + %broadcast.splatinsert10 = insertelement <128 x i16> undef, i16 %sub, i32 0, !dbg !8 + %broadcast.splat11 = shufflevector <128 x i16> %broadcast.splatinsert10, <128 x i16> undef, <128 x i32> zeroinitializer, !dbg !8 + + Note that RecoverCExpressionFromSDNode() is used only for BUILD_VECTOR. 
+ + For the SSD benchmark, the associated instruction is though + ShuffleVector + (see /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/90_CV/SSD/B/STDerr_llc_01). + */ + /* + assert( (crtValue->getOpcode() == Instruction::InsertElement) || + (crtValue->getOpcode() == Instruction::ShuffleVector) + ); + */ + + + Instruction *crtValueOp1; + + switch (crtValue->getOpcode()) { + case Instruction::InsertElement: + case Instruction::ShuffleVector: + if (crtValue->getOpcode() == Instruction::InsertElement) { + crtValueOp1 = (Instruction *)(crtValue->getOperand(1)); + } + else { + crtValueOp1 = (Instruction *)(crtValue->getOperand(0)); + assert(crtValueOp1->getOpcode() == Instruction::InsertElement); + // TODO: check that crtValueOp1->getOperand(0) is vec undef, crtValueOp1->getOperand(2) is 0 + crtValueOp1 = (Instruction *)(crtValueOp1->getOperand(1)); + } + LLVM_DEBUG(dbgs() << " crtValueOp1 = " + << *crtValueOp1 << "\n"); + + getExprForDMATransfer = true; + res = GetExpr(crtValueOp1); + LLVM_DEBUG(dbgs() << " GetExpr(crtValueOp1) = " + << res << "\n"); + break; + default: + getExprForDMATransfer = true; + res = GetExpr(const_cast(crtValue)); + + LLVM_DEBUG(dbgs() << " GetExpr(crtValue) = " + << res << "\n"); + break; + } + break; + } + } + else { + LLVM_DEBUG(dbgs() << + "RecoverCExpressionFromSDNode(): tmp1.second.getNode() == nullptr\n\n"); + } + } // end for + + //assert(res.size() != 0); + if (res.length() == 0) { + if (failOver) { + //#define NVEC_STR "n.vec" + #define NVEC_STR "VTC_ceil" + + /* TODO TODO TODO TODO TODO TODO TODO: find, if possible a better + solution. Keep track of the SelectionDAGs of all BBs, not just the + current BB. 
*/ + + LLVM_DEBUG(dbgs() << + "RecoverCExpressionFromSDNode(): failOver == true --> we look " + "for NVEC_STR (vector tripcount defined in LoopVectorize.cpp) " + "in SDBNodeMap and retrieve for it\n"); + + /* Although not a great alternative, we look in SDBNodeMap for + * an entry containing %n.vec - this should exist from a previous + * BB. + */ + for (auto iterNodeMap = SDBNodeMap.begin(); + iterNodeMap != SDBNodeMap.end(); iterNodeMap++, counter++) { + auto tmp1 = (*iterNodeMap); + const Instruction *crtValue = (const Instruction *)(tmp1.first); + + LLVM_DEBUG(dbgs() << "RecoverCExpressionFromSDNode(): crtValue = " + << *crtValue << "\n"); + + // From http://llvm.org/docs/doxygen/html/classllvm_1_1StringRef.html + //if (crtValue->getName().str() == NVEC_STR) + if (strncmp(crtValue->getName().str().c_str(), NVEC_STR, + strlen(NVEC_STR)) == 0) { + getExprForDMATransfer = true; + + res = GetExpr(const_cast(crtValue)); + LLVM_DEBUG(dbgs() << " RecoverCExpressionFromSDNode(): res = " + << res << "\n"); + + /* TODO TODO TODO TODO: this is NOT good if the res already + contains a constant such as 1 - OK we could take out + CreateDiv in LoopVectorize.cpp, etc */ + + // res = res + " / CONNEX_VECTOR_LENGTH"; // Unfortunately, we hard code this also here... 
+ } + } + } + else { + assert(res.length() != 0); + } + } + + return res; +} + + + +// Inspired from lib/Target/X86/X86ISelDAGToDAG.cpp +bool ConnexDAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, + SDValue &Index) { + LLVM_DEBUG(dbgs() << "Entered ConnexDAGToDAGISel::selectVectorAddr()\n"); + + LLVM_DEBUG(dbgs() << " selectVectorAddr(): Parent = "; Parent->dump(CurDAG); + dbgs() << "\n N = "; N->dump(CurDAG); + /* + dbgs() << "\n Base.getNode() = " << Base.getNode(); + dbgs() << "\n Base = "; Base->dump(CurDAG); + */ + dbgs() << "\n"); + + // From http://llvm.org/docs/doxygen/html/classllvm_1_1MaskedGatherScatterSDNode.html + MaskedGatherScatterSDNode *Mgs = dyn_cast(Parent); + if (!Mgs) + return false; + + /* + // Retrieve the "scalar base pointer" (as said also at + // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20150831/297534.html) + Base = Mgs->getBasePtr(); + */ + Index = Mgs->getIndex(); + //Base = Mgs->getIndex(); + + LLVM_DEBUG(dbgs() << " selectVectorAddr(), after update: Parent = "; + Parent->dump(CurDAG); + dbgs() << "\n N = "; N->dump(CurDAG); + dbgs() << "\n Index.getNode() = " << Index.getNode(); + dbgs() << "\n Index = "; Index->dump(CurDAG); + dbgs() << "\n"); + + +#ifdef NOTNOT + // From http://llvm.org/docs/doxygen/html/classllvm_1_1MaskedGatherScatterSDNode.html + MaskedGatherScatterSDNode *Mgs = dyn_cast(Parent); + if (!Mgs) + return false; + + LLVM_DEBUG(dbgs() << " after update, selectVectorAddr(): Parent = "; + Parent->dump(CurDAG); + dbgs() << "\n N = "; + N->dump(CurDAG); + dbgs() << "\n Base.getNode() = " << Base.getNode(); + dbgs() << "\n Base = "; Base->dump(CurDAG); + dbgs() << "\n Scale.getNode() = " << Scale.getNode(); + dbgs() << "\n ScalarSize = " << ScalarSize; + //dbgs() << "\n Scale = "; Scale->dump(CurDAG); + dbgs() << "\n Index.getNode() = " << Index.getNode(); + dbgs() << "\n Index = "; Index->dump(CurDAG); + dbgs() << "\n Disp.getNode() = " << Disp.getNode(); + dbgs() << "\n Disp = "; 
Disp->dump(CurDAG); + dbgs() << "\n Segment.getNode() = " << Segment.getNode(); + dbgs() << "\n Segment = "; Segment->dump(CurDAG); + dbgs() << "\n"); +#endif + + LLVM_DEBUG(dbgs() << "Exiting ConnexDAGToDAGISel::selectVectorAddr()\n"); + + return true; +} + + +SDNode *CreateInlineAsmNode(SelectionDAG *CurDAG, std::string asmString, + SDNode *nodeSYM_IMM, SDLoc &DL, + bool specialCase=false) { + /* This step is very IMPORTANT: + IMPORTANT: As of Oct 2016, we must malloc the char * that is + passed to + getTargetExternalSymbol as a reference, so we must make sure + the value persists after we get out of this function. + Hopefully no leak will happen either - maybe when deleting + SDNode the destructor frees the char *. + + // With difficulty I found with Google this method doing + creation of the SDNode, which is used also by + getTargetExternalSymbol(). + template + SDNodeT *newSDNode(ArgTypes &&... Args) { + return new (NodeAllocator.template Allocate()) + SDNodeT(std::forward(Args)...); + } + */ + + /* THIS is NOT correct - it's an automatic variable and gives error later + in the execution (e.g., scheduling or I-sel): + char exprStrChar[MAXLEN_STR]; */ + + char *exprStrChar = (char *)malloc(MAXLEN_STR); + strcpy(exprStrChar, asmString.c_str()); + LLVM_DEBUG(dbgs() << "CreateInlineAsmNode(): exprStrChar = " + << exprStrChar << "\n"); + /* + See http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html: + SDValue getTargetExternalSymbol (const char *Sym, EVT VT, + unsigned char TargetFlags=0) + */ + SDValue extSym = CurDAG->getTargetExternalSymbol( + //"VLOAD R_todo, !!!!\n", + //exprStr.c_str(), + + exprStrChar, + + //TYPE_VECTOR_I16 + MVT::i64 + ); + SDNode *extSymNode = extSym.getNode(); + LLVM_DEBUG(dbgs() << "CreateInlineAsmNode(): extSymNode = "; + extSymNode->dump(); dbgs() << "\n"); + + /* + From http://llvm.org/doxygen/namespacellvm_1_1ISD.html + "INLINEASM - Represents an inline asm block. 
+ This node always has two return values: a chain and a flag result. + The inputs are as follows: + Operand #0 : Input chain. + Operand #1 : a ExternalSymbolSDNode with a pointer to the asm string. + Operand #2 : a MDNodeSDNode with the !srcloc metadata. + Operand #3 : HasSideEffect, IsAlignStack bits. + After this, it is followed by a list of operands with this format: + ConstantSDNode: Flags that encode whether it is a mem or not, the + of operands that follow, etc. + See InlineAsm.h. ... however many operands ... Operand #last: Optional, an incoming flag." + */ + std::vector opsInline; + + /* This generates either: + - a glue edge/link if the return type is MVT::Glue + - a chain edge/link if the return type is MVT::Other + between the nodeSYM_IMM and the INLINEASM node. + */ + // 2018_06_29 + if (specialCase) { + //opsInline.push_back(CurDAG->getEntryNode()); + } + else + opsInline.push_back(SDValue(nodeSYM_IMM, 0)); + // + opsInline.push_back(extSym); //SDValue(extSym, 0)); + + +//#ifdef NOTNOT + /* Creating a null-MDNode MDNodeSDNode object. + Inspiring from (since only SelectionDAG can call constructor) + http://llvm.org/docs/doxygen/html/SelectionDAGNodes_8h_source.html: + 01756 class MDNodeSDNode : public SDNode { + 01757 const MDNode *MD; + 01758 friend class SelectionDAG; + 01759 explicit MDNodeSDNode(const MDNode *md) + 01760 : SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md) + 01761 {} + See also, although not helpful, + http://llvm.org/docs/doxygen/html/classllvm_1_1MDNodeSDNode.html . + */ + /* Does NOT work: MDNodeSDNode mdNodeSDNode; // = MDNodeSDNode::getMD(); + is private: MDNodeSDNode::MDNodeSDNode(mdNode); */ + +#ifdef INTERESTING_BUG + /* IMPORTANT: this NON-standard mdNode created below gives errors + if we generate 2 or more of these nodes in the compiled ASM + module generated. 
+ See for example + /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/!/300_Opincaa/sAdd_BUG/STDerr_llc_01_old01 + for error: + ::ret_type llvm::cast(Y*) [with X = llvm::ValueAsMetadata; Y = const llvm::Metadata; typename llvm::c + ast_retty::ret_type = const llvm::ValueAsMetadata*]: Assertion `isa(Val) && "cast() argument of incompatible type!"' failed. + (reported in http://lists.llvm.org/pipermail/llvm-dev/2016-October/106629.html). + + This error is given when trying to print what when succesful gives: + ; dbg:ReduceSymbolic.c:18:5 + (I presume is the MDNode, and is where the error is given). + */ + + // Creating an SDNode MDNode (MetaData) with a ch out port + SDValue mdNode = CurDAG->getNode(ISD::MDNODE_SDNODE, DL, + CurDAG->getVTList(MVT::Other) + ); + SDNode *mdNodeSDNode = mdNode.getNode(); + /* + SDNode *mdNodeSDNode = CurDAG->getNode(ISD::MDNODE_SDNODE, DL, + CurDAG->getVTList(MVT::Other) + //Node->getOperand(0) //gives error: ScheduleDAG.cpp:425: unsigned int llvm::ScheduleDAG::VerifyScheduledDAG(bool): Assertion `!AnyNotSched' failed. + ).getNode(); + */ +#endif + //#endif + + // Creating a NON-null-MDNode MDNodeSDNode object (has a + // hexadecimal value when outputing the DOT file). + /* From + http://llvm.org/docs/doxygen/html/classllvm_1_1MDNode.html: + Detailed Description + Metadata nodes can be uniqued, like constants, or distinct. 
+ */ + // Actually inspired from http://ftp.nchc.org.tw/NetBSD/NetBSD-current/src/external/bsd/llvm/dist/llvm/unittests/IR/MetadataTest.cpp + MDNode *mdNode = MDNode::get(* (CurDAG->getContext()), None); + /* + From http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html + <> + */ + SDNode *mdNodeSDNode = CurDAG->getMDNode(mdNode).getNode(); + // + /* Avoiding error - see /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/DawnCC/30l_dotprod_f16/5/STDerr_llc_01_old03: + << Assertion `Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Glue && "Chain and glue operands should occur at end of operand list!"' failed. + */ + if (specialCase == false) { + opsInline.push_back(SDValue(mdNodeSDNode, 0)); + } + +#ifdef NOTNOT + /* Inspiring from (since only SelectionDAG can call + constructor) + http://llvm.org/docs/doxygen/html/SelectionDAGNodes_8h_source.html + 01435 class ConstantSDNode : public SDNode + 01436 const ConstantInt *Value; + 01437 friend class SelectionDAG; + 01438 ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, + 01439 DebugLoc DL, EVT VT) + 01440 : SDNode(isTarget ? 
ISD::TargetConstant : ISD::Constant, + 01441 0, DL, getSDVTList(VT)), Value(val) + 01442 SubclassData |= (uint16_t)isOpaque; + 01443 } + */ + SDValue targetConstant = CurDAG->getNode(ISD::TargetConstant, DL, + CurDAG->getVTList(MVT::i64)); + SDNode *targetConstantSDNode = targetConstant.getNode(); +#endif + /* + From http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html + <> + */ + SDValue targetConstant = CurDAG->getTargetConstant(1, DL, MVT::i64); + SDNode *targetConstantSDNode = targetConstant.getNode(); + // + opsInline.push_back(SDValue(targetConstantSDNode, 0)); // TargetConstant<1>, 0) - a i64 port); + +#ifdef DO_NOT_EXEC_BUT_INTERESTING + /* Unfortunately, ISD::INLINEASM accepts only ConstantSDNode + from 2nd operand onwards - see InstrEmitter.cpp, line 966: + unsigned Flags = + cast(Node->getOperand(i))->getZExtValue(); + */ + // Unsuccessful - Attempting to add a chain edge + SDValue NodeOp0 = Node->getOperand(0); + LLVM_DEBUG(dbgs() << "Selecting NodeOp0 = "; + NodeOp0->dump(); + dbgs() << '\n'); + opsInline.push_back(NodeOp0); +#endif + + // 2018_06_29: + if (specialCase) + opsInline.push_back(SDValue(nodeSYM_IMM, 0)); + + // Note that you can also look at the .dot file output + // from the LLVM I-sel stage to get an idea on how an + // INLINEASM node looks. + + // Related to CODE2018_07_01 + SDNode *inlineAsmNode; + if (specialCase == true) { + inlineAsmNode = CurDAG->getMachineNode( + Connex::INLINEASM, + DL, + // Result types: + //CurDAG->getVTList(TYPE_VECTOR_I16), + CurDAG->getVTList(MVT::Other, MVT::Glue), + opsInline); + } + else { + SDValue inlineAsm = CurDAG->getNode( + // We use this non-machine SDNode to avoid + // <> e.g. 
+ // in middle.block + ISD::INLINEASM, + DL, + // Result types: + //CurDAG->getVTList(TYPE_VECTOR_I16), + CurDAG->getVTList(MVT::Other, MVT::Glue), + opsInline); + inlineAsmNode = inlineAsm.getNode(); + } + + LLVM_DEBUG(dbgs() << "CreateInlineAsmNode(): inlineAsmNode = "; + inlineAsmNode->dump(); + //dbgs() << '\n' + ); + + return inlineAsmNode; +} // END CreateInlineAsmNode() + + + + +static SDValue ChangeVectorType(SDValue InOp, MVT NVT, SelectionDAG &DAG, + bool FillWithZeroes = false) { + // Check if InOp already has the right width. + MVT InVT = InOp.getSimpleValueType(); + if (InVT == NVT) + return InOp; + + if (InOp.isUndef()) + return DAG.getUNDEF(NVT); + + /* + assert(InVT.getVectorElementType() == NVT.getVectorElementType() && + "input and widen element type must match"); + */ + + unsigned InNumElts = InVT.getVectorNumElements(); + unsigned WidenNumElts = NVT.getVectorNumElements(); + /* + assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 && + "Unexpected request for vector widening"); + */ + assert(WidenNumElts == InNumElts && + "WidenNumElts == InNumElts failed"); + + EVT EltVT = NVT.getVectorElementType(); + + SDLoc dl(InOp); + if (InOp.getOpcode() == ISD::CONCAT_VECTORS && + InOp.getNumOperands() == 2) { + SDValue N1 = InOp.getOperand(1); + if ((ISD::isBuildVectorAllZeros(N1.getNode()) && FillWithZeroes) || + N1.isUndef()) { + InOp = InOp.getOperand(0); + InVT = InOp.getSimpleValueType(); + InNumElts = InVT.getVectorNumElements(); + } + } + + if (ISD::isBuildVectorOfConstantSDNodes(InOp.getNode()) || + ISD::isBuildVectorOfConstantFPSDNodes(InOp.getNode())) { + SmallVector Ops; + for (unsigned i = 0; i < InNumElts; ++i) { + //Ops.push_back(InOp.getOperand(i)); + Ops.push_back(InOp.getOperand(0)); + } + + + /* + SDValue FillVal = FillWithZeroes ? 
DAG.getConstant(0, dl, EltVT) : + DAG.getUNDEF(EltVT); + for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i) + Ops.push_back(FillVal); + */ + SDValue res = DAG.getBuildVector(NVT, dl, Ops); + + LLVM_DEBUG(dbgs() << "Exiting ChangeVectorType() with: res = " + << res.getNode() << ".\n"); + + return res; + } + + assert(0 && "ChangeVectorType(): I guess this case should not be reached"); + SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, NVT) : + DAG.getUNDEF(NVT); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NVT, FillVal, + InOp, DAG.getIntPtrConstant(0, dl)); +} + + + +void ConnexDAGToDAGISel::SelectBUILD_VECTOR(SDNode *Node) { + LLVM_DEBUG(dbgs() << "Entered SelectBUILD_VECTOR().\n"); + + // NEW32 + EVT typeVecNode; + SDLoc DL(Node); + + BuildVectorSDNode *BVN = cast(Node); + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + unsigned LdiOp; + EVT ResTy = BVN->getValueType(0); + EVT ViaVecTy; + + bool needsConvertionToResultType = true; + + SDNode *Res; + + if (ResTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR(): We are in the case TYPE_VECTOR_I32\n"); + /* + TODO TODO TODO TODO TODO TODO TODO TODO: + Although so far we do not have a test for this case, in principle we + should lower the following target-independent SDNode: + BUILD_VECTOR i32ct + to: + R0 = 1; + R1 = VLOAD i32ct_lower16bits; + R2 = VLOAD i32ct_higher16bits; + CELLSHR R2, R0; + WHERE_EQ (INDEX & 1 == 1) // for all odd indices + R1 = R2 | R2; + END_WHERE; + */ + } + else + if (ResTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR(): We are in the case TYPE_VECTOR_I16\n"); + } + typeVecNode = ResTy; + + /* + From http://llvm.org/docs/doxygen/html/classllvm_1_1BuildVectorSDNode.html: + bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, + unsigned &SplatBitSize, bool &HasAnyUndefs, + unsigned MinSplatBits=0, bool isBigEndian=false) const + Check if this is a constant splat, and if so, find the smallest element + size 
that splats the vector. + + By constant splat we understand a vector filled with the same + constant value in all elements. + */ + if (BVN->isConstantSplat(SplatValue, SplatUndef, + SplatBitSize, HasAnyUndefs, + 8, true) == false) { + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR(): " + "BVN->isConstantSplat() == false:\n"); + + /* Checking if we have a symbolic splat. + From + http://llvm.org/docs/doxygen/html/classllvm_1_1BuildVectorSDNode.html: + SDValue getSplatValue (BitVector *UndefElements=nullptr) const + <> + */ + SDValue symbolicValue = BVN->getSplatValue(); + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR(): symbolicValue.getNode() = " + << symbolicValue.getNode() << "\n"); + + // Inspired VAGUELY from + // http://llvm.org/docs/doxygen/html/SelectionDAGNodes_8h_source.html + if (symbolicValue.getNode() != nullptr) { + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR(): symbolicValue = "; + symbolicValue->dump(); + dbgs() << "\n"); + //LdiOp = Connex::VLOAD_H_STR; + + /* For the case BUILD_VECTOR is a variable splat + (contains the same variable in all elements of the vector), + we retrieve the C expression from the variable and generate + an inlineasm with VLOAD variable_C_Expression (so this is Opincaa host + and Connex ASM code together). 
*/ + + + + /* + From http://llvm.org/docs/doxygen/html/namespacellvm_1_1ISD.html: + <> + Also, ISD::INLINEASM accepts only objects of type ConstantSDNode + from 2nd operand onwards - see InstrEmitter.cpp, line 966: + unsigned Flags = + cast(Node->getOperand(i))->getZExtValue(); + + Examples of creating an INLINEASM SDNode, in llc: + From llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp, + (or llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp) : + + if (Glue.getNode()) + AsmNodeOperands.push_back(Glue); + if (!Changed) + return false; + + SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), + CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); + New->setNodeId(-1); + ReplaceNode(N, New.getNode()); + + Less useful: From SelectionDAGISel.cpp: + void SelectionDAGISel::Select_INLINEASM(SDNode *N) { + SDLoc DL(N); + + std::vector Ops(N->op_begin(), N->op_end()); + SelectInlineAsmMemoryOperands(Ops, DL); + + const EVT VTs[] = {MVT::Other, MVT::Glue}; + SDValue New = CurDAG->getNode(ISD::INLINEASM, DL, VTs, Ops); + New->setNodeId(-1); + ReplaceUses(N, New.getNode()); + CurDAG->RemoveDeadNode(N); + } + + From SelectionDAGBuilder.cpp: + Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(), + DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); + + LESS relevant note: to create an InlineAsm Value in LLVM program, + in clang/opt, we can use API described at: + http://llvm.org/docs/doxygen/html/classllvm_1_1InlineAsm.html + http://llvm.org/docs/doxygen/html/InlineAsm_8h_source.html + http://llvm.org/docs/doxygen/html/InlineAsm_8cpp_source.html + */ + + + /* + This gives llc error: + <NodeNum] > Node2Index[I->getSUnit()->NodeNum] && + "Wrong topological sorting"' failed.>> + Res = Node; + */ + + /* + SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty), + getTargetNode(N, Ty, DAG, GOTFlag)); + + From ...: + t37: ch,glue = inlineasm t34, TargetExternalSymbol:i64'sum = connexGlobal->readReduction(); + ... 
// END making a separate scope + // END_HOST_DEVICE_CODE', MDNode:ch, TargetConstant:i64<1> + */ + + /* + // This is outdated: NOT surpisingly, this results in creating a node like: + // RED_H getVTList(MVT::Other, MVT::Glue), + it gives error: + // <> + + When using + CurDAG->getVTList(TYPE_VECTOR_I16), + llc gives error: + llvm/include/llvm/CodeGen/SelectionDAGNodes.h:662: + const llvm::SDValue& llvm::SDNode::getOperand(unsigned int) const: + Assertion `Num < NumOperands && "Invalid child # of SDNode!"' failed. + + This fails at instruction scheduling. + */ + + SDValue InFlag(nullptr, 0); // NO Glue - Null incoming flag value. + // Inspired from ConnexISelLowering.cpp + MachineFunction &MF = CurDAG->getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + /* From http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html + SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB); + //SDValue bb = CurDAG->getBasicBlock(MachineBasicBlock *MBB); + */ + + //SDValue firstAsmInlineSDValue; + SDNode *firstAsmInlineSDNode = NULL; + for (auto dagIter = CurDAG->allnodes_begin(); //allnodes_iterator + dagIter != CurDAG->allnodes_end(); + dagIter++) { + SDNode iterSDNode = *dagIter; + //iterSDValue + //SDNode *iterSDNode = dagIter->getNode(); + /* + LLVM_DEBUG(dbgs() << "dagIter = "; + iterSDNode.dump(CurDAG); + dbgs() << '\n'); + */ + if (iterSDNode.getOpcode() == ISD::INLINEASM) { + firstAsmInlineSDNode = &iterSDNode; + break; + } + } + + // Using the MDNode - because Inline gives error: + //firstAsmInlineSDNode = (firstAsmInlineSDNode->getOperand(2)).getNode(); + + if (firstAsmInlineSDNode == NULL) + firstAsmInlineSDNode = (CurDAG->getEntryNode()).getNode(); + LLVM_DEBUG(dbgs() << "firstAsmInlineSDNode = " + << firstAsmInlineSDNode + << "\n"); + LLVM_DEBUG(dbgs() << "firstAsmInlineSDNode = "; + firstAsmInlineSDNode->dump(); + dbgs() << "[END]\n"); + + SDValue firstAsmInlineSDValue = SDValue(firstAsmInlineSDNode, 0); + LLVM_DEBUG(dbgs() << 
"firstAsmInlineSDValue = "; + firstAsmInlineSDValue->dump(); + dbgs() << "[END]\n"); + + #ifdef NOTNOT + unsigned virtScalarReg = RegInfo.createVirtualRegister( + &Connex::GPRRegClass); + // In the end not useful: Adding it just to force ordering between predecessors of Node and Res + SDValue copyToRegAux = CurDAG->getCopyToReg( + //CurDAG->getEntryNode(), // messes up scheduling + Node->getOperand(0), + + DL, + virtScalarReg, + Node->getOperand(0), + InFlag); + #endif + + /* TODO TODO TODO TODO TODO: Treat preoperly case + typeVecNode == TYPE_VECTOR_I32. + I.e., with multiple VLOAD_H, CELL_SH*, WHERE*, etc*/ + + SDNode *vloadSpecial = CurDAG->getMachineNode( + typeVecNode == TYPE_VECTOR_I16 ? + Connex::VLOAD_H_SYM_IMM : + //Connex::VLOAD_W_SYM_IMM, + Connex::VLOAD_H_SYM_IMM, + DL, + // + // We add MVT::Glue to the return + // types to avoid that llc performs CSE + // on these nodes: if this + // getMachineNode() function + // is called more than once we + // return the same value again and + // again (i.e., perform CSE) + // since the node doesn't take any + // actual inputs. + // - see why this is so at + // http://llvm.org/docs/doxygen/html/SelectionDAG_8cpp_source.html#l06206 */ + CurDAG->getVTList( + // 2017_08_03 + // typeVecNode, + TYPE_VECTOR_I16, + MVT::Glue), + // + CurDAG->getEntryNode() + // We add a chain edge + /* TODO TODO TODO TODO TODO + TODO TODO TODO VERY + IMPORTANT - figure if I + can do this better + (maybe in Selection + Lowering): + //SDValue(firstAsmInlineSDNode, 0) + firstAsmInlineSDValue */ + //SDValue(copyToRegAux, 0), + //copyToRegAux + /* + Gives error: InstrEmitter.cpp:782: + void llvm::InstrEmitter::EmitMachineNode(llvm::SDNode*, + bool, bool, llvm::DenseMap&): + Assertion `NumMIOperands >= II.getNumOperands() && + NumMIOperands <= II.getNumOperands() + + II.getNumImplicitDefs() + NumImpUses && + "#operands for dag node doesn't match .td file!"' failed. 
+ */ + // Node->getOperand(0) + ); + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR(): vloadSpecial = " + << vloadSpecial << ".\n" + << "vloadSpecial = "; + vloadSpecial->dump(); dbgs() << "\n"); + + + + std::string exprStr = RecoverCExpressionFromSDNode(Node, crtNodeMap, + #ifdef NEW_2019_03_21 + false + #else + true + #endif + ); + //std::string exprStr = RecoverCExpressionFromSDNode( + // symbolicValue.getNode(), crtNodeMap); + exprStr = " " + exprStr; + exprStr = exprStr + "; // MSA_I16"; + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR(): exprStr = " + << exprStr << "\n"); + + SDNode *inlineAsmNode = CreateInlineAsmNode(CurDAG, exprStr, + vloadSpecial, DL); + + /* VERY IMPORTANT: + You might wonder why we require creating also + SDNodes CopyToReg and CopyFromReg. + We put them to preserve the INLINEASM SDNode, which does NOT + have a type and needs to be chained/glued to its VLOAD* and + the result (Res) from this instr-selection needs to be + a vector type (typeVecNode). + If we don't put them (e.g., we make + Res = inlineAsmNode; + we end up with erroneous cases like this + (which gives an assertion failure like: + "#operands for dag node doesn't match .td file!"): + SU(10): t71: v128i16,glue = VLOAD_H_SYM_IMM t0 + SU(9): t74: ch,glue = inlineasm t71, TargetExternalSymbol:i64' ((N + -1) << 1)) + 2) / (((int *)&CONNEX_VL)[0])) ...; // MSA_I10', MDNode:ch<0x1724220>, TargetConstant:i64<1> + SU(8): t75: v64i32 = NOP_BITCONVERT_WH t74 + */ + unsigned virtRegRes = RegInfo.createVirtualRegister( + typeVecNode == TYPE_VECTOR_I16 ? 
+ &Connex::VectorHRegClass : + #ifdef PREFERABLY_NOT_2019_03_21 + &Connex::MSA128WRegClass + #else + &Connex::VectorHRegClass + #endif + ); + /* + From http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html: + SDValue getCopyFromReg(SDValue Chain, SDLoc dl, unsigned Reg, EVT VT) + SDValue getCopyFromReg(SDValue Chain, SDLoc dl, unsigned Reg, EVT VT, + SDValue Glue) + // + SDValue getCopyToReg (SDValue Chain, SDLoc dl, unsigned Reg, SDValue N) + SDValue getCopyToReg (SDValue Chain, SDLoc dl, unsigned Reg, SDValue N, SDValue Glue) + SDValue getCopyToReg (SDValue Chain, SDLoc dl, SDValue Reg, SDValue N, SDValue Glue) + */ + + SDValue copyToRegRes = CurDAG->getCopyToReg( + //CurDAG->getEntryNode(), // messes up scheduling + //SDValue(vloadSpecial, 0), // this should be considered chain edge, even if VLOAD does NOT have output ch port + SDValue(inlineAsmNode, 0), + //extSym, + + DL, + virtRegRes, + SDValue(vloadSpecial, 0), + InFlag); + + SDValue copyFromRegRes = CurDAG->getCopyFromReg( + copyToRegRes, // chain + DL, + virtRegRes, + typeVecNode + //, copyToRegOp2 + ); + + /* + From http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html: + SDValue getRegister (unsigned Reg, EVT VT) + */ + //Res = CurDAG->getRegister(virtRegRes, TYPE_VECTOR_I16).getNode(); + Res = copyFromRegRes.getNode(); + + + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR(): Res = "; + Res->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR(): inlineAsmNode = "; + inlineAsmNode->dump(); + dbgs() << "\n"); + + /* TODO TODO TODO TODO TODO TODO: make sure I am not deleting an SDNode nc + incoming on the chain port of Node, where nc is an arbitrary + node which happened to be before Node. 
*/ + // ReplaceNode(Node, Res); + // return; + + needsConvertionToResultType = false; + } // END symbolicValue.getNode() != nullptr + else { + bool isUnitStepped = isUnitSteppedZeroStartingVector(BVN); + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR(): isUnitStepped = true\n"); + + if (isUnitStepped) { + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR(): isUnitStepped = true\n"); + /* + LLVM_DEBUG(dbgs() << "Select() for ISD::BUILD_VECTOR: Res = "; + Res->print(dbgs()); dbgs() << "\n"); + */ + + LdiOp = Connex::LDIX_H; + + ViaVecTy = TYPE_VECTOR_I16; + /* + //return std::make_pair(false, nullptr); + LLVM_DEBUG(dbgs() << "Select() for ISD::BUILD_VECTOR: exiting with 1st return nullptr\n"); + + return; + */ + + /* IMPORTANT: We use Connex's LDIX (LDIX_H) + instruction to load the immediate value Imm in all vector elements. */ + Res = CurDAG->getMachineNode(LdiOp, DL, ViaVecTy); + + if (ResTy != ViaVecTy) { + // If LdiOp is writing to a different register class to ResTy, then + // fix it up here. This COPY_TO_REGCLASS should never cause a move.v + // since the source and destination register sets contain the same + // registers. 
+ const TargetLowering *TLI = getTargetLowering(); + MVT ResTySimple = ResTy.getSimpleVT(); + const TargetRegisterClass *RC = TLI->getRegClassFor(ResTySimple); + + LLVM_DEBUG(dbgs() + << "SelectBUILD_VECTOR(): before CurDAG->getMachineNode()\n"); + Res = CurDAG->getMachineNode(Connex::COPY_TO_REGCLASS, DL, + ResTy, SDValue(Res, 0), + CurDAG->getTargetConstant(RC->getID(), + DL, + // TODO_CHANGE_BACKEND: + //MVT::i64)); + TYPE_SCALAR_ELEMENT)); + } + } + } + } // END BVN->isConstantSplat == false + else { + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR(): BVN->isConstantSplat() == true, " + << "SplatValue = " << SplatValue + << ", SplatUndef = " << SplatUndef + << ", SplatBitSize = " << SplatBitSize + << "\n" + ); + + // TODO_CHANGE_BACKEND: + //if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) + if (SplatBitSize != TYPE_VECTOR_I16_ELEMENT_BITSIZE) { + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR(): SplatBitSize == " + << SplatBitSize << "(8 is NOT supported in our back end)\n"); + // !!!! TODO TODO TODO: kindda wicked hack - try to avoid by working defining in TableGen the right conversion records + // TODO_CHANGE_BACKEND: + SplatBitSize = 16; + //SplatBitSize = 32; + //SplatBitSize = 64; + + LLVM_DEBUG(dbgs() << " --> Extending element type to SplatBitSize = " + << SplatBitSize << "\n"); + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1APInt.html + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR: SplatValue = " + << SplatValue.toString(10, 1) << "\n"); + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR: SplatValue.getBitWidth() = " + << SplatValue.getBitWidth() << "\n"); + + //!!!! !!!! 
TODO TODO TODO This should be performed through TableGen + //if (SplatBitSize > SplatValue.getBitWidth()) + // See http://llvm.org/docs/doxygen/html/classllvm_1_1APInt.html + SplatValue = SplatValue.zextOrTrunc(SplatBitSize); + + LLVM_DEBUG(dbgs() << "Select() for ISD::BUILD_VECTOR: After, SplatValue.getBitWidth() = " + << SplatValue.getBitWidth() << "\n"); + } + + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR: SplatUndef = " + << SplatUndef.toString(10, 1) << "\n"); + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR: SplatBitSize = " + << SplatBitSize << "\n"); + + /* !!!! TODO: VLOAD is NOT a feasible option if BUILD_VECTOR is loaded + with DIFFERENT constant values. */ + + switch (SplatBitSize) { + default: + //return std::make_pair(false, nullptr); + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR: exiting with 2nd return nullptr\n"); + return; + case 8: + //LdiOp = Connex::VLOAD_B; + LdiOp = Connex::VLOAD_H; + // TODO_CHANGE_BACKEND: + //ViaVecTy = MVT::v16i8; + //ViaVecTy = MVT::v16i32; + ViaVecTy = TYPE_VECTOR_I16; + /* + LdiOp = Connex::VLOAD_H; + ViaVecTy = MVT::v8i64; + */ + break; + case 16: + LdiOp = Connex::VLOAD_H; + // TODO_CHANGE_BACKEND: + ViaVecTy = TYPE_VECTOR_I16; + break; + case 32: + // TODO_CHANGE_BACKEND: + // TODO TODO: we should add also WHERE and vload depending on index + LdiOp = Connex::VLOAD_H; + ViaVecTy = TYPE_VECTOR_I16; + /* + LdiOp = Connex::VLOAD_W; + ViaVecTy = TYPE_VECTOR_I32; */ + break; + case 64: + assert(0 && "Connex supports only 16 bits immediate operands - see ConnexISA.docx"); + LdiOp = Connex::VLOAD_W; // TODO: actually VLOAD_D + // TODO_CHANGE_BACKEND: + //ViaVecTy = MVT::v8i64; + ViaVecTy = TYPE_VECTOR_I16; + break; + /* + LdiOp = Connex::VLOAD_H; //VLOAD: + ViaVecTy = MVT::v8i64; + break; + */ + } + + /* + From http://llvm.org/docs/doxygen/html/APInt_8h_source.html: + 00379 bool isSignedIntN(unsigned N) const + Check if this APInt has an N-bits signed integer value. 
+ */ + if (!SplatValue.isSignedIntN(16)) { + //return std::make_pair(false, nullptr); + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR: exiting via 3rd return nullptr\n"); + return; + } + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR: SplatValue.isSignedIntN(16) == true\n"); + + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR: SplatValue = " + << SplatValue.toString(10, 1) << "\n"); + + // See http://llvm.org/docs/doxygen/html/structllvm_1_1EVT.html + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR: ViaVecTy.getVectorElementType() = " + << ViaVecTy.getVectorElementType().getEVTString() + << "\n"); + + // See http://llvm.org/docs/doxygen/html/classllvm_1_1SDLoc.html and http://llvm.org/docs/doxygen/html/classllvm_1_1DebugLoc.html + //LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR: DL = " << DL.getDebugLoc().getLoc() << "\n"); + + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR: before CurDAG->getTargetConstant()\n"); + SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL, + ViaVecTy.getVectorElementType()); + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR: after CurDAG->getTargetConstant()\n"); + // See http://llvm.org/docs/doxygen/html/classllvm_1_1SDValue.html + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR: Imm = "; + Imm.dump(); + dbgs() << "\n"); + + /* IMPORTANT: if we got this far then we use Connex's VLOAD (VLOAD_H) + instruction to load the immediate value Imm in all vector elements. */ + Res = CurDAG->getMachineNode(LdiOp, DL, ViaVecTy, Imm); + + // It doesn't make sense to use target independent BITCAST + /* + Res = CurDAG->getMachineNode(ISD::BITCAST, DL, + typeVecNode, SDValue(Res2, 0)); + */ + + #ifdef DIFFERENT_IMPLEMENTATION_TO_BITCAST_FROM_v64i16_to_v128i16 + if (ResTy != ViaVecTy) { + // If LdiOp is writing to a different register class to ResTy, then + // fix it up here. This COPY_TO_REGCLASS should never cause a move.v + // since the source and destination register sets contain the same + // registers. 
+ const TargetLowering *TLI = getTargetLowering(); + MVT ResTySimple = ResTy.getSimpleVT(); + const TargetRegisterClass *RC = TLI->getRegClassFor(ResTySimple); + + LLVM_DEBUG(dbgs() + << "SelectBUILD_VECTOR(): before CurDAG->getMachineNode()\n"); + Res = CurDAG->getMachineNode(Connex::COPY_TO_REGCLASS, DL, + ResTy, SDValue(Res, 0), + CurDAG->getTargetConstant(RC->getID(), DL, + // TODO_CHANGE_BACKEND: + TYPE_SCALAR_ELEMENT)); + } + #endif + } + +// 2017_08_03 + if (ResTy == TYPE_VECTOR_I32 && needsConvertionToResultType) { + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR(): Adding NOP_BITCONVERT_HW node\n"); + + SDNode *ResOrig = Res; + Res = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, DL, + typeVecNode, SDValue(ResOrig, 0)); + } + + /* + return std::make_pair(true, Res); + */ + LLVM_DEBUG(dbgs() << "SelectBUILD_VECTOR(): Res = "; + /* print() gives "Segmentation fault" when BUILD_VECTOR + contains vars Res->print(dbgs()); dbgs() << "\n"); */ + Res->dump(CurDAG); + dbgs() << "\n"); + + ReplaceNode(Node, Res); +} // END SelectBUILD_VECTOR() + + +SDNode *ConnexDAGToDAGISel::SelectReduceI32(SDNode *Node) { + LLVM_DEBUG(dbgs() << "Entered SelectReduceI32(): Selecting Node = "; + Node->dump(CurDAG); + dbgs() << "\n"); + + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + + //EVT ResTy = Node->getValueType(1); // 0 is ch (chain) + + LLVM_DEBUG(dbgs() + << "SelectReduceI32(): We are in the case TYPE_VECTOR_I32\n"); + typeVecNode = TYPE_VECTOR_I32; + + // NOTE: Opnd 1 is a ct + SDValue nodeOpSrc = Node->getOperand(2); + + // We need to preserve the node that was chained with Node to avoid it is removed + SDValue nodeOpChain = Node->getOperand(0); // Opnd 0 is ch (chain) + + LLVM_DEBUG(dbgs() << "SelectReduceI32(): nodeOpSrc.getValueType() = " + << nodeOpSrc.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectReduceI32(): nodeOpSrc = "; + (nodeOpSrc.getNode())->dump(); + dbgs() << "\n"); + //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_I32); + 
+#ifdef MARKER_FOR_EMULATION + SDNode *nodeOpSrcCastBogus = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_WH, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // This gives a serious error: MVT::Glue, + nodeOpSrc, + // chain edge + nodeOpChain + ); + + std::string exprStrBegin = "// Starting RED.i32 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCastBogus, DL); + LLVM_DEBUG(dbgs() << "SelectReduceI32: inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); + dbgs() << "\n"); + + /* This node is also bogus, only for the sake of "sandwhiching" the INLINE + assembly with 2 NOPs. + */ + SDNode *nodeOpSrcCast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HH, // IMPORTANT: this is a BOGUS NOP_BITCONVERT - we just put it since it has a Glue result, while nodeOpSrcCast2 does NOT + DL, + TYPE_VECTOR_I16, + MVT::Other, + // IMPORTANT: this can give error: <getNodeId() == -1 && "Node already inserted!">> MVT::Glue, + SDValue(nodeOpSrcCastBogus, 0), + // chain + SDValue(inlineAsmNodeBegin, 0) + ); +#else + SDNode *nodeOpSrcCast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + nodeOpSrc, + // chain edge + nodeOpChain + ); + +#endif + + +#include "Select_REDi32_OpincaaCodeGen.h" + + +#ifdef MARKER_FOR_EMULATION + std::string exprStrEnd = "// Finishing RED.i32 emulation ;)"; + + /* + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + reduceHigh16, DL); + LLVM_DEBUG(dbgs() << "SelectReduceI32(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); dbgs() << "\n"); + + LLVM_DEBUG(dbgs() << "SelectReduceI32(): reduceHigh16 = "; + reduceHigh16->dump(CurDAG); dbgs() << "\n"); + // return inlineAsmNodeEnd; // Gives error: <hasAnyUseOfValue(i) || From->getValueType(i) == To->getValueType(i)) && "Cannot use this version of ReplaceAllUsesWith!"' failed.>> + */ + + SDNode *resHH = CreateInlineAsmNode(CurDAG, exprStrEnd, reduceHigh16, DL); + + + /* + // This node is also bogus, only 
for the sake of "sandwhiching" the INLINE + // assembly with 2 instructions. + SDNode *resHH = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HH, + DL, + // Gives error: <hasAnyUseOfValue(i) || From->getValueType(i) == To->getValueType(i)) && "Cannot use this version of ReplaceAllUsesWith!"' failed.>> TYPE_VECTOR_I16, + // Gives error: <= II.getNumOperands() && NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() + NumImpUses && "#operands for dag node doesn't match .td file!"' failed.>> MVT::Other, + SDValue(reduceHigh16, 0), + // chain edge + //SDValue(resH, 1) + SDValue(inlineAsmNodeEnd, 0) + ); + */ + LLVM_DEBUG(dbgs() << "SelectReduceI32(): resHH = "; + resHH->dump(CurDAG); + dbgs() << "\n"); + + return resHH; +#else + LLVM_DEBUG(dbgs() << "SelectReduceI32(): reduceHigh16 = "; + reduceHigh16->dump(CurDAG); + dbgs() << "\n"); + + return reduceHigh16; +#endif +} // END SelectReduceI32() + + + +SDNode *ConnexDAGToDAGISel::SelectReduceF16(SDNode *Node) { + LLVM_DEBUG(dbgs() << "Entered SelectReduceF16(): Selecting Node = "; + Node->dump(CurDAG); + dbgs() << "\n"); + + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + + //EVT ResTy = Node->getValueType(1); // 0 is ch (chain) + + LLVM_DEBUG(dbgs() + << "SelectReduceF16(): We are in the case TYPE_VECTOR_F16\n"); + typeVecNode = TYPE_VECTOR_F16; + + // NOTE: Opnd 1 is a ct + SDValue nodeOpSrc = Node->getOperand(2); + + // We need to preserve the node that was chained with Node to avoid it is removed + SDValue nodeOpChain = Node->getOperand(0); // Opnd 0 is ch (chain) + + LLVM_DEBUG(dbgs() << "SelectReduceF16(): nodeOpSrc.getValueType() = " + << nodeOpSrc.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectReduceF16(): nodeOpSrc = "; + (nodeOpSrc.getNode())->dump(); + dbgs() << "\n"); + //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_F16); + +#ifdef MARKER_FOR_EMULATION + SDNode *nodeOpSrcCastBogus1 = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HH, + DL, + TYPE_VECTOR_I16, + 
MVT::Other, + // This gives a serious error: MVT::Glue, + nodeOpSrc, + // chain edge + nodeOpChain + ); + + std::string exprStrBegin = "// Starting red.f16 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCastBogus1, DL); + LLVM_DEBUG(dbgs() << "SelectReduceF16: inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); + dbgs() << "\n"); + + /* This node is also bogus, only for the sake of "sandwhiching" the INLINE + assembly with 2 NOPs. + */ + SDNode *nodeOpSrcCast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HH, // IMPORTANT: this is a BOGUS NOP_BITCONVERT - we just put it since it has a Glue result, while nodeOpSrcCast2 does NOT + DL, + TYPE_VECTOR_I16, + MVT::Other, + // IMPORTANT: this can give error: <getNodeId() == -1 && "Node already inserted!">> MVT::Glue, + SDValue(nodeOpSrcCastBogus1, 0), + // chain + SDValue(inlineAsmNodeBegin, 0) + ); +#else + SDNode *nodeOpSrcCast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HH, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + nodeOpSrc, + // chain edge + nodeOpChain + ); +#endif + + +#include "Select_REDf16_OpincaaCodeGen.h" + + +#ifdef MARKER_FOR_EMULATION + std::string exprStrEnd = "// Finishing red.f16 emulation ;)"; + /* + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + reduceH, DL); + LLVM_DEBUG(dbgs() << "SelectReduceF16(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); dbgs() << "\n"); + */ + SDNode *reduceHH = CreateInlineAsmNode(CurDAG, exprStrEnd, + reduceH, DL); + + LLVM_DEBUG(dbgs() << "SelectReduceF16(): reduceH = "; + reduceH->dump(CurDAG); dbgs() << "\n"); + // return inlineAsmNodeEnd; // Gives error: <hasAnyUseOfValue(i) || From->getValueType(i) == To->getValueType(i)) && "Cannot use this version of ReplaceAllUsesWith!"' failed.>> + + /* + // This node is also bogus, only for the sake of "sandwhiching" the INLINE + // assembly with 2 instructions. 
+ SDNode *reduceHH = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HH, + DL, + // Gives error: <hasAnyUseOfValue(i) || From->getValueType(i) == To->getValueType(i)) && "Cannot use this version of ReplaceAllUsesWith!"' failed.>> TYPE_VECTOR_I16, + // Gives error: <= II.getNumOperands() && NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() + NumImpUses && "#operands for dag node doesn't match .td file!"' failed.>> MVT::Other, + SDValue(reduceH, 0), + // chain edge + //SDValue(reduceH, 1) + SDValue(inlineAsmNodeEnd, 0) + ); + */ + LLVM_DEBUG(dbgs() << "SelectReduceF16(): reduceHH = "; + reduceHH->dump(CurDAG); + dbgs() << "\n"); + + return reduceHH; +#else + LLVM_DEBUG(dbgs() << "SelectReduceF16(): reduceH = "; + reduceH->dump(CurDAG); + dbgs() << "\n"); + + return reduceH; +#endif +} // END SelectReduceF16() + + + +SDNode *ConnexDAGToDAGISel::SelectAddI32(SDNode *Node) { + LLVM_DEBUG(dbgs() << "Entered SelectAddI32(): Selecting Node = "; + Node->dump(CurDAG); + dbgs() << "\n"); + + /* + We look into doing "instruction-select" to + OpDst = ADD OpSRC1, OpSRC2 + where the 3 operands are vectors of type : + */ + + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + + //EVT ResTy = Node->getValueType(1); // 0 is ch (chain) + + LLVM_DEBUG(dbgs() << "SelectAddI32(): We are in the case TYPE_VECTOR_I32\n"); + typeVecNode = TYPE_VECTOR_I32; + + SDValue nodeOpSrc1 = Node->getOperand(0); + SDValue nodeOpSrc2 = Node->getOperand(1); + + LLVM_DEBUG(dbgs() << "SelectAddI32(): nodeOpSrc1.getValueType() = " + << nodeOpSrc1.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectAddI32(): nodeOpSrc1 = "; + (nodeOpSrc1.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "SelectAddI32(): nodeOpSrc2.getValueType() = " + << nodeOpSrc2.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectAddI32(): nodeOpSrc2 = "; + (nodeOpSrc2.getNode())->dump(); + dbgs() << "\n"); + //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_I32); + + /* 
+ VERY IMPORTANT: + We convert the v64i32 add operation into a sequence of nodes that take as + input the v64i32 operands of the operation convert them to v128i16 operands + using the NOP_BITCONVERT_WH nodes and then instantiating the SDNodes + emulating the v64i32 add operation. + At the end we put a NOP_BITCONVERT_HW SDNode converting the result from + v128i16 to v64i32. + Note that these NOP_BITCONVERT_* nodes are more helpful conceptually - but + they also keep the nodes s.t. they are not scheduled badly. + */ + + SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + //MVT::Other, + MVT::Glue, + nodeOpSrc1); + +#ifdef MARKER_FOR_EMULATION + std::string exprStrBegin = "// Starting ADD.i32 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCast1, DL); + LLVM_DEBUG(dbgs() << "SelectAddI32: inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); dbgs() << "\n"); +#endif + + SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + MVT::Other, + // IMPORTANT: this can give error: <getNodeId() == -1 && "Node already inserted!">> MVT::Glue, + nodeOpSrc2, + // chain + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeBegin, 0) + #else + SDValue(nodeOpSrcCast1, 1) + #endif + ); + + + + #include "Select_ADDi32_OpincaaCodeGen.h" + + + +#ifdef MARKER_FOR_EMULATION + std::string exprStrEnd = "// Finishing ADD.i32 emulation ;)"; + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + lastNode, //resH, + DL); + LLVM_DEBUG(dbgs() << "SelectAddI32(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); dbgs() << "\n"); + + LLVM_DEBUG(dbgs() << "SelectAddI32(): resH = "; + resH->dump(CurDAG); dbgs() << "\n"); + // return inlineAsmNodeEnd; // Gives error: <hasAnyUseOfValue(i) || From->getValueType(i) == To->getValueType(i)) && "Cannot use this version of 
ReplaceAllUsesWith!"' failed.>> +#endif + + + SDNode *resW = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(resH, 0), + // chain edge + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeEnd, 0) + #else + SDValue(resH, 1) + #endif + ); + LLVM_DEBUG(dbgs() << "SelectAddI32(): resW = "; + resW->dump(CurDAG); + dbgs() << "\n"); + + return resW; +} // END SelectAddI32() + + +SDNode *ConnexDAGToDAGISel::SelectSubI32(SDNode *Node) { + LLVM_DEBUG(dbgs() << "Entered SelectSubI32(): Selecting Node = "; + Node->dump(CurDAG); + dbgs() << "\n"); + + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + + //EVT ResTy = Node->getValueType(1); // 0 is ch (chain) + + LLVM_DEBUG(dbgs() << "SelectSubI32(): We are in the case TYPE_VECTOR_I32\n"); + typeVecNode = TYPE_VECTOR_I32; + + SDValue nodeOpSrc1 = Node->getOperand(0); + SDValue nodeOpSrc2 = Node->getOperand(1); + + LLVM_DEBUG(dbgs() << "SelectSubI32(): nodeOpSrc1.getValueType() = " + << nodeOpSrc1.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectSubI32(): nodeOpSrc1 = "; + (nodeOpSrc1.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "SelectSubI32(): nodeOpSrc2.getValueType() = " + << nodeOpSrc2.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectSubI32(): nodeOpSrc2 = "; + (nodeOpSrc2.getNode())->dump(); + dbgs() << "\n"); + //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_I32); + + SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + TYPE_VECTOR_I16, + //MVT::Other, + MVT::Glue, + nodeOpSrc1); + // + std::string exprStrBegin = "// Starting SUB.i32 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCast1, DL); + LLVM_DEBUG(dbgs() << "Select() for SUB.i32: inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); dbgs() << "\n"); + // + SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // IMPORTANT: 
this can give error: <getNodeId() == -1 && "Node already inserted!">> MVT::Glue, + nodeOpSrc2, + // chain + //SDValue(nodeOpSrcCast1, 1) + SDValue(inlineAsmNodeBegin, 0) + ); + + + #include "Select_SUBi32_OpincaaCodeGen.h" + + + std::string exprStrEnd = "// Finishing SUB.i32 emulation ;)"; + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + lastNode, //resH, + DL); + LLVM_DEBUG(dbgs() << "SelectSubI32(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); + dbgs() << "\n"); + + + SDNode *resW = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(resH, 0), + // chain edge + //SDValue(resH, 1) + SDValue(inlineAsmNodeEnd, 0) + ); + LLVM_DEBUG(dbgs() << "SelectSubI32(): resW = "; + resW->dump(CurDAG); + dbgs() << "\n"); + + return resW; +} // END SelectSubI32() + + +SDNode *ConnexDAGToDAGISel::SelectMulI32(SDNode *Node) { + LLVM_DEBUG(dbgs() << "Entered SelectMulI32(): [LATEST] Selecting Node = "; + Node->dump(CurDAG); + dbgs() << "\n"); + + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + + //EVT ResTy = Node->getValueType(1); // 0 is ch (chain) + + LLVM_DEBUG(dbgs() << "SelectMulI32(): We are in the case TYPE_VECTOR_I32\n"); + typeVecNode = TYPE_VECTOR_I32; + + SDValue nodeOpSrc1 = Node->getOperand(0); + SDValue nodeOpSrc2 = Node->getOperand(1); + + LLVM_DEBUG(dbgs() << "SelectMulI32(): nodeOpSrc1.getValueType() = " + << nodeOpSrc1.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectMulI32(): nodeOpSrc1 = "; + (nodeOpSrc1.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "SelectMulI32(): nodeOpSrc2.getValueType() = " + << nodeOpSrc2.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectMulI32(): nodeOpSrc2 = "; + (nodeOpSrc2.getNode())->dump(); + dbgs() << "\n"); + //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_I32); + + SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_WH, + DL, + TYPE_VECTOR_I16, + #ifdef MARKER_FOR_EMULATION + MVT::Other, 
+ #else + MVT::Glue, + #endif + nodeOpSrc1); + +#ifdef MARKER_FOR_EMULATION + std::string exprStrBegin = "// Starting MUL.i32 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCast1, DL); + LLVM_DEBUG(dbgs() << "SelectMulI32: inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); dbgs() << "\n"); +#endif + + SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // IMPORTANT: this can give error: <getNodeId() == -1 && "Node already inserted!">> MVT::Glue, + nodeOpSrc2, + // chain + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeBegin, 0) + #else + SDValue(nodeOpSrcCast1, 1) + #endif + ); + + // Note: COPY generated by TwoAddressInctruction in WHERE blocks and handled + // by me in ConnexTargetMachine.cpp, etc) + +//#include "Select_MULTi32_SignAndMagnitude_OpincaaCodeGen.h" +#include "Select_MULTi32_ComplementedRepresentation_OpincaaCodeGen.h" + +#ifdef MARKER_FOR_EMULATION + std::string exprStrEnd = "// Finishing MUL.i32 emulation ;)"; + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + lastNode, //resH, + DL); + LLVM_DEBUG(dbgs() << "SelectMulI32(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); + dbgs() << "\n"); +#endif + + + // END of method - we convert resH (vector of short/i16) to resW (vector of i32) + SDNode *resW = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(resH, 0), + // chain edge + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeEnd, 0) + #else + SDValue(resH, 1) + #endif + ); + LLVM_DEBUG(dbgs() << "SelectMulI32(): resW = "; + resW->dump(CurDAG); + dbgs() << "\n"); + + return resW; +} // END SelectMulI32() + + +SDNode *ConnexDAGToDAGISel::SelectSraI32(SDNode *Node) { + LLVM_DEBUG(dbgs() << "Entered SelectSraI32(): [LATEST] Selecting Node = "; + Node->dump(CurDAG); + dbgs() << "\n"); + + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + + //EVT ResTy = 
Node->getValueType(1); // 0 is ch (chain) + + LLVM_DEBUG(dbgs() << "SelectSraI32(): We are in the case TYPE_VECTOR_I32\n"); + typeVecNode = TYPE_VECTOR_I32; + + SDValue nodeOp0 = Node->getOperand(0); + SDValue nodeOp1 = Node->getOperand(1); + + SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_WH, + DL, + TYPE_VECTOR_I16, + #ifdef MARKER_FOR_EMULATION + MVT::Other, + #else + MVT::Glue, + #endif + nodeOp0); + + +#ifdef MARKER_FOR_EMULATION + std::string exprStrBegin = "// Starting SHRA.i32 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCast1, DL); + LLVM_DEBUG(dbgs() << "SelectSraI32(): inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); + dbgs() << "\n"); +#endif + + SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_WH, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // IMPORTANT: this can give error: <getNodeId() == -1 && "Node already inserted!">> MVT::Glue, + nodeOp1, + // chain + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeBegin, 0) + #else + SDValue(nodeOpSrcCast1, 1) + #endif + ); + +#include "Select_SHRAi32_OpincaaCodeGen.h" + + LLVM_DEBUG(dbgs() << "SelectSraI32(): resH = "; + resH->dump(CurDAG); + dbgs() << "\n"); + +#ifdef MARKER_FOR_EMULATION + std::string exprStrEnd = "// Finishing SHRA.i32 emulation ;)"; + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + lastNode, //resH, + DL); + LLVM_DEBUG(dbgs() << "SelectSraI32(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); dbgs() << "\n"); +#endif + + // END of method - we convert resH (vector of short/i16) to resW (vector of i32) + SDNode *resW = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(resH, 0), + // chain edge + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeEnd, 0) + #else + SDValue(resH, 1) + #endif + ); + + LLVM_DEBUG(dbgs() << "SelectSraI32(): resW = "; + resW->dump(CurDAG); + dbgs() << "\n"); + + return resW; +} // END 
SelectSraI32() + + + + +SDNode *ConnexDAGToDAGISel::SelectAddF16(SDNode *Node) { + LLVM_DEBUG(dbgs() << "Entered SelectAddF16(): [LATEST] Selecting Node = "; + Node->dump(CurDAG); + dbgs() << "\n"); + + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + + //EVT ResTy = Node->getValueType(1); // 0 is ch (chain) + + LLVM_DEBUG(dbgs() << "SelectAddF16(): We are in the case TYPE_VECTOR_F16\n"); + typeVecNode = TYPE_VECTOR_F16; + + SDValue nodeOpSrc1 = Node->getOperand(0); + SDValue nodeOpSrc2 = Node->getOperand(1); + + LLVM_DEBUG(dbgs() << "SelectAddF16(): nodeOpSrc1.getValueType() = " + << nodeOpSrc1.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectAddF16(): nodeOpSrc1 = "; + (nodeOpSrc1.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "SelectAddF16(): nodeOpSrc2.getValueType() = " + << nodeOpSrc2.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectAddF16(): nodeOpSrc2 = "; + (nodeOpSrc2.getNode())->dump(); + dbgs() << "\n"); + //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_F16); + + SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + #ifdef MARKER_FOR_EMULATION + MVT::Other, + // This gives a serious error: MVT::Glue, + #else + MVT::Glue, + #endif + nodeOpSrc1); + +#ifdef MARKER_FOR_EMULATION + std::string exprStrBegin = "// Starting add.f16 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCast1, DL); + LLVM_DEBUG(dbgs() << "SelectAddF16: inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); dbgs() << "\n"); +#endif + + SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + MVT::Other, + // IMPORTANT: this can give error: <getNodeId() == -1 && "Node already inserted!">> MVT::Glue, + nodeOpSrc2, + // chain + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeBegin, 0) + #else + 
SDValue(nodeOpSrcCast1, 1) + #endif + ); + + // Note: COPY generated by TwoAddressInctruction in WHERE blocks and handled + // by me in ConnexTargetMachine.cpp, etc + +#include "Select_ADDf16_OpincaaCodeGen.h" + +#ifdef MARKER_FOR_EMULATION + std::string exprStrEnd = "// Finishing add.f16 emulation ;)"; + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + lastNode, // resF16, + DL); + LLVM_DEBUG(dbgs() << "SelectAddF16(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); + dbgs() << "\n"); +#endif + + // END of method - we convert resH (vector of short/i16) to resW (vector of i32) + SDNode *resW = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(resF16, 0), + // chain edge + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeEnd, 0) + #else + SDValue(resF16, 1) + #endif + ); + LLVM_DEBUG(dbgs() << "SelectAddF16(): resW = "; + resW->dump(CurDAG); + dbgs() << "\n"); + + return resW; +} // END SelectAddF16() + + +SDNode *ConnexDAGToDAGISel::SelectSubF16(SDNode *Node) { + LLVM_DEBUG(dbgs() << "Entered SelectSubF16(): [LATEST] Selecting Node = "; + Node->dump(CurDAG); + dbgs() << "\n"); + + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + + //EVT ResTy = Node->getValueType(1); // 0 is ch (chain) + + LLVM_DEBUG(dbgs() << "SelectSubF16(): We are in the case TYPE_VECTOR_F16\n"); + typeVecNode = TYPE_VECTOR_F16; + + SDValue nodeOpSrc1 = Node->getOperand(0); + SDValue nodeOpSrc2 = Node->getOperand(1); + + LLVM_DEBUG(dbgs() << "SelectSubF16(): nodeOpSrc1.getValueType() = " + << nodeOpSrc1.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectSubF16(): nodeOpSrc1 = "; + (nodeOpSrc1.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "SelectSubF16(): nodeOpSrc2.getValueType() = " + << nodeOpSrc2.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectSubF16(): nodeOpSrc2 = "; + (nodeOpSrc2.getNode())->dump(); + dbgs() << "\n"); + //assert(nodeOpSrc.getValueType() == 
TYPE_VECTOR_F16); + + SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + #ifdef MARKER_FOR_EMULATION + MVT::Other, + // This gives a serious error: MVT::Glue, + #else + MVT::Glue, + #endif + nodeOpSrc1); + +#ifdef MARKER_FOR_EMULATION + std::string exprStrBegin = "// Starting sub.f16 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCast1, DL); + LLVM_DEBUG(dbgs() << "SelectSubF16: inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); dbgs() << "\n"); +#endif + + SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + MVT::Other, + // IMPORTANT: this can give error: <getNodeId() == -1 && "Node already inserted!">> MVT::Glue, + nodeOpSrc2, + // chain + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeBegin, 0) + #else + SDValue(nodeOpSrcCast1, 1) + #endif + ); + + // Note: COPY generated by TwoAddressInctruction in WHERE blocks and handled + // by me in ConnexTargetMachine.cpp, etc) + +#include "Select_SUBf16_OpincaaCodeGen.h" + +#ifdef MARKER_FOR_EMULATION + std::string exprStrEnd = "// Finishing sub.f16 emulation ;)"; + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + lastNode, // resF16, + DL); + LLVM_DEBUG(dbgs() << "SelectSubF16(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); dbgs() << "\n"); +#endif + + // END of method - we convert resH (vector of short/i16) to resW (vector of i32) + SDNode *resW = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(resF16, 0), + // chain edge +#ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeEnd, 0) +#else + SDValue(resF16, 1) +#endif + ); + LLVM_DEBUG(dbgs() << "SelectSubF16(): resW = "; + resW->dump(CurDAG); dbgs() << "\n"); + + return resW; +} // END SelectSubF16() + +SDNode *ConnexDAGToDAGISel::SelectLtF16(SDNode *Node) { + LLVM_DEBUG(dbgs() << 
"Entered SelectLtF16(): [LATEST] Selecting Node = "; + Node->dump(CurDAG); + dbgs() << "\n"); + + + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + + //EVT ResTy = Node->getValueType(1); // 0 is ch (chain) + + LLVM_DEBUG(dbgs() + << "SelectLtF16(): We are in the case TYPE_VECTOR_F16\n"); + typeVecNode = TYPE_VECTOR_F16; + + SDValue nodeOpSrc1 = Node->getOperand(0); + SDValue nodeOpSrc2 = Node->getOperand(1); + + LLVM_DEBUG(dbgs() << "SelectLtF16(): nodeOpSrc1.getValueType() = " + << nodeOpSrc1.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectLtF16(): nodeOpSrc1 = "; + (nodeOpSrc1.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "SelectLtF16(): nodeOpSrc2.getValueType() = " + << nodeOpSrc2.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectLtF16(): nodeOpSrc2 = "; + (nodeOpSrc2.getNode())->dump(); + dbgs() << "\n"); + //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_F16); + + SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + #ifdef MARKER_FOR_EMULATION + MVT::Other, + // This gives a serious error: MVT::Glue, + #else + MVT::Glue, + #endif + nodeOpSrc1); + +#ifdef MARKER_FOR_EMULATION + std::string exprStrBegin = "// Starting lt.f16 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCast1, DL); + LLVM_DEBUG(dbgs() << "SelectLtF16: inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); dbgs() << "\n"); +#endif + + SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + MVT::Other, + // IMPORTANT: this can give error: <getNodeId() == -1 && "Node already inserted!">> MVT::Glue, + nodeOpSrc2, + // chain + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeBegin, 0) + #else + SDValue(nodeOpSrcCast1, 1) + #endif + ); + + // Note: COPY generated by TwoAddressInctruction in WHERE blocks and handled + // 
by me in ConnexTargetMachine.cpp, etc) + +#include "Select_LTf16_OpincaaCodeGen.h" + +#ifdef MARKER_FOR_EMULATION + std::string exprStrEnd = "// Finishing lt.f16 emulation ;)"; + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + lastNode, //resF16, + DL); + LLVM_DEBUG(dbgs() << "SelectLtF16(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); dbgs() << "\n"); +#endif + + // END of method - we convert resH (vector of short/i16) to resW (vector of i32) + SDNode *resW = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HW, + DL, + //typeVecNode, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(resF16, 0), + // chain edge + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeEnd, 0) + #else + SDValue(resF16, 1) + #endif + ); + LLVM_DEBUG(dbgs() << "SelectLtF16(): resW = "; + resW->dump(CurDAG); + dbgs() << "\n"); + + return resW; +} // END SelectLtF16() + + +SDNode *ConnexDAGToDAGISel::SelectMulF16(SDNode *Node) { + LLVM_DEBUG(dbgs() << "Entered SelectMulF16(): [LATEST] Selecting Node = "; + Node->dump(CurDAG); + dbgs() << "\n"); + + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + + //EVT ResTy = Node->getValueType(1); // 0 is ch (chain) + + LLVM_DEBUG(dbgs() + << "SelectMulF16(): We are in the case TYPE_VECTOR_F16\n"); + typeVecNode = TYPE_VECTOR_F16; + + SDValue nodeOpSrc1 = Node->getOperand(0); + SDValue nodeOpSrc2 = Node->getOperand(1); + + LLVM_DEBUG(dbgs() << "SelectMulF16(): nodeOpSrc1.getValueType() = " + << nodeOpSrc1.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectMulF16(): nodeOpSrc1 = "; + (nodeOpSrc1.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "SelectMulF16(): nodeOpSrc2.getValueType() = " + << nodeOpSrc2.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectMulF16(): nodeOpSrc2 = "; + (nodeOpSrc2.getNode())->dump(); + dbgs() << "\n"); + //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_F16); + + SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // 
The output type of the node + TYPE_VECTOR_I16, + #ifdef MARKER_FOR_EMULATION + MVT::Other, + // This gives a serious error: MVT::Glue, + #else + MVT::Glue, + #endif + nodeOpSrc1); + +#ifdef MARKER_FOR_EMULATION + std::string exprStrBegin = "// Starting mult.f16 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCast1, DL); + LLVM_DEBUG(dbgs() << "SelectMulF16: inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); + dbgs() << "\n"); +#endif + + SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + MVT::Other, + // IMPORTANT: this can give error: <getNodeId() == -1 && "Node already inserted!">> MVT::Glue, + nodeOpSrc2, + // chain + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeBegin, 0) + #else + SDValue(nodeOpSrcCast1, 1) + #endif + ); + + // Note: COPY generated by TwoAddressInctruction in WHERE blocks and handled + // by me in ConnexTargetMachine.cpp, etc) + +#include "Select_MULTf16_OpincaaCodeGen.h" + +#ifdef MARKER_FOR_EMULATION + std::string exprStrEnd = "// Finishing mult.f16 emulation ;)"; + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + lastNode, + DL); + LLVM_DEBUG(dbgs() << "SelectMulF16(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); + dbgs() << "\n"); +#endif + + // END of method - we convert resH (vector of short/i16) to resW (vector of i32) + SDNode *resW = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(resF16, 0), + // chain edge + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeEnd, 0) + #else + #error Normally no longer supported + SDValue(resF16, 1) + #endif + ); + LLVM_DEBUG(dbgs() << "SelectMulF16(): resW = "; + resW->dump(CurDAG); + dbgs() << "\n"); + + return resW; +} // END SelectMulF16() + + +SDNode *ConnexDAGToDAGISel::SelectDivF16(SDNode *Node) { + LLVM_DEBUG(dbgs() << "Entered SelectDivF16(): [LATEST] Selecting Node = "; + Node->dump(CurDAG); 
+ dbgs() << "\n"); + + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + + //EVT ResTy = Node->getValueType(1); // 0 is ch (chain) + + LLVM_DEBUG(dbgs() << "SelectDivF16(): We are in the case TYPE_VECTOR_F16\n"); + typeVecNode = TYPE_VECTOR_F16; + + SDValue nodeOpSrc1 = Node->getOperand(0); + SDValue nodeOpSrc2 = Node->getOperand(1); + + LLVM_DEBUG(dbgs() << "SelectDivF16(): nodeOpSrc1.getValueType() = " + << nodeOpSrc1.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectDivF16(): nodeOpSrc1 = "; + (nodeOpSrc1.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "SelectDivF16(): nodeOpSrc2.getValueType() = " + << nodeOpSrc2.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectDivF16(): nodeOpSrc2 = "; + (nodeOpSrc2.getNode())->dump(); + dbgs() << "\n"); + //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_F16); + + SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + #ifdef MARKER_FOR_EMULATION + MVT::Other, + // This gives a serious error: MVT::Glue, + #else + MVT::Glue, + #endif + nodeOpSrc1); + +#ifdef MARKER_FOR_EMULATION + std::string exprStrBegin = "// Starting div.f16 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCast1, DL); + LLVM_DEBUG(dbgs() << "SelectDivF16: inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); dbgs() << "\n"); +#endif + + SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + MVT::Other, + // IMPORTANT: this can give error: <getNodeId() == -1 && "Node already inserted!">> MVT::Glue, + nodeOpSrc2, + // chain + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeBegin, 0) + #else + SDValue(nodeOpSrcCast1, 1) + #endif + ); + + // Note: COPY generated by TwoAddressInctruction in WHERE blocks and handled + // by me in ConnexTargetMachine.cpp, etc) + +#include 
"Select_DIVf16_OpincaaCodeGen.h" + +#ifdef MARKER_FOR_EMULATION + std::string exprStrEnd = "// Finishing div.f16 emulation ;)"; + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + lastNode, + DL); + LLVM_DEBUG(dbgs() << "SelectDivF16(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); + dbgs() << "\n"); +#endif + + // END of method - we convert resH (vector of short/i16) to resW (vector of i32) + SDNode *resW = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(resF16, 0), + // chain edge + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeEnd, 0) + #else + #error Normally no longer supported + SDValue(resF16, 1) + #endif + ); + LLVM_DEBUG(dbgs() << "SelectDivF16(): resW = "; + resW->dump(CurDAG); + dbgs() << "\n"); + + return resW; +} // END SelectDivF16() + + +SDNode *ConnexDAGToDAGISel::SelectDivI16(SDNode *Node) { + LLVM_DEBUG(dbgs() << "Entered SelectDivI16(): Selecting Node = "; + Node->dump(CurDAG); + dbgs() << "\n"); + + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + + SDValue nodeOpSrc1 = Node->getOperand(0); + SDValue nodeOpSrc2 = Node->getOperand(1); + + LLVM_DEBUG(dbgs() << "SelectDivI16(): nodeOpSrc1.getValueType() = " + << nodeOpSrc1.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectDivI16(): nodeOpSrc1 = "; + (nodeOpSrc1.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "SelectDivI16(): nodeOpSrc2.getValueType() = " + << nodeOpSrc2.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectDivI16(): nodeOpSrc2 = "; + (nodeOpSrc2.getNode())->dump(); + dbgs() << "\n"); + //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_I32); + + SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode( + // IMPORTANT: this is a BOGUS + // NOP_BITCONVERT - we just + // put it since it has a Glue + // result, while + // nodeOpSrcCast1 does NOT + Connex::NOP_BITCONVERT_HH, + DL, + TYPE_VECTOR_I16, + #ifdef MARKER_FOR_EMULATION + MVT::Other, + // This gives error: MVT::Glue, 
+ #else + MVT::Glue, + #endif + nodeOpSrc1); + +#ifdef MARKER_FOR_EMULATION + std::string exprStrBegin = "// Starting DIV.i16 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCast1, DL); + LLVM_DEBUG(dbgs() << "SelectDivI16: inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); + dbgs() << "\n"); +#endif + + SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode( + // IMPORTANT: this is a BOGUS + // NOP_BITCONVERT - we just + // put it since it has a Glue + // result, while + // nodeOpSrcCast1 does NOT + Connex::NOP_BITCONVERT_HH, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // This gives error: MVT::Glue, + nodeOpSrc2, + // chain + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeBegin, 0) + #else + SDValue(nodeOpSrcCast1, 1) + #endif + ); + +#include "Select_DIVi16_OpincaaCodeGen.h" + +#ifdef MARKER_FOR_EMULATION + std::string exprStrEnd = "// Finishing DIV.i16 emulation ;)"; + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + resH, DL); + LLVM_DEBUG(dbgs() << "SelectDivI16(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); + dbgs() << "\n"); + + LLVM_DEBUG(dbgs() << "SelectDivI16(): resH = "; + resH->dump(CurDAG); + dbgs() << "\n"); + // return inlineAsmNodeEnd; // Gives error: <hasAnyUseOfValue(i) || From->getValueType(i) == To->getValueType(i)) && "Cannot use this version of ReplaceAllUsesWith!"' failed.>> + + SDNode *resHH = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HH, + DL, + TYPE_VECTOR_I16, + SDValue(resH, 0), + // chain edge + //SDValue(resH, 1) + SDValue(inlineAsmNodeEnd, 0) + ); + LLVM_DEBUG(dbgs() << "SelectDivI16(): resHH = "; + resHH->dump(CurDAG); + dbgs() << "\n"); + + return resHH; +#else + return resH; +#endif +} // END SelectDivI16() + + + +SDNode *ConnexDAGToDAGISel::SelectVSELECT(SDNode *vselectNode) { + LLVM_DEBUG(dbgs() << "Entered SelectVSELECT(): Selecting vselectNode = "; + vselectNode->dump(CurDAG); + dbgs() << "\n"); + /* + Basically we expand ("instruction-select") 
the following machine-independent instruction: + dst = VSELECT pred, true_assignment, false_assignment + to the following Connex machine instruction sequence: + (note that the comparison is excluded from the listing below + and will be scheduled before it) + + // For pred == false + dst = false_assignment + WHERExy + // For pred == true: + dst = true_assignment + END_WHERE + + NOTE: we could use a WHERE !pred to assign for the false case, + but our solution above (a "destructive" assignment) is OK and + it takes fewer instructions. + */ + + + /* + In the end I do the VSELECT treatment here, in + ConnexISelDAGToDAG, and not in ISelLowering::LowerOperation. + + Note that register allocation is performed after instruction selection + (see [Cardoso_2014], Figure on page 134). + + Note that although it is not required to create virtual registers for + the ORV_H machine instructions (since we failed to add a ch input port + to the setcc - see 50_IfConversion/Setcc_with_ch_input_port_NOT_working + - and I guess we would fail here also), we create it for the true + ORV_H because we need to make the associated predecessor CopyToRegister a + successor of WHEREEQ, otherwise the WHEREEQ would not have a successor. + TODO if we are extremely precious: + I guess we could make a successor of WHEREEQ the CopyToReg successor + of ORV_H and could get rid of all input virtual registers. + NOTE: we canNOT get rid of the virtual register that keeps the result of + both ORV_H, because we can replace it only with a VSELECT (reminds me + of dataflow machines and multiplexers :) ), BUT we want + to lower VSELECT in other components. + + Note that the nodes we create here have to have correct ordering, otherwise + instruction selection can fail or have wrong semantics.
+ */ + + // END_WHERE, etc are defined in anonymous enum in TableGen generated ConnexGenInstrInfo.inc + + // From http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html: + // LLVMContext * getContext () const + + SDLoc DL(vselectNode); + + EVT ViaVecTy; + EVT typeVecNode; + + //EVT ResTy = Node->getValueType(1); // 0 is ch (chain) + + //SDValue chain = DAG.getEntryNode(); + + assert(vselectNode->getNumOperands() == 3); + LLVM_DEBUG(dbgs() << " LowerOperation(): Initially vselectNode->use_size() = " + << vselectNode->use_size() << "\n"); + for (SDNode::use_iterator UI = vselectNode->use_begin(), + UE = vselectNode->use_end(); UI != UE; ++UI) { + // Note: UI is an SDNode * + LLVM_DEBUG(dbgs() << " LowerOperation(): Initially a use of vselectNode is: "; + UI->print(dbgs()); + dbgs() << "\n"); + } + + //EVT nodeResType = vselectNode->getValueType(0); + SDValue vselectNodeOp0 = vselectNode->getOperand(0); + SDValue vselectNodeOp1 = vselectNode->getOperand(1); + SDValue vselectNodeOp2 = vselectNode->getOperand(2); + + LLVM_DEBUG(dbgs() << "SelectVSELECT(): vselectNodeOp0.getValueType() = " + << vselectNodeOp0.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectVSELECT(): vselectNodeOp0 = "; + (vselectNodeOp0.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "SelectVSELECT(): vselectNodeOp1.getValueType() = " + << vselectNodeOp1.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectVSELECT(): vselectNodeOp1 = "; + (vselectNodeOp1.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "SelectVSELECT(): vselectNodeOp2 = "; + (vselectNodeOp2.getNode())->dump(); + dbgs() << "\n"); + + SDValue setCC = vselectNodeOp0; + SDNode *setCCNode = setCC.getNode(); + SDValue setCCPred = vselectNodeOp0.getNode()->getOperand(2); + SDNode *setCCPredNode = setCCPred.getNode(); + // + LLVM_DEBUG(dbgs() << "SelectVSELECT(): setCCPredNode = "; + //<< setCCPredNode + setCCPredNode->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() 
<< "SelectVSELECT(): setCCNode = "; + //<< setCCPredNode + setCCNode->dump(); + dbgs() << "\n"); + + assert(setCCPredNode->isMachineOpcode() == false); + assert(setCCPredNode->getOpcode() == ISD::CONDCODE); + + EVT ResTy = TYPE_VECTOR_I16; + + unsigned whereOpcode; + switch (cast(setCCPredNode)->get()) { + case ISD::SETEQ: + whereOpcode = Connex::WHEREEQ; + break; + case ISD::SETLT: + whereOpcode = Connex::WHERELT; + break; + case ISD::SETOLT: { + whereOpcode = Connex::WHEREEQ; + + ResTy = TYPE_VECTOR_F16; + + // We ISel an lt.f16 and compare its result with 1. + SDNode *resLtF16 = SelectLtF16(setCCNode); + + // VLOAD 1; + SDValue ct1 = CurDAG->getConstant(1, DL, MVT::i16, true, false); + SDNode *vload1 = CurDAG->getMachineNode(Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct1, + // Glue input edge + SDValue(resLtF16, 1) + ); + + SDNode *eq1 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(resLtF16, 0), + SDValue(vload1, 0), + // Glue input edge + SDValue(vload1, 1) + ); + LLVM_DEBUG(dbgs() << "SelectVSELECT(): eq1 = "; + eq1->dump(); + dbgs() << "\n"); + + ReplaceNode(setCCNode, eq1); + setCCNode = eq1; + setCC = SDValue(eq1, 0); + + break; + } + case ISD::SETULT: + whereOpcode = Connex::WHEREULT; + break; + default: + assert(0 && "case not reachable"); + break; + } + + +#ifdef OLD_UNTIL_2018_07_26 + /* IMPORTANT: In essence this is ONLY to allocate a virtual register to use + it later for the TargetMachine. + */ + unsigned virtRegDst = RegInfo->createVirtualRegister( + &Connex::VectorHRegClass); + + SDNode *useSetCC = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + // TODO: actually it should be a NOP_BOGUS + DL, + TYPE_VECTOR_I16, + //MVT::Other, + MVT::Glue, + setCC); + + // IMPORTANT: This should NOT be correct - we put an OR, which following + // ConnexISA.docx (and especially the Opincaa simulator) should alter the + // Connex flags between the predicate and the WHERE. 
+ // + // Note however, that following scalar_alu.v (and scalar_logic.v), the logic + // operations do NOT alter the Connex flags - see + // e.g. /home/alarm/Experiments/Testing_OR_wrt_flags/STD_run_*. + // + // In https://en.wikipedia.org/wiki/Zero_flag + // " the zero flag is used to check the result of an arithmetic operation, + // including bitwise logical instructions. + // It is set if an arithmetic result is zero, and reset otherwise." + // "In some instruction sets such as the MIPS architecture, a dedicated + // flag register is not used; jump instructions instead check a register + // for zero. " + SDNode *copyFalse = CurDAG->getMachineNode(Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + vselectNodeOp2, + vselectNodeOp2, + // chain edge + //SDValue(vloadCt0, 1) // Glue edge + //SDValue(setCCNode, 0) + SDValue(useSetCC, 1) + //setCC + ); + + SDValue copyToRegDst1 = CurDAG->getCopyToReg( + // chain + SDValue(copyFalse, 1), + DL, + virtRegDst, + // Value copied to register + SDValue(copyFalse, 0) + ); + + /* + SDValue copyFromRegDest = DAG.getCopyFromReg( + chain, + //SDValue(endWhere, 0), + DL, + regDest, + TYPE_VECTOR_I16 // result type + //endWhere->getOperand(0)); //RegTy); + //SDValue(endWhere, 0) + ); + */ + SDNode *whereEq = CurDAG->getMachineNode(whereOpcode, + DL, + TYPE_VECTOR_I16, + //MVT::Other, + MVT::Glue, + SDValue(copyFalse, 0), + // chain edge + //SDValue(idxPredicate, 1) + //setCCPred + SDValue(copyFalse, 1) + ); +#else + // small-TODO: try to get rid of this Connex::NOP_BITCONVERT_WH, although I already tried and I got ISel errors + SDNode *useSetCC = CurDAG->getMachineNode( + // Works: + Connex::NOP_BITCONVERT_WH, + // Gives error: <<#operands for dag node doesn't match .td file!>> Connex::NOP_BOGUS, + DL, + TYPE_VECTOR_I16, + //MVT::Other, + MVT::Glue, + setCC); // The result of SDNode EQ_H + + #ifdef ANOTHER_WITHOUT_NOP_BETWEEN_PRED_AND_WHERE + /* IMPORTANT: This should NOT be correct - we put an OR, which following + * 
ConnexISA.docx (and especially the Opincaa simulator) should alter the + * Connex flags between the predicate and the WHERE. + * + * Note however, that following scalar_alu.v (and scalar_logic.v), the logic + * operations do NOT alter the Connex flags - see + * e.g. /home/alarm/Experiments/Testing_OR_wrt_flags/STD_run_*. + * + * In https://en.wikipedia.org/wiki/Zero_flag + * " the zero flag is used to check the result of an arithmetic operation, + * including bitwise logical instructions. + * It is set if an arithmetic result is zero, and reset otherwise." + * "In some instruction sets such as the MIPS architecture, a dedicated + * flag register is not used; jump instructions instead check a register + * for zero. " + */ + SDNode *copyFalse = CurDAG->getMachineNode(Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + vselectNodeOp2, + vselectNodeOp2, + // chain edge + //SDValue(vloadCt0, 1) // Glue edge + //SDValue(setCCNode, 0) + //SDValue(useSetCC, 1), + SDValue(useSetCC, 1) + //setCC // TODO: check + ); + + SDNode *whereEq = CurDAG->getMachineNode(whereOpcode, + DL, + TYPE_VECTOR_I16, + //MVT::Other, + MVT::Glue, + SDValue(copyFalse, 0), + // chain edge + //SDValue(idxPredicate, 1) + //setCCPred + SDValue(copyFalse, 1) + ); + #else + SDValue ct1 = CurDAG->getConstant(1 /* Num of cycles to NOP */, + DL, MVT::i16, true, false); + SDNode *copyFalse = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct1, + // glue (or chain) input edge + //SDValue(eq1, 1) + // ERROR: setCC + //SDValue(setCCNode, 1) + SDValue(useSetCC, 1) + ); + + SDNode *whereEq = CurDAG->getMachineNode(whereOpcode, + DL, + TYPE_VECTOR_I16, + //MVT::Other, + MVT::Glue, + //SDValue(copyFalse, 0), + vselectNodeOp2, + // Glue/chain edge + //SDValue(idxPredicate, 1) + //setCCPred + SDValue(copyFalse, 0) + ); + #endif +#endif + + // IMPORTANT: Note that we use ORV_SPECIAL_H, which puts a tied-to constraint + // to allocate to the same physical vector register (dst) both vSelectNodeOp1 + 
// and vSelectNodeOp2. + // Therefore, this ORV_SPECIAL_H puts over the vSelectNodeOp2, + // the false value, the values of the selected (where the predicate is true) + // vSelectNodeOp1. + SDNode *copyTrue = CurDAG->getMachineNode(Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + vselectNodeOp1, + vselectNodeOp1, + vselectNodeOp2, + // Glue edge + SDValue(whereEq, 1) // Glue edge + ); + + SDNode *endWhere = CurDAG->getMachineNode(Connex::END_WHERE, + DL, + // 2018_09_08 ResTy, + //TYPE_VECTOR_I16, + // 2018_09_08 + MVT::Other, + // 2018_09_08 SDValue(copyTrue, 0), + //MVT::Glue, + /* Important: we put this bogus + operand here to force the PostRA + scheduler to keep the + WHERE..END_WHERE block intact + withOUT using instruction bundles. + */ + // chain edge + SDValue(copyTrue, 1) // Glue edge + ); + // 2018_09_08: + std::string exprStrEnd = "// Finishing VSELECT emulation ;)"; + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + endWhere, + DL); + LLVM_DEBUG(dbgs() << "SelectVSELECT(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); dbgs() << "\n"); + + SDNode *resW = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, + DL, + //typeVecNode, + TYPE_VECTOR_F16, + SDValue(copyTrue, 0), + // chain edge + SDValue(inlineAsmNodeEnd, 0) + ); + + LLVM_DEBUG(dbgs() << "SelectVSELECT(): resW = "; + resW->dump(CurDAG); + dbgs() << "\n"); + + LLVM_DEBUG(dbgs() << "SelectVSELECT(): whereEq = "; + whereEq->dump(CurDAG); + dbgs() << "\n"); + + /* + // Unfortunately this messes up at register allocation because it considers + // the result of the ORV_SPECIAL_H node to be dead after the use at END_WHERE + SDValue copyFromRegDest = CurDAG->getCopyFromReg( + //chain, + SDValue(endWhere, 0), + DL, + virtRegDst, + TYPE_VECTOR_I16 + ); + + SDNode *res = copyFromRegDest.getNode(); + LLVM_DEBUG(dbgs() << "SelectVSELECT(): res = "; + res->dump(CurDAG); + dbgs() << "\n"); + */ + +// 2018_09_08 + //SDNode *res = endWhere; + SDNode *res = resW; + + return res; 
+} // END SelectVSELECT() + + + + +// Note: all ISD opcodes can be also found at +// http://llvm.org/docs/doxygen/html/namespacellvm_1_1ISD.html. +// There are also Connex opcodes that are generated by TableGen. +void ConnexDAGToDAGISel::Select(SDNode *Node) { + unsigned Opcode = Node->getOpcode(); + + // Dump information about the Node being selected + LLVM_DEBUG(dbgs() << "Entered ConnexDAGToDAGISel::Select(): Selecting Node = "; + Node->dump(CurDAG); + dbgs() << "Opcode = " << Opcode << "\n"); + + // If we have a (custom) Machine node, it means we already have selected it + if (Node->isMachineOpcode()) { + LLVM_DEBUG(dbgs() << "== "; + Node->dump(CurDAG); + dbgs() << '\n'); + return; + } + + // tablegen selection should be handled here. + switch (Opcode) { + default: + LLVM_DEBUG(dbgs() << "ConnexDAGToDAGISel::Select(): default case: Opcode = " + << Opcode << "\n"); + break; + + /* From http://llvm.org/docs/doxygen/html/ISDOpcodes_8h_source.html: + 00156 /// OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) + 00157 /// This node represents a target intrinsic function with side effects that + 00158 /// does not return a result. The first operand is a chain pointer. The + 00159 /// second is the ID number of the intrinsic from the llvm::Intrinsic + 00160 /// namespace. The operands to the intrinsic follow. 
+ */ + case ISD::INTRINSIC_VOID: { + LLVM_DEBUG(dbgs() << "ConnexDAGToDAGISel::Select(): case ISD::INTRINSIC_VOID" + << "\n"); + + unsigned intrinsicOpcode = cast( + Node->getOperand(1))->getZExtValue(); + LLVM_DEBUG(dbgs() << "intrinsicOpcode = " << intrinsicOpcode << "\n"); + + /* + LLVM_DEBUG(dbgs() << "Intrinsic::connex_end_repeat = " + << Intrinsic::connex_end_repeat << "\n"); + LLVM_DEBUG(dbgs() << "Intrinsic::connex_reduce = " + << Intrinsic::connex_reduce << "\n"); + LLVM_DEBUG(dbgs() << "Intrinsic::connex_repeat_x_times = " + << Intrinsic::connex_repeat_x_times << "\n"); + */ + + LLVM_DEBUG(dbgs() << "Node->getOperand(0) = "; + Node->getOperand(0).dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Node->getOperand(1) = "; + Node->getOperand(1).dump(); + dbgs() << "\n"); + + switch (intrinsicOpcode) { + case Intrinsic::connex_repeat_x_times: { + SDLoc DL(Node); + + /* llvm.connex.repeat.x.times SDNode has 3 operands: + * - 0, which is the chain - a bit to my surprise + * SelectionDAGBuilder puts as input to the chain port + * the node just above it, not SDNode t0 + * - 1, which is the intrinsic's opcode + * - 2, which is the actual parameter + * t16: ch = llvm.connex.repeat.x.times t10, TargetConstant:i64<471>, t15 + * + */ + + LLVM_DEBUG(dbgs() << "ConnexDAGToDAGISel::Select(): case " + "Intrinsic::connex_repeat_x_times" + << "\n"); + LLVM_DEBUG(dbgs() << " Node->getOperand(2) = "; + Node->getOperand(2).dump(); + dbgs() << "\n"); + +#define CODE2018_06_29 + /* + std::vector opsSDVRepeat; + opsSDVRepeat.push_back(CurDAG->getEntryNode()); + std::vector opsEVTRepeat; + // MachineSDNode *getMachineNode(unsigned Opcode, SDLoc dl, ArrayRef< EVT > ResultTys, ArrayRef< SDValue > Ops) + */ + SDNode *repeatSpecial = CurDAG->getMachineNode( + Connex::REPEAT_SYM_IMM, + DL, + // Return types + #ifdef CODE2018_06_29 + /* Gives error when doing "List Scheduling": + - when doing things as correct as possible (glue edge put in CONNEX::INLINEASM as the last operand): + <> + 
+ - glue edge put in + ISD::INLINEASM as the last operand): <> + + - <getNodeId() == -1 && "Node already inserted!">> + - because I put this Glue edge as 1st operand of INLINEASM, which is documented as being wrong + */ + MVT::Glue, + #else + MVT::Other, + #endif + // We add a chain edge + /* IMPORTANT: this was wrong since + * when we give ReplaceNode() it + * deletes the platform independent + * REPEAT SDNode which has as input opnd0 + * (Node->getOperand(0), an Inline + * ASM epxression, as discussed, + * fed on the chain input port) and opnd0 + * is not used by any other + * node. + WRONG: CurDAG->getEntryNode() + + * But now I give opnd0 as input + * to the chain port of the new + * machine-dependent node + * and this avoids + * opnd0 becoming a dead node and + * be eventually removed. + */ + Node->getOperand(0) + ); + LLVM_DEBUG(dbgs() << "Select() for Intrinsic::connex_repeat_x_times: repeatSpecial = "; + repeatSpecial->dump(); + dbgs() << "\n"); + + SDNode *op2 = Node->getOperand(2).getNode(); + LLVM_DEBUG(dbgs() << "op2 = "; + op2->dump(); + dbgs() << "\n"); + std::string exprStr = " " + + RecoverCExpressionFromSDNode(op2, crtNodeMap, true) + + ");"; + + SDNode *inlineAsmNode = CreateInlineAsmNode(CurDAG, exprStr, + repeatSpecial, DL + #ifdef CODE2018_06_29 + , true + #endif + ); + + //ReplaceAllUsesWith(Node, inlineAsmNode); + //CurDAG->RemoveDeadNode(Node); // Gives at scheduling error: Assertion `Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] && "Wrong topological sorting"' failed. + // ReplaceNode defined in https://llvm.org/svn/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAGISel.h. 
+ + ReplaceNode(Node, inlineAsmNode); + + // This takes out the REPEAT and symbolic expression INLINE Asm + //ReplaceNode(Node, Node->getOperand(0).getNode()); + return; + } + /* + case Intrinsic::connex_end_repeat: + // Note: this case is handled in TableGen match pattern in ConnexInstrInfo_REPEAT.td + */ + default: + break; + } + } + + /* From http://llvm.org/docs/doxygen/html/ISDOpcodes_8h_source.html: + 00148 /// RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) + 00149 /// This node represents a target intrinsic function with side effects that + 00150 /// returns a result. The first operand is a chain pointer. The second is + 00151 /// the ID number of the intrinsic from the llvm::Intrinsic namespace. The + 00152 /// operands to the intrinsic follow. The node has two results, the result + 00153 /// of the intrinsic and an output chain. + */ + case ISD::INTRINSIC_W_CHAIN: { + LLVM_DEBUG(dbgs() + << "ConnexDAGToDAGISel::Select(): case ISD::INTRINSIC_W_CHAIN" + << "\n"); + unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); + LLVM_DEBUG(dbgs() << "IntNo = " << IntNo << "\n"); + switch (IntNo) { + case Intrinsic::connex_load_byte: + case Intrinsic::connex_load_half: + case Intrinsic::connex_load_word: { + SDLoc DL(Node); + SDValue Chain = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue Skb = Node->getOperand(2); + SDValue N3 = Node->getOperand(3); + + // TODO_CHANGE_BACKEND: + //SDValue R6Reg = CurDAG->getRegister(Connex::R6, MVT::i64); + SDValue R6Reg = CurDAG->getRegister(Connex::R6, TYPE_SCALAR_ELEMENT); + + Chain = CurDAG->getCopyToReg(Chain, DL, R6Reg, Skb, SDValue()); + Node = CurDAG->UpdateNodeOperands(Node, Chain, N1, R6Reg, N3); + break; + } + case Intrinsic::connex_reduce: { + //EVT ResTy = Node->getValueType(0); + EVT ResTy = (Node->getOperand(2).getNode())->getValueType(0); + + LLVM_DEBUG(dbgs() << "Entered Select() for Intrinsic::connex_reduce:\n" + << " ResTy = " << ResTy.getEVTString() + << "\n"); + + if 
(ResTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "Select() for connex_reduce.i32\n"); + + SDNode *reduceHigh16 = SelectReduceI32(Node); + + ReplaceNode(Node, reduceHigh16); // Res // does NOT work - gives RT error: whereEq); + //ReplaceNode(Node, nodeOpSrcCast); + + /* + // See http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html + CurDAG->SelectNodeTo(Node, + Connex::RED_H, + TYPE_VECTOR_I16, + SDValue(vloadCt0_srcAux, 0)); + */ + return; + } // END case Intrinsic::connex_reduce_i32 + else + if (ResTy == TYPE_VECTOR_F16) { + LLVM_DEBUG(dbgs() << "Select() for connex_reduce.f16\n"); + + SDNode *reduceH = SelectReduceF16(Node); + + ReplaceNode(Node, reduceH); + + return; + } // END case Intrinsic::connex_reduce_f16 + } + } + break; + } + + case ISD::FrameIndex: { + int FI = cast(Node)->getIndex(); + EVT VT = Node->getValueType(0); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); + unsigned Opc = Connex::MOV_rr; + if (Node->hasOneUse()) { + CurDAG->SelectNodeTo(Node, Opc, VT, TFI); + return; + } + ReplaceNode(Node, CurDAG->getMachineNode(Opc, SDLoc(Node), VT, TFI)); + return; + } + + + case ISD::INSERT_VECTOR_ELT: { + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::INSERT_VECTOR_ELT.\n"); + return; + } + /* + case ISD::SETCC: { + SDNode *res = Select...(Node); + ReplaceNode(Node, res); + return; + } + */ + case ISD::VSELECT: { + SDNode *res = SelectVSELECT(Node); + ReplaceNode(Node, res); + return; + } + // NEW_FP16 + case ISD::FADD: { + EVT ResTy = Node->getValueType(0); + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::FADD: \n" + << " ResTy = " << ResTy.getEVTString() + << "\n"); + + SDLoc DL(Node); + SDValue nodeOpSrc1 = Node->getOperand(0); + SDValue nodeOpSrc2 = Node->getOperand(1); + + // NEW_FP16 + if (ResTy == TYPE_VECTOR_F16) { + LLVM_DEBUG(dbgs() << "Select() for FADD: " + "We are in the case TYPE_VECTOR_F16\n"); + + + //#define OLD_NOT_CORRECT_ONLY_GOOD_FOR_MATMUL_128_F16 + #ifdef OLD_NOT_CORRECT_ONLY_GOOD_FOR_MATMUL_128_F16 +// TODO 
TODO TODO TODO TODO TODO TODO: use instead of Connex::NOP_BITCONVERT_WH a new node called Connex::NOP_BITCONVERT_F16H + + SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_WH, + DL, + ResTy, + nodeOpSrc1 + ); + SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_WH, + DL, + ResTy, + //MVT::Other, + nodeOpSrc2 + ); + + SDNode *res = CurDAG->getMachineNode(Connex::ADDV_H, + DL, + ResTy, + MVT::Other, + SDValue(nodeOpSrcCast1, 0), + SDValue(nodeOpSrcCast2, 0) + // glue (or chain) input edge + //SDValue(nodeOpSrcCast2, 0) + ); + + SDNode *resW = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HW, + DL, + ResTy, + SDValue(res, 0), + // chain edge + SDValue(res, 1) + ); + ReplaceNode(Node, resW); + #else // OLD_NOT_CORRECT_ONLY_GOOD_FOR_MATMUL_128_F16 + SDNode *res = SelectAddF16(Node); + ReplaceNode(Node, res); + #endif + + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): res = "; + res->dump(); + dbgs() << "\n"); + + return; + } + else + if (ResTy == MVT::f16) { + // 2018_12_29 + // small-MEGA-TODO: we should emulate with BPF assembler the add.f16 scalar op. 
This means we need to use a NOP_CONVERT_F16_TO_I64, etc + LLVM_DEBUG(dbgs() << "Select() for FADD: We are in the case MVT::F16\n"); + SDNode *res = CurDAG->getMachineNode(Connex::ADD_rr, // This is actually a BPF instruction + DL, + ResTy, + // NOT working - error <hasAnyUseOfValue(i) || From->getValueType(i) == To->getValueType(i)) && "Cannot use this version of ReplaceAllUsesWith!"' failed.>>: MVT::i64, + //MVT::Other, + //nodeOpSrc1, // I guess this is not needed, since the auto-ISeled BPF instructions don't need it either + nodeOpSrc1, + nodeOpSrc2 + //opChain + ); + + ReplaceNode(Node, res); + + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): res = "; + res->dump(); + dbgs() << "\n"); + + return; + } + + } // END ISD::FADD + case ISD::FSUB: { + EVT ResTy = Node->getValueType(0); + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::FSUB: \n" + << " ResTy = " << ResTy.getEVTString() + << "\n"); + + if (ResTy == TYPE_VECTOR_F16) { + LLVM_DEBUG(dbgs() << "Select() for FSUB: " + "We are in the case TYPE_VECTOR_F16\n"); + //typeVecNode = TYPE_VECTOR_F16; + +#define GOOD + #ifdef GOOD + SDNode *res = SelectSubF16(Node); + #else + SDLoc DL(Node); + SDNode *res = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_F16, + MVT::Glue, + Node->getOperand(0), + Node->getOperand(1) + // glue (or chain) input edge + //SDValue(lt0, 1) + ); + #endif + + ReplaceNode(Node, res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::FSUB: res = "; + res->dump(CurDAG); + dbgs() << "\n"); + return; + } + } + case ISD::FMUL: { + EVT ResTy = Node->getValueType(0); + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::FMUL: \n" + << " ResTy = " << ResTy.getEVTString() + << "\n"); + + if (ResTy == TYPE_VECTOR_F16) { + LLVM_DEBUG(dbgs() << "Select() for FMUL: " + "We are in the case TYPE_VECTOR_F16\n"); + //typeVecNode = TYPE_VECTOR_F16; + + //TODO TODO TODO + SDNode *res = SelectMulF16(Node); + + ReplaceNode(Node, res); + + 
LLVM_DEBUG(dbgs() << "Select() for ISD::FMUL: res = "; + res->dump(CurDAG); + dbgs() << "\n"); + return; + } + } + // NEW32 + case ISD::ADD: { + EVT typeVecNode; + EVT ResTy = Node->getValueType(0); + + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::ADD: \n" + << " ResTy = " << ResTy.getEVTString() + << "\n"); + + if (ResTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "Select() for ADD: " + "We are in the case TYPE_VECTOR_I32\n"); + typeVecNode = TYPE_VECTOR_I32; + + SDNode *res = SelectAddI32(Node); + + ReplaceNode(Node, res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::ADD: res = "; + res->dump(CurDAG); dbgs() << "\n"); + return; + } + else + if (ResTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for ADD: " + "We are in the case TYPE_VECTOR_I16\n"); + typeVecNode = TYPE_VECTOR_I16; + } + + break; + } // END ISD::ADD + // NEW32 + case ISD::SUB: { + EVT typeVecNode; + EVT ResTy = Node->getValueType(0); + + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::SUB.\n" + << "Select() for SUB: " + "ResTy = " << ResTy.getEVTString() + << "\n"); + + if (ResTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "Select() for SUB: " + "We are in the case TYPE_VECTOR_I32\n"); + typeVecNode = TYPE_VECTOR_I32; + + SDNode *res = SelectSubI32(Node); + + ReplaceNode(Node, res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::SUB: res = "; + res->dump(CurDAG); + dbgs() << "\n"); + return; + } + else + if (ResTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for SUB: " + "We are in the case TYPE_VECTOR_I16\n"); + typeVecNode = TYPE_VECTOR_I16; + } + + break; + } // END ISD::SUB + case ISD::MUL: { + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + EVT ResTy = Node->getValueType(0); + + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::MUL.\n"); + + LLVM_DEBUG(dbgs() << "Select() for MUL: " + "ResTy = " + << ResTy.getEVTString() + << "\n"); + if (ResTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "Select() for MUL: " + "We are in the case TYPE_VECTOR_I32\n"); + typeVecNode = 
TYPE_VECTOR_I32; + + SDNode *res = SelectMulI32(Node); + + ReplaceNode(Node, res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::MUL: res = "; + res->dump(CurDAG); + dbgs() << "\n"); + return; + } + else + if (ResTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for ISD::MUL: We are in the case TYPE_VECTOR_I16\n"); + typeVecNode = TYPE_VECTOR_I16; + } + + break; + } // END case ISD::MUL + case ISD::FDIV: { + EVT ResTy = Node->getValueType(0); + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::FDIV: \n" + << " ResTy = " << ResTy.getEVTString() + << "\n"); + + if (ResTy == TYPE_VECTOR_F16) { + LLVM_DEBUG(dbgs() << "Select() for FDIV: " + "We are in the case TYPE_VECTOR_F16\n"); + //typeVecNode = TYPE_VECTOR_F16; + + //TODO TODO TODO + SDNode *res = SelectDivF16(Node); + + ReplaceNode(Node, res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::FDIV: res = "; + res->dump(CurDAG); + dbgs() << "\n"); + return; + } + /* + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + EVT ResTy = Node->getValueType(0); + + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::FDIV.\n"); + + LLVM_DEBUG(dbgs() << "Select() for FDIV: " + "ResTy = " << ResTy.getEVTString() + << "\n"); + */ + } + // TODO TODO TODO TODO TODO: should be also case ISD::SDIVREM: + case ISD::SDIV: { + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + EVT ResTy = Node->getValueType(0); + + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::DIV.\n"); + + LLVM_DEBUG(dbgs() << "Select() for DIV: " + "ResTy = " << ResTy.getEVTString() + << "\n"); + + if (ResTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "Select() for DIV: " + "We are in the case TYPE_VECTOR_I32\n"); + typeVecNode = TYPE_VECTOR_I32; + + assert(0 && "Not implemented"); + + /* SDNode *res = SelectDivI32(Node); + ReplaceNode(Node, res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::DIV: res = "; + res->dump(CurDAG); dbgs() << "\n"); + */ + return; + } + else + if (ResTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for ISD::DIV: " + "We are in 
the case TYPE_VECTOR_I16\n"); + typeVecNode = TYPE_VECTOR_I16; + + SDNode *res = SelectDivI16(Node); + + ReplaceNode(Node, res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::DIV: res = "; + res->dump(CurDAG); + dbgs() << "\n"); + return; + } + + break; + } + case ISD::OR: { + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + EVT ResTy = Node->getValueType(0); + + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::OR.\n"); + LLVM_DEBUG(dbgs() << "Select() for OR: " + "ResTy = " << ResTy.getEVTString() + << "\n"); + + if (ResTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "Select() for OR: We are in the case TYPE_VECTOR_I32\n"); + typeVecNode = TYPE_VECTOR_I32; + + SDValue nodeOp0 = Node->getOperand(0); + SDValue nodeOp1 = Node->getOperand(1); + + SDNode *nodeOp0Cast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + TYPE_VECTOR_I16, + nodeOp0); + SDNode *nodeOp1Cast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + TYPE_VECTOR_I16, + nodeOp1); + + SDNode *Res16 = CurDAG->getMachineNode(Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + //MVT::Other, + SDValue(nodeOp0Cast, 0), + SDValue(nodeOp1Cast, 0) + ); + LLVM_DEBUG(dbgs() << "Select() for ISD::OR: Res16 = "; + Res16->dump(CurDAG); + dbgs() << "\n"); + + SDNode *Res = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(Res16, 0)); + + ReplaceNode(Node, Res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::OR: Res = "; + Res->dump(CurDAG); + dbgs() << "\n"); + return; + } + else + if (ResTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for OR: We are in the case TYPE_VECTOR_I16\n"); + typeVecNode = TYPE_VECTOR_I16; + } + + break; + } // END ISD::OR + // NEW32 + case ISD::AND: { + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + EVT ResTy = Node->getValueType(0); + + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::AND.\n"); + + + LLVM_DEBUG(dbgs() << "Select() for AND: " + "ResTy = " << ResTy.getEVTString() + << "\n"); + if (ResTy == TYPE_VECTOR_I32) { + 
LLVM_DEBUG(dbgs() << "Select() for AND: We are in the case TYPE_VECTOR_I32\n"); + typeVecNode = TYPE_VECTOR_I32; + + SDValue nodeOp0 = Node->getOperand(0); + SDValue nodeOp1 = Node->getOperand(1); + + SDNode *nodeOp0Cast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + TYPE_VECTOR_I16, + nodeOp0); + SDNode *nodeOp1Cast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + TYPE_VECTOR_I16, + nodeOp1); + + SDNode *Res16 = CurDAG->getMachineNode(Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + //MVT::Other, + SDValue(nodeOp0Cast, 0), + SDValue(nodeOp1Cast, 0) + ); + LLVM_DEBUG(dbgs() << "Select() for ISD::AND: Res16 = "; + Res16->dump(CurDAG); + dbgs() << "\n"); + + SDNode *Res = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(Res16, 0)); + + ReplaceNode(Node, Res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::AND: Res = "; + Res->dump(CurDAG); + dbgs() << "\n"); + return; + } + else + if (ResTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for AND: We are in the case TYPE_VECTOR_I16\n"); + typeVecNode = TYPE_VECTOR_I16; + } + + break; + } // END ISD::AND + // NEW32 + case ISD::XOR: { + SDLoc DL(Node); + + /* !!!!TODO TODO: check that the flags are also equivalent: XOR i16 + sets flags like SUBC: + see ConnexVector.cpp + BINARY_OP_FLAGS_LIKE_SUBC(^) - look for the macros + */ + + EVT ViaVecTy; + EVT typeVecNode; + EVT ResTy = Node->getValueType(0); + + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::XOR.\n"); + + LLVM_DEBUG(dbgs() << "Select() for XOR: " + "ResTy = " << ResTy.getEVTString() + << "\n"); + if (ResTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "Select() for XOR: We are in the case TYPE_VECTOR_I32\n"); + typeVecNode = TYPE_VECTOR_I32; + + SDValue nodeOp0 = Node->getOperand(0); + SDValue nodeOp1 = Node->getOperand(1); + + SDNode *nodeOp0Cast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + TYPE_VECTOR_I16, + nodeOp0); + SDNode *nodeOp1Cast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + 
DL, + TYPE_VECTOR_I16, + nodeOp1); + + SDNode *Res16 = CurDAG->getMachineNode(Connex::XORV_H, + DL, + TYPE_VECTOR_I16, + //MVT::Other, + SDValue(nodeOp0Cast, 0), + SDValue(nodeOp1Cast, 0) + ); + LLVM_DEBUG(dbgs() << "Select() for ISD::XOR: Res16 = "; + Res16->dump(CurDAG); + dbgs() << "\n"); + + SDNode *Res = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(Res16, 0)); + + ReplaceNode(Node, Res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::XOR: Res = "; + Res->dump(CurDAG); + dbgs() << "\n"); + return; + } + else + if (ResTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for XOR: We are in the case TYPE_VECTOR_I16\n"); + typeVecNode = TYPE_VECTOR_I16; + } + + break; + } // END ISD::XOR + // NEW32 + case ISD::SRA: { // Arithmetic Shift Right + // See http://llvm.org/docs/LangRef.html#ashr-instruction + // and https://en.wikipedia.org/wiki/Arithmetic_shift + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + EVT ResTy = Node->getValueType(0); + + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::SRA.\n"); + LLVM_DEBUG(dbgs() << "Select() for SRA: " + "ResTy = " << ResTy.getEVTString() + << "\n"); + + if (ResTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "SelectSraI32() for MUL: We are in the case TYPE_VECTOR_I32\n"); + //typeVecNode = TYPE_VECTOR_I32; + + #ifdef TODO_INTERESTING + //ConstantSDNode *nodeOp0CtSDNode = cast(nodeOp1); + BuildVectorSDNode *BVN = cast(nodeOp1.getNode()); + //!!!! 
TODO: need to discriminate case: immediate operand - it takes fewer cycles + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (BVN->isConstantSplat(SplatValue, SplatUndef, + SplatBitSize, HasAnyUndefs, + 8, true) == true) { + LLVM_DEBUG(dbgs() << "Select() for SRA: BVN->isConstantSplat() == TRUE\n"); + // MEGA-TODO: in this case we should do ISHRA.i32 instead of SHRA.i32 + } + #endif + + SDNode *res = SelectSraI32(Node); + + ReplaceNode(Node, res); + + LLVM_DEBUG(dbgs() << "Select() for ISD::SRA: res = "; + res->dump(CurDAG); + dbgs() << "\n"); + return; + } + else + if (ResTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for SRA: We are in the case TYPE_VECTOR_I16\n"); + typeVecNode = TYPE_VECTOR_I16; + } + + break; + } // END ISD::SRA + // NEW32 + case ISD::MGATHER: { + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::MGATHER.\n"); + LLVM_DEBUG(dbgs() << " Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + + SDLoc DL(Node); + EVT ViaVecTy; + EVT typeVecNode; + EVT ResTy = Node->getValueType(0); + + + MaskedGatherSDNode *nodeGather = dyn_cast(Node); + assert(nodeGather != NULL); + + // See http://llvm.org/docs/doxygen/html/SelectionDAGNodes_8h_source.html#l02107 + SDValue indexVec = nodeGather->getIndex(); + SDValue passthruVec = nodeGather->getPassThru(); // 2019_03_30: getValue(); + + LLVM_DEBUG(dbgs() << "Select() for MGATHER: indexVec = "; + (indexVec.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select() for MGATHER: passthruVec = "; + (passthruVec.getNode())->dump(); + dbgs() << "\n"); + + EVT opIndexVecTy = indexVec.getValueType(); + EVT opValVecTy = passthruVec.getValueType(); + + LLVM_DEBUG(dbgs() << "Select() for MGATHER: opIndexVecTy = " + << opIndexVecTy.getEVTString() + << ", opValVecTy = " << opValVecTy.getEVTString() + << ", ResTy = " << ResTy.getEVTString() + << "\n"); + + SDValue opChain = Node->getOperand(0); + LLVM_DEBUG(dbgs() << "Select() for MGATHER: opChain = "; + 
(opChain.getNode())->dump(); + dbgs() << "\n"); + + // NEW_FP16 + //if (opValVecTy == TYPE_VECTOR_F16) + if (ResTy == TYPE_VECTOR_F16) { + typeVecNode = TYPE_VECTOR_F16; + + LLVM_DEBUG(dbgs() << "Select() for MGATHER: We are in the case ResTy == TYPE_VECTOR_F16\n"); + + #ifdef BITCAST_2018_06_F16 + SDNode *indexVec16 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + //typeVecNode, + TYPE_VECTOR_I16, + // The address operand + indexVec); + #endif + + SDNode *Res16 = CurDAG->getMachineNode(Connex::LD_INDIRECT_H, + DL, + #ifdef BITCAST_2018_06_F16 + TYPE_VECTOR_I16, + #else + typeVecNode, // We prevent getting error: <hasAnyUseOfValue(i) || From->getValueType(i) == To->getValueType(i)) && "Cannot use this version of ReplaceAllUsesWith!"' failed.>> + #endif + //MVT::Other, + #ifdef BITCAST_2018_06_F16 + SDValue(indexVec16, 0), //indexVec + #else + indexVec, + #endif + opChain + ); + + SDNode *Res; + #ifdef BITCAST_2018_06_F16 + Res = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(Res16, 0)); + #else + Res = Res16; + #endif + + // TODO TODO TODO TODO TODO TODO TODO: use instead of Connex::NOP_BITCONVERT_WH a new node called Connex::NOP_BITCONVERT_F16H + #ifdef GOOD_I_THINK_TESTED_JUN_21_2018_SOME_BUG_IN_MULF16 + SDNode *Res16 = CurDAG->getMachineNode(Connex::LD_INDIRECT_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + indexVec + ); + SDNode *Res = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HW, + DL, + //typeVecNode, + ResTy, + SDValue(Res16, 0)); + #endif + + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): Res = "; + Res->dump(); + dbgs() << "\n"); + + ReplaceNode(Node, Res); + + return; + } + else + if (opIndexVecTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "Select() for MGATHER: We are in the case TYPE_VECTOR_I32\n"); + typeVecNode = TYPE_VECTOR_I32; + + /* VERY IMPORTANT: we add opChain to chain this new node with the node + the target-independent 
masked_gather node was chained with. + If we do not do this then we will eventually have other useful + chained nodes removed, resulting in a incorrect/partial program. */ + /* TODO: not sure if the chain is going to always be operand 0. + However masked_gather has a chain following attribute SDNPHasChain, + see include/llvm/Target/TargetSelectionDAG.td + See also indirectly the other params (methods get*()) of + MaskedGatherScatterSDNode at + http://llvm.org/docs/doxygen/html/SelectionDAGNodes_8h_source.html#l02107 + */ + #ifdef USE16bits_2017_05_27 + SDNode *Res = CurDAG->getMachineNode(Connex::LD_INDIRECT_W, + DL, + typeVecNode, + //MVT::Other, + indexVec, + opChain + ); + #else + #ifdef BITCAST_MAY2017_05_28 + SDNode *indexVec16 = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_WH, + DL, + //typeVecNode, + TYPE_VECTOR_I16, + // The address operand + indexVec); + #endif + SDNode *Res16 = CurDAG->getMachineNode(Connex::LD_INDIRECT_H, + DL, + //typeVecNode, + TYPE_VECTOR_I16, + //MVT::Other, + #ifdef BITCAST_MAY2017_05_28 + SDValue(indexVec16, 0), //indexVec + #else + indexVec, + #endif + opChain + ); + SDNode *Res; + #ifdef BITCAST_MAY2017_05_28 + Res = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + MVT::Other, // We need this only for DotProd.i16 + SDValue(Res16, 0)); + #else + Res = Res16; + #endif + #endif + + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): Res = "; + Res->dump(); + dbgs() << "\n"); + + ReplaceNode(Node, Res); + + return; + } + else + if (opIndexVecTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for MGATHER: We are in the case TYPE_VECTOR_I16\n"); + typeVecNode = TYPE_VECTOR_I16; + + SDNode *Res = CurDAG->getMachineNode(Connex::LD_INDIRECT_H, + DL, + //typeVecNode, + TYPE_VECTOR_I16, + /* Usually it comes with ch + putting it here avoids error + <> */ + MVT::Other, + indexVec, + opChain + ); + + LLVM_DEBUG(dbgs() << "Res = "; + 
Res->dump(CurDAG); + dbgs() << "\n"); + ReplaceNode(Node, Res); + + return; + } + //Res = CurDAG->getMachineNode(Connex::LD_INDIRECT_W, DL, ViaVecTy, Node->getOperand(0)); + //Res = CurDAG->getMachineNode(LD_INDIRECT_W_DESC_BASE, DL, ViaVecTy, Node->getOperand(0)); + //Res = CurDAG->getMachineNode(ST_INDIRECT_H_DESC_BASE, DL, ViaVecTy, Node->getOperand(0)); + + break; + } // END ISD::MGATHER + // NEW32 + case ISD::MSCATTER: { + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::MSCATTER.\n"); + + SDLoc DL(Node); + MVT typeVecNode; + // For SCATTER it is chain: EVT ResTy = Node->getValueType(0); + //MVT mResTy = ResTy.getSimpleVT(); + + + MaskedScatterSDNode *nodeScatter = dyn_cast(Node); + // See llvm.org/docs/doxygen/html/SelectionDAGNodes_8h_source.html#l02107 + SDValue indexVec = nodeScatter->getIndex(); + SDValue sourceVec = nodeScatter->getValue(); + + EVT opIndexVecTy = indexVec.getValueType(); /*Node->getOperand(0).getValueType(); */ // getSimpleValueType(); + EVT opSourceVecTy = sourceVec.getValueType(); + + LLVM_DEBUG(dbgs() << "Select() for MSCATTER: " + << "opIndexVecTy = " << opIndexVecTy.getEVTString() + << ", opSourceVecTy = " << opSourceVecTy.getEVTString() + << "\n"); + + // NEW_FP16 + if (opSourceVecTy == TYPE_VECTOR_F16) { + LLVM_DEBUG(dbgs() << "Select() for MSCATTER: We are in the case " + "opSourceVecTy == TYPE_VECTOR_F16\n"); + +// TODO TODO TODO TODO TODO TODO TODO: use instead of Connex::NOP_BITCONVERT_WH a new node called Connex::NOP_BITCONVERT_F16H + #ifdef BITCAST_2018_06_F16 + SDNode *sourceVec16 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + TYPE_VECTOR_I16, + sourceVec + ); + SDNode *Res = CurDAG->getMachineNode(Connex::ST_INDIRECT_H, + DL, + MVT::Other, + indexVec, + SDValue(sourceVec16, 0) + ); + #else + SDNode *Res = CurDAG->getMachineNode(Connex::ST_INDIRECT_H, + DL, + MVT::Other, + indexVec, + sourceVec + ); + #endif + + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << 
"Select(): Res = "; + Res->dump(); + dbgs() << "\n"); + + ReplaceNode(Node, Res); + + return; + } + else + if (opIndexVecTy == TYPE_VECTOR_I32) { + LLVM_DEBUG(dbgs() << "Select() for MSCATTER: We are in the case opIndexVecTy == TYPE_VECTOR_I32\n"); + + typeVecNode = TYPE_VECTOR_I32; + + /* VERY IMPORTANT: we add opChain to chain this new node with the node + the target-independent masked_gather node was chained with. + If we do not do this then we will eventually have other useful + chained nodes removed, resulting in a incorrect/partial program. */ + /* TODO: not sure if the chain is going to always be operand 0. + However masked_gather has a chain following attribute SDNPHasChain, + see include/llvm/Target/TargetSelectionDAG.td + See also indirectly the other params (methods get*()) of + MaskedGatherScatterSDNode at + http://llvm.org/docs/doxygen/html/SelectionDAGNodes_8h_source.html#l02107 + */ + SDValue opChain = Node->getOperand(0); + LLVM_DEBUG(dbgs() << "Select() for MSCATTER: opChain = "; + (opChain.getNode())->dump(); + dbgs() << "\n"); + #ifdef USE16bits_2017_05_27 + SDNode *Res = CurDAG->getMachineNode(Connex::ST_INDIRECT_W, + DL, + //typeVecNode, + //voidEVT, + MVT::Other, + indexVec, + sourceVec + //,opChain + ); + #else + #ifdef BITCAST_MAY2017_05_28 + SDNode *indexVec16 = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_WH, + DL, + //typeVecNode, + TYPE_VECTOR_I16, + indexVec); + SDNode *sourceVec16 = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_WH, + DL, + //typeVecNode, + TYPE_VECTOR_I16, + sourceVec + ); + #endif + SDNode *Res = CurDAG->getMachineNode(Connex::ST_INDIRECT_H, + DL, + //typeVecNode, + //voidEVT, + MVT::Other, + #ifdef BITCAST_MAY2017_05_28 + SDValue(indexVec16, 0), //indexVec, + //sourceVec + SDValue(sourceVec16, 0) + #else + indexVec, + sourceVec + #endif + /* + //,opChain + TODO TODO TODO: figure out why can't I add a chain + edge to scatter like I did for MGAHTER + MAYBE use: CurDAG->getVTList(MVT::Other, MVT::Glue), */ + ); + 
#endif + + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): Res = "; + Res->dump(); + dbgs() << "\n"); + + ReplaceNode(Node, Res); + + return; + } + else + if (opIndexVecTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for MSCATTER: We are in the case " + "opIndexVecTy == TYPE_VECTOR_I16\n"); + + typeVecNode = TYPE_VECTOR_I16; + + SDNode *Res = CurDAG->getMachineNode(Connex::ST_INDIRECT_H, + DL, + //typeVecNode, + //voidEVT, + MVT::Other, + indexVec, + sourceVec + ); + + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): Res = "; + Res->dump(); + dbgs() << "\n"); + + ReplaceNode(Node, Res); + + return; + } + + /* + LLVMContext &theContext = *(CurDAG->getContext()); + EVT voidEVT = EVT::getEVT(Type::getVoidTy(theContext)); + LLVM_DEBUG(dbgs() << " voidEVT = " + << voidEVT.getEVTString() << "\n"); + */ + + break; + } // END ISD::MSCATTER + case ISD::ConstantPool: { + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::ConstantPool.\n"); + LLVM_DEBUG(dbgs() << " Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + + SDLoc DL(Node); + + // MEGA-TODO: check for splat 0..CVL-1 + // MEGA-TODO: I need to return TYPE_VECTOR_I16 (maybe create a virtreg also) + SDNode *Res = CurDAG->getMachineNode(Connex::LDIX_H, + DL, + MVT::i64 + //TYPE_VECTOR_I16, + // We add a chain edge + //CurDAG->getEntryNode() + //sourceVec, + //offsetVec + //MVT::Other + //offset, + //basePtr, + //opChain + ); + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): Res = "; + Res->dump(); + dbgs() << "\n"); + + ReplaceNode(Node, Res); + + return; + } + // NEW_FP16: required for non-vector BBs like for.body + case ISD::LOAD: { + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::LOAD.\n"); + LLVM_DEBUG(dbgs() << " Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + + SDLoc DL(Node); + EVT typeVecNode; + EVT resTy = 
Node->getValueType(0); + + LoadSDNode *nodeLoad = dyn_cast(Node); + assert(nodeLoad != NULL); + + // See http://llvm.org/doxygen/SelectionDAGNodes_8h_source.html#l02048 + SDValue opChain = nodeLoad->getOperand(0); + SDValue basePtr = nodeLoad->getBasePtr(); // Operand 1 + SDValue offset = nodeLoad->getOffset(); // Operand 2 + LLVM_DEBUG(dbgs() << "Select() for LOAD: basePtr = "; + (basePtr.getNode())->dump(); + dbgs() << "Select() for LOAD: offset = "; + (offset.getNode())->dump(); + dbgs() << "Select() for LOAD: opChain = "; + (opChain.getNode())->dump(); + dbgs() << "\n"); + + EVT offsetTy = offset.getValueType(); + + LLVM_DEBUG(dbgs() << "Select() for LOAD: " + << "resTy = " << resTy.getEVTString() + << ", offsetTy = " << offsetTy.getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "Select() for LOAD: offset = "; + (offset.getNode())->dump(); + dbgs() << " basePtr = "; + (basePtr.getNode())->dump(); + dbgs() << " opChain = "; + (opChain.getNode())->dump(); + dbgs() << "\n"); + + if (resTy == MVT::f16) { + LLVM_DEBUG(dbgs() << "Select() for LOAD: We are in the case resTy == MVT::f16\n"); + + // small-TODO: although useless, normally we should emulate f16 on BPF + SDNode *Res16 = CurDAG->getMachineNode(Connex::LDH, + DL, + resTy, + MVT::Other, // 2018_09_04 + // NOT useful: MVT::Other, + ////offset, // 2018_09_04 + // Error: <> opChain, // 2018_12_28 + basePtr, // , + // + // IMPORTANT: unfortunately this operand becomes a register, not an immediate: offset, // 2018_12_29 + CurDAG->getTargetConstant(0, DL, MVT::i64), // TODO: we should put probably a different value than 0 + // + opChain // 2018_09_04 + // This gives < 0>> , basePtr + ); + SDNode *Res = Res16; + + #ifdef NOT_GOOD + //SDNode *Res16 = offset.getNode(); + SDNode *Res = CurDAG->getMachineNode( + //Connex::NOP_BPF, // This must take an immediate operand + // An unnecessary NOP: Connex::NOP, + Connex::NOP_BOGUS, + DL, + resTy, + MVT::Other, + // We add a chain edge + //CurDAG->getEntryNode() + 
//sourceVec, + //offsetVec + opChain + ); + /* + // Extremely crappy - VERY BAD. + // VERY BAD: It messes up at test covar - wrong register types, + // vector register mixed with scalar register: e.g. R7 = r4 | r4; + SDNode *Res = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HW, + DL, + //typeVecNode, + resTy, + MVT::Other, // 2018_09_04 + basePtr, // 2018_09_04 + // NOTE: we can also use offset + //SDValue(Res16, 0) + opChain + ); + //offsetVec); + */ + #endif // NOT_GOOD + + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): Res = "; + Res->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << " Select(): Res->getOpcode() = " + << Res->getOpcode() + << "\n"); + + ReplaceNode(Node, Res); + + return; + } // END if (resTy == MVT::f16) + else + if (resTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for LOAD: We are in the case " + "resTy == TYPE_VECTOR_I16\n"); + + SDNode *Res = CurDAG->getMachineNode(Connex::LD_INDIRECT_H, + DL, + TYPE_VECTOR_I16, + // We add a chain edge + //CurDAG->getEntryNode() + //sourceVec, + //offsetVec + MVT::Other, + //offset, + basePtr, + opChain + ); + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): Res = "; + Res->dump(); + dbgs() << "\n"); + + ReplaceNode(Node, Res); + + return; + } + + break; + } // END ISD::LOAD + // NEW_FP16: normally required for non-vector BBs like for.body + case ISD::STORE: { + LLVM_DEBUG(dbgs() << "Entered Select() for ISD::STORE.\n"); + LLVM_DEBUG(dbgs() << " Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + + SDLoc DL(Node); + EVT typeNode; + EVT resTy = Node->getValueType(0); + + + StoreSDNode *nodeStore = dyn_cast(Node); + assert(nodeStore != NULL); + + // See http://llvm.org/doxygen/SelectionDAGNodes_8h_source.html#l02076 + SDValue opChain = nodeStore->getOperand(0); + SDValue source = nodeStore->getValue(); // Operand 1 + SDValue basePtr = nodeStore->getBasePtr(); // 
Operand 2 + SDValue offset = nodeStore->getOffset(); // Operand 3 + LLVM_DEBUG(dbgs() << "Select() for STORE: offset = "; + (offset.getNode())->dump(); + dbgs() << "Select() for STORE: basePtr = "; + (basePtr.getNode())->dump(); + dbgs() << "Select() for STORE: source = "; + (source.getNode())->dump(); + dbgs() << "Select() for STORE: opChain = "; + (opChain.getNode())->dump(); + dbgs() << "\n"); + + EVT offsetTy = offset.getValueType(); + EVT sourceTy = source.getValueType(); + + LLVM_DEBUG(dbgs() << "Select() for STORE: " + << "sourceTy = " << sourceTy.getEVTString() + << ", offsetTy = " << offsetTy.getEVTString() + << ", resTy = " << resTy.getEVTString() + << "\n"); + + if (sourceTy == MVT::f16) { + /* We need to treat this case because the BPF processor doesn't + have any floating point support. + */ + LLVM_DEBUG(dbgs() << "Select() for STORE: We are in the case sourceTy == MVT::f16\n"); + + // I was not able to make this one work - see below: #define DOES_NOT_WORK_AND_DONNO_WHY_SPENT_4_HOURS + //#define DOES_NOT_WORK_AND_DONNO_WHY_SPENT_4_HOURS + #ifdef DOES_NOT_WORK_AND_DONNO_WHY_SPENT_4_HOURS + /* MINOR TODO: I get this error here, but we can use the other solution, at the + #else: <> + */ + /* + SDNode *Res16 = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_WH, + DL, + MVT::i16, + source, + opChain + ); + LLVM_DEBUG(dbgs() << "Select(): Res16 = "; + Res16->dump(); + dbgs() << "\n"); + */ + + SDNode *Res = CurDAG->getMachineNode(Connex::STH, + DL, + MVT::Other, + //source, + //SDValue(Res16, 0), + opChain, // 2018_12_29 + source, + basePtr, // 2018_12_29 + offset + //, opChain + // This gives < 0>> , basePtr + ); + #else + /* + // TODO: use instead of Connex::NOP_BITCONVERT_WH a new node called Connex::NOP_BITCONVERT_F16H + SDNode *Res = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_WH, + DL, + MVT::Other, + sourceVec, + offsetVec + ); + */ + + /* Crappy but it works: this is a scalar f16 STORE - we simply + avoid generating a useful instruction - we 
just replace it + with "pseudo"-instruction NOP_BOGUS, which doesn't have a + useful assembly instruction. + */ + SDNode *Res = CurDAG->getMachineNode( + //Connex::NOP_BPF, // This must take an immediate operand + // An unnecessary NOP: Connex::NOP, + Connex::NOP_BOGUS, + DL, + MVT::Other, + // We add a chain edge + //CurDAG->getEntryNode() + //sourceVec, + //offsetVec + opChain + ); + //assert(0 && "I don't think it's implemented - anyhow I don't think it's (much) used - we should try harder with NOP_BITCONVERT, etc..."); + #endif + + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): Res = "; + Res->dump(); + dbgs() << "\n"); + + ReplaceNode(Node, Res); + + return; + } + else + if (sourceTy == TYPE_VECTOR_I16) { + LLVM_DEBUG(dbgs() << "Select() for STORE: We are in the case " + "sourceTy == TYPE_VECTOR_I16\n"); + + SDNode *Res = CurDAG->getMachineNode(Connex::ST_INDIRECT_H, + DL, + // We add a chain edge + //CurDAG->getEntryNode() + //sourceVec, + //offsetVec + MVT::Other, + offset, + source, + opChain + ); + LLVM_DEBUG(dbgs() << "Select(): Node = "; + Node->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Select(): Res = "; + Res->dump(); + dbgs() << "\n"); + + ReplaceNode(Node, Res); + + return; + } + + break; + } // END ISD::STORE + + // Inspired from MipsSEISelDAGToDAG.cpp + case ISD::BUILD_VECTOR: { + SelectBUILD_VECTOR(Node); + return; + } // END case ISD::BUILD_VECTOR + /* + // VERY IMPORTANT: In ISelLowering the DAG Combiner changes + // (I think in all cases) the vector_shuffle SDNode into a BUILD_VECTOR. 
+ case ISD::VECTOR_SHUFFLE: { + SelectVECTOR_SHUFFLE(Node); + return; + } // END case ISD::VECTOR_SHUFFLE + */ + } // END switch (Opcode) + + /* + // Select the default instruction + SDNode *ResNode = SelectCode(Node); + + LLVM_DEBUG(dbgs() << "=> "; + if (ResNode == nullptr || ResNode == Node) + Node->dump(CurDAG); + else + ResNode->dump(CurDAG); + dbgs() << '\n'); + + LLVM_DEBUG(dbgs() << "Exiting Select()\n"); // - but first calling SelectCode()\n"); + ReplaceNode(Node, ResNode); + return; + */ + + // Select the default instruction + //SDNode *ResNode = SelectCode(Node); + SelectCode(Node); +} + + +FunctionPass *llvm::createConnexISelDag(ConnexTargetMachine &TM) { + return new ConnexDAGToDAGISel(TM); +} + + +// Added from MipsSEISelDAGToDAG.cpp +/// Match frameindex +bool ConnexDAGToDAGISel::selectAddrFrameIndex(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { + EVT ValTy = Addr.getValueType(); + + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), ValTy); + return true; + } + return false; +} + + +// Added from MipsSEISelDAGToDAG.cpp +/// Match frameindex+offset and frameindex|offset +bool ConnexDAGToDAGISel::selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, + SDValue &Offset, + unsigned OffsetBits) const { + if (CurDAG->isBaseWithConstantOffset(Addr)) { + ConstantSDNode *CN = dyn_cast(Addr.getOperand(1)); + if (isIntN(OffsetBits, CN->getSExtValue())) { + EVT ValTy = Addr.getValueType(); + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = dyn_cast + (Addr.getOperand(0))) + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + else + Base = Addr.getOperand(0); + + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), + ValTy); + return true; + } + } + return false; +} + + +// Added from MipsSEISelDAGToDAG.cpp +bool ConnexDAGToDAGISel::selectAddrRegImm10(SDValue Addr, SDValue &Base, + 
SDValue &Offset) const { + if (selectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10)) + return true; + + return false; +} + + +// Added from MipsSEISelDAGToDAG.cpp +bool ConnexDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Addr.getValueType()); + return true; +} + + +// Added from MipsSEISelDAGToDAG.cpp +bool ConnexDAGToDAGISel::selectIntAddrMSA(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + if (selectAddrRegImm10(Addr, Base, Offset)) + return true; + + if (selectAddrDefault(Addr, Base, Offset)) + return true; + + return false; +} + + +// Added from MipsSEISelDAGToDAG.cpp +// Select constant vector splats. +// +// Returns true and sets Imm if: +// * MSA is enabled +// * N is a ISD::BUILD_VECTOR representing a constant splat +bool ConnexDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm, + unsigned MinSizeInBits) const { + LLVM_DEBUG(dbgs() << "Entered ConnexDAGToDAGISel::selectVSplat()\n"); + + /* + if (!Subtarget->hasMSA()) + return false; + */ + + BuildVectorSDNode *Node = dyn_cast(N); + + if (!Node) + return false; + + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, + MinSizeInBits, + // !Subtarget->isLittle() + false)) + return false; + + Imm = SplatValue; + + LLVM_DEBUG(dbgs() << "ConnexDAGToDAGISel::selectVSplat(): returning true\n"); + return true; +} + + +// Select constant vector splats. +// +// In addition to the requirements of selectVSplat(), this function returns +// true and sets Imm if: +// * The splat value is the same width as the elements of the vector +// * The splat value fits in an integer with the specified signed-ness and +// width. +// +// This function looks through ISD::BITCAST nodes. 
+// TODO: This might not be appropriate for big-endian MSA since BITCAST is
+// sometimes a shuffle in big-endian mode.
+//
+// It's worth noting that this function is not used as part of the selection
+// of ldi.[bhwd] since it does not permit using the wrong-typed ldi.[bhwd]
+// instruction to achieve the desired bit pattern. ldi.[bhwd] is selected in
+// MipsSEDAGToDAGISel::selectNode.
+//
+// Parameters:
+//   N          - the value to match; a single ISD::BITCAST is looked through.
+//   Imm        - on success, receives the splat value as a target constant of
+//                the vector element type.
+//   Signed     - true to range-check the splat as a signed integer, false to
+//                range-check it as an unsigned integer.
+//   ImmBitSize - the width, in bits, the splat value has to fit in.
+// Returns true iff N matched and Imm was set.
+bool ConnexDAGToDAGISel::selectVSplatCommon(SDValue N, SDValue &Imm,
+                                            bool Signed,
+                                            unsigned ImmBitSize) const {
+  LLVM_DEBUG(dbgs() << "Entered ConnexDAGToDAGISel::selectVSplatCommon()\n");
+
+  // The element type is taken from N *before* any bitcast is stripped.
+  const EVT ElemVT = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  APInt Splat;
+  if (!selectVSplat(N.getNode(), Splat, ElemVT.getSizeInBits()))
+    return false;
+
+  // The splat has to be exactly as wide as one vector element.
+  if (Splat.getBitWidth() != ElemVT.getSizeInBits())
+    return false;
+
+  const bool FitsInImm =
+      Signed ? Splat.isSignedIntN(ImmBitSize) : Splat.isIntN(ImmBitSize);
+  if (!FitsInImm)
+    return false;
+
+  Imm = CurDAG->getTargetConstant(Splat, SDLoc(N), ElemVT);
+  return true;
+}
+
+// Select constant vector splats that fit in an unsigned 1-bit immediate.
+bool ConnexDAGToDAGISel::selectVSplatUimm1(SDValue N, SDValue &Imm) const {
+  LLVM_DEBUG(dbgs() << "Entered selectVSplatUimm1()\n");
+  return selectVSplatCommon(N, Imm, /*Signed=*/false, /*ImmBitSize=*/1);
+}
+
+// Select constant vector splats that fit in an unsigned 2-bit immediate.
+bool ConnexDAGToDAGISel::selectVSplatUimm2(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, /*Signed=*/false, /*ImmBitSize=*/2);
+}
+
+// Select constant vector splats that fit in an unsigned 3-bit immediate.
+bool ConnexDAGToDAGISel::selectVSplatUimm3(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, /*Signed=*/false, /*ImmBitSize=*/3);
+}
+
+// Select constant vector splats that fit in an unsigned 4-bit immediate.
+bool ConnexDAGToDAGISel::selectVSplatUimm4(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, /*Signed=*/false, /*ImmBitSize=*/4);
+}
+
+// Select constant vector splats that fit in an unsigned 5-bit immediate.
+bool ConnexDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, /*Signed=*/false, /*ImmBitSize=*/5);
+}
+
+
+// Select constant vector splats.
+// (unsigned, 6-bit immediate variant)
+bool ConnexDAGToDAGISel::selectVSplatUimm6(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, /*Signed=*/false, /*ImmBitSize=*/6);
+}
+
+
+// Select constant vector splats that fit in an unsigned 8-bit immediate.
+bool ConnexDAGToDAGISel::selectVSplatUimm8(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, /*Signed=*/false, /*ImmBitSize=*/8);
+}
+
+
+// Select constant vector splats that fit in a signed 5-bit immediate.
+bool ConnexDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &Imm) const {
+  return selectVSplatCommon(N, Imm, /*Signed=*/true, /*ImmBitSize=*/5);
+}
+
+
+// Select constant vector splats whose value is a power of 2.
+//
+// In addition to the requirements of selectVSplat(), this function returns
+// true and sets Imm if:
+// * The splat value is the same width as the elements of the vector
+// * The splat value is a power of two.
+//
+// On success Imm holds the base-2 logarithm of the splat value (not the splat
+// value itself), as a target constant of the vector element type.
+//
+// This function looks through ISD::BITCAST nodes.
+// TODO: This might not be appropriate for big-endian MSA since BITCAST is
+// sometimes a shuffle in big-endian mode.
+bool ConnexDAGToDAGISel::selectVSplatUimmPow2(SDValue N, SDValue &Imm) const {
+  // The element type is taken from N *before* any bitcast is stripped.
+  const EVT ElemVT = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  APInt Splat;
+  if (!selectVSplat(N.getNode(), Splat, ElemVT.getSizeInBits()) ||
+      Splat.getBitWidth() != ElemVT.getSizeInBits())
+    return false;
+
+  // exactLogBase2() yields -1 when the value is not a power of two.
+  const int32_t Exponent = Splat.exactLogBase2();
+  if (Exponent == -1)
+    return false;
+
+  Imm = CurDAG->getTargetConstant(Exponent, SDLoc(N), ElemVT);
+  return true;
+}
+
+
+// Select constant vector splats whose value only has a consecutive sequence
+// of left-most bits set (e.g. 0b11...1100...00).
+//
+// In addition to the requirements of selectVSplat(), this function returns
+// true and sets Imm if:
+// * The splat value is the same width as the elements of the vector
+// * The splat value is a consecutive sequence of left-most bits.
+//
+// This function looks through ISD::BITCAST nodes.
+// TODO: This might not be appropriate for big-endian MSA since BITCAST is
+// sometimes a shuffle in big-endian mode.
+// (On success Imm holds the number of set bits of the mask, as a target
+// constant of the vector element type.)
+bool ConnexDAGToDAGISel::selectVSplatMaskL(SDValue N, SDValue &Imm) const {
+  // The element type is taken from N *before* any bitcast is stripped.
+  const EVT ElemVT = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  APInt Splat;
+  if (!selectVSplat(N.getNode(), Splat, ElemVT.getSizeInBits()) ||
+      Splat.getBitWidth() != ElemVT.getSizeInBits())
+    return false;
+
+  // Work on the bitwise inverse: isolate its run of set bits that starts at
+  // bit zero, invert that run again, and require the result to be the
+  // original splat value.
+  const APInt Inverted = ~Splat;
+  const APInt LowRunOfInverse = Inverted & ~(Inverted + 1);
+  if (Splat != ~LowRunOfInverse)
+    return false;
+
+  Imm = CurDAG->getTargetConstant(Splat.countPopulation(), SDLoc(N), ElemVT);
+  return true;
+}
+
+
+// Select constant vector splats whose value only has a consecutive sequence
+// of right-most bits set (e.g. 0b00...0011...11).
+//
+// In addition to the requirements of selectVSplat(), this function returns
+// true and sets Imm if:
+// * The splat value is the same width as the elements of the vector
+// * The splat value is a consecutive sequence of right-most bits.
+//
+// This function looks through ISD::BITCAST nodes.
+// TODO: This might not be appropriate for big-endian MSA since BITCAST is
+// sometimes a shuffle in big-endian mode.
+// (On success Imm holds the number of set bits of the mask, as a target
+// constant of the vector element type.)
+bool ConnexDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const {
+  // The element type is taken from N *before* any bitcast is stripped.
+  const EVT ElemVT = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  APInt Splat;
+  if (!selectVSplat(N.getNode(), Splat, ElemVT.getSizeInBits()) ||
+      Splat.getBitWidth() != ElemVT.getSizeInBits())
+    return false;
+
+  // Isolate the run of set bits that starts at bit zero; the splat qualifies
+  // only if that run is the whole value.
+  const APInt LowRun = Splat & ~(Splat + 1);
+  if (Splat != LowRun)
+    return false;
+
+  Imm = CurDAG->getTargetConstant(Splat.countPopulation(), SDLoc(N), ElemVT);
+  return true;
+}
+
+
+// Select constant vector splats whose bitwise inverse is a power of 2.
+//
+// Beyond what selectVSplat() requires, the splat value must be as wide as one
+// vector element. On success Imm holds the base-2 logarithm of the inverted
+// splat value, as a target constant of the vector element type.
+//
+// This function looks through ISD::BITCAST nodes.
+bool ConnexDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N,
+                                                 SDValue &Imm) const {
+  // The element type is taken from N *before* any bitcast is stripped.
+  const EVT ElemVT = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  APInt Splat;
+  if (!selectVSplat(N.getNode(), Splat, ElemVT.getSizeInBits()) ||
+      Splat.getBitWidth() != ElemVT.getSizeInBits())
+    return false;
+
+  // exactLogBase2() yields -1 when the value is not a power of two.
+  const int32_t Exponent = (~Splat).exactLogBase2();
+  if (Exponent == -1)
+    return false;
+
+  Imm = CurDAG->getTargetConstant(Exponent, SDLoc(N), ElemVT);
+  return true;
+}
+
Index: lib/Target/Connex/ConnexISelLowering.h
===================================================================
--- lib/Target/Connex/ConnexISelLowering.h
+++ lib/Target/Connex/ConnexISelLowering.h
@@ -0,0 +1,214 @@
+//===-- ConnexISelLowering.h - Connex DAG Lowering Interface ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the interfaces that Connex uses to lower LLVM code into a
+/// selection DAG.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CONNEX_CONNEXISELLOWERING_H
+#define LLVM_LIB_TARGET_CONNEX_CONNEXISELLOWERING_H
+
+#include "Connex.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLowering.h"
+// LLVM_DEBUG / dbgs() are used by the inline getOptimalMemOpType() below.
+#include "llvm/Support/Debug.h"
+
+#include "ConnexConfig.h"
+
+
+
+namespace llvm {
+class ConnexSubtarget;
+
+namespace ConnexISD {
+  /*
+  Connex-specific SelectionDAG node opcodes. Numbering starts at
+  ISD::BUILTIN_OP_END, i.e. right after the target-independent opcodes; see
+  http://llvm.org/docs/doxygen/html/namespacellvm_1_1ISD.html
+  */
+  enum NodeType : unsigned {
+    FIRST_NUMBER = ISD::BUILTIN_OP_END,
+    RET_FLAG,
+    CALL,
+    SELECT_CC,
+    BR_CC,
+
+    /* Inspired from lib/Target/X86/X86ISelLowering.h
+    /// A wrapper node for TargetConstantPool,
+    /// TargetExternalSymbol, and TargetGlobalAddress.
+    */
+    Wrapper,
+
+    // From [LLVM]/llvm/lib/Target/Mips/MipsISelLowering.h
+    // Extended vector element extraction
+    VEXTRACT_SEXT_ELT,
+    VEXTRACT_ZEXT_ELT,
+
+    //ConstantPool,
+
+    // Vector Shuffle with mask as an operand
+    VSHF,  // Generic shuffle
+    SHF,   // 4-element set shuffle.
+    ILVEV, // Interleave even elements
+    ILVOD, // Interleave odd elements
+    ILVL,  // Interleave left elements
+    ILVR,  // Interleave right elements
+    PCKEV, // Pack even elements
+    PCKOD, // Pack odd elements
+  };
+} // end namespace ConnexISD
+
+
+/// TargetLowering implementation for the Connex backend: declares the hooks
+/// used to lower LLVM IR constructs (calls, returns, formal arguments and the
+/// operations given custom lowering) into SelectionDAG nodes.
+class ConnexTargetLowering : public TargetLowering {
+public:
+  explicit ConnexTargetLowering(const TargetMachine &TM,
+                                const ConnexSubtarget &STI);
+
+  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+
+  // Inspired from lib/Target/AMDGPU/AMDGPUISelLowering.h
+  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op,
+                                  SelectionDAG &DAG) const;
+
+  // Provide custom lowering hooks for some operations.
+  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+
+  // This method returns the name of a target specific DAG node.
+  const char *getTargetNodeName(unsigned Opcode) const override;
+
+  MachineBasicBlock *
+  EmitInstrWithCustomInserter(MachineInstr &MI,
+                              MachineBasicBlock *BB) const override;
+
+private:
+  /*
+  // From llvm/lib/Target/Mips/MipsISelLowering.h
+  // Create a TargetGlobalAddress node.
+  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
+                        unsigned Flag) const;
+
+  // Create a TargetExternalSymbol node.
+  SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG,
+                        unsigned Flag) const;
+
+  // Create a TargetBlockAddress node.
+  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
+                        unsigned Flag) const;
+
+  // Create a TargetJumpTable node.
+  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
+                        unsigned Flag) const;
+  */
+  // Create a TargetConstantPool node.
+  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
+                        unsigned Flag) const;
+
+  // Added from lib/Target/Mips/MipsSEISelLowering.cpp (method addMSAIntType)
+  void addVectorIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC);
+
+  // Inspired from lib/Target/Mips/MipsSEISelLowering.cpp, addMSAFloatType()
+  void addVectorFloatType(MVT::SimpleValueType Ty,
+                          const TargetRegisterClass *RC);
+
+  bool allowsMisalignedMemoryAccesses(EVT VT,
+                                      unsigned,
+                                      unsigned,
+                                      bool *Fast) const;
+
+  void ReplaceAddI32UseWithADDVH(MVT &aType, SDValue &Index,
+                                 SelectionDAG &DAG) const;
+
+  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+  /*static */ SDValue LowerMGATHER(SDValue &Op,
+                                   //const ConnexSubtarget &Subtarget,
+                                   SelectionDAG &DAG) const;
+  /*static */ SDValue LowerMSCATTER(SDValue &Op,
+                                    //const ConnexSubtarget &Subtarget,
+                                    SelectionDAG &DAG) const;
+
+  // Lower the result values of a call, copying them out of physregs into vregs
+  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+                          CallingConv::ID CallConv, bool IsVarArg,
+                          const SmallVectorImpl<ISD::InputArg> &Ins,
+                          const SDLoc &DL, SelectionDAG &DAG,
+                          SmallVectorImpl<SDValue> &InVals) const;
+
+  // Maximum number of arguments to a call
+  static const unsigned MaxArgs;
+
+  // Lower a call into CALLSEQ_START - ConnexISD::CALL - CALLSEQ_END chain
+  SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+                    SmallVectorImpl<SDValue> &InVals) const override;
+
+  // Lower incoming arguments, copy physregs into vregs
+  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
+                               bool IsVarArg,
+                               const SmallVectorImpl<ISD::InputArg> &Ins,
+                               const SDLoc &DL, SelectionDAG &DAG,
+                               SmallVectorImpl<SDValue> &InVals) const override;
+
+  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+                      const SmallVectorImpl<ISD::OutputArg> &Outs,
+                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
+                      SelectionDAG &DAG) const override;
+
+  // Returns MVT::i64 for memory operations of at least 8 bytes and MVT::i32
+  // otherwise; all other parameters are ignored.
+  EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+                          bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+                          MachineFunction &MF) const override {
+    // DEBUG_TYPE is defined only for the duration of this inline body so that
+    // the header does not leak it into the including translation unit.
+    #define DEBUG_TYPE "connex-lower"
+
+    LLVM_DEBUG(dbgs() << "Entered getOptimalMemOpType(Size = " << Size
+                      << ")\n");
+
+    return Size >= 8 ? MVT::i64 : MVT::i32;
+
+    // TODO_CHANGE_BACKEND - Seems it's NOT required:
+    //return Size >= 8 ? TYPE_VECTOR_ELEMENT : MVT::i32;
+
+    #undef DEBUG_TYPE
+  }
+
+  // Unconditionally allows turning constant loads into integer immediates.
+  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
+                                         Type *Ty) const override {
+    return true;
+  }
+
+  SDValue LowerVSELECT(SDValue &Op, SelectionDAG &DAG) const;
+
+  // From [LLVM]/llvm/lib/Target/Mips/MipsSEISelLowering.h
+  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue LowerADD_I32(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue LowerADD_F16(SDValue &Op, SelectionDAG *CurDAG) const;
+  SDValue LowerMUL_F16(SDValue &Op, SelectionDAG *CurDAG) const;
+  SDValue LowerREDUCE_F16(SDValue &Op, SelectionDAG *CurDAG) const;
+
+  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
+
+
+  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+  //
+  EVT getSetCCResultType(const DataLayout &, LLVMContext &, EVT VT) const;
+}; // end class ConnexTargetLowering
+} // end namespace llvm
+
+#endif
+
Index: lib/Target/Connex/ConnexISelLowering.cpp
===================================================================
--- lib/Target/Connex/ConnexISelLowering.cpp
+++ lib/Target/Connex/ConnexISelLowering.cpp
@@ -0,0 +1,3559 @@
+//===-- ConnexISelLowering.cpp - Connex DAG Lowering Implementation ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Connex uses to lower LLVM code into a
+// selection DAG.
+// +//===----------------------------------------------------------------------===// + +#include "ConnexISelLowering.h" +#include "Connex.h" +#include "ConnexTargetMachine.h" +#include "ConnexSubtarget.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" +// +// See http://llvm.org/docs/doxygen/html/classllvm_1_1DILocation.html +#include "llvm/IR/DebugInfoMetadata.h" + +using namespace llvm; + + + +#define DEBUG_TYPE "connex-lower" + + +//#define DO_F16_EMULATION_IN_ISEL_LOWERING +#ifdef DO_F16_EMULATION_IN_ISEL_LOWERING + #define DO_MUL_F16_EMULATION_IN_ISEL_LOWERING + #define DO_ADD_F16_EMULATION_IN_ISEL_LOWERING +#endif + + +static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) { + MachineFunction &MF = DAG.getMachineFunction(); + DAG.getContext()->diagnose( + DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc())); +} + +static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg, + SDValue Val) { + MachineFunction &MF = DAG.getMachineFunction(); + std::string Str; + raw_string_ostream OS(Str); + OS << Msg; + Val->print(OS); + OS.flush(); + DAG.getContext()->diagnose( + DiagnosticInfoUnsupported(MF.getFunction(), Str, DL.getDebugLoc())); +} + + + +/* +SDValue MipsTargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag); +} + +SDValue 
MipsTargetLowering::getTargetNode(ExternalSymbolSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag); +} + +SDValue MipsTargetLowering::getTargetNode(BlockAddressSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag); +} + +SDValue MipsTargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); +} +*/ + +SDValue ConnexTargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(), + N->getOffset(), Flag); +} + + +// Enable vector (inspired from Mips MSA) support for the given integer +// type and Register class. +void ConnexTargetLowering::addVectorIntType(MVT::SimpleValueType aType, + const TargetRegisterClass *RC) { + LLVM_DEBUG(dbgs() << "Entered addVectorIntType(aType = " + << aType << ")\n"); + //LLVM_DEBUG(dbgs() << "addVectorIntType(): "; RC->dump(); dbgs() << "\n"); + + addRegisterClass(aType, RC); + + // Expand all builtin opcodes. 
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, aType, Expand); + + + //Original code: + //setOperationAction(ISD::BITCAST, aType, Legal); + setOperationAction(ISD::BITCAST, aType, Custom); + /* + setOperationAction(ISD::BITCAST, aType, Promote); + //setOperationAction(ISD::BITCAST, TYPE_VECTOR_I32, Promote); + // Inspired from book Cardoso_2014, page 152 + AddPromotedToType(ISD::BITCAST, TYPE_VECTOR_I16, TYPE_VECTOR_I32); + */ + + // This is found in include/llvm/Target/TargetSelectionDAG.td + setOperationAction(ISD::NON_EXTLOAD, aType, Legal); + setOperationAction(ISD::EXTLOAD, aType, Legal); + + setOperationAction(ISD::LOAD, aType, Legal); + setOperationAction(ISD::STORE, aType, Legal); + + /* IMPORTANT: NONE of these seem to be required anymore after the last + changes of the TableGen spec in ConnexInstrInfo_vec.td. + + // This is to help instruction selection of masked_gather: + //addVectorIntType(MVT::v128i64, &Connex::VectorHRegClass); + // + //setOperationAction(ISD::MGATHER, aType, Legal); + //setOperationAction(ISD::MGATHER, aType, Custom); + */ + /* Required if we work with index vector that is not zeroinitializer, + * or if it is LD256[] + * NOT with something like LD256[%B] */ + //setOperationAction(ISD::MGATHER, MVT::v64i32, Custom); + //setOperationAction(ISD::MGATHER, MVT::v128i16, Custom); +#ifdef MAY_2017_05_26_WORKS_FOR_V128I16 +#endif + // 2019_03_30: setOperationAction(ISD::MGATHER, aType, Custom); + setOperationAction(ISD::MGATHER, aType, Legal); // 2019_03_30 + //setOperationAction(ISD::MGATHER, aType, Legal); + /* + setOperationAction(ISD::MGATHER, aType, Legal); + setOperationAction(ISD::MGATHER, MVT::v128i64, Legal); + */ + + /* + * Failing to put this line gives the following STRANGE error - can't explain + * why this happens: + * include/llvm/CodeGen/ValueTypes.h:249: + * unsigned int llvm::EVT::getVectorNumElements() const: + * Assertion `isVector() && "Invalid vector type!"' failed. 
+ */ + //setOperationAction(ISD::MSCATTER, aType, Legal); + // + setOperationAction(ISD::MSCATTER, aType, Custom); +/* + setOperationAction(ISD::MSCATTER, MVT::v64i32, Expand); + AddPromotedToType(ISD::MSCATTER, TYPE_VECTOR_I32, TYPE_VECTOR_I16); +*/ + setOperationAction(ISD::EXTRACT_VECTOR_ELT, aType, Custom); + + // TODO!!!!: do a call to addVectorIntType(MVT::i32) instead of this + /* + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i16, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i32, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, aType, Legal); + */ + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, aType, Custom); + + setOperationAction(ISD::BUILD_VECTOR, aType, Custom); + + // TODO!!!!: do a call to addVectorIntType(MVT::i32) instead of this + setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + setOperationAction(ISD::ConstantPool, aType, Custom); + + setOperationAction(ISD::ADD, aType, Legal); + setOperationAction(ISD::AND, aType, Legal); + setOperationAction(ISD::CTLZ, aType, Legal); + setOperationAction(ISD::CTPOP, aType, Legal); + setOperationAction(ISD::MUL, aType, Legal); + setOperationAction(ISD::OR, aType, Legal); + + //setOperationAction(ISD::SDIV, aType, Custom); + setOperationAction(ISD::SDIV, aType, Legal); + + //setOperationAction(ISD::SREM, aType, Custom); + setOperationAction(ISD::SREM, aType, Legal); + + setOperationAction(ISD::SHL, aType, Legal); + + setOperationAction(ISD::SRA, aType, Legal); + //setOperationAction(ISD::SRA, aType, Custom); + + setOperationAction(ISD::SRL, aType, Legal); + setOperationAction(ISD::SUB, aType, Legal); + + //setOperationAction(ISD::UDIV, aType, Custom); //Legal); + setOperationAction(ISD::UDIV, aType, Legal); + + //setOperationAction(ISD::UREM, aType, Custom); //Legal); + setOperationAction(ISD::UREM, aType, Legal); + + setOperationAction(ISD::VECTOR_SHUFFLE, 
aType, Custom); +#ifdef IMPLEMENT_VSELECT_WITH_PSEUDOINSTRS_BUNDLES + setOperationAction(ISD::VSELECT, aType, Custom); +#else + setOperationAction(ISD::VSELECT, aType, Legal); +#endif + setOperationAction(ISD::XOR, aType, Legal); + + /* + if (aType == MVT::v4i32 || aType == MVT::v2i64) { + setOperationAction(ISD::FP_TO_SINT, aType, Legal); + setOperationAction(ISD::FP_TO_UINT, aType, Legal); + setOperationAction(ISD::SINT_TO_FP, aType, Legal); + setOperationAction(ISD::UINT_TO_FP, aType, Legal); + } + */ + + // changed + setOperationAction(ISD::SETCC, aType, Legal); + /* + * Following advice Bruno Cardoso - see email Jun 7, 2016 from + * alex.susu@gmail.com + setOperationAction(ISD::SETCC, aType, Custom); // Expand, Promote or Legal + */ + + setCondCodeAction(ISD::SETEQ, aType, Legal); + setCondCodeAction(ISD::SETNE, aType, Expand); + setCondCodeAction(ISD::SETGE, aType, Expand); + setCondCodeAction(ISD::SETGT, aType, Expand); + setCondCodeAction(ISD::SETUGE, aType, Expand); + setCondCodeAction(ISD::SETUGT, aType, Expand); +} + + + +// Inspired from lib/Target/Mips/MipsSEISelLowering.cpp, addMSAFloatType() +// Enable support for the given floating-point type and Register class. +void ConnexTargetLowering::addVectorFloatType(MVT::SimpleValueType aType, + const TargetRegisterClass *RC) { + LLVM_DEBUG(dbgs() << "Entered addVectorFloatType(aType = " + << aType << ")\n"); + addRegisterClass(aType, RC); + + // Expand all builtin opcodes. 
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, aType, Expand); + + setOperationAction(ISD::LOAD, aType, Legal); + setOperationAction(ISD::STORE, aType, Legal); + setOperationAction(ISD::BITCAST, aType, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, aType, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, aType, Legal); + setOperationAction(ISD::BUILD_VECTOR, aType, Custom); + + setOperationAction(ISD::MGATHER, aType, Custom); + setOperationAction(ISD::MSCATTER, aType, Legal); + // TODO: only if we use f32, f64 I guess: setOperationAction(ISD::MSCATTER, aType, Custom); + + + //if (Ty != MVT::v8f16) { + setOperationAction(ISD::FABS, aType, Legal); + #ifdef DO_ADD_F16_EMULATION_IN_ISEL_LOWERING + // 2018_08_17: + setOperationAction(ISD::FADD, aType, Custom); + #else + setOperationAction(ISD::FADD, aType, Legal); + #endif + // + setOperationAction(ISD::FDIV, aType, Legal); + setOperationAction(ISD::FEXP2, aType, Legal); + setOperationAction(ISD::FLOG2, aType, Legal); + setOperationAction(ISD::FMA, aType, Legal); + #ifdef DO_MUL_F16_EMULATION_IN_ISEL_LOWERING + setOperationAction(ISD::FMUL, aType, Custom); + #else + // 2018_08_17: + setOperationAction(ISD::FMUL, aType, Legal); + #endif + setOperationAction(ISD::FRINT, aType, Legal); + setOperationAction(ISD::FSQRT, aType, Legal); + setOperationAction(ISD::FSUB, aType, Legal); + setOperationAction(ISD::VSELECT, aType, Legal); + + setOperationAction(ISD::SETCC, aType, Legal); + setCondCodeAction(ISD::SETOGE, aType, Expand); + setCondCodeAction(ISD::SETOGT, aType, Expand); + setCondCodeAction(ISD::SETUGE, aType, Expand); + setCondCodeAction(ISD::SETUGT, aType, Expand); + setCondCodeAction(ISD::SETGE, aType, Expand); + setCondCodeAction(ISD::SETGT, aType, Expand); + //} +} + + +// Inspired from llvm/lib/Target/Mips/MipsSEISelLowering.cpp +bool ConnexTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned, + unsigned, + bool *Fast) const { + //MVT::SimpleValueType SVT 
= VT.getSimpleVT().SimpleTy; + + // if (Subtarget.systemSupportsUnalignedAccess()) { + // MIPS32r6/MIPS64r6 is required to support unaligned access. It's + // implementation defined whether this is handled by hardware, software, or + // a hybrid of the two but it's expected that most implementations will + // handle the majority of cases in hardware. + if (Fast) + *Fast = true; + return true; + // } + + /* + switch (SVT) { + case MVT::i64: + case MVT::i32: + if (Fast) + *Fast = true; + return true; + default: + return false; + } + */ +} + + + +ConnexTargetLowering::ConnexTargetLowering(const TargetMachine &TM, + const ConnexSubtarget &STI) + : TargetLowering(TM) { + + LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::ConnexTargetLowering(): \n"); + + // Inspired from lib/Target/ARM/ARMISelLowering.cpp, ARMTargetLowering::ARMTargetLowering() + static const struct { + const RTLIB::Libcall Op; + const char *const Name; + const CallingConv::ID CC; + const ISD::CondCode Cond; + } MemOpsLibraryCalls[] = { + // Memory operations + // RTABI chapter 4.3.4 + /* + // NOTE: CallingConv::ARM_AAPCS is defined in http://llvm.org/docs/doxygen/html/namespacellvm_1_1CallingConv.html + { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + */ + { RTLIB::MEMMOVE, "memmove", CallingConv::C, ISD::SETCC_INVALID }, + { RTLIB::MEMSET, "memset", CallingConv::C, ISD::SETCC_INVALID }, + }; + + for (const auto &LC : MemOpsLibraryCalls) { + LLVM_DEBUG(dbgs() << "ConnexTargetLowering::ConnexTargetLowering(): " + "registering RT-Libcall LC.name = " + << LC.Name << "\n"); + + setLibcallName(LC.Op, LC.Name); + setLibcallCallingConv(LC.Op, LC.CC); + if (LC.Cond != ISD::SETCC_INVALID) + setCmpLibcallCC(LC.Op, LC.Cond); + } + // END + + + // Set up the register classes. 
+ // TODO_CHANGE_BACKEND: + //addRegisterClass(MVT::i64, &Connex::GPRRegClass); + addRegisterClass(TYPE_SCALAR_ELEMENT, &Connex::GPRRegClass); + + // Taken from llvm/lib/Target/Mips/MipsSEISelLowering.cpp + //if (Subtarget.hasDSP() || Subtarget.hasMSA()) { + // Expand all truncating stores and extending loads. + for (MVT VT0 : MVT::vector_valuetypes()) { + for (MVT VT1 : MVT::vector_valuetypes()) { + #ifdef NOT_NOT_NOT + LLVM_DEBUG(dbgs() << "VT0.getSizeInBits() = " + << VT0.getSizeInBits() << "\n"); + LLVM_DEBUG(dbgs() << "VT1.getSizeInBits() = " + << VT1.getSizeInBits() << "\n"); + #endif + setTruncStoreAction(VT0, VT1, Expand); + //This is WRONG - it was added by me and caused llc to give core dump: setLoadExtAction(ISD::STORE, VT0, VT1, Expand); + + setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand); + setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand); + } + } + //} + + + // As said in [Pandey_2015], page 152: + // "The legalize phase can also instruct the kind of classes of registers + // supported for given data." 
+ + // Taken from llvm/lib/Target/Mips/MipsSEISelLowering.cpp + //if (Subtarget.hasMSA()) { + /* + addVectorIntType(MVT::v16i8, &Connex::MSA128BRegClass); + addVectorIntType(MVT::v8i16, &Connex::VectorHRegClass); + addVectorIntType(MVT::v4i32, &Connex::MSA128WRegClass); + addVectorIntType(MVT::v2i64, &Connex::VectorHRegClass); + */ + + /* + // TODO to add these reg classes in the end + addVectorIntType(MVT::v64i8, &Connex::MSA128BRegClass); + addVectorIntType(MVT::v32i16, &Connex::VectorHRegClass); + addVectorIntType(MVT::v16i32, &Connex::MSA128WRegClass); + */ + // TODO_CHANGE_BACKEND: + //addVectorIntType(MVT::v8i64, &Connex::VectorHRegClass); + // To prevent error: <getSize() && "Size mismatch!"' failed.>>: + addVectorIntType(MVT::v8i64, &Connex::VectorHRegClass); // 2019_03_30 + // + addVectorIntType(TYPE_VECTOR_I16, &Connex::VectorHRegClass); + + // NEW32 + //addVectorIntType(TYPE_VECTOR_I32, &Connex::MSA128WRegClass); + addVectorIntType(TYPE_VECTOR_I32, &Connex::VectorHRegClass); + // + + /* + // These are not useful since we already gave addVectorIntType(TYPE_VECTOR_I32) above + // NEW32 + LLVM_DEBUG(dbgs() << "Calling addRegisterClass(TYPE_VECTOR_I32, &Connex::MSA128WRegClass)\n"); + addRegisterClass(TYPE_VECTOR_I32, &Connex::MSA128WRegClass); + */ + + /* + LLVM_DEBUG(dbgs() << "Calling setOperationAction(ISD::ADD, Custom)\n"); + setOperationAction(ISD::ADD, TYPE_VECTOR_I32, Custom); + */ + + + /* + LLVM_DEBUG(dbgs() << "Calling setOperationAction(ISD::ADD, Expand)\n"); + setOperationAction(ISD::ADD, TYPE_VECTOR_I32, Expand); + AddPromotedToType(ISD::ADD, TYPE_VECTOR_I32, TYPE_VECTOR_I16); + */ + +#ifdef SPECIAL_BITCAST_PROMOTE_EXPAND + // NEW32 + /* This normally results in having at I-sel something like: + Legally typed node: t35: v64i32,ch = masked_gather)> t21, undef:v64i32, t37, Constant:i64<51>, t23 + Promote integer result: t535: i32 = extract_vector_elt t35, Constant:i64<0> + Legally typed node: t727: i64 = extract_vector_elt t35, Constant:i64<0> + 
Promote integer result: t538: i32 = extract_vector_elt t35, Constant:i64<1> + Legally typed node: t728: i64 = extract_vector_elt t35, Constant:i64<1> + */ + + // Inspired from book Cardoso_2014, page 152 + // + LLVM_DEBUG(dbgs() << "Calling setOperationAction(ISD::OR, Expand)\n"); + setOperationAction(ISD::OR, TYPE_VECTOR_I32, /*Promote*/ Expand); + AddPromotedToType(ISD::OR, /*src*/ TYPE_VECTOR_I32, /*dst*/ TYPE_VECTOR_I16); + + + LLVM_DEBUG(dbgs() << "ISD::BITCAST - we use setOperationAction(..., Expand).\n"); + setOperationAction(ISD::BITCAST, TYPE_VECTOR_I16, Expand /*Promote*/); + AddPromotedToType(ISD::BITCAST, /*src*/ TYPE_VECTOR_I16, /*dst*/ TYPE_VECTOR_I32); + setOperationAction(ISD::BITCAST, TYPE_VECTOR_I32, /*Promote*/ Expand); + AddPromotedToType(ISD::BITCAST, /*src*/ TYPE_VECTOR_I32, /*dst*/ TYPE_VECTOR_I16); + + LLVM_DEBUG(dbgs() << "ISD::ADD - we use setOperationAction(..., Expand).\n"); + setOperationAction(ISD::ADD, TYPE_VECTOR_I16, Expand /*Promote*/); + AddPromotedToType(ISD::ADD, /*src*/ TYPE_VECTOR_I16, /*dst*/ TYPE_VECTOR_I32); + setOperationAction(ISD::ADD, TYPE_VECTOR_I32, /*Promote*/ Expand); + AddPromotedToType(ISD::ADD, /*src*/ TYPE_VECTOR_I32, /*dst*/ TYPE_VECTOR_I16); +#endif + + //addVectorFloatType(MVT::v128f16, &Connex::VectorHRegClass); + addVectorFloatType(TYPE_VECTOR_F16, &Connex::VectorHRegClass); + + /* + addVectorFloatType(MVT::v8f16, &Mips::VectorHRegClass); + addVectorFloatType(MVT::v4f32, &Mips::MSA128WRegClass); + addVectorFloatType(MVT::v2f64, &Mips::VectorHRegClass); + */ + /* + From http://llvm.org/docs/doxygen/html/classllvm_1_1TargetLoweringBase.html: + void llvm::TargetLoweringBase::setTargetDAGCombine(ISD::NodeType NT) [inline, protected] + <> + */ + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::SRA); +#ifdef IMPLEMENT_VSELECT_WITH_PSEUDOINSTRS_BUNDLES + setTargetDAGCombine(ISD::VSELECT); +#endif + setTargetDAGCombine(ISD::XOR); + //} + + + /* VERY IMPORTANT: this is why 
I've spent ~5 days of debugging + * - the computeRegisterProperties() function is called at the end of the + * constructor in lib/Target/Mips/MipsSEISelLowering.cpp (or + * Mips16SEILoweing.cpp; note that ARM/ARMISelLowering.cpp is somewhat similar + * to our case - computeRegisterProperties() is called AFTER all + * addRegisterClass() calls). + * But here it is called in the "middle", after the types are being + * declared - i.e., addRegisterClass() has to be called BEFORE + * computeRegisterProperties() - THIS IS VERY IMPORTANT. + */ + // Compute derived properties from the register classes + computeRegisterProperties(STI.getRegisterInfo()); + + setStackPointerRegisterToSaveRestore(Connex::R11); + + #ifdef NEW_BIGGER_OPS + /* + setOperationAction(ISD::DIV, MVT::u16, Custom); + setOperationAction(ISD::DIV, MVT::i16, Custom); + */ + if (MVT::i16 != TYPE_SCALAR_ELEMENT) { + setOperationAction(ISD::MUL, MVT::i16, Custom); + } + if (MVT::i32 != TYPE_SCALAR_ELEMENT) { + setOperationAction(ISD::ADD, MVT::i32, Custom); + setOperationAction(ISD::SUB, MVT::i32, Custom); + setOperationAction(ISD::MUL, MVT::i32, Custom); + } + #endif + /* + setOperationAction(ISD::~~~~VLOAD, TYPE_VECTOR_I32, Custom); + setOperationAction(ISD::MGATHER, TYPE_VECTOR_I32, Custom); + */ + + /* + From http://llvm.org/docs/doxygen/html/classllvm_1_1TargetLoweringBase.html + void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) + Indicate that the specified operation does not work with the specified type and indicate what to do about it. + + // From http://llvm.org/docs/WritingAnLLVMBackend.html#the-selectiondag-legalize-phase + "For some operations, simple type promotion or operation expansion may be insufficient. + [...] + In the LowerOperation method, for each Custom operation, a case statement should be added to indicate what function to call. 
+ " + */ +// TODO_CHANGE_BACKEND: + setOperationAction(ISD::BR_CC, TYPE_SCALAR_ELEMENT, Custom); + + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BRIND, MVT::Other, Expand); + setOperationAction(ISD::BRCOND, MVT::Other, Expand); + + setOperationAction(ISD::SETCC, TYPE_SCALAR_ELEMENT, Expand); + + setOperationAction(ISD::SELECT, TYPE_SCALAR_ELEMENT, Expand); + + setOperationAction(ISD::SELECT_CC, TYPE_SCALAR_ELEMENT, Custom); + + setOperationAction(ISD::GlobalAddress, TYPE_SCALAR_ELEMENT, Custom); + + setOperationAction(ISD::DYNAMIC_STACKALLOC, TYPE_SCALAR_ELEMENT, Custom); + + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + + setOperationAction(ISD::SDIVREM, TYPE_SCALAR_ELEMENT, Expand); + setOperationAction(ISD::UDIVREM, TYPE_SCALAR_ELEMENT, Expand); + + setOperationAction(ISD::SREM, TYPE_SCALAR_ELEMENT, Expand); + setOperationAction(ISD::UREM, TYPE_SCALAR_ELEMENT, Expand); + + setOperationAction(ISD::MULHU, TYPE_SCALAR_ELEMENT, Expand); + setOperationAction(ISD::MULHS, TYPE_SCALAR_ELEMENT, Expand); + + setOperationAction(ISD::UMUL_LOHI, TYPE_SCALAR_ELEMENT, Expand); + setOperationAction(ISD::SMUL_LOHI, TYPE_SCALAR_ELEMENT, Expand); + + setOperationAction(ISD::ADDC, TYPE_SCALAR_ELEMENT, Expand); + setOperationAction(ISD::ADDE, TYPE_SCALAR_ELEMENT, Expand); + + setOperationAction(ISD::SUBC, TYPE_SCALAR_ELEMENT, Expand); + setOperationAction(ISD::SUBE, TYPE_SCALAR_ELEMENT, Expand); + + + setOperationAction(ISD::ROTR, TYPE_SCALAR_ELEMENT, Expand); + setOperationAction(ISD::ROTL, TYPE_SCALAR_ELEMENT, Expand); + + setOperationAction(ISD::SHL_PARTS, TYPE_SCALAR_ELEMENT, Expand); + setOperationAction(ISD::SRL_PARTS, TYPE_SCALAR_ELEMENT, Expand); + setOperationAction(ISD::SRA_PARTS, TYPE_SCALAR_ELEMENT, Expand); + + setOperationAction(ISD::CTTZ, TYPE_SCALAR_ELEMENT, Custom); + setOperationAction(ISD::CTLZ, TYPE_SCALAR_ELEMENT, Custom); + // + 
setOperationAction(ISD::CTTZ_ZERO_UNDEF, TYPE_SCALAR_ELEMENT, Custom); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, TYPE_SCALAR_ELEMENT, Custom); + + setOperationAction(ISD::CTPOP, TYPE_SCALAR_ELEMENT, Expand); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand); + + /* + Inspired from llvm/lib/Target/X86/X86ISelLowering.cpp. + + IMPORTANT: From https://llvm.org/svn/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h + + /// Convenience method to set an operation to Promote and specify the type + /// in a single call. + void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { + setOperationAction(Opc, OrigVT, Promote); + AddPromotedToType(Opc, OrigVT, DestVT); + } + */ + setOperationPromotedToType(ISD::ConstantPool, MVT::i32, TYPE_SCALAR_ELEMENT); + setOperationPromotedToType(ISD::Constant, MVT::i32, TYPE_SCALAR_ELEMENT); + setOperationPromotedToType(ISD::ADD, MVT::i32, TYPE_SCALAR_ELEMENT); + + setOperationPromotedToType(ISD::ADD, MVT::i16, TYPE_SCALAR_ELEMENT); + setOperationPromotedToType(ISD::ADD, MVT::i64, MVT::i32); + + + // Inspired from AMDGPU/AMDGPUISelLowering.cpp + //Need DAG EVT LegalVT = getTypeToTransformTo(*DAG.getContext(), MVT::i32); + //LLVM_DEBUG(dbgs() << "addVectorIntType(): LegalVT " << LegalVT << "\n"); + + // Extended load operations for i1 types must be promoted + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); + + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand); + } + + setBooleanContents(ZeroOrOneBooleanContent); + + // Function alignments 
(log2) + setMinFunctionAlignment(3); + setPrefFunctionAlignment(3); + + // inline memcpy() for kernel to see explicit copy + MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CONNEX_VECTOR_LENGTH; + MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CONNEX_VECTOR_LENGTH; + MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CONNEX_VECTOR_LENGTH; + + + // Inspired from ARMISelLowering.cpp: + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, MVT::i64, Legal); + setIndexedLoadAction(im, MVT::i16, Promote); + setIndexedStoreAction(im, MVT::i64, Legal); + setIndexedStoreAction(im, MVT::i16, Promote); + } + +#ifdef NOTNOT + /* + LLVM_DEBUG(dbgs() << "addVectorIntType(): calling setTypeAction()\n"); + ValueTypeActionImpl::setTypeAction(MVT::i16, TypePromoteInteger); + ValueTypeActionImpl::setTypeAction(MVT::i32, TypePromoteInteger); + // + LLVM_DEBUG(dbgs() << "addVectorIntType(): calling setTypeAction()\n"); + ValueTypeActionImpl::setTypeAction(MVT::i16, TypeLegal); + ValueTypeActionImpl::setTypeAction(MVT::i32, TypeLegal); + */ + + /* + IMPORTANT: the whole reason I am using below setTypeAction() is that we get + error: + <> + when using setOperationAction(Intrinsic::connex_repeat_x_times...): + //setOperationAction(Intrinsic::connex_repeat_x_times, MVT::i16, Expand); //Legal); + //setOperationAction(Intrinsic::connex_repeat_x_times, MVT::i32, Legal); + //setOperationAction(Intrinsic::connex_repeat_x_times, MVT::i32, Promote); + + + IMPORTANT: This piece of code HAS to be put at the end of this method because + otherwise one or more of the above calls are rendering this + setTypeAction() below useless. + But then it gives error like: + <, BasicBlock:ch >> + because I made i16 a legal type and the instruction scheduler does NOT + have to promote it to i64, although br_cc requires it (see TableGen + definition). 
+ To fix this we should make sure we put setTypeAction(MVT::i16, TypeLegal) + before all setOperation...() that intefere with it. + + See http://llvm.org/docs/doxygen/html/TargetLowering_8h_source.html#l00096 + for enum LegalizeTypeAction. + */ + #ifdef DO_F16_EMULATION_IN_ISEL_LOWERING + // 2018_08_17 + setOperationAction(Intrinsic::connex_reduce_f16, MVT::f16, Custom); + #endif + + LLVM_DEBUG(dbgs() << "ConnexTargetLowering(): calling " + "setTypeAction(MVT::i16, ...)\n"); + LegalizeTypeAction ta = ValueTypeActions.getTypeAction(MVT::i16); + LLVM_DEBUG(dbgs() + << " Before setTypeAction(MVT::i16, ...), i16 has action " + << ta << "\n"); + + // Inspired from lib/Target/X86/X86ISelLowering.cpp: + // Gives error: "Do not know how to promote this operator's operand!" + ValueTypeActions.setTypeAction(MVT::i16, TypeLegal); + // Gives error: "Do not know how to promote this operator's operand!" + //ValueTypeActions.setTypeAction(MVT::i16, TypeExpandInteger); //TypePromoteInteger); + ta = ValueTypeActions.getTypeAction(/* DAG.getContext(), */ MVT::i16); //TypeLegal); + LLVM_DEBUG(dbgs() + << " setTypeAction(MVT::i16, ...) has set for i16 action to " + << ta << "\n"); +#endif // NOTNOT + + + // NEW_FP16: it seems these are very useful + // See http://llvm.org/doxygen/TargetLowering_8h_source.html#l00122 + // Quite GOOD: + ValueTypeActions.setTypeAction(MVT::f16, TypeLegal); + + /* "// Convert this float to a same size integer type, + if an operation is not supported in target HW." 
*/ + // ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat); //TypePromoteInteger); + + setOperationAction(ISD::MSCATTER, TYPE_VECTOR_F16, Legal); + + /* + // It seems it does not help: + + setOperationAction(ISD::LOAD, MVT::f16, Promote); + + // Gives: << UNREACHABLE executed at /home/asusu/LLVM/llvm38Nov2016/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:465!>> + //setOperationAction(ISD::STORE, MVT::f16, LibCall); + + setOperationAction(ISD::STORE, MVT::f16, Promote); + setOperationAction(ISD::FADD, MVT::f16, LibCall); + */ + + + AddPromotedToType(ISD::LOAD, MVT::f16, MVT::i16); + AddPromotedToType(ISD::STORE, MVT::f16, MVT::i16); + /* + // It seems it doesn't help: + AddPromotedToType(ISD::FADD, MVT::f16, MVT::i16); + */ + // END NEW_FP16 + + LLVM_DEBUG(dbgs() << "Exiting ConnexTargetLowering()\n"); +} // END ConnexTargetLowering::ConnexTargetLowering() + + + +// Inspired from lib/Target/AMDGPU/AMDGPUISelLowering.cpp +SDValue ConnexTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { + const Function &Fn = DAG.getMachineFunction().getFunction(); + + DiagnosticInfoUnsupported NoDynamicAlloca(Fn, "unsupported dynamic alloca", + SDLoc(Op).getDebugLoc()); + DAG.getContext()->diagnose(NoDynamicAlloca); + auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)}; + return DAG.getMergeValues(Ops, SDLoc()); +} + + +// Inspired from lib/Target/X86/X86ISelLowering.cpp +// Widen vector InOp to vector type NVT. +static SDValue ChangeVectorType(SDValue InOp, + MVT NVT, + SelectionDAG &DAG, + bool FillWithZeroes = false, + // This is meant for the index operand of MGATHER and MSCATTER + bool allowUnsafeChanges = false) { + LLVM_DEBUG(dbgs() << " ChangeVectorType(): InOp = "; + InOp.dump(); + dbgs() << "\n"); + + LLVM_DEBUG(dbgs() << " ChangeVectorType(): NVT = " + << ((EVT)NVT).getEVTString() << "\n"); + + // Check if InOp already has the right width. 
+ MVT InVT = InOp.getSimpleValueType(); + if (InVT == NVT) + return InOp; + + if (InOp.isUndef()) + return DAG.getUNDEF(NVT); + + /* + assert(InVT.getVectorElementType() == NVT.getVectorElementType() && + "input and widen element type must match"); + */ + + unsigned InNumElts = InVT.getVectorNumElements(); + unsigned WidenNumElts = NVT.getVectorNumElements(); + LLVM_DEBUG(dbgs() << " ChangeVectorType(): InNumElts = " << InNumElts + << "\n WidenNumElts = " << WidenNumElts << "\n"); + /* + assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 && + "Unexpected request for vector widening"); + */ + if (allowUnsafeChanges == false) + assert(WidenNumElts == InNumElts && + "WidenNumElts == InNumElts failed"); + + EVT EltVT = NVT.getVectorElementType(); + + SDLoc dl(InOp); + if (InOp.getOpcode() == ISD::CONCAT_VECTORS && + InOp.getNumOperands() == 2) { + SDValue N1 = InOp.getOperand(1); + if ((ISD::isBuildVectorAllZeros(N1.getNode()) && FillWithZeroes) || + N1.isUndef()) { + InOp = InOp.getOperand(0); + InVT = InOp.getSimpleValueType(); + InNumElts = InVT.getVectorNumElements(); + } + } + + if (ISD::isBuildVectorOfConstantSDNodes(InOp.getNode()) || + ISD::isBuildVectorOfConstantFPSDNodes(InOp.getNode())) { + SmallVector Ops; + for (unsigned i = 0; i < InNumElts; ++i) + Ops.push_back(InOp.getOperand(i)); + + /* + SDValue FillVal = FillWithZeroes ? 
DAG.getConstant(0, dl, EltVT) : + DAG.getUNDEF(EltVT); + for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i) + Ops.push_back(FillVal); + */ + return DAG.getBuildVector(NVT, dl, Ops); + } + + LLVM_DEBUG(dbgs() << " ChangeVectorType(): InOp = "; + // << InOp.getNode() << "\n"); + InOp.dump(); + dbgs() << "\n"); + + if (allowUnsafeChanges == false) { + assert(0 && "ChangeVectorType(): I guess this case should not be reached"); + } + else { + assert(0 && "Finish implementation"); + // MEGA-TODO: implement well, although this case might NOT be required + SDValue Ops[] = { InOp.getOperand(0), InOp.getOperand(1) }; + + LLVM_DEBUG(dbgs() << " ChangeVectorType(): Ops[0] = "; + Ops[0].dump(); + dbgs() << "\n"); + + LLVM_DEBUG(dbgs() << " ChangeVectorType(): Ops[1] = "; + Ops[1].dump(); + dbgs() << "\n"); + + SDValue res = DAG.getNode(InOp->getOpcode(), dl, + NVT, + Ops); + + LLVM_DEBUG(dbgs() << "ChangeVectorType(): res = "; + res.dump(); + dbgs() << "\n"); + + return res; + } + + SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, NVT) : + DAG.getUNDEF(NVT); + + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, + NVT, FillVal, + InOp, DAG.getIntPtrConstant(0, dl)); +} // END ChangeVectorType() + + + + +void ConnexTargetLowering::ReplaceAddI32UseWithADDVH(MVT &aType, + SDValue &Index, + SelectionDAG &DAG) const { + SDLoc dl(Index); + + /* + We make an unsafe assumption that if the Index of the + MSCATTER/MGATHER instruction is used in an ADD, then this Index is an + induction variable and we can change it to i16 type + (we also assume this ind.var. is NOT overflowing the i16 type). + MEGA-TODO: check if initializing this Index is safely done on i32 type or on i16. + + VERY IMPORTANT: + The Connex processor we target allows only + indirect Loads (and Stores) that work on lanes of ONLY 16-bits. 
+ Therefore we need to make sure that the index/address register is not + used in i32 operations and if it is we change them to MachineNodes + here, in the ISelLowering phase (before ISelDAGToDAG), that have + actually type v128i16. +IMPORTANT-TODO: make at least a check that the BUILD_VECTOR with initial index/address value is a short (i16) value AND LOWER the v64i32 to v128i16 by doing a splat with the lower 16-bits value of element 0 + */ + + // Inspired from LegalizeTypes.cpp + SDNode *nodeIndex = Index.getNode(); + for (SDNode::use_iterator UI = nodeIndex->use_begin(), UE = nodeIndex->use_end(); + UI != UE; ++UI) { + SDNode *nUser = UI.getUse().getUser(); + + /* + if (UI.getUse().getResNo() == i) + assert(UI->getNodeId() == NewNode && + "Remapped value has non-trivial use!"); + */ + LLVM_DEBUG(dbgs() << "ReplaceAddI32UseWithADDVH(): nUser = "; + nUser->dump(); + //dbgs() << "\n" + ); + + if (nUser->getOpcode() == ISD::ADD) { + LLVM_DEBUG(dbgs() << "ReplaceAddI32UseWithADDVH(): Converting nUser " + "ISD::ADD to MachineSDNode Connex::ADDV_H\n"); + + /* IMPORTANT: We do here an unsafe type hack: we use ADDV_H which actually + * has TYPE_VECTOR_I16 and declare the type returned is TYPE_VECTOR_I32. + * It is a type mismatch at the level of semantics of the defined + * MachineSDNodes of Connex - I've actually done this before and + * it seems SelectionDAG doesn't complain. + * (Note that llc actually does TypeLegalization). 
+ */ + SDNode *nUserNew = DAG.getMachineNode(Connex::ADDV_H, + dl, + //TYPE_VECTOR_I16, + aType, + //Ops + nUser->getOperand(0), + nUser->getOperand(1)); + + // From http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html + DAG.ReplaceAllUsesWith(nUser, nUserNew); + } + } +} + + + +// Inspired from [LLVM]/llvm/lib/Target/X86/X86ISelLowering.cpp +/* static */ +SDValue ConnexTargetLowering::LowerMGATHER(SDValue &Op, + SelectionDAG &DAG) const { + LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerMGATHER()\n"); + + MaskedGatherSDNode *N = cast(Op.getNode()); + + SDLoc dl(Op); + EVT resVT = Op.getSimpleValueType(); + + LLVM_DEBUG(dbgs() << "LowerMGATHER(): " + << "resVT = " << resVT.getEVTString() + << "\n"); + + SDValue Index = N->getIndex(); + SDValue Mask = N->getMask(); + SDValue Src = N->getPassThru(); // this is actually passthru + MVT IndexVT = Index.getSimpleValueType(); + MVT MaskVT = Mask.getSimpleValueType(); + + //unsigned NumElts = VT.getVectorNumElements(); + //assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op"); + + /* + if (!Subtarget.hasVLX() && !VT.is512BitVector() && + !Index.getSimpleValueType().is512BitVector()) { + // AVX512F supports only 512-bit vectors. Or data or index should + // be 512 bit wide. 
If now the both index and data are 256-bit, but + // the vector contains 8 elements, we just sign-extend the index + if (NumElts == 8) { + Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), Index }; + DAG.UpdateNodeOperands(N, Ops); + return Op; + } + + // Minimal number of elements in Gather + NumElts = 8; + // Index + MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), NumElts); + Index = ExtendToType(Index, NewIndexVT, DAG); + if (IndexVT.getScalarType() == MVT::i32) + Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index); + + // Mask + MVT MaskBitVT = MVT::getVectorVT(MVT::i1, NumElts); + // At this point we have promoted mask operand + assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type"); + MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts); + Mask = ExtendToType(Mask, ExtMaskVT, DAG, true); + Mask = DAG.getNode(ISD::TRUNCATE, dl, MaskBitVT, Mask); + + // The pass-thru value + MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts); + Src = ExtendToType(Src, NewVT, DAG); + + SDValue Ops[] = { N->getChain(), Src, Mask, N->getBasePtr(), Index }; + SDValue NewGather = DAG.getMaskedGather(DAG.getVTList(NewVT, MVT::Other), + N->getMemoryVT(), dl, Ops, + N->getMemOperand()); + SDValue Exract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, + NewGather.getValue(0), + DAG.getIntPtrConstant(0, dl)); + SDValue RetOps[] = {Exract, NewGather.getValue(1)}; + return DAG.getMergeValues(RetOps, dl); + } + */ + + LLVM_DEBUG(dbgs() << " LowerMGATHER(): Op.getNode() = " << Op.getNode() + << "\n Op = "; + Op->dump(&DAG)); + + //MVT NewVT = MVT::v128i16; + //SDValue Index2 = ExtendToType(Index, NewVT, DAG); + // The index value - is normally i32, and we have to lower it to i16 + MVT aType = (resVT == TYPE_VECTOR_I16 ? TYPE_VECTOR_I16 : TYPE_VECTOR_I32); + /* + // 2018_06_23 + MVT aType = (resVT == TYPE_VECTOR_I16 ? 
TYPE_VECTOR_I16 : TYPE_VECTOR_I16); + */ + // NEW_FP16 + if (resVT == TYPE_VECTOR_F16) + // I guess this case NEVER happens + aType = TYPE_VECTOR_I16; + + // 2019_03_30: SDValue Index2 = ChangeVectorType(Index, aType, DAG); + SDValue Index2 = ChangeVectorType(Index, aType, DAG, false, true); + /* + // 2018_06_23 + SDValue Index2 = ChangeVectorType(Index, aType, DAG, false, true); + + We get the following error: + <hasAnyUseOfValue(i) || + From->getValueType(i) == To->getValueType(i)) && "Cannot use this version of ReplaceAllUsesWith!"' failed.>> + The reason is that we change index to have type v128i16, while + masked_gather has type v64i32, and this type difference gives the + assertion error. + */ + + + + LLVM_DEBUG(dbgs() << " LowerMGATHER(): Index = "; + Index->dump();); + LLVM_DEBUG(dbgs() << " LowerMGATHER(): N->getNumOperands() = " + << N->getNumOperands() << "\n"); + LLVM_DEBUG(dbgs() << " ChangeVectorType(): N = "; + N->dump();); // << "\n"); + + // 2018_06_23 + if (aType == TYPE_VECTOR_I32) { + ReplaceAddI32UseWithADDVH(aType, Index, DAG); + } + + assert(N->getNumOperands() == 6); + /* The definition of the MaskedGatherSDNode class can be found at + * http://llvm.org/doxygen/SelectionDAGNodes_8h_source.html#l02324 + * IMPORTANT NOTE: we are treating here the machine-independent + * masked_gather, which has different parameters than the + * machine-SDNode masked_gather node defined in TableGen (with params specified by + * constraints defined in SDTMaskedGather). + * machine-independent masked_gather looks like: + * t21: v128i16,ch = masked_gather t0, t29, t35, Constant:i64<51>, t32 + * where: + * - 1st param (in this case t0) is chain (this case, EntryToken) + * - 2nd param (in this case t29) is passthru (vector) + * - 3rd param (in this case t35) is mask (vector) + * - 4th param (in this case Constant) is the base pointer (scalar) of the loads + * (the origin/reference for the index of the gather) + * (the base of GEP, also repeated in LD256[...] 
symbolically) + * NOTE: if it has value TargetConstant:i64<0> then we have LD256[] - this seems to always make llc crash. + * - 5th param (in this case t32) is index (vector). + * - 6th param is scale. + */ +#ifdef NOTNOT_2019_03_30 + // IMPORTANT: Here we avoid materializing the passthru operand + SDValue ct = DAG.getConstant(1, dl, MVT::i64); + SDValue Ops[] = { + N->getOperand(0), + + // passthru + N->getOperand(1), + // Cycles forever in I-selection: DAG.getUNDEF(TYPE_VECTOR_I16), + // See http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html + + // mask + N->getOperand(2), + + // base pointer + ct, + //N->getOperand(3), + + Index2 + //Index + }; + + DAG.UpdateNodeOperands(N, Ops); +#endif + + /* + */ + LLVM_DEBUG(dbgs() << " LowerMGATHER(), after update: Op.getNode() = " + << Op.getNode() //->dump(CurDAG); + << "\n Op = "; + Op->dump(&DAG); + dbgs() << "\n N = " << N; + dbgs() << "\n N = "; N->dump(&DAG); + //dbgs() << "\n Scale = "; Scale->dump(CurDAG); + dbgs() << "\n Index.getNode() = " << Index.getNode(); //<< ", Base.getNode() = " << Base.getNode(); + dbgs() << "\n Index = "; Index->dump(&DAG); + // + dbgs() << "\n N->getBasePtr() = "; N->getBasePtr()->dump(&DAG); + // + dbgs() << "\n Index2.getNode() = " << Index2.getNode(); //<< ", Base.getNode() = " << Base.getNode(); + dbgs() << "\n Index2 = "; Index2->dump(&DAG); + // + dbgs() << "\n Mask.getNode() = " << Mask.getNode(); //<< ", Base.getNode() = " << Base.getNode(); + dbgs() << "\n Mask = "; Mask->dump(&DAG); + // + dbgs() << "\n Src.getNode() = " << Src.getNode(); //<< ", Base.getNode() = " << Base.getNode(); + dbgs() << "\n Src = "; Src->dump(&DAG); + // + /* + // Not working + dbgs() << "\n resVT.SimpleTy = " << ((MVT)resVT).SimpleTy; + dbgs() << "\n IndexVT.SimpleTy = " << IndexVT.SimpleTy; + dbgs() << "\n MaskVT.SimpleTy = " << MaskVT.SimpleTy; + */ + // + dbgs() << "\n"); + + /* + // NOT working + if (N->getNumValues() > 1) { + LLVM_DEBUG(dbgs() << " LowerMGATHER(): calling 
getMergeValues()\n"); + SDValue RetOps[] = {Op.getValue(0), Op.getValue(1)}; + // NOT working: still gives assertion error after this: + // <getNumValues() && "Custom lowering returned the wrong number of results!"' failed.>> + // (and modifying LowerOperationWrapper() also does NOT help). + return DAG.getMergeValues(RetOps, dl); + } + */ + + LLVM_DEBUG(dbgs() << "Exiting ConnexTargetLowering::LowerMGATHER()\n"); + + return Op; +} // END ConnexTargetLowering::LowerMGATHER() + + +// We only basically implemente in LowerMSCATTER() a call to ReplaceAddI32UseWithADDVH(aType, Index) +SDValue ConnexTargetLowering::LowerMSCATTER(SDValue &Op, + //const ConnexSubtarget &Subtarget, + SelectionDAG &DAG) const { + LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerMSCATTER()\n"); + + MaskedScatterSDNode *N = cast(Op.getNode()); + + SDLoc dl(Op); + // This returns ch for the MSCATTER SDNode: EVT resVT = Op.getSimpleValueType(); + + SDValue Index = N->getIndex(); + SDValue Mask = N->getMask(); + SDValue Src = N->getValue(); // this is actually passthru + MVT IndexVT = Index.getSimpleValueType(); + MVT MaskVT = Mask.getSimpleValueType(); + EVT SrcVT = Src.getSimpleValueType(); + LLVM_DEBUG(dbgs() << "LowerMSCATTER(): " + << "SrcVT = " << SrcVT.getEVTString() + << "\n"); + + LLVM_DEBUG(dbgs() << " LowerMSCATTER(): Op.getNode() = " << Op.getNode(); + dbgs() << "\n Op = "; Op->dump(&DAG)); + + // The index value - is normally i32, and we have to lower it to i16 + MVT aType = (SrcVT == TYPE_VECTOR_I16 ? 
TYPE_VECTOR_I16 : TYPE_VECTOR_I32); + + + LLVM_DEBUG(dbgs() << " LowerMSCATTER(): Index = "; + Index->dump();); + + // 2018_06_23 + if (aType == TYPE_VECTOR_I32) { + ReplaceAddI32UseWithADDVH(aType, Index, DAG); + } + + LLVM_DEBUG(dbgs() << "Exiting ConnexTargetLowering::LowerMSCATTER()\n"); + + return Op; +} // END ConnexTargetLowering::LowerMSCATTER() + + + + +#ifdef DO_F16_EMULATION_IN_ISEL_LOWERING + + #define MARKER_FOR_EMULATION + +extern SDNode *CreateInlineAsmNode(SelectionDAG *CurDAG, std::string asmString, + SDNode *nodeSYM_IMM, SDLoc &DL, + bool specialCase=false); + + +SDValue ConnexTargetLowering::LowerMUL_F16(SDValue &Op, + SelectionDAG *CurDAG) const { + SDNode *Node = Op.getNode(); + + LLVM_DEBUG(dbgs() << "Entered LowerMUL_F16(): [LATEST] Selecting Node = "; + Node->dump(CurDAG); + dbgs() << "\n"); + + + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + + //EVT ResVecTy = Node->getValueType(1); // 0 is ch (chain) + + LLVM_DEBUG(dbgs() << "LowerMUL_F16(): We are in the case TYPE_VECTOR_F16\n"); + typeVecNode = TYPE_VECTOR_F16; + + SDValue nodeOpSrc1 = Node->getOperand(0); + SDValue nodeOpSrc2 = Node->getOperand(1); + + LLVM_DEBUG(dbgs() << "LowerMUL_F16(): nodeOpSrc1.getValueType() = " + << nodeOpSrc1.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "LowerMUL_F16(): nodeOpSrc1 = "; + (nodeOpSrc1.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "LowerMUL_F16(): nodeOpSrc2.getValueType() = " + << nodeOpSrc2.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "LowerMUL_F16(): nodeOpSrc2 = "; + (nodeOpSrc2.getNode())->dump(); + dbgs() << "\n"); + //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_F16); + + SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + #ifdef MARKER_FOR_EMULATION + MVT::Other, + // This gives a serious error: MVT::Glue, + #else + MVT::Glue, + #endif + nodeOpSrc1); + +#ifdef MARKER_FOR_EMULATION + 
std::string exprStrBegin = "// Starting MUL.f16 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCast1, DL); + LLVM_DEBUG(dbgs() << "LowerMUL_F16(): inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); dbgs() << "\n"); +#endif + + SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + MVT::Other, + // IMPORTANT: this can give error: <getNodeId() == -1 && "Node already inserted!">> MVT::Glue, + nodeOpSrc2, + // chain + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeBegin, 0) + #else + SDValue(nodeOpSrcCast1, 1) + #endif + ); + + +/* +// Tested - works well, but a bit complicated and inefficient. +// BUT a GOOD test for the various issues that can appear in llc +// (COPY generated by TwoAddressInctruction in WHERE blocks and handled by me +// in ConnexTargetMachine.cpp, etc) +*/ +#include "Select_MULf16_OpincaaCodeGen.h" + + +#ifdef MARKER_FOR_EMULATION + std::string exprStrEnd = "// Finishing MUL.f16 emulation ;)"; + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + resF16, DL); + LLVM_DEBUG(dbgs() << "LowerMUL_F16(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); dbgs() << "\n"); +#endif + + // END of method - we convert resH (vector of short/i16) to resW (vector of i32) + SDNode *resW = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(resF16, 0), + // chain edge + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeEnd, 0) + #else + SDValue(resF16, 1) + #endif + ); + + LLVM_DEBUG(dbgs() << "LowerMUL_F16(): resW = "; + resW->dump(CurDAG); + dbgs() << "\n"); + + return SDValue(resW, 0); +} // END LowerMUL_F16() + + +SDValue ConnexTargetLowering::LowerADD_F16(SDValue &Op, + SelectionDAG *CurDAG) const { + SDNode *Node = Op.getNode(); + + LLVM_DEBUG(dbgs() << "Entered LowerADD_F16(): [LATEST] Selecting Node = "; + Node->dump(CurDAG); + dbgs() << "\n"); + + + SDLoc DL(Node); + + EVT 
ViaVecTy; + EVT typeVecNode; + + //EVT ResVecTy = Node->getValueType(1); // 0 is ch (chain) + + LLVM_DEBUG(dbgs() << "LowerADD_F16(): We are in the case TYPE_VECTOR_F16\n"); + typeVecNode = TYPE_VECTOR_F16; + + SDValue nodeOpSrc1 = Node->getOperand(0); + SDValue nodeOpSrc2 = Node->getOperand(1); + + LLVM_DEBUG(dbgs() << "LowerADD_F16(): nodeOpSrc1.getValueType() = " + << nodeOpSrc1.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "LowerADD_F16(): nodeOpSrc1 = "; + (nodeOpSrc1.getNode())->dump(); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "LowerADD_F16(): nodeOpSrc2.getValueType() = " + << nodeOpSrc2.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "LowerADD_F16(): nodeOpSrc2 = "; + (nodeOpSrc2.getNode())->dump(); + dbgs() << "\n"); + //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_F16); + + SDNode *nodeOpSrcCast1 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + #ifdef MARKER_FOR_EMULATION + MVT::Other, + // This gives a serious error: MVT::Glue, + #else + MVT::Glue, + #endif + nodeOpSrc1); + +#ifdef MARKER_FOR_EMULATION + std::string exprStrBegin = "// Starting ADD.f16 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCast1, DL); + LLVM_DEBUG(dbgs() << "LowerADD_F16(): inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); + dbgs() << "\n"); +#endif + + SDNode *nodeOpSrcCast2 = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_WH, + DL, + // The output type of the node + TYPE_VECTOR_I16, + MVT::Other, + // IMPORTANT: this can give error: <getNodeId() == -1 && "Node already inserted!">> MVT::Glue, + nodeOpSrc2, + // chain + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeBegin, 0) + #else + SDValue(nodeOpSrcCast1, 1) + #endif + ); + + +/* +// Tested - works well, but a bit complicated and inefficient. 
+// BUT a GOOD test for the various issues that can appear in llc +// (COPY generated by TwoAddressInctruction in WHERE blocks and handled by me +// in ConnexTargetMachine.cpp, etc) +*/ +#include "Select_ADDf16_OpincaaCodeGen.h" + + +#ifdef MARKER_FOR_EMULATION + std::string exprStrEnd = "// Finishing ADD.f16 emulation ;)"; + SDNode *inlineAsmNodeEnd = CreateInlineAsmNode(CurDAG, exprStrEnd, + resF16, DL); + LLVM_DEBUG(dbgs() << "LowerADD_F16(): inlineAsmNodeEnd = "; + inlineAsmNodeEnd->dump(); dbgs() << "\n"); +#endif + + // END of method - we convert resH (vector of short/i16) to resW (vector of i32) + SDNode *resW = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HW, + DL, + typeVecNode, + SDValue(resF16, 0), + // chain edge + #ifdef MARKER_FOR_EMULATION + SDValue(inlineAsmNodeEnd, 0) + #else + SDValue(resF16, 1) + #endif + ); + LLVM_DEBUG(dbgs() << "LowerADD_F16(): resW = "; + resW->dump(CurDAG); + dbgs() << "\n"); + + return SDValue(resW, 0); +} // END LowerADD_F16() + + +SDValue ConnexTargetLowering::LowerREDUCE_F16(SDValue &Op, + SelectionDAG *CurDAG) const { + SDNode *Node = Op.getNode(); + + LLVM_DEBUG(dbgs() << "Entered SelectReduceF16(): Selecting Node = "; + Node->dump(CurDAG); + dbgs() << "\n"); + + SDLoc DL(Node); + + EVT ViaVecTy; + EVT typeVecNode; + + //EVT ResVecTy = Node->getValueType(1); // 0 is ch (chain) + + LLVM_DEBUG(dbgs() + << "SelectReduceF16(): We are in the case TYPE_VECTOR_F16\n"); + typeVecNode = TYPE_VECTOR_F16; + + // NOTE: Opnd 1 is a ct + SDValue nodeOpSrc = Node->getOperand(2); + + // We need to preserve the node that was chained with Node to avoid it is removed + SDValue nodeOpChain = Node->getOperand(0); // Opnd 0 is ch (chain) + + LLVM_DEBUG(dbgs() << "SelectReduceF16(): nodeOpSrc.getValueType() = " + << nodeOpSrc.getValueType().getEVTString() + << "\n"); + LLVM_DEBUG(dbgs() << "SelectReduceF16(): nodeOpSrc = "; + (nodeOpSrc.getNode())->dump(); + dbgs() << "\n"); + //assert(nodeOpSrc.getValueType() == TYPE_VECTOR_F16); + +#ifdef 
MARKER_FOR_EMULATION + SDNode *nodeOpSrcCastBogus1 = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HH, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // This gives a serious error: MVT::Glue, + nodeOpSrc, + // chain edge + nodeOpChain + ); + + std::string exprStrBegin = "// Starting RED.f16 emulation ;)"; + SDNode *inlineAsmNodeBegin = CreateInlineAsmNode(CurDAG, + exprStrBegin, + nodeOpSrcCastBogus1, DL); + LLVM_DEBUG(dbgs() << "SelectReduceF16: inlineAsmNodeBegin = "; + inlineAsmNodeBegin->dump(); + dbgs() << "\n"); + + /* This node is also bogus, only for the sake of "sandwhiching" the INLINE + assembly with 2 NOPs. + */ + SDNode *nodeOpSrcCast = CurDAG->getMachineNode( + Connex::NOP_BITCONVERT_HH, // IMPORTANT: this is a BOGUS NOP_BITCONVERT - we just put it since it has a Glue result, while nodeOpSrcCast2 does NOT + DL, + TYPE_VECTOR_I16, + MVT::Other, + // IMPORTANT: this can give error: <getNodeId() == -1 && "Node already inserted!">> MVT::Glue, + SDValue(nodeOpSrcCastBogus1, 0), + // chain + SDValue(inlineAsmNodeBegin, 0) + ); +#else + SDNode *nodeOpSrcCast = CurDAG->getMachineNode(Connex::NOP_BITCONVERT_HH, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + nodeOpSrc, + // chain edge + nodeOpChain + ); + +#endif + + return SDValue(); +} // END LowerREDUCE_F16() + + +#else // ! 
DO_F16_EMULATION_IN_ISEL_LOWERING +SDValue ConnexTargetLowering::LowerMUL_F16(SDValue &Op, + SelectionDAG *CurDAG) const { + return SDValue(); +} // END LowerMUL_F16() + + +SDValue ConnexTargetLowering::LowerADD_F16(SDValue &Op, + SelectionDAG *CurDAG) const { + return SDValue(); +} // END LowerADD_F16() + + +SDValue ConnexTargetLowering::LowerREDUCE_F16(SDValue &Op, + SelectionDAG *CurDAG) const { + return SDValue(); +} // END LowerREDUCE_F16() +#endif // #ifdef DO_F16_EMULATION_IN_ISEL_LOWERING + + +/* static */ SDValue ConnexTargetLowering::LowerVSELECT(SDValue &Op, + //const ConnexSubtarget &Subtarget, + SelectionDAG &DAG) const { + assert(0 && "This code is no longer executed."); + + /* + case ISD::VSELECT: + SDNode *N = Op.getNode(); + return performVSELECTCombine(N, DAG); + static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { + EVT Ty = N->getValueType(0); + + if (Ty.is128BitVector() && Ty.isInteger()) { + // Try the following combines: + // (vselect (setcc $a, $b, SETLT), $b, $a)) -> (vsmax $a, $b) + // (vselect (setcc $a, $b, SETLE), $b, $a)) -> (vsmax $a, $b) + // (vselect (setcc $a, $b, SETLT), $a, $b)) -> (vsmin $a, $b) + // (vselect (setcc $a, $b, SETLE), $a, $b)) -> (vsmin $a, $b) + // (vselect (setcc $a, $b, SETULT), $b, $a)) -> (vumax $a, $b) + // (vselect (setcc $a, $b, SETULE), $b, $a)) -> (vumax $a, $b) + // (vselect (setcc $a, $b, SETULT), $a, $b)) -> (vumin $a, $b) + // (vselect (setcc $a, $b, SETULE), $a, $b)) -> (vumin $a, $b) + // SETGT/SETGE/SETUGT/SETUGE variants of these will show up initially but + // will be expanded to equivalent SETLT/SETLE/SETULT/SETULE versions by the + // legalizer. 
+ SDValue Op0 = N->getOperand(0); + + if (Op0->getOpcode() != ISD::SETCC) + return SDValue(); + + ISD::CondCode CondCode = cast(Op0->getOperand(2))->get(); + bool Signed; + + if (CondCode == ISD::SETLT || CondCode == ISD::SETLE) + Signed = true; + else if (CondCode == ISD::SETULT || CondCode == ISD::SETULE) + Signed = false; + else + return SDValue(); + + SDValue Op1 = N->getOperand(1); + SDValue Op2 = N->getOperand(2); + SDValue Op0Op0 = Op0->getOperand(0); + SDValue Op0Op1 = Op0->getOperand(1); + + if (Op1 == Op0Op0 && Op2 == Op0Op1) + return DAG.getNode(Signed ? MipsISD::VSMIN : MipsISD::VUMIN, SDLoc(N), + Ty, Op1, Op2); + else if (Op1 == Op0Op1 && Op2 == Op0Op0) + return DAG.getNode(Signed ? MipsISD::VSMAX : MipsISD::VUMAX, SDLoc(N), + Ty, Op1, Op2); + } else if ((Ty == MVT::v2i16) || (Ty == MVT::v4i8)) { + SDValue SetCC = N->getOperand(0); + + if (SetCC.getOpcode() != MipsISD::SETCC_DSP) + return SDValue(); + + return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty, + SetCC.getOperand(0), SetCC.getOperand(1), + N->getOperand(1), N->getOperand(2), SetCC.getOperand(2)); + } + + return SDValue(); + } + */ + + + /* + VERY VERY IMPORTANT: I prefer to do VSELECT treatment here, in + ISelLowering::LowerOperation, not in ConnexISelDAGToDAG, + because, I only do lowering, and only where required I put Machine Nodes + (i.e. ORV_H nodes). + So I let ISelDAGToDAG to do proper selection after this lowering, and + in ISelDAGToDAG some of the TableGen patterns are being used for the + lowered VSELECT. + + Note that register allocation is performed after Instruction selection + (see [Cardoso_2014], Figure on page 134). + %So we have to replace VSELECT before Register allocation. 
+ + Note that although it is not required to create virtual registers for + the ORV_H machine instructions (since we failed to add a ch input port + to the setcc - see 50_IfConversion/Setcc_with_ch_input_port_NOT_working + - and I guess we would fail here also), we create it for the true + ORV_H because we need to make the associated predecessor CopyToRegister a + successor of WHEREEQ, otherwise the WHEREEQ would not have a successor. + TODO if we are extremely precious: + I guess we could make a succcessor of WHEREEQ the CopyToReg successor + of ORV_H and could get rid of all input virtual registers. + NOTE: we canNOT get rid of the virtual register that keeps the result of + both ORV_H, because we can replace it only with a VSELECT (reminds me + of dataflow machines and multiplexors :) ), BUT we want + to lower VSELECT in other components. + + Note that the nodes we create here have to have correct ordering, + otherwise instruction selection can fail or have wrong semantics. + */ + LLVM_DEBUG(dbgs() << "Treating LowerOperation() for ISD::VSELECT...\n"); + + // END_WHERE, etc are defined in anonymous enum in TableGen generated ConnexGenInstrInfo.inc + + /* From http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html: + * LLVMContext * getContext () const + */ + + + LLVMContext &theContext = *(DAG.getContext()); + EVT voidEVT = EVT::getEVT(Type::getVoidTy(theContext)); + LLVM_DEBUG(dbgs() << " LowerOperation(): voidEVT = " + << voidEVT.getEVTString() << "\n"); + + SDValue chain = DAG.getEntryNode(); + SDValue InFlag(nullptr, 0); // NO Glue - Null incoming flag value. 
+ + SDNode *vselectNode = Op.getNode(); + assert(vselectNode->getNumOperands() == 3); + LLVM_DEBUG(dbgs() << " LowerOperation(): Initially, vselectNode->use_size() = " + << vselectNode->use_size() + << "\n"); + + for (SDNode::use_iterator UI = vselectNode->use_begin(), UE = vselectNode->use_end(); + UI != UE; ++UI) { + // Note: UI is an SDNode * + LLVM_DEBUG(dbgs() << " LowerOperation(): Initially, one use of vselectNode is: "; + UI->print(dbgs()); + dbgs() << "\n"); + } + + //EVT nodeResType = vselectNode->getValueType(0); + SDValue vselectNodeOp0 = vselectNode->getOperand(0); + SDValue vselectNodeOp1 = vselectNode->getOperand(1); + SDValue vselectNodeOp2 = vselectNode->getOperand(2); + + SDValue setCC = vselectNode->getOperand(0); + SDNode *setCCNode = setCC.getNode(); + SDValue setCCPred = (vselectNode->getOperand(0)).getNode()->getOperand(2); + SDNode *setCCPredNode = setCCPred.getNode(); + + assert(setCCPredNode->isMachineOpcode() == false); + assert(setCCPredNode->getOpcode() == ISD::CONDCODE); + + // From http://llvm.org/docs/doxygen/html/classllvm_1_1SDLoc.html + //const SDLoc DL; + //const SDLoc DL(vselectNode); + const SDLoc DL(vselectNodeOp0); // trying to avoid problems when giving DeleteNode(vselectNode) + + // Inspired from ConnexISelLowering.cpp + MachineFunction &MF = DAG.getMachineFunction(); + //MachineRegisterInfo &RegInfo = MF.getRegInfo(); + // Inspiring from MipsSEISelLowering.cpp + //MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + + + unsigned whereOpcode; + switch (cast(setCCPredNode)->get()) { + case ISD::SETEQ: + whereOpcode = Connex::WHEREEQ_BUNDLE_H; + break; + case ISD::SETLT: + whereOpcode = Connex::WHERELT_BUNDLE_H; + break; + case ISD::SETULT: + whereOpcode = Connex::WHEREULT_BUNDLE_H; + break; + default: + assert(0 && "case not reachable"); + break; + } + +#define WORKING_WITH_PHYSICAL_REGISTER + +#ifdef WORKING_WITH_PHYSICAL_REGISTER + unsigned regDest = CONNEX_RESERVED_REGISTER_01; + + /* + unsigned virtRegRes = 
RegInfo.createVirtualRegister( + &Connex::VectorHRegClass); + */ +#else + /* IMPORTANT: In essence this is ONLY to allocate a virtual register to use + it later for the TargetMachine, PassPredicate. + */ + unsigned regDest = RegInfo->createVirtualRegister( + &Connex::VectorHRegClass); + + SDValue copyFromRegDest = DAG.getCopyFromReg( + chain, + //SDValue(endWhere, 0), + DL, + regDest, + TYPE_VECTOR_I16 // result type + //endWhere->getOperand(0) //RegTy + //SDValue(endWhere, 0) + ); +#endif + + // Signature: MachineSDNode *getMachineNode (unsigned Opcode, SDLoc dl, EVT VT, SDValue Op1, SDValue Op2, SDValue Op3) + SDNode *whereXY = DAG.getMachineNode(whereOpcode, + DL, + // Return type + //voidEVT //, + TYPE_VECTOR_I16, + //MVT::Glue, + //TYPE_VECTOR_I16, + //orNodeTrue->getValue(0) + + vselectNodeOp2, + vselectNodeOp1, + /* NOTE: if we move this value as 1st parameter it crashes with: Assertion `NumMIOperands >= II.getNumOperands() && NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() + NumImpUses && "#operands for dag node doesn't match .td file!"' failed., etc */ + //SDValue(copyToRegFalse.getNode(), 0), // the ch output port of CopyToReg + + //vselectNodeOp0 + #ifdef WORKING_WITH_PHYSICAL_REGISTER + SDValue(setCCNode, 0) + #else + copyFromRegDest + #endif + // The glue output port of CopyToReg. 
+ //SDValue(copyToRegFalse.getNode(), 1) + //setCCNode->getOperand(1), + //copyToRegOp2 + ); + LLVM_DEBUG(dbgs() << " LowerOperation(): whereXY (chained with setCC) = "; + whereXY->print(dbgs()); + dbgs() << "\n"); + + // From http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html + //DAG.ReplaceAllUsesWith(vselectNode, whereXY); + //return SDValue(whereXY, 0); + +#ifdef WORKING_WITH_PHYSICAL_REGISTER + SDValue copyFromRegDest = DAG.getCopyFromReg( + //chain, + //SDValue(endWhere, 0), + SDValue(whereXY, 0), + DL, + regDest, + TYPE_VECTOR_I16 // result type + //endWhere->getOperand(0)); //RegTy); + //SDValue(endWhere, 0) + ); +#endif + + /* VERY IMPORTANT: the rest of codegen is performed in ConnexTargetMachine.cpp, + PassPredicate, + since we do NOT want the scheduler to do OoO or even DCE on the + instructions we add - this was the case when we were generating + everything here in lowering. + */ + +#ifdef WORKING_WITH_PHYSICAL_REGISTER + DAG.ReplaceAllUsesWith(vselectNode, ©FromRegDest); + return copyFromRegDest; + + /* + DAG.ReplaceAllUsesWith(vselectNode, whereXY); + return SDValue(whereXY, 0); + */ +#else + DAG.ReplaceAllUsesWith(vselectNode, whereXY); + return copyFromRegDest; +#endif +} // END LowerVSELECT() + +
/*
From http://llvm.org/docs/doxygen/html/classllvm_1_1TargetLowering.html:
  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const
  <>
 */
// From [LLVM]/llvm38Nov2016/llvm/include/llvm/CodeGen/ISDOpcodes.h
/// Dispatches a node that was marked for custom lowering to the matching
/// Lower*() helper of this class.
///
/// \param Op   the node to lower; its opcode selects the helper.
/// \param DAG  the selection DAG that owns \p Op.
/// \returns whatever the selected helper returns.  For FMUL / FADD whose
///          result type is not handled by the f16 emulation, a null SDValue
///          is returned (the same "nothing was lowered" value the stubbed
///          Lower*_F16() helpers in this file return).  Any opcode without a
///          case, and ISD::VSELECT, ends in llvm_unreachable.
SDValue ConnexTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  // This will print the numeric (decimal) value of the Opcode.
  LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerOperation(): "
                    << "Op.getOpcode() = " << Op.getOpcode()
                    << ", getTargetNodeName() = "
                    << getTargetNodeName(Op.getOpcode()) << "\n");

  /*
  LLVM_DEBUG(dbgs() << "ConnexTargetLowering::LowerOperation(): ISD::VSELECT = "
               << ISD::VSELECT << "\n");
  if (Op.getOpcode() == ISD::VSELECT)
    LLVM_DEBUG(dbgs() << "ConnexTargetLowering::LowerOperation() - ISD::VSELECT\n");
  */


  switch (Op.getOpcode()) {
#ifdef NEW_BIGGER_OPS
    // NOTE(review): this guarded section is kept exactly as it was.  As
    // written it is a sketch, not valid C++ (no case label, and Chain, LHS,
    // RHS, CC, Dest and DL are not declared in this function), so
    // NEW_BIGGER_OPS must stay undefined until it is finished.
    //!!!! TODO TODO: check for the type to be i32/u32
    (Op.getOperand(0).getValueType() == MVT::i32) &&
    (Op.getOperand(1).getValueType() == MVT::i32) {
      /*
       * % NOTE: reg alloc is NOT performed yet - but this is
       *     dataflow mostly...
       *
       // Let's do an incorrect, but simpler version:
       Dest_v32i16 = Src1_v32i16 ADD Src2_v32i16
       //Reg_dest_low16 = Reg_src1_low16 ADD Reg_src2_low16
       //Reg_dest_high16 = Reg_src1_high16 ADD Reg_src2_high16
       Reg_tmp = 1
       //WHERE INDEX & 1 == 0
       LDIX (load index of the Processing Element) %to Reg_tmp2
       AND 1
       == 0
       WHERE true
         WHERE CARRY
           Reg_dest_high16 = Reg_src1_high16 ADD Reg_tmp
         END_WHERE
       END_WHERE
       */
      return DAG.getNode(ConnexISD::ADD,
                         DL,
                         Op.getValueType(),
                         Chain,
                         LHS,
                         RHS,
                         // TODO_CHANGE_BACKEND:
                         //DAG.getConstant(CC, DL, MVT::i64), Dest);
                         DAG.getConstant(CC, DL, TYPE_SCALAR_ELEMENT), Dest);
    }

    /*
     * The Op.getOperand(0).getValueType() == MVT::u32
     *    in this
    return DAG.getNode(ConnexISD::ADD, DL, Op.getValueType(), Chain, LHS, RHS,
                       // TODO_CHANGE_BACKEND:
                       //DAG.getConstant(CC, DL, MVT::i64), Dest);
                       DAG.getConstant(CC, DL, TYPE_SCALAR_ELEMENT), Dest);
    */
    return Lower(Op, DAG);
#endif

#ifdef DO_F16_EMULATION_IN_ISEL_LOWERING
    // NEW_FP16
    /*
    case ISD::Intrinsic::connex_reduce_f16: {
      LLVM_DEBUG(dbgs() << "LowerOperation() for Intrinsic::connex_reduce_f16\n");

      SDLoc DL(Op);
      SDNode *Node = Op.getNode();
      EVT ResVecTy = Node->getValueType(0);
      // MEGA-TODO: input opnd has to have type TYPE_VECTOR_F16
      if (ResVecTy == MVT::f16) {
        LLVM_DEBUG(dbgs() << "LowerOperation() for Intrinsic::connex_reduce_f16 for f16\n");
        return LowerREDUCE_F16(Op, &DAG);
      }

      break;
    }
    */

    // 2018_08_17_HANDLING_F16_IN_ISEL_LOWERING
    case ISD::FMUL: {
      LLVM_DEBUG(dbgs() << "LowerOperation() for FMUL\n");

#ifdef DO_MUL_F16_EMULATION_IN_ISEL_LOWERING
      // Only the f16 vector type is emulated; the type is read from result 0
      // of the node.
      //if (ResVecTy == MVT::f16)
      if (Op.getNode()->getValueType(0) == TYPE_VECTOR_F16) {
        LLVM_DEBUG(dbgs() << "LowerOperation() for FMUL for f16\n");
        return LowerMUL_F16(Op, &DAG);
      }
#endif

      // Not handled here: leave the switch and return the null SDValue below.
      break;
    }
    case ISD::FADD: {
      LLVM_DEBUG(dbgs() << "LowerOperation() for FADD\n");

#ifdef DO_ADD_F16_EMULATION_IN_ISEL_LOWERING
      //if (ResVecTy == MVT::f16)
      if (Op.getNode()->getValueType(0) == TYPE_VECTOR_F16) {
        LLVM_DEBUG(dbgs() << "LowerOperation() for FADD for f16\n");
        return LowerADD_F16(Op, &DAG);
        //return DAG.getNode(Connex::ADD_rr,
        //                   DL,
        //                   Op.getValueType(),
        //                   Op.getOperand(1),
        //                   Op.getOperand(2));
      }
#endif

      // Not handled here: leave the switch and return the null SDValue below.
      break;
    }
#endif // #ifdef DO_F16_EMULATION_IN_ISEL_LOWERING

    case ISD::BR_CC:
      return LowerBR_CC(Op, DAG);
    case ISD::GlobalAddress:
      return LowerGlobalAddress(Op, DAG);
    case ISD::SELECT_CC:
      return LowerSELECT_CC(Op, DAG);
    case ISD::INSERT_VECTOR_ELT:
      // Inspired from [LLVM]/llvm/lib/Target/ARM/ARMISelLowering.cpp
      return LowerINSERT_VECTOR_ELT(Op, DAG);
    case ISD::EXTRACT_VECTOR_ELT:
      // From [LLVM]/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
      return LowerEXTRACT_VECTOR_ELT(Op, DAG);
      // return EXTRACT_VECTOR_ELT;
    case ISD::BUILD_VECTOR:
      // From [LLVM]/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
      return LowerBUILD_VECTOR(Op, DAG);
    case ISD::VECTOR_SHUFFLE:
      // From [LLVM]/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
      return LowerVECTOR_SHUFFLE(Op, DAG);

    // Inspired from lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);

    // From [LLVM]/llvm/lib/Target/Mips/MipsISelLowering.cpp
    case ISD::ConstantPool: return LowerConstantPool(Op, DAG);

    case ISD::MGATHER:
      // From [LLVM]/llvm/lib/Target/X86/X86ISelLowering.cpp
      return LowerMGATHER(Op, DAG);

    case ISD::MSCATTER:
      // From [LLVM]/llvm/lib/Target/X86/X86ISelLowering.cpp
      return LowerMSCATTER(Op, DAG);

#ifdef TREAT_SETCC_VSELECT
    /*
    // Inspired From lib/Target/Mips/MipsSEISelLowering.cpp
    static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) {
      bool IsV216 = (Ty == MVT::v2i16);

      switch (CC) {
      case ISD::SETEQ:
      case ISD::SETNE: return true;
      case ISD::SETLT:
      case ISD::SETLE:
      case ISD::SETGT:
      case ISD::SETGE: return IsV216;
      case ISD::SETULT:
      case ISD::SETULE:
      case ISD::SETUGT:
      case ISD::SETUGE: return !IsV216;
      default: return false;
      }
    }

    case ISD::SETCC:
      //static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
      SDNode *N = Op.getNode();

      EVT Ty = N->getValueType(0);

      if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
        return SDValue();

      if (!isLegalDSPCondCode(Ty, cast(N->getOperand(2))->get()))
        return SDValue();

      return DAG.getNode(MipsISD::SETCC_DSP,
                         SDLoc(N),
                         Ty,
                         N->getOperand(0),
                         N->getOperand(1),
                         N->getOperand(2));
      //}


    */
#endif

    // The call to LowerVSELECT() is disabled (that function now begins with
    // assert(0 && "This code is no longer executed.")), so a VSELECT that
    // reaches this point is treated like any other unhandled opcode.
    //return LowerVSELECT(Op, DAG);
    case ISD::VSELECT:
    default:
      llvm_unreachable("unimplemented operand");
  }

  // Reached only through a `break` above (FMUL / FADD that were not lowered).
  // Without this the function would run off its end without returning a
  // value, which is undefined behaviour for a non-void function.
  return SDValue();
} // END ConnexTargetLowering::LowerOperation
+ + + +// Calling Convention Implementation +#include "ConnexGenCallingConv.inc" + + + +// Taken from lib/Target/Mips/MipsISelLowering.cpp +static SDValue UnpackFromArgumentSlot(SDValue Val, const CCValAssign &VA, + EVT ArgVT, const SDLoc &DL, + SelectionDAG &DAG) { + MVT LocVT = VA.getLocVT(); + EVT ValVT = VA.getValVT(); + + // Shift into the upper bits if necessary. 
+ switch (VA.getLocInfo()) { + default: + break; + case CCValAssign::AExtUpper: + case CCValAssign::SExtUpper: + case CCValAssign::ZExtUpper: { + unsigned ValSizeInBits = ArgVT.getSizeInBits(); + unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); + unsigned Opcode = + VA.getLocInfo() == CCValAssign::ZExtUpper ? ISD::SRL : ISD::SRA; + Val = DAG.getNode( + Opcode, DL, VA.getLocVT(), Val, + DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); + break; + } + } + + // If this is an value smaller than the argument slot size (32-bit for O32, + // 64-bit for N32/N64), it has been promoted in some way to the argument slot + // size. Extract the value and insert any appropriate assertions regarding + // sign/zero extension. + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: + break; + case CCValAssign::AExtUpper: + case CCValAssign::AExt: + Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); + break; + case CCValAssign::SExtUpper: + case CCValAssign::SExt: + Val = DAG.getNode(ISD::AssertSext, DL, LocVT, Val, DAG.getValueType(ValVT)); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); + break; + case CCValAssign::ZExtUpper: + case CCValAssign::ZExt: + Val = DAG.getNode(ISD::AssertZext, DL, LocVT, Val, DAG.getValueType(ValVT)); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); + break; + case CCValAssign::BCvt: + Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val); + break; + } + + return Val; +} + +#ifdef NOT_NOT_NOT +void ConnexTargetLowering::writeVarArgRegs(std::vector &OutChains, + SDValue Chain, const SDLoc &DL, + SelectionDAG &DAG, + CCState &State) const { + ArrayRef ArgRegs = ABI.GetVarArgRegs(); + unsigned Idx = State.getFirstUnallocated(ArgRegs); + unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); + MVT RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); + const TargetRegisterClass *RC = getRegClassFor(RegTy); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = 
MF.getFrameInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo(); + + // Offset of the first variable argument from stack pointer. + int VaArgOffset; + + if (ArgRegs.size() == Idx) + VaArgOffset = alignTo(State.getNextStackOffset(), RegSizeInBytes); + else { + VaArgOffset = + (int)ABI.GetCalleeAllocdArgSizeInBytes(State.getCallingConv()) - + (int)(RegSizeInBytes * (ArgRegs.size() - Idx)); + } + + // Record the frame index of the first variable argument + // which is a value necessary to VASTART. + int FI = MFI->CreateFixedObject(RegSizeInBytes, VaArgOffset, true); + MipsFI->setVarArgsFrameIndex(FI); + + // Copy the integer registers that have not been used for argument passing + // to the argument register save area. For O32, the save area is allocated + // in the caller's stack frame, while for N32/64, it is allocated in the + // callee's stack frame. + for (unsigned I = Idx; I < ArgRegs.size(); + ++I, VaArgOffset += RegSizeInBytes) { + unsigned Reg = addLiveIn(MF, ArgRegs[I], RC); + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy); + FI = MFI->CreateFixedObject(RegSizeInBytes, VaArgOffset, true); + SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); + SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, + MachinePointerInfo(), false, false, 0); + cast(Store.getNode())->getMemOperand()->setValue( + (Value *)nullptr); + OutChains.push_back(Store); + } +} +#endif + +SDValue ConnexTargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl &Ins, const SDLoc &DL, + SelectionDAG &DAG, SmallVectorImpl &InVals) const { + LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerFormalArguments()\n"); + + switch (CallConv) { + default: + llvm_unreachable("Unsupported calling convention"); + case CallingConv::C: + case CallingConv::Fast: + break; + } + + // Inspired from lib/Target/Mips/MipsISelLowering.cpp, MipsTargetLowering::LowerFormalArguments(): + // Used with vargs to 
accumulate store chains. + std::vector OutChains; + + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + + // Assign locations to all of the incoming arguments. + SmallVector ArgLocs; + CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + +// TODO_CHANGE_BACKEND: + //CCInfo.AnalyzeFormalArguments(Ins, CC_Connex64); + CCInfo.AnalyzeFormalArguments(Ins, CC_Connex64); + + unsigned i = 0; + for (auto &VA : ArgLocs) { + if (VA.isRegLoc()) { + LLVM_DEBUG(dbgs() << "LowerFormalArguments(): case VA.isRegLoc()\n"); + // Arguments passed in registers + EVT RegVT = VA.getLocVT(); + switch (RegVT.getSimpleVT().SimpleTy) { + default: { + errs() << "LowerFormalArguments Unhandled argument type: " + << RegVT.getEVTString() << '\n'; + llvm_unreachable(0); + } + // TODO_CHANGE_BACKEND: + case TYPE_SCALAR_ELEMENT: + unsigned VReg = RegInfo.createVirtualRegister(&Connex::GPRRegClass); + RegInfo.addLiveIn(VA.getLocReg(), VReg); + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT); + + // If this is an 8/16/32-bit value, it is really passed promoted to 64 + // bits. Insert an assert[sz]ext to capture this, then truncate to the + // right size.
+ if (VA.getLocInfo() == CCValAssign::SExt) + ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + else if (VA.getLocInfo() == CCValAssign::ZExt) + ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + + if (VA.getLocInfo() != CCValAssign::Full) + ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue); + + InVals.push_back(ArgValue); + } + } // END VA.isRegLoc() + else { + /* + fail(DL, DAG, "defined with too many args"); + InVals.push_back(DAG.getConstant(0, DL, VA.getLocVT())); + */ + + LLVM_DEBUG(dbgs() << "LowerFormalArguments(): case NOT VA.isRegLoc()\n"); + + // Inspired from lib/Target/Mips/MipsISelLowering.cpp, MipsTargetLowering::LowerFormalArguments(): + MachineFrameInfo &MFI = MF.getFrameInfo(); + + MVT LocVT = VA.getLocVT(); + + /* + if (ABI.IsO32()) { + // We ought to be able to use LocVT directly but O32 sets it to i32 + // when allocating floating point values to integer registers. + // This shouldn't influence how we load the value into registers unless + // we are targeting softfloat. + if (VA.getValVT().isFloatingPoint() && !Subtarget.useSoftFloat()) + LocVT = VA.getValVT(); + } + */ + // sanity check + assert(VA.isMemLoc()); + + // The stack pointer offset is relative to the caller stack frame. 
+ int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, + VA.getLocMemOffset(), true); + + // Create load nodes to retrieve arguments from the stack + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); + SDValue ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, + MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FI)); + // 2019_03_30: false, false, false, 0); + OutChains.push_back(ArgValue.getValue(1)); + + ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG); + + InVals.push_back(ArgValue); + } + + i++; + } + + /* + if (IsVarArg || MF.getFunction()->hasStructRetAttr()) { + fail(DL, DAG, "functions with VarArgs or StructRet are not supported"); + } + */ + + // Inspired from lib/Target/Mips/MipsISelLowering.cpp, MipsTargetLowering::LowerFormalArguments(): +#ifdef NOT_NOT_NOT + ConnexFunctionInfo *MipsFI = MF.getInfo(); + unsigned e = ArgLocs.size(); + for (i = 0 ; i != e; ++i) { + // The mips ABIs for returning structs by value requires that we copy + // the sret argument into $v0 for the return. Save the argument into + // a virtual register so that we can access it from the return points. + if (Ins[i].Flags.isSRet()) { + unsigned Reg = MipsFI->getSRetReturnReg(); + if (!Reg) { + Reg = MF.getRegInfo().createVirtualRegister( + // TODO_CHANGE_BACKEND: + //getRegClassFor(ABI.IsN64() ? MVT::i64 : MVT::i32)); + getRegClassFor(ABI.IsN64() ? MVT::i64 : MVT::i32)); + MipsFI->setSRetReturnReg(Reg); + } + SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[i]); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain); + break; + } + } + + if (IsVarArg) + writeVarArgRegs(OutChains, Chain, DL, DAG, CCInfo); +#endif + + // All stores are grouped in one node to allow the matching between + // the size of Ins and InVals. 
This only happens when on varg functions + if (!OutChains.empty()) { + OutChains.push_back(Chain); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); + } + + return Chain; +} + + +const unsigned ConnexTargetLowering::MaxArgs = 5; + +SDValue ConnexTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const { + SelectionDAG &DAG = CLI.DAG; + auto &Outs = CLI.Outs; + auto &OutVals = CLI.OutVals; + auto &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &IsTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool IsVarArg = CLI.IsVarArg; + MachineFunction &MF = DAG.getMachineFunction(); + + // Connex target does not support tail call optimization. + IsTailCall = false; + + switch (CallConv) { + default: + report_fatal_error("Unsupported calling convention"); + case CallingConv::Fast: + case CallingConv::C: + break; + } + + // Analyze operands of the call, assigning locations to each operand. + SmallVector ArgLocs; + CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + + CCInfo.AnalyzeCallOperands(Outs, CC_Connex64); + + unsigned NumBytes = CCInfo.getNextStackOffset(); + + if (Outs.size() > MaxArgs) + fail(CLI.DL, DAG, "too many args to ", Callee); + + for (auto &Arg : Outs) { + ISD::ArgFlagsTy Flags = Arg.Flags; + if (!Flags.isByVal()) + continue; + + fail(CLI.DL, DAG, "pass by value not supported ", Callee); + } + + auto PtrVT = getPointerTy(MF.getDataLayout()); + Chain = DAG.getCALLSEQ_START(Chain, + // 2019_3_30: DAG.getConstant(NumBytes, CLI.DL, PtrVT, true), + NumBytes, 0, + CLI.DL); + + SmallVector, MaxArgs> RegsToPass; + + //LLVM_DEBUG(dbgs() << "DAG. 
= "; DAG.dump(); /* << "\n" */); + LLVM_DEBUG(dbgs() << "DAG = "; DAG.dump(); /* << "\n" */); + //LLVM_DEBUG(dbgs() << "CLI = " << CLI << "\n"); + LLVM_DEBUG(dbgs() << "InVals.size() = " << InVals.size() << "\n"); + + for (unsigned j = 0; j < InVals.size(); ++j) { + //LLVM_DEBUG(dbgs() << "InVals[j] = " << InVals[j] << "\n"); + LLVM_DEBUG(dbgs() << "InVals[" << j << "] = "; + InVals[j]->dump(); /* << "\n" */); + } + LLVM_DEBUG(dbgs() << "ArgLocs.size() = " << ArgLocs.size() << "\n"); + + // Walk arg assignments + for (unsigned i = 0, + e = std::min(static_cast(ArgLocs.size()), MaxArgs); + i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue Arg = OutVals[i]; + + //LLVM_DEBUG(dbgs() << "ArgLocs[i] = " << ArgLocs[i] << "\n"); + LLVM_DEBUG(dbgs() << "Arg = "; Arg->dump(); /* << "\n" */); + + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown loc info"); + case CCValAssign::Full: + break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, CLI.DL, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, CLI.DL, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, CLI.DL, VA.getLocVT(), Arg); + break; + } + + // Push arguments into RegsToPass vector + if (VA.isRegLoc()) + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + else { + if (VA.isMemLoc()) + LLVM_DEBUG(dbgs() << "VA.isMemLoc() == true\n"); + llvm_unreachable("call arg pass bug"); + } + } + + SDValue InFlag; + + // Build a sequence of copy-to-reg nodes chained together with token chain and + // flag operands which copy the outgoing args into registers. The InFlag in + // necessary since all emitted instructions must be stuck together. 
+ for (auto &Reg : RegsToPass) { + Chain = DAG.getCopyToReg(Chain, CLI.DL, Reg.first, Reg.second, InFlag); + InFlag = Chain.getValue(1); + } + + // If the callee is a GlobalAddress node (quite common, every direct call is) + // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. + // Likewise ExternalSymbol -> TargetExternalSymbol. + if (GlobalAddressSDNode *G = dyn_cast(Callee)) + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT, + G->getOffset(), 0); + else if (ExternalSymbolSDNode *E = dyn_cast(Callee)) + Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0); + + // Returns a chain & a flag for retval copy to use. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SmallVector Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are + // known live into the call. + for (auto &Reg : RegsToPass) + Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); + + if (InFlag.getNode()) + Ops.push_back(InFlag); + + Chain = DAG.getNode(ConnexISD::CALL, CLI.DL, NodeTys, Ops); + InFlag = Chain.getValue(1); + + // Create the CALLSEQ_END node. + Chain = DAG.getCALLSEQ_END( + Chain, DAG.getConstant(NumBytes, CLI.DL, PtrVT, true), + DAG.getConstant(0, CLI.DL, PtrVT, true), InFlag, CLI.DL); + InFlag = Chain.getValue(1); + + // Handle result values, copying them out of physregs into vregs that we + // return. 
+ return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, CLI.DL, DAG, + InVals); +} + + +SDValue ConnexTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, + bool IsVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SDLoc &DL, SelectionDAG &DAG) const { + unsigned Opc = ConnexISD::RET_FLAG; + + // CCValAssign - represent the assignment of the return value to a location + SmallVector RVLocs; + MachineFunction &MF = DAG.getMachineFunction(); + + // CCState - Info about the registers and stack slot. + CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); + + if (MF.getFunction().getReturnType()->isAggregateType()) { + fail(DL, DAG, "only integer returns supported"); + return DAG.getNode(Opc, DL, MVT::Other, Chain); + } + + // Analyze return values. + CCInfo.AnalyzeReturn(Outs, RetCC_Connex64); + + SDValue Flag; + SmallVector RetOps(1, Chain); + + // Copy the result values into the output registers. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Flag); + + // Pass Flag (value 1 of this copy) to the next getCopyToReg so that all + // emitted copies are stuck together. + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } + + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. + if (Flag.getNode()) + RetOps.push_back(Flag); + + return DAG.getNode(Opc, DL, MVT::Other, RetOps); +} + + +SDValue ConnexTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, + bool IsVarArg, + const SmallVectorImpl &Ins, + const SDLoc &DL, + SelectionDAG &DAG, + SmallVectorImpl &InVals) const { + MachineFunction &MF = DAG.getMachineFunction(); + // Assign locations to each value returned by this call.
+ SmallVector RVLocs; + CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); + + if (Ins.size() >= 2) { + fail(DL, DAG, "only small returns supported"); + for (unsigned i = 0, e = Ins.size(); i != e; ++i) + InVals.push_back(DAG.getConstant(0, DL, Ins[i].VT)); + return DAG.getCopyFromReg(Chain, DL, 1, Ins[0].VT, InFlag).getValue(1); + } + + CCInfo.AnalyzeCallResult(Ins, RetCC_Connex64); + + // Copy all of the result registers out of their specified physreg. + for (auto &Val : RVLocs) { + Chain = DAG.getCopyFromReg(Chain, DL, Val.getLocReg(), + Val.getValVT(), InFlag).getValue(1); + InFlag = Chain.getValue(2); + InVals.push_back(Chain.getValue(0)); + } + + return Chain; +} + +static void NegateCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) { + switch (CC) { + default: + break; + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETLT: + case ISD::SETLE: + CC = ISD::getSetCCSwappedOperands(CC); + std::swap(LHS, RHS); + break; + } +} + + +SDValue ConnexTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + ISD::CondCode CC = cast(Op.getOperand(1))->get(); + SDValue LHS = Op.getOperand(2); + SDValue RHS = Op.getOperand(3); + SDValue Dest = Op.getOperand(4); + SDLoc DL(Op); + + NegateCC(LHS, RHS, CC); + + return DAG.getNode(ConnexISD::BR_CC, DL, Op.getValueType(), Chain, LHS, RHS, + // TODO_CHANGE_BACKEND: + //DAG.getConstant(CC, DL, MVT::i64), Dest); + DAG.getConstant(CC, DL, TYPE_SCALAR_ELEMENT), Dest); +} + +SDValue ConnexTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue TrueV = Op.getOperand(2); + SDValue FalseV = Op.getOperand(3); + ISD::CondCode CC = cast(Op.getOperand(4))->get(); + SDLoc DL(Op); + + NegateCC(LHS, RHS, CC); + + // TODO_CHANGE_BACKEND: + //SDValue TargetCC = DAG.getConstant(CC, DL, MVT::i64); + SDValue TargetCC = DAG.getConstant(CC, DL, TYPE_SCALAR_ELEMENT); + + SDVTList VTs = 
DAG.getVTList(Op.getValueType(), MVT::Glue); + SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV}; + + return DAG.getNode(ConnexISD::SELECT_CC, DL, VTs, Ops); +} + +const char *ConnexTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch ((ConnexISD::NodeType)Opcode) { + case ConnexISD::FIRST_NUMBER: + break; + case ConnexISD::RET_FLAG: + return "ConnexISD::RET_FLAG"; + case ConnexISD::CALL: + return "ConnexISD::CALL"; + case ConnexISD::SELECT_CC: + return "ConnexISD::SELECT_CC"; + case ConnexISD::BR_CC: + return "ConnexISD::BR_CC"; + case ConnexISD::Wrapper: + return "ConnexISD::Wrapper"; + // Inspired from lib/Target/Mips/MipsISelLowering.cpp + case ConnexISD::VSHF: + return "ConnexISD::VSHF"; + /* We should IGNORE gcc -Wswitch when it gives: + <> + See definition of NodeType in ConnexISelLowering.h. + */ + case ISD::MGATHER: + return "ISD::MGATHER"; + // Probably not good + //case ConnexISD::VSELECT: + /* We should IGNORE gcc -Wswitch when it gives: + <> + See definition of NodeType in ConnexISelLowering.h. 
+ */ + case ISD::VSELECT: + return "ISD::VSELECT"; + /* + case ConnexISD::ConstantPool: + return "ConnexISD::ConstantPool"; + */ + default: + //return TargetLowering::NodeType; + /* See + http://llvm.org/docs/doxygen/html/TargetLowering_8cpp_source.html + - returns nullptr: return TargetLowering::getTargetNodeName(Opcode); + */ + return "NONAME (getTargetNodeName NOT supporting this Opcode)"; + } + return nullptr; +} + +SDValue ConnexTargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + const GlobalValue *GV = cast(Op)->getGlobal(); + + // TODO_CHANGE_BACKEND: + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, TYPE_SCALAR_ELEMENT); + + // TODO_CHANGE_BACKEND: + return DAG.getNode(ConnexISD::Wrapper, DL, TYPE_SCALAR_ELEMENT, GA); +} + +MachineBasicBlock * +ConnexTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + assert(MI.getOpcode() == Connex::Select && "Unexpected instr type to insert"); + + // To "insert" a SELECT instruction, we actually have to insert the diamond + // control-flow pattern. The incoming instruction knows the destination vreg + // to set, the condition code register to branch on, the true/false values to + // select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator I = ++BB->getIterator(); + + // ThisMBB: + // ... + // TrueVal = ... 
+ // jmp_XX r1, r2 goto Copy1MBB + // fallthrough --> Copy0MBB + MachineBasicBlock *ThisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *Copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *Copy1MBB = F->CreateMachineBasicBlock(LLVM_BB); + + F->insert(I, Copy0MBB); + F->insert(I, Copy1MBB); + // Update machine-CFG edges by transferring all successors of the current + // block to the new block which will contain the Phi node for the select. + Copy1MBB->splice(Copy1MBB->begin(), BB, + std::next(MachineBasicBlock::iterator(MI)), BB->end()); + Copy1MBB->transferSuccessorsAndUpdatePHIs(BB); + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(Copy0MBB); + BB->addSuccessor(Copy1MBB); + + // Insert Branch if Flag + unsigned LHS = MI.getOperand(1).getReg(); + unsigned RHS = MI.getOperand(2).getReg(); + int CC = MI.getOperand(3).getImm(); + switch (CC) { + case ISD::SETGT: + BuildMI(BB, DL, TII.get(Connex::JSGT_rr)) + .addReg(LHS) + .addReg(RHS) + .addMBB(Copy1MBB); + break; + case ISD::SETUGT: + BuildMI(BB, DL, TII.get(Connex::JUGT_rr)) + .addReg(LHS) + .addReg(RHS) + .addMBB(Copy1MBB); + break; + case ISD::SETGE: + BuildMI(BB, DL, TII.get(Connex::JSGE_rr)) + .addReg(LHS) + .addReg(RHS) + .addMBB(Copy1MBB); + break; + case ISD::SETUGE: + BuildMI(BB, DL, TII.get(Connex::JUGE_rr)) + .addReg(LHS) + .addReg(RHS) + .addMBB(Copy1MBB); + break; + case ISD::SETEQ: + BuildMI(BB, DL, TII.get(Connex::JEQ_rr)) + .addReg(LHS) + .addReg(RHS) + .addMBB(Copy1MBB); + break; + case ISD::SETNE: + BuildMI(BB, DL, TII.get(Connex::JNE_rr)) + .addReg(LHS) + .addReg(RHS) + .addMBB(Copy1MBB); + break; + default: + report_fatal_error("unimplemented select CondCode " + Twine(CC)); + } + + // Copy0MBB: + // %FalseValue = ... + // # fallthrough to Copy1MBB + BB = Copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(Copy1MBB); + + // Copy1MBB: + // %Result = phi [ %FalseValue, Copy0MBB ], [ %TrueValue, ThisMBB ] + // ... 
+ BB = Copy1MBB; + BuildMI(*BB, BB->begin(), DL, TII.get(Connex::PHI), MI.getOperand(0).getReg()) + .addReg(MI.getOperand(5).getReg()) + .addMBB(Copy0MBB) + .addReg(MI.getOperand(4).getReg()) + .addMBB(ThisMBB); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + + +static bool isIndexVectorVal(const BuildVectorSDNode *N) { + unsigned int nOps = N->getNumOperands(); + + //SDValue Operand0 = N->getOperand(0); + + for (unsigned int i = 0; i < nOps; ++i) { + //if (N->getOperand(i) != Operand0) + // See http://llvm.org/docs/ProgrammersManual.html#the-isa-cast-and-dyn-cast-templates + ConstantSDNode *ctNode = dyn_cast(N->getOperand(i)); + if (ctNode == NULL) + return false; + + LLVM_DEBUG(dbgs() << " ctNode = "; ctNode->dump()); + + if (N->getConstantOperandVal(i) != i) + return false; + } + /* + if (Op->getOpcode() == ISD::UNDEF) + return true; + if (isConstantOrUndef(Op->getOperand(i))) + return true; + */ + + return true; +} + + +// From llvm/lib/Target/Mips/MipsSEISelLowering.cpp +static bool isSplatVector(const BuildVectorSDNode *N) { + unsigned int nOps = N->getNumOperands(); + assert(nOps > 1 && "isSplatVector has 0 or 1 sized build vector"); + + SDValue Operand0 = N->getOperand(0); + + for (unsigned int i = 1; i < nOps; ++i) { + if (N->getOperand(i) != Operand0) + return false; + } + + return true; +} + + +// From llvm/lib/Target/Mips/MipsSEISelLowering.cpp +static bool isConstantOrUndef(const SDValue Op) { + if (Op->getOpcode() == ISD::UNDEF) + return true; + if (isa(Op)) + return true; + if (isa(Op)) + return true; + return false; +} + + +// From /home/asusu/LLVM/llvm38Nov2016/llvm/lib/Target/Mips/MipsSEISelLowering.cpp +static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { + for (unsigned i = 0; i < Op->getNumOperands(); ++i) + if (isConstantOrUndef(Op->getOperand(i))) + return true; + return false; +} + + +// Getting inspired from lib/Target/X86/X86ISelLowering.cpp +SDValue 
ConnexTargetLowering::LowerBITCAST(SDValue Op, + SelectionDAG &DAG) const { + EVT SrcVT = Op.getOperand(0).getSimpleValueType(); + EVT DstVT = Op.getSimpleValueType(); + + LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerBITCAST(): " + << "SrcVT = " << SrcVT.getEVTString() + << ", DstVT = " << DstVT.getEVTString() + << ". Returning SrcVT... \n"); + + //return SDValue(); + //return Op; + return Op.getOperand(0); +} + + +SDValue ConnexTargetLowering::LowerADD_I32(SDValue Op, + SelectionDAG &DAG) const { + // TODO TODO: build opnd0&1 that takes the same operands, but have type TYPE_VECTOR_I16 + LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerADD_I32()\n"); + + assert(Op.getOperand(0).getValueType() == TYPE_VECTOR_I32); + + SDValue opnd0 = Op.getOperand(0); + SDValue opnd1 = Op.getOperand(1); + + // Bitcast both operands to TYPE_VECTOR_I16 (operand 0 is asserted above to be TYPE_VECTOR_I32); the ADD below is built on TYPE_VECTOR_I16. NOTE(review): the result type is TYPE_VECTOR_I16, not TYPE_VECTOR_I32 - confirm this is what users of the node expect. + + SDValue opnd1Native = DAG.getNode(ISD::BITCAST, SDLoc(Op), + TYPE_VECTOR_I16, opnd0); + SDValue opnd2Native = DAG.getNode(ISD::BITCAST, SDLoc(Op), + TYPE_VECTOR_I16, opnd1); + + SDValue Result = DAG.getNode(ISD::ADD, + //ConnexISD::ADDV_H, + SDLoc(Op), + TYPE_VECTOR_I16, + opnd1Native, + opnd2Native); + + LLVM_DEBUG(dbgs() << "LowerADD_I32: UNSPECIFIED case\n"); + return Result; //SDValue(); +} + + +// From [LLVM]/llvm/lib/Target/Mips/MipsSEISelLowering.cpp +// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the +// backend. +// +// Lowers according to the following rules: +// - Constant splats are legal as-is as long as the SplatBitSize is a power of +// 2 less than or equal to 64 and the value fits into a signed 10-bit +// immediate +// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize +// is a power of 2 less than or equal to 64 and the value does not fit into a +// signed 10-bit immediate +// - Non-constant splats are legal as-is. +// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
+// - All others are illegal and must be expanded. +SDValue ConnexTargetLowering::LowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerBUILD_VECTOR()\n"); + + BuildVectorSDNode *BVN = cast(Op); + EVT ResTy = Op->getValueType(0); + SDLoc DL(Op); + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + /* + if (!Subtarget.hasMSA() || !ResTy.is128BitVector()) + return SDValue(); + */ + + /* + From http://llvm.org/docs/doxygen/html/classllvm_1_1BuildVectorSDNode.html: + bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, + unsigned &SplatBitSize, bool &HasAnyUndefs, + unsigned MinSplatBits=0, bool isBigEndian=false) const + Check if this is a constant splat, and if so, find the smallest element + size that splats the vector. + + By constant splat we understand a vector filled with the same + constant value in all elements. + */ + if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs, 8, false) //, true) + //!Subtarget.isLittle()) + && SplatBitSize <= 64) { + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR(): Case isConstantSplat(): " + << "SplatValue = " << SplatValue + << ", SplatUndef = " << SplatUndef + << ", SplatBitSize = " << SplatBitSize + << "\n" + ); + /* + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: SplatValue = " << SplatValue.toString(10, 1) << "\n"); + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: SplatUndef = " << SplatUndef.toString(10, 1) << "\n"); + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: SplatBitSize = " << SplatBitSize << "\n"); + */ + + // We can only cope with 8-, 16- or 32-bit (NOT 64-bit) splat elements + if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32) { // && SplatBitSize != 32 && SplatBitSize != 64) + + /* !!!!
TODO TODO: NOT sure this is correct for case vector register is + v*i32 or v*i16 */ + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: canNOT cope with " << SplatBitSize + << " bits.\n"); + return SDValue(); + } + + // If the value fits into a simm10 then we can use ldi.[bhwd] + // However, if it isn't an integer type we will have to bitcast from an + // integer type first. Also, if there are any undefs, we must lower them + // to defined values first. + if (ResTy.isInteger() && !HasAnyUndefs && SplatValue.isSignedIntN(10)) { + // See http://llvm.org/docs/doxygen/html/classllvm_1_1SDValue.html + //LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: return Op (Op = " << Op << ")\n"); + LLVM_DEBUG(dbgs() << " LowerBUILD_VECTOR(): Case SIMM10 taken. " + << "(Op = "; Op->dump(); dbgs() << ")\n"); + + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: return Op\n"); + return Op; + + // TODO TODO TODO We should return as selected instruction VLOAD + } + + EVT ViaVecTy; + + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: SplatBitSize = " + << SplatBitSize << "\n"); + + switch (SplatBitSize) { + default: + return SDValue(); + + // TODO_CHANGE_BACKEND: + case 8: + //ViaVecTy = MVT::v16i8; + ViaVecTy = TYPE_VECTOR_I16; + break; + case 16: + ViaVecTy = TYPE_VECTOR_I16; + break; + case 32: + ViaVecTy = TYPE_VECTOR_I32; + break; + case 64: + ViaVecTy = MVT::v8i64; + /* !!!! TODO TODO: NOT sure this is correct for case vector register is + v*i32 or v*i16 */ + break; + /* + // There's no fill.d to fall back on for 64-bit values + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: case 64 return SDValue.\n"); + return SDValue(); + */ + } + + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: Before DAG.getConstant()\n"); + // SelectionDAG::getConstant will promote SplatValue appropriately. 
+ SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: After DAG.getConstant()\n"); + + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR(): " + << "(Result = "; Result->dump(); dbgs() << ")\n"); + + // See http://llvm.org/docs/doxygen/html/structllvm_1_1EVT.html + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR(): " + << "(ResTy = " << ResTy.getEVTString() << ")\n"); + + /* + // Bitcast to the type we originally wanted + if (ViaVecTy != ResTy) + Result = DAG.getNode(ISD::BITCAST, SDLoc(BVN), ResTy, Result); + */ + + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: return Result\n"); + return Result; + } + else + if (isSplatVector(BVN)) { // This is used for splat vectors filled with the same variable + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: case isSplatVector(BVN)\n"); + return Op; + } + else + if (isIndexVectorVal(BVN)) { + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: case isIndexVectorVal(BVN)\n"); + + SDNode *Res = DAG.getMachineNode(Connex::LDIX_H, + DL, + TYPE_VECTOR_I16 + // We add a chain edge + //CurDAG->getEntryNode() + //opChain + ); + return SDValue(Res, 0); + + // LDIX_H + //return Op; + } +//#ifdef NOT_USEFUL_SO_FAR + else + // This case seems to not have been taken for BUILD_VECTOR from + // reduction pattern - + // see Tests/201_LoopVectorize/27_reduce_bugs/isConstantOrUndefBUILD_VECTOR + if (!isConstantOrUndefBUILD_VECTOR(BVN)) { + LLVM_DEBUG(dbgs() + << "LowerBUILD_VECTOR: case !isConstantOrUndefBUILD_VECTOR(BVN)\n"); + + // Use INSERT_VECTOR_ELT operations rather than expand to stores. 
+ // The resulting code is the same length as the expansion, but it doesn't + // use memory operations + EVT ResTy = BVN->getValueType(0); + + assert(ResTy.isVector()); + + return Op; // Not 100% sure it covers all cases + } +//#endif + + LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: UNSPECIFIED case\n"); + return SDValue(); +} + + +// Inspired from [LLVM]/llvm/lib/Target/ARM/ARMISelLowering.cpp +SDValue ConnexTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { + LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerINSERT_VECTOR_ELT().\n"); + + /* + TODO TODO: + We need to implement INSERT_VECTOR_ELT with: + WHERE INDEX == lane(op2) + VLOAD Rdst, ct (op3) + END_WHERE + */ + // INSERT_VECTOR_ELT is legal only for immediate indexes. + SDValue Lane = Op.getOperand(2); + if (!isa(Lane)) + return SDValue(); + + LLVM_DEBUG(dbgs() << "ConnexTargetLowering::LowerINSERT_VECTOR_ELT(): 2nd opnd (lane) is ct.\n"); + + return Op; +} + +/* +ALEX_TO_PROCESS +From /lib/Target/AMDGPU/AMDGPUISelLowering.h +/// This node is for VLIW targets and it is used to represent a vector + /// that is stored in consecutive registers with the same channel. 
+ /// For example: + /// |X |Y|Z|W| + /// T0|v.x| | | | + /// T1|v.y| | | | + /// T2|v.z| | | | + /// T3|v.w| | | | + BUILD_VERTICAL_VECTOR, + + +From /home/asusu/LLVM/llvm38Nov2016/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Vector = Op.getOperand(0); + SDValue Value = Op.getOperand(1); + SDValue Index = Op.getOperand(2); + + if (isa(Index) || + Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR) + return Op; + + Vector = vectorToVerticalVector(DAG, Vector); + SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), + Vector, Value, Index); + return vectorToVerticalVector(DAG, Insert); +} +*/ + + + +// From [LLVM]/llvm/lib/Target/Mips/MipsSEISelLowering.cpp +// (Mips original) Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT. +// +// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We +// choose to sign-extend but we could have equally chosen zero-extend. The +// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT +// result into this node later (possibly changing it to a zero-extend in the +// process). NOTE: the Connex version below creates no node; it only returns SDValue(). +SDValue ConnexTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + + SDLoc DL(Op); + EVT ResTy = Op->getValueType(0); + SDValue Op0 = Op->getOperand(0); + EVT VecTy = Op0->getValueType(0); + + /* TODO : See http://llvm.org/docs/doxygen/html/classllvm_1_1SDValue.html - requires + printing each component: Type, operation, etc. */ + LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerEXTRACT_VECTOR_ELT(): Op = "); + + return SDValue(); +} + + +// Inspired from llvm/lib/Target/X86/X86ISelLowering.cpp: +// +// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as +// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is +// one of the above mentioned nodes.
It has to be wrapped because otherwise +// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only +// be used to form addressing mode. These wrapped nodes will be selected +// into MOV32ri. +SDValue ConnexTargetLowering::LowerConstantPool(SDValue Op, + SelectionDAG &DAG) const { + LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerConstantPool().\n"); + + ConstantPoolSDNode *CP = cast(Op); + + // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the + // global base reg. + //unsigned char OpFlag = Subtarget.classifyLocalReference(nullptr); + + /* If we avoid using WrapperKind in DAG.getNode() below then + * we end up with an instruction selection error like + <> TargetConstantPool:i64<<8 x i64> > 0 + llc: /home/asusu/LLVM/llvm38Nov2016/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp:1466: llvm::SDValue {anonymous}::DAGCombiner::combine(llvm::SDNode*): Assertion `N->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned NULL!"' failed.>> + (see /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/201_LoopVectorize/25_GOOD_map/NEW/6/UF_1/NEW/STDerr31 ) + */ + unsigned WrapperKind = ConnexISD::Wrapper; + + //CodeModel::Model M = DAG.getTarget().getCodeModel(); + + auto PtrVT = getPointerTy(DAG.getDataLayout()); + // See http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html + SDValue Result = DAG.getTargetConstantPool( + CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), + //OpFlag); + 0); + SDLoc DL(CP); + Result = DAG.getNode(WrapperKind, + //0, + DL, PtrVT, Result); + + return Result; +} + +// From [LLVM]/llvm/lib/Target/Mips/MipsSEISelLowering.h +/* Lower VECTOR_SHUFFLE into one of a number of instructions depending on the + indices in the shuffle. + + VERY IMPORTANT: Here, in ISelLowering the DAG Combiner changes + (I think in all cases) the vector_shuffle SDNode into a BUILD_VECTOR. 
+   So we have to identify it here, before the DAG Combiner changes it and
+   replace it with the equivalent Connex instructions.
+   In fact, the DAG Combiner combines, if possible, a few vector_shuffles
+   into only one - I personally find it annoying, without any real benefit...
+*/
+// Summary of the implementation below:
+//  - A mask whose first element is 0 is left untouched (SDValue() is
+//    returned and nothing is replaced).
+//  - Otherwise delta = mask[0] is taken as a candidate shift amount and the
+//    mask is compared against the pattern mask[i] == i + delta.
+//  - If the pattern matches, a VLOAD_H / CELLSHL_H / NOP_BPF / LDSH_H node
+//    sequence is built on the first shuffle operand.
+//  - An LDIX_H / VLOAD_H / LT_H / NOP_BPF / WHERELT / ISHLV_SPECIAL_H /
+//    END_WHERE node sequence is then built and replaces every use of the
+//    shuffle node through DAG.ReplaceAllUsesWith().
+// The function always returns an empty SDValue.
+// NOTE(review): replacing uses from inside a custom-lowering hook and then
+// returning an empty SDValue is unusual -- confirm this is what the legalizer
+// expects on this LLVM version.
+SDValue ConnexTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  LLVM_DEBUG(dbgs() << "Entered ConnexTargetLowering::LowerVECTOR_SHUFFLE()\n");
+  LLVM_DEBUG(dbgs() << " Op = ";
+             (Op.getNode())->dump());
+
+  // See http://llvm.org/doxygen/SelectionDAGNodes_8h_source.html#l01432
+  // cast<> already asserts that Op is a ShuffleVectorSDNode, so no separate
+  // null check is required.
+  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+
+  const unsigned numElemsMask = SVN->getValueType(0).getVectorNumElements();
+  assert(numElemsMask == CONNEX_VECTOR_LENGTH);
+  // All index arithmetic below is done on signed ints: the mask may contain
+  // -1 (undef) and values >= numElems (lanes taken from the 2nd operand).
+  const int numElems = static_cast<int>(numElemsMask);
+
+  // Read the mask in place instead of copying it into a fixed-size array.
+  ArrayRef<int> mask = SVN->getMask();
+  LLVM_DEBUG(for (int i = 0; i < numElems; ++i)
+               dbgs() << " mask[" << i << "] = " << mask[i] << "\n";);
+
+  const int delta = mask[0];
+  if (delta == 0) {
+    // It seems we have no shifting
+    return SDValue();
+  }
+
+  // It seems we have shifting by constant delta.
+  //
+  // Number of leading lanes that must satisfy mask[i] == i + delta. For
+  // 0 < delta <= numElems this is numElems - delta. For any other delta
+  // (negative, i.e. undef first lane, or larger than the vector) the whole
+  // mask is scanned; this keeps every read inside the mask, which the former
+  // unsigned bound (numElemsMask - delta) did not guarantee.
+  const int prefixLen =
+      (delta > 0 && delta <= numElems) ? numElems - delta : numElems;
+
+  // Checking if we really have shifting by delta
+  bool shiftByDelta = true;
+  for (int i = 0; i < prefixLen; ++i) {
+    // MEGA-TODO: we should also check that we have delta-shift w.r.t. the
+    //   2nd data vector operand: if (mask[i] != CVL + i + delta)
+    if (mask[i] != i + delta) {
+      shiftByDelta = false;
+      break;
+    }
+  }
+  LLVM_DEBUG(dbgs() << " shiftByDelta = " << shiftByDelta << "\n");
+
+  bool circularShiftByDelta = false;
+  if (shiftByDelta) {
+    circularShiftByDelta = true;
+    for (int i = prefixLen; i < numElems; ++i) {
+      // MEGA-TODO: we should also check that we have circular-delta-shift
+      //   w.r.t. the 2nd data vector operand
+      if (mask[i] != i + delta) {
+        circularShiftByDelta = false;
+        break;
+      }
+    }
+  }
+  LLVM_DEBUG(dbgs() << " circularShiftByDelta = "
+                    << circularShiftByDelta << "\n");
+
+  // NOTE(review): this flag is always true. The check that was meant to
+  // clear it (a scan of the trailing delta lanes for
+  // mask[i] == CONNEX_VECTOR_LENGTH + i + delta) was guarded by
+  // "assignPartOf2ndOpnd == false" right after the flag had been set to
+  // true, so it could never execute. The effective behaviour (always true)
+  // is kept; the intended condition needs to be decided by the author.
+  bool assignPartOf2ndOpnd = true;
+  LLVM_DEBUG(dbgs() << " assignPartOf2ndOpnd = "
+                    << assignPartOf2ndOpnd << "\n");
+
+  SDLoc DL(Op);
+  SDValue svnOp0 = SVN->getOperand(0);
+  SDValue svnOp1 = SVN->getOperand(1);
+  const bool haveShift = circularShiftByDelta || shiftByDelta;
+
+  // Result of the shift sequence; stays null when no shift was recognized.
+  SDNode *ldSh = nullptr;
+
+  if (haveShift) {
+    SDValue ctDelta = DAG.getConstant(delta, DL, MVT::i16, true, false);
+    SDNode *vloadDelta = DAG.getMachineNode(Connex::VLOAD_H,
+                                            DL,
+                                            TYPE_VECTOR_I16,
+                                            MVT::Glue,
+                                            ctDelta);
+
+    SDNode *cellShl = DAG.getMachineNode(Connex::CELLSHL_H,
+                                         DL,
+                                         // NO return type
+                                         MVT::Glue,
+                                         svnOp0,
+                                         SDValue(vloadDelta, 0),
+                                         // The glue input edge
+                                         SDValue(vloadDelta, 1));
+
+    // MEGA-TODO: put delta NOPs
+    SDValue ct1 = DAG.getConstant(1 /* Num of cycles to NOP */,
+                                  DL, MVT::i16, true, false);
+    SDNode *nop = DAG.getMachineNode(Connex::NOP_BPF,
+                                     DL,
+                                     MVT::Glue,
+                                     ct1,
+                                     // Glue/chain edge
+                                     SDValue(cellShl, 0));
+
+    ldSh = DAG.getMachineNode(Connex::LDSH_H,
+                              DL,
+                              // Return type
+                              TYPE_VECTOR_I16,
+                              MVT::Glue,
+                              // The glue output port of predecessor
+                              SDValue(nop, 0));
+  } // END if (haveShift)
+
+  #ifdef BUGGY_DUE_TO_DAG_COMBINER
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineRegisterInfo &RegInfo = MF.getRegInfo();
+  unsigned virtReg = RegInfo.createVirtualRegister(&Connex::VectorHRegClass);
+  /*
+   * VERY IMPORTANT:
+   * From http://llvm.org/docs/doxygen/html/classllvm_1_1SelectionDAG.html:
+   *   SDValue getCopyToReg(SDValue Chain, SDLoc dl,
+   *                        unsigned Reg,
+   *                        SDValue N,
+   *                        SDValue Glue)
+   */
+  SDValue copyToReg = DAG.getCopyToReg(
+                          // VERY IMPORTANT: Chain input edge
+                          haveShift ? SDValue(ldSh, 1) : DAG.getEntryNode(),
+                          DL,
+                          virtReg,
+                          // Value copied to register
+                          haveShift ? SDValue(ldSh, 0) : svnOp1,
+                          // VERY IMPORTANT: Glue input edge
+                          haveShift ? SDValue(ldSh, 1)
+                                    : DAG.getEntryNode() // Hope this passes as a glue
+                          );
+  LLVM_DEBUG(dbgs() << " copyToReg = ";
+             (copyToReg.getNode())->dump());
+  #endif
+
+  // Last node of the WHERE sequence; stays null if that sequence is skipped.
+  SDNode *endWhere = nullptr;
+
+  if (assignPartOf2ndOpnd) {
+    SDNode *ldIx = DAG.getMachineNode(Connex::LDIX_H,
+                                      DL,
+                                      TYPE_VECTOR_I16,
+                                      MVT::Glue,
+                                      // We add a chain edge
+                                      haveShift ? SDValue(ldSh, 1)
+                                                : DAG.getEntryNode());
+
+    SDValue ctCVLDelta = DAG.getConstant(CONNEX_VECTOR_LENGTH - delta,
+                                         DL, MVT::i16, true, false);
+    SDNode *vloadCVLDelta = DAG.getMachineNode(Connex::VLOAD_H,
+                                               DL,
+                                               TYPE_VECTOR_I16,
+                                               MVT::Glue,
+                                               ctCVLDelta,
+                                               // Glue (or chain) input edge
+                                               SDValue(ldIx, 1));
+
+    SDNode *lt = DAG.getMachineNode(Connex::LT_H,
+                                    DL,
+                                    TYPE_VECTOR_I16,
+                                    MVT::Glue,
+                                    SDValue(ldIx, 0),
+                                    SDValue(vloadCVLDelta, 0),
+                                    // Glue (or chain) input edge
+                                    SDValue(vloadCVLDelta, 1));
+
+    SDValue ct1 = DAG.getConstant(1 /* Num of cycles to NOP */,
+                                  DL, MVT::i16, true, false);
+    SDNode *nop = DAG.getMachineNode(Connex::NOP_BPF,
+                                     DL,
+                                     MVT::Glue,
+                                     ct1,
+                                     // Glue/chain edge
+                                     SDValue(lt, 1));
+
+    SDNode *whereLt = DAG.getMachineNode(Connex::WHERELT, //_BUNDLE_H,
+                                         DL,
+                                         // Return type
+                                         TYPE_VECTOR_I16,
+                                         MVT::Glue,
+                                         SDValue(lt, 0),
+                                         // The glue output port of the NOP.
+                                         SDValue(nop, 0));
+
+    SDValue ct0 = DAG.getConstant(0, DL, MVT::i16, true, false);
+    SDNode *ishl = DAG.getMachineNode(
+                        Connex::ISHLV_SPECIAL_H,
+                        DL,
+                        TYPE_VECTOR_I16,
+                        MVT::Glue,
+                        svnOp1,
+                        ct0,
+                      #ifdef BUGGY_DUE_TO_DAG_COMBINER
+                        DAG.getRegister(virtReg, TYPE_VECTOR_I16),
+                      #else
+                        haveShift ? SDValue(ldSh, 0) : svnOp1,
+                      #endif
+                        // Glue (or chain) input edge
+                        SDValue(whereLt, 1));
+    endWhere = DAG.getMachineNode(
+                        Connex::END_WHERE,
+                        DL,
+                        TYPE_VECTOR_I16,
+                        MVT::Glue,
+                        SDValue(ishl, 0),
+                        // Glue (or chain) input edge
+                        SDValue(ishl, 1));
+  } // END if (assignPartOf2ndOpnd)
+
+  if (assignPartOf2ndOpnd)
+    DAG.ReplaceAllUsesWith(SVN, endWhere);
+  else if (haveShift)
+    DAG.ReplaceAllUsesWith(SVN, ldSh);
+
+  // Open items recorded by the author:
+  //  - MEGA-TODO: check for delta..CVL-delta, 2CVL-delta..2CVL-1
+  //  - MEGA-TODO: handle a mask of the form 0..x, x + CVL + 1 .. 2CVL-1
+  //  - TODO: an earlier version cycled forever on reduction loop code
+  //    (test 201_LoopVectorize/27_reduce_bugs).
+  //  - Note: HexagonISelLowering.cpp also has a method LowerVECTOR_SHIFT().
+  // The unreachable Mips-derived lowering chain that used to follow this
+  // return (guarded by NOT_ORIGINAL_CODE) has been removed.
+  return SDValue();
+}
+
+
+/* From http://llvm.org/docs/doxygen/html/classllvm_1_1TargetLoweringBase.html:
+virtual EVT getSetCCResultType (const DataLayout &DL, LLVMContext &Context, EVT VT) const
+    Return the ValueType of the result of SETCC operations.
+
+See also https://github.com/llvm-mirror/llvm/blob/master/lib/CodeGen/TargetLoweringBase.cpp
+ */
+// This code fixes the issue with type legalization of vector type:
+// Reported in llvm-dev thread:
+// http://lists.llvm.org/pipermail/llvm-dev/2016-June/100719.html
+//
+// Result type of a SETCC on operands of type VT:
+//  - vector VT: VT with its element type changed to the same-width integer
+//    (VT.changeVectorElementTypeToInteger(), the approach used by the Mips
+//    back end); the alternative of a vector of i1 is not used;
+//  - any other VT: the simple type of the pointer type for DL, which is what
+//    lib/CodeGen/TargetLoweringBase.cpp does.
+// Alternatives that were tried and rejected by the author:
+//  - returning MVT::i1 (as in the NVPTX back end) cycled forever;
+//  - returning VT itself broke test 25_Map for i16/i32 with the assertion
+//    `VT.isInteger() && Operand.getValueType().isInteger() &&
+//     "Invalid ZERO_EXTEND!"' in SelectionDAG::getNode().
+EVT ConnexTargetLowering::getSetCCResultType(const DataLayout &DL,
+                                             LLVMContext &Ctx,
+                                             EVT VT) const {
+  LLVM_DEBUG(dbgs()
+               << "Entered ConnexTargetLowering::getSetCCResultType().\n"
+               << " VT = "
+               // See http://llvm.org/docs/doxygen/html/structllvm_1_1EVT.html
+               << VT.getEVTString()
+               << " [END]\n");
+
+  if (!VT.isVector()) {
+    // Scalar comparison: use the pointer-sized simple type.
+    const EVT scalarTy = getPointerTy(DL).SimpleTy;
+
+    LLVM_DEBUG(dbgs() << "getSetCCResultType(): res = "
+                      << scalarTy.getEVTString()
+                      << " [END]\n");
+
+    return scalarTy;
+  }
+
+  LLVM_DEBUG(dbgs() << "getSetCCResultType(): "
+                    << "VT.getVectorNumElements() = "
+                    << VT.getVectorNumElements()
+                    << "\n");
+
+  // Vector comparison: keep the lane count, make every lane an integer.
+  const EVT vectorTy = VT.changeVectorElementTypeToInteger();
+
+  LLVM_DEBUG(dbgs()
+               << "getSetCCResultType(), case VT.isVector(): res = "
+               << vectorTy.getEVTString()
+               << " [END]\n");
+
+  return vectorTy;
+}
+/*
+lib/Target/PowerPC/PPCISelLowering.cpp
+EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
+                                          EVT VT) const {
+  if (!VT.isVector())
+    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
+
+  if (Subtarget.hasQPX())
+    return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
+
+  return VT.changeVectorElementTypeToInteger();
+}
+*/
+
Index: lib/Target/Connex/ConnexInstrInfo.h
===================================================================
--- lib/Target/Connex/ConnexInstrInfo.h
+++ lib/Target/Connex/ConnexInstrInfo.h
@@ -0,0 +1,96 @@
+//===-- ConnexInstrInfo.h - Connex Instruction Information ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Connex implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CONNEX_CONNEXINSTRINFO_H
+#define LLVM_LIB_TARGET_CONNEX_CONNEXINSTRINFO_H
+
+#include "Connex.h"
+#include "ConnexRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "ConnexGenInstrInfo.inc"
+
+namespace llvm {
+
+/// Connex implementation of the TargetInstrInfo interface: register copies,
+/// spill/fill code, branch analysis and hazard-recognizer creation.
+class ConnexInstrInfo : public ConnexGenInstrInfo {
+  const ConnexRegisterInfo RI;
+
+public:
+  ConnexInstrInfo();
+
+  /// Returns the register info object owned by this instruction info.
+  const ConnexRegisterInfo &getRegisterInfo() const { return RI; }
+
+  // Got a bit inspired from lib/Target/AMDGPU/SIInstrInfo.cpp
+  bool expandPostRAPseudo(MachineInstr &MI) const override;
+
+//#ifdef USE_POSTRA_SCHED
+  // Got inspired from llvm/lib/Target/PowerPC/PPCInstrInfo.h
+  ScheduleHazardRecognizer *
+  CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+                                     const ScheduleDAG *DAG) const override;
+//#endif
+  ScheduleHazardRecognizer *
+  CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
+                                 const ScheduleDAG *DAG) const override;
+
+#ifdef USE_PRERA_HAZARD_RECOGNIZER
+  ScheduleHazardRecognizer *
+  CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
+                               const ScheduleDAG *DAG) const override;
+#endif
+  void insertNoop(MachineBasicBlock &MBB,
+                  MachineBasicBlock::iterator MI) const override;
+
+  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                   const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+                   bool KillSrc) const override;
+
+  void storeRegToStackSlot(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MBBI, unsigned SrcReg,
+                           bool isKill, int FrameIndex,
+                           const TargetRegisterClass *RC,
+                           const TargetRegisterInfo *TRI) const override;
+
+  void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MBBI, unsigned DestReg,
+                            int FrameIndex, const TargetRegisterClass *RC,
+                            const TargetRegisterInfo *TRI) const override;
+
+  // The element type of Cond must be spelled out: SmallVectorImpl and
+  // ArrayRef are class templates.
+  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                     MachineBasicBlock *&FBB,
+                     SmallVectorImpl<MachineOperand> &Cond,
+                     bool AllowModify) const override;
+
+  unsigned removeBranch(MachineBasicBlock &MBB,
+                        int *BytesRemoved = nullptr) const override;
+
+  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
+                        const DebugLoc &DL,
+                        int *BytesAdded = nullptr) const override;
+
+  // NOTE(review): TargetInstrInfo::isPredicable() presumably takes a
+  // `const MachineInstr &` on this LLVM version; if so, this declaration
+  // hides the virtual instead of overriding it. Confirm and, if needed,
+  // change the declaration and its definition together.
+  bool isPredicable(MachineInstr &MI) const;
+
+protected:
+  /// Builds a memory operand describing the fixed stack object FI.
+  MachineMemOperand *GetMemOperand(MachineBasicBlock &MBB, int FI,
+                                   MachineMemOperand::Flags Flag) const;
+}; // end class ConnexInstrInfo
+} // end namespace llvm
+
+#endif
Index: lib/Target/Connex/ConnexInstrInfo.cpp
===================================================================
--- lib/Target/Connex/ConnexInstrInfo.cpp
+++ lib/Target/Connex/ConnexInstrInfo.cpp
@@ -0,0 +1,919 @@
+//===-- ConnexInstrInfo.cpp - Connex Instruction Information ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Connex implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Connex.h"
+
+/* NOT required - it works on the MachineInstr immediately after 1st scheduling pass,
+   (before RA, before TwoAddressInstructionPass, etc) - so a lot of other instructions
+   will be added after 1st scheduling pass:
+#define USE_PRERA_HAZARD_RECOGNIZER
+*/
+
+//#ifdef USE_POSTRA_SCHED
+  #include "ConnexHazardRecognizers.h"
+//#endif
+#include "ConnexHazardRecognizersPreRAScheduler.h"
+//#include "llvm/CodeGen/ScheduleDAG.h"
+#include "ConnexInstrInfo.h"
+#include "ConnexSubtarget.h"
+#include "ConnexTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Support/Debug.h"
+#define DEBUG_TYPE "connex-lower"
+
+#define GET_INSTRINFO_CTOR_DTOR
+#include "ConnexGenInstrInfo.inc"
+
+using namespace llvm;
+
+
+// Finds the neighbours of MI inside its parent basic block by scanning the
+// block from its first instruction.
+//   MI     - instruction to look up; must be inserted in a basic block.
+//   succMI - out-parameter (must not be null): receives the instruction that
+//            immediately follows MI, or nullptr if MI is the last instruction
+//            of the block.
+// Returns the instruction immediately preceding MI, or nullptr if MI is the
+// first instruction of the block.
+MachineInstr *getPredMachineInstr(MachineInstr *MI, MachineInstr **succMI) {
+  MachineBasicBlock *MBB = MI->getParent();
+
+  LLVM_DEBUG(dbgs() << "getPredMachineInstr(): MI.getOpcode() = "
+                    << MI->getOpcode() << "\n");
+
+  MachineInstr *predMI = nullptr;
+  *succMI = nullptr;
+
+  for (MachineBasicBlock::iterator I = MBB->begin(),
+                                   IE = MBB->end(); I != IE; ++I) {
+    MachineInstr *IMI = &*I;
+    if (IMI == MI) {
+      // The end iterator must not be dereferenced: only record a successor
+      // when MI is not the last instruction of the block.
+      ++I;
+      if (I != IE)
+        *succMI = &*I;
+      break;
+    }
+    predMI = IMI;
+    LLVM_DEBUG(dbgs() << "getPredMachineInstr(): (I in MBB of MI) I->getOpcode() = "
+                      << IMI->getOpcode() << "\n");
+  }
+
+  LLVM_DEBUG(dbgs() << "getPredMachineInstr(): MI = "
+                    << MI
+                    << "(" << MI << ")"
+                    << "\n");
+  if (*succMI != nullptr) {
+    // The successor instruction itself is deliberately not printed (only its
+    // address): printing it caused issues with NULL/invalid MachineInstr, at
+    // least in case of llc -regalloc=fast.
+    LLVM_DEBUG(dbgs() << "getPredMachineInstr(): succMI = "
+                      << "[TO BE DONE]"
+                      << "(" << *succMI << ")"
+                      << "\n");
+  }
+  else {
+    LLVM_DEBUG(dbgs() << "getPredMachineInstr(): succMI = NULL\n");
+  }
+
+  if (predMI != nullptr) {
+    LLVM_DEBUG(dbgs() << "getPredMachineInstr(): predMI = "
+                      << *predMI
+                      << "(" << predMI << ")"
+                      << "\n");
+  }
+  else {
+    LLVM_DEBUG(dbgs() << "getPredMachineInstr(): predMI = NULL\n");
+  }
+
+  return predMI;
+}
+
+
+// Forwards the call-frame setup/destroy pseudo opcodes to the generated base
+// class.
+ConnexInstrInfo::ConnexInstrInfo()
+    : ConnexGenInstrInfo(Connex::ADJCALLSTACKDOWN, Connex::ADJCALLSTACKUP) {}
+
+
+// Inspired from lib/Target/Mips/MipsInstrInfo.cpp
+// Creates a MachineMemOperand for the fixed stack object with frame index FI
+// in the function that contains MBB, using the size and alignment that the
+// frame info records for that object and the access flags given in Flag.
+MachineMemOperand *ConnexInstrInfo::GetMemOperand(MachineBasicBlock &MBB,
+                                          int FI,
+                                          MachineMemOperand::Flags Flag
+                                          ) const {
+  LLVM_DEBUG(dbgs() << "Entered ConnexInstrInfo::GetMemOperand()\n");
+
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  unsigned Align = MFI.getObjectAlignment(FI);
+
+  return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
+                                 Flag, MFI.getObjectSize(FI), Align);
+}
+
+
+/*
+From http://llvm.org/docs/doxygen/html/classllvm_1_1TargetInstrInfo.html:
+  virtual void copyPhysReg (MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const
+    Emit instructions to copy a pair of physical registers.
+  virtual void storeRegToStackSlot (MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const
+    Store the specified register of the given register class to the specified stack frame index.
+  virtual void loadRegFromStackSlot (MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const
+    Load the specified register of the given register class from the specified stack frame index.
+*/
+// Emits, before I, the instruction(s) that copy physical register SrcReg into
+// DestReg:
+//  - both registers in GPR:      MOV_rr (SrcReg gets the kill flag if KillSrc);
+//  - both registers in VectorH:  ORV_H DestReg, SrcReg, SrcReg;
+//  - either register in BoolMask: nothing is emitted;
+//  - anything else:              llvm_unreachable.
+// I is dereferenced for the debug trace, so it must point at an instruction.
+void ConnexInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator I,
+                                  const DebugLoc &DL, unsigned DestReg,
+                                  unsigned SrcReg, bool KillSrc) const {
+  LLVM_DEBUG(dbgs()
+        << "Entered ConnexInstrInfo::copyPhysReg(I = " << *I
+        << ", DestReg = " << DestReg
+        << ", SrcReg = " << SrcReg
+        << ")\n");
+
+  if (Connex::GPRRegClass.contains(DestReg, SrcReg)) {
+    BuildMI(MBB, I, DL, get(Connex::MOV_rr), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+  }
+  else if (Connex::VectorHRegClass.contains(DestReg, SrcReg)) {
+    // TODO: when SrcReg is known to hold a constant, a single
+    //   VLOAD_H DestReg, <constant> could be emitted instead of the ORV_H.
+    //
+    // The vector copy is expressed as "DestReg = SrcReg | SrcReg".
+    // NOTE(review): KillSrc is not propagated to either use of SrcReg here;
+    // confirm whether the last use should carry getKillRegState(KillSrc).
+    BuildMI(MBB, I, DL, get(Connex::ORV_H), DestReg)
+        .addReg(SrcReg)
+        .addReg(SrcReg);
+  }
+  else if (Connex::BoolMaskRegClass.contains(DestReg) ||
+           Connex::BoolMaskRegClass.contains(SrcReg)) {
+    // NOTE(review): no instruction is emitted for copies that involve a
+    // BoolMask register, i.e. the copy is silently dropped.
+    LLVM_DEBUG(dbgs()
+      << "ConnexInstrInfo::copyPhysReg(): DestReg or SrcReg are in BoolMask\n");
+    /*
+    // TODO TODO TODO TODO: what if register Wh31, also called R(31), is already in use for some other var?
+    BuildMI(MBB, I, DL, get(Connex::VLOAD_H), Connex::Wh31)
+        .addImm(0);
+
+    BuildMI(MBB, I, DL, get(Connex::ORV_H), DestReg)
+        .addReg(SrcReg) //, getKillRegState(KillSrc))
+        .addReg(Connex::Wh31, getKillRegState(KillSrc));
+    */
+  }
+#ifdef PREFERABLY_NOT_2019_03_21
+  else if ( (Connex::MSA128WRegClass.contains(DestReg) &&
+             Connex::VectorHRegClass.contains(SrcReg)) ||
+            (Connex::MSA128WRegClass.contains(SrcReg) &&
+             Connex::VectorHRegClass.contains(DestReg)) ) {
+
+    if (Connex::MSA128WRegClass.contains(DestReg)) {
+      LLVM_DEBUG(dbgs()
+        << "ConnexInstrInfo::copyPhysReg(): DestReg is TYPE_VECTOR_I32 and SrcReg is TYPE_VECTOR_I16\n");
+    }
+    // The second case has to test SrcReg; testing DestReg again made this
+    // branch unreachable.
+    else if (Connex::MSA128WRegClass.contains(SrcReg)) {
+      LLVM_DEBUG(dbgs()
+        << "ConnexInstrInfo::copyPhysReg(): DestReg is TYPE_VECTOR_I16 and SrcReg is TYPE_VECTOR_I32\n");
+    }
+
+    // Emitting BuildMI(MBB, I, DL, get(Connex::INLINEASM)) here makes llc
+    // report an error (message not preserved).
+    // This works surprisingly: BuildMI(MBB, I, DL, get(Connex::NOP_BITCONVERT_HW));
+
+  #ifdef COPY_REGISTER_IMPLEMENTED_WITH_ORV_H
+    BuildMI(MBB, I, DL, get(Connex::ORV_H), DestReg)
+        .addReg(SrcReg)
+        .addReg(SrcReg);
+  #endif
+  }
+#endif // PREFERABLY_NOT_2019_03_21
+  else
+    llvm_unreachable("Impossible reg-to-reg copy");
+}
+
+
+// storeRegToStackSlot() and loadRegFromStackSlot() use
+// the FI argument (frame index)
+// This implements spilling of registers (both scalar, and vector).
+//
+// Per register class:
+//  - GPR:      STD SrcReg, <FI>, 0;
+//  - VectorH:  ST_SPILL_H SrcReg, <LS row>, where the LS-memory row is
+//              (CONNEX_MEM_NUM_ROWS + 1) - FI, optionally preceded by a
+//              NOP_BPF (see below);
+//  - BoolMask: nothing is emitted;
+//  - other:    llvm_unreachable.
+// FI is asserted to be >= 2.
+void ConnexInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator I,
+                                          unsigned SrcReg, bool IsKill, int FI,
+                                          const TargetRegisterClass *RC,
+                                          const TargetRegisterInfo *TRI) const {
+  DebugLoc DL;
+
+  assert(FI >= 2 && "It seems I assumed wrong that framed index >= 2");
+  unsigned LSOffsetSpillLoad = (CONNEX_MEM_NUM_ROWS + 1) - FI;
+
+  if (I != MBB.end())
+    DL = I->getDebugLoc();
+
+  if (RC == &Connex::GPRRegClass) {
+    BuildMI(MBB, I, DL, get(Connex::STD))
+        .addReg(SrcReg, getKillRegState(IsKill))
+        .addFrameIndex(FI)
+        .addImm(0);
+    return;
+  }
+
+  if (RC == &Connex::BoolMaskRegClass) {
+    /*
+    BuildMI(MBB, I, DL, get(Connex::ST_H))
+      .addReg(SrcReg, getKillRegState(IsKill))
+      .addImm(CONNEX_MEM_NUM_ROWS - 100);
+    // TODO: this is just bogus I guess, no need to spill v8i1 register
+    */
+    return;
+  }
+
+  if (RC != &Connex::VectorHRegClass)
+    llvm_unreachable("Connex back end: Can't store this register to stack slot");
+
+  // ---- Vector register spill ----
+  LLVM_DEBUG(dbgs() << " ConnexInstrInfo::storeRegToStackSlot(): Spilling Wh"
+                    << SrcReg
+                    << " to LSOffsetSpillLoad = "
+                    << LSOffsetSpillLoad
+                    << " (FI = "
+                    << FI
+                    << "), "
+                    << "MBB = " << MBB.getFullName()
+                    << ", &MBB.front() = "
+                    << (MBB.empty() ? nullptr : &MBB.front()) << "\n");
+
+  /* VERY IMPORTANT (author's observation, test DawnCC/91_SAD_f16): if we
+       have INLINEASM at the beginning of the MBB, the MBB.front() is
+       the 1st instruction AFTER these INLINEASM - this is why we can end up
+       adding more NOPs...
+     IMPORTANT-TODO: we should take into consideration that vector.body has
+       INLINEASM with host-side for loop here normally.
+
+     Note: this method is spilling the destination register of the
+       instruction *(I-1). Printing *I is avoided on purpose: it triggered a
+       strange LLVM error in certain cases (test DawnCC/90_SSD_f16).
+
+     Important-TODO: maybe we can avoid inserting the NOP now by making the
+       post-RA (maybe even the pre-RA) scheduler reschedule instructions
+       to insert a useful instruction in this delay slot.
+
+     Adding the NOP is mandatory if the previous instruction updates the
+       spilled register, since all (i)write instructions require the
+       insertion of a delay slot between them and the instructions that
+       generates their operands
+       - in this case the register to be written to the LS memory.
+
+     Example from a debug trace:
+         *(I--) = %vreg538 = XORV_H %vreg106, %vreg105
+         *I     = %vreg175 = ADDV_H %vreg149, %vreg164
+     In this case it spills %vreg538 to LS memory
+       - with an instruction like LS[1020] = R...
+   */
+
+  // I may be MBB.end(); never dereference it in that case.
+  MachineInstr *IMI = (I != MBB.end()) ? &*I : nullptr;
+  // True when an instruction precedes the insertion point.
+  const bool hasPrev = (I != MBB.begin());
+
+  LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): IMI = "
+                    << IMI
+                    << "\n");
+  LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): IMI != &MBB.front() = "
+                    << hasPrev
+                    << "\n");
+
+  if (hasPrev) {
+    MachineBasicBlock::iterator Iprev = I;
+    --Iprev;
+    MachineInstr *IprevMI = &*Iprev;
+    const bool prevIsInlineAsm = IprevMI->getOpcode() == Connex::INLINEASM;
+    const bool prevIsSymImmLoad =
+        IprevMI->getOpcode() == Connex::VLOAD_H_SYM_IMM;
+
+    LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): IprevMI = "
+                      << *IprevMI
+                      << "\n");
+    LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): IprevMI->getNumOperands() = "
+                      << IprevMI->getNumOperands()
+                      << "\n");
+    LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): IprevMI->getOpcode() == Connex::INLINEASM = "
+                      << prevIsInlineAsm
+                      << "\n");
+    LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): IprevMI->getOpcode() == Connex::VLOAD_H_SYM_IMM = "
+                      << prevIsSymImmLoad
+                      << "\n");
+    // The case where I screw up is LS[1013] = ...
+    // because the INLINEASM before it is the MBB.front() and is INLINEASM.
+
+    if (IprevMI->getNumOperands() > 0 || // MEGA-TODO: understand why I give this
+        prevIsInlineAsm ||
+        // 2018_12_27
+        prevIsSymImmLoad) {
+      LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): Handling special cases.\n");
+
+      if (prevIsSymImmLoad) {
+        // 2018_12_27: Treating Symbolic immediate operands
+        // MEGA-TODO: check
+        assert(IprevMI->getNumOperands() > 0); // Just checking
+        assert(IMI != nullptr && IMI->getOpcode() == Connex::INLINEASM && "The INLINEASM with the immediate operand should be next for VLOAD_H_SYM_IMM.");
+
+        LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): Treating VLOAD_H_SYM_IMM case.\n");
+        // Move the insertion point past the INLINEASM that carries the
+        // immediate, so the NOP/spill do not separate the pair.
+        ++I;
+      }
+
+      // Does the previous instruction define the register being spilled?
+      // (Operand 0 is only inspected when it exists.)
+      const bool prevDefinesSrc =
+          IprevMI->getNumOperands() > 0 &&
+          IprevMI->getOperand(0).isReg() &&
+          IprevMI->getOperand(0).isDef() &&
+          IprevMI->getOperand(0).getReg() == SrcReg;
+
+      if (prevDefinesSrc || prevIsInlineAsm) {
+        /* Important-TODO: check better: first, for SAD.f16 we have a COPY
+           between the host-for and the spill - so we should do these checks
+           after the hoisting of spills, etc - IMPORTANT: either in
+           ConnexAsmPrinter.cpp or PostRAHazardRecognizer which I'm afraid to
+           run for programs using bigger types like f16 - e.g., SSD.f16.
+           It is possible that the instruction IprevMI be a
+           VLOAD or a for loop that has an instruction with dst register
+           the one that is spilled. */
+        LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): Adding NOP_BPF to avoid data hazards...[Explain better...]\n");
+        BuildMI(MBB, I, DL, get(Connex::NOP_BPF));
+      }
+      else {
+        // Only the address and opcode of IMI are printed: printing *IMI was
+        // observed by the author to crash in MachineInstr.cpp.
+        LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): Not putting NOP after IprevMI = "
+                          << *IprevMI
+                          << " before: IMI = " << IMI << ",\n");
+        LLVM_DEBUG(if (IMI != nullptr)
+                     dbgs() << " IMI->getOpcode() = " << IMI->getOpcode() << "\n";);
+      }
+    }
+    else {
+      LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): else case for "
+                           "if (IprevMI != NULL && ...)\n");
+    }
+  }
+  else {
+    LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): else case for "
+                         "if (IMI != NULL && Iprev != &MBB.front())\n");
+
+    // The spill goes at the very start of the block:
+    // we conservatively put a NOP before the spill (Store)
+    BuildMI(MBB, I, DL, get(Connex::NOP_BPF));
+  }
+
+  BuildMI(MBB, I, DL, get(Connex::ST_SPILL_H))
+    .addReg(SrcReg, getKillRegState(IsKill))
+    // .addFrameIndex(FI) cannot be used here; it fails in
+    // MachineInstr::addOperand() with the assertion
+    // "Trying to add an operand to a machine instr that is already done!".
+    //
+    // Even if Connex does NOT have a stack, we can use LS mem to easily
+    // simulate it.
+    .addImm(LSOffsetSpillLoad);
+  // TODO TODO: get num vector registers from ConnexRegisterInfo.td: def VectorH: RegisterClass<"Connex", [v8i16], 32,
+
+  LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): Added ST_SPILL_H instruction.\n");
+  LLVM_DEBUG(dbgs() << " storeRegToStackSlot(): MBB = " << MBB << "\n");
+}
+
+/*
+This implements filling/reloading - i.e., load for spilled registers
+ (both scalar, and vector).
+
+Per register class:
+  - GPR:     LDD DestReg, <FI>, 0;
+  - VectorH: LD_FILL_H DestReg, <LS row>, with the same row computation as in
+             storeRegToStackSlot(): (CONNEX_MEM_NUM_ROWS + 1) - FI;
+  - other:   llvm_unreachable.
+NOTE(review): storeRegToStackSlot() silently accepts BoolMask registers while
+  this function treats them as unreachable - confirm that a BoolMask reload
+  can never be requested.
+*/
+void ConnexInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator I,
+                                           unsigned DestReg, int FI,
+                                           const TargetRegisterClass *RC,
+                                           const TargetRegisterInfo *TRI) const {
+  DebugLoc DL;
+
+  assert(FI >= 2 && "I assumed wrong that framed index >= 2");
+  unsigned LSOffsetFillLoad = (CONNEX_MEM_NUM_ROWS + 1) - FI;
+
+  if (I != MBB.end())
+    DL = I->getDebugLoc();
+
+  if (RC == &Connex::GPRRegClass) {
+    BuildMI(MBB, I, DL, get(Connex::LDD), DestReg)
+        .addFrameIndex(FI)
+        .addImm(0);
+  }
+  else if (RC == &Connex::VectorHRegClass) {
+    // Approaches that do NOT work:
+    //  - LDD DestReg, <FI>, 0 generates a malformed scalar instruction with
+    //    a vector register;
+    //  - LD_H DestReg, CONNEX_MEM_NUM_ROWS - 1 - DestReg is NOT correct
+    //    since LLVM assumes it uses a stack and the operations are sort of
+    //    PUSH/POP. Even if Connex does NOT have a stack, we can use LS to
+    //    easily simulate it.
+
+    LLVM_DEBUG(dbgs() << " ConnexInstrInfo::loadRegFromStackSlot(): Filling Wh"
+                      << DestReg
+                      << " from LSOffsetFillLoad = "
+                      << LSOffsetFillLoad
+                      << " (FI = "
+                      << FI
+                      << ")\n");
+
+    /*
+    IMPORTANT: Adding the NOP is NOT required, since the iread Connex
+      instruction does NOT require the insertion of a delay slot between
+      them and the instruction that uses the register read from the LS memory.
+    */
+    BuildMI(MBB, I, DL, get(Connex::LD_FILL_H), DestReg)
+      .addImm(LSOffsetFillLoad);
+    // TODO TODO TODO: get num vector registers from ConnexRegisterInfo.td: def VectorH: RegisterClass<"Connex", [v128i16], 32,
+  }
+  else
+    llvm_unreachable("Connex back end: Can't load this register from stack slot");
+}
+
+// Analyzes the terminators of MBB, walking from the bottom of the block.
+// Only unconditional JMP branches are understood:
+//  - returns false (analysis succeeded) when every unpredicated terminator
+//    is a JMP; TBB is then set from a JMP's target operand, or set to null
+//    when (with AllowModify) the JMP was a fall-through and was erased;
+//  - returns true (cannot analyze) for a terminator that is not a branch or
+//    for any branch other than JMP.
+// With AllowModify set, instructions following a JMP are erased, Cond is
+// cleared and FBB is set to null.
+bool ConnexInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
+                                    MachineBasicBlock *&TBB,
+                                    MachineBasicBlock *&FBB,
+                                    SmallVectorImpl<MachineOperand> &Cond,
+                                    bool AllowModify) const {
+  // Start from the bottom of the block and work up, examining the
+  // terminator instructions.
+  MachineBasicBlock::iterator I = MBB.end();
+  while (I != MBB.begin()) {
+    --I;
+    if (I->isDebugValue())
+      continue;
+
+    // Working from the bottom, when we see a non-terminator
+    // instruction, we're done.
+    if (!isUnpredicatedTerminator(*I))
+      break;
+
+    // A terminator that isn't a branch can't easily be handled
+    // by this analysis.
+    if (!I->isBranch())
+      return true;
+
+    // Cannot handle conditional branches
+    if (I->getOpcode() != Connex::JMP)
+      return true;
+
+    // Handle unconditional branches.
+    if (!AllowModify) {
+      TBB = I->getOperand(0).getMBB();
+      continue;
+    }
+
+    // If the block has any instructions after a J, delete them.
+    while (std::next(I) != MBB.end())
+      std::next(I)->eraseFromParent();
+    Cond.clear();
+    FBB = nullptr;
+
+    // Delete the J if it's equivalent to a fall-through.
+    if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+      TBB = nullptr;
+      I->eraseFromParent();
+      I = MBB.end();
+      continue;
+    }
+
+    // TBB is used to indicate the unconditional destination.
+    TBB = I->getOperand(0).getMBB();
+  }
+
+  return false;
+}
+
+// Appends an unconditional JMP to TBB at the end of MBB and returns 1 (the
+// number of instructions inserted). Conditional branches (non-empty Cond)
+// are not supported and hit llvm_unreachable. BytesAdded is not written.
+unsigned ConnexInstrInfo::insertBranch(MachineBasicBlock &MBB,
+                                       MachineBasicBlock *TBB,
+                                       MachineBasicBlock *FBB,
+                                       ArrayRef<MachineOperand> Cond,
+                                       const DebugLoc &DL,
+                                       int *BytesAdded) const {
+  // Shouldn't be a fall through.
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+  if (Cond.empty()) {
+    // Unconditional branch
+    assert(!FBB && "Unconditional branch with multiple successors!");
+    BuildMI(&MBB, DL, get(Connex::JMP)).addMBB(TBB);
+    return 1;
+  }
+
+  llvm_unreachable("Unexpected conditional branch");
+}
+
+// Erases the JMP instructions found at the end of MBB (debug values are
+// skipped) and returns how many were removed. BytesRemoved is not written.
+unsigned ConnexInstrInfo::removeBranch(MachineBasicBlock &MBB,
+                                       int *BytesRemoved) const {
+  MachineBasicBlock::iterator I = MBB.end();
+  unsigned Count = 0;
+
+  while (I != MBB.begin()) {
+    --I;
+    if (I->isDebugValue())
+      continue;
+    if (I->getOpcode() != Connex::JMP)
+      break;
+    // Remove the branch.
+    I->eraseFromParent();
+    I = MBB.end();
+    ++Count;
+  }
+
+  return Count;
+}
+
+/*
+TODO TODO: better implement it in ConnexTargetMachine::addPreRegAlloc(), in
+    order to avoid any spills the register allocator might create.
+
+Creating in ConnexInstrInfo::expandPostRAPseudo() bundle instructions
+  with VLOAD_H_SYM_IMM + INLINEASM.
+ This is a decent compromise although I do NOT use pseudo-instructions, + using this after Register Allocation (PostRA) works because: + - IMPORTANT: INLINEASM is considered a pseudo-instruction (NOTE that + VLOAD_H_SYM_IMM is NOT considered a pseudo-instruction); + - pre-RA scheduler does NOT break the VLOAD_H_SYM_IMM from its associated + INLINEASM; + - register allocator does NOT break either the VLOAD_H_SYM_IMM from its + associated INLINEASM, more exactly it doesn't insert spills or fills + between the two instructions as far as I can see. IMPORTANT: however I + am NOT sure if this is always going to hold. +As of Feb 2017, class TargetInstrInfo + (see http://llvm.org/docs/doxygen/html/classllvm_1_1TargetInstrInfo.html) + has a few methods called on MachineInstr, but expandPostRAPseudo() seems + to be a very good candidate (also it has no method with MachineSDNode). + Anyhow, we could create and register our own pass working on MachineInstr in + order to bundle instructions together (or on MachineSDNode, before pre-RA + scheduler, although I guess it might be DIFFICULT to bundle from + MachineSDNode to MachineInstr, since we have to perform a simple scheduling). + +From http://llvm.org/docs/doxygen/html/classllvm_1_1TargetInstrInfo.html + <> +*/ +bool ConnexInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { + // Making expandPostRAPseudo() do nothing: + return false; + + LLVM_DEBUG(dbgs() << "ConnexInstrInfo::expandPostRAPseudo(): MI.getOpcode() = " + << MI.getOpcode() << "\n"); + + MachineBasicBlock *MBB = MI.getParent(); + DebugLoc DL = MBB->findDebugLoc(MI); + + /* + // Inspired from lib/Target/PowerPC/PPCCTRLoops.cpp + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PIE = MBB->pred_end(); PI != PIE; ++PI) + Preds.push_back(*PI); + */ + switch (MI.getOpcode()) { + default: + //return expandPostRAPseudo(MI); + return false; + + case Connex::VLOAD_H_SYM_IMM: + // This is just a placeholder for register allocation. 
+ LLVM_DEBUG(dbgs() << + "ConnexInstrInfo::expandPostRAPseudo(): found VLOAD_H_SYM_IMM\n"); + //MI.eraseFromParent(); + break; + + case Connex::INLINEASM: + // This is just a placeholder for register allocation. + LLVM_DEBUG(dbgs() << + "ConnexInstrInfo::expandPostRAPseudo(): found INLINEASM\n"); + + /* + MachineInstr *predMI = NULL; + MachineInstr *succMI = NULL; + for (MachineBasicBlock::iterator I = MBB->begin(), + IE = MBB->end(); I != IE; ++I) { + MachineInstr *IMI = I; + if (IMI == &MI) { + I++; + succMI = I; + // predMI contains normally instruction VLOAD_H_SYM_IMM + break; + } + predMI = I; + LLVM_DEBUG(dbgs() << "expandPostRAPseudo(): (pred) I->getOpcode() = " + << I->getOpcode() << "\n"); + } + */ + MachineInstr *succMI; + MachineInstr *predMI = getPredMachineInstr(&MI, &succMI); + + if (predMI != NULL) { + LLVM_DEBUG(dbgs() << "expandPostRAPseudo(): predMI = " + << *predMI + << "(" << predMI << ")" + << "\n"); + LLVM_DEBUG(dbgs() << "expandPostRAPseudo(): succMI = " + << *succMI + << "(" << succMI << ")" + << "\n"); + LLVM_DEBUG(dbgs() << "expandPostRAPseudo(): MI = " + << MI + << "(" << &MI << ")" + << "\n"); + + if (predMI->getOpcode() == Connex::VLOAD_H_SYM_IMM) { + // Inspired from lib/Target/AMDGPU/SIInstrInfo.cpp + // (or Mips/MipsDelaySlotFiller.cpp) + /* Create a bundle so these instructions won't be re-ordered by the + post-RA scheduler. */ + + /* + #ifdef THIS_DOES_NOT_ASMPRINT_BUNDLES + MIBundleBuilder Bundler(*MBB, MI); + + LLVM_DEBUG(dbgs() << "expandPostRAPseudo(): predMI->getParent() = " + << predMI->getParent() << "\n"); + + // This must NOT be commented. 
Otherwise, it results in ~strange error + in ConnexMCInstLower::Lower() + predMI->eraseFromParent(); + LLVM_DEBUG(dbgs() << "expandPostRAPseudo(): appending predMI to bundle\n"); + Bundler.append(predMI); + + LLVM_DEBUG(dbgs() << "expandPostRAPseudo(): calling finalizeBundle()\n"); + // See http://llvm.org/docs/doxygen/html/MachineInstrBundle_8cpp_source.html#l00217 + llvm::finalizeBundle(*MBB, Bundler.begin()); + + MI.eraseFromParent(); + + #ifdef NOT_USEFUL + // Inspired from http://llvm.org/docs/doxygen/html/MachineInstrBuilder_8h_source.html#l00434 + MI.bundleWithPred(); + // Does NOT compile: llvm::finalizeBundle(MBB, predMI); + #endif + */ + + /* We now know that MI is the INLINEASM instruction that + needs to be bundled with the previous instruction, predMI. + */ + /* + We do NOT use MIBundleBuilder, with eventual MI/predMI/succMI.eraseFromParent(). + Just predMI and succMI iterators. + Note that succMI is required if we want to bundle + instructions in the interval + predMI..MI, where succMI = succ(MI). + + So we normally bundle here: predMI, MI (without succMI). + */ + /* See llvm.org/docs/doxygen/html/MachineInstrBundle_8cpp_source.html#l00106 + and http://llvm.org/docs/doxygen/html/MachineInstrBundle_8cpp_source.html#l00217 + */ + llvm::finalizeBundle(*MBB, + (MachineBasicBlock::instr_iterator)predMI, + (MachineBasicBlock::instr_iterator)succMI); + //(MachineBasicBlock::instr_iterator)&MI); + + /* + // See http://llvm.org/docs/doxygen/html/classllvm_1_1MIBundleBuilder.html + // MIBundleBuilder (MachineBasicBlock &BB, MachineBasicBlock::iterator B, MachineBasicBlock::iterator E) + // Create a bundle from the sequence of instructions between B and E. 
+ MIBundleBuilder Bundler(*MBB, predMI, MI); + + // MI.eraseFromParent(); + // Bundler.append(&MI); + + //Bundler.append(&MI); + // + + // Gives error + //include/llvm/CodeGen/MachineInstrBundleIterator.h:42: + //llvm::MachineInstrBundleIterator::MachineInstrBundleIterator(Ty*) + //[with Ty = llvm::MachineInstr]: + //Assertion `(!MI || !MI->isBundledWithPred()) && "It's not legal to + //initialize " "MachineInstrBundleIterator " "with a bundled MI"' failed. + ////MIBundleBuilder Bundler(*MBB, predMI, *succMI); + + // See http://llvm.org/docs/doxygen/html/MachineInstrBundle_8cpp_source.html#l00217 + llvm::finalizeBundle(*MBB, Bundler.begin()); + + MI.eraseFromParent(); + + // This yields error <<[with Ty = llvm::MachineInstr]: + // Assertion `(!MI || !MI->isBundledWithPred()) && + // "It's not legal to initialize " "MachineInstrBundleIterator " + // "with a bundled MI"' failed.>> + // predMI->eraseFromParent(); + */ + } + } + + break; + } + + LLVM_DEBUG(dbgs() << "Before exit expandPostRAPseudo():\n"); + // Gives error since MI can be bundled: <> MachineBasicBlock &MBB = *(MI.getParent()); + + // From http://llvm.org/docs/doxygen/html/classllvm_1_1MachineBasicBlock.html + //for (auto it: *MBB) + for (MachineBasicBlock::iterator I = MBB->begin(), + IE = MBB->end(); I != IE; ++I) { + /* + LLVM_DEBUG(dbgs() << "ConnexInstrInfo::expandPostRAPseudo(): it->getOpcode() = " + << it->getOpcode() << "\n"); + */ + LLVM_DEBUG(dbgs() << " I = " << *I << "\n"); + /* + switch (MI.getOpcode()) { + } + */ + } + + /* + const SIRegisterInfo *TRI + = static_cast(ST.getRegisterInfo()); + MachineFunction &MF = MBB->getParent(); + unsigned Reg = MI.getOperand(0).getReg(); + unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0); + unsigned RegHi = TRI->getSubReg(Reg, AMDGPU::sub1); + + // Create a bundle so these instructions won't be re-ordered by the + // post-RA scheduler. 
+ MIBundleBuilder Bundler(*MBB, MI); + Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg)); + + // Add 32-bit offset from this instruction to the start of the + // constant data. + Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo) + .addReg(RegLo) + .addOperand(MI.getOperand(1))); + + llvm::finalizeBundle(*MBB, Bundler.begin()); + + MI.eraseFromParent(); + break; + */ + + return false; +} // END ConnexInstrInfo::expandPostRAPseudo() + + +//#ifdef USE_POSTRA_SCHED +// Inspired from llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +// See http://llvm.org/docs/doxygen/html/classllvm_1_1TargetInstrInfo.html +ScheduleHazardRecognizer *ConnexInstrInfo::CreateTargetPostRAHazardRecognizer( + const InstrItineraryData *II, + const ScheduleDAG *DAG) const { + /* + unsigned Directive = + DAG->MF.getSubtarget().getDarwinDirective(); + */ + LLVM_DEBUG(dbgs() << "Entered ConnexInstrInfo::CreateTargetPostRAHazardRecognizer()\n"); + + return new ConnexDispatchGroupSBHazardRecognizer(II, DAG); +} +//#endif + + +/* +ScheduleHazardRecognizer * +ConnexInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const { + LLVM_DEBUG(dbgs() << "Entered ConnexInstrInfo::CreateTargetPostRAHazardRecognizer(MachineFunction)\n"); + +// TODO TODO TODO TODO TODO TODO TODO: Get inspired from AMDGPU how they added separate +// PostRA HazardRecognizer. +// See http://llvm.org/doxygen/classllvm_1_1MachineFunction.html + return new ConnexDispatchGroupSBHazardRecognizer(II, DAG); +} +*/ + +// Pre-RA MI-scheduler - used if I give llc -enable-misched ... 
+// See http://llvm.org/docs/doxygen/html/classllvm_1_1TargetInstrInfo.html +ScheduleHazardRecognizer *ConnexInstrInfo::CreateTargetMIHazardRecognizer( + const InstrItineraryData *II, + const ScheduleDAG *DAG) const { + LLVM_DEBUG(dbgs() << + "Entered ConnexInstrInfo::CreateTargetMIHazardRecognizer()\n"); + + return new ConnexDispatchGroupSBHazardRecognizerPreRAScheduler(II, DAG); +} + + +#ifdef USE_PRERA_HAZARD_RECOGNIZER +// Pre-RA scheduler - default scheduler (no special param given to llc) +// See http://llvm.org/docs/doxygen/html/classllvm_1_1TargetInstrInfo.html +ScheduleHazardRecognizer *ConnexInstrInfo::CreateTargetHazardRecognizer( + const TargetSubtargetInfo *STI, + const ScheduleDAG *DAG) const { + LLVM_DEBUG(dbgs() << "Entered ConnexInstrInfo::CreateTargetHazardRecognizer()\n"); + + return new ConnexDispatchGroupSBHazardRecognizerPreRAScheduler( + // See http://llvm.org/docs/doxygen/html/TargetSubtargetInfo_8h_source.html#l00100 + STI->getInstrItineraryData(), + DAG); +} +#endif + +// Inspired from llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +void ConnexInstrInfo::insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + LLVM_DEBUG(dbgs() << "Entered ConnexInstrInfo::insertNoop()\n"); + + DebugLoc DL; + BuildMI(MBB, MI, DL, get(Connex::NOP)); +} + + +// From http://llvm.org/docs/doxygen/html/classllvm_1_1TargetInstrInfo.html: <> +/* From http://llvm.org/docs/doxygen/html/classllvm_1_1MachineInstr.html: + <> +*/ +// Inspired from ARMBaseInstrInfo::isPredicable +bool ConnexInstrInfo::isPredicable(MachineInstr &MI) const { + //if (!MI.isPredicable()) + // return false; + LLVM_DEBUG(dbgs() << "ConnexInstrInfo::isPredicable(): MI.getOpcode() = " + << MI.getOpcode() << "\n"); + + if (MI.getOpcode() == Connex::VLOAD_H) { + return true; + } + + return false; +} + Index: lib/Target/Connex/ConnexMCInstLower.h =================================================================== --- lib/Target/Connex/ConnexMCInstLower.h +++ 
namespace llvm {
// Forward declarations: only references and pointers to these types are used
// in this header.
class AsmPrinter;
class MCContext;
class MCInst;
class MCOperand;
class MCSymbol;
class MachineInstr;
class MachineModuleInfoMachO;
class MachineOperand;
class Mangler;

// ConnexMCInstLower - This class is used to lower a MachineInstr into an
// MCInst.
class LLVM_LIBRARY_VISIBILITY ConnexMCInstLower {
  // MC context passed in at construction; stored by reference.
  MCContext &Ctx;

  // Asm printer passed in at construction; stored by reference.
  AsmPrinter &Printer;

public:
  // Both arguments are stored by reference, not copied.
  ConnexMCInstLower(MCContext &ctx, AsmPrinter &printer)
      : Ctx(ctx), Printer(printer) {}

  // Lower the machine instruction MI into OutMI.
  void Lower(const MachineInstr *MI, MCInst &OutMI) const;

  // Build the MC operand for the symbol operand MO, given its symbol Sym.
  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;

  // Return the MC symbol for the global-address operand MO.
  MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
};
} // end namespace llvm
+// +//===----------------------------------------------------------------------===// + +#include "ConnexMCInstLower.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/SmallString.h" + +#include "llvm/Support/Debug.h" // for dbgs and LLVM_DEBUG() macro +#define DEBUG_TYPE "mc-inst-lower" + + +using namespace llvm; + +MCSymbol * +ConnexMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { + return Printer.getSymbol(MO.getGlobal()); +} + +MCOperand ConnexMCInstLower::LowerSymbolOperand(const MachineOperand &MO, + MCSymbol *Sym) const { + + const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); + + if (!MO.isJTI() && MO.getOffset()) + llvm_unreachable("unknown symbol op"); + + return MCOperand::createExpr(Expr); +} + +void ConnexMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { + LLVM_DEBUG(dbgs() << "Entered ConnexMCInstLower::Lower(*MI = " + << *MI << ")...\n"); + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + LLVM_DEBUG(dbgs() << "ConnexMCInstLower::Lower(): MO = " + << MO << "\n"); + LLVM_DEBUG(dbgs() << " ConnexMCInstLower::Lower(): MO.getType() = " + << MO.getType() << "\n"); + + MCOperand MCOp; + + switch (MO.getType()) { + + default: + MI->dump(); + /* + LLVM_DEBUG(dbgs() << "ConnexMCInstLower::Lower(): MO.getType() = " + << MO.getType() << "\n"); + */ + + llvm_unreachable("unknown operand type"); + + + + case MachineOperand::MO_ExternalSymbol: { + const MCSymbol *Symbol = Printer.GetExternalSymbolSymbol(MO.getSymbolName()); + MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; + const MCExpr *Expr = 
MCSymbolRefExpr::create(Symbol, Kind, Ctx); + MCOp = MCOperand::createExpr(Expr); + //Offset += MO.getOffset(); + break; + } + + //case MachineOperand::MO_MetaData: { + case MachineOperand::MO_Metadata: { + continue; + //break; + } + + case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) + continue; + MCOp = MCOperand::createReg(MO.getReg()); + break; + + case MachineOperand::MO_Immediate: + MCOp = MCOperand::createImm(MO.getImm()); + break; + + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::createExpr( + MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx)); + break; + + case MachineOperand::MO_RegisterMask: + continue; + case MachineOperand::MO_GlobalAddress: + MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); + break; + } + + OutMI.addOperand(MCOp); + } +} + Index: lib/Target/Connex/ConnexRegisterInfo.h =================================================================== --- lib/Target/Connex/ConnexRegisterInfo.h +++ lib/Target/Connex/ConnexRegisterInfo.h @@ -0,0 +1,77 @@ +//===-- ConnexRegisterInfo.h - Connex Register Information Impl -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the Connex implementation of the TargetRegisterInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_CONNEX_CONNEXREGISTERINFO_H +#define LLVM_LIB_TARGET_CONNEX_CONNEXREGISTERINFO_H + +// 2019_03_30: #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" + +#define GET_REGINFO_HEADER +#include "ConnexGenRegisterInfo.inc" + +namespace llvm { + +struct ConnexRegisterInfo : public ConnexGenRegisterInfo { + + ConnexRegisterInfo(); + + // Inspired from lib/Target/Mips/MipsRegisterInfo.cpp + const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, + unsigned Kind) const; + + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + + /* + From http://llvm.org/doxygen/classllvm_1_1TargetRegisterInfo.html: + <> + */ + BitVector getReservedRegs(const MachineFunction &MF) const override; + + void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, + unsigned FIOperandNum, + RegScavenger *RS = nullptr) const override; + + unsigned getFrameRegister(const MachineFunction &MF) const override; + + + /* Addressing bug + (llc -O0, at pass: "********** FAST REGISTER ALLOCATION **********") + <> + + (Using suggestion from at https://groups.google.com/forum/#!topic/llvm-dev/fEyD9YREi5M). + */ + // See http://llvm.org/docs/doxygen/html/classllvm_1_1TargetRegisterInfo.html + // Returns true if the target requires (and can make use of) the register scavenger. 
+ virtual bool requiresRegisterScavenging (const MachineFunction &MF) const { + //return true; + return false; + } + + virtual bool requiresFrameIndexScavenging (const MachineFunction &MF) const { + //return true; + return false; + } +}; +} + +#endif Index: lib/Target/Connex/ConnexRegisterInfo.cpp =================================================================== --- lib/Target/Connex/ConnexRegisterInfo.cpp +++ lib/Target/Connex/ConnexRegisterInfo.cpp @@ -0,0 +1,146 @@ +//===-- ConnexRegisterInfo.cpp - Connex Register Information ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Connex implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#include "Connex.h" +#include "ConnexRegisterInfo.h" +#include "ConnexSubtarget.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + +#define GET_REGINFO_TARGET_DESC +#include "ConnexGenRegisterInfo.inc" +using namespace llvm; + +#include "llvm/Support/Debug.h" // for dbgs and LLVM_DEBUG() macro +#define DEBUG_TYPE "mc-inst-lower" + + + +ConnexRegisterInfo::ConnexRegisterInfo() + : ConnexGenRegisterInfo(Connex::R0) {} + +// Inspired from lib/Target/Mips/MipsRegisterInfo.cpp +const TargetRegisterClass *ConnexRegisterInfo::getPointerRegClass( + const MachineFunction &MF, + unsigned Kind) const { + return &Connex::GPRRegClass; +} + +const MCPhysReg *ConnexRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + return CSR_SaveList; 
+} + +BitVector ConnexRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + int numRegs = getNumRegs(); + + LLVM_DEBUG(dbgs() << "getReservedRegs(): numRegs = " + << numRegs << "\n"); + + BitVector Reserved(numRegs); + Reserved.set(Connex::R10); // R10 is read only frame pointer + Reserved.set(Connex::R11); // R11 is pseudo stack pointer + + /* Wh30, vector register R(30), is used by me to codegen: + - LLVM's VSELECT on Connex in ConnexTargetMachine.cpp - PassAfterPostRAScheduler + (NO longer: in ConnexISelLowering::Lower() for VSELECT to be + lowered to WHERE*). + Doing so we avoid errors like: + <<*** Bad machine code: Using an undefined physical register *** + - function: IfConversion + - basic block: BB#6 vector.body (0x1501fd8) + - instruction: %vreg47 = COPY + - operand 1: %Wh31>> + + - in ConnexInstrInfo::copyPhysReg() . + */ + Reserved.set(CONNEX_RESERVED_REGISTER_01); + Reserved.set(CONNEX_RESERVED_REGISTER_02); + Reserved.set(CONNEX_RESERVED_REGISTER_03); + + return Reserved; +} + +void ConnexRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { + assert(SPAdj == 0 && "Unexpected"); + + unsigned i = 0; + MachineInstr &MI = *II; + MachineFunction &MF = *MI.getParent()->getParent(); + DebugLoc DL = MI.getDebugLoc(); + + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + + unsigned FrameReg = getFrameRegister(MF); + int FrameIndex = MI.getOperand(i).getIndex(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + MachineBasicBlock &MBB = *MI.getParent(); + + if (MI.getOpcode() == Connex::MOV_rr) { + MI.getOperand(i).ChangeToRegister(FrameReg, false); + + // !!!!TODO MAYBE: we took out the scalar ADD and therefore we have to comment this + /* + int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex); + unsigned reg = MI.getOperand(i - 1).getReg(); + + BuildMI(MBB, ++II, DL, 
TII.get(Connex::ADD_ri), reg) + .addReg(reg) + .addImm(Offset); + */ + + return; + } + + int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) + + MI.getOperand(i + 1).getImm(); + + if (!isInt<32>(Offset)) + llvm_unreachable("bug in frame offset"); + + if (MI.getOpcode() == Connex::FI_ri) { + // architecture does not really support FI_ri, replace it with + // MOV_rr , frame_reg + // ADD_ri , imm + unsigned reg = MI.getOperand(i - 1).getReg(); + + BuildMI(MBB, ++II, DL, TII.get(Connex::MOV_rr), reg) + .addReg(FrameReg); + + // !!!!TODO MAYBE: we took out the scalar ADD and therefore we have to comment this + /* + BuildMI(MBB, II, DL, TII.get(Connex::ADD_ri), reg) + .addReg(reg) + .addImm(Offset); + */ + + // Remove FI_ri instruction + MI.eraseFromParent(); + } else { + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i + 1).ChangeToImmediate(Offset); + } +} + +unsigned ConnexRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + return Connex::R10; +} Index: lib/Target/Connex/ConnexSelectionDAGInfo.h =================================================================== --- lib/Target/Connex/ConnexSelectionDAGInfo.h +++ lib/Target/Connex/ConnexSelectionDAGInfo.h @@ -0,0 +1,75 @@ +//===-- ConnexSelectionDAGInfo.h - Connex SelectionDAG Info -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the Connex subclass for SelectionDAGTargetInfo. 
+/// +//===----------------------------------------------------------------------===// + +// Inspired from ARM/ARMSelectionDAGInfo.cpp + + +#ifndef LLVM_LIB_TARGET_CONNEX_CONNEXSELECTIONDAGINFO_H +#define LLVM_LIB_TARGET_CONNEX_CONNEXSELECTIONDAGINFO_H + +//#include "MCTargetDesc/ConnexAddressingModes.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" + +namespace llvm { + +/* +namespace Connex_AM { + static inline ShiftOpc getShiftOpcForNode(unsigned Opcode) { + switch (Opcode) { + default: return Connex_AM::no_shift; + case ISD::SHL: return Connex_AM::lsl; + case ISD::SRL: return Connex_AM::lsr; + case ISD::SRA: return Connex_AM::asr; + case ISD::ROTR: return Connex_AM::ror; + //case ISD::ROTL: // Only if imm -> turn into ROTR. + // Can't handle RRX here, because it would require folding a flag into + // the addressing mode. :( This causes us to miss certain things. + //case ConnexISD::RRX: return Connex_AM::rrx; + } + } +} // end namespace Connex_AM +*/ + +class ConnexSelectionDAGInfo : public SelectionDAGTargetInfo { +public: + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool isVolatile, + bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; + + SDValue + EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, + SDValue Dst, SDValue Src, SDValue Size, + unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; + + // Adjust parameters for memset, see RTABI section 4.3.4 + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Op1, SDValue Op2, + SDValue Op3, unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo) const override; + + SDValue EmitSpecializedLibcall(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue 
Size, unsigned Align, + RTLIB::Libcall LC) const; +}; + +} // end namespace llvm + +#endif Index: lib/Target/Connex/ConnexSelectionDAGInfo.cpp =================================================================== --- lib/Target/Connex/ConnexSelectionDAGInfo.cpp +++ lib/Target/Connex/ConnexSelectionDAGInfo.cpp @@ -0,0 +1,343 @@ +//===-- ConnexSelectionDAGInfo.cpp - Connex SelectionDAG Info -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ConnexSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#include "ConnexTargetMachine.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/IR/DerivedTypes.h" +#include "ConnexSelectionDAGInfo.h" + + +// Inspired from ARM/ARMSelectionDAGInfo.cpp + +using namespace llvm; + +#define DEBUG_TYPE "connex-selectiondag-info" + +// Emit, if possible, a specialized version of the given Libcall. Typically this +// means selecting the appropriately aligned version, but we also convert memset +// of 0 into memclr. +SDValue ConnexSelectionDAGInfo::EmitSpecializedLibcall( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, RTLIB::Libcall LC) const { +#ifdef NOTNOT + const ConnexSubtarget &Subtarget = + DAG.getMachineFunction().getSubtarget(); + const ConnexTargetLowering *TLI = Subtarget.getTargetLowering(); + + // Only use a specialized AEABI function if the default version of this + // Libcall is an AEABI function. + if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0) + return SDValue(); + + // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be + // able to translate memset to memclr and use the value to index the function + // name array. 
+ enum { + AEABI_MEMCPY = 0, + AEABI_MEMMOVE, + AEABI_MEMSET, + AEABI_MEMCLR + } AEABILibcall; + switch (LC) { + case RTLIB::MEMCPY: + AEABILibcall = AEABI_MEMCPY; + break; + case RTLIB::MEMMOVE: + AEABILibcall = AEABI_MEMMOVE; + break; + case RTLIB::MEMSET: + AEABILibcall = AEABI_MEMSET; + if (ConstantSDNode *ConstantSrc = dyn_cast(Src)) + if (ConstantSrc->getZExtValue() == 0) + AEABILibcall = AEABI_MEMCLR; + break; + default: + return SDValue(); + } + + // Choose the most-aligned libcall variant that we can + enum { + ALIGN1 = 0, + ALIGN4, + ALIGN8 + } AlignVariant; + if ((Align & 7) == 0) + AlignVariant = ALIGN8; + else if ((Align & 3) == 0) + AlignVariant = ALIGN4; + else + AlignVariant = ALIGN1; + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + if (AEABILibcall == AEABI_MEMCLR) { + Entry.Node = Size; + Args.push_back(Entry); + } else if (AEABILibcall == AEABI_MEMSET) { + // Adjust parameters for memset, EABI uses format (ptr, size, value), + // GNU library uses (ptr, value, size) + // See RTABI section 4.3.4 + Entry.Node = Size; + Args.push_back(Entry); + + // Extend or truncate the argument to be an i32 value for the call. 
+ if (Src.getValueType().bitsGT(MVT::i32)) + Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src); + else if (Src.getValueType().bitsLT(MVT::i32)) + Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); + + Entry.Node = Src; + Entry.Ty = Type::getInt32Ty(*DAG.getContext()); + Entry.isSExt = false; + Args.push_back(Entry); + } else { + Entry.Node = Src; + Args.push_back(Entry); + + Entry.Node = Size; + Args.push_back(Entry); + } + + char const *FunctionNames[4][3] = { + { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" }, + { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" }, + { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" }, + { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" } + }; + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee( + TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], + TLI->getPointerTy(DAG.getDataLayout())), + std::move(Args)) + .setDiscardResult(); + std::pair CallResult = TLI->LowerCallTo(CLI); + + return CallResult.second; +#endif + + const ConnexSubtarget &Subtarget = + DAG.getMachineFunction().getSubtarget(); + const ConnexTargetLowering *TLI = Subtarget.getTargetLowering(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + /* + if (AEABILibcall == AEABI_MEMCLR) { + Entry.Node = Size; + Args.push_back(Entry); + } else if (AEABILibcall == AEABI_MEMSET) { + */ + // Adjust parameters for memset, EABI uses format (ptr, size, value), + // GNU library uses (ptr, value, size) + // See RTABI section 4.3.4 + Entry.Node = Size; + Args.push_back(Entry); + + // Extend or truncate the argument to be an i32 value for the call. 
+ if (Src.getValueType().bitsGT(MVT::i32)) + Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src); + else if (Src.getValueType().bitsLT(MVT::i32)) + Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); + + Entry.Node = Src; + Entry.Ty = Type::getInt32Ty(*DAG.getContext()); + Entry.IsSExt = false; // 2019_03_30 + Args.push_back(Entry); + /* + } else { + Entry.Node = Src; + Args.push_back(Entry); + + Entry.Node = Size; + Args.push_back(Entry); + } + */ + + char const *FunctionNames[4][3] = { + { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" }, + { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" }, + //{ "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" }, + { "memset", "memset", "memset" }, + { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" } + }; + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee( + TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(FunctionNames[2][2], + TLI->getPointerTy(DAG.getDataLayout())), + std::move(Args)) + .setDiscardResult(); + std::pair CallResult = TLI->LowerCallTo(CLI); + + return CallResult.second; +} + +SDValue ConnexSelectionDAGInfo::EmitTargetCodeForMemcpy( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { +#ifdef NOTNOTNOT + const ConnexSubtarget &Subtarget = + DAG.getMachineFunction().getSubtarget(); + // Do repeated 4-byte loads and stores. To be improved. + // This requires 4-byte alignment. + if ((Align & 3) != 0) + return SDValue(); + // This requires the copy size to be a constant, preferably + // within a subtarget-specific limit. 
+ ConstantSDNode *ConstantSize = dyn_cast(Size); + if (!ConstantSize) + return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, + RTLIB::MEMCPY); + uint64_t SizeVal = ConstantSize->getZExtValue(); + if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold()) + return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, + RTLIB::MEMCPY); + + unsigned BytesLeft = SizeVal & 3; + unsigned NumMemOps = SizeVal >> 2; + unsigned EmittedNumMemOps = 0; + EVT VT = MVT::i32; + unsigned VTSize = 4; + unsigned i = 0; + // Emit a maximum of 4 loads in Thumb1 since we have fewer registers + const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6; + SDValue TFOps[6]; + SDValue Loads[6]; + uint64_t SrcOff = 0, DstOff = 0; + + // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to + // VLDM/VSTM and make this code emit it when appropriate. This would reduce + // pressure on the general purpose registers. However this seems harder to map + // onto the register allocator's view of the world. + + // The number of MEMCPY pseudo-instructions to emit. We use up to + // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm + // later on. This is a lower bound on the number of MEMCPY operations we must + // emit. + unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM; + + // Code size optimisation: do not inline memcpy if expansion results in + // more instructions than the libary call. + if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction()->optForMinSize()) { + return SDValue(); + } + + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue); + + for (unsigned I = 0; I != NumMEMCPYs; ++I) { + // Evenly distribute registers among MEMCPY operations to reduce register + // pressure. 
+ unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs; + unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps; + + Dst = DAG.getNode(ConnexISD::MEMCPY, dl, VTs, Chain, Dst, Src, + DAG.getConstant(NumRegs, dl, MVT::i32)); + Src = Dst.getValue(1); + Chain = Dst.getValue(2); + + DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize); + SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize); + + EmittedNumMemOps = NextEmittedNumMemOps; + } + + if (BytesLeft == 0) + return Chain; + + // Issue loads / stores for the trailing (1 - 3) bytes. + unsigned BytesLeftSave = BytesLeft; + i = 0; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, MVT::i32, Src, + DAG.getConstant(SrcOff, dl, MVT::i32)), + SrcPtrInfo.getWithOffset(SrcOff), + false, false, false, 0); + TFOps[i] = Loads[i].getValue(1); + ++i; + SrcOff += VTSize; + BytesLeft -= VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(TFOps, i)); + + i = 0; + BytesLeft = BytesLeftSave; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, dl, MVT::i32)), + DstPtrInfo.getWithOffset(DstOff), false, false, 0); + ++i; + DstOff += VTSize; + BytesLeft -= VTSize; + } + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(TFOps, i)); +#endif + return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, + RTLIB::MEMCPY); +} + +SDValue ConnexSelectionDAGInfo::EmitTargetCodeForMemmove( + SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, 
Size, Align, + RTLIB::MEMMOVE); +} + +SDValue ConnexSelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, + const SDLoc &dl, + SDValue Chain, + SDValue Dst, + SDValue Src, + SDValue Size, + unsigned Align, + bool isVolatile, + MachinePointerInfo DstPtrInfo) const { + LLVM_DEBUG(dbgs() << "Entered ConnexSelectionDAGInfo::EmitTargetCodeForMemset()" + << "\n"); + + return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, + RTLIB::MEMSET); +} Index: lib/Target/Connex/ConnexSubtarget.h =================================================================== --- lib/Target/Connex/ConnexSubtarget.h +++ lib/Target/Connex/ConnexSubtarget.h @@ -0,0 +1,71 @@ +//===-- ConnexSubtarget.h - Define Subtarget for the Connex -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Connex specific subclass of TargetSubtargetInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_CONNEX_CONNEXSUBTARGET_H +#define LLVM_LIB_TARGET_CONNEX_CONNEXSUBTARGET_H + +#include "ConnexFrameLowering.h" +#include "ConnexISelLowering.h" +#include "ConnexInstrInfo.h" +#include "ConnexSelectionDAGInfo.h" + +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetMachine.h" + +#define GET_SUBTARGETINFO_HEADER +#include "ConnexGenSubtargetInfo.inc" + +namespace llvm { +class StringRef; + +class ConnexSubtarget : public ConnexGenSubtargetInfo { + virtual void anchor(); + ConnexInstrInfo InstrInfo; + ConnexFrameLowering FrameLowering; + ConnexTargetLowering TLInfo; + + SelectionDAGTargetInfo TSInfo; + ConnexSelectionDAGInfo TSInfo2; + +public: + // This constructor initializes the data members to match that + // of the specified triple. + ConnexSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, + const TargetMachine &TM); + + // ParseSubtargetFeatures - Parses features string setting specified + // subtarget options. Definition of function is auto generated by tblgen. 
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + + const ConnexInstrInfo *getInstrInfo() const override { return &InstrInfo; } + const ConnexFrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + const ConnexTargetLowering *getTargetLowering() const override { + return &TLInfo; + } + + const TargetRegisterInfo *getRegisterInfo() const override { + return &InstrInfo.getRegisterInfo(); + } + + // Inspired from ARM/ARMSubtarget.cpp + const ConnexSelectionDAGInfo *getSelectionDAGInfo() const override { + return &TSInfo2; + } +}; +} // End llvm namespace + +#endif Index: lib/Target/Connex/ConnexSubtarget.cpp =================================================================== --- lib/Target/Connex/ConnexSubtarget.cpp +++ lib/Target/Connex/ConnexSubtarget.cpp @@ -0,0 +1,31 @@ +//===-- ConnexSubtarget.cpp - Connex Subtarget Information ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Connex specific subclass of TargetSubtargetInfo. 
+// +//===----------------------------------------------------------------------===// + +#include "ConnexSubtarget.h" +#include "Connex.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +#define DEBUG_TYPE "connex-subtarget" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "ConnexGenSubtargetInfo.inc" + +void ConnexSubtarget::anchor() {} + +ConnexSubtarget::ConnexSubtarget(const Triple &TT, const std::string &CPU, + const std::string &FS, const TargetMachine &TM) + : ConnexGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(*this), + TLInfo(TM, *this) {} Index: lib/Target/Connex/ConnexTargetMachine.h =================================================================== --- lib/Target/Connex/ConnexTargetMachine.h +++ lib/Target/Connex/ConnexTargetMachine.h @@ -0,0 +1,54 @@ +//===-- ConnexTargetMachine.h - Define TargetMachine for Connex --- C++ ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Connex specific subclass of TargetMachine. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_CONNEX_CONNEXTARGETMACHINE_H +#define LLVM_LIB_TARGET_CONNEX_CONNEXTARGETMACHINE_H + +#include "ConnexSubtarget.h" +// 2019_03_30 +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Target/TargetMachine.h" // This was before +#include +// END 2019_03_30 + +namespace llvm { +class ConnexTargetMachine : public LLVMTargetMachine { + std::unique_ptr TLOF; + ConnexSubtarget Subtarget; + +public: + ConnexTargetMachine(const Target &T, const Triple &TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Optional RM, Optional CM, + CodeGenOpt::Level OL, bool JIT); + + const ConnexSubtarget *getSubtargetImpl() const { return &Subtarget; } + const ConnexSubtarget *getSubtargetImpl(const Function &) const override { + return &Subtarget; + } + + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + + // 2019_03_30: Inspired from ARC/ARCTargetMachine.h + TargetTransformInfo getTargetTransformInfo(const Function &F) override; + + TargetLoweringObjectFile *getObjFileLowering() const override { + return TLOF.get(); + } +}; +} + +#endif Index: lib/Target/Connex/ConnexTargetMachine.cpp =================================================================== --- lib/Target/Connex/ConnexTargetMachine.cpp +++ lib/Target/Connex/ConnexTargetMachine.cpp @@ -0,0 +1,1642 @@ +//===-- ConnexTargetMachine.cpp - Define TargetMachine for Connex ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements the info about Connex target spec. 
+// NOTE: I (partly) documented what the passes PassCreateBundles and +// PassFinalizeBundles do and my design decisions at +// http://lists.llvm.org/pipermail/llvm-dev/2017-March/110990.html +//===----------------------------------------------------------------------===// + +#include "Connex.h" +#include "ConnexTargetMachine.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" // For MIBundleBuilder +// +#include "llvm/CodeGen/MachineRegisterInfo.h" + +#include "llvm/Support/Debug.h" +#define DEBUG_TYPE "connex-target-config" + +#include "ConnexTargetTransformInfo.h" // 2019_03_30 + + + + +using namespace llvm; + +static cl::opt DontTreatCopyInstructions("dont-treat-copy-instructions", + cl::Hidden, + cl::init(false), + cl::desc("Don't treat copy instructions")); + + +#define CONNEX_RESERVED_REGISTER_DST_FOR_SPLIT CONNEX_RESERVED_REGISTER_02 +// NOT compiling - <>: #define CONNEX_RESERVED_REGISTER_DST_FOR_SPLIT Connex::Wh3000 +// Gives strange results, but sortta helps for reading output.cpp: #define CONNEX_RESERVED_REGISTER_DST_FOR_SPLIT 3000 + +extern "C" void LLVMInitializeConnexTarget() { + // Register the target - Force static initialization. + RegisterTargetMachine Z(TheConnexTarget); +} + +// DataLayout: little or big endian +// 2019_03_30: static std::string computeDataLayout(const Triple &TT) +static StringRef computeDataLayout(const Triple &TT) { + //return "e-m:e-p:64:64-i64:64-n32:64-S128"; + + /* + See http://llvm.org/docs/LangRef.html#data-layout for all details regarding layout declaration. + - S + Specifies the natural alignment of the stack in bits. 
Alignment promotion of stack variables is limited to the natural stack alignment to avoid dynamic stack realignment. The stack alignment must be a multiple of 8-bits. If omitted, the natural stack alignment defaults to “unspecified”, which does not prevent any alignment promotions. + - p[n]:<size>:<abi>:<pref> + This specifies the size of a pointer and its ABI (<abi>) and preferred (<pref>) alignments for address space n. All sizes are in bits. The address space, n, is optional, and if not specified, denotes the default address space 0. The value of n must be in the range [1,2^23). + - i<size>:<abi>:<pref> + This specifies the alignment for an integer type of a given bit <size>. The value of <size> must be in the range [1,2^23). + - n<size1>:<size2>:<size3>... + This specifies a set of native integer widths for the target CPU in bits. + - v<size>:<abi>:<pref> + This specifies the alignment for a vector type of a given bit <size>. + + See also http://llvm.org/docs/WritingAnLLVMBackend.html + An upper-case “E” in the string indicates a big-endian target data model. + A lower-case “e” indicates little-endian. + “p:” is followed by pointer information: size, ABI alignment, and preferred alignment. + If only two figures follow “p:”, then the first value is pointer size, and the second value is both ABI and preferred alignment. + Then a letter for numeric type alignment: “i”, “f”, “v”, or “a” (corresponding to integer, floating point, vector, or aggregate). + “i”, “v”, or “a” are followed by ABI alignment and preferred alignment. + “f” is followed by three values: the first indicates the size of a long double, then ABI alignment, and then ABI preferred alignment. + */ + + // 64-bit pointer + //return "e-m:e-p:64:32-i16:16:16-i32:32:32-i64:64-n32:32-S128"; + // Added the alignment for the vector type of 2048 bits.
+ return "e-m:e-p:64:32-i16:16:16-i32:32:32-i64:64-v2048:2048:2048-n32:32-S128"; + + // 16-bits pointer + // TRIED also: return "e-m:e-p:16:32-i32:32:32-i64:64-n32:32-S128"; + //return "e-m:e-p:16:16-i32:32:32-i64:64-n32:32-S128"; +} + + +static Reloc::Model getEffectiveRelocModel(Optional RM) { + if (!RM.hasValue()) + return Reloc::PIC_; + return *RM; +} + + +// Inspired from XCore/XCoreTargetMachine.cpp +// 2019_03_30 +static CodeModel::Model getEffectiveXCoreCodeModel( + Optional CM) { + if (CM) { + if (*CM != CodeModel::Small && *CM != CodeModel::Large) + report_fatal_error("Target only supports CodeModel Small or Large"); + return *CM; + } + return CodeModel::Small; +} + + +ConnexTargetMachine::ConnexTargetMachine(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Optional RM, + Optional CM, + CodeGenOpt::Level OL, + bool JIT) + : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, + getEffectiveRelocModel(RM), + getEffectiveCodeModel(CM, CodeModel::Small), OL), + TLOF(make_unique()), + Subtarget(TT, CPU, FS, *this) { + initAsmInfo(); +} + + + + + +namespace { + + +/* I made sure that the iterators don't become invalid by using + another iterator, e.g. I2succ, which stores the next pointer in the + data structures. + +small-TODO: it might be safer to do a change by moving (maybe also + erasing) COPY instrs one per WHERE block (or even per MBB) and then get out of + the MBB::iterator loop and restart the loop from the beginning again until + NO more changes are performed - this in order to avoid any (eventual) issue with + iterator invalidation. 
+*/ +class PassAfterPostRAScheduler : public MachineFunctionPass { + public: + PassAfterPostRAScheduler() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "PassAfterPostRAScheduler"; + } + + /* // GMS said he doesn't like having arithmetic or logic instruction between predicate and WHERE* instruction: + #ifdef ALLOW_COPY_BETWEEN_PREDICATE_AND_WHERE_INSTRUCTIONS + - this case needs to be implemented carefully - I only sketched it a bit, so + it isn't tested either + */ + + void UpdateUsesOfRegUntilCOPY(MachineBasicBlock::iterator &Ipredicate, + // We start replacing uses from Ipredicate + 1 + MachineBasicBlock::iterator &I2, // COPY + MachineBasicBlock::iterator &IE, + unsigned regCrt, + unsigned regNew) { + LLVM_DEBUG(dbgs() << " I2 = " << *I2); + + /* We update all following occurences of the dest register + of COPY instr (which was also the dest register of the + predicate) + - for both uses and def, until 1st def. */ + MachineBasicBlock::iterator Iupdate; + Iupdate = Ipredicate; + Iupdate++; + + for (; Iupdate != I2 && Iupdate != IE; Iupdate++) { + LLVM_DEBUG(dbgs() << " Iupdate = " << *Iupdate); + + /* IMPORTANT: we go in reverse order to make the def last since we + break at def. */ + for (int idOpnd = Iupdate->getNumOperands() - 1; idOpnd >= 0; idOpnd--) { + MachineOperand &IOpnd = Iupdate->getOperand((unsigned)idOpnd); + + if (IOpnd.isReg() && IOpnd.getReg() == regCrt) { + LLVM_DEBUG(dbgs() << "UpdateUsesOfRegUntilCOPY(): Updating to " + "regNew the register of Iupdate. 
" + " Iupdate = " + << *Iupdate); + + /* + // This does NOT hold because we can have uses of a COPY instr dest + // register before the COPY - see the big WHERE block of ADD.f16 + assert( (Iupdate->getOpcode() == Connex::WHEREEQ || + Iupdate->getOpcode() == Connex::WHERELT || + Iupdate->getOpcode() == Connex::WHERECRY) && + "We should NOT be arriving here otherwise."); + */ + + if (IOpnd.isDef()) { + // We break + Iupdate = IE; Iupdate--; // We make it break out of outermost loop + break; + } + + IOpnd.setReg(regNew); + } + } + } + } + + + void PutCOPYBeforeWhereBlock(MachineBasicBlock &MBB, + const TargetInstrInfo *TII, + //MachineBasicBlock::iterator &I, + MachineInstr *IMI, // The WHERE* instruction + MachineBasicBlock::iterator &I2, // COPY + MachineBasicBlock::iterator &I2plus1, + MachineBasicBlock::iterator &IE, + bool &changedMF, + int &destRegisterPredicateOfSplitWhere) { + /* NOTE: I2 is the COPY instruction + if (I2.getOperand(0) == Ipredicate.getOperand(0)) + for each instruction from Ipredicate to I2 - 1 replace defs and uses of + I2.getOperand(0) with CONNEX_RESERVED_REGISTER_01 + */ + + /* + Moving COPY before the WHERE block. + + Normally we move the COPY instructions and put them + in the same order before the predicate. + + important-Note: If we have 2 COPY with the same dest register, + the WHERE block will be surely split at least for + the 2nd COPY. For example, from MatMul-256.f16: + + R(11) = R(23) == R(1); + NOP; + ); + EXECUTE_WHERE_EQ( + R(19) = ISHL(R(21), 10); + // Assume it's not here: R(19) = R(10) | R(19); + // Assume it's not here: R(25) = R(1) & R(10); + R(10) = R(0) | R(0); // COPY + R(10) = R(26) - R(1); + R(11) = R(1) << R(11); + R(10) = R(0) | R(0); // COPY + R(10) = R(11) & R(20); + The 2nd COPY forces the WHERE to be split + - it's actually a different variable. + + Note: although not important, in principle we could + have non-SPECIALV_H instrs inside WHERE blocks if + the register is NOT initialized. 
*/ + LLVM_DEBUG(dbgs() << " moving I2 immediately before the " + "predicate instruction linked to the " + "WHERE block\n"); + + MachineBasicBlock::iterator Ipredicate = IMI; + LLVM_DEBUG(dbgs() << " IMI = " + << *IMI << "\n"); + Ipredicate--; + LLVM_DEBUG(dbgs() << " Ipredicate = " + << *Ipredicate << "\n"); + + /* + if (Ipredicate->getOpcode() != Connex::NOP_BPF) + LLVM_DEBUG(dbgs() << "PassAfterPostRAScheduler: Warning: " + "Ipredicate->getOpcode() != Connex::NOP_BPF\n"); + */ + // 2017_08_24 + assert(Ipredicate->getOpcode() == Connex::NOP_BPF + //|| Ipredicate->getOpcode() == Connex::NOP + ); + + /* Ipredicate is pointing at 2 instructions before the + WHERE* instruction, normally at the predicate + instruction.*/ + Ipredicate--; + + LLVM_DEBUG(dbgs() << " Ipredicate = " + << *Ipredicate << "\n"); + + // 2017_08_27 + // IMPORTANT-TODO: check better: check for right (w.r.t. WHERE) predicate instruction before NOP + assert(Ipredicate->getOpcode() == Connex::EQ_H || + Ipredicate->getOpcode() == Connex::LT_H || + Ipredicate->getOpcode() == Connex::ULT_H //); + || + // 2018_10_07: this is for the case of using lane gating instructions (DISABLE_CELL, ENABLE_ALL_CELLS) + Ipredicate->getOpcode() == Connex::EQ_SPECIAL_H || + Ipredicate->getOpcode() == Connex::LT_SPECIAL_H || + Ipredicate->getOpcode() == Connex::ULT_SPECIAL_H); + + + assert(Ipredicate->getOperand(0).isReg() && + Ipredicate->getOperand(0).isDef()); + assert(I2->getOperand(0).isReg() && + I2->getOperand(0).isDef()); + + + /* + // This case can be handled (ONLY) by splitting WHERE block: + #ifndef ALLOW_COPY_BETWEEN_PREDICATE_AND_WHERE_INSTRUCTIONS + assert(I2->getOperand(1).getReg() != Ipredicate->getOperand(0).getReg() && + "We reached a case that's not treatable by to implement this case!"); + #endif + */ + + /* Checking for WAR/anti-dependence between predicate and COPY instruction + - if so, then changing order (moving COPY before predicate) compromises + correctness so we make a copy of the respective 
predicate input. */ + // I2 is the COPY instruction + assert( I2->getOperand(0).isReg() && I2->getOperand(0).isDef() ); + // + // Ipredicate is the predicate instruction + assert( Ipredicate->getOperand(1).isReg() && + Ipredicate->getOperand(1).isUse() ); + assert( Ipredicate->getOperand(2).isReg() && + Ipredicate->getOperand(2).isUse() ); + // + bool sameOpnd1 = + Ipredicate->getOperand(1).getReg() == I2->getOperand(0).getReg(); + bool sameOpnd2 = + Ipredicate->getOperand(2).getReg() == I2->getOperand(0).getReg(); + // + if (sameOpnd1 || sameOpnd2) { + LLVM_DEBUG(dbgs() << + "Moving COPY before WHERE predicate breaks WAR/anti-dependence " + "relation between COPY and predicate. " + "--> fixing the problem by making copy of predicate input.\n"); + + /* TODO???: if Ipredicate has a use of the dest register of EQ???????????? + then add: a) an instr before COPY with + CONNEX_RESERVED_REGISTER_01 = Rinput_EQ | Rinput_EQ + */ + + /* We preserve the input register of the predicate instruction since it + will be overwritten by the moved (before the predicate) + COPY instruction: + we make a copy: + CONNEX_RESERVED_REGISTER_01 = Rdst_COPY | Rdst_COPY + */ +#ifndef ALLOW_COPY_BETWEEN_PREDICATE_AND_WHERE_INSTRUCTIONS + #ifdef COPY_REGISTER_IMPLEMENTED_WITH_ORV_H + BuildMI(MBB, + Ipredicate, + /* We insert this MachineInstr before Ipredicate. + Also the COPY I2 we move after this, after Ipredicate, + so I2 will be moved after this new copy */ + IMI->getDebugLoc(), + TII->get(Connex::ORV_H), + CONNEX_RESERVED_REGISTER_01). + addReg(I2->getOperand(0).getReg()). + /* Note: I2 (COPY) does NOT necessarily have the + same dest register as Ipredicate. */ + addReg(I2->getOperand(0).getReg()); + #else + #error "This case is NOT implemented. Implement it!" + #endif +#endif + /* 2018_11_20: This really helps a lot since the COPY moved before + Ipredicate should be visible inside the WHERE block, + so then we need to make the Ipredicate destination a reserved reg. 
+ Chances are big (but it's not necessary to be so I think) that since + sameOpnd1 || sameOpnd2, then we can have Ipredicate with + Ipredicate->getOperand(0) == I2->getOperand(0); + and if we leave it like that then we shadow the COPY. + . */ + if (Ipredicate->getOperand(0).getReg() == I2->getOperand(0).getReg()) + Ipredicate->getOperand(0).setReg(CONNEX_RESERVED_REGISTER_01); + + // Note: Ipredicate is the predicate instruction + /* These checks handle also the case both input operands of Ipredicate + are the same. + */ + if (sameOpnd1) + Ipredicate->getOperand(1).setReg(CONNEX_RESERVED_REGISTER_01); + if (sameOpnd2) + Ipredicate->getOperand(2).setReg(CONNEX_RESERVED_REGISTER_01); + + /* We now normally have to update the uses of modified input of + Ipredicate for the following instructions between the predicate + and the place where the COPY was. + However, the instructions using the input after predicate are + only the ones in the WHERE block basically. + */ + UpdateUsesOfRegUntilCOPY(Ipredicate, + I2, // COPY + IE, + I2->getOperand(0).getReg(), + CONNEX_RESERVED_REGISTER_01); + } + else // MEGA-TODO: think if OK + if (Ipredicate->getOperand(0).getReg() == I2->getOperand(0).getReg()) { + // If we have a WAW (output) dependendce + // Note: Ipredicate is the predicate, I2 is the COPY + LLVM_DEBUG(dbgs() << + " Found that the COPY to be moved " + "immediately before the predicate of the " + "WHERE block has the same destination register as the predicate. " + "This forces us to handle specially " + "the predicate instr dest register, " + "since this dest " + "register is the same as the one of the " + "COPY (hence, a WAW dependence is broken " + "and the program would become incorrect " + "otherwise).\n"); + + /* We update dest register of of Ipredicate (predicate) + due to conflict with I2, which we move before it. 
*/ + /* + if (destRegisterPredicateOfSplitWhere != -1) + Ipredicate->getOperand(0).setReg(destRegisterPredicateOfSplitWhere); + else + Ipredicate->getOperand(0).setReg(CONNEX_RESERVED_REGISTER_01); + */ + Ipredicate->getOperand(0).setReg(CONNEX_RESERVED_REGISTER_02); + // + UpdateUsesOfRegUntilCOPY(Ipredicate, + I2, // COPY + IE, + I2->getOperand(0).getReg(), + CONNEX_RESERVED_REGISTER_02); + } + + // We move the COPY instruction before the predicate + MBB.remove((&(*I2))); + //MBB.insert(IMI, I2); // It inserts before IMI + #ifdef ALLOW_COPY_BETWEEN_PREDICATE_AND_WHERE_INSTRUCTIONS + MBB.insert(Ipredicate, IMI); // It inserts immediately before the WHERE instr + #else + MBB.insert(Ipredicate, (&(*I2))); // It inserts before Ipredicate + #endif + changedMF = true; + + // We handle the case of more than 1 COPY instr in the WHERE block +// I2plus1 represents the next instr after the COPY (before move) + I2 = I2plus1; + } // END PutCOPYBeforeWhereBlock() + + + void SplitWhereBlock(MachineBasicBlock &MBB, + const TargetInstrInfo *TII, + MachineBasicBlock::iterator &I, + MachineInstr *&IMI, + MachineBasicBlock::iterator &I2, // COPY instr + MachineBasicBlock::iterator &IE, + bool &changedMF, + int &destRegisterPredicateOfSplitWhere) { + /* This case handles only the cases we ran so far. + See MEGA-TODO for limitation of this case. */ + changedMF = true; + + LLVM_DEBUG(dbgs() << " SplitWhereBlock(): IMI = " + << *IMI); + LLVM_DEBUG(dbgs() << " SplitWhereBlock(): I2 = " + << *I2 << "\n"); + + /* TODO TODO: handle case + where we have COPY between 2 instr like ADD and + ADDC, which is incorrect because the COPY messes + up the Connex flags. 
*/ + MachineBasicBlock::iterator I2plus1 = I2; + I2plus1++; + // I think this does NOT cover all cases but most of them + assert(I2plus1->getOpcode() != Connex::ADDCV_H && + I2plus1->getOpcode() != Connex::SUBCV_H && + I2plus1->getOpcode() != Connex::ADDCV_SPECIAL_H && + I2plus1->getOpcode() != Connex::SUBCV_SPECIAL_H && + "We do NOT handle yet ADDCV/SUBCV instructions immediately after COPY " + "for this case (and the corresponding ADD/SUB before the COPY)"); + + LLVM_DEBUG(dbgs() << " splitting WHERE block in 2 s.t. we put I2 immediately " + "after new END_WHERE resulting from split.\n"); + // I = beginning of new WHERE block + //const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + + MachineBasicBlock::iterator Ipredicate = IMI; + // We make Ipredicate point to the predicate of this WHERE + // block + Ipredicate--; + LLVM_DEBUG(dbgs() << " SplitWhereBlock(): Ipredicate = " + << *Ipredicate << "\n"); + assert(Ipredicate->getOpcode() == Connex::NOP_BPF); + Ipredicate--; + LLVM_DEBUG(dbgs() << " SplitWhereBlock(): Ipredicate (2 instr before) = " + << *Ipredicate << "\n"); + + unsigned regDest = CONNEX_RESERVED_REGISTER_02; + int changedPredicateOpnd = -1; + + // We check Ipredicate, the predicate, is 3-opcode + assert( + ( + ( + // For the standard case: + (Ipredicate->getOpcode() == Connex::EQ_H || + Ipredicate->getOpcode() == Connex::LT_H || + Ipredicate->getOpcode() == Connex::ULT_H + ) && + Ipredicate->getNumOperands() == 3 + ) + || + ( + // For disabled lane gating regions + ( + Ipredicate->getOpcode() == Connex::EQ_SPECIAL_H || + Ipredicate->getOpcode() == Connex::LT_SPECIAL_H || + Ipredicate->getOpcode() == Connex::ULT_SPECIAL_H + ) && + Ipredicate->getNumOperands() == 4 + ) + ) + && + Ipredicate->getOperand(0).isReg() && + Ipredicate->getOperand(0).isDef() && + Ipredicate->getOperand(1).isReg() && + Ipredicate->getOperand(1).isUse() && + Ipredicate->getOperand(2).isReg() && + Ipredicate->getOperand(2).isUse() + ); + + unsigned 
predicateInstrOpnd[2]; + predicateInstrOpnd[0] = Ipredicate->getOperand(1).getReg(); + predicateInstrOpnd[1] = Ipredicate->getOperand(2).getReg(); + + destRegisterPredicateOfSplitWhere = Ipredicate->getOperand(0).getReg(); + LLVM_DEBUG(dbgs() << "PassAfterPostRAScheduler: destRegisterPredicateOfSplitWhere = " + << destRegisterPredicateOfSplitWhere + << "\n"); + + /* + assert( (predicateInstrOpnd[0] != CONNEX_RESERVED_REGISTER_02) && + (predicateInstrOpnd[1] != CONNEX_RESERVED_REGISTER_02) && + // MEGA-MEGA-TODO: implement this - it happens for ADD/MUL.f16 + "We currently can't handle these cases because we have only 1 reserved register."); + */ + unsigned predicateInstrOpcode = Ipredicate->getOpcode(); + unsigned predicateInstrOpndAux[2]; + + /* We look if predicateInstrOpnd[*] is updated/redefined + either in the predicate instruction or in the + instructions of the + associated WHERE block before the COPY instr. + - i.e., if predicateInstrOpnd[1] changes then + use it as predicateInstrOpnd[0]. + If NO change happens we do NOT need to save the + value of predicateInstrOpnd[*], i.e., to create + ORV_H below. + + We check this from Ipredicate(+1) (next instr after predicate) to I2(-1) + (COPY instr, exclusive). + We check if any of the operands of the predicate change. + NOTE: assert (if both change - we don't want to waste by reserving 2 + Connex registers - maybe we can change the Connex ASM code by hand + to avoid this). 
+ */ + /* + if (Ipredicate->getOperand(0).getReg() == + Ipredicate->getOperand(1).getReg()) { + // We changed the 1st input operand of the predicate + changedPredicateOpnd = 0; + } + else + if (Ipredicate->getOperand(0).getReg() == + Ipredicate->getOperand(2).getReg()) { + // We changed the 2nd input operand of the predicate + changedPredicateOpnd = 1; + } + */ + + MachineBasicBlock::iterator Iaux = Ipredicate; + //Iaux++; + MachineBasicBlock::iterator IauxEnd = I2; // I2 is COPY + + #define TREAT_ONLY_ONCE_CHANGE_PREDICATE_OPERANDS + #ifdef TREAT_ONLY_ONCE_CHANGE_PREDICATE_OPERANDS + IauxEnd++; + #endif + //IauxEnd--; + /* IMPORTANT: for the NEW predicate we don't care what we use for the + destination register. + + We now check for the NEW predicate we create for the split if its input + operands are updated between the + original_predicate..COPY_instr */ + for (; Iaux != IauxEnd && Iaux != IE; Iaux++) { + LLVM_DEBUG(dbgs() << " SplitWhereBlock(): Iaux = " + << *Iaux << "\n"); + if (Iaux->getNumOperands() >= 1 && Iaux->getOperand(0).isReg() && + Iaux->getOperand(0).isDef()) { + if (Iaux->getOperand(0).getReg() == predicateInstrOpnd[0]) { + assert((changedPredicateOpnd == -1 || changedPredicateOpnd == 0) && + // MEGA-TODO: handle this assert violation case + "It seems both input operands of the " + "predicate get updated so we would need to " + "reserve 2 Connex registers to handle well " + "this case."); + // We find that we subsequently change the 1st input operand of the predicate + changedPredicateOpnd = 0; + } + else + if (Iaux->getOperand(0).getReg() == predicateInstrOpnd[1]) { + /* We find that we subsequently change + the 2nd input operand of the predicate */ + assert((changedPredicateOpnd == -1 || changedPredicateOpnd == 1) && + // MEGA-TODO: handle this assert violation case + "It seems both input operands of the " + "predicate get updated so we would need " + "to reserve 2 Connex registers to handle " + "well this case."); + changedPredicateOpnd = 1; + 
} + } + } + + LLVM_DEBUG(dbgs() << " changedPredicateOpnd = " + << changedPredicateOpnd + << " (for the input operands of the predicate)\n"); + + if (changedPredicateOpnd == -1) { + //regDest = predicateInstrOpnd[0]; + predicateInstrOpndAux[0] = predicateInstrOpnd[0]; + predicateInstrOpndAux[1] = predicateInstrOpnd[1]; + } + else { + /* Put a copy of the changed input register of the predicate instruction + before Ipredicate, the initial predicate of this WHERE block. */ + #ifdef COPY_REGISTER_IMPLEMENTED_WITH_ORV_H + if (regDest != predicateInstrOpnd[changedPredicateOpnd]) { + BuildMI(MBB, + Ipredicate, + IMI->getDebugLoc(), + TII->get(Connex::ORV_H), + regDest). // The reserved register, CONNEX_RESERVED_REGISTER_02 + addReg(predicateInstrOpnd[changedPredicateOpnd]). + addReg(predicateInstrOpnd[changedPredicateOpnd]); + } + #else + #error "This case is NOT implemented. Implement it!" + #endif + + /* + predicateInstrOpndAux[0] = regDest; // Reserved register + predicateInstrOpndAux[1] = predicateInstrOpnd[1 - changedPredicateOpnd]; + */ + // 2018_07_29 + predicateInstrOpndAux[changedPredicateOpnd] = CONNEX_RESERVED_REGISTER_02; // regDest + predicateInstrOpndAux[1 - changedPredicateOpnd] = + predicateInstrOpnd[1 - changedPredicateOpnd]; + } + + LLVM_DEBUG(dbgs() << " predicateInstrOpndAux[0] = " + << predicateInstrOpndAux[0] + << "\n"); + LLVM_DEBUG(dbgs() << " predicateInstrOpndAux[1] = " + << predicateInstrOpndAux[1] + << "\n"); + + MachineBasicBlock::iterator I2succ = I2; + I2succ++; + BuildMI(MBB, + //2018_08_17 I2succ, // Immediately after the COPY instr + I2, // Immediately before the COPY instr + IMI->getDebugLoc(), + TII->get(Connex::END_WHERE) + //, I2->getOperand(0).getReg() + ); + LLVM_DEBUG(dbgs() << " Finished creating the END_WHERE\n"); + + #ifndef TREAT_ONLY_ONCE_CHANGE_PREDICATE_OPERANDS + /* + // Ipredicate is predicate + // Unnecessary check: + assert(Ipredicate->getOperand(0).getReg() != + I2->getOperand(0).getReg()); + */ + /* + // 2018_06_26 + 
This check is actually VAGUELY different from the one above because + the one above inserts a register save (copy) instruction before the original WHERE, + while this new one after the new END_WHERE resulting from the split. + VERY IMPORTANT Note: the new predicate WHERE can have the result stored in RESERVED_REGISTER. + * We now check for conflicts between: + - destination register operand of COPY and + - input registers of predicate instruction. + * + * Note: I2 is the COPY instruction that triggered the split of WHERE block. + * + * Addressing the case, where after the split of WHERE* block we have something + * like this immediately after the 1st new WHERE* block, before the 2nd + * WHERE* block, where the repeated predicate instruction (repeated by us) + * happens to use the register defined in the COPY instruction, which makes + * the computation incorrect: + * END_WHERE; + * R(26) = R(10) | R(10); // This COPY instruction is the reason of the split + * R(30) = R(26) < R(3); + * NOP + * WHERE* + * + * Note: R(30) (CONNEX_RESERVED_REGISTER_01) is a reserved register. + * + * To correct the problem in this example we have to copy the value of R(26) + * in R(30): + * END_WHERE; + * R(30) = R(26) | R(26); + * R(26) = R(10) | R(10); // This COPY instruction is the reason of the split + * R(30) = R(30) < R(3); + * NOP + * WHERE* + */ + int changeInputPredicateOperandsDueToCOPY = 0; + if (predicateInstrOpnd[0] == I2->getOperand(0).getReg()) { + changeInputPredicateOperandsDueToCOPY |= 1; + } + if (predicateInstrOpnd[1] == I2->getOperand(0).getReg()) { + changeInputPredicateOperandsDueToCOPY |= 2; + } + // + assert(changeInputPredicateOperandsDueToCOPY != 3 && + // important-TODO: handle this assert violation case + "We shouldn't have such a case - doesn't really make sense for a " + "conditional to have both operands equal."); + + LLVM_DEBUG(dbgs() << " changeInputPredicateOperandsDueToCOPY = " + << changeInputPredicateOperandsDueToCOPYMBB << "\n"); + /* + assert(! 
(changedPredicateOpnd != -1 && changeInputPredicateOperandsDueToCOPY != 0) && + // TODO: if not merging the 2 cases together, handle this assert violation case, + "We currently can't handle both cases simultaneously."); + */ + // + if (changeInputPredicateOperandsDueToCOPY != 0) { + LLVM_DEBUG(dbgs() << " PassAfterPostRAScheduler::runOnMachineFunction(): correcting " + "the conflicting register (due to the COPY) in the " + "predicate instruction\n"); + MachineBasicBlock::iterator Icorrect = I2succ; + //Icorrect++; + #ifdef COPY_REGISTER_IMPLEMENTED_WITH_ORV_H + BuildMI(MBB, + Icorrect, // We insert this MachineInstr after the new END_WHERE, before the COPY instr + IMI->getDebugLoc(), + TII->get(Connex::ORV_H), + CONNEX_RESERVED_REGISTER_02). + addReg(I2->getOperand(0).getReg()). + addReg(I2->getOperand(0).getReg()); + #else + #error "This case is NOT implemented. Implement it!" + #endif + + + /* Note: Ipredicate is the predicate for the 1st (part) WHERE* block. + //Ipredicate->getOperand(1).setReg(CONNEX_RESERVED_REGISTER_02); */ + + LLVM_DEBUG(dbgs() << "PassAfterPostRAScheduler: after WHERE block processed: MBB = "; + MBB.dump()); + // We check that we don't mess up the program - TODO we should also check that the iterators are not messed up + /* + for (MachineBasicBlock::iterator Inew = MBB.begin(), + IEnew = MBB.end(); Inew != IEnew; ++Inew) { + //MachineInstr *IMI = I; + LLVM_DEBUG(dbgs() << " runOnMachineFunction(): Inew = " + << *Inew << "\n"); + } + */ + } + #endif // END ifndef TREAT_ONLY_ONCE_CHANGE_PREDICATE_OPERANDS + + + // I2succ++; + LLVM_DEBUG(dbgs() << " moving I2 immediately after END_WHERE of " + "split WHERE block\n"); + //2018_08_17 assert(I2succ != IE); + //2018_08_17 MBB.remove(I2); + // I = I3; + //2018_08_17 MBB.insert(I2succ, I2); // Before I2succ + + + /* VERY IMPORTANT: We create another predicate, a NOP and a new WHERE* + instructions, identical with the (previous) one associated to the + WHERE block, EXCEPT the destination register is 
+ CONNEX_RESERVED_REGISTER_02 - this is safe. */ + BuildMI(MBB, + I2succ, // We insert new instr immediately before I2succ + IMI->getDebugLoc(), + TII->get(predicateInstrOpcode), + #define NEW2018_08_11 + #ifdef NEW2018_08_11 + CONNEX_RESERVED_REGISTER_03 + #else + /* destRegisterPredicateOfSplitWhere is made -1 only after + iterating over END_WHERE, below + */ + destRegisterPredicateOfSplitWhere != -1 ? + destRegisterPredicateOfSplitWhere : + regDest // It is CONNEX_RESERVED_REGISTER_02 + #endif + ). + /* We now change the conflicting register in the predicate + * instruction. + */ + #ifdef TREAT_ONLY_ONCE_CHANGE_PREDICATE_OPERANDS + addReg((changedPredicateOpnd == 0) ? + #else + addReg(((changeInputPredicateOperandsDueToCOPY & 1) == 1) ? + #endif + (unsigned)CONNEX_RESERVED_REGISTER_02 : + predicateInstrOpndAux[0]). //predicateInstrOpnd1). + #ifdef TREAT_ONLY_ONCE_CHANGE_PREDICATE_OPERANDS + addReg((changedPredicateOpnd == 1) ? + #else + addReg(((changeInputPredicateOperandsDueToCOPY & 2) == 2) ? + #endif + (unsigned)CONNEX_RESERVED_REGISTER_02 : + predicateInstrOpndAux[1]); + + BuildMI(MBB, + I2succ, + IMI->getDebugLoc(), + TII->get(Connex::NOP_BPF)); + // TODO: maybe add an addImm(0)?, although it works without + + // We add the same WHERE instr as the one for this block + /* This gives the following error: + <getParent() && "machine instruction already in a basic block"' failed.>> + MBB.insert(I2succ, IMI); // before I2succ + */ + LLVM_DEBUG(dbgs() << " SplitWhereBlock(): IMI (for split) = " + << *IMI << "\n"); + /* From http://llvm.org/doxygen/MachineInstrBuilder_8h_source.html#l00312: + "inserts the newly-built instruction before the given position". 
*/ + /* + IMI = I2succ; + LLVM_DEBUG(dbgs() << " IMI = I2succ = " + << *IMI << "\n"); + IMI--; // IMPORTANT: This makes IMI NULL since IMI is a MachineInstr - see /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/DawnCC/35l_MatMul_f16/SIZE_128/L/STDerr_llc_01_old17 + */ + // See good comments on iterator invalidation: http://llvm.1065342.n5.nabble.com/deleting-or-replacing-a-MachineInst-td77723.html + I = BuildMI(MBB, + I2succ, // We insert new instr immediately before I2succ + IMI->getDebugLoc(), + TII->get(IMI->getOpcode()), + regDest + ); + + // TODO TODO TODO TODO: understand if it generates (due to iterator invalidation??) another END_WHERE - see /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/DawnCC/25k_map/MUL_i32/!!/5_GOOD/output_old06.cpp + + #define GOOD + #ifdef GOOD + // NOTE: I is the new WHERE* instruction just created + // We update I2 to check for more COPY instrs after the new created WHERE + I2 = I; I2++; + + // We update IMI since we insert COPY before predicate of WHERE using IMI + IMI = (&(*I)); + + //MachineBasicBlock::iterator Iaux10 = I2succ; Iaux10--; + LLVM_DEBUG(dbgs() << " I2succ = " + << *I2succ << "\n"); + LLVM_DEBUG(dbgs() << " IMI = " + << *IMI << "\n"); + LLVM_DEBUG(dbgs() << " I = " + << *I << "\n"); + LLVM_DEBUG(dbgs() << " I2 = " + << *I2 << "\n"); + #else + I = I2succ; // TODO TODO TODO: not sure this covers all cases + // We jump over the new WHERE* for the split WHERE block + I++; + // Here we handle the case of more than 1 COPY instr in the WHERE block: + I2 = I2plus1; + // TODO TODO TODO TODO TODO TODO TODO TODO TODO + /* + // VERY IMPORTANT: it seems it's very difficult to get the new WHERE instruction + // added after (removing the COPY and) adding EQ/(U)LT, NOP and a WHERE due + // to pointer invalidation - doing I2succ-- results in NULL pointer. + // See http://llvm.1065342.n5.nabble.com/deleting-or-replacing-a-MachineInst-td77723.html + // for various possible solutions. 
+        IMI = I2succ;
+        LLVM_DEBUG(dbgs() << " IMI = I2succ = "
+                          << *IMI << "\n");
+        IMI--; // IMPORTANT: This makes IMI NULL since IMI is a MachineInstr - see /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/DawnCC/35l_MatMul_f16/SIZE_128/L/STDerr_llc_01_old15
+      */
+      //IMI = I2; // The new WHERE* added;
+      /* I checked on 2018_06_26 that IMI points really to the next instruction after
+       * the COPY that triggered the split of the WHERE* block.
+       * So, it seems that we miss the 1st instruction after the COPY that
+       * triggered the split.
+       */
+      //
+      //
+      // NOT required: IMI--; // IMI is not an iterator - that's why it gives error - it seems it starts from last instr of 1st part of split WHERE* block
+      LLVM_DEBUG(dbgs() << " SplitWhereBlock(): after split I2 (should point at the new WHERE* added) = "
+                        << *I2 << "\n");
+    #endif
+
+    // break;
+    //assert();
+    LLVM_DEBUG(dbgs() << " To check: IMI = "
+                      << *IMI << "\n");
+
+    LLVM_DEBUG(dbgs()
+                 << "PassAfterPostRAScheduler: after splitting WHERE block in 2: MBB = ";
+               MBB.dump());
+  } // END SplitWhereBlock()
+
+
+  /// Walks every basic block of \p MF and fixes up each WHERE* ... END_WHERE
+  /// block (WHEREEQ / WHERELT / WHERECRY), unless DontTreatCopyInstructions is
+  /// set.
+  ///
+  /// For each WHERE* instruction found:
+  ///  1. An ORV_H placed immediately before the WHERE* that defines the
+  ///     WHERE*'s operand 0 is erased (it is considered a leftover COPY).
+  ///  2. Every ORV_H (the instruction copyPhysReg() is implemented with) or
+  ///     LD_FILL_H found inside the block is taken out of the block:
+  ///       - if only moving it *after* the block would break a dependence, it
+  ///         is handed to PutCOPYBeforeWhereBlock();
+  ///       - if there is no constraint, or only moving it *before* the block
+  ///         would break a dependence, it is re-inserted right after the
+  ///         block's END_WHERE;
+  ///       - if both directions are unsafe, SplitWhereBlock() splits the block
+  ///         around the instruction.
+  ///
+  /// This mirrors what ARM's Thumb2ITBlockPass does for IT blocks; COPYs show
+  /// up inside WHERE blocks when TwoAddressInstructionPass emits them and
+  /// RegisterCoalescer does not remove them (e.g. for the manually selected
+  /// MULi32 / DIVi16 sequences).
+  ///
+  /// \returns the value of changedMF, which is set here whenever an
+  /// instruction is erased or moved, and is also passed to the two helpers
+  /// (presumably so they can set it - confirm against their definitions).
+  bool runOnMachineFunction(MachineFunction &MF) {
+    bool changedMF = false;
+
+    // A StringRef is not guaranteed to be NUL-terminated, so it is streamed
+    // directly instead of through data().
+    LLVM_DEBUG(dbgs() << "Entered PassAfterPostRAScheduler::runOnMachineFunction(MF = "
+                      << MF.getName()
+                      << ")\n");
+
+    #ifndef COPY_REGISTER_IMPLEMENTED_WITH_ORV_H
+      #error "This case is NOT implemented. Implement it!"
+    #endif
+
+    // Loop-invariant: the same instruction info is used for every block.
+    const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+
+    // Process all basic blocks.
+    for (auto &MBB : MF) {
+      int destRegisterPredicateOfSplitWhere = -1;
+
+      LLVM_DEBUG(dbgs() << "PassAfterPostRAScheduler::runOnMachineFunction(): a new MBB = "
+                        << MBB
+                        << "\n");
+      LLVM_DEBUG(dbgs() << "PassAfterPostRAScheduler::runOnMachineFunction(): again MBB = "
+                        << MBB
+                        << "\n");
+
+      for (MachineBasicBlock::iterator I = MBB.begin(),
+                                       IE = MBB.end(); I != IE; ++I) {
+        MachineInstr *IMI = &*I;
+
+        LLVM_DEBUG(dbgs() << " runOnMachineFunction(): I = "
+                          << *I << "\n");
+        LLVM_DEBUG(dbgs() << " runOnMachineFunction(): DontTreatCopyInstructions = "
+                          << DontTreatCopyInstructions << "\n");
+
+        if (DontTreatCopyInstructions)
+          continue;
+
+        if (IMI->getOpcode() != Connex::WHEREEQ &&
+            IMI->getOpcode() != Connex::WHERELT &&
+            IMI->getOpcode() != Connex::WHERECRY)
+          continue;
+
+        LLVM_DEBUG(dbgs() << "PassAfterPostRAScheduler::runOnMachineFunction(): found WHERE* block\n");
+
+        /* Removing the useless COPY immediately before the WHERE* (between
+         * NOP and WHERE*). It was observed (most notably on SSD.f16,
+         * Jul 29-30 2018) to be generated because of the WHERE* instruction
+         * itself - MEGA-TODO: check this is always so.
+         * important-TODO: take care of COPY instructions being moved here by
+         * the post-RA scheduler.
+         *
+         * The WHERE* may be the first instruction of the block, in which case
+         * there is no predecessor to look at. */
+        if (I != MBB.begin()) {
+          MachineBasicBlock::iterator ItmpToErase = I;
+          --ItmpToErase;
+
+          if (ItmpToErase->getOpcode() == Connex::ORV_H) {
+            MachineInstr *Iremove = &*ItmpToErase;
+
+            /* We assert this COPY is related to the WHERE* instruction - if
+               NOT, then the COPY was probably moved here by the post-RA
+               scheduler. */
+            assert(Iremove->getOperand(0).isReg() &&
+                   Iremove->getOperand(0).isDef() &&
+                   Iremove->getOperand(0).getReg() == IMI->getOperand(0).getReg()
+                  );
+
+            /* Checking that it is really safe to remove this COPY: the first
+               later instruction whose operand 0 is the same register must
+               redefine it, not use it. */
+            MachineBasicBlock::iterator Icheck = I;
+            ++Icheck; // We jump over the WHERE* instruction found
+            LLVM_DEBUG(if (Icheck != IE)
+                         dbgs() << " runOnMachineFunction(): Icheck = "
+                                << *Icheck << "\n");
+            for (; Icheck != IE; ++Icheck) {
+              LLVM_DEBUG(dbgs() << " Icheck = " << *Icheck);
+              if (Icheck->getNumOperands() > 0 &&
+                  Icheck->getOperand(0).isReg() &&
+                  Icheck->getOperand(0).getReg() ==
+                    Iremove->getOperand(0).getReg()) {
+                // It normally has to be a def - if it's a use it's bad
+                assert(Icheck->getOperand(0).isDef() &&
+                       "PassAfterPostRAScheduler: Found a 'useless' COPY "
+                       "that is not useless since it is used after... - this is not good --> change ConnexTargetMachine.cpp");
+                break;
+              }
+            }
+
+            LLVM_DEBUG(dbgs() << " Removing useless COPY immediately before the WHERE block.\n");
+
+            // eraseFromParent() unlinks AND deletes the instruction;
+            // MBB.remove() only unlinked it. The function was modified, so
+            // this must be reported to the pass manager.
+            Iremove->eraseFromParent();
+            changedMF = true;
+          }
+        }
+
+        MachineBasicBlock::iterator I2 = I;
+        ++I2; // We jump over the WHERE* instruction found
+        LLVM_DEBUG(if (I2 != IE)
+                     dbgs() << " runOnMachineFunction(): I2 = "
+                            << *I2 << "\n");
+
+        // Iterating over all remaining instructions of the BB. I2 is advanced
+        // explicitly inside the body because instructions get moved around.
+        while (I2 != IE) {
+          LLVM_DEBUG(dbgs() << " I2 = " << *I2);
+
+          /* IMPORTANT: NORMALLY, inside WHERE blocks generated with Opincaa
+             lib's Kernel::genLLVMISelManualCode(), we are guaranteed to have
+             only ORV_SPECIAL_H Connex instructions, so meeting an ORV_H is
+             only when a COPY was generated by the TwoAddressInstructionPass.
+             MEGA-TODO: || I2->getOpcode() == Connex::ST_FILL_H */
+          if (I2->getOpcode() == Connex::ORV_H ||
+              I2->getOpcode() == Connex::LD_FILL_H) {
+            // The ORV_H emitted by copyPhysReg() has both inputs equal.
+            assert((I2->getOpcode() != Connex::ORV_H ||
+                    I2->getOperand(1).getReg() ==
+                      I2->getOperand(2).getReg()) &&
+                   "I2 is an ORV_H with different input operands. "
+                   "Maybe too paranoid check: We do not "
+                   "recommend to have emulation Opincaa kernels "
+                   "generated by Kernel::genLLVMISelManualCode() "
+                   "with ORV_H inside WHERE blocks (if these "
+                   "instructions come from there). But you "
+                   "can comment this assert and issue a simple "
+                   "warning.");
+
+            LLVM_DEBUG(dbgs() << " found COPY/LD_FILL at I2 = " << *I2
+                              << " --> moving it out of the WHERE block to "
+                                 "preserve correct program semantics.\n");
+
+            /* The algo is (a sketch that MIGHT NOT reflect totally the
+               implementation):
+                 If no instruction of the block BEFORE the COPY touches the
+                 COPY's destination or defines its source:
+                   the COPY can go before the WHERE block.
+                 Else if no instruction of the block AFTER the COPY touches
+                 the COPY's destination or defines its source:
+                   the COPY can go after END_WHERE.
+                 Else:
+                   moving it either way would change program semantics, so
+                   the WHERE block is split in two and the predicate
+                   (together with a NOP) is repeated before the 2nd half. */
+            #ifdef ALLOW_COPY_BETWEEN_PREDICATE_AND_WHERE_INSTRUCTIONS
+              MachineBasicBlock::iterator I3 = IMI; // IMI is WHERE instr
+              LLVM_DEBUG(dbgs() << " I3 = "
+                                << *I3 << "\n");
+              I3--;
+              LLVM_DEBUG(dbgs() << " I3 (after 1 -)= "
+                                << *I3 << "\n");
+
+              assert(I3->getOpcode() == Connex::NOP ||
+                     I3->getOpcode() == Connex::NOP_BPF);
+              I3--;
+              LLVM_DEBUG(dbgs() << " I3 (after 2 -)= "
+                                << *I3 << "\n");
+              assert(I3->getOpcode() == Connex::EQ_H ||
+                     I3->getOpcode() == Connex::LT_H ||
+                     I3->getOpcode() == Connex::ULT_H);
+            #else
+              MachineBasicBlock::iterator I3 = IMI; // IMI is WHERE instr
+              I3++;
+            #endif
+
+            #define SAFE_SINCE_NO_CONSTRAINT 0
+            #define NOT_SAFE_TO_PUT_COPY_BEFORE_WHERE_BLOCK 1
+            #define NOT_SAFE_TO_PUT_COPY_AFTER_WHERE_BLOCK 2
+            #define SAFE_TO_PUT_COPY_IN_SPLIT_WHERE_BLOCK 3
+            int whatToDo = SAFE_SINCE_NO_CONSTRAINT;
+
+            bool I3IsBeforeI2 = true;
+            // Destination register of the COPY/LD_FILL that I2 points to.
+            const auto copyDstReg = I2->getOperand(0).getReg();
+            // Only ORV_H is known here to carry its source register in
+            // operand 1, so the source is only inspected for ORV_H.
+            const bool copyIsORV = (I2->getOpcode() == Connex::ORV_H);
+
+            // Scan the block up to (not including) END_WHERE. When the loop
+            // ends, I3 is either END_WHERE or IE.
+            for (; I3 != IE; I3++) {
+              if (I3->getOpcode() == Connex::END_WHERE) {
+                break;
+              }
+
+              LLVM_DEBUG(dbgs() << " I3 = "
+                                << *I3);
+
+              if (I3 == I2) {
+                I3IsBeforeI2 = false;
+                continue;
+              }
+              LLVM_DEBUG(dbgs() << " I3IsBeforeI2 = "
+                                << I3IsBeforeI2 << "\n");
+
+              // A conflict with an earlier instruction forbids hoisting the
+              // COPY before the block; one with a later instruction forbids
+              // sinking it after the block.
+              const int verdict = I3IsBeforeI2 ?
+                                    NOT_SAFE_TO_PUT_COPY_BEFORE_WHERE_BLOCK :
+                                    NOT_SAFE_TO_PUT_COPY_AFTER_WHERE_BLOCK;
+
+              // We look at all operands of instruction I3
+              for (unsigned idOpnd = 0; idOpnd < I3->getNumOperands();
+                   idOpnd++) {
+                MachineOperand &I3Opnd = I3->getOperand(idOpnd);
+
+                LLVM_DEBUG(dbgs() << " I3Opnd (index = " << idOpnd
+                                  << ") = " << I3Opnd << "\n");
+
+                if (!I3Opnd.isReg())
+                  continue;
+
+                // Any use or def of the COPY's destination is a dependence.
+                const bool touchesDst = (I3Opnd.getReg() == copyDstReg);
+                // Only a def of the COPY's source is a dependence; a use of
+                // the source is read-after-read and harmless.
+                const bool defsSrc =
+                  !touchesDst && I3Opnd.isDef() && copyIsORV &&
+                  I3Opnd.getReg() == I2->getOperand(1).getReg();
+
+                if (touchesDst || defsSrc) {
+                  LLVM_DEBUG(dbgs() << " I3, which is "
+                                    << (I3IsBeforeI2 ? "before" : "after")
+                                    << " I2, "
+                                    << (I3Opnd.isDef() ? "defs" : "uses")
+                                    << " the "
+                                    << (touchesDst ? "dst" : "src")
+                                    << "-register of I2 --> moving I2 "
+                                    << (I3IsBeforeI2 ? "before" : "after")
+                                    << " the WHERE block is NOT safe"
+                                    << "\n");
+                  whatToDo |= verdict;
+                }
+              } // END for loop idOpnd
+            } // END for loop with ind-var I3
+
+            /* Remember the successor of the COPY before it is moved: after
+             * remove()/insert() I2 follows the instruction to its new place. */
+            MachineBasicBlock::iterator I2plus1 = I2;
+            ++I2plus1;
+            LLVM_DEBUG(if (I2plus1 != IE)
+                         dbgs() << " runOnMachineFunction(): I2plus1 = "
+                                << *I2plus1 << "\n");
+            LLVM_DEBUG(dbgs() << " runOnMachineFunction(): I2 (before moving I2) = "
+                              << *I2 << "\n");
+            LLVM_DEBUG(dbgs() << " whatToDo = " << whatToDo << "\n");
+
+            if (whatToDo == NOT_SAFE_TO_PUT_COPY_AFTER_WHERE_BLOCK) {
+              // Moving COPY before the WHERE block.
+              PutCOPYBeforeWhereBlock(MBB, TII, IMI, I2,
+                                      I2plus1, IE, changedMF,
+                                      destRegisterPredicateOfSplitWhere);
+            }
+            else
+            if (
+                // 2018_08_17: we treat here SAFE_SINCE_NO_CONSTRAINT because moving after WHERE block doesn't add any auxiliary instruction
+                whatToDo == SAFE_SINCE_NO_CONSTRAINT ||
+                whatToDo == NOT_SAFE_TO_PUT_COPY_BEFORE_WHERE_BLOCK) {
+              // TODO TODO: we should put multiple COPY instructions from this WHERE block in the SAME order after END_WHERE. See if such cases happen.
+              LLVM_DEBUG(dbgs() << " moving I2 immediately after WHERE block\n");
+
+              // The scan above stops only at END_WHERE or at the end of the
+              // basic block.
+              assert(I3 != IE && (I3->getOpcode() == Connex::END_WHERE) &&
+                     "I3 should point to END_WHERE (see code above).");
+
+              if (I3 == IE) {
+                // Release builds: a block without END_WHERE has no valid
+                // insertion point, so the instruction is left where it is
+                // instead of dereferencing the end iterator.
+                I2 = I2plus1;
+              }
+              else {
+                LLVM_DEBUG(dbgs() << " runOnMachineFunction(): I2 = "
+                                  << *I2 << "\n");
+                LLVM_DEBUG(dbgs() << " runOnMachineFunction(): I3 = "
+                                  << *I3 << "\n");
+
+                ++I3; // Jump over END_WHERE
+                LLVM_DEBUG(if (I3 != IE)
+                             dbgs() << " runOnMachineFunction(): I3 (after I3++) = "
+                                    << *I3 << "\n");
+
+                MachineInstr *copyMI = &*I2;
+                MBB.remove(copyMI);
+                MBB.insert(I3, copyMI); // It inserts before I3
+
+                LLVM_DEBUG(dbgs() << " runOnMachineFunction(): I2 (after moving I2) = "
+                                  << *I2 << "\n");
+
+                /* Here we handle the case of more than 1 COPY instr in the
+                   WHERE block (I2plus1 is the instr that followed the COPY
+                   before the move): we keep scanning this WHERE block. */
+                I2 = I2plus1;
+
+                changedMF = true;
+              }
+            } // END if (whatToDo == NOT_SAFE_TO_PUT_COPY_BEFORE_WHERE_BLOCK)
+            else
+            if (whatToDo == SAFE_TO_PUT_COPY_IN_SPLIT_WHERE_BLOCK) {
+              SplitWhereBlock(MBB, TII, I, IMI, I2, IE,
+                              changedMF,
+                              destRegisterPredicateOfSplitWhere);
+              LLVM_DEBUG(dbgs() << " After calling SplitWhereBlock(): IMI = "
+                                << *IMI << "\n");
+            } // END if SPLIT WHERE block
+            else
+              // IMPORTANT: we increment here the iterator over instruction in WHERE block
+              ++I2;
+          } // END if (I2->getOpcode() == Connex::ORV_H)
+          else {
+            // IMPORTANT: we increment here the iterator over instruction in WHERE block
+            ++I2;
+          }
+
+          // I2 may have reached the end of the basic block above.
+          if (I2 == IE)
+            break;
+
+          // Note that the END_WHERE takes input node and has a value output
+          if (I2->getOpcode() == Connex::END_WHERE) {
+            LLVM_DEBUG(dbgs() << " found END_WHERE --> breaking I2 loop\n");
+            // Resume the outer scan AT the END_WHERE: the outer loop's ++I
+            // then moves to the instruction that follows it. Resuming one
+            // past END_WHERE would skip that instruction and would step past
+            // end() when END_WHERE is the last instruction of the block.
+            I = I2;
+
+            // MEGA-TODO: think if OK here
+            destRegisterPredicateOfSplitWhere = -1;
+
+            LLVM_DEBUG(dbgs() << " Making destRegisterPredicateOfSplitWhere = -1\n");
+
+            break;
+          }
+
+          LLVM_DEBUG(dbgs() << "PassAfterPostRAScheduler: at end of for loop I2, I2 = "
+                            << *I2
+                            << " and IMI = "
+                            << *IMI);
+        } // END loop with ind-var I2
+
+        LLVM_DEBUG(dbgs() << "PassAfterPostRAScheduler: after WHERE block processed: MBB = ";
+                   MBB.dump());
+        LLVM_DEBUG(dbgs() << "PassAfterPostRAScheduler: IMI = "
+                          << *IMI);
+      } // END for (MachineBasicBlock::iterator I
+    } // END for (auto &MBB : MF)
+
+
+    LLVM_DEBUG(dbgs() << " runOnMachineFunction(): changedMF = "
+                      << changedMF << "\n");
+
+    return changedMF; // indicates if we changed MF
+  }
+
+ private:
+  MachineRegisterInfo *MRI;
+
+  static char ID;
+}; // END class PassAfterPostRAScheduler
+char PassAfterPostRAScheduler::ID = 0;
+
+} // END namespace
+
+
+
+// We currently don't use anymore bundles, since we avoid using the post-RA scheduler
+//#define CREATE_BUNDLES
+#ifdef CREATE_BUNDLES
+  #include "ConnexTargetMachine_NotUsed_Important.h"
+#endif
+
+// Gives error: should have been declared inside ‘llvm’: FunctionPass *llvm::createPreRAPassFinalizeBundles() { return new PreRAPassFinalizeBundles(); }
+namespace llvm {
+#ifdef CREATE_BUNDLES
+  FunctionPass *createPassCreateBundles() {
+    return new PassCreateBundles();
+  }
+
+  FunctionPass *createPassFinalizeBundles() {
+    return new PassFinalizeBundles();
+  }
+#endif
+
+  /// Factory for the pass that ConnexPassConfig::addPreEmitPass() registers.
+  FunctionPass *createPassAfterPostRAScheduler() {
+    return new PassAfterPostRAScheduler();
+  }
+}
+
+
+namespace {
+
+// Connex Code Generator Pass Configuration Options.
+class ConnexPassConfig : public TargetPassConfig {
+ public:
+  // The derived-to-base conversion is implicit; no cast is needed.
+  ConnexPassConfig(ConnexTargetMachine *TM, PassManagerBase &PM)
+    : TargetPassConfig(*TM, PM) {}
+
+
+  // getTM() is a template whose parameter cannot be deduced, so the target
+  // machine type has to be spelled out.
+  ConnexTargetMachine &getConnexTargetMachine() const {
+    return getTM<ConnexTargetMachine>();
+  }
+
+
+//#ifdef CREATE_BUNDLES // IMPORTANT - not executing these methods inside results in error: <>
+  //bool addInstSelector() override;
+  // Install an instruction selector pass using
+  // the ISelDag to gen Connex code; also register extra passes.
+
+// VERY IMPORTANT: commenting this method results in error: <>
+  //#ifdef CREATE_BUNDLES
+  /// Registers the Connex instruction selector (and, when CREATE_BUNDLES is
+  /// defined, the bundle-creation pass right after it).
+  /// \returns false, as in the original implementation.
+  bool addInstSelector() override {
+    addPass(createConnexISelDag(getConnexTargetMachine()));
+
+    /* The registered pass is run immediately after the 1st List
+     * scheduling, after the ISel pass registered above.
+     * The reason it is NOT directly after the ISel pass is that it seems
+     * that the 1st scheduling
+     * pass is considered to be linked together with ISel.
+     */
+    #ifdef CREATE_BUNDLES
+      addPass(createPassCreateBundles());
+    #endif
+
+    return false;
+  }
+  //#endif
+
+
+  /* From http://llvm.org/docs/doxygen/html/classllvm_1_1TargetPassConfig.html
+     This method may be implemented by targets that want to run passes immediately before register allocation.
+   */
+  /// Intentionally registers nothing; the bundle-finalization experiments
+  /// below are kept only as notes.
+  void addPreRegAlloc() override {
+    /*
+    // IMPORTANT: As of Mar 2017, implementing this pass with finalizeBundle here
+    // gives error at:
+    // <>
+
+    LLVM_DEBUG(dbgs() << "Entered ConnexPassConfig::addPreRegAlloc().\n");
+
+    // Inspired from llvm/lib/Target/X86/X86TargetMachine.cpp and X86OptimizeLEAs.cpp
+    if (getOptLevel() != CodeGenOpt::None)
+      addPass(createPassFinalizeBundles());
+    */
+
+    /*
+    LLVM_DEBUG(dbgs() << "Entered ConnexPassConfig::addPreRegAlloc().\n");
+    //addPass(createPassCreateBundles());
+
+    // IMPORTANT: finalizeBundle gives error:
+    // <>
+    addPass(createPassFinalizeBundles());
+    */
+  }
+
+
+  /// Intentionally registers nothing (see the disabled experiment below).
+  void addPostRegAlloc() override {
+    /*
+    // It does NOT help for my llc -O1 bug related to <>
+
+    LLVM_DEBUG(dbgs() << "Entered ConnexPassConfig::addPostRegAlloc().\n");
+    addPass(createPassFinalizeBundles());
+    */
+  }
+
+
+  #ifdef CREATE_BUNDLES
+  /* IMPORTANT:
+     From http://llvm.org/docs/doxygen/html/classllvm_1_1TargetPassConfig.html
+     <>
+     (This runs after register allocation, before 2nd (post-RA) scheduler) */
+  /// Registers the bundle-finalization pass; only compiled when
+  /// CREATE_BUNDLES is defined.
+  void addPreSched2() override {
+    LLVM_DEBUG(dbgs() << "Entered ConnexPassConfig::addPreSched2().\n");
+
+    // Inspired from llvm/lib/Target/ARM/ARMTargetMachine.cpp
+    //if (getOptLevel() != CodeGenOpt::None)
+    addPass(createPassFinalizeBundles());
+  }
+  #endif
+//#endif // CREATE_BUNDLES
+
+
+  /*
+    From http://llvm.org/doxygen/classllvm_1_1TargetPassConfig.html:
+    <>
+  */
+  /// Registers PassAfterPostRAScheduler followed by the stand-alone post-RA
+  /// hazard recognizer, in that order.
+  void addPreEmitPass() override {
+    LLVM_DEBUG(dbgs() << "Entered ConnexPassConfig::addPreEmitPass().\n");
+
+    addPass(createPassAfterPostRAScheduler());
+
+    // Here we add a stand-alone hazard recognizer pass
+    addPass(&PostRAHazardRecognizerID);
+  }
+};
+
+} // end namespace
+
+TargetPassConfig *ConnexTargetMachine::createPassConfig(PassManagerBase &PM) {
+  return new ConnexPassConfig(this, PM);
+}
+
+/*
+*/
+// 2019_03_30: Inspired from ARCTargetMachine.cpp
+TargetTransformInfo ConnexTargetMachine::getTargetTransformInfo(const Function &F) {
+  return TargetTransformInfo(ConnexTTIImpl(this, F));
+}
+
Index: lib/Target/Connex/ConnexTargetTransformInfo.h
===================================================================
--- lib/Target/Connex/ConnexTargetTransformInfo.h
+++ lib/Target/Connex/ConnexTargetTransformInfo.h
@@ -0,0 +1,132 @@
+// 2019_03_30: Inspired from XCore/XCoreTargetTransformInfo.h
+
+//===-- ConnexTargetTransformInfo.h - Connex specific TTI ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file contains a TargetTransformInfo::Concept conforming object specific to the
+/// Connex target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CONNEX_CONNEXTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_CONNEX_CONNEXTARGETTRANSFORMINFO_H
+
+#include "Connex.h"
+#include "ConnexTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+
+/// TargetTransformInfo implementation for the Connex target.
+///
+/// Only the masked gather/scatter legality queries are defined here; all other
+/// queries are inherited from BasicTTIImplBase.
+///
+/// NOTE: this header uses LLVM_DEBUG, so the including translation unit has to
+/// define DEBUG_TYPE before including it.
+class ConnexTTIImpl : public BasicTTIImplBase<ConnexTTIImpl> {
+  typedef BasicTTIImplBase<ConnexTTIImpl> BaseT;
+  typedef TargetTransformInfo TTI;
+  friend BaseT;
+
+  const ConnexSubtarget *ST;
+  const ConnexTargetLowering *TLI;
+
+  // Private accessors; BaseT is a friend, presumably so that it can call them.
+  const ConnexSubtarget *getST() const {
+    LLVM_DEBUG(dbgs() << "Entered getST()\n");
+    return ST;
+  }
+
+  const ConnexTargetLowering *getTLI() const {
+    LLVM_DEBUG(dbgs() << "Entered getTLI()\n");
+    return TLI;
+  }
+
+public:
+  /// Builds the TTI for function \p F using the subtarget and target lowering
+  /// of \p TM.
+  // Using TM->getSubtargetImpl(F), as the X86 backend does, did not help.
+  explicit ConnexTTIImpl(const ConnexTargetMachine *TM, const Function &F)
+      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()),
+        TLI(ST->getTargetLowering()) {
+    LLVM_DEBUG(dbgs() << "Entered constructor ConnexTTIImpl()\n");
+  }
+
+  /// Returns true if a masked gather of \p DataTy is legal.
+  ///
+  /// The decision depends only on the scalar element type of \p DataTy: half,
+  /// i16 and i32 are accepted, everything else is rejected.
+  // Inspired from X86TargetTransformInfo.cpp. Unlike X86, no subtarget feature
+  // is consulted and the vector element count is not checked.
+  bool isLegalMaskedGather(Type *DataTy) {
+    LLVM_DEBUG(dbgs() << "Entered isLegalMaskedGather()\n");
+
+    Type *ScalarTy = DataTy->getScalarType();
+
+    if (ScalarTy->isHalfTy()) // 2019_04_14
+      return true;
+
+    if (!ScalarTy->isIntegerTy())
+      return false;
+
+    unsigned IntWidth = ScalarTy->getIntegerBitWidth();
+    LLVM_DEBUG(dbgs() << "isLegalMaskedGather(): IntWidth = "
+                      << IntWidth << "\n");
+    return (IntWidth == 16) || (IntWidth == 32); // 2019_04_14
+  }
+
+  /// Returns true if a masked scatter of \p DataType is legal; the same types
+  /// are accepted as for masked gathers.
+  bool isLegalMaskedScatter(Type *DataType) {
+    LLVM_DEBUG(dbgs() << "Entered isLegalMaskedScatter()\n");
+
+    // Inspired from X86TargetTransformInfo.cpp
+    return isLegalMaskedGather(DataType);
+  }
+
+  // getNumberOfRegisters(bool Vector) is deliberately not overridden; an
+  // earlier experiment returned 0 for vector and 12 for scalar registers.
+};
+
+} // end namespace llvm
+
+#endif
Index: lib/Target/Connex/InstPrinter/CMakeLists.txt
===================================================================
--- lib/Target/Connex/InstPrinter/CMakeLists.txt
+++ lib/Target/Connex/InstPrinter/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMConnexAsmPrinter
+  ConnexInstPrinter.cpp
+  )
Index: lib/Target/Connex/InstPrinter/ConnexInstPrinter.h
===================================================================
--- lib/Target/Connex/InstPrinter/ConnexInstPrinter.h
+++ lib/Target/Connex/InstPrinter/ConnexInstPrinter.h
@@ -0,0 +1,79 @@
+//===-- ConnexInstPrinter.h - Convert Connex MCInst to asm syntax -------*- C++ -*--//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Connex MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CONNEX_INSTPRINTER_CONNEXINSTPRINTER_H
+#define LLVM_LIB_TARGET_CONNEX_INSTPRINTER_CONNEXINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+class MCOperand;
+
+/// Prints Connex MCInsts as assembly text.
+///
+/// printInst() is the MCInstPrinter entry point. The print*Operand/print*Imm
+/// methods are not MCInstPrinter methods: they are called from the
+/// TableGen-generated code in ConnexGenAsmWriter.inc.
+class ConnexInstPrinter : public MCInstPrinter {
+public:
+  ConnexInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                    const MCRegisterInfo &MRI)
+      : MCInstPrinter(MAI, MII, MRI) {}
+
+  /// Print \p MI to \p O, followed by the annotation \p Annot.
+  void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+                 const MCSubtargetInfo &STI) override;
+
+  /// Print operand \p OpNo of \p MI: a register name, an immediate or an
+  /// expression. \p Modifier must be null or empty.
+  void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+                    const char *Modifier = nullptr);
+
+  /// Print immediate operand \p opNum reduced to \p Bits bits relative to
+  /// \p Offset; non-immediate operands are forwarded to printOperand().
+  // NOTE(review): the template parameter list was reconstructed from the uses
+  // of Bits and Offset in the definition; the default for Offset follows
+  // MipsInstPrinter::printUImm - confirm.
+  template <unsigned Bits, unsigned Offset = 0>
+  void printUImm(const MCInst *MI, int opNum, raw_ostream &O);
+
+  /// Print a memory operand made of a base register (operand \p OpNo) and an
+  /// immediate offset (operand \p OpNo + 1).
+  void printMemOperand(const MCInst *MI, int OpNo, raw_ostream &O,
+                       const char *Modifier = nullptr);
+
+  // Taken from MSP430InstPrinter.h
+  void printSrcMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+                          const char *Modifier = nullptr);
+
+  void printImm64Operand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+  // Inspired from printi256mem() from
+  // [LLVM]/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+  void printScatterGatherMemOperand(const MCInst *MI, unsigned OpNo,
+                                    raw_ostream &O);
+
+  // Autogenerated by tblgen.
+  void printInstruction(const MCInst *MI, raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo);
+
+private:
+  // Taken from [LLVM]/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+  void printUnsignedImm8(const MCInst *MI, int opNum, raw_ostream &O);
+
+  // Required by ConnexGenAsmWriter.inc
+  // Taken from Mips/InstPrinter/MipsInstPrinter.h
+  void printUnsignedImm(const MCInst *MI, int opNum, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
Index: lib/Target/Connex/InstPrinter/ConnexInstPrinter.cpp
===================================================================
--- lib/Target/Connex/InstPrinter/ConnexInstPrinter.cpp
+++ lib/Target/Connex/InstPrinter/ConnexInstPrinter.cpp
@@ -0,0 +1,535 @@
+//===-- ConnexInstPrinter.cpp - Convert Connex MCInst to asm syntax -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Connex MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "Connex.h"
+#include "ConnexInstPrinter.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+
+#include "llvm/Support/Debug.h" // for dbgs and DEBUG() macro
+#include "ConnexConfig.h"
+
+using namespace llvm;
+
+
+//#define DEBUG_TYPE "asm-printer"
+#define DEBUG_TYPE "asm-inst-printer"
+
+// Include the auto-generated portion of the assembly writer.
+#include "ConnexGenAsmWriter.inc"
+
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include <cstring>
+#include <unordered_map>
+
+/*
+NOTE: as of Nov 2016, the LLVM APIs allow printing customized code only
+here (and NOT in ConnexAsmPrinter.cpp, which around a year ago had some APIs).
+*/
+
+/*
+We declare here these vars and use them as extern in
+ ConnexAsmPrinter.cpp (and NOT the other way around - which gives ld error)
+ because of the way these C modules are being linked by the Makefile scripts
+ of LLVM.
+
+Note that the flow of the operations is
+ ConnexAsmPrinter::EmitInstruction() gets called first and then
+ ConnexInstPrinter::printUnsignedImm() gets called immediately after.
+ (look at the stdout files generated by llc with the DEBUG prints)
+ Also, if we look at ConnexAsmPrinter::EmitInstruction(), we have an
+ automatic var MCInst TmpInst; .
+ So, MCInst is generated for the time it is output to the stream and then
+ automatically destroyed - so it does NOT make any sense to keep track in a
+ map the associated MachineInstr for the MCInst in "flight" (TmpInst).
+*/
+const MachineInstr *crtMI = nullptr;
+#ifdef NOTNOTNOT
+// NOTE(review): key/value types reconstructed from the uses in
+// RetrieveAssociatedMachineInstr() - confirm.
+std::unordered_map<const MachineInstr *, const MCInst *> mapMachineMCInst;
+#endif
+// A map associating: first is LD_H, ST_H or REPEAT, second is the associated
+// INLINEASM.
+// NOTE(review): key/value types reconstructed from the lookup in
+// GetSymbolicValueFromAssociatedInlineAsm(); they must match the extern
+// declaration in ConnexAsmPrinter.cpp - confirm.
+std::unordered_map<const MachineInstr *, const MachineInstr *>
+    mapLD_ST_REPEAT_InlineAsm;
+
+#ifdef NOTNOTNOT
+/// Reverse lookup in mapMachineMCInst: returns the MachineInstr whose mapped
+/// MCInst is \p mci, or nullptr if there is none. Only compiled when
+/// NOTNOTNOT is defined.
+const MachineInstr *RetrieveAssociatedMachineInstr(const MCInst *mci) {
+  LLVM_DEBUG(dbgs() << "Entered RetrieveAssociatedMachineInstr()\n");
+
+  // Initialised so that a failed lookup returns nullptr instead of an
+  // indeterminate pointer.
+  const MachineInstr *res = nullptr;
+
+  for (const auto &entry : mapMachineMCInst) {
+    if (entry.second == mci) {
+      LLVM_DEBUG(dbgs() << "RetrieveAssociatedMachineInstr(): "
+                        << "mci = " << *mci
+                        << ", mci = " << mci
+                        << ", MachineInstr = " << entry.first
+                        << "\n");
+      res = entry.first;
+    }
+  }
+  return res;
+}
+#endif
+
+
+
+/// Print \p MI to \p O and append the annotation \p Annot.
+///
+/// INLINEASM instructions are printed by hand (operand 0 followed by a marker
+/// comment); every other opcode goes through the TableGen-generated
+/// printInstruction().
+void ConnexInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                                  StringRef Annot, const MCSubtargetInfo &STI) {
+  LLVM_DEBUG(dbgs() << "Entered ConnexInstPrinter::printInst()...\n");
+  LLVM_DEBUG(dbgs() << "printInst(): MI = " << *MI << "\n");
+  LLVM_DEBUG(dbgs() << "printInst(): MI->getOpcode() = "
+                    << MI->getOpcode() << "\n");
+
+  // For some reason, ConnexGenAsmWriter.inc cannot print the INLINEASM from
+  // the MachineInstr bundles created in ConnexInstrInfo.cpp,
+  // expandPostRAPseudo(), and then unpacked in
+  // [Target]AsmPrinter::EmitInstruction(): its OpInfo0[] table holds 0U for
+  // PHI, INLINEASM, etc. So these INLINEASMs are handled here.
+  // TODO: maybe explain better.
+  if (MI->getOpcode() == TargetOpcode::INLINEASM) {
+    O << " ";
+    printOperand(MI, 0, O);
+    O << " // custom code in ConnexInstPrinter::printInst() for INLINEASM";
+  } else {
+    printInstruction(MI, O);
+  }
+
+  printAnnotation(O, Annot);
+}
+
+
+/// Print \p Expr to \p O. In builds with assertions, checks that the expression
+/// is a symbol reference (or a binary expression whose LHS is one) of kind
+/// VK_None.
+static void printExpr(const MCExpr *Expr, raw_ostream &O) {
+#ifndef NDEBUG
+  const MCSymbolRefExpr *SRE;
+
+  if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr))
+    SRE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
+  else
+    SRE = dyn_cast<MCSymbolRefExpr>(Expr);
+  assert(SRE && "Unexpected MCExpr type.");
+
+  MCSymbolRefExpr::VariantKind Kind = SRE->getKind();
+
+  assert(Kind == MCSymbolRefExpr::VK_None);
+#endif
+
+  O << *Expr;
+}
+
+/// Print operand \p OpNo of \p MI: the register name, the immediate truncated
+/// to int32_t, or the expression. Prints nothing if \p MI has no operand
+/// \p OpNo. \p Modifier must be null or empty.
+void ConnexInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                     raw_ostream &O, const char *Modifier) {
+  LLVM_DEBUG(dbgs() << "Entered ConnexInstPrinter::printOperand(OpNo = "
+                    << OpNo << ")...\n");
+  LLVM_DEBUG(dbgs() << "ConnexInstPrinter::printOperand(): *MI = "
+                    << *MI << "\n");
+  LLVM_DEBUG(dbgs() << "ConnexInstPrinter::printOperand(): MI->getNumOperands() = "
+                    << MI->getNumOperands() << "\n");
+
+  // Simple fallback, useful just for NOP.
+  // TODO: this could be taken care of in printInstruction(), which calls
+  // printOperand().
+  if (MI->getNumOperands() <= OpNo)
+    return;
+
+  LLVM_DEBUG(dbgs() << "ConnexInstPrinter::printOperand(): MI->getOperand(OpNo) = "
+                    << MI->getOperand(OpNo) << "\n");
+
+  assert((Modifier == nullptr || Modifier[0] == 0) && "No modifiers supported");
+
+  const MCOperand &Op = MI->getOperand(OpNo);
+
+  if (Op.isReg()) {
+    // This handles registers, such as scalar r0 or vector R(0)
+    O << getRegisterName(Op.getReg());
+    return;
+  }
+
+  if (Op.isImm()) {
+    // Normally we do NOT get here because this case is treated in
+    // printUnsignedImm().
+    O << static_cast<int32_t>(Op.getImm());
+    return;
+  }
+
+  assert(Op.isExpr() && "Expected an expression");
+  printExpr(Op.getExpr(), O);
+}
+
+/// Print immediate operand \p opNum as ((Imm - Offset) masked to the low
+/// \p Bits bits) + Offset; a non-immediate operand is forwarded to
+/// printOperand().
+// NOTE(review): template parameter list reconstructed from the uses of Bits
+// and Offset below - confirm against the declaration.
+template <unsigned Bits, unsigned Offset>
+void ConnexInstPrinter::printUImm(const MCInst *MI, int opNum, raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(opNum);
+  if (MO.isImm()) {
+    uint64_t Imm = MO.getImm();
+    Imm -= Offset;
+    // The mask is computed in 64 bits so that Bits >= 31 does not overflow
+    // int.
+    Imm &= (static_cast<uint64_t>(1) << Bits) - 1;
+    Imm += Offset;
+    O << formatImm(Imm);
+    return;
+  }
+
+  printOperand(MI, opNum, O);
+}
+
+/// Print a memory operand as <offset>(<base register>), where the base
+/// register is operand \p OpNo and the immediate offset is operand
+/// \p OpNo + 1.
+void ConnexInstPrinter::printMemOperand(const MCInst *MI, int OpNo, raw_ostream &O,
+                                        const char *Modifier) {
+  LLVM_DEBUG(dbgs() << "Entered ConnexInstPrinter::printMemOperand()\n");
+
+  const MCOperand &RegOp = MI->getOperand(OpNo);
+  const MCOperand &OffsetOp = MI->getOperand(OpNo + 1);
+
+  // offset
+  // NOTE(review): the "Teeest" prefix ends up in the emitted assembly. It
+  // looks like a leftover debugging marker; remove it if this printer is
+  // meant to produce real output.
+  if (OffsetOp.isImm())
+    O << "Teeest" << formatDec(OffsetOp.getImm());
+  else
+    assert(0 && "Expected an immediate");
+
+  // register
+  assert(RegOp.isReg() && "Register operand not a register");
+  O << '(' << getRegisterName(RegOp.getReg()) << ')';
+}
+
+// Taken from MSP430InstPrinter.h
+/// Print a source memory operand as <displacement>(<base register>). The base
+/// is operand \p OpNo and the displacement is operand \p OpNo + 1.
+void ConnexInstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
+                                           raw_ostream &O,
+                                           const char *Modifier) {
+  LLVM_DEBUG(dbgs() << "Entered ConnexInstPrinter::printSrcMemOperand()\n");
+
+  // As in the MSP430 original, the operand pair starts at OpNo. The previous
+  // code read operands 0 and 1 unconditionally and ignored OpNo.
+  const MCOperand &Base = MI->getOperand(OpNo);
+  const MCOperand &Disp = MI->getOperand(OpNo + 1);
+
+  // Print displacement first
+
+  // If the global address expression is a part of displacement field with a
+  // register base, we should not emit any prefix symbol here, e.g.
+  //   mov.w &foo, r1
+  // vs
+  //   mov.w glb(r1), r2
+  // Otherwise (!) msp430-as will silently miscompile the output :(
+  if (!Base.getReg())
+    O << '&';
+
+  if (Disp.isExpr())
+    Disp.getExpr()->print(O, &MAI);
+  else {
+    assert(Disp.isImm() && "Expected immediate in displacement field");
+    O << Disp.getImm();
+  }
+
+  // Print register base field
+  if (Base.getReg())
+    O << '(' << getRegisterName(Base.getReg()) << ')';
+}
+
+/// Print operand \p OpNo: an immediate is printed as an unsigned 64-bit value,
+/// anything else with MCOperand's own stream operator.
+void ConnexInstPrinter::printImm64Operand(const MCInst *MI, unsigned OpNo,
+                                          raw_ostream &O) {
+  LLVM_DEBUG(dbgs() << "Entered ConnexInstPrinter::printImm64Operand()\n");
+
+  // NOTE(review): the "Teeest2" prefix ends up in the emitted assembly. It
+  // looks like a leftover debugging marker; remove it if this printer is
+  // meant to produce real output.
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isImm())
+    O << "Teeest2" << static_cast<uint64_t>(Op.getImm());
+  else
+    O << "Teeest2" << Op;
+}
+
+/// Print the memory operand of a masked gather or scatter instruction: only
+/// the name of the index register is printed. \p OpNo is not used.
+void ConnexInstPrinter::printScatterGatherMemOperand(const MCInst *MI,
+                                                     unsigned OpNo,
+                                                     raw_ostream &O) {
+  LLVM_DEBUG(dbgs() <<
+        "Entered ConnexInstPrinter::printScatterGatherMemOperand() - "
+        "NOTE that we discard the BasePtr of the TableGen MemOperand\n");
+  // IMPORTANT: in the MCInst the operands do NOT follow the order from the
+  // .td file. include/llvm/Target/TargetSelectionDAG.td declares
+  //
+  //   def SDTMaskedGather: SDTypeProfile<2, 3, [ // masked gather
+  //     SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<1, 3>,
+  //     SDTCisPtrTy<4>, SDTCVecEltisVT<1, i1>, SDTCisSameNumEltsAs<0, 1>
+  //   ]>;
+  //   def masked_gather : SDNode<"ISD::MGATHER", SDTMaskedGather,
+  //                              [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+  //
+  // i.e. 2 results and 3 input operands (passthru, mask, index), but the
+  // MCInst operand order is the one used below.
+
+  if (MI->getNumOperands() > 4) {
+    // We have an MGATHER operation
+    const MCOperand &res = MI->getOperand(0);
+    const MCOperand &maskIn = MI->getOperand(1);
+    const MCOperand &passthru = MI->getOperand(2);
+    const MCOperand &maskOut = MI->getOperand(3);
+    const MCOperand &index = MI->getOperand(4);
+    // These are only read by assert/LLVM_DEBUG, which compile away in release
+    // builds.
+    (void)res;
+    (void)maskIn;
+    (void)passthru;
+    (void)maskOut;
+
+    assert(index.isReg() && "index not a register");
+    assert(passthru.isReg() && "passthru not a register");
+
+    LLVM_DEBUG(dbgs() << "MI = " << *MI
+                      << "\n index = " << index
+                      << "\n maskIn (bool vector register, which we actually do NOT use) = " << maskIn
+                      << "\n passthru = " << passthru
+                      << "\n maskOut = " << maskOut
+                      << "\n res = " << res << "\n");
+
+    LLVM_DEBUG(dbgs() << "\n res = " << res << "\n");
+
+    assert(res.isReg() && "res not a register");
+    O << getRegisterName(index.getReg());
+  } else {
+    // We have an MSCATTER operation
+    assert(MI->getNumOperands() > 3 && "MSCATTER expects at least 4 operands");
+    const MCOperand &maskIn = MI->getOperand(0);
+    const MCOperand &value = MI->getOperand(1);
+    const MCOperand &mask2 = MI->getOperand(2);
+    const MCOperand &index = MI->getOperand(3);
+    // Only read by LLVM_DEBUG, which compiles away in release builds.
+    (void)maskIn;
+    (void)value;
+    (void)mask2;
+
+    assert(index.isReg() && "index not a register");
+
+    LLVM_DEBUG(dbgs() << "MI = " << *MI
+                      << "\n value (src) = " << value
+                      << "\n maskIn (bool vector register, "
+                         "which we actually do NOT use) = " << maskIn
+                      << "\n index = " << index
+                      << "\n mask2 = " << mask2
+                      << "\n");
+    O << getRegisterName(index.getReg());
+  }
+
+  LLVM_DEBUG(dbgs() << "Exiting ConnexInstPrinter::printScatterGatherMemOperand()\n");
+}
+
+
+// TODO: if this method gets executed anymore change name to
+// GetStringFromAssociatedInlineAsm()
+/// Looks up the INLINEASM associated with \p assocMI in
+/// mapLD_ST_REPEAT_InlineAsm and returns a pointer into its asm string, just
+/// past the first occurrence of \p strToSearch.
+///
+/// If there is no associated INLINEASM, or the marker is missing, a pointer to
+/// a static placeholder string is returned. The result is never owned by the
+/// caller and must not be freed.
+///
+/// This function asserts unconditionally: it is not supposed to be reached
+/// any more.
+char *GetSymbolicValueFromAssociatedInlineAsm(const MachineInstr *assocMI,
+                                              char *strToSearch) {
+  assert(0 && "GetSymbolicValueFromAssociatedInlineAsm() should NOT be executed since we don't use symbolic LD_H or ST_H anymore");
+
+  LLVM_DEBUG(dbgs() << "GetSymbolicValueFromAssociatedInlineAsm(): assocMI = "
+                    << "(" << assocMI << ")" << "\n");
+
+  // Static placeholders instead of strdup(): the other return path hands out
+  // a pointer the caller must not free, so a heap copy could only leak.
+  static char noInlineAsm[] = "[NO_VALUE - since miInlineasm == NULL!!!!]";
+  static char noMarker[] = "[NO_VALUE - marker not found in INLINEASM]";
+
+  // find() rather than operator[], so that a miss does not insert a null
+  // entry into the map.
+  const auto found = mapLD_ST_REPEAT_InlineAsm.find(assocMI);
+  const MachineInstr *miInlineasm =
+      (found == mapLD_ST_REPEAT_InlineAsm.end()) ? nullptr : found->second;
+  LLVM_DEBUG(dbgs() << "GetSymbolicValueFromAssociatedInlineAsm(): miInlineasm = "
+                    << miInlineasm << "\n");
+  if (miInlineasm == nullptr)
+    return noInlineAsm;
+
+  const MachineOperand &inlineAsmStrMO0 = miInlineasm->getOperand(0);
+
+  // Inspiring from
+  // http://llvm.org/docs/doxygen/html/MachineInstr_8cpp_source.html#l00306
+  assert(inlineAsmStrMO0.getType() == MachineOperand::MO_ExternalSymbol);
+
+  LLVM_DEBUG(dbgs() << "GetSymbolicValueFromAssociatedInlineAsm(): "
+                       "inlineAsmStrMO0.getSymbolName() = "
+                    << inlineAsmStrMO0.getSymbolName() << "\n");
+
+  char *res = strstr(const_cast<char *>(inlineAsmStrMO0.getSymbolName()),
+                     strToSearch);
+
+  assert(res != nullptr && "Did not find strToSearch marker in INLINEASM");
+  // In release builds the assert above is gone; do not do arithmetic on a
+  // null pointer.
+  if (res == nullptr)
+    return noMarker;
+
+  return res + strlen(strToSearch);
+}
+
+
+// Taken from MipsInstPrinter.cpp
+// (required by ConnexGenAsmWriter.inc)
+/// Print operand \p opNum of \p MI as an unsigned immediate; a non-immediate
+/// operand is forwarded to printOperand().
+///
+/// Two sentinel values are printed symbolically, using the INLINEASM
+/// associated with crtMI: VALUE_BOGUS_REPEAT_X_TIMES (only when
+/// GENERATE_ASSOCIATED_INLINEASM_FROM_LOOPVECTORIZE_PASS is defined) and
+/// CONNEX_MEM_NUM_ROWS + 10.
+// NOTE(review): both sentinel paths call
+// GetSymbolicValueFromAssociatedInlineAsm(), which asserts unconditionally, so
+// an ordinary immediate equal to a sentinel aborts an assertions-enabled build.
+void ConnexInstPrinter::printUnsignedImm(const MCInst *MI, int opNum,
+                                         raw_ostream &O) {
+  LLVM_DEBUG(dbgs() << "Entered ConnexInstPrinter::printUnsignedImm()...\n");
+
+  const MCOperand &MO = MI->getOperand(opNum);
+  if (!MO.isImm()) {
+    printOperand(MI, opNum, O);
+    return;
+  }
+
+  // Printing unsigned int
+  unsigned imm = MO.getImm();
+
+  LLVM_DEBUG(dbgs() << "ConnexInstPrinter::printUnsignedImm(): imm = "
+                    << imm
+                    << ", MI (ptr) = " << MI
+                    << ", MI = " << *MI
+                    << "\n");
+
+#ifdef GENERATE_ASSOCIATED_INLINEASM_FROM_LOOPVECTORIZE_PASS
+  if (imm == VALUE_BOGUS_REPEAT_X_TIMES) {
+    assert(MI->getOpcode() == Connex::REPEAT);
+
+    O << GetSymbolicValueFromAssociatedInlineAsm(
+             crtMI, const_cast<char *>("/*value*/"));
+    return;
+  }
+#endif
+
+  if (imm == CONNEX_MEM_NUM_ROWS + 10) {
+    assert((MI->getOpcode() == Connex::LD_H) ||
+           (MI->getOpcode() == Connex::ST_H));
+
+    // The callee takes a non-const char *, hence the const_cast on the
+    // literal (it is only read).
+    const char *offset = GetSymbolicValueFromAssociatedInlineAsm(
+        crtMI, const_cast<char *>("/*offset*/"));
+
+    O << STR_LOOP_SYMBOLIC_INDEX
+      << " + " << offset;
+    return;
+  }
+
+  O << imm;
+}
+
+
+// Taken from [LLVM]/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+/// Print immediate operand \p opNum truncated to 8 bits, as an unsigned
+/// number; a non-immediate operand is forwarded to printOperand().
+void ConnexInstPrinter::printUnsignedImm8(const MCInst *MI, int opNum,
+                                          raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(opNum);
+  if (MO.isImm())
+    O << static_cast<unsigned short>(static_cast<unsigned char>(MO.getImm()));
+  else
+    printOperand(MI, opNum, O);
+}
+
Index: lib/Target/Connex/InstPrinter/LLVMBuild.txt
===================================================================
--- lib/Target/Connex/InstPrinter/LLVMBuild.txt
+++ lib/Target/Connex/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Connex/InstPrinter/LLVMBuild.txt ---------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = ConnexAsmPrinter +parent = Connex +required_libraries = MC Support +add_to_library_groups = Connex Index: lib/Target/Connex/InstPrinter/Makefile =================================================================== --- lib/Target/Connex/InstPrinter/Makefile +++ lib/Target/Connex/InstPrinter/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/Connex/InstPrinter/Makefile -----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMConnexAsmPrinter + +# Hack: we need to include 'main' Connex target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common Index: lib/Target/Connex/LLVMBuild.txt =================================================================== --- lib/Target/Connex/LLVMBuild.txt +++ lib/Target/Connex/LLVMBuild.txt @@ -0,0 +1,43 @@ +;===- ./lib/Target/Connex/LLVMBuild.txt ---------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = InstPrinter MCTargetDesc TargetInfo + +[component_0] +type = TargetGroup +name = Connex +parent = Target +has_asmprinter = 1 + +[component_1] +type = Library +name = ConnexCodeGen +parent = Connex +required_libraries = + Analysis + AsmPrinter + CodeGen + Core + MC + ConnexAsmPrinter + ConnexDesc + ConnexInfo + SelectionDAG + Support + Target +add_to_library_groups = Connex Index: lib/Target/Connex/MCTargetDesc/CMakeLists.txt =================================================================== --- lib/Target/Connex/MCTargetDesc/CMakeLists.txt +++ lib/Target/Connex/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,6 @@ +add_llvm_library(LLVMConnexDesc + ConnexMCTargetDesc.cpp + ConnexAsmBackend.cpp + ConnexMCCodeEmitter.cpp + ConnexELFObjectWriter.cpp + ) Index: lib/Target/Connex/MCTargetDesc/ConnexAsmBackend.cpp =================================================================== --- lib/Target/Connex/MCTargetDesc/ConnexAsmBackend.cpp +++ lib/Target/Connex/MCTargetDesc/ConnexAsmBackend.cpp @@ -0,0 +1,139 @@ +//===-- ConnexAsmBackend.cpp - Connex Assembler Backend -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ConnexMCTargetDesc.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCFixup.h"
+/*
+// 2019_03_30
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+*/
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/EndianStream.h"
+/*
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCExpr.h"
+*/
+#include <cassert>
+#include <cstdint>
+/*
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+*/
+
+using namespace llvm;
+
+namespace {
+
+/// Assembler backend for Connex: applies fixups, writes NOP padding and creates
+/// the ELF object target writer. No instruction is ever relaxed.
+class ConnexAsmBackend : public MCAsmBackend {
+public:
+  ConnexAsmBackend(support::endianness Endian) : MCAsmBackend(Endian) {}
+
+  ~ConnexAsmBackend() override = default;
+
+  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+                  const MCValue &Target, MutableArrayRef<char> Data,
+                  uint64_t Value, bool IsResolved,
+                  const MCSubtargetInfo *STI) const override;
+
+  std::unique_ptr<MCObjectTargetWriter> createObjectTargetWriter()
+      const override;
+
+
+  // No instruction requires relaxation
+  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+                            const MCRelaxableFragment *DF,
+                            const MCAsmLayout &Layout) const override {
+    return false;
+  }
+
+
+  unsigned getNumFixupKinds() const override { return 1; }
+
+
+  bool mayNeedRelaxation(const MCInst &Inst,
+                         const MCSubtargetInfo &STI) const override {
+    return false;
+  }
+
+
+  void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+                        MCInst &Res) const override {}
+
+
+  bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+};
+
+} // end anonymous namespace
+
+
+/// Writes \p Count bytes of padding to \p OS as 8-byte words holding
+/// 0x15000000. Returns false, writing nothing, if \p Count is not a multiple
+/// of 8.
+bool ConnexAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+  if ((Count % 8) != 0)
+    return false;
+
+  // The explicit uint64_t matters: each iteration accounts for 8 bytes, so a
+  // full 64-bit word has to be written (a deduced int would emit only 4).
+  for (uint64_t i = 0; i < Count; i += 8)
+    support::endian::write<uint64_t>(OS, 0x15000000, Endian);
+
+  return true;
+}
+
+
+/// Patches \p Value into \p Data at the location described by \p Fixup. The
+/// field width and position depend on the fixup kind; PC-relative values are
+/// first converted with (Value - 8) / 8.
+void ConnexAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
+                                  const MCValue &Target,
+                                  MutableArrayRef<char> Data, uint64_t Value,
+                                  bool IsResolved,
+                                  const MCSubtargetInfo *STI) const {
+  // The widths passed to support::endian::write below are explicit on
+  // purpose: Value is a uint64_t, so a deduced width would always store
+  // 8 bytes.
+  if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) {
+    // The Value is 0 for global variables, and the in-section offset
+    // for static variables. Write to the immediate field of the inst.
+    assert(Value <= UINT32_MAX);
+    support::endian::write<uint32_t>(&Data[Fixup.getOffset() + 4],
+                                     static_cast<uint32_t>(Value),
+                                     Endian);
+  } else if (Fixup.getKind() == FK_Data_4) {
+    support::endian::write<uint32_t>(&Data[Fixup.getOffset()], Value, Endian);
+  } else if (Fixup.getKind() == FK_Data_8) {
+    support::endian::write<uint64_t>(&Data[Fixup.getOffset()], Value, Endian);
+  } else if (Fixup.getKind() == FK_PCRel_4) {
+    Value = (uint32_t)((Value - 8) / 8);
+    if (Endian == support::little) {
+      Data[Fixup.getOffset() + 1] = 0x10;
+      support::endian::write32le(&Data[Fixup.getOffset() + 4], Value);
+    } else {
+      Data[Fixup.getOffset() + 1] = 0x1;
+      support::endian::write32be(&Data[Fixup.getOffset() + 4], Value);
+    }
+  } else {
+    assert(Fixup.getKind() == FK_PCRel_2);
+    Value = (uint16_t)((Value - 8) / 8);
+    support::endian::write<uint16_t>(&Data[Fixup.getOffset() + 2], Value,
+                                     Endian);
+  }
+}
+
+
+std::unique_ptr<MCObjectTargetWriter>
+ConnexAsmBackend::createObjectTargetWriter() const {
+  return createConnexELFObjectWriter(0);
+}
+
+
+/// Creates the (little-endian) Connex assembler backend.
+MCAsmBackend *llvm::createConnexAsmBackend(const Target &T,
+                                           const MCSubtargetInfo &STI,
+                                           const MCRegisterInfo &MRI,
+                                           const MCTargetOptions &) {
+  return new ConnexAsmBackend(support::little);
+}
+
Index: lib/Target/Connex/MCTargetDesc/ConnexELFObjectWriter.cpp
===================================================================
--- lib/Target/Connex/MCTargetDesc/ConnexELFObjectWriter.cpp
+++ lib/Target/Connex/MCTargetDesc/ConnexELFObjectWriter.cpp
@@ -0,0 +1,84 @@
+//===-- ConnexELFObjectWriter.cpp - Connex ELF Writer
---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/ConnexMCTargetDesc.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/ErrorHandling.h" +#include + +using namespace llvm; + +namespace { + +class ConnexELFObjectWriter : public MCELFObjectTargetWriter { +public: + ConnexELFObjectWriter(uint8_t OSABI); + + ~ConnexELFObjectWriter() override; + +protected: + unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsPCRel) const override; +}; + +} // end anonymous namespace + + +ConnexELFObjectWriter::ConnexELFObjectWriter(uint8_t OSABI) + : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_NONE, + /*HasRelocationAddend*/ false) {} + + +ConnexELFObjectWriter::~ConnexELFObjectWriter() {} + + +unsigned ConnexELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const { + // determine the type of the relocation + switch ((unsigned)Fixup.getKind()) { + default: + llvm_unreachable("invalid fixup kind!"); + case FK_SecRel_8: + return ELF::R_BPF_64_64; + case FK_PCRel_4: + case FK_SecRel_4: + return ELF::R_BPF_64_32; + case FK_Data_8: + return ELF::R_BPF_64_64; + case FK_Data_4: + // .BTF.ext generates FK_Data_4 relocations for + // insn offset by creating temporary labels. + // The insn offset is within the code section and + // already been fulfilled by applyFixup(). No + // further relocation is needed. 
+ if (const MCSymbolRefExpr *A = Target.getSymA()) { + if (A->getSymbol().isTemporary()) { + MCSection &Section = A->getSymbol().getSection(); + const MCSectionELF *SectionELF = dyn_cast(&Section); + assert(SectionELF && "Null section for reloc symbol"); + + // The reloc symbol should be in text section. + unsigned Flags = SectionELF->getFlags(); + if ((Flags & ELF::SHF_ALLOC) && (Flags & ELF::SHF_EXECINSTR)) + return ELF::R_BPF_NONE; + } + } + return ELF::R_BPF_64_32; + } +} + + +std::unique_ptr + llvm::createConnexELFObjectWriter(uint8_t OSABI) { + return llvm::make_unique(OSABI); +} Index: lib/Target/Connex/MCTargetDesc/ConnexMCAsmInfo.h =================================================================== --- lib/Target/Connex/MCTargetDesc/ConnexMCAsmInfo.h +++ lib/Target/Connex/MCTargetDesc/ConnexMCAsmInfo.h @@ -0,0 +1,51 @@ +//===-- ConnexMCAsmInfo.h - Connex asm properties -------------------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the ConnexMCAsmInfo class. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CONNEX_MCTARGETDESC_CONNEXMCASMINFO_H
+#define LLVM_LIB_TARGET_CONNEX_MCTARGETDESC_CONNEXMCASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
+namespace llvm {
+class Target;
+class Triple;
+
+class ConnexMCAsmInfo : public MCAsmInfo {
+public:
+  explicit ConnexMCAsmInfo(const Triple &TT) {
+#ifdef NOT_NOT_NOT // BPF big-endian leftover; only active if NOT_NOT_NOT is defined
+    if (TT.getArch() == Triple::bpfeb)
+      IsLittleEndian = false;
+#endif
+
+    PrivateGlobalPrefix = ".L";
+    WeakRefDirective = "\t.weak\t";
+
+    // Inspired by http://llvm.org/docs/doxygen/html/NVPTXMCAsmInfo_8cpp_source.html#l00028
+    // Empty start/end markers avoid emitting APP/NO_APP delimiters around inline asm.
+    CommentString = "//";
+    InlineAsmStart = "";
+    InlineAsmEnd = "";
+
+    UsesELFSectionDirectiveForBSS = true;
+    HasSingleParameterDotFile = false;
+    HasDotTypeDotSizeDirective = false;
+
+    SupportsDebugInformation = true;
+  }
+};
+} // end namespace llvm
+
+#endif
Index: lib/Target/Connex/MCTargetDesc/ConnexMCCodeEmitter.cpp
===================================================================
--- lib/Target/Connex/MCTargetDesc/ConnexMCCodeEmitter.cpp
+++ lib/Target/Connex/MCTargetDesc/ConnexMCCodeEmitter.cpp
@@ -0,0 +1,177 @@
+//===-- ConnexMCCodeEmitter.cpp - Convert Connex code to machine code -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ConnexMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ConnexMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/EndianStream.h"
+#include <cassert>
+#include <cstdint>
+
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mccodeemitter"
+
+namespace {
+
+class ConnexMCCodeEmitter : public MCCodeEmitter {
+  const MCInstrInfo &MCII;
+  const MCRegisterInfo &MRI;
+  bool IsLittleEndian;
+
+public:
+  ConnexMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
+                      bool IsLittleEndian)
+      : MCII(mcii), MRI(mri), IsLittleEndian(IsLittleEndian) {}
+
+  ConnexMCCodeEmitter(const ConnexMCCodeEmitter &) = delete;
+
+  void operator=(const ConnexMCCodeEmitter &) = delete;
+
+  ~ConnexMCCodeEmitter() override = default;
+
+  // getBinaryCodeForInstr - TableGen'erated function for getting the
+  // binary encoding for an instruction.
+  uint64_t getBinaryCodeForInstr(const MCInst &MI,
+                                 SmallVectorImpl<MCFixup> &Fixups,
+                                 const MCSubtargetInfo &STI) const;
+
+  // getMachineOpValue - Return binary encoding of operand. If the machine
+  // operand requires relocation, record the relocation and return zero.
+  unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+                             SmallVectorImpl<MCFixup> &Fixups,
+                             const MCSubtargetInfo &STI) const;
+
+  uint64_t getMemoryOpValue(const MCInst &MI, unsigned Op,
+                            SmallVectorImpl<MCFixup> &Fixups,
+                            const MCSubtargetInfo &STI) const;
+
+  void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+                         SmallVectorImpl<MCFixup> &Fixups,
+                         const MCSubtargetInfo &STI) const override;
+};
+
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createConnexMCCodeEmitter(const MCInstrInfo &MCII,
+                                               const MCRegisterInfo &MRI,
+                                               MCContext &Ctx) {
+  return new ConnexMCCodeEmitter(MCII, MRI, true);
+}
+
+#ifdef NOT_NOT_NOT // disabled leftover from the BPF backend (big-endian emitter)
+MCCodeEmitter *llvm::createBPFbeMCCodeEmitter(const MCInstrInfo &MCII,
+                                              const MCRegisterInfo &MRI,
+                                              MCContext &Ctx) {
+  return new BPFMCCodeEmitter(MRI, false);
+}
+#endif
+
+
+// Registers and immediates are encoded directly; symbol operands add a fixup.
+unsigned ConnexMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+                                                const MCOperand &MO,
+                                                SmallVectorImpl<MCFixup> &Fixups,
+                                                const MCSubtargetInfo &STI) const {
+  if (MO.isReg())
+    return MRI.getEncodingValue(MO.getReg());
+  if (MO.isImm())
+    return static_cast<unsigned>(MO.getImm());
+
+  assert(MO.isExpr());
+
+  const MCExpr *Expr = MO.getExpr();
+
+  assert(Expr->getKind() == MCExpr::SymbolRef);
+
+  if (MI.getOpcode() == Connex::JAL)
+    // func call name
+    Fixups.push_back(MCFixup::create(0, Expr, FK_SecRel_4));
+  else if (MI.getOpcode() == Connex::LD_imm64)
+    Fixups.push_back(MCFixup::create(0, Expr, FK_SecRel_8));
+  else
+    // bb label
+    Fixups.push_back(MCFixup::create(0, Expr, FK_PCRel_2));
+
+  return 0;
+}
+
+static uint8_t SwapBits(uint8_t Val) {
+  return (Val & 0x0F) << 4 | (Val & 0xF0) >> 4; // exchanges the two 4-bit nibbles
+}
+
+void ConnexMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+                                            SmallVectorImpl<MCFixup> &Fixups,
+                                            const MCSubtargetInfo &STI) const {
+  /*
+  // 2019_03_30_TODO
+  verifyInstructionPredicates(MI,
+                              computeAvailableFeatures(STI.getFeatureBits()));
+  */
+
+  unsigned Opcode = MI.getOpcode();
+  support::endian::Writer OSE(OS,
+                              IsLittleEndian ? support::little : support::big);
+
+  if (Opcode == Connex::LD_imm64 || Opcode == Connex::LD_pseudo) { // 16 bytes
+    uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI);
+    OS << char(Value >> 56);
+    if (IsLittleEndian)
+      OS << char((Value >> 48) & 0xff);
+    else
+      OS << char(SwapBits((Value >> 48) & 0xff));
+    OSE.write<uint16_t>(0);
+    OSE.write<uint32_t>(Value & 0xffffFFFF);
+
+    const MCOperand &MO = MI.getOperand(1);
+    uint64_t Imm = MO.isImm() ? MO.getImm() : 0;
+    OSE.write<uint8_t>(0);
+    OSE.write<uint8_t>(0);
+    OSE.write<uint16_t>(0);
+    OSE.write<uint32_t>(Imm >> 32);
+  } else {
+    // Get instruction encoding and emit it
+    uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI);
+    OS << char(Value >> 56);
+    if (IsLittleEndian)
+      OS << char((Value >> 48) & 0xff);
+    else
+      OS << char(SwapBits((Value >> 48) & 0xff));
+    OSE.write<uint16_t>((Value >> 32) & 0xffff);
+    OSE.write<uint32_t>(Value & 0xffffFFFF);
+  }
+}
+
+// Encode Connex Memory Operand
+uint64_t ConnexMCCodeEmitter::getMemoryOpValue(const MCInst &MI, unsigned Op,
+                                               SmallVectorImpl<MCFixup> &Fixups,
+                                               const MCSubtargetInfo &STI) const {
+  uint64_t Encoding; // NOTE: Op is unused; operands 1 and 2 are read directly
+  const MCOperand Op1 = MI.getOperand(1);
+  assert(Op1.isReg() && "First operand is not register.");
+  Encoding = MRI.getEncodingValue(Op1.getReg());
+  Encoding <<= 16;
+  MCOperand Op2 = MI.getOperand(2);
+  assert(Op2.isImm() && "Second operand is not immediate.");
+  Encoding |= Op2.getImm() & 0xffff;
+  return Encoding;
+}
+
+// 2019_03_30_TODO #define ENABLE_INSTR_PREDICATE_VERIFIER
+#include "ConnexGenMCCodeEmitter.inc"
Index: lib/Target/Connex/MCTargetDesc/ConnexMCTargetDesc.h
===================================================================
--- lib/Target/Connex/MCTargetDesc/ConnexMCTargetDesc.h
+++ lib/Target/Connex/MCTargetDesc/ConnexMCTargetDesc.h
@@ -0,0 +1,64 @@
+//===-- ConnexMCTargetDesc.h - Connex Target Descriptions -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Connex specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CONNEX_MCTARGETDESC_CONNEXMCTARGETDESC_H
+#define LLVM_LIB_TARGET_CONNEX_MCTARGETDESC_CONNEXMCTARGETDESC_H
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/DataTypes.h"
+
+#include <memory>
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectTargetWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class MCTargetOptions;
+class StringRef;
+class Target;
+class Triple;
+class raw_ostream;
+class raw_pwrite_stream;
+
+extern Target TheConnexTarget;
+
+// Factories for the Connex MC components (defined in MCTargetDesc/*.cpp).
+MCCodeEmitter *createConnexMCCodeEmitter(const MCInstrInfo &MCII,
+                                         const MCRegisterInfo &MRI,
+                                         MCContext &Ctx);
+
+MCAsmBackend *createConnexAsmBackend(const Target &T, const MCSubtargetInfo &STI,
+                                     const MCRegisterInfo &MRI,
+                                     const MCTargetOptions &Options);
+
+std::unique_ptr<MCObjectTargetWriter> createConnexELFObjectWriter(uint8_t OSABI);
+} // end namespace llvm
+
+// Defines symbolic names for Connex registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "ConnexGenRegisterInfo.inc"
+
+// Defines symbolic names for the Connex instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "ConnexGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "ConnexGenSubtargetInfo.inc"
+
+#endif
Index: lib/Target/Connex/MCTargetDesc/ConnexMCTargetDesc.cpp
===================================================================
--- lib/Target/Connex/MCTargetDesc/ConnexMCTargetDesc.cpp
+++ lib/Target/Connex/MCTargetDesc/ConnexMCTargetDesc.cpp
@@ -0,0 +1,109 @@
+//===-- ConnexMCTargetDesc.cpp - Connex Target Descriptions ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Connex specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "Connex.h"
+#include "ConnexMCTargetDesc.h"
+#include "ConnexMCAsmInfo.h"
+#include "InstPrinter/ConnexInstPrinter.h"
+//#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "ConnexGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "ConnexGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "ConnexGenRegisterInfo.inc"
+
+using namespace llvm;
+
+// Allocates an MCInstrInfo and initializes it via InitConnexMCInstrInfo().
+static MCInstrInfo *createConnexMCInstrInfo() {
+  MCInstrInfo *X = new MCInstrInfo();
+  InitConnexMCInstrInfo(X);
+  return X;
+}
+
+// Allocates an MCRegisterInfo; R11 only fills the RA slot (see inline note).
+static MCRegisterInfo *createConnexMCRegisterInfo(const Triple &TT) {
+  MCRegisterInfo *X = new MCRegisterInfo();
+  InitConnexMCRegisterInfo(X, Connex::R11 /* RAReg doesn't exist */);
+  return X;
+}
+
+// Thin wrapper around createConnexMCSubtargetInfoImpl().
+static MCSubtargetInfo *createConnexMCSubtargetInfo(const Triple &TT,
+                                                    StringRef CPU, StringRef FS) {
+  return createConnexMCSubtargetInfoImpl(TT, CPU, FS);
+}
+
+// Builds an ELF streamer from the supplied backend, object writer and emitter.
+static MCStreamer *createConnexMCStreamer(const Triple &T, MCContext &Ctx,
+                                          std::unique_ptr<MCAsmBackend> &&MAB,
+                                          std::unique_ptr<MCObjectWriter> &&OW,
+                                          std::unique_ptr<MCCodeEmitter> &&Emitter,
+                                          bool RelaxAll) {
+  return createELFStreamer(Ctx, std::move(MAB), std::move(OW),
+                           std::move(Emitter),
+                           RelaxAll);
+}
+
+// Only syntax variant 0 is supported; any other variant yields nullptr.
+static MCInstPrinter *createConnexMCInstPrinter(const Triple &T,
+                                                unsigned SyntaxVariant,
+                                                const MCAsmInfo &MAI,
+                                                const MCInstrInfo &MII,
+                                                const MCRegisterInfo &MRI) {
+  if (SyntaxVariant == 0)
+    return new ConnexInstPrinter(MAI, MII, MRI);
+  return nullptr;
+}
+
+// Registers all Connex MC-layer factories with the TargetRegistry.
+extern "C" void LLVMInitializeConnexTargetMC() {
+  for (Target *T : {&TheConnexTarget}) {
+    // Register the MC asm info.
+    RegisterMCAsmInfo<ConnexMCAsmInfo> X(*T);
+
+    // Register the MC instruction info.
+    TargetRegistry::RegisterMCInstrInfo(*T, createConnexMCInstrInfo);
+
+    // Register the MC register info.
+    TargetRegistry::RegisterMCRegInfo(*T, createConnexMCRegisterInfo);
+
+    // Register the MC subtarget info.
+    TargetRegistry::RegisterMCSubtargetInfo(*T,
+                                            createConnexMCSubtargetInfo);
+
+    // Register the object streamer
+    TargetRegistry::RegisterELFStreamer(*T, createConnexMCStreamer);
+
+    // Register the MCInstPrinter.
+    TargetRegistry::RegisterMCInstPrinter(*T, createConnexMCInstPrinter);
+  }
+
+  // Register the MC code emitter
+  TargetRegistry::RegisterMCCodeEmitter(TheConnexTarget,
+                                        createConnexMCCodeEmitter);
+
+  // Register the ASM Backend
+  TargetRegistry::RegisterMCAsmBackend(TheConnexTarget,
+                                       createConnexAsmBackend);
+}
Index: lib/Target/Connex/MCTargetDesc/LLVMBuild.txt
===================================================================
--- lib/Target/Connex/MCTargetDesc/LLVMBuild.txt
+++ lib/Target/Connex/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Connex/MCTargetDesc/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ConnexDesc
+parent = Connex
+required_libraries = MC ConnexAsmPrinter ConnexInfo Support
+add_to_library_groups = Connex
Index: lib/Target/Connex/MCTargetDesc/Makefile
===================================================================
--- lib/Target/Connex/MCTargetDesc/Makefile
+++ lib/Target/Connex/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Connex/MCTargetDesc/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMConnexDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
Index: lib/Target/Connex/Makefile
===================================================================
--- lib/Target/Connex/Makefile
+++ lib/Target/Connex/Makefile
@@ -0,0 +1,21 @@
+##===- lib/Target/Connex/Makefile -----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMConnexCodeGen
+TARGET = Connex
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = ConnexGenRegisterInfo.inc ConnexGenInstrInfo.inc \
+                ConnexGenAsmWriter.inc ConnexGenAsmMatcher.inc ConnexGenDAGISel.inc \
+                ConnexGenMCCodeEmitter.inc ConnexGenSubtargetInfo.inc ConnexGenCallingConv.inc
+
+DIRS = InstPrinter TargetInfo MCTargetDesc
+
+include $(LEVEL)/Makefile.common
Index: lib/Target/Connex/Select_ADDf16_OpincaaCodeGen.h
===================================================================
--- lib/Target/Connex/Select_ADDf16_OpincaaCodeGen.h
+++ lib/Target/Connex/Select_ADDf16_OpincaaCodeGen.h
@@ -0,0 +1,3625 @@
+// From /home/asusu/LLVM/Tests/opincaa_standalone_apps/Emulate_f16/ADD_SUB_f16_manual/DumpISel_OpincaaCodeGen_old36_C00_ADDf16.cpp
+
+// Code auto-generated by method Kernel::genLLVMISelManualCode()
+// from Opincaa lib from kernel: add_or_sub.f16.
+// It is important to put this code in the Select() method of the
+// SelectionDAGISel class of your back end, after the ISelLowering pass,
+// which contains the DAG Combiner, because the DAG Combiner can remove
+// the getCopyToReg() we create, which can lead to the following error:
+// <> assertion failed.
+// Number of instructions generated: 279.
+ + + + +SDValue ct0 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R14 = 1; +// Instr #0 +SDNode *vload0 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct0, + // glue (or chain) input edge + SDValue(nodeOpSrcCast1, 1) + ); + +SDValue ct1 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R30 = 1; +// Instr #1 +SDNode *vload1 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct1, + // glue (or chain) input edge + SDValue(vload0, 1) + ); + +SDValue ct2 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R31 = 0; +// Instr #2 +SDNode *vload2 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct2, + // glue (or chain) input edge + SDValue(vload1, 1) + ); + +SDValue ct3 = CurDAG->getConstant(16, DL, MVT::i16, true, false); +// R29 = 16; +// Instr #3 +SDNode *vload3 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct3, + // glue (or chain) input edge + SDValue(vload2, 1) + ); + +SDValue ct4 = CurDAG->getConstant(31, DL, MVT::i16, true, false); +// R28 = 31; +// Instr #4 +SDNode *vload4 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct4, + // glue (or chain) input edge + SDValue(vload3, 1) + ); + +SDValue ct5 = CurDAG->getConstant(1023, DL, MVT::i16, true, false); +// R13 = 1023; +// Instr #5 +SDNode *vload5 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct5, + // glue (or chain) input edge + SDValue(vload4, 1) + ); + +SDValue ct6 = CurDAG->getConstant(31744, DL, MVT::i16, true, false); +// R12 = 31744; +// Instr #6 +SDNode *vload6 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct6, + // glue (or chain) input edge + SDValue(vload5, 1) + ); + +SDValue ct7 = CurDAG->getConstant(-32768, DL, MVT::i16, true, false); +// R11 = -32768; +// Instr #7 +SDNode *vload7 = CurDAG->getMachineNode( + 
Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct7, + // glue (or chain) input edge + SDValue(vload6, 1) + ); + +SDValue ct8 = CurDAG->getConstant(1024, DL, MVT::i16, true, false); +// R10 = 1024; +// Instr #8 +SDNode *vload8 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct8, + // glue (or chain) input edge + SDValue(vload7, 1) + ); + +SDValue ct9 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R62 = 0; +// Instr #9 +SDNode *vload9 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct9, + // glue (or chain) input edge + SDValue(vload8, 1) + ); + +SDValue ct10 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R19 = 0; +// Instr #10 +SDNode *vload10 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct10, + // glue (or chain) input edge + SDValue(vload9, 1) + ); + +SDValue ct11 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R62 = 0; +// Instr #11 +SDNode *vload11 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct11, + // glue (or chain) input edge + SDValue(vload10, 1) + ); + +SDValue ct12 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R61 = 0; +// Instr #12 +SDNode *vload12 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct12, + // glue (or chain) input edge + SDValue(vload11, 1) + ); + +SDValue ct13 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R07 = 0; +// Instr #13 +SDNode *vload13 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct13, + // glue (or chain) input edge + SDValue(vload12, 1) + ); + +SDValue ct14 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R04 = 0; +// Instr #14 +SDNode *vload14 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct14, + // glue (or chain) input edge + SDValue(vload13, 1) + ); + +SDValue ct15 = CurDAG->getConstant(0, 
DL, MVT::i16, true, false); +// R03 = 0; +// Instr #15 +SDNode *vload15 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct15, + // glue (or chain) input edge + SDValue(vload14, 1) + ); + +SDValue ct16 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R02 = 0; +// Instr #16 +SDNode *vload16 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct16, + // glue (or chain) input edge + SDValue(vload15, 1) + ); + +SDValue ct17 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R01 = 0; +// Instr #17 +SDNode *vload17 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct17, + // glue (or chain) input edge + SDValue(vload16, 1) + ); + +SDValue ct18 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R00 = 0; +// Instr #18 +SDNode *vload18 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct18, + // glue (or chain) input edge + SDValue(vload17, 1) + ); + +// R24 = R27 & R11; +// Instr #19 +SDNode *and0 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(vload18, 1) + ); + +// R25 = R27 & R12; +// Instr #20 +SDNode *and1 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload6, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(and0, 1) + ); + +SDValue ct19 = CurDAG->getConstant(10, DL, MVT::i16, true, false); +// R25 = R25 >> 10; +// Instr #21 +SDNode *ishr0 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and1, 0), + ct19, + // glue (or chain) input edge + SDValue(and1, 1) + ); + +// R26 = R27 & R13; +// Instr #22 +SDNode *and2 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge 
+ SDValue(ishr0, 1) + ); + +// R18 = R31 < R26; +// Instr #23 +SDNode *lt0 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(and2, 0), + // glue (or chain) input edge + SDValue(and2, 1) + ); + +// R17 = R25 == R31; +// Instr #24 +SDNode *eq0 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr0, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(lt0, 1) + ); + +// R09 = R17 & R18; +// Instr #25 +SDNode *and3 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt0, 0), + SDValue(eq0, 0), + // glue (or chain) input edge + SDValue(eq0, 1) + ); + +// R09 = R09 == R30; +// Instr #26 +SDNode *eq1 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and3, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and3, 1) + ); + +SDValue ct20 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #27 +SDNode *nop0 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct20, + // glue (or chain) input edge + SDValue(eq1, 1) + ); + +// WHERE_EQ; +// Instr #28 +SDNode *whereeq0 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq1, 0), + // glue (or chain) input edge + SDValue(nop0, 0) + ); + +SDValue ct21 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R25 = 1; +// Instr #29 +SDNode *vload19 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct21, + SDValue(ishr0, 0), + // glue (or chain) input edge + SDValue(whereeq0, 1) + ); + +// END_WHERE; +// Instr #30 +SDNode *endwhere0 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload19, 1) + ); + +// R18 = R25 == R28; +// Instr #31 +SDNode *eq2 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + 
MVT::Glue, + SDValue(vload19, 0), + SDValue(vload4, 0), + // glue (or chain) input edge + SDValue(endwhere0, 0) + ); + +// R18 = R18 | R17; +// Instr #32 +SDNode *or0 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq0, 0), + SDValue(eq2, 0), + // glue (or chain) input edge + SDValue(eq2, 1) + ); + +// R18 = R18 == R31; +// Instr #33 +SDNode *eq3 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or0, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(or0, 1) + ); + +SDValue ct22 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #34 +SDNode *nop1 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct22, + // glue (or chain) input edge + SDValue(eq3, 1) + ); + +// WHERE_EQ; +// Instr #35 +SDNode *whereeq1 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq3, 0), + // glue (or chain) input edge + SDValue(nop1, 0) + ); + +// R26 = R26 | R10; +// Instr #36 +SDNode *or1 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload8, 0), + SDValue(and2, 0), + SDValue(and2, 0), + // glue (or chain) input edge + SDValue(whereeq1, 1) + ); + +// END_WHERE; +// Instr #37 +SDNode *endwhere1 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(or1, 1) + ); + +// R20 = R23 & R11; +// Instr #38 +SDNode *and4 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(endwhere1, 0) + ); + +// R21 = R23 & R12; +// Instr #39 +SDNode *and5 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload6, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(and4, 1) + ); + +SDValue ct23 = CurDAG->getConstant(10, DL, 
MVT::i16, true, false); +// R21 = R21 >> 10; +// Instr #40 +SDNode *ishr1 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and5, 0), + ct23, + // glue (or chain) input edge + SDValue(and5, 1) + ); + +// R22 = R23 & R13; +// Instr #41 +SDNode *and6 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(ishr1, 1) + ); + +// R16 = R31 < R22; +// Instr #42 +SDNode *lt1 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(and6, 0), + // glue (or chain) input edge + SDValue(and6, 1) + ); + +// R15 = R21 == R31; +// Instr #43 +SDNode *eq4 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr1, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(lt1, 1) + ); + +// R09 = R15 & R16; +// Instr #44 +SDNode *and7 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt1, 0), + SDValue(eq4, 0), + // glue (or chain) input edge + SDValue(eq4, 1) + ); + +// R09 = R09 == R30; +// Instr #45 +SDNode *eq5 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and7, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and7, 1) + ); + +SDValue ct24 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #46 +SDNode *nop2 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct24, + // glue (or chain) input edge + SDValue(eq5, 1) + ); + +// WHERE_EQ; +// Instr #47 +SDNode *whereeq2 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq5, 0), + // glue (or chain) input edge + SDValue(nop2, 0) + ); + +SDValue ct25 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R21 = 1; +// Instr #48 +SDNode *vload20 = CurDAG->getMachineNode( 
+ Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct25, + SDValue(ishr1, 0), + // glue (or chain) input edge + SDValue(whereeq2, 1) + ); + +// END_WHERE; +// Instr #49 +SDNode *endwhere2 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload20, 1) + ); + +// R16 = R21 == R28; +// Instr #50 +SDNode *eq6 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload20, 0), + SDValue(vload4, 0), + // glue (or chain) input edge + SDValue(endwhere2, 0) + ); + +// R16 = R16 | R15; +// Instr #51 +SDNode *or2 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq4, 0), + SDValue(eq6, 0), + // glue (or chain) input edge + SDValue(eq6, 1) + ); + +// R16 = R16 == R31; +// Instr #52 +SDNode *eq7 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or2, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(or2, 1) + ); + +SDValue ct26 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #53 +SDNode *nop3 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct26, + // glue (or chain) input edge + SDValue(eq7, 1) + ); + +// WHERE_EQ; +// Instr #54 +SDNode *whereeq3 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq7, 0), + // glue (or chain) input edge + SDValue(nop3, 0) + ); + +// R22 = R22 | R10; +// Instr #55 +SDNode *or3 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload8, 0), + SDValue(and6, 0), + SDValue(and6, 0), + // glue (or chain) input edge + SDValue(whereeq3, 1) + ); + +// END_WHERE; +// Instr #56 +SDNode *endwhere3 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(or3, 1) + ); + +// R50 = R24 == R11; +// Instr #57 +SDNode *eq8 = CurDAG->getMachineNode( + 
Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and0, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(endwhere3, 0) + ); + +// R49 = R25 == R28; +// Instr #58 +SDNode *eq9 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload19, 0), + SDValue(vload4, 0), + // glue (or chain) input edge + SDValue(eq8, 1) + ); + +// R48 = R26 == R31; +// Instr #59 +SDNode *eq10 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or1, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(eq9, 1) + ); + +// R47 = R20 == R11; +// Instr #60 +SDNode *eq11 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and4, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(eq10, 1) + ); + +// R46 = R21 == R28; +// Instr #61 +SDNode *eq12 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload20, 0), + SDValue(vload4, 0), + // glue (or chain) input edge + SDValue(eq11, 1) + ); + +// R45 = R22 == R31; +// Instr #62 +SDNode *eq13 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or3, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(eq12, 1) + ); + +// R07 = R49 & R46; +// Instr #63 +SDNode *and8 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq12, 0), + SDValue(eq9, 0), + // glue (or chain) input edge + SDValue(eq13, 1) + ); + +// R08 = R07 & R50; +// Instr #64 +SDNode *and9 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq8, 0), + SDValue(and8, 0), + // glue (or chain) input edge + SDValue(and8, 1) + ); + +// R44 = ~R47; +// Instr #65 +SDNode *not0 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq11, 0), + // glue (or chain) input edge + SDValue(and9, 1) + ); + +// R08 = R08 & R44; +// Instr 
#66 +SDNode *and10 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(not0, 0), + SDValue(and9, 0), + // glue (or chain) input edge + SDValue(not0, 1) + ); + +// R44 = ~R50; +// Instr #67 +SDNode *not1 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq8, 0), + // glue (or chain) input edge + SDValue(and10, 1) + ); + +// R44 = R44 & R07; +// Instr #68 +SDNode *and11 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and8, 0), + SDValue(not1, 0), + // glue (or chain) input edge + SDValue(not1, 1) + ); + +// R44 = R44 & R47; +// Instr #69 +SDNode *and12 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq11, 0), + SDValue(and11, 0), + // glue (or chain) input edge + SDValue(and11, 1) + ); + +// R08 = R08 | R44; +// Instr #70 +SDNode *or4 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and12, 0), + SDValue(and10, 0), + // glue (or chain) input edge + SDValue(and12, 1) + ); + +// R07 = ~R45; +// Instr #71 +SDNode *not2 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq13, 0), + // glue (or chain) input edge + SDValue(or4, 1) + ); + +// R07 = R07 & R46; +// Instr #72 +SDNode *and13 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq12, 0), + SDValue(not2, 0), + // glue (or chain) input edge + SDValue(not2, 1) + ); + +// R08 = R08 | R07; +// Instr #73 +SDNode *or5 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and13, 0), + SDValue(or4, 0), + // glue (or chain) input edge + SDValue(and13, 1) + ); + +// R07 = ~R48; +// Instr #74 +SDNode *not3 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq10, 0), + // glue (or chain) input edge + SDValue(or5, 1) + ); + +// R07 = R07 & R49; +// Instr #75 +SDNode 
*and14 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq9, 0), + SDValue(not3, 0), + // glue (or chain) input edge + SDValue(not3, 1) + ); + +// R08 = R08 | R07; +// Instr #76 +SDNode *or6 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and14, 0), + SDValue(or5, 0), + // glue (or chain) input edge + SDValue(and14, 1) + ); + +// R09 = R08 == R30; +// Instr #77 +SDNode *eq14 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or6, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(or6, 1) + ); + +SDValue ct27 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #78 +SDNode *nop4 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct27, + // glue (or chain) input edge + SDValue(eq14, 1) + ); + +// WHERE_EQ; +// Instr #79 +SDNode *whereeq4 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq14, 0), + // glue (or chain) input edge + SDValue(nop4, 0) + ); + +SDValue ct28 = CurDAG->getConstant(31745, DL, MVT::i16, true, false); +// R19 = 31745; +// Instr #80 +SDNode *vload21 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct28, + SDValue(vload10, 0), + // glue (or chain) input edge + SDValue(whereeq4, 1) + ); + +SDValue ct29 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R14 = 0; +// Instr #81 +SDNode *vload22 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct29, + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(vload21, 1) + ); + +// END_WHERE; +// Instr #82 +SDNode *endwhere4 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload22, 1) + ); + +// R08 = R49 | R46; +// Instr #83 +SDNode *or7 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + 
MVT::Glue, + SDValue(eq12, 0), + SDValue(eq9, 0), + // glue (or chain) input edge + SDValue(endwhere4, 0) + ); + +// R09 = R08 & R14; +// Instr #84 +SDNode *and15 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload22, 0), + SDValue(or7, 0), + // glue (or chain) input edge + SDValue(or7, 1) + ); + +// R09 = R09 == R30; +// Instr #85 +SDNode *eq15 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and15, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and15, 1) + ); + +SDValue ct30 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #86 +SDNode *nop5 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct30, + // glue (or chain) input edge + SDValue(eq15, 1) + ); + +// WHERE_EQ; +// Instr #87 +SDNode *whereeq5 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq15, 0), + // glue (or chain) input edge + SDValue(nop5, 0) + ); + +SDValue ct31 = CurDAG->getConstant(31744, DL, MVT::i16, true, false); +// R19 = 31744; +// Instr #88 +SDNode *vload23 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct31, + SDValue(vload21, 0), + // glue (or chain) input edge + SDValue(whereeq5, 1) + ); + +// R08 = R50 & R49; +// Instr #89 +SDNode *and16 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq9, 0), + SDValue(eq8, 0), + SDValue(or7, 0), + // glue (or chain) input edge + SDValue(vload23, 1) + ); + +// R07 = R47 & R46; +// Instr #90 +SDNode *and17 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq12, 0), + SDValue(eq11, 0), + SDValue(and14, 0), + // glue (or chain) input edge + SDValue(and16, 1) + ); + +// R08 = R08 | R07; +// Instr #91 +SDNode *or8 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + 
MVT::Glue, + SDValue(and17, 0), + SDValue(and16, 0), + SDValue(and16, 0), + // glue (or chain) input edge + SDValue(and17, 1) + ); + +SDValue ct32 = CurDAG->getConstant(15, DL, MVT::i16, true, false); +// R08 = R08 << 15; +// Instr #92 +SDNode *ishl0 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or8, 0), + ct32, + SDValue(or8, 0), + // glue (or chain) input edge + SDValue(or8, 1) + ); + +// R19 = R19 ^ R08; +// Instr #93 +SDNode *xor0 = CurDAG->getMachineNode( + Connex::XORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl0, 0), + SDValue(vload23, 0), + SDValue(vload23, 0), + // glue (or chain) input edge + SDValue(ishl0, 1) + ); + +SDValue ct33 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R14 = 0; +// Instr #94 +SDNode *vload24 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct33, + SDValue(vload22, 0), + // glue (or chain) input edge + SDValue(xor0, 1) + ); + +// END_WHERE; +// Instr #95 +SDNode *endwhere5 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload24, 1) + ); + +// R15 = R25 - R21; +// Instr #96 +SDNode *sub0 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload19, 0), + SDValue(vload20, 0), + // glue (or chain) input edge + SDValue(endwhere5, 0) + ); + +SDValue ct34 = CurDAG->getConstant(-15, DL, MVT::i16, true, false); +// R08 = -15; +// Instr #97 +SDNode *vload25 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct34, + // glue (or chain) input edge + SDValue(sub0, 1) + ); + +// R09 = R15 < R08; +// Instr #98 +SDNode *lt2 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub0, 0), + SDValue(vload25, 0), + // glue (or chain) input edge + SDValue(vload25, 1) + ); + +// R09 = R09 & R14; +// Instr #99 +SDNode *and18 = CurDAG->getMachineNode( + 
Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt2, 0), + // glue (or chain) input edge + SDValue(lt2, 1) + ); + +// R09 = R09 == R30; +// Instr #100 +SDNode *eq16 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and18, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and18, 1) + ); + +SDValue ct35 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #101 +SDNode *nop6 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct35, + // glue (or chain) input edge + SDValue(eq16, 1) + ); + +// WHERE_EQ; +// Instr #102 +SDNode *whereeq6 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq16, 0), + // glue (or chain) input edge + SDValue(nop6, 0) + ); + +// R15 = R31 - R15; +// Instr #103 +SDNode *sub1 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(sub0, 0), + SDValue(sub0, 0), + // glue (or chain) input edge + SDValue(whereeq6, 1) + ); + +SDValue ct36 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R26 = 0; +// Instr #104 +SDNode *vload26 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct36, + SDValue(or1, 0), + // glue (or chain) input edge + SDValue(sub1, 1) + ); + +SDValue ct37 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R25 = R21 << 0; +// Instr #105 +SDNode *ishl1 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload20, 0), + ct37, + SDValue(vload19, 0), + // glue (or chain) input edge + SDValue(vload26, 1) + ); + +SDValue ct38 = CurDAG->getConstant(32000, DL, MVT::i16, true, false); +// R15 = 32000; +// Instr #106 +SDNode *vload27 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct38, + SDValue(sub1, 0), + // glue (or chain) input 
edge + SDValue(ishl1, 1) + ); + +// END_WHERE; +// Instr #107 +SDNode *endwhere6 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload27, 1) + ); + +SDValue ct39 = CurDAG->getConstant(-3, DL, MVT::i16, true, false); +// R08 = -3; +// Instr #108 +SDNode *vload28 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct39, + // glue (or chain) input edge + SDValue(endwhere6, 0) + ); + +// R09 = R15 < R08; +// Instr #109 +SDNode *lt3 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload27, 0), + SDValue(vload28, 0), + // glue (or chain) input edge + SDValue(vload28, 1) + ); + +// R09 = R09 & R14; +// Instr #110 +SDNode *and19 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt3, 0), + // glue (or chain) input edge + SDValue(lt3, 1) + ); + +// R09 = R09 == R30; +// Instr #111 +SDNode *eq17 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and19, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and19, 1) + ); + +SDValue ct40 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #112 +SDNode *nop7 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct40, + // glue (or chain) input edge + SDValue(eq17, 1) + ); + +// WHERE_EQ; +// Instr #113 +SDNode *whereeq7 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq17, 0), + // glue (or chain) input edge + SDValue(nop7, 0) + ); + +// R15 = R31 - R15; +// Instr #114 +SDNode *sub2 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(vload27, 0), + SDValue(vload27, 0), + // glue (or chain) input edge + SDValue(whereeq7, 1) + ); + +// R26 = R26 >> R15; +// Instr #115 +SDNode *shr0 = CurDAG->getMachineNode( + 
Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload26, 0), + SDValue(sub2, 0), + SDValue(vload26, 0), + // glue (or chain) input edge + SDValue(sub2, 1) + ); + +SDValue ct41 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R25 = R21 << 0; +// Instr #116 +SDNode *ishl2 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload20, 0), + ct41, + SDValue(ishl1, 0), + // glue (or chain) input edge + SDValue(shr0, 1) + ); + +SDValue ct42 = CurDAG->getConstant(32000, DL, MVT::i16, true, false); +// R15 = 32000; +// Instr #117 +SDNode *vload29 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct42, + SDValue(sub2, 0), + // glue (or chain) input edge + SDValue(ishl2, 1) + ); + +// END_WHERE; +// Instr #118 +SDNode *endwhere7 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload29, 1) + ); + +// R09 = R15 < R31; +// Instr #119 +SDNode *lt4 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload29, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(endwhere7, 0) + ); + +// R09 = R09 & R14; +// Instr #120 +SDNode *and20 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt4, 0), + // glue (or chain) input edge + SDValue(lt4, 1) + ); + +// R09 = R09 == R30; +// Instr #121 +SDNode *eq18 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and20, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and20, 1) + ); + +SDValue ct43 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #122 +SDNode *nop8 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct43, + // glue (or chain) input edge + SDValue(eq18, 1) + ); + +// WHERE_EQ; +// Instr #123 +SDNode *whereeq8 = 
CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq18, 0), + // glue (or chain) input edge + SDValue(nop8, 0) + ); + +// R15 = R31 - R15; +// Instr #124 +SDNode *sub3 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(vload29, 0), + SDValue(vload29, 0), + // glue (or chain) input edge + SDValue(whereeq8, 1) + ); + +// R22 = R22 << R15; +// Instr #125 +SDNode *shl0 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or3, 0), + SDValue(sub3, 0), + SDValue(or3, 0), + // glue (or chain) input edge + SDValue(sub3, 1) + ); + +SDValue ct44 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R21 = R25 << 0; +// Instr #126 +SDNode *ishl3 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl2, 0), + ct44, + SDValue(vload20, 0), + // glue (or chain) input edge + SDValue(shl0, 1) + ); + +SDValue ct45 = CurDAG->getConstant(32000, DL, MVT::i16, true, false); +// R15 = 32000; +// Instr #127 +SDNode *vload30 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct45, + SDValue(sub3, 0), + // glue (or chain) input edge + SDValue(ishl3, 1) + ); + +// END_WHERE; +// Instr #128 +SDNode *endwhere8 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload30, 1) + ); + +SDValue ct46 = CurDAG->getConstant(4, DL, MVT::i16, true, false); +// R08 = 4; +// Instr #129 +SDNode *vload31 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct46, + // glue (or chain) input edge + SDValue(endwhere8, 0) + ); + +// R09 = R15 < R08; +// Instr #130 +SDNode *lt5 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload30, 0), + SDValue(vload31, 0), + // glue (or chain) input edge + SDValue(vload31, 1) + ); + +// R09 = R09 & 
R14; +// Instr #131 +SDNode *and21 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt5, 0), + // glue (or chain) input edge + SDValue(lt5, 1) + ); + +// R09 = R09 == R30; +// Instr #132 +SDNode *eq19 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and21, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and21, 1) + ); + +SDValue ct47 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #133 +SDNode *nop9 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct47, + // glue (or chain) input edge + SDValue(eq19, 1) + ); + +// WHERE_EQ; +// Instr #134 +SDNode *whereeq9 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq19, 0), + // glue (or chain) input edge + SDValue(nop9, 0) + ); + +// R26 = R26 << R15; +// Instr #135 +SDNode *shl1 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shr0, 0), + SDValue(vload30, 0), + SDValue(shr0, 0), + // glue (or chain) input edge + SDValue(whereeq9, 1) + ); + +SDValue ct48 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R25 = R21 << 0; +// Instr #136 +SDNode *ishl4 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl3, 0), + ct48, + SDValue(ishl2, 0), + // glue (or chain) input edge + SDValue(shl1, 1) + ); + +SDValue ct49 = CurDAG->getConstant(32000, DL, MVT::i16, true, false); +// R15 = 32000; +// Instr #137 +SDNode *vload32 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct49, + SDValue(vload30, 0), + // glue (or chain) input edge + SDValue(ishl4, 1) + ); + +// END_WHERE; +// Instr #138 +SDNode *endwhere9 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload32, 1) + ); + +// R09 = R15 < R29; 
+// Instr #139 +SDNode *lt6 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload32, 0), + SDValue(vload3, 0), + // glue (or chain) input edge + SDValue(endwhere9, 0) + ); + +// R09 = R09 & R14; +// Instr #140 +SDNode *and22 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt6, 0), + // glue (or chain) input edge + SDValue(lt6, 1) + ); + +// R09 = R09 == R30; +// Instr #141 +SDNode *eq20 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and22, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and22, 1) + ); + +SDValue ct50 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #142 +SDNode *nop10 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct50, + // glue (or chain) input edge + SDValue(eq20, 1) + ); + +// WHERE_EQ; +// Instr #143 +SDNode *whereeq10 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq20, 0), + // glue (or chain) input edge + SDValue(nop10, 0) + ); + +// R22 = R22 >> R15; +// Instr #144 +SDNode *shr1 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl0, 0), + SDValue(vload32, 0), + SDValue(shl0, 0), + // glue (or chain) input edge + SDValue(whereeq10, 1) + ); + +SDValue ct51 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R21 = R25 << 0; +// Instr #145 +SDNode *ishl5 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl4, 0), + ct51, + SDValue(ishl3, 0), + // glue (or chain) input edge + SDValue(shr1, 1) + ); + +SDValue ct52 = CurDAG->getConstant(32000, DL, MVT::i16, true, false); +// R15 = 32000; +// Instr #146 +SDNode *vload33 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct52, + SDValue(vload32, 0), + // glue (or chain) 
input edge + SDValue(ishl5, 1) + ); + +// END_WHERE; +// Instr #147 +SDNode *endwhere10 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload33, 1) + ); + +SDValue ct53 = CurDAG->getConstant(32, DL, MVT::i16, true, false); +// R08 = 32; +// Instr #148 +SDNode *vload34 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct53, + // glue (or chain) input edge + SDValue(endwhere10, 0) + ); + +// R09 = R15 < R08; +// Instr #149 +SDNode *lt7 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload33, 0), + SDValue(vload34, 0), + // glue (or chain) input edge + SDValue(vload34, 1) + ); + +// R09 = R09 & R14; +// Instr #150 +SDNode *and23 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt7, 0), + // glue (or chain) input edge + SDValue(lt7, 1) + ); + +// R09 = R09 == R30; +// Instr #151 +SDNode *eq21 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and23, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and23, 1) + ); + +SDValue ct54 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #152 +SDNode *nop11 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct54, + // glue (or chain) input edge + SDValue(eq21, 1) + ); + +// WHERE_EQ; +// Instr #153 +SDNode *whereeq11 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq21, 0), + // glue (or chain) input edge + SDValue(nop11, 0) + ); + +SDValue ct55 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R22 = 0; +// Instr #154 +SDNode *vload35 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct55, + SDValue(shr1, 0), + // glue (or chain) input edge + SDValue(whereeq11, 1) + ); + +SDValue ct56 = CurDAG->getConstant(0, DL, 
MVT::i16, true, false); +// R21 = R25 << 0; +// Instr #155 +SDNode *ishl6 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl4, 0), + ct56, + SDValue(ishl5, 0), + // glue (or chain) input edge + SDValue(vload35, 1) + ); + +// END_WHERE; +// Instr #156 +SDNode *endwhere11 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(ishl6, 1) + ); + +// R09 = R24 == R11; +// Instr #157 +SDNode *eq22 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and0, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(endwhere11, 0) + ); + +// R09 = R09 & R14; +// Instr #158 +SDNode *and24 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(eq22, 0), + // glue (or chain) input edge + SDValue(eq22, 1) + ); + +// R09 = R09 == R30; +// Instr #159 +SDNode *eq23 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and24, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and24, 1) + ); + +SDValue ct57 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #160 +SDNode *nop12 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct57, + // glue (or chain) input edge + SDValue(eq23, 1) + ); + +// WHERE_EQ; +// Instr #161 +SDNode *whereeq12 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq23, 0), + // glue (or chain) input edge + SDValue(nop12, 0) + ); + +// R26 = R31 - R26; +// Instr #162 +SDNode *sub4 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(shl1, 0), + SDValue(shl1, 0), + // glue (or chain) input edge + SDValue(whereeq12, 1) + ); + +// END_WHERE; +// Instr #163 +SDNode *endwhere12 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + 
MVT::Glue, + // glue (or chain) input edge + SDValue(sub4, 1) + ); + +// R09 = R20 == R11; +// Instr #164 +SDNode *eq24 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and4, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(endwhere12, 0) + ); + +// R09 = R09 & R14; +// Instr #165 +SDNode *and25 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(eq24, 0), + // glue (or chain) input edge + SDValue(eq24, 1) + ); + +// R09 = R09 == R30; +// Instr #166 +SDNode *eq25 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and25, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and25, 1) + ); + +SDValue ct58 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #167 +SDNode *nop13 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct58, + // glue (or chain) input edge + SDValue(eq25, 1) + ); + +// WHERE_EQ; +// Instr #168 +SDNode *whereeq13 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq25, 0), + // glue (or chain) input edge + SDValue(nop13, 0) + ); + +// R22 = R31 - R22; +// Instr #169 +SDNode *sub5 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(vload35, 0), + SDValue(vload35, 0), + // glue (or chain) input edge + SDValue(whereeq13, 1) + ); + +// END_WHERE; +// Instr #170 +SDNode *endwhere13 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sub5, 1) + ); + +// R09 = R14 == R30; +// Instr #171 +SDNode *eq26 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere13, 0) + ); + +SDValue ct59 = CurDAG->getConstant(1 /* Num of cycles to NOP */, 
DL, MVT::i16, true, false); +// NOP; +// Instr #172 +SDNode *nop14 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct59, + // glue (or chain) input edge + SDValue(eq26, 1) + ); + +// WHERE_EQ; +// Instr #173 +SDNode *whereeq14 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq26, 0), + // glue (or chain) input edge + SDValue(nop14, 0) + ); + +// R26 = R22 + R26; +// Instr #174 +SDNode *add0 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub4, 0), + SDValue(sub5, 0), + SDValue(sub4, 0), + // glue (or chain) input edge + SDValue(whereeq14, 1) + ); + +// END_WHERE; +// Instr #175 +SDNode *endwhere14 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(add0, 1) + ); + +// R24 = R26 & R11; +// Instr #176 +SDNode *and26 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(add0, 0), + // glue (or chain) input edge + SDValue(endwhere14, 0) + ); + +// R09 = R24 == R11; +// Instr #177 +SDNode *eq27 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and26, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(and26, 1) + ); + +// R09 = R09 & R14; +// Instr #178 +SDNode *and27 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(eq27, 0), + // glue (or chain) input edge + SDValue(eq27, 1) + ); + +// R09 = R09 == R30; +// Instr #179 +SDNode *eq28 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and27, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and27, 1) + ); + +SDValue ct60 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #180 +SDNode *nop15 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct60, + // glue 
(or chain) input edge + SDValue(eq28, 1) + ); + +// WHERE_EQ; +// Instr #181 +SDNode *whereeq15 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq28, 0), + // glue (or chain) input edge + SDValue(nop15, 0) + ); + +// R26 = R31 - R26; +// Instr #182 +SDNode *sub6 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(add0, 0), + SDValue(add0, 0), + // glue (or chain) input edge + SDValue(whereeq15, 1) + ); + +// END_WHERE; +// Instr #183 +SDNode *endwhere15 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sub6, 1) + ); + +SDValue ct61 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R05 = R26 << 0; +// Instr #184 +SDNode *ishl7 = CurDAG->getMachineNode( + Connex::ISHLV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub6, 0), + ct61, + // glue (or chain) input edge + SDValue(endwhere15, 0) + ); + +SDValue ct62 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R08 = R05 >> 1; +// Instr #185 +SDNode *ishr2 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl7, 0), + ct62, + // glue (or chain) input edge + SDValue(ishl7, 1) + ); + +// R05 = R05 | R08; +// Instr #186 +SDNode *or9 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr2, 0), + SDValue(ishl7, 0), + // glue (or chain) input edge + SDValue(ishr2, 1) + ); + +SDValue ct63 = CurDAG->getConstant(2, DL, MVT::i16, true, false); +// R08 = R05 >> 2; +// Instr #187 +SDNode *ishr3 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or9, 0), + ct63, + // glue (or chain) input edge + SDValue(or9, 1) + ); + +// R05 = R05 | R08; +// Instr #188 +SDNode *or10 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr3, 0), + SDValue(or9, 0), + // glue (or chain) input edge + 
SDValue(ishr3, 1) + ); + +SDValue ct64 = CurDAG->getConstant(4, DL, MVT::i16, true, false); +// R08 = R05 >> 4; +// Instr #189 +SDNode *ishr4 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or10, 0), + ct64, + // glue (or chain) input edge + SDValue(or10, 1) + ); + +// R05 = R05 | R08; +// Instr #190 +SDNode *or11 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr4, 0), + SDValue(or10, 0), + // glue (or chain) input edge + SDValue(ishr4, 1) + ); + +SDValue ct65 = CurDAG->getConstant(8, DL, MVT::i16, true, false); +// R08 = R05 >> 8; +// Instr #191 +SDNode *ishr5 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or11, 0), + ct65, + // glue (or chain) input edge + SDValue(or11, 1) + ); + +// R05 = R05 | R08; +// Instr #192 +SDNode *or12 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr5, 0), + SDValue(or11, 0), + // glue (or chain) input edge + SDValue(ishr5, 1) + ); + +// R05 = ~R05; +// Instr #193 +SDNode *not4 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or12, 0), + // glue (or chain) input edge + SDValue(or12, 1) + ); + +// R06 = POPCNT(R05); +// Instr #194 +SDNode *popcnt0 = CurDAG->getMachineNode( + Connex::POPCNT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(not4, 0), + // glue (or chain) input edge + SDValue(not4, 1) + ); + +// R06 = R29 - R06; +// Instr #195 +SDNode *sub7 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload3, 0), + SDValue(popcnt0, 0), + // glue (or chain) input edge + SDValue(popcnt0, 1) + ); + +SDValue ct66 = CurDAG->getConstant(11, DL, MVT::i16, true, false); +// R08 = 11; +// Instr #196 +SDNode *vload36 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct66, + // glue (or chain) input edge + SDValue(sub7, 1) + ); + +// R08 = R06 - 
R08; +// Instr #197 +SDNode *sub8 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub7, 0), + SDValue(vload36, 0), + // glue (or chain) input edge + SDValue(vload36, 1) + ); + +// R09 = R31 < R08; +// Instr #198 +SDNode *lt8 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(sub8, 0), + // glue (or chain) input edge + SDValue(sub8, 1) + ); + +// R09 = R09 & R14; +// Instr #199 +SDNode *and28 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt8, 0), + // glue (or chain) input edge + SDValue(lt8, 1) + ); + +// R09 = R09 == R30; +// Instr #200 +SDNode *eq29 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and28, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and28, 1) + ); + +SDValue ct67 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #201 +SDNode *nop16 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct67, + // glue (or chain) input edge + SDValue(eq29, 1) + ); + +// WHERE_EQ; +// Instr #202 +SDNode *whereeq16 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq29, 0), + // glue (or chain) input edge + SDValue(nop16, 0) + ); + +SDValue ct68 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R62 = R26 << 0; +// Instr #203 +SDNode *ishl8 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub6, 0), + ct68, + SDValue(vload11, 0), + // glue (or chain) input edge + SDValue(whereeq16, 1) + ); + +// R09 = R29 - R08; +// Instr #204 +SDNode *sub9 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload3, 0), + SDValue(sub8, 0), + SDValue(eq29, 0), + // glue (or chain) input edge + SDValue(ishl8, 1) + ); + +// R62 = R62 << R09; +// 
Instr #205 +SDNode *shl2 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl8, 0), + SDValue(sub9, 0), + SDValue(ishl8, 0), + // glue (or chain) input edge + SDValue(sub9, 1) + ); + +// R62 = R62 >> R09; +// Instr #206 +SDNode *shr2 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl2, 0), + SDValue(sub9, 0), + SDValue(shl2, 0), + // glue (or chain) input edge + SDValue(shl2, 1) + ); + +SDValue ct69 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R61 = R08 << 0; +// Instr #207 +SDNode *ishl9 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub8, 0), + ct69, + SDValue(vload12, 0), + // glue (or chain) input edge + SDValue(shr2, 1) + ); + +// R26 = R26 >> R08; +// Instr #208 +SDNode *shr3 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub6, 0), + SDValue(sub8, 0), + SDValue(sub6, 0), + // glue (or chain) input edge + SDValue(ishl9, 1) + ); + +// R25 = R08 + R25; +// Instr #209 +SDNode *add1 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl4, 0), + SDValue(sub8, 0), + SDValue(ishl4, 0), + // glue (or chain) input edge + SDValue(shr3, 1) + ); + +// END_WHERE; +// Instr #210 +SDNode *endwhere16 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(add1, 1) + ); + +// R09 = R08 < R31; +// Instr #211 +SDNode *lt9 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub8, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(endwhere16, 0) + ); + +// R09 = R09 & R14; +// Instr #212 +SDNode *and29 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt9, 0), + // glue (or chain) input edge + SDValue(lt9, 1) + ); + +// R09 = R09 == R30; +// 
Instr #213 +SDNode *eq30 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and29, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and29, 1) + ); + +SDValue ct70 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #214 +SDNode *nop17 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct70, + // glue (or chain) input edge + SDValue(eq30, 1) + ); + +// WHERE_EQ; +// Instr #215 +SDNode *whereeq17 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq30, 0), + // glue (or chain) input edge + SDValue(nop17, 0) + ); + +// R08 = R31 - R08; +// Instr #216 +SDNode *sub10 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(sub8, 0), + SDValue(sub8, 0), + // glue (or chain) input edge + SDValue(whereeq17, 1) + ); + +// R26 = R26 << R08; +// Instr #217 +SDNode *shl3 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shr3, 0), + SDValue(sub10, 0), + SDValue(shr3, 0), + // glue (or chain) input edge + SDValue(sub10, 1) + ); + +// R25 = R25 - R08; +// Instr #218 +SDNode *sub11 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(add1, 0), + SDValue(sub10, 0), + SDValue(add1, 0), + // glue (or chain) input edge + SDValue(shl3, 1) + ); + +// END_WHERE; +// Instr #219 +SDNode *endwhere17 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sub11, 1) + ); + +// R09 = R25 < R30; +// Instr #220 +SDNode *lt10 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub11, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere17, 0) + ); + +// R09 = R09 & R14; +// Instr #221 +SDNode *and30 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, 
+ MVT::Glue, + SDValue(vload24, 0), + SDValue(lt10, 0), + // glue (or chain) input edge + SDValue(lt10, 1) + ); + +// R09 = R09 == R30; +// Instr #222 +SDNode *eq31 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and30, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and30, 1) + ); + +SDValue ct71 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #223 +SDNode *nop18 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct71, + // glue (or chain) input edge + SDValue(eq31, 1) + ); + +// WHERE_EQ; +// Instr #224 +SDNode *whereeq18 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq31, 0), + // glue (or chain) input edge + SDValue(nop18, 0) + ); + +// R61 = R30 - R25; +// Instr #225 +SDNode *sub12 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(sub11, 0), + SDValue(ishl9, 0), + // glue (or chain) input edge + SDValue(whereeq18, 1) + ); + +SDValue ct72 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R25 = 1; +// Instr #226 +SDNode *vload37 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct72, + SDValue(sub11, 0), + // glue (or chain) input edge + SDValue(sub12, 1) + ); + +SDValue ct73 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R62 = R26 << 0; +// Instr #227 +SDNode *ishl10 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl3, 0), + ct73, + SDValue(shr2, 0), + // glue (or chain) input edge + SDValue(vload37, 1) + ); + +// R09 = R29 - R61; +// Instr #228 +SDNode *sub13 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload3, 0), + SDValue(sub12, 0), + SDValue(eq31, 0), + // glue (or chain) input edge + SDValue(ishl10, 1) + ); + +// R62 = R62 << R09; +// Instr #229 
+SDNode *shl4 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl10, 0), + SDValue(sub13, 0), + SDValue(ishl10, 0), + // glue (or chain) input edge + SDValue(sub13, 1) + ); + +// R62 = R62 >> R09; +// Instr #230 +SDNode *shr4 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl4, 0), + SDValue(sub13, 0), + SDValue(shl4, 0), + // glue (or chain) input edge + SDValue(shl4, 1) + ); + +// R26 = R26 >> R61; +// Instr #231 +SDNode *shr5 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl3, 0), + SDValue(sub12, 0), + SDValue(shl3, 0), + // glue (or chain) input edge + SDValue(shr4, 1) + ); + +// END_WHERE; +// Instr #232 +SDNode *endwhere18 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(shr5, 1) + ); + +SDValue ct74 = CurDAG->getConstant(1024, DL, MVT::i16, true, false); +// R08 = 1024; +// Instr #233 +SDNode *vload38 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct74, + // glue (or chain) input edge + SDValue(endwhere18, 0) + ); + +// R08 = R26 < R08; +// Instr #234 +SDNode *lt11 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shr5, 0), + SDValue(vload38, 0), + // glue (or chain) input edge + SDValue(vload38, 1) + ); + +// R09 = R25 == R30; +// Instr #235 +SDNode *eq32 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload37, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(lt11, 1) + ); + +// R09 = R09 & R14; +// Instr #236 +SDNode *and31 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(eq32, 0), + // glue (or chain) input edge + SDValue(eq32, 1) + ); + +// R09 = R09 & R08; +// Instr #237 +SDNode *and32 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + 
TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt11, 0), + SDValue(and31, 0), + // glue (or chain) input edge + SDValue(and31, 1) + ); + +// R09 = R09 == R30; +// Instr #238 +SDNode *eq33 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and32, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and32, 1) + ); + +SDValue ct75 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #239 +SDNode *nop19 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct75, + // glue (or chain) input edge + SDValue(eq33, 1) + ); + +// WHERE_EQ; +// Instr #240 +SDNode *whereeq19 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq33, 0), + // glue (or chain) input edge + SDValue(nop19, 0) + ); + +SDValue ct76 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R25 = 0; +// Instr #241 +SDNode *vload39 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct76, + SDValue(vload37, 0), + // glue (or chain) input edge + SDValue(whereeq19, 1) + ); + +// END_WHERE; +// Instr #242 +SDNode *endwhere19 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload39, 1) + ); + +// R26 = R26 & R13; +// Instr #243 +SDNode *and33 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(shr5, 0), + // glue (or chain) input edge + SDValue(endwhere19, 0) + ); + +SDValue ct77 = CurDAG->getConstant(30, DL, MVT::i16, true, false); +// R09 = 30; +// Instr #244 +SDNode *vload40 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct77, + // glue (or chain) input edge + SDValue(and33, 1) + ); + +// R09 = R09 < R25; +// Instr #245 +SDNode *lt12 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload40, 0), + SDValue(vload39, 0), + // glue 
(or chain) input edge + SDValue(vload40, 1) + ); + +// R09 = R09 & R14; +// Instr #246 +SDNode *and34 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt12, 0), + // glue (or chain) input edge + SDValue(lt12, 1) + ); + +// R09 = R09 == R30; +// Instr #247 +SDNode *eq34 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and34, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and34, 1) + ); + +SDValue ct78 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #248 +SDNode *nop20 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct78, + // glue (or chain) input edge + SDValue(eq34, 1) + ); + +// WHERE_EQ; +// Instr #249 +SDNode *whereeq20 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq34, 0), + // glue (or chain) input edge + SDValue(nop20, 0) + ); + +SDValue ct79 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R14 = 0; +// Instr #250 +SDNode *vload41 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct79, + SDValue(vload24, 0), + // glue (or chain) input edge + SDValue(whereeq20, 1) + ); + +SDValue ct80 = CurDAG->getConstant(31744, DL, MVT::i16, true, false); +// R19 = 31744; +// Instr #251 +SDNode *vload42 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct80, + SDValue(xor0, 0), + // glue (or chain) input edge + SDValue(vload41, 1) + ); + +// R19 = R19 | R24; +// Instr #252 +SDNode *or13 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and26, 0), + SDValue(vload42, 0), + SDValue(vload42, 0), + // glue (or chain) input edge + SDValue(vload42, 1) + ); + +// END_WHERE; +// Instr #253 +SDNode *endwhere20 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input 
edge + SDValue(or13, 1) + ); + +// R08 = R14 == R30; +// Instr #254 +SDNode *eq35 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload41, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere20, 0) + ); + +SDValue ct81 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #255 +SDNode *nop21 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct81, + // glue (or chain) input edge + SDValue(eq35, 1) + ); + +// WHERE_EQ; +// Instr #256 +SDNode *whereeq21 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq35, 0), + // glue (or chain) input edge + SDValue(nop21, 0) + ); + +SDValue ct82 = CurDAG->getConstant(10, DL, MVT::i16, true, false); +// R19 = R25 << 10; +// Instr #257 +SDNode *ishl11 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload39, 0), + ct82, + SDValue(or13, 0), + // glue (or chain) input edge + SDValue(whereeq21, 1) + ); + +// R19 = R19 | R26; +// Instr #258 +SDNode *or14 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and33, 0), + SDValue(ishl11, 0), + SDValue(ishl11, 0), + // glue (or chain) input edge + SDValue(ishl11, 1) + ); + +// R04 = R26 & R30; +// Instr #259 +SDNode *and35 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(and33, 0), + SDValue(vload14, 0), + // glue (or chain) input edge + SDValue(or14, 1) + ); + +// R07 = R61 - R30; +// Instr #260 +SDNode *sub14 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub12, 0), + SDValue(vload1, 0), + SDValue(and17, 0), + // glue (or chain) input edge + SDValue(and35, 1) + ); + +// R08 = R30 << R08; +// Instr #261 +SDNode *shl5 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + 
MVT::Glue, + SDValue(vload1, 0), + SDValue(eq35, 0), + SDValue(eq35, 0), + // glue (or chain) input edge + SDValue(sub14, 1) + ); + +// R03 = R62 & R08; +// Instr #262 +SDNode *and36 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl5, 0), + SDValue(shr4, 0), + SDValue(vload15, 0), + // glue (or chain) input edge + SDValue(shl5, 1) + ); + +// R62 = R62 ^ R03; +// Instr #263 +SDNode *xor1 = CurDAG->getMachineNode( + Connex::XORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and36, 0), + SDValue(shr4, 0), + SDValue(shr4, 0), + // glue (or chain) input edge + SDValue(and36, 1) + ); + +// R03 = R03 == R31; +// Instr #264 +SDNode *eq36 = CurDAG->getMachineNode( + Connex::EQ_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and36, 0), + SDValue(vload2, 0), + SDValue(and36, 0), + // glue (or chain) input edge + SDValue(xor1, 1) + ); + +// R03 = R30 - R03; +// Instr #265 +SDNode *sub15 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq36, 0), + SDValue(eq36, 0), + // glue (or chain) input edge + SDValue(eq36, 1) + ); + +SDValue ct83 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R08 = R08 >> 1; +// Instr #266 +SDNode *ishr6 = CurDAG->getMachineNode( + Connex::ISHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl5, 0), + ct83, + SDValue(shl5, 0), + // glue (or chain) input edge + SDValue(sub15, 1) + ); + +// R02 = R62 & R08; +// Instr #267 +SDNode *and37 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr6, 0), + SDValue(xor1, 0), + SDValue(vload16, 0), + // glue (or chain) input edge + SDValue(ishr6, 1) + ); + +// R62 = R62 ^ R02; +// Instr #268 +SDNode *xor2 = CurDAG->getMachineNode( + Connex::XORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and37, 0), + SDValue(xor1, 0), + SDValue(xor1, 0), + // glue (or chain) input edge + 
SDValue(and37, 1) + ); + +// R02 = R02 == R31; +// Instr #269 +SDNode *eq37 = CurDAG->getMachineNode( + Connex::EQ_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and37, 0), + SDValue(vload2, 0), + SDValue(and37, 0), + // glue (or chain) input edge + SDValue(xor2, 1) + ); + +// R02 = R30 - R02; +// Instr #270 +SDNode *sub16 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq37, 0), + SDValue(eq37, 0), + // glue (or chain) input edge + SDValue(eq37, 1) + ); + +// R01 = R62 == R31; +// Instr #271 +SDNode *eq38 = CurDAG->getMachineNode( + Connex::EQ_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(xor2, 0), + SDValue(vload2, 0), + SDValue(vload17, 0), + // glue (or chain) input edge + SDValue(sub16, 1) + ); + +// R01 = R30 - R01; +// Instr #272 +SDNode *sub17 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq38, 0), + SDValue(eq38, 0), + // glue (or chain) input edge + SDValue(eq38, 1) + ); + +// R00 = R04 | R02; +// Instr #273 +SDNode *or15 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub16, 0), + SDValue(and35, 0), + SDValue(vload18, 0), + // glue (or chain) input edge + SDValue(sub17, 1) + ); + +// R00 = R00 | R01; +// Instr #274 +SDNode *or16 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub17, 0), + SDValue(or15, 0), + SDValue(or15, 0), + // glue (or chain) input edge + SDValue(or15, 1) + ); + +// R00 = R00 & R03; +// Instr #275 +SDNode *and38 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub15, 0), + SDValue(or16, 0), + SDValue(or16, 0), + // glue (or chain) input edge + SDValue(or16, 1) + ); + +// R19 = R00 + R19; +// Instr #276 +SDNode *add2 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + 
SDValue(or14, 0), + SDValue(and38, 0), + SDValue(or14, 0), + // glue (or chain) input edge + SDValue(and38, 1) + ); + +// R19 = R19 | R24; +// Instr #277 +SDNode *resF16 /*or17*/ = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and26, 0), + SDValue(add2, 0), + SDValue(add2, 0), + // glue (or chain) input edge + SDValue(add2, 1) + ); + +// END_WHERE; +// Instr #278 +SDNode *lastNode /*endwhere21*/ = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, +// Alex: MVT::Glue, + MVT::Other, + // glue (or chain) input edge +// Alex: SDValue(or17, 1) + SDValue(resF16, 1) + ); + Index: lib/Target/Connex/Select_ADDi32_OpincaaCodeGen.h =================================================================== --- lib/Target/Connex/Select_ADDi32_OpincaaCodeGen.h +++ lib/Target/Connex/Select_ADDi32_OpincaaCodeGen.h @@ -0,0 +1,205 @@ +// From /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/opincaa_standalone_apps/Emulate_i32/ADD_i32_manual/DumpISel_OpincaaCodeGen_old05_011.cpp + +// R27 is REG_SRC1. It is represented by the result of nodeOpSrcCast1. +// R28 is REG_SRC2. It is represented by the result of nodeOpSrcCast2. + + + +// Code auto-generated by method Kernel::genLLVMISelManualCode() +// from Opincaa lib from kernel: simpleIoTest_allowOverwrite123456. +// It is important to put this code in the Select() method of the +// SelectionDAGISel class of your back end, after the ISelLowering pass, +// which contains the DAG Combiner, because the DAG Combiner can remove +// the getCopyToReg() we create, which can lead to the following error: +// an assertion failure (the exact message text is missing from this comment). +// Number of instructions generated: 15 (Instr #0 through #14 below). 
+ + + + +SDValue ct0 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R31 = 0; +// Instr #0 +SDNode *vload0 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct0, + // glue (or chain) input edge + SDValue(nodeOpSrcCast2, 1) + ); + +SDValue ct1 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R30 = 1; +// Instr #1 +SDNode *vload1 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct1, + // glue (or chain) input edge + SDValue(vload0, 1) + ); + +// R29 = R27 + R28; +// Instr #2 +SDNode *add0 = CurDAG->getMachineNode( + Connex::ADDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(nodeOpSrcCast2, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(vload1, 1) + ); + +// R23 = ADDC(R31, R31); +// Instr #3 (its last operand is value result #0 of add0, not a glue result) +SDNode *addc0 = CurDAG->getMachineNode( + Connex::ADDCV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(vload0, 0), + SDValue(add0, 0) + // no need for glue or chain input (since it normally consumes the output of the predecessor) + ); + +// R26 = INDEX; +// Instr #4 +SDNode *ldix0 = CurDAG->getMachineNode( + Connex::LDIX_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + // glue (or chain) input edge + SDValue(addc0, 1) + ); + +// R25 = R26 & R30; +// Instr #5 +SDNode *and0 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(ldix0, 0), + // glue (or chain) input edge + SDValue(ldix0, 1) + ); + +// R24 = R25 == R30; +// Instr #6 +SDNode *eq0 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and0, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and0, 1) + ); + +SDValue ct2 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #7 (created with MVT::Glue as its only result type, so the next node reads SDValue(nop0, 0)) 
+SDNode *nop0 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct2, + // glue (or chain) input edge + SDValue(eq0, 1) + ); + +// 
WHERE_EQ; +// Instr #8 +SDNode *whereeq0 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq0, 0), + // glue (or chain) input edge + SDValue(nop0, 0) + ); + +SDValue ct3 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R23 = 0; +// Instr #9 +SDNode *vload2 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct3, + SDValue(addc0, 0), + // glue (or chain) input edge + SDValue(whereeq0, 1) + ); + +// END_WHERE; +// Instr #10 +SDNode *endwhere0 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload2, 1) + ); + +// CELL_SHR(R23, R30); +// Instr #11 +SDNode *cellshr0 = CurDAG->getMachineNode( + Connex::CELLSHR_H, + DL, + MVT::Glue, + SDValue(vload2, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere0, 0) + ); + +SDValue ct4 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #12 +SDNode *nop1 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct4, + // glue (or chain) input edge + SDValue(cellshr0, 0) + ); + +// R21 = SHIFT_REG; +// Instr #13 +SDNode *ldsh0 = CurDAG->getMachineNode( + Connex::LDSH_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + // glue (or chain) input edge + SDValue(nop1, 0) + ); + +// R22 = R21 + R29; +// Instr #14 (this node, resH, is also aliased as lastNode below) +SDNode *resH /*add1*/ = CurDAG->getMachineNode( + Connex::ADDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(add0, 0), + SDValue(ldsh0, 0), + // glue (or chain) input edge + SDValue(ldsh0, 1) + ); + +SDNode *lastNode = resH; Index: lib/Target/Connex/Select_LTf16_OpincaaCodeGen.h =================================================================== --- lib/Target/Connex/Select_LTf16_OpincaaCodeGen.h +++ lib/Target/Connex/Select_LTf16_OpincaaCodeGen.h @@ -0,0 +1,689 @@ +// From 
/home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/opincaa_standalone_apps/Emulate_f16/LT_f16_manual/DumpISel_OpincaaCodeGen_old05_050.cpp + +// Code auto-generated by method Kernel::genLLVMISelManualCode() +// from Opincaa lib from kernel: lt.f16. +// It is important to put this code in the Select() method of the +// SelectionDAGISel class of your back end, after the ISelLowering pass, +// which contains the DAG Combiner, because the DAG Combiner can remove +// the getCopyToReg() we create, which can lead to the following error: +// <> assertion failed. +// Number of instructions generated: 53. + + + + +SDValue ct0 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R31 = 0; +// Instr #0 +SDNode *vload0 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct0, + // glue (or chain) input edge + SDValue(nodeOpSrcCast1, 1) + ); + +SDValue ct1 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R30 = 1; +// Instr #1 +SDNode *vload1 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct1, + // glue (or chain) input edge + SDValue(vload0, 1) + ); + +SDValue ct2 = CurDAG->getConstant(5, DL, MVT::i16, true, false); +// R29 = 5; +// Instr #2 +SDNode *vload2 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct2, + // glue (or chain) input edge + SDValue(vload1, 1) + ); + +SDValue ct3 = CurDAG->getConstant(1023, DL, MVT::i16, true, false); +// R13 = 1023; +// Instr #3 +SDNode *vload3 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct3, + // glue (or chain) input edge + SDValue(vload2, 1) + ); + +SDValue ct4 = CurDAG->getConstant(31744, DL, MVT::i16, true, false); +// R12 = 31744; +// Instr #4 +SDNode *vload4 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct4, + // glue (or chain) input edge + SDValue(vload3, 1) + ); + +SDValue ct5 = CurDAG->getConstant(-32768, DL, MVT::i16, 
true, false); +// R11 = -32768; +// Instr #5 +SDNode *vload5 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct5, + // glue (or chain) input edge + SDValue(vload4, 1) + ); + +SDValue ct6 = CurDAG->getConstant(1024, DL, MVT::i16, true, false); +// R10 = 1024; +// Instr #6 +SDNode *vload6 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct6, + // glue (or chain) input edge + SDValue(vload5, 1) + ); + +SDValue ct7 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R19 = 0; +// Instr #7 +SDNode *vload7 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct7, + // glue (or chain) input edge + SDValue(vload6, 1) + ); + +SDValue ct8 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R14 = 1; +// Instr #8 +SDNode *vload8 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct8, + // glue (or chain) input edge + SDValue(vload7, 1) + ); + +// R25 = R27 & R12; +// Instr #9 +SDNode *and0 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload4, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(vload8, 1) + ); + +// R26 = R27 & R13; +// Instr #10 +SDNode *and1 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload3, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(and0, 1) + ); + +// R21 = R23 & R12; +// Instr #11 +SDNode *and2 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload4, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(and1, 1) + ); + +// R22 = R23 & R13; +// Instr #12 +SDNode *and3 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload3, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(and2, 1) + ); + +// R17 = POPCNT(R25); +// Instr #13 
+SDNode *popcnt0 = CurDAG->getMachineNode( + Connex::POPCNT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and0, 0), + // glue (or chain) input edge + SDValue(and3, 1) + ); + +// R17 = R17 == R29; +// Instr #14 +SDNode *eq0 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(popcnt0, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(popcnt0, 1) + ); + +// R18 = R26 == R31; +// Instr #15 +SDNode *eq1 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and1, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(eq0, 1) + ); + +// R18 = R30 - R18; +// Instr #16 +SDNode *sub0 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq1, 0), + // glue (or chain) input edge + SDValue(eq1, 1) + ); + +// R18 = R18 & R17; +// Instr #17 +SDNode *and4 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq0, 0), + SDValue(sub0, 0), + // glue (or chain) input edge + SDValue(sub0, 1) + ); + +// R18 = R18 == R30; +// Instr #18 +SDNode *eq2 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and4, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and4, 1) + ); + +SDValue ct9 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #19 +SDNode *nop0 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct9, + // glue (or chain) input edge + SDValue(eq2, 1) + ); + +// WHERE_EQ; +// Instr #20 +SDNode *whereeq0 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq2, 0), + // glue (or chain) input edge + SDValue(nop0, 0) + ); + +SDValue ct10 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R14 = 0; +// Instr #21 +SDNode *vload9 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + 
ct10, + SDValue(vload8, 0), + // glue (or chain) input edge + SDValue(whereeq0, 1) + ); + +// END_WHERE; +// Instr #22 +SDNode *endwhere0 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload9, 1) + ); + +// R15 = POPCNT(R21); +// Instr #23 +SDNode *popcnt1 = CurDAG->getMachineNode( + Connex::POPCNT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and2, 0), + // glue (or chain) input edge + SDValue(endwhere0, 0) + ); + +// R15 = R15 == R29; +// Instr #24 +SDNode *eq3 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(popcnt1, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(popcnt1, 1) + ); + +// R16 = R22 == R31; +// Instr #25 +SDNode *eq4 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and3, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(eq3, 1) + ); + +// R16 = R30 - R16; +// Instr #26 +SDNode *sub1 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq4, 0), + // glue (or chain) input edge + SDValue(eq4, 1) + ); + +// R16 = R16 & R15; +// Instr #27 +SDNode *and5 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq3, 0), + SDValue(sub1, 0), + // glue (or chain) input edge + SDValue(sub1, 1) + ); + +// R16 = R16 == R30; +// Instr #28 +SDNode *eq5 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and5, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and5, 1) + ); + +SDValue ct11 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #29 +SDNode *nop1 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct11, + // glue (or chain) input edge + SDValue(eq5, 1) + ); + +// WHERE_EQ; +// Instr #30 +SDNode *whereeq1 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + 
TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq5, 0), + // glue (or chain) input edge + SDValue(nop1, 0) + ); + +SDValue ct12 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R14 = 0; +// Instr #31 +SDNode *vload10 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct12, + SDValue(vload9, 0), + // glue (or chain) input edge + SDValue(whereeq1, 1) + ); + +// END_WHERE; +// Instr #32 +SDNode *endwhere1 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload10, 1) + ); + +// R16 = R27 == R23; +// Instr #33 +SDNode *eq6 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(nodeOpSrcCast1, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(endwhere1, 0) + ); + +// R14 = R14 ^ R16; +// Instr #34 +SDNode *xor0 = CurDAG->getMachineNode( + Connex::XORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq6, 0), + SDValue(vload10, 0), + // glue (or chain) input edge + SDValue(eq6, 1) + ); + +// R16 = R27 & R23; +// Instr #35 +SDNode *and6 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(nodeOpSrcCast2, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(xor0, 1) + ); + +// R16 = R16 & R11; +// Instr #36 +SDNode *and7 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(and6, 0), + // glue (or chain) input edge + SDValue(and6, 1) + ); + +// R16 = R16 == R11; +// Instr #37 +SDNode *eq7 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and7, 0), + SDValue(vload5, 0), + // glue (or chain) input edge + SDValue(and7, 1) + ); + +// R16 = R16 & R14; +// Instr #38 +SDNode *and8 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(xor0, 0), + SDValue(eq7, 0), + // glue (or chain) input edge + SDValue(eq7, 1) + ); + +// 
R16 = R16 == R30; +// Instr #39 +SDNode *eq8 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and8, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and8, 1) + ); + +SDValue ct13 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #40 +SDNode *nop2 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct13, + // glue (or chain) input edge + SDValue(eq8, 1) + ); + +// WHERE_EQ; +// Instr #41 +SDNode *whereeq2 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq8, 0), + // glue (or chain) input edge + SDValue(nop2, 0) + ); + +// R27 = R27 ^ R11; +// Instr #42 +SDNode *xor1 = CurDAG->getMachineNode( + Connex::XORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(nodeOpSrcCast1, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(whereeq2, 1) + ); + +// R23 = R23 ^ R11; +// Instr #43 +SDNode *xor2 = CurDAG->getMachineNode( + Connex::XORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(nodeOpSrcCast2, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(xor1, 1) + ); + +SDValue ct14 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R19 = 1; +// Instr #44 +SDNode *vload11 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct14, + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(xor2, 1) + ); + +// END_WHERE; +// Instr #45 +SDNode *endwhere2 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload11, 1) + ); + +// R16 = R27 < R23; +// Instr #46 +SDNode *lt0 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(xor1, 0), + SDValue(xor2, 0), + // glue (or chain) input edge + SDValue(endwhere2, 0) + ); + +// R16 = R16 & R14; +// Instr #47 +SDNode *and9 = 
CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(xor0, 0), + SDValue(lt0, 0), + // glue (or chain) input edge + SDValue(lt0, 1) + ); + +// R16 = R16 == R30; +// Instr #48 +SDNode *eq9 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and9, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and9, 1) + ); + +SDValue ct15 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #49 +SDNode *nop3 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct15, + // glue (or chain) input edge + SDValue(eq9, 1) + ); + +// WHERE_EQ; +// Instr #50 +SDNode *whereeq3 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq9, 0), + // glue (or chain) input edge + SDValue(nop3, 0) + ); + +// R19 = R19 ^ R30; +// Instr #51 +SDNode *resF16 /*xor3*/ = CurDAG->getMachineNode( + Connex::XORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(vload11, 0), + SDValue(vload11, 0), + // glue (or chain) input edge + SDValue(whereeq3, 1) + ); + +// END_WHERE; +// Instr #52 +SDNode *lastNode /*endwhere3*/ = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, +// Alex: MVT::Glue, + MVT::Other, + // glue (or chain) input edge +// Alex: SDValue(xor3, 1) + SDValue(resF16, 1) + ); + Index: lib/Target/Connex/Select_MULTf16_OpincaaCodeGen.h =================================================================== --- lib/Target/Connex/Select_MULTf16_OpincaaCodeGen.h +++ lib/Target/Connex/Select_MULTf16_OpincaaCodeGen.h @@ -0,0 +1,3258 @@ +// From /home/asusu/LLVM/Tests/opincaa_standalone_apps/Emulate_f16/MULTf16_manual/DumpISel_OpincaaCodeGen.cpp + +// Code auto-generated by method Kernel::genLLVMISelManualCode() +// from Opincaa lib from kernel: mul.f16. 
+// It is important to put this code in the Select() method of the +// SelectionDAGISel class of your back end, after the ISelLowering pass, +// which contains the DAG Combiner, because the DAG Combiner can remove +// the getCopyToReg() we create, which can lead to the following error: +// <> assertion failed. +// Number of instructions generated: 249. + + + + +SDValue ct0 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R31 = 0; +// Instr #0 +SDNode *vload0 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct0, + // glue (or chain) input edge + SDValue(nodeOpSrcCast1, 1) + ); + +SDValue ct1 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R30 = 1; +// Instr #1 +SDNode *vload1 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct1, + // glue (or chain) input edge + SDValue(vload0, 1) + ); + +SDValue ct2 = CurDAG->getConstant(16, DL, MVT::i16, true, false); +// R29 = 16; +// Instr #2 +SDNode *vload2 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct2, + // glue (or chain) input edge + SDValue(vload1, 1) + ); + +SDValue ct3 = CurDAG->getConstant(31, DL, MVT::i16, true, false); +// R28 = 31; +// Instr #3 +SDNode *vload3 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct3, + // glue (or chain) input edge + SDValue(vload2, 1) + ); + +SDValue ct4 = CurDAG->getConstant(1023, DL, MVT::i16, true, false); +// R10 = 1023; +// Instr #4 +SDNode *vload4 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct4, + // glue (or chain) input edge + SDValue(vload3, 1) + ); + +SDValue ct5 = CurDAG->getConstant(31744, DL, MVT::i16, true, false); +// R09 = 31744; +// Instr #5 +SDNode *vload5 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct5, + // glue (or chain) input edge + SDValue(vload4, 1) + ); + +SDValue ct6 = CurDAG->getConstant(-32768, DL, MVT::i16, true, 
false); +// R08 = -32768; +// Instr #6 +SDNode *vload6 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct6, + // glue (or chain) input edge + SDValue(vload5, 1) + ); + +SDValue ct7 = CurDAG->getConstant(1024, DL, MVT::i16, true, false); +// R07 = 1024; +// Instr #7 +SDNode *vload7 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct7, + // glue (or chain) input edge + SDValue(vload6, 1) + ); + +SDValue ct8 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R55 = 0; +// Instr #8 +SDNode *vload8 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct8, + // glue (or chain) input edge + SDValue(vload7, 1) + ); + +SDValue ct9 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R00 = 1; +// Instr #9 +SDNode *vload9 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct9, + // glue (or chain) input edge + SDValue(vload8, 1) + ); + +// R24 = R27 & R08; +// Instr #10 +SDNode *and0 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload6, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(vload9, 1) + ); + +// R25 = R27 & R09; +// Instr #11 +SDNode *and1 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(and0, 1) + ); + +SDValue ct10 = CurDAG->getConstant(10, DL, MVT::i16, true, false); +// R25 = R25 >> 10; +// Instr #12 +SDNode *ishr0 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and1, 0), + ct10, + // glue (or chain) input edge + SDValue(and1, 1) + ); + +// R26 = R27 & R10; +// Instr #13 +SDNode *and2 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload4, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(ishr0, 1) + ); + 
+// R14 = R31 < R26; +// Instr #14 +SDNode *lt0 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(and2, 0), + // glue (or chain) input edge + SDValue(and2, 1) + ); + +// R13 = R25 == R31; +// Instr #15 +SDNode *eq0 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr0, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(lt0, 1) + ); + +// R12 = R13 & R14; +// Instr #16 +SDNode *and3 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt0, 0), + SDValue(eq0, 0), + // glue (or chain) input edge + SDValue(eq0, 1) + ); + +// R12 = R12 == R30; +// Instr #17 +SDNode *eq1 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and3, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and3, 1) + ); + +SDValue ct11 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #18 +SDNode *nop0 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct11, + // glue (or chain) input edge + SDValue(eq1, 1) + ); + +// WHERE_EQ; +// Instr #19 +SDNode *whereeq0 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq1, 0), + // glue (or chain) input edge + SDValue(nop0, 0) + ); + +SDValue ct12 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R25 = 1; +// Instr #20 +SDNode *vload10 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct12, + SDValue(ishr0, 0), + // glue (or chain) input edge + SDValue(whereeq0, 1) + ); + +// END_WHERE; +// Instr #21 +SDNode *endwhere0 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload10, 1) + ); + +// R14 = R25 == R28; +// Instr #22 +SDNode *eq2 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload10, 0), + 
SDValue(vload3, 0), + // glue (or chain) input edge + SDValue(endwhere0, 0) + ); + +// R14 = R14 | R13; +// Instr #23 +SDNode *or0 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq0, 0), + SDValue(eq2, 0), + // glue (or chain) input edge + SDValue(eq2, 1) + ); + +// R14 = R14 == R31; +// Instr #24 +SDNode *eq3 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or0, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(or0, 1) + ); + +SDValue ct13 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #25 +SDNode *nop1 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct13, + // glue (or chain) input edge + SDValue(eq3, 1) + ); + +// WHERE_EQ; +// Instr #26 +SDNode *whereeq1 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq3, 0), + // glue (or chain) input edge + SDValue(nop1, 0) + ); + +// R26 = R26 | R07; +// Instr #27 +SDNode *or1 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(and2, 0), + SDValue(and2, 0), + // glue (or chain) input edge + SDValue(whereeq1, 1) + ); + +// END_WHERE; +// Instr #28 +SDNode *endwhere1 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(or1, 1) + ); + +// R20 = R23 & R08; +// Instr #29 +SDNode *and4 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload6, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(endwhere1, 0) + ); + +// R21 = R23 & R09; +// Instr #30 +SDNode *and5 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(and4, 1) + ); + +SDValue ct14 = CurDAG->getConstant(10, DL, MVT::i16, true, false); +// R21 = 
R21 >> 10; +// Instr #31 +SDNode *ishr1 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and5, 0), + ct14, + // glue (or chain) input edge + SDValue(and5, 1) + ); + +// R22 = R23 & R10; +// Instr #32 +SDNode *and6 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload4, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(ishr1, 1) + ); + +// R14 = R31 < R22; +// Instr #33 +SDNode *lt1 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(and6, 0), + // glue (or chain) input edge + SDValue(and6, 1) + ); + +// R13 = R21 == R31; +// Instr #34 +SDNode *eq4 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr1, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(lt1, 1) + ); + +// R12 = R13 & R14; +// Instr #35 +SDNode *and7 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt1, 0), + SDValue(eq4, 0), + // glue (or chain) input edge + SDValue(eq4, 1) + ); + +// R12 = R12 == R30; +// Instr #36 +SDNode *eq5 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and7, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and7, 1) + ); + +SDValue ct15 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #37 +SDNode *nop2 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct15, + // glue (or chain) input edge + SDValue(eq5, 1) + ); + +// WHERE_EQ; +// Instr #38 +SDNode *whereeq2 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq5, 0), + // glue (or chain) input edge + SDValue(nop2, 0) + ); + +SDValue ct16 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R21 = 1; +// Instr #39 +SDNode *vload11 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, 
+ TYPE_VECTOR_I16, + MVT::Glue, + ct16, + SDValue(ishr1, 0), + // glue (or chain) input edge + SDValue(whereeq2, 1) + ); + +// END_WHERE; +// Instr #40 +SDNode *endwhere2 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload11, 1) + ); + +// R14 = R21 == R28; +// Instr #41 +SDNode *eq6 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload11, 0), + SDValue(vload3, 0), + // glue (or chain) input edge + SDValue(endwhere2, 0) + ); + +// R14 = R14 | R13; +// Instr #42 +SDNode *or2 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq4, 0), + SDValue(eq6, 0), + // glue (or chain) input edge + SDValue(eq6, 1) + ); + +// R14 = R14 == R31; +// Instr #43 +SDNode *eq7 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or2, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(or2, 1) + ); + +SDValue ct17 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #44 +SDNode *nop3 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct17, + // glue (or chain) input edge + SDValue(eq7, 1) + ); + +// WHERE_EQ; +// Instr #45 +SDNode *whereeq3 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq7, 0), + // glue (or chain) input edge + SDValue(nop3, 0) + ); + +// R22 = R22 | R07; +// Instr #46 +SDNode *or3 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(and6, 0), + SDValue(and6, 0), + // glue (or chain) input edge + SDValue(whereeq3, 1) + ); + +// END_WHERE; +// Instr #47 +SDNode *endwhere3 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(or3, 1) + ); + +SDValue ct18 = CurDAG->getConstant(31744, DL, MVT::i16, true, false); +// R19 = 31744; +// Instr #48 +SDNode 
*vload12 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct18, + // glue (or chain) input edge + SDValue(endwhere3, 0) + ); + +// R15 = R24 ^ R20; +// Instr #49 +SDNode *xor0 = CurDAG->getMachineNode( + Connex::XORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and4, 0), + SDValue(and0, 0), + // glue (or chain) input edge + SDValue(vload12, 1) + ); + +SDValue ct19 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R16 = 0; +// Instr #50 +SDNode *vload13 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct19, + // glue (or chain) input edge + SDValue(xor0, 1) + ); + +SDValue ct20 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R18 = 0; +// Instr #51 +SDNode *vload14 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct20, + // glue (or chain) input edge + SDValue(vload13, 1) + ); + +SDValue ct21 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R17 = 0; +// Instr #52 +SDNode *vload15 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct21, + // glue (or chain) input edge + SDValue(vload14, 1) + ); + +SDValue ct22 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R54 = 0; +// Instr #53 +SDNode *vload16 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct22, + // glue (or chain) input edge + SDValue(vload15, 1) + ); + +SDValue ct23 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R60 = 0; +// Instr #54 +SDNode *vload17 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct23, + // glue (or chain) input edge + SDValue(vload16, 1) + ); + +SDValue ct24 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R59 = 0; +// Instr #55 +SDNode *vload18 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct24, + // glue (or chain) input edge + SDValue(vload17, 1) + ); + +SDValue ct25 = 
CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R58 = 0; +// Instr #56 +SDNode *vload19 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct25, + // glue (or chain) input edge + SDValue(vload18, 1) + ); + +SDValue ct26 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R57 = 0; +// Instr #57 +SDNode *vload20 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct26, + // glue (or chain) input edge + SDValue(vload19, 1) + ); + +// R02 = R27 == R24; +// Instr #58 +SDNode *eq8 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(nodeOpSrcCast1, 0), + SDValue(and0, 0), + // glue (or chain) input edge + SDValue(vload20, 1) + ); + +// R01 = R23 == R20; +// Instr #59 +SDNode *eq9 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(nodeOpSrcCast2, 0), + SDValue(and4, 0), + // glue (or chain) input edge + SDValue(eq8, 1) + ); + +// R36 = R25 == R28; +// Instr #60 +SDNode *eq10 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload10, 0), + SDValue(vload3, 0), + // glue (or chain) input edge + SDValue(eq9, 1) + ); + +// R38 = R31 < R26; +// Instr #61 +SDNode *lt2 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(or1, 0), + // glue (or chain) input edge + SDValue(eq10, 1) + ); + +// R35 = R21 == R28; +// Instr #62 +SDNode *eq11 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload11, 0), + SDValue(vload3, 0), + // glue (or chain) input edge + SDValue(lt2, 1) + ); + +// R37 = R31 < R22; +// Instr #63 +SDNode *lt3 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(or3, 0), + // glue (or chain) input edge + SDValue(eq11, 1) + ); + +// R62 = R36 & R38; +// Instr #64 +SDNode *and8 = CurDAG->getMachineNode( + Connex::ANDV_H, + 
DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt2, 0), + SDValue(eq10, 0), + // glue (or chain) input edge + SDValue(lt3, 1) + ); + +// R63 = R35 & R37; +// Instr #65 +SDNode *and9 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt3, 0), + SDValue(eq11, 0), + // glue (or chain) input edge + SDValue(and8, 1) + ); + +// R61 = R62 | R63; +// Instr #66 +SDNode *or4 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and9, 0), + SDValue(and8, 0), + // glue (or chain) input edge + SDValue(and9, 1) + ); + +// R62 = R36 & R01; +// Instr #67 +SDNode *and10 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq9, 0), + SDValue(eq10, 0), + // glue (or chain) input edge + SDValue(or4, 1) + ); + +// R63 = R35 & R02; +// Instr #68 +SDNode *and11 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq8, 0), + SDValue(eq11, 0), + // glue (or chain) input edge + SDValue(and10, 1) + ); + +// R61 = R61 | R62; +// Instr #69 +SDNode *or5 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and10, 0), + SDValue(or4, 0), + // glue (or chain) input edge + SDValue(and11, 1) + ); + +// R61 = R61 | R63; +// Instr #70 +SDNode *or6 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and11, 0), + SDValue(or5, 0), + // glue (or chain) input edge + SDValue(or5, 1) + ); + +// R03 = R61 == R30; +// Instr #71 +SDNode *eq12 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or6, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(or6, 1) + ); + +SDValue ct27 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #72 +SDNode *nop4 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct27, + // glue (or chain) input edge + SDValue(eq12, 1) + ); + +// 
WHERE_EQ; +// Instr #73 +SDNode *whereeq4 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq12, 0), + // glue (or chain) input edge + SDValue(nop4, 0) + ); + +SDValue ct28 = CurDAG->getConstant(31745, DL, MVT::i16, true, false); +// R19 = 31745; +// Instr #74 +SDNode *vload21 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct28, + SDValue(vload12, 0), + // glue (or chain) input edge + SDValue(whereeq4, 1) + ); + +SDValue ct29 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R00 = 0; +// Instr #75 +SDNode *vload22 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct29, + SDValue(vload9, 0), + // glue (or chain) input edge + SDValue(vload21, 1) + ); + +// END_WHERE; +// Instr #76 +SDNode *endwhere4 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload22, 1) + ); + +// R39 = R36 | R35; +// Instr #77 +SDNode *or7 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq11, 0), + SDValue(eq10, 0), + // glue (or chain) input edge + SDValue(endwhere4, 0) + ); + +// R04 = R61 == R31; +// Instr #78 +SDNode *eq13 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or6, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(or7, 1) + ); + +// R03 = R39 == R30; +// Instr #79 +SDNode *eq14 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or7, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(eq13, 1) + ); + +// R03 = R03 & R04; +// Instr #80 +SDNode *and12 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq13, 0), + SDValue(eq14, 0), + // glue (or chain) input edge + SDValue(eq14, 1) + ); + +// R03 = R03 == R30; +// Instr #81 +SDNode *eq15 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + 
TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and12, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and12, 1) + ); + +SDValue ct30 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #82 +SDNode *nop5 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct30, + // glue (or chain) input edge + SDValue(eq15, 1) + ); + +// WHERE_EQ; +// Instr #83 +SDNode *whereeq5 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq15, 0), + // glue (or chain) input edge + SDValue(nop5, 0) + ); + +// R19 = R19 | R15; +// Instr #84 +SDNode *or8 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(xor0, 0), + SDValue(vload21, 0), + SDValue(vload21, 0), + // glue (or chain) input edge + SDValue(whereeq5, 1) + ); + +SDValue ct31 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R00 = 0; +// Instr #85 +SDNode *vload23 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct31, + SDValue(vload22, 0), + // glue (or chain) input edge + SDValue(or8, 1) + ); + +// END_WHERE; +// Instr #86 +SDNode *endwhere5 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload23, 1) + ); + +// R52 = R00 == R30; +// Instr #87 +SDNode *eq16 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload23, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere5, 0) + ); + +SDValue ct32 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #88 +SDNode *nop6 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct32, + // glue (or chain) input edge + SDValue(eq16, 1) + ); + +// WHERE_EQ; +// Instr #89 +SDNode *whereeq6 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq16, 0), + // glue (or chain) 
input edge + SDValue(nop6, 0) + ); + +// R16 = R21 + R25; +// Instr #90 +SDNode *add0 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload10, 0), + SDValue(vload11, 0), + SDValue(vload13, 0), + // glue (or chain) input edge + SDValue(whereeq6, 1) + ); + +SDValue ct33 = CurDAG->getConstant(15, DL, MVT::i16, true, false); +// R04 = 15; +// Instr #91 +SDNode *vload24 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct33, + SDValue(eq13, 0), + // glue (or chain) input edge + SDValue(add0, 1) + ); + +// R16 = R16 - R04; +// Instr #92 +SDNode *sub0 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(add0, 0), + SDValue(vload24, 0), + SDValue(add0, 0), + // glue (or chain) input edge + SDValue(vload24, 1) + ); + +// R26 * R22; +// Instr #93 +SDNode *mult0 = CurDAG->getMachineNode( + Connex::MULT_H, + DL, + MVT::Glue, + SDValue(or1, 0), + SDValue(or3, 0), + // glue (or chain) input edge + SDValue(sub0, 1) + ); + +// R18 = MULT_LOW(); +// Instr #94 +SDNode *multlo0 = CurDAG->getMachineNode( + Connex::MULTLO_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload14, 0), + // glue (or chain) input edge + SDValue(mult0, 0) + ); + +// R17 = MULT_HIGH(); +// Instr #95 +SDNode *multhi0 = CurDAG->getMachineNode( + Connex::MULTHI_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload15, 0), + // glue (or chain) input edge + SDValue(multlo0, 1) + ); + +// END_WHERE; +// Instr #96 +SDNode *endwhere6 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(multhi0, 1) + ); + +SDValue ct34 = CurDAG->getConstant(16, DL, MVT::i16, true, false); +// R03 = 16; +// Instr #97 +SDNode *vload25 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct34, + // glue (or chain) input edge + SDValue(endwhere6, 0) + ); + +SDValue ct35 = 
CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R06 = R18 << 0; +// Instr #98 +SDNode *ishl0 = CurDAG->getMachineNode( + Connex::ISHLV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(multlo0, 0), + ct35, + // glue (or chain) input edge + SDValue(vload25, 1) + ); + +// R04 = R31 < R17; +// Instr #99 +SDNode *lt4 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(multhi0, 0), + // glue (or chain) input edge + SDValue(ishl0, 1) + ); + +// R52 = R04 & R00; +// Instr #100 +SDNode *and13 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload23, 0), + SDValue(lt4, 0), + // glue (or chain) input edge + SDValue(lt4, 1) + ); + +// R52 = R52 == R30; +// Instr #101 +SDNode *eq17 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and13, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and13, 1) + ); + +SDValue ct36 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #102 +SDNode *nop7 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct36, + // glue (or chain) input edge + SDValue(eq17, 1) + ); + +// WHERE_EQ; +// Instr #103 +SDNode *whereeq7 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq17, 0), + // glue (or chain) input edge + SDValue(nop7, 0) + ); + +SDValue ct37 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R06 = R17 << 0; +// Instr #104 +SDNode *ishl1 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(multhi0, 0), + ct37, + SDValue(ishl0, 0), + // glue (or chain) input edge + SDValue(whereeq7, 1) + ); + +SDValue ct38 = CurDAG->getConstant(32, DL, MVT::i16, true, false); +// R03 = 32; +// Instr #105 +SDNode *vload26 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct38, + SDValue(vload25, 0), 
+ // glue (or chain) input edge + SDValue(ishl1, 1) + ); + +// END_WHERE; +// Instr #106 +SDNode *endwhere7 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload26, 1) + ); + +SDValue ct39 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R04 = R06 >> 1; +// Instr #107 +SDNode *ishr2 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl1, 0), + ct39, + // glue (or chain) input edge + SDValue(endwhere7, 0) + ); + +// R06 = R06 | R04; +// Instr #108 +SDNode *or9 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr2, 0), + SDValue(ishl1, 0), + // glue (or chain) input edge + SDValue(ishr2, 1) + ); + +SDValue ct40 = CurDAG->getConstant(2, DL, MVT::i16, true, false); +// R04 = R06 >> 2; +// Instr #109 +SDNode *ishr3 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or9, 0), + ct40, + // glue (or chain) input edge + SDValue(or9, 1) + ); + +// R06 = R06 | R04; +// Instr #110 +SDNode *or10 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr3, 0), + SDValue(or9, 0), + // glue (or chain) input edge + SDValue(ishr3, 1) + ); + +SDValue ct41 = CurDAG->getConstant(4, DL, MVT::i16, true, false); +// R04 = R06 >> 4; +// Instr #111 +SDNode *ishr4 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or10, 0), + ct41, + // glue (or chain) input edge + SDValue(or10, 1) + ); + +// R06 = R06 | R04; +// Instr #112 +SDNode *or11 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr4, 0), + SDValue(or10, 0), + // glue (or chain) input edge + SDValue(ishr4, 1) + ); + +SDValue ct42 = CurDAG->getConstant(8, DL, MVT::i16, true, false); +// R04 = R06 >> 8; +// Instr #113 +SDNode *ishr5 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + 
SDValue(or11, 0), + ct42, + // glue (or chain) input edge + SDValue(or11, 1) + ); + +// R06 = R06 | R04; +// Instr #114 +SDNode *or12 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr5, 0), + SDValue(or11, 0), + // glue (or chain) input edge + SDValue(ishr5, 1) + ); + +// R06 = ~R06; +// Instr #115 +SDNode *not0 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or12, 0), + // glue (or chain) input edge + SDValue(or12, 1) + ); + +// R05 = POPCNT(R06); +// Instr #116 +SDNode *popcnt0 = CurDAG->getMachineNode( + Connex::POPCNT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(not0, 0), + // glue (or chain) input edge + SDValue(not0, 1) + ); + +// R05 = R03 - R05; +// Instr #117 +SDNode *sub1 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload26, 0), + SDValue(popcnt0, 0), + // glue (or chain) input edge + SDValue(popcnt0, 1) + ); + +SDValue ct43 = CurDAG->getConstant(11, DL, MVT::i16, true, false); +// R04 = 11; +// Instr #118 +SDNode *vload27 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct43, + // glue (or chain) input edge + SDValue(sub1, 1) + ); + +// R04 = R05 - R04; +// Instr #119 +SDNode *sub2 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub1, 0), + SDValue(vload27, 0), + // glue (or chain) input edge + SDValue(vload27, 1) + ); + +// R12 = R04 < R31; +// Instr #120 +SDNode *lt5 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub2, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(sub2, 1) + ); + +// R52 = R12 & R00; +// Instr #121 +SDNode *and14 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload23, 0), + SDValue(lt5, 0), + // glue (or chain) input edge + SDValue(lt5, 1) + ); + +// R52 = R52 == R30; +// Instr #122 +SDNode *eq18 = 
CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and14, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and14, 1) + ); + +SDValue ct44 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #123 +SDNode *nop8 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct44, + // glue (or chain) input edge + SDValue(eq18, 1) + ); + +// WHERE_EQ; +// Instr #124 +SDNode *whereeq8 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq18, 0), + // glue (or chain) input edge + SDValue(nop8, 0) + ); + +// R03 = R31 - R04; +// Instr #125 +SDNode *sub3 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(sub2, 0), + SDValue(vload26, 0), + // glue (or chain) input edge + SDValue(whereeq8, 1) + ); + +// R18 = R18 << R03; +// Instr #126 +SDNode *shl0 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(multlo0, 0), + SDValue(sub3, 0), + SDValue(multlo0, 0), + // glue (or chain) input edge + SDValue(sub3, 1) + ); + +// END_WHERE; +// Instr #127 +SDNode *endwhere8 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(shl0, 1) + ); + +// R12 = R31 < R04; +// Instr #128 +SDNode *lt6 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(sub2, 0), + // glue (or chain) input edge + SDValue(endwhere8, 0) + ); + +// R52 = R12 & R00; +// Instr #129 +SDNode *and15 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload23, 0), + SDValue(lt6, 0), + // glue (or chain) input edge + SDValue(lt6, 1) + ); + +// R52 = R52 == R30; +// Instr #130 +SDNode *eq19 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and15, 0), + SDValue(vload1, 0), + // 
glue (or chain) input edge + SDValue(and15, 1) + ); + +SDValue ct45 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #131 +SDNode *nop9 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct45, + // glue (or chain) input edge + SDValue(eq19, 1) + ); + +// WHERE_EQ; +// Instr #132 +SDNode *whereeq9 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq19, 0), + // glue (or chain) input edge + SDValue(nop9, 0) + ); + +SDValue ct46 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R03 = R17 << 0; +// Instr #133 +SDNode *ishl2 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(multhi0, 0), + ct46, + SDValue(sub3, 0), + // glue (or chain) input edge + SDValue(whereeq9, 1) + ); + +// R17 = R17 >> R04; +// Instr #134 +SDNode *shr0 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(multhi0, 0), + SDValue(sub2, 0), + SDValue(multhi0, 0), + // glue (or chain) input edge + SDValue(ishl2, 1) + ); + +SDValue ct47 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R12 = R04 << 0; +// Instr #135 +SDNode *ishl3 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub2, 0), + ct47, + SDValue(lt6, 0), + // glue (or chain) input edge + SDValue(shr0, 1) + ); + +// R04 = R29 - R04; +// Instr #136 +SDNode *sub4 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(sub2, 0), + SDValue(sub2, 0), + // glue (or chain) input edge + SDValue(ishl3, 1) + ); + +SDValue ct48 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R55 = R18 << 0; +// Instr #137 +SDNode *ishl4 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl0, 0), + ct48, + SDValue(vload8, 0), + // glue (or chain) input edge + SDValue(sub4, 1) + ); 
+ +// R55 = R55 << R04; +// Instr #138 +SDNode *shl1 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl4, 0), + SDValue(sub4, 0), + SDValue(ishl4, 0), + // glue (or chain) input edge + SDValue(ishl4, 1) + ); + +// R55 = R55 >> R04; +// Instr #139 +SDNode *shr1 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl1, 0), + SDValue(sub4, 0), + SDValue(shl1, 0), + // glue (or chain) input edge + SDValue(shl1, 1) + ); + +SDValue ct49 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R54 = R12 << 0; +// Instr #140 +SDNode *ishl5 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl3, 0), + ct49, + SDValue(vload16, 0), + // glue (or chain) input edge + SDValue(shr1, 1) + ); + +// R18 = R18 >> R12; +// Instr #141 +SDNode *shr2 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl0, 0), + SDValue(ishl3, 0), + SDValue(shl0, 0), + // glue (or chain) input edge + SDValue(ishl5, 1) + ); + +// R03 = R03 << R04; +// Instr #142 +SDNode *shl2 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl2, 0), + SDValue(sub4, 0), + SDValue(ishl2, 0), + // glue (or chain) input edge + SDValue(shr2, 1) + ); + +// R18 = R18 | R03; +// Instr #143 +SDNode *or13 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl2, 0), + SDValue(shr2, 0), + SDValue(shr2, 0), + // glue (or chain) input edge + SDValue(shl2, 1) + ); + +// END_WHERE; +// Instr #144 +SDNode *endwhere9 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(or13, 1) + ); + +// R52 = R00 == R30; +// Instr #145 +SDNode *eq20 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload23, 0), + SDValue(vload1, 0), + // glue (or chain) input 
edge + SDValue(endwhere9, 0) + ); + +SDValue ct50 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #146 +SDNode *nop10 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct50, + // glue (or chain) input edge + SDValue(eq20, 1) + ); + +// WHERE_EQ; +// Instr #147 +SDNode *whereeq10 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq20, 0), + // glue (or chain) input edge + SDValue(nop10, 0) + ); + +SDValue ct51 = CurDAG->getConstant(21, DL, MVT::i16, true, false); +// R04 = 21; +// Instr #148 +SDNode *vload28 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct51, + SDValue(sub4, 0), + // glue (or chain) input edge + SDValue(whereeq10, 1) + ); + +// R04 = R04 - R05; +// Instr #149 +SDNode *sub5 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload28, 0), + SDValue(sub1, 0), + SDValue(vload28, 0), + // glue (or chain) input edge + SDValue(vload28, 1) + ); + +// R16 = R16 - R04; +// Instr #150 +SDNode *sub6 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub0, 0), + SDValue(sub5, 0), + SDValue(sub0, 0), + // glue (or chain) input edge + SDValue(sub5, 1) + ); + +// END_WHERE; +// Instr #151 +SDNode *endwhere10 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sub6, 1) + ); + +SDValue ct52 = CurDAG->getConstant(15, DL, MVT::i16, true, false); +// R03 = 15; +// Instr #152 +SDNode *vload29 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct52, + // glue (or chain) input edge + SDValue(endwhere10, 0) + ); + +// R04 = R30 - R16; +// Instr #153 +SDNode *sub7 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(sub6, 0), + // glue (or chain) input edge + SDValue(vload29, 
1) + ); + +// R12 = R03 < R04; +// Instr #154 +SDNode *lt7 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload29, 0), + SDValue(sub7, 0), + // glue (or chain) input edge + SDValue(sub7, 1) + ); + +// R52 = R12 & R00; +// Instr #155 +SDNode *and16 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload23, 0), + SDValue(lt7, 0), + // glue (or chain) input edge + SDValue(lt7, 1) + ); + +// R52 = R52 == R30; +// Instr #156 +SDNode *eq21 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and16, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and16, 1) + ); + +SDValue ct53 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #157 +SDNode *nop11 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct53, + // glue (or chain) input edge + SDValue(eq21, 1) + ); + +// WHERE_EQ; +// Instr #158 +SDNode *whereeq11 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq21, 0), + // glue (or chain) input edge + SDValue(nop11, 0) + ); + +// R54 = R55 == R31; +// Instr #159 +SDNode *eq22 = CurDAG->getMachineNode( + Connex::EQ_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shr1, 0), + SDValue(vload0, 0), + SDValue(ishl5, 0), + // glue (or chain) input edge + SDValue(whereeq11, 1) + ); + +// R54 = R30 - R54; +// Instr #160 +SDNode *sub8 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq22, 0), + SDValue(eq22, 0), + // glue (or chain) input edge + SDValue(eq22, 1) + ); + +SDValue ct54 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R55 = R18 << 0; +// Instr #161 +SDNode *ishl6 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or13, 0), + ct54, + SDValue(shr1, 0), + // glue (or chain) input edge + 
SDValue(sub8, 1) + ); + +// R55 = R55 | R54; +// Instr #162 +SDNode *or14 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub8, 0), + SDValue(ishl6, 0), + SDValue(ishl6, 0), + // glue (or chain) input edge + SDValue(ishl6, 1) + ); + +SDValue ct55 = CurDAG->getConstant(16, DL, MVT::i16, true, false); +// R54 = 16; +// Instr #163 +SDNode *vload30 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct55, + SDValue(sub8, 0), + // glue (or chain) input edge + SDValue(or14, 1) + ); + +SDValue ct56 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R18 = R17 << 0; +// Instr #164 +SDNode *ishl7 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shr0, 0), + ct56, + SDValue(or13, 0), + // glue (or chain) input edge + SDValue(vload30, 1) + ); + +SDValue ct57 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R17 = 0; +// Instr #165 +SDNode *vload31 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct57, + SDValue(shr0, 0), + // glue (or chain) input edge + SDValue(ishl7, 1) + ); + +// R04 = R04 - R29; +// Instr #166 +SDNode *sub9 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub7, 0), + SDValue(vload2, 0), + SDValue(sub7, 0), + // glue (or chain) input edge + SDValue(vload31, 1) + ); + +// R16 = R29 + R16; +// Instr #167 +SDNode *add1 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub6, 0), + SDValue(vload2, 0), + SDValue(sub6, 0), + // glue (or chain) input edge + SDValue(sub9, 1) + ); + +// END_WHERE; +// Instr #168 +SDNode *endwhere11 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(add1, 1) + ); + +// R12 = R03 < R04; +// Instr #169 +SDNode *lt8 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + 
MVT::Glue, + SDValue(vload29, 0), + SDValue(sub9, 0), + // glue (or chain) input edge + SDValue(endwhere11, 0) + ); + +// R52 = R12 & R00; +// Instr #170 +SDNode *and17 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload23, 0), + SDValue(lt8, 0), + // glue (or chain) input edge + SDValue(lt8, 1) + ); + +// R52 = R52 == R30; +// Instr #171 +SDNode *eq23 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and17, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and17, 1) + ); + +SDValue ct58 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #172 +SDNode *nop12 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct58, + // glue (or chain) input edge + SDValue(eq23, 1) + ); + +// WHERE_EQ; +// Instr #173 +SDNode *whereeq12 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq23, 0), + // glue (or chain) input edge + SDValue(nop12, 0) + ); + +// R54 = R55 == R31; +// Instr #174 +SDNode *eq24 = CurDAG->getMachineNode( + Connex::EQ_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or14, 0), + SDValue(vload0, 0), + SDValue(vload30, 0), + // glue (or chain) input edge + SDValue(whereeq12, 1) + ); + +// R54 = R30 - R54; +// Instr #175 +SDNode *sub10 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq24, 0), + SDValue(eq24, 0), + // glue (or chain) input edge + SDValue(eq24, 1) + ); + +SDValue ct59 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R55 = R18 << 0; +// Instr #176 +SDNode *ishl8 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl7, 0), + ct59, + SDValue(or14, 0), + // glue (or chain) input edge + SDValue(sub10, 1) + ); + +// R55 = R55 | R54; +// Instr #177 +SDNode *or15 = CurDAG->getMachineNode( + 
Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub10, 0), + SDValue(ishl8, 0), + SDValue(ishl8, 0), + // glue (or chain) input edge + SDValue(ishl8, 1) + ); + +SDValue ct60 = CurDAG->getConstant(16, DL, MVT::i16, true, false); +// R54 = 16; +// Instr #178 +SDNode *vload32 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct60, + SDValue(sub10, 0), + // glue (or chain) input edge + SDValue(or15, 1) + ); + +SDValue ct61 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R18 = 0; +// Instr #179 +SDNode *vload33 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct61, + SDValue(ishl7, 0), + // glue (or chain) input edge + SDValue(vload32, 1) + ); + +// R04 = R04 - R29; +// Instr #180 +SDNode *sub11 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub9, 0), + SDValue(vload2, 0), + SDValue(sub9, 0), + // glue (or chain) input edge + SDValue(vload33, 1) + ); + +// R16 = R29 + R16; +// Instr #181 +SDNode *add2 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(add1, 0), + SDValue(vload2, 0), + SDValue(add1, 0), + // glue (or chain) input edge + SDValue(sub11, 1) + ); + +// END_WHERE; +// Instr #182 +SDNode *endwhere12 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(add2, 1) + ); + +// R12 = R16 < R30; +// Instr #183 +SDNode *lt9 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(add2, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere12, 0) + ); + +// R52 = R12 & R00; +// Instr #184 +SDNode *and18 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload23, 0), + SDValue(lt9, 0), + // glue (or chain) input edge + SDValue(lt9, 1) + ); + +// R52 = R52 == R30; +// Instr #185 +SDNode *eq25 = 
CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and18, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and18, 1) + ); + +SDValue ct62 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #186 +SDNode *nop13 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct62, + // glue (or chain) input edge + SDValue(eq25, 1) + ); + +// WHERE_EQ; +// Instr #187 +SDNode *whereeq13 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq25, 0), + // glue (or chain) input edge + SDValue(nop13, 0) + ); + +// R04 = R30 - R16; +// Instr #188 +SDNode *sub12 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(add2, 0), + SDValue(sub11, 0), + // glue (or chain) input edge + SDValue(whereeq13, 1) + ); + +// R54 = R55 == R31; +// Instr #189 +SDNode *eq26 = CurDAG->getMachineNode( + Connex::EQ_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or15, 0), + SDValue(vload0, 0), + SDValue(vload32, 0), + // glue (or chain) input edge + SDValue(sub12, 1) + ); + +// R54 = R30 - R54; +// Instr #190 +SDNode *sub13 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq26, 0), + SDValue(eq26, 0), + // glue (or chain) input edge + SDValue(eq26, 1) + ); + +SDValue ct63 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R55 = R18 << 0; +// Instr #191 +SDNode *ishl9 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload33, 0), + ct63, + SDValue(or15, 0), + // glue (or chain) input edge + SDValue(sub13, 1) + ); + +// R55 = R55 | R54; +// Instr #192 +SDNode *or16 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub13, 0), + SDValue(ishl9, 0), + SDValue(ishl9, 0), + // glue (or chain) input edge + 
SDValue(ishl9, 1) + ); + +// R03 = R29 - R04; +// Instr #193 +SDNode *sub14 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(sub12, 0), + SDValue(vload29, 0), + // glue (or chain) input edge + SDValue(or16, 1) + ); + +// R55 = R55 << R03; +// Instr #194 +SDNode *shl3 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or16, 0), + SDValue(sub14, 0), + SDValue(or16, 0), + // glue (or chain) input edge + SDValue(sub14, 1) + ); + +// R55 = R55 >> R03; +// Instr #195 +SDNode *shr3 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl3, 0), + SDValue(sub14, 0), + SDValue(shl3, 0), + // glue (or chain) input edge + SDValue(shl3, 1) + ); + +SDValue ct64 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R54 = R04 << 0; +// Instr #196 +SDNode *ishl10 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub12, 0), + ct64, + SDValue(sub13, 0), + // glue (or chain) input edge + SDValue(shr3, 1) + ); + +// R18 = R18 >> R04; +// Instr #197 +SDNode *shr4 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload33, 0), + SDValue(sub12, 0), + SDValue(vload33, 0), + // glue (or chain) input edge + SDValue(ishl10, 1) + ); + +SDValue ct65 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R03 = R17 << 0; +// Instr #198 +SDNode *ishl11 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload31, 0), + ct65, + SDValue(sub14, 0), + // glue (or chain) input edge + SDValue(shr4, 1) + ); + +// R17 = R17 >> R04; +// Instr #199 +SDNode *shr5 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload31, 0), + SDValue(sub12, 0), + SDValue(vload31, 0), + // glue (or chain) input edge + SDValue(ishl11, 1) + ); + +// R04 = R29 - 
R04; +// Instr #200 +SDNode *sub15 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(sub12, 0), + SDValue(sub12, 0), + // glue (or chain) input edge + SDValue(shr5, 1) + ); + +// R03 = R03 << R04; +// Instr #201 +SDNode *shl4 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl11, 0), + SDValue(sub15, 0), + SDValue(ishl11, 0), + // glue (or chain) input edge + SDValue(sub15, 1) + ); + +// R18 = R18 | R03; +// Instr #202 +SDNode *or17 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl4, 0), + SDValue(shr4, 0), + SDValue(shr4, 0), + // glue (or chain) input edge + SDValue(shl4, 1) + ); + +SDValue ct66 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R16 = 1; +// Instr #203 +SDNode *vload34 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct66, + SDValue(add2, 0), + // glue (or chain) input edge + SDValue(or17, 1) + ); + +// END_WHERE; +// Instr #204 +SDNode *endwhere13 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload34, 1) + ); + +SDValue ct67 = CurDAG->getConstant(30, DL, MVT::i16, true, false); +// R12 = 30; +// Instr #205 +SDNode *vload35 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct67, + // glue (or chain) input edge + SDValue(endwhere13, 0) + ); + +// R12 = R12 < R16; +// Instr #206 +SDNode *lt10 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload35, 0), + SDValue(vload34, 0), + // glue (or chain) input edge + SDValue(vload35, 1) + ); + +// R52 = R12 & R00; +// Instr #207 +SDNode *and19 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload23, 0), + SDValue(lt10, 0), + // glue (or chain) input edge + SDValue(lt10, 1) + ); + +// R52 = R52 == R30; 
+// Instr #208 +SDNode *eq27 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and19, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and19, 1) + ); + +SDValue ct68 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #209 +SDNode *nop14 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct68, + // glue (or chain) input edge + SDValue(eq27, 1) + ); + +// WHERE_EQ; +// Instr #210 +SDNode *whereeq14 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq27, 0), + // glue (or chain) input edge + SDValue(nop14, 0) + ); + +// R19 = R19 | R15; +// Instr #211 +SDNode *or18 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(xor0, 0), + SDValue(or8, 0), + SDValue(or8, 0), + // glue (or chain) input edge + SDValue(whereeq14, 1) + ); + +SDValue ct69 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R00 = 0; +// Instr #212 +SDNode *vload36 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct69, + SDValue(vload23, 0), + // glue (or chain) input edge + SDValue(or18, 1) + ); + +// END_WHERE; +// Instr #213 +SDNode *endwhere14 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload36, 1) + ); + +SDValue ct70 = CurDAG->getConstant(1024, DL, MVT::i16, true, false); +// R04 = 1024; +// Instr #214 +SDNode *vload37 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct70, + // glue (or chain) input edge + SDValue(endwhere14, 0) + ); + +// R04 = R18 < R04; +// Instr #215 +SDNode *lt11 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or17, 0), + SDValue(vload37, 0), + // glue (or chain) input edge + SDValue(vload37, 1) + ); + +// R12 = R16 == R30; +// Instr #216 +SDNode *eq28 = CurDAG->getMachineNode( + 
Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload34, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(lt11, 1) + ); + +// R12 = R12 & R04; +// Instr #217 +SDNode *and20 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt11, 0), + SDValue(eq28, 0), + // glue (or chain) input edge + SDValue(eq28, 1) + ); + +// R12 = R12 == R30; +// Instr #218 +SDNode *eq29 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and20, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and20, 1) + ); + +// R52 = R12 & R00; +// Instr #219 +SDNode *and21 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload36, 0), + SDValue(eq29, 0), + // glue (or chain) input edge + SDValue(eq29, 1) + ); + +// R52 = R52 == R30; +// Instr #220 +SDNode *eq30 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and21, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and21, 1) + ); + +SDValue ct71 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #221 +SDNode *nop15 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct71, + // glue (or chain) input edge + SDValue(eq30, 1) + ); + +// WHERE_EQ; +// Instr #222 +SDNode *whereeq15 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq30, 0), + // glue (or chain) input edge + SDValue(nop15, 0) + ); + +SDValue ct72 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R16 = 0; +// Instr #223 +SDNode *vload38 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct72, + SDValue(vload34, 0), + // glue (or chain) input edge + SDValue(whereeq15, 1) + ); + +// END_WHERE; +// Instr #224 +SDNode *endwhere15 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input 
edge + SDValue(vload38, 1) + ); + +// R52 = R00 == R30; +// Instr #225 +SDNode *eq31 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload36, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere15, 0) + ); + +SDValue ct73 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #226 +SDNode *nop16 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct73, + // glue (or chain) input edge + SDValue(eq31, 1) + ); + +// WHERE_EQ; +// Instr #227 +SDNode *whereeq16 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq31, 0), + // glue (or chain) input edge + SDValue(nop16, 0) + ); + +// R60 = R18 & R30; +// Instr #228 +SDNode *and22 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(or17, 0), + SDValue(vload17, 0), + // glue (or chain) input edge + SDValue(whereeq16, 1) + ); + +// R54 = R54 - R30; +// Instr #229 +SDNode *sub16 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl10, 0), + SDValue(vload1, 0), + SDValue(ishl10, 0), + // glue (or chain) input edge + SDValue(and22, 1) + ); + +// R54 = R30 << R54; +// Instr #230 +SDNode *shl5 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(sub16, 0), + SDValue(sub16, 0), + // glue (or chain) input edge + SDValue(sub16, 1) + ); + +// R59 = R55 & R54; +// Instr #231 +SDNode *and23 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl5, 0), + SDValue(shr3, 0), + SDValue(vload18, 0), + // glue (or chain) input edge + SDValue(shl5, 1) + ); + +// R55 = R55 ^ R59; +// Instr #232 +SDNode *xor1 = CurDAG->getMachineNode( + Connex::XORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and23, 0), + SDValue(shr3, 0), + 
SDValue(shr3, 0), + // glue (or chain) input edge + SDValue(and23, 1) + ); + +// R59 = R59 == R31; +// Instr #233 +SDNode *eq32 = CurDAG->getMachineNode( + Connex::EQ_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and23, 0), + SDValue(vload0, 0), + SDValue(and23, 0), + // glue (or chain) input edge + SDValue(xor1, 1) + ); + +// R59 = R30 - R59; +// Instr #234 +SDNode *sub17 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq32, 0), + SDValue(eq32, 0), + // glue (or chain) input edge + SDValue(eq32, 1) + ); + +// R58 = R55 == R31; +// Instr #235 +SDNode *eq33 = CurDAG->getMachineNode( + Connex::EQ_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(xor1, 0), + SDValue(vload0, 0), + SDValue(vload19, 0), + // glue (or chain) input edge + SDValue(sub17, 1) + ); + +// R58 = R30 - R58; +// Instr #236 +SDNode *sub18 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq33, 0), + SDValue(eq33, 0), + // glue (or chain) input edge + SDValue(eq33, 1) + ); + +// R57 = R58 | R60; +// Instr #237 +SDNode *or19 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and22, 0), + SDValue(sub18, 0), + SDValue(vload20, 0), + // glue (or chain) input edge + SDValue(sub18, 1) + ); + +// R57 = R57 & R59; +// Instr #238 +SDNode *and24 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub17, 0), + SDValue(or19, 0), + SDValue(or19, 0), + // glue (or chain) input edge + SDValue(or19, 1) + ); + +// END_WHERE; +// Instr #239 +SDNode *endwhere16 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(and24, 1) + ); + +// R52 = R00 == R30; +// Instr #240 +SDNode *eq34 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload36, 0), + SDValue(vload1, 0), + 
// glue (or chain) input edge + SDValue(endwhere16, 0) + ); + +SDValue ct74 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #241 +SDNode *nop17 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct74, + // glue (or chain) input edge + SDValue(eq34, 1) + ); + +// WHERE_EQ; +// Instr #242 +SDNode *whereeq17 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq34, 0), + // glue (or chain) input edge + SDValue(nop17, 0) + ); + +SDValue ct75 = CurDAG->getConstant(10, DL, MVT::i16, true, false); +// R19 = R16 << 10; +// Instr #243 +SDNode *ishl12 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload38, 0), + ct75, + SDValue(or18, 0), + // glue (or chain) input edge + SDValue(whereeq17, 1) + ); + +// R18 = R18 & R10; +// Instr #244 +SDNode *and25 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload4, 0), + SDValue(or17, 0), + SDValue(or17, 0), + // glue (or chain) input edge + SDValue(ishl12, 1) + ); + +// R19 = R19 | R18; +// Instr #245 +SDNode *or20 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and25, 0), + SDValue(ishl12, 0), + SDValue(ishl12, 0), + // glue (or chain) input edge + SDValue(and25, 1) + ); + +// R19 = R57 + R19; +// Instr #246 +SDNode *add3 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or20, 0), + SDValue(and24, 0), + SDValue(or20, 0), + // glue (or chain) input edge + SDValue(or20, 1) + ); + +// R19 = R19 | R15; +// Instr #247 +SDNode *resF16 /*or21*/ = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(xor0, 0), + SDValue(add3, 0), + SDValue(add3, 0), + // glue (or chain) input edge + SDValue(add3, 1) + ); + +// END_WHERE; +// Instr #248 +SDNode *lastNode /*endwhere17*/ = 
CurDAG->getMachineNode( + Connex::END_WHERE, + DL, +// Alex: MVT::Glue, + MVT::Other, + // glue (or chain) input edge +// Alex: SDValue(or21, 1) + SDValue(resF16, 1) + ); + Index: lib/Target/Connex/Select_MULTi32_ComplementedRepresentation_OpincaaCodeGen.h =================================================================== --- lib/Target/Connex/Select_MULTi32_ComplementedRepresentation_OpincaaCodeGen.h +++ lib/Target/Connex/Select_MULTi32_ComplementedRepresentation_OpincaaCodeGen.h @@ -0,0 +1,345 @@ +// Copied from /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/opincaa_standalone_apps/Emulate_i32/MULTi32_manual_Complemented_radix_216_representation/DumpISel_OpincaaCodeGen_old27_220.cpp + + +// R27 is REG_SRC1. It is represented by result of nodeOpSrcCast1. +// R28 is REG_SRC2. It is represented by result of nodeOpSrcCast2. + + +// Code auto-generated by method Kernel::genLLVMISelManualCode() +// from Opincaa lib from kernel: simpleIoTest_allowOverwrite123456. +// It is important to put this code in the Select() method of the +// SelectionDAGISel class of your back end, after the ISelLowering pass, +// which contains the DAG Combiner, because the DAG Combiner can remove +// the getCopyToReg() we create, which can lead to the following error: +// <> assertion failed. +// Number of instructions generated: 27. 
+ + + + +SDValue ct0 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R31 = 0; +// Instr #0 +SDNode *vload0 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + ct0, + // glue (or chain) input edge + SDValue(nodeOpSrcCast2, 1) + ); + +SDValue ct1 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R30 = 1; +// Instr #1 +SDNode *vload1 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + ct1, + // glue (or chain) input edge + SDValue(vload0, 1) + ); + +// MULT_U(R28, R27); +// Instr #2 +SDNode *mult_u0 = CurDAG->getMachineNode( + Connex::MULT_U_H, + DL, + MVT::Other, + SDValue(nodeOpSrcCast2, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(vload1, 1) + ); + +// R26 = MULT_LOW(); +// Instr #3 +SDNode *multlo0 = CurDAG->getMachineNode( + Connex::MULTLO_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // glue (or chain) input edge + SDValue(mult_u0, 0) + ); + +// R25 = MULT_HIGH(); +// Instr #4 +SDNode *multhi0 = CurDAG->getMachineNode( + Connex::MULTHI_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // glue (or chain) input edge + SDValue(multlo0, 1) + ); + +// CELL_SHR(R27, R30); +// Instr #5 +SDNode *cellshr0 = CurDAG->getMachineNode( + Connex::CELLSHR_H, + DL, + MVT::Other, + SDValue(nodeOpSrcCast1, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(multhi0, 1) + ); + +SDValue ct2 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #6 +SDNode *nop0 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Other, + ct2, + // glue (or chain) input edge + SDValue(cellshr0, 0) + ); + +// R24 = SHIFT_REG; +// Instr #7 +SDNode *ldsh0 = CurDAG->getMachineNode( + Connex::LDSH_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // glue (or chain) input edge + SDValue(nop0, 0) + ); + +// MULT_U(R24, R28); +// Instr #8 +SDNode *mult_u1 = CurDAG->getMachineNode( + Connex::MULT_U_H, + DL, + MVT::Other, + SDValue(ldsh0, 0), + 
SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(ldsh0, 1) + ); + +// R24 = MULT_LOW(); +// Instr #9 +SDNode *multlo1 = CurDAG->getMachineNode( + Connex::MULTLO_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // glue (or chain) input edge + SDValue(mult_u1, 0) + ); + +// CELL_SHR(R28, R30); +// Instr #10 +SDNode *cellshr1 = CurDAG->getMachineNode( + Connex::CELLSHR_H, + DL, + MVT::Other, + SDValue(nodeOpSrcCast2, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(multlo1, 1) + ); + +SDValue ct3 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #11 +SDNode *nop1 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Other, + ct3, + // glue (or chain) input edge + SDValue(cellshr1, 0) + ); + +// R23 = SHIFT_REG; +// Instr #12 +SDNode *ldsh1 = CurDAG->getMachineNode( + Connex::LDSH_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // glue (or chain) input edge + SDValue(nop1, 0) + ); + +// MULT_U(R23, R27); +// Instr #13 +SDNode *mult_u2 = CurDAG->getMachineNode( + Connex::MULT_U_H, + DL, + MVT::Other, + SDValue(ldsh1, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(ldsh1, 1) + ); + +// R23 = MULT_LOW(); +// Instr #14 +SDNode *multlo2 = CurDAG->getMachineNode( + Connex::MULTLO_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // glue (or chain) input edge + SDValue(mult_u2, 0) + ); + +// CELL_SHR(R25, R30); +// Instr #15 +SDNode *cellshr2 = CurDAG->getMachineNode( + Connex::CELLSHR_H, + DL, + MVT::Other, + SDValue(multhi0, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(multlo2, 1) + ); + +SDValue ct4 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #16 +SDNode *nop2 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Other, + ct4, + // glue (or chain) input edge + SDValue(cellshr2, 0) + ); + +// R21 = SHIFT_REG; +// Instr #17 +SDNode *ldsh2 = CurDAG->getMachineNode( + Connex::LDSH_H, + DL, + 
TYPE_VECTOR_I16, + MVT::Other, + // glue (or chain) input edge + SDValue(nop2, 0) + ); + +// R14 = INDEX; +// Instr #18 +SDNode *ldix0 = CurDAG->getMachineNode( + Connex::LDIX_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + // glue (or chain) input edge + SDValue(ldsh2, 1) + ); + +// R13 = R14 & R30; +// Instr #19 +SDNode *and0 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + SDValue(vload1, 0), + SDValue(ldix0, 0), + // glue (or chain) input edge + SDValue(ldix0, 1) + ); + +// R12 = R13 == R30; +// Instr #20 +SDNode *eq0 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + SDValue(and0, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and0, 1) + ); + +SDValue ct5 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #21 +SDNode *nop3 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Other, + ct5, + // glue (or chain) input edge + SDValue(eq0, 1) + ); + +// WHERE_EQ; +// Instr #22 +SDNode *whereeq0 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Other, + SDValue(eq0, 0), + // glue (or chain) input edge + SDValue(nop3, 0) + ); + +// R26 = R21 | R21; +// Instr #23 +SDNode *or0 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + SDValue(ldsh2, 0), + SDValue(ldsh2, 0), + SDValue(multlo0, 0), + // glue (or chain) input edge + SDValue(whereeq0, 1) + ); + +// R26 = R24 + R26; +// Instr #24 +SDNode *add0 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + SDValue(or0, 0), + SDValue(multlo1, 0), + SDValue(or0, 0), + // glue (or chain) input edge + SDValue(or0, 1) + ); + +// R26 = R23 + R26; +// Instr #25 +SDNode *resH /*add1*/ = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Other, + SDValue(add0, 0), + SDValue(multlo2, 0), + SDValue(add0, 0), + // glue (or chain) input edge + SDValue(add0, 1) + ); + 
+// END_WHERE; +// Instr #26 +SDNode *lastNode /*endwhere0*/ = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Other, + // glue (or chain) input edge + SDValue(resH /*add1*/, 1) + ); + + +//SDNode *lastNode = resF16; Index: lib/Target/Connex/Select_REDf16_OpincaaCodeGen.h =================================================================== --- lib/Target/Connex/Select_REDf16_OpincaaCodeGen.h +++ lib/Target/Connex/Select_REDf16_OpincaaCodeGen.h @@ -0,0 +1,1554 @@ +// From /home/asusu/LLVM/Tests/opincaa_standalone_apps/Emulate_f16/REDf16_manual/DumpISel_OpincaaCodeGen.cpp + +// Code auto-generated by method Kernel::genLLVMISelManualCode() +// from Opincaa lib from kernel: red.f16. +// It is important to put this code in the Select() method of the +// SelectionDAGISel class of your back end, after the ISelLowering pass, +// which contains the DAG Combiner, because the DAG Combiner can remove +// the getCopyToReg() we create, which can lead to the following error: +// <> assertion failed. +// Number of instructions generated: 122. 
+ + + +SDValue ct0 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R31 = 1; +// Instr #0 +SDNode *vload0 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct0, + // glue (or chain) input edge + SDValue(nodeOpSrcCast, 1) + ); + +SDValue ct1 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R30 = 0; +// Instr #1 +SDNode *vload1 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct1, + // glue (or chain) input edge + SDValue(vload0, 1) + ); + +SDValue ct2 = CurDAG->getConstant(31, DL, MVT::i16, true, false); +// R29 = 31; +// Instr #2 +SDNode *vload2 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct2, + // glue (or chain) input edge + SDValue(vload1, 1) + ); + +SDValue ct3 = CurDAG->getConstant(1023, DL, MVT::i16, true, false); +// R13 = 1023; +// Instr #3 +SDNode *vload3 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct3, + // glue (or chain) input edge + SDValue(vload2, 1) + ); + +SDValue ct4 = CurDAG->getConstant(31744, DL, MVT::i16, true, false); +// R12 = 31744; +// Instr #4 +SDNode *vload4 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct4, + // glue (or chain) input edge + SDValue(vload3, 1) + ); + +SDValue ct5 = CurDAG->getConstant(-32768, DL, MVT::i16, true, false); +// R11 = -32768; +// Instr #5 +SDNode *vload5 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct5, + // glue (or chain) input edge + SDValue(vload4, 1) + ); + +SDValue ct6 = CurDAG->getConstant(1024, DL, MVT::i16, true, false); +// R10 = 1024; +// Instr #6 +SDNode *vload6 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct6, + // glue (or chain) input edge + SDValue(vload5, 1) + ); + +// R25 = R28 & R11; +// Instr #7 +SDNode *and0 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 
0), + SDValue(nodeOpSrcCast, 0), + // glue (or chain) input edge + SDValue(vload6, 1) + ); + +// R26 = R28 & R12; +// Instr #8 +SDNode *and1 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload4, 0), + SDValue(nodeOpSrcCast, 0), + // glue (or chain) input edge + SDValue(and0, 1) + ); + +SDValue ct7 = CurDAG->getConstant(10, DL, MVT::i16, true, false); +// R26 = R26 >> 10; +// Instr #9 +SDNode *ishr0 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and1, 0), + ct7, + // glue (or chain) input edge + SDValue(and1, 1) + ); + +// R27 = R28 & R13; +// Instr #10 +SDNode *and2 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload3, 0), + SDValue(nodeOpSrcCast, 0), + // glue (or chain) input edge + SDValue(ishr0, 1) + ); + +// R17 = R30 < R27; +// Instr #11 +SDNode *lt0 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(and2, 0), + // glue (or chain) input edge + SDValue(and2, 1) + ); + +// R16 = R26 == R30; +// Instr #12 +SDNode *eq0 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr0, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(lt0, 1) + ); + +// R09 = R16 & R17; +// Instr #13 +SDNode *and3 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt0, 0), + SDValue(eq0, 0), + // glue (or chain) input edge + SDValue(eq0, 1) + ); + +// R09 = R09 == R31; +// Instr #14 +SDNode *eq1 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and3, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(and3, 1) + ); + +SDValue ct8 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #15 +SDNode *nop0 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct8, + // glue (or chain) input 
edge + SDValue(eq1, 1) + ); + +// WHERE_EQ; +// Instr #16 +SDNode *whereeq0 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq1, 0), + // glue (or chain) input edge + SDValue(nop0, 0) + ); + +SDValue ct9 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R26 = 1; +// Instr #17 +SDNode *vload7 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct9, + SDValue(ishr0, 0), + // glue (or chain) input edge + SDValue(whereeq0, 1) + ); + +// END_WHERE; +// Instr #18 +SDNode *endwhere0 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload7, 1) + ); + +// R17 = R26 == R29; +// Instr #19 +SDNode *eq2 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(endwhere0, 0) + ); + +// R17 = R17 | R16; +// Instr #20 +SDNode *or0 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq0, 0), + SDValue(eq2, 0), + // glue (or chain) input edge + SDValue(eq2, 1) + ); + +// R17 = R17 == R30; +// Instr #21 +SDNode *eq3 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or0, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(or0, 1) + ); + +SDValue ct10 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #22 +SDNode *nop1 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct10, + // glue (or chain) input edge + SDValue(eq3, 1) + ); + +// WHERE_EQ; +// Instr #23 +SDNode *whereeq1 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq3, 0), + // glue (or chain) input edge + SDValue(nop1, 0) + ); + +// R27 = R27 | R10; +// Instr #24 +SDNode *or1 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + 
SDValue(vload6, 0), + SDValue(and2, 0), + SDValue(and2, 0), + // glue (or chain) input edge + SDValue(whereeq1, 1) + ); + +// END_WHERE; +// Instr #25 +SDNode *endwhere1 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(or1, 1) + ); + +// R18 = R26 == R29; +// Instr #26 +SDNode *eq4 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(endwhere1, 0) + ); + +// R17 = R27 == R30; +// Instr #27 +SDNode *eq5 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or1, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(eq4, 1) + ); + +// R09 = R31 - R17; +// Instr #28 +SDNode *sub0 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(eq5, 0), + // glue (or chain) input edge + SDValue(eq5, 1) + ); + +// R09 = R09 & R18; +// Instr #29 +SDNode *and4 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq4, 0), + SDValue(sub0, 0), + // glue (or chain) input edge + SDValue(sub0, 1) + ); + +// REDUCE(R09); +// Instr #30 +SDNode *sumRed0 = CurDAG->getMachineNode( + Connex::RED_H, + DL, + MVT::Glue, + SDValue(and4, 0), + // glue (or chain) input edge + SDValue(and4, 1) + ); + +// R24 = R18 & R17; +// Instr #31 +SDNode *and5 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq5, 0), + SDValue(eq4, 0), + // glue (or chain) input edge + SDValue(sumRed0, 0) + ); + +// R09 = R25 == R30; +// Instr #32 +SDNode *eq6 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and0, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and5, 1) + ); + +// R16 = R24 & R09; +// Instr #33 +SDNode *and6 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + 
SDValue(eq6, 0), + SDValue(and5, 0), + // glue (or chain) input edge + SDValue(eq6, 1) + ); + +// REDUCE(R16); +// Instr #34 +SDNode *sumRed1 = CurDAG->getMachineNode( + Connex::RED_H, + DL, + MVT::Glue, + SDValue(and6, 0), + // glue (or chain) input edge + SDValue(and6, 1) + ); + +// R09 = R31 - R09; +// Instr #35 +SDNode *sub1 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(eq6, 0), + // glue (or chain) input edge + SDValue(sumRed1, 0) + ); + +// R16 = R24 & R09; +// Instr #36 +SDNode *and7 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub1, 0), + SDValue(and5, 0), + // glue (or chain) input edge + SDValue(sub1, 1) + ); + +// REDUCE(R16); +// Instr #37 +SDNode *sumRed2 = CurDAG->getMachineNode( + Connex::RED_H, + DL, + MVT::Glue, + SDValue(and7, 0), + // glue (or chain) input edge + SDValue(and7, 1) + ); + +// R09 = R25 == R11; +// Instr #38 +SDNode *eq7 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and0, 0), + SDValue(vload5, 0), + // glue (or chain) input edge + SDValue(sumRed2, 0) + ); + +SDValue ct11 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #39 +SDNode *nop2 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct11, + // glue (or chain) input edge + SDValue(eq7, 1) + ); + +// WHERE_EQ; +// Instr #40 +SDNode *whereeq2 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq7, 0), + // glue (or chain) input edge + SDValue(nop2, 0) + ); + +// R27 = R30 - R27; +// Instr #41 +SDNode *sub2 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(or1, 0), + SDValue(or1, 0), + // glue (or chain) input edge + SDValue(whereeq2, 1) + ); + +// END_WHERE; +// Instr #42 +SDNode *endwhere2 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + 
MVT::Glue, + // glue (or chain) input edge + SDValue(sub2, 1) + ); + +SDValue ct12 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R02 = R26 << 0; +// Instr #43 +SDNode *ishl0 = CurDAG->getMachineNode( + Connex::ISHLV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + ct12, + // glue (or chain) input edge + SDValue(endwhere2, 0) + ); + +SDValue ct13 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R02 = 0; +// Instr #44 +SDNode *vload8 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct13, + // glue (or chain) input edge + SDValue(ishl0, 1) + ); + +SDValue ct14 = CurDAG->getConstant(6, DL, MVT::i16, true, false); +// R24 = 6; +// Instr #45 +SDNode *vload9 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct14, + // glue (or chain) input edge + SDValue(vload8, 1) + ); + +// R19 = R26 < R24; +// Instr #46 +SDNode *lt1 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(vload9, 0), + // glue (or chain) input edge + SDValue(vload9, 1) + ); + +// R17 = R02 < R26; +// Instr #47 +SDNode *lt2 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload8, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(lt1, 1) + ); + +// R02 = R31 + R02; +// Instr #48 +SDNode *add0 = CurDAG->getMachineNode( + Connex::ADDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload8, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(lt2, 1) + ); + +// R09 = R19 & R17; +// Instr #49 +SDNode *and8 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt2, 0), + SDValue(lt1, 0), + // glue (or chain) input edge + SDValue(add0, 1) + ); + +// R09 = R09 == R31; +// Instr #50 +SDNode *eq8 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and8, 0), + SDValue(vload0, 0), + // glue (or chain) input 
edge + SDValue(and8, 1) + ); + +SDValue ct15 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #51 +SDNode *nop3 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct15, + // glue (or chain) input edge + SDValue(eq8, 1) + ); + +// WHERE_EQ; +// Instr #52 +SDNode *whereeq3 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq8, 0), + // glue (or chain) input edge + SDValue(nop3, 0) + ); + +// R19 = R26 - R02; +// Instr #53 +SDNode *sub3 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(add0, 0), + SDValue(lt1, 0), + // glue (or chain) input edge + SDValue(whereeq3, 1) + ); + +// R27 = R27 << R19; +// Instr #54 +SDNode *shl0 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub2, 0), + SDValue(sub3, 0), + SDValue(sub2, 0), + // glue (or chain) input edge + SDValue(sub3, 1) + ); + +// REDUCE(R27); +// Instr #55 +SDNode *sumRed3 = CurDAG->getMachineNode( + Connex::RED_H, + DL, + MVT::Glue, + SDValue(shl0, 0), + // glue (or chain) input edge + SDValue(shl0, 1) + ); + +// END_WHERE; +// Instr #56 +SDNode *endwhere3 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sumRed3, 0) + ); + +SDValue ct16 = CurDAG->getConstant(5, DL, MVT::i16, true, false); +// R02 = 5; +// Instr #57 +SDNode *vload10 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct16, + // glue (or chain) input edge + SDValue(endwhere3, 0) + ); + +SDValue ct17 = CurDAG->getConstant(11, DL, MVT::i16, true, false); +// R24 = 11; +// Instr #58 +SDNode *vload11 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct17, + // glue (or chain) input edge + SDValue(vload10, 1) + ); + +// R19 = R26 < R24; +// Instr #59 +SDNode *lt3 = CurDAG->getMachineNode( + Connex::LT_H, 
+ DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(vload11, 0), + // glue (or chain) input edge + SDValue(vload11, 1) + ); + +// R17 = R02 < R26; +// Instr #60 +SDNode *lt4 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload10, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(lt3, 1) + ); + +// R02 = R31 + R02; +// Instr #61 +SDNode *add1 = CurDAG->getMachineNode( + Connex::ADDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload10, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(lt4, 1) + ); + +// R09 = R19 & R17; +// Instr #62 +SDNode *and9 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt4, 0), + SDValue(lt3, 0), + // glue (or chain) input edge + SDValue(add1, 1) + ); + +// R09 = R09 == R31; +// Instr #63 +SDNode *eq9 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and9, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(and9, 1) + ); + +SDValue ct18 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #64 +SDNode *nop4 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct18, + // glue (or chain) input edge + SDValue(eq9, 1) + ); + +// WHERE_EQ; +// Instr #65 +SDNode *whereeq4 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq9, 0), + // glue (or chain) input edge + SDValue(nop4, 0) + ); + +// R19 = R26 - R02; +// Instr #66 +SDNode *sub4 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(add1, 0), + SDValue(lt3, 0), + // glue (or chain) input edge + SDValue(whereeq4, 1) + ); + +// R27 = R27 << R19; +// Instr #67 +SDNode *shl1 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl0, 0), + SDValue(sub4, 0), + SDValue(shl0, 0), + // 
glue (or chain) input edge + SDValue(sub4, 1) + ); + +// REDUCE(R27); +// Instr #68 +SDNode *sumRed4 = CurDAG->getMachineNode( + Connex::RED_H, + DL, + MVT::Glue, + SDValue(shl1, 0), + // glue (or chain) input edge + SDValue(shl1, 1) + ); + +// END_WHERE; +// Instr #69 +SDNode *endwhere4 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sumRed4, 0) + ); + +SDValue ct19 = CurDAG->getConstant(10, DL, MVT::i16, true, false); +// R02 = 10; +// Instr #70 +SDNode *vload12 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct19, + // glue (or chain) input edge + SDValue(endwhere4, 0) + ); + +SDValue ct20 = CurDAG->getConstant(16, DL, MVT::i16, true, false); +// R24 = 16; +// Instr #71 +SDNode *vload13 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct20, + // glue (or chain) input edge + SDValue(vload12, 1) + ); + +// R19 = R26 < R24; +// Instr #72 +SDNode *lt5 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(vload13, 0), + // glue (or chain) input edge + SDValue(vload13, 1) + ); + +// R17 = R02 < R26; +// Instr #73 +SDNode *lt6 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload12, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(lt5, 1) + ); + +// R02 = R31 + R02; +// Instr #74 +SDNode *add2 = CurDAG->getMachineNode( + Connex::ADDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload12, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(lt6, 1) + ); + +// R09 = R19 & R17; +// Instr #75 +SDNode *and10 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt6, 0), + SDValue(lt5, 0), + // glue (or chain) input edge + SDValue(add2, 1) + ); + +// R09 = R09 == R31; +// Instr #76 +SDNode *eq10 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + 
MVT::Glue, + SDValue(and10, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(and10, 1) + ); + +SDValue ct21 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #77 +SDNode *nop5 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct21, + // glue (or chain) input edge + SDValue(eq10, 1) + ); + +// WHERE_EQ; +// Instr #78 +SDNode *whereeq5 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq10, 0), + // glue (or chain) input edge + SDValue(nop5, 0) + ); + +// R19 = R26 - R02; +// Instr #79 +SDNode *sub5 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(add2, 0), + SDValue(lt5, 0), + // glue (or chain) input edge + SDValue(whereeq5, 1) + ); + +// R27 = R27 << R19; +// Instr #80 +SDNode *shl2 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl1, 0), + SDValue(sub5, 0), + SDValue(shl1, 0), + // glue (or chain) input edge + SDValue(sub5, 1) + ); + +// REDUCE(R27); +// Instr #81 +SDNode *sumRed5 = CurDAG->getMachineNode( + Connex::RED_H, + DL, + MVT::Glue, + SDValue(shl2, 0), + // glue (or chain) input edge + SDValue(shl2, 1) + ); + +// END_WHERE; +// Instr #82 +SDNode *endwhere5 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sumRed5, 0) + ); + +SDValue ct22 = CurDAG->getConstant(15, DL, MVT::i16, true, false); +// R02 = 15; +// Instr #83 +SDNode *vload14 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct22, + // glue (or chain) input edge + SDValue(endwhere5, 0) + ); + +SDValue ct23 = CurDAG->getConstant(21, DL, MVT::i16, true, false); +// R24 = 21; +// Instr #84 +SDNode *vload15 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct23, + // glue (or chain) input edge + SDValue(vload14, 1) + ); + +// 
R19 = R26 < R24; +// Instr #85 +SDNode *lt7 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(vload15, 0), + // glue (or chain) input edge + SDValue(vload15, 1) + ); + +// R17 = R02 < R26; +// Instr #86 +SDNode *lt8 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload14, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(lt7, 1) + ); + +// R02 = R31 + R02; +// Instr #87 +SDNode *add3 = CurDAG->getMachineNode( + Connex::ADDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload14, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(lt8, 1) + ); + +// R09 = R19 & R17; +// Instr #88 +SDNode *and11 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt8, 0), + SDValue(lt7, 0), + // glue (or chain) input edge + SDValue(add3, 1) + ); + +// R09 = R09 == R31; +// Instr #89 +SDNode *eq11 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and11, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(and11, 1) + ); + +SDValue ct24 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #90 +SDNode *nop6 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct24, + // glue (or chain) input edge + SDValue(eq11, 1) + ); + +// WHERE_EQ; +// Instr #91 +SDNode *whereeq6 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq11, 0), + // glue (or chain) input edge + SDValue(nop6, 0) + ); + +// R19 = R26 - R02; +// Instr #92 +SDNode *sub6 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(add3, 0), + SDValue(lt7, 0), + // glue (or chain) input edge + SDValue(whereeq6, 1) + ); + +// R27 = R27 << R19; +// Instr #93 +SDNode *shl3 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + 
TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl2, 0), + SDValue(sub6, 0), + SDValue(shl2, 0), + // glue (or chain) input edge + SDValue(sub6, 1) + ); + +// REDUCE(R27); +// Instr #94 +SDNode *sumRed6 = CurDAG->getMachineNode( + Connex::RED_H, + DL, + MVT::Glue, + SDValue(shl3, 0), + // glue (or chain) input edge + SDValue(shl3, 1) + ); + +// END_WHERE; +// Instr #95 +SDNode *endwhere6 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sumRed6, 0) + ); + +SDValue ct25 = CurDAG->getConstant(20, DL, MVT::i16, true, false); +// R02 = 20; +// Instr #96 +SDNode *vload16 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct25, + // glue (or chain) input edge + SDValue(endwhere6, 0) + ); + +SDValue ct26 = CurDAG->getConstant(26, DL, MVT::i16, true, false); +// R24 = 26; +// Instr #97 +SDNode *vload17 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct26, + // glue (or chain) input edge + SDValue(vload16, 1) + ); + +// R19 = R26 < R24; +// Instr #98 +SDNode *lt9 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(vload17, 0), + // glue (or chain) input edge + SDValue(vload17, 1) + ); + +// R17 = R02 < R26; +// Instr #99 +SDNode *lt10 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload16, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(lt9, 1) + ); + +// R02 = R31 + R02; +// Instr #100 +SDNode *add4 = CurDAG->getMachineNode( + Connex::ADDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload16, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(lt10, 1) + ); + +// R09 = R19 & R17; +// Instr #101 +SDNode *and12 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt10, 0), + SDValue(lt9, 0), + // glue (or chain) input edge + SDValue(add4, 1) + ); + +// R09 = R09 == R31; +// 
Instr #102 +SDNode *eq12 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and12, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(and12, 1) + ); + +SDValue ct27 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #103 +SDNode *nop7 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct27, + // glue (or chain) input edge + SDValue(eq12, 1) + ); + +// WHERE_EQ; +// Instr #104 +SDNode *whereeq7 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq12, 0), + // glue (or chain) input edge + SDValue(nop7, 0) + ); + +// R19 = R26 - R02; +// Instr #105 +SDNode *sub7 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(add4, 0), + SDValue(lt9, 0), + // glue (or chain) input edge + SDValue(whereeq7, 1) + ); + +// R27 = R27 << R19; +// Instr #106 +SDNode *shl4 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl3, 0), + SDValue(sub7, 0), + SDValue(shl3, 0), + // glue (or chain) input edge + SDValue(sub7, 1) + ); + +// REDUCE(R27); +// Instr #107 +SDNode *sumRed7 = CurDAG->getMachineNode( + Connex::RED_H, + DL, + MVT::Glue, + SDValue(shl4, 0), + // glue (or chain) input edge + SDValue(shl4, 1) + ); + +// END_WHERE; +// Instr #108 +SDNode *endwhere7 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sumRed7, 0) + ); + +SDValue ct28 = CurDAG->getConstant(25, DL, MVT::i16, true, false); +// R02 = 25; +// Instr #109 +SDNode *vload18 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct28, + // glue (or chain) input edge + SDValue(endwhere7, 0) + ); + +SDValue ct29 = CurDAG->getConstant(31, DL, MVT::i16, true, false); +// R24 = 31; +// Instr #110 +SDNode *vload19 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + 
TYPE_VECTOR_I16, + MVT::Glue, + ct29, + // glue (or chain) input edge + SDValue(vload18, 1) + ); + +// R19 = R26 < R24; +// Instr #111 +SDNode *lt11 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(vload19, 0), + // glue (or chain) input edge + SDValue(vload19, 1) + ); + +// R17 = R02 < R26; +// Instr #112 +SDNode *lt12 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload18, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(lt11, 1) + ); + +// R02 = R31 + R02; +// Instr #113 +SDNode *add5 = CurDAG->getMachineNode( + Connex::ADDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload18, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(lt12, 1) + ); + +// R09 = R19 & R17; +// Instr #114 +SDNode *and13 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt12, 0), + SDValue(lt11, 0), + // glue (or chain) input edge + SDValue(add5, 1) + ); + +// R09 = R09 == R31; +// Instr #115 +SDNode *eq13 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and13, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(and13, 1) + ); + +SDValue ct30 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #116 +SDNode *nop8 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct30, + // glue (or chain) input edge + SDValue(eq13, 1) + ); + +// WHERE_EQ; +// Instr #117 +SDNode *whereeq8 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq13, 0), + // glue (or chain) input edge + SDValue(nop8, 0) + ); + +// R19 = R26 - R02; +// Instr #118 +SDNode *sub8 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(add5, 0), + SDValue(lt11, 0), + // glue (or chain) input edge + SDValue(whereeq8, 1) + ); 
+ +// R27 = R27 << R19; +// Instr #119 +SDNode *shl5 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl4, 0), + SDValue(sub8, 0), + SDValue(shl4, 0), + // glue (or chain) input edge + SDValue(sub8, 1) + ); + +// REDUCE(R27); +// Instr #120 +SDNode *sumRed8 = CurDAG->getMachineNode( + Connex::RED_H, + DL, + MVT::Glue, + SDValue(shl5, 0), + // glue (or chain) input edge + SDValue(shl5, 1) + ); + +// END_WHERE; +// Instr #121 +SDNode *reduceH /* endwhere8 */ = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, +// Alex: MVT::Glue is not used for this last node, because it led to the error: getNodeId() == -1 && "Node already inserted!" assertion failed. MVT::Other is used instead. + MVT::Other, + // glue (or chain) input edge + SDValue(sumRed8, 0) + ); + Index: lib/Target/Connex/Select_REDi32_OpincaaCodeGen.h =================================================================== --- lib/Target/Connex/Select_REDi32_OpincaaCodeGen.h +++ lib/Target/Connex/Select_REDi32_OpincaaCodeGen.h @@ -0,0 +1,184 @@ +// From /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/opincaa_standalone_apps/Emulate_i32/RED_i32_manual/DumpISel_OpincaaCodeGen_old04_300.cpp + + +// Code auto-generated by method Kernel::genLLVMISelManualCode() +// from Opincaa lib from kernel: simpleIoTest_allowOverwrite123456. +// It is important to put this code in the Select() method of the +// SelectionDAGISel class of your back end, after the ISelLowering pass, +// which contains the DAG Combiner, because the DAG Combiner can remove +// the getCopyToReg() we create, which can lead to an assertion failure +// (the text of the failed assertion is missing from this comment). +// Number of instructions generated: 14. Uses CurDAG, DL, TYPE_VECTOR_I16 and nodeOpSrcCast from the enclosing scope; the last node created is reduceHigh16 (result type MVT::Other). 
+ + + +SDValue ct0 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R29 = 1; (first node of the fragment: its glue/chain input is result 1 of nodeOpSrcCast) +// Instr #0 +SDNode *vload0 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct0, + // glue (or chain) input edge + SDValue(nodeOpSrcCast, 1) + ); + +// CELL_SHR(R28, R29); (R28 is result 0 of nodeOpSrcCast) +// Instr #1 +SDNode *cellshr0 = CurDAG->getMachineNode( + Connex::CELLSHR_H, + DL, + MVT::Glue, + SDValue(nodeOpSrcCast, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(vload0, 1) + ); + +SDValue ct1 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #2 +SDNode *nop0 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct1, + // glue (or chain) input edge + SDValue(cellshr0, 0) + ); + +// R27 = SHIFT_REG; +// Instr #3 +SDNode *ldsh0 = CurDAG->getMachineNode( + Connex::LDSH_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + // glue (or chain) input edge + SDValue(nop0, 0) + ); + +// R26 = INDEX; +// Instr #4 +SDNode *ldix0 = CurDAG->getMachineNode( + Connex::LDIX_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + // glue (or chain) input edge + SDValue(ldsh0, 1) + ); + +// R25 = R26 & R29; +// Instr #5 +SDNode *and0 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload0, 0), + SDValue(ldix0, 0), + // glue (or chain) input edge + SDValue(ldix0, 1) + ); + +// R24 = R25 == R29; +// Instr #6 +SDNode *eq0 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and0, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(and0, 1) + ); + +SDValue ct2 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #7 +SDNode *nop1 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct2, + // glue (or chain) input edge + SDValue(eq0, 1) + ); + +// WHERE_EQ; +// Instr #8 +SDNode *whereeq0 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq0, 
0), + // glue (or chain) input edge + SDValue(nop1, 0) + ); + +SDValue ct3 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R28 = 0; (the extra operand, result 0 of nodeOpSrcCast, is the node annotated as R28 above; presumably the tied-to previous value of the destination) +// Instr #9 +SDNode *vload1 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct3, + SDValue(nodeOpSrcCast, 0), + // glue (or chain) input edge + SDValue(whereeq0, 1) + ); + +SDValue ct4 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R27 = 0; (the extra operand ldsh0 is the node annotated as R27 above; presumably the tied-to previous value of the destination) +// Instr #10 +SDNode *vload2 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct4, + SDValue(ldsh0, 0), + // glue (or chain) input edge + SDValue(vload1, 1) + ); + +// END_WHERE; +// Instr #11 +SDNode *endwhere0 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload2, 1) + ); + +// REDUCE_U(R28); +// Instr #12 +SDNode *sumRedU0 = CurDAG->getMachineNode( + Connex::RED_U_H, + DL, + MVT::Glue, + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere0, 0) + ); + +// REDUCE_U(R27); (last node of the fragment) +// Instr #13 +SDNode *reduceHigh16 /*sumRedU1*/ = CurDAG->getMachineNode( + Connex::RED_U_H, + DL, +// Alex: MVT::Glue, (replaced by MVT::Other for this last node) + MVT::Other, + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(sumRedU0, 0) + ); + Index: lib/Target/Connex/Select_SHRAi32_OpincaaCodeGen.h =================================================================== --- lib/Target/Connex/Select_SHRAi32_OpincaaCodeGen.h +++ lib/Target/Connex/Select_SHRAi32_OpincaaCodeGen.h @@ -0,0 +1,456 @@ +// From /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/opincaa_standalone_apps/Emulate_i32/SHRA_i32_manual/DumpISel_OpincaaCodeGen_old13_927.cpp + +// Code auto-generated by method Kernel::genLLVMISelManualCode() +// from Opincaa lib from kernel: simpleIoTest_allowOverwrite123456. 
+// It is important to put this code in the Select() method of the +// SelectionDAGISel class of your back end, after the ISelLowering pass, +// which contains the DAG Combiner, because the DAG Combiner can remove +// the getCopyToReg() we create, which can lead to an assertion failure +// (the text of the failed assertion is missing from this comment). +// Number of instructions generated: 33. Uses CurDAG, DL, TYPE_VECTOR_I16, nodeOpSrcCast1 and nodeOpSrcCast2 from the enclosing scope; defines resH (the final ORV_SPECIAL_H node) and lastNode (the closing END_WHERE, result type MVT::Other). + + + +/* Alex: added manually so that predicated instructions can refer, through + tied-to constraints, to these nodes (the destination registers R21 and R22 of + predicated instr) without initializing those registers, since it's not necessary. +*/ +SDValue ct21Node = CurDAG->getConstant(21, DL, MVT::i16, true, false); +SDNode *r21Node = CurDAG->getMachineNode( + Connex::VLOAD_BOGUS_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct21Node, + // glue (or chain) input edge + SDValue(nodeOpSrcCast1, 1) + ); + +SDValue ct22Node = CurDAG->getConstant(22, DL, MVT::i16, true, false); +SDNode *r22Node = CurDAG->getMachineNode( + Connex::VLOAD_BOGUS_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct22Node, + // glue (or chain) input edge + SDValue(r21Node, 1) + ); + + +SDValue ct0 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R31 = 0; +// Instr #0 +SDNode *vload0 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct0, + // glue (or chain) input edge + SDValue(r22Node, 1) + ); + +SDValue ct1 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R30 = 1; +// Instr #1 +SDNode *vload1 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct1, + // glue (or chain) input edge + SDValue(vload0, 1) + ); + +SDValue ct2 = CurDAG->getConstant(16, DL, MVT::i16, true, false); +// R10 = 16; +// Instr #2 +SDNode *vload2 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct2, + // glue (or chain) input edge + SDValue(vload1, 1) + ); + +SDValue ct3 = CurDAG->getConstant(31, DL, MVT::i16, true, false); +// R08 = 31; +// Instr #3 +SDNode *vload3 = 
CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct3, + // glue (or chain) input edge + SDValue(vload2, 1) + ); + +// R27 = R27 & R08; (R27 is result 0 of nodeOpSrcCast2) +// Instr #4 +SDNode *and0 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload3, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(vload3, 1) + ); + +// R25 = INDEX; +// Instr #5 +SDNode *ldix0 = CurDAG->getMachineNode( + Connex::LDIX_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + // glue (or chain) input edge + SDValue(and0, 1) + ); + +// R25 = R25 & R30; +// Instr #6 +SDNode *and1 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(ldix0, 0), + // glue (or chain) input edge + SDValue(ldix0, 1) + ); + +// CELL_SHR(R27, R25); +// Instr #7 +SDNode *cellshr0 = CurDAG->getMachineNode( + Connex::CELLSHR_H, + DL, + MVT::Glue, + SDValue(and0, 0), + SDValue(and1, 0), + // glue (or chain) input edge + SDValue(and1, 1) + ); + +SDValue ct4 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #8 +SDNode *nop0 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct4, + // glue (or chain) input edge + SDValue(cellshr0, 0) + ); + +// R27 = SHIFT_REG; +// Instr #9 +SDNode *ldsh0 = CurDAG->getMachineNode( + Connex::LDSH_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + // glue (or chain) input edge + SDValue(nop0, 0) + ); + +// R20 = R10 < R27; +// Instr #10 +SDNode *lt0 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(ldsh0, 0), + // glue (or chain) input edge + SDValue(ldsh0, 1) + ); + +// R29 = SHRA(R28, R27); (R28 is result 0 of nodeOpSrcCast1) +// Instr #11 +SDNode *shra0 = CurDAG->getMachineNode( + Connex::SHRAV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(nodeOpSrcCast1, 0), + SDValue(ldsh0, 0), + // glue (or chain) input edge + SDValue(lt0, 1) + ); + +// CELL_SHL(R28, R30); +// Instr #12 +SDNode 
*cellshl0 = CurDAG->getMachineNode( + Connex::CELLSHL_H, + DL, + MVT::Glue, + SDValue(nodeOpSrcCast1, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(shra0, 1) + ); + +SDValue ct5 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #13 +SDNode *nop1 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct5, + // glue (or chain) input edge + SDValue(cellshl0, 0) + ); + +// R23 = SHIFT_REG; +// Instr #14 +SDNode *ldsh1 = CurDAG->getMachineNode( + Connex::LDSH_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + // glue (or chain) input edge + SDValue(nop1, 0) + ); + +// R25 = R25 == R31; +// Instr #15 +SDNode *eq0 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and1, 0), + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(ldsh1, 1) + ); + +// R24 = R20 & R25; +// Instr #16 +SDNode *and2 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq0, 0), + SDValue(lt0, 0), + // glue (or chain) input edge + SDValue(eq0, 1) + ); + +// R19 = R24 == R30; +// Instr #17 +SDNode *eq1 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and2, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and2, 1) + ); + +SDValue ct6 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #18 +SDNode *nop2 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct6, + // glue (or chain) input edge + SDValue(eq1, 1) + ); + +// WHERE_EQ; +// Instr #19 +SDNode *whereeq0 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq1, 0), + // glue (or chain) input edge + SDValue(nop2, 0) + ); + +// R21 = R27 - R10; (the last value operand, r21Node, is the manually added node standing for the destination R21) +// Instr #20 +SDNode *sub0 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ldsh0, 0), + SDValue(vload2, 0), + SDValue(r21Node, 0), + // 
glue (or chain) input edge + SDValue(whereeq0, 1) + ); + +// R29 = SHRA(R23, R21); (the last value operand, shra0, is the node that previously produced R29) +// Instr #21 +SDNode *shra1 = CurDAG->getMachineNode( + Connex::SHRAV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ldsh1, 0), + SDValue(sub0, 0), + SDValue(shra0, 0), + // glue (or chain) input edge + SDValue(sub0, 1) + ); + +// END_WHERE; +// Instr #22 +SDNode *endwhere0 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(shra1, 1) + ); + +// R20 = R30 - R20; +// Instr #23 +SDNode *sub1 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(lt0, 0), + // glue (or chain) input edge + SDValue(endwhere0, 0) + ); + +// R24 = R20 & R25; +// Instr #24 +SDNode *and3 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq0, 0), + SDValue(sub1, 0), + // glue (or chain) input edge + SDValue(sub1, 1) + ); + +// R19 = R24 == R30; +// Instr #25 +SDNode *eq2 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and3, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and3, 1) + ); + +SDValue ct7 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #26 +SDNode *nop3 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct7, + // glue (or chain) input edge + SDValue(eq2, 1) + ); + +// WHERE_EQ; +// Instr #27 +SDNode *whereeq1 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq2, 0), + // glue (or chain) input edge + SDValue(nop3, 0) + ); + +// R21 = R10 - R27; (the last value operand, sub0, is the node that previously produced R21) +// Instr #28 +SDNode *sub2 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(ldsh0, 0), + SDValue(sub0, 0), + // glue (or chain) input edge + SDValue(whereeq1, 1) + ); + +// R22 = R23 << R21; (the last value operand, r22Node, is the manually added node standing for the destination R22) +// Instr #29 +SDNode *shl0 = CurDAG->getMachineNode( + 
Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ldsh1, 0), + SDValue(sub2, 0), + SDValue(r22Node, 0), + // glue (or chain) input edge + SDValue(sub2, 1) + ); + +// R29 = R28 >> R27; (the last value operand, shra1, is the node that previously produced R29) +// Instr #30 +SDNode *shr0 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(nodeOpSrcCast1, 0), + SDValue(ldsh0, 0), + SDValue(shra1, 0), + // glue (or chain) input edge + SDValue(shl0, 1) + ); + +// R29 = R29 | R22; +// Instr #31 +SDNode *resH /*or0*/ = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl0, 0), + SDValue(shr0, 0), + SDValue(shr0, 0), + // glue (or chain) input edge + SDValue(shr0, 1) + ); + +// END_WHERE; (last node of the fragment) +// Instr #32 +SDNode *lastNode /*endwhere1*/ = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, +// MVT::Glue, (replaced by MVT::Other for this last node) + MVT::Other, + // glue (or chain) input edge + SDValue(resH /*or0*/, 1) + ); + Index: lib/Target/Connex/Select_SUBf16_OpincaaCodeGen.h =================================================================== --- lib/Target/Connex/Select_SUBf16_OpincaaCodeGen.h +++ lib/Target/Connex/Select_SUBf16_OpincaaCodeGen.h @@ -0,0 +1,3637 @@ +// From /home/asusu/LLVM/Tests/opincaa_standalone_apps/Emulate_f16/ADD_SUB_f16_manual/DumpISel_OpincaaCodeGen_old37_C10_SUBf16.cpp + +// Code auto-generated by method Kernel::genLLVMISelManualCode() +// from Opincaa lib from kernel: add_or_sub.f16. +// It is important to put this code in the Select() method of the +// SelectionDAGISel class of your back end, after the ISelLowering pass, +// which contains the DAG Combiner, because the DAG Combiner can remove +// the getCopyToReg() we create, which can lead to an assertion failure +// (the text of the failed assertion is missing from this comment). +// Number of instructions generated: 280. 
+ + + + +SDValue ct0 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R14 = 1; +// Instr #0 +SDNode *vload0 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct0, + // glue (or chain) input edge + SDValue(nodeOpSrcCast1, 1) + ); + +SDValue ct1 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R30 = 1; +// Instr #1 +SDNode *vload1 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct1, + // glue (or chain) input edge + SDValue(vload0, 1) + ); + +SDValue ct2 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R31 = 0; +// Instr #2 +SDNode *vload2 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct2, + // glue (or chain) input edge + SDValue(vload1, 1) + ); + +SDValue ct3 = CurDAG->getConstant(16, DL, MVT::i16, true, false); +// R29 = 16; +// Instr #3 +SDNode *vload3 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct3, + // glue (or chain) input edge + SDValue(vload2, 1) + ); + +SDValue ct4 = CurDAG->getConstant(31, DL, MVT::i16, true, false); +// R28 = 31; +// Instr #4 +SDNode *vload4 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct4, + // glue (or chain) input edge + SDValue(vload3, 1) + ); + +SDValue ct5 = CurDAG->getConstant(1023, DL, MVT::i16, true, false); +// R13 = 1023; +// Instr #5 +SDNode *vload5 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct5, + // glue (or chain) input edge + SDValue(vload4, 1) + ); + +SDValue ct6 = CurDAG->getConstant(31744, DL, MVT::i16, true, false); +// R12 = 31744; +// Instr #6 +SDNode *vload6 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct6, + // glue (or chain) input edge + SDValue(vload5, 1) + ); + +SDValue ct7 = CurDAG->getConstant(-32768, DL, MVT::i16, true, false); +// R11 = -32768; +// Instr #7 +SDNode *vload7 = CurDAG->getMachineNode( + 
Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct7, + // glue (or chain) input edge + SDValue(vload6, 1) + ); + +SDValue ct8 = CurDAG->getConstant(1024, DL, MVT::i16, true, false); +// R10 = 1024; +// Instr #8 +SDNode *vload8 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct8, + // glue (or chain) input edge + SDValue(vload7, 1) + ); + +SDValue ct9 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R62 = 0; +// Instr #9 +SDNode *vload9 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct9, + // glue (or chain) input edge + SDValue(vload8, 1) + ); + +SDValue ct10 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R19 = 0; +// Instr #10 +SDNode *vload10 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct10, + // glue (or chain) input edge + SDValue(vload9, 1) + ); + +SDValue ct11 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R62 = 0; +// Instr #11 +SDNode *vload11 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct11, + // glue (or chain) input edge + SDValue(vload10, 1) + ); + +SDValue ct12 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R61 = 0; +// Instr #12 +SDNode *vload12 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct12, + // glue (or chain) input edge + SDValue(vload11, 1) + ); + +SDValue ct13 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R07 = 0; +// Instr #13 +SDNode *vload13 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct13, + // glue (or chain) input edge + SDValue(vload12, 1) + ); + +SDValue ct14 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R04 = 0; +// Instr #14 +SDNode *vload14 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct14, + // glue (or chain) input edge + SDValue(vload13, 1) + ); + +SDValue ct15 = CurDAG->getConstant(0, 
DL, MVT::i16, true, false); +// R03 = 0; +// Instr #15 +SDNode *vload15 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct15, + // glue (or chain) input edge + SDValue(vload14, 1) + ); + +SDValue ct16 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R02 = 0; +// Instr #16 +SDNode *vload16 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct16, + // glue (or chain) input edge + SDValue(vload15, 1) + ); + +SDValue ct17 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R01 = 0; +// Instr #17 +SDNode *vload17 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct17, + // glue (or chain) input edge + SDValue(vload16, 1) + ); + +SDValue ct18 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R00 = 0; +// Instr #18 +SDNode *vload18 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct18, + // glue (or chain) input edge + SDValue(vload17, 1) + ); + +// R24 = R27 & R11; +// Instr #19 +SDNode *and0 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(vload18, 1) + ); + +// R25 = R27 & R12; +// Instr #20 +SDNode *and1 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload6, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge + SDValue(and0, 1) + ); + +SDValue ct19 = CurDAG->getConstant(10, DL, MVT::i16, true, false); +// R25 = R25 >> 10; +// Instr #21 +SDNode *ishr0 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and1, 0), + ct19, + // glue (or chain) input edge + SDValue(and1, 1) + ); + +// R26 = R27 & R13; +// Instr #22 +SDNode *and2 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(nodeOpSrcCast1, 0), + // glue (or chain) input edge 
+ SDValue(ishr0, 1) + ); + +// R18 = R31 < R26; +// Instr #23 +SDNode *lt0 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(and2, 0), + // glue (or chain) input edge + SDValue(and2, 1) + ); + +// R17 = R25 == R31; +// Instr #24 +SDNode *eq0 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr0, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(lt0, 1) + ); + +// R09 = R17 & R18; +// Instr #25 +SDNode *and3 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt0, 0), + SDValue(eq0, 0), + // glue (or chain) input edge + SDValue(eq0, 1) + ); + +// R09 = R09 == R30; +// Instr #26 +SDNode *eq1 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and3, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and3, 1) + ); + +SDValue ct20 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #27 +SDNode *nop0 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct20, + // glue (or chain) input edge + SDValue(eq1, 1) + ); + +// WHERE_EQ; +// Instr #28 +SDNode *whereeq0 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq1, 0), + // glue (or chain) input edge + SDValue(nop0, 0) + ); + +SDValue ct21 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R25 = 1; +// Instr #29 +SDNode *vload19 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct21, + SDValue(ishr0, 0), + // glue (or chain) input edge + SDValue(whereeq0, 1) + ); + +// END_WHERE; +// Instr #30 +SDNode *endwhere0 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload19, 1) + ); + +// R18 = R25 == R28; +// Instr #31 +SDNode *eq2 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + 
MVT::Glue, + SDValue(vload19, 0), + SDValue(vload4, 0), + // glue (or chain) input edge + SDValue(endwhere0, 0) + ); + +// R18 = R18 | R17; +// Instr #32 +SDNode *or0 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq0, 0), + SDValue(eq2, 0), + // glue (or chain) input edge + SDValue(eq2, 1) + ); + +// R18 = R18 == R31; +// Instr #33 +SDNode *eq3 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or0, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(or0, 1) + ); + +SDValue ct22 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #34 +SDNode *nop1 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct22, + // glue (or chain) input edge + SDValue(eq3, 1) + ); + +// WHERE_EQ; +// Instr #35 +SDNode *whereeq1 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq3, 0), + // glue (or chain) input edge + SDValue(nop1, 0) + ); + +// R26 = R26 | R10; +// Instr #36 +SDNode *or1 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload8, 0), + SDValue(and2, 0), + SDValue(and2, 0), + // glue (or chain) input edge + SDValue(whereeq1, 1) + ); + +// END_WHERE; +// Instr #37 +SDNode *endwhere1 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(or1, 1) + ); + +// R20 = R23 & R11; +// Instr #38 +SDNode *and4 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(endwhere1, 0) + ); + +// R21 = R23 & R12; +// Instr #39 +SDNode *and5 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload6, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(and4, 1) + ); + +SDValue ct23 = CurDAG->getConstant(10, DL, 
MVT::i16, true, false); +// R21 = R21 >> 10; +// Instr #40 +SDNode *ishr1 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and5, 0), + ct23, + // glue (or chain) input edge + SDValue(and5, 1) + ); + +// R22 = R23 & R13; +// Instr #41 +SDNode *and6 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(nodeOpSrcCast2, 0), + // glue (or chain) input edge + SDValue(ishr1, 1) + ); + +// R16 = R31 < R22; +// Instr #42 +SDNode *lt1 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(and6, 0), + // glue (or chain) input edge + SDValue(and6, 1) + ); + +// R15 = R21 == R31; +// Instr #43 +SDNode *eq4 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr1, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(lt1, 1) + ); + +// R09 = R15 & R16; +// Instr #44 +SDNode *and7 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt1, 0), + SDValue(eq4, 0), + // glue (or chain) input edge + SDValue(eq4, 1) + ); + +// R09 = R09 == R30; +// Instr #45 +SDNode *eq5 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and7, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and7, 1) + ); + +SDValue ct24 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #46 +SDNode *nop2 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct24, + // glue (or chain) input edge + SDValue(eq5, 1) + ); + +// WHERE_EQ; +// Instr #47 +SDNode *whereeq2 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq5, 0), + // glue (or chain) input edge + SDValue(nop2, 0) + ); + +SDValue ct25 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R21 = 1; +// Instr #48 +SDNode *vload20 = CurDAG->getMachineNode( 
+ Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct25, + SDValue(ishr1, 0), + // glue (or chain) input edge + SDValue(whereeq2, 1) + ); + +// END_WHERE; +// Instr #49 +SDNode *endwhere2 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload20, 1) + ); + +// R16 = R21 == R28; +// Instr #50 +SDNode *eq6 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload20, 0), + SDValue(vload4, 0), + // glue (or chain) input edge + SDValue(endwhere2, 0) + ); + +// R16 = R16 | R15; +// Instr #51 +SDNode *or2 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq4, 0), + SDValue(eq6, 0), + // glue (or chain) input edge + SDValue(eq6, 1) + ); + +// R16 = R16 == R31; +// Instr #52 +SDNode *eq7 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or2, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(or2, 1) + ); + +SDValue ct26 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #53 +SDNode *nop3 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct26, + // glue (or chain) input edge + SDValue(eq7, 1) + ); + +// WHERE_EQ; +// Instr #54 +SDNode *whereeq3 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq7, 0), + // glue (or chain) input edge + SDValue(nop3, 0) + ); + +// R22 = R22 | R10; +// Instr #55 +SDNode *or3 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload8, 0), + SDValue(and6, 0), + SDValue(and6, 0), + // glue (or chain) input edge + SDValue(whereeq3, 1) + ); + +// END_WHERE; +// Instr #56 +SDNode *endwhere3 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(or3, 1) + ); + +// R50 = R24 == R11; +// Instr #57 +SDNode *eq8 = CurDAG->getMachineNode( + 
Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and0, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(endwhere3, 0) + ); + +// R49 = R25 == R28; +// Instr #58 +SDNode *eq9 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload19, 0), + SDValue(vload4, 0), + // glue (or chain) input edge + SDValue(eq8, 1) + ); + +// R48 = R26 == R31; +// Instr #59 +SDNode *eq10 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or1, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(eq9, 1) + ); + +// R47 = R20 == R11; +// Instr #60 +SDNode *eq11 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and4, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(eq10, 1) + ); + +// R47 = R47 == R31; +// Instr #61 +SDNode *eq12 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq11, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(eq11, 1) + ); + +// R46 = R21 == R28; +// Instr #62 +SDNode *eq13 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload20, 0), + SDValue(vload4, 0), + // glue (or chain) input edge + SDValue(eq12, 1) + ); + +// R45 = R22 == R31; +// Instr #63 +SDNode *eq14 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or3, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(eq13, 1) + ); + +// R07 = R49 & R46; +// Instr #64 +SDNode *and8 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq13, 0), + SDValue(eq9, 0), + // glue (or chain) input edge + SDValue(eq14, 1) + ); + +// R08 = R07 & R50; +// Instr #65 +SDNode *and9 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq8, 0), + SDValue(and8, 0), + // glue (or chain) input edge + SDValue(and8, 1) + ); + +// 
R44 = ~R47; +// Instr #66 +SDNode *not0 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq12, 0), + // glue (or chain) input edge + SDValue(and9, 1) + ); + +// R08 = R08 & R44; +// Instr #67 +SDNode *and10 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(not0, 0), + SDValue(and9, 0), + // glue (or chain) input edge + SDValue(not0, 1) + ); + +// R44 = ~R50; +// Instr #68 +SDNode *not1 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq8, 0), + // glue (or chain) input edge + SDValue(and10, 1) + ); + +// R44 = R44 & R07; +// Instr #69 +SDNode *and11 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and8, 0), + SDValue(not1, 0), + // glue (or chain) input edge + SDValue(not1, 1) + ); + +// R44 = R44 & R47; +// Instr #70 +SDNode *and12 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq12, 0), + SDValue(and11, 0), + // glue (or chain) input edge + SDValue(and11, 1) + ); + +// R08 = R08 | R44; +// Instr #71 +SDNode *or4 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and12, 0), + SDValue(and10, 0), + // glue (or chain) input edge + SDValue(and12, 1) + ); + +// R07 = ~R45; +// Instr #72 +SDNode *not2 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq14, 0), + // glue (or chain) input edge + SDValue(or4, 1) + ); + +// R07 = R07 & R46; +// Instr #73 +SDNode *and13 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq13, 0), + SDValue(not2, 0), + // glue (or chain) input edge + SDValue(not2, 1) + ); + +// R08 = R08 | R07; +// Instr #74 +SDNode *or5 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and13, 0), + SDValue(or4, 0), + // glue (or chain) input edge + SDValue(and13, 1) + ); + +// R07 = ~R48; 
+// Instr #75 +SDNode *not3 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq10, 0), + // glue (or chain) input edge + SDValue(or5, 1) + ); + +// R07 = R07 & R49; +// Instr #76 +SDNode *and14 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq9, 0), + SDValue(not3, 0), + // glue (or chain) input edge + SDValue(not3, 1) + ); + +// R08 = R08 | R07; +// Instr #77 +SDNode *or6 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and14, 0), + SDValue(or5, 0), + // glue (or chain) input edge + SDValue(and14, 1) + ); + +// R09 = R08 == R30; +// Instr #78 +SDNode *eq15 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or6, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(or6, 1) + ); + +SDValue ct27 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #79 +SDNode *nop4 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct27, + // glue (or chain) input edge + SDValue(eq15, 1) + ); + +// WHERE_EQ; +// Instr #80 +SDNode *whereeq4 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq15, 0), + // glue (or chain) input edge + SDValue(nop4, 0) + ); + +SDValue ct28 = CurDAG->getConstant(31745, DL, MVT::i16, true, false); +// R19 = 31745; +// Instr #81 +SDNode *vload21 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct28, + SDValue(vload10, 0), + // glue (or chain) input edge + SDValue(whereeq4, 1) + ); + +SDValue ct29 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R14 = 0; +// Instr #82 +SDNode *vload22 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct29, + SDValue(vload0, 0), + // glue (or chain) input edge + SDValue(vload21, 1) + ); + +// END_WHERE; +// Instr #83 +SDNode *endwhere4 = 
CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload22, 1) + ); + +// R08 = R49 | R46; +// Instr #84 +SDNode *or7 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq13, 0), + SDValue(eq9, 0), + // glue (or chain) input edge + SDValue(endwhere4, 0) + ); + +// R09 = R08 & R14; +// Instr #85 +SDNode *and15 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload22, 0), + SDValue(or7, 0), + // glue (or chain) input edge + SDValue(or7, 1) + ); + +// R09 = R09 == R30; +// Instr #86 +SDNode *eq16 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and15, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and15, 1) + ); + +SDValue ct30 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #87 +SDNode *nop5 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct30, + // glue (or chain) input edge + SDValue(eq16, 1) + ); + +// WHERE_EQ; +// Instr #88 +SDNode *whereeq5 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq16, 0), + // glue (or chain) input edge + SDValue(nop5, 0) + ); + +SDValue ct31 = CurDAG->getConstant(31744, DL, MVT::i16, true, false); +// R19 = 31744; +// Instr #89 +SDNode *vload23 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct31, + SDValue(vload21, 0), + // glue (or chain) input edge + SDValue(whereeq5, 1) + ); + +// R08 = R50 & R49; +// Instr #90 +SDNode *and16 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq9, 0), + SDValue(eq8, 0), + SDValue(or7, 0), + // glue (or chain) input edge + SDValue(vload23, 1) + ); + +// R07 = R47 & R46; +// Instr #91 +SDNode *and17 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + 
SDValue(eq13, 0), + SDValue(eq12, 0), + SDValue(and14, 0), + // glue (or chain) input edge + SDValue(and16, 1) + ); + +// R08 = R08 | R07; +// Instr #92 +SDNode *or8 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and17, 0), + SDValue(and16, 0), + SDValue(and16, 0), + // glue (or chain) input edge + SDValue(and17, 1) + ); + +SDValue ct32 = CurDAG->getConstant(15, DL, MVT::i16, true, false); +// R08 = R08 << 15; +// Instr #93 +SDNode *ishl0 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or8, 0), + ct32, + SDValue(or8, 0), + // glue (or chain) input edge + SDValue(or8, 1) + ); + +// R19 = R19 ^ R08; +// Instr #94 +SDNode *xor0 = CurDAG->getMachineNode( + Connex::XORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl0, 0), + SDValue(vload23, 0), + SDValue(vload23, 0), + // glue (or chain) input edge + SDValue(ishl0, 1) + ); + +SDValue ct33 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R14 = 0; +// Instr #95 +SDNode *vload24 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct33, + SDValue(vload22, 0), + // glue (or chain) input edge + SDValue(xor0, 1) + ); + +// END_WHERE; +// Instr #96 +SDNode *endwhere5 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload24, 1) + ); + +// R15 = R25 - R21; +// Instr #97 +SDNode *sub0 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload19, 0), + SDValue(vload20, 0), + // glue (or chain) input edge + SDValue(endwhere5, 0) + ); + +SDValue ct34 = CurDAG->getConstant(-15, DL, MVT::i16, true, false); +// R08 = -15; +// Instr #98 +SDNode *vload25 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct34, + // glue (or chain) input edge + SDValue(sub0, 1) + ); + +// R09 = R15 < R08; +// Instr #99 +SDNode *lt2 = CurDAG->getMachineNode( + 
Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub0, 0), + SDValue(vload25, 0), + // glue (or chain) input edge + SDValue(vload25, 1) + ); + +// R09 = R09 & R14; +// Instr #100 +SDNode *and18 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt2, 0), + // glue (or chain) input edge + SDValue(lt2, 1) + ); + +// R09 = R09 == R30; +// Instr #101 +SDNode *eq17 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and18, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and18, 1) + ); + +SDValue ct35 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #102 +SDNode *nop6 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct35, + // glue (or chain) input edge + SDValue(eq17, 1) + ); + +// WHERE_EQ; +// Instr #103 +SDNode *whereeq6 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq17, 0), + // glue (or chain) input edge + SDValue(nop6, 0) + ); + +// R15 = R31 - R15; +// Instr #104 +SDNode *sub1 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(sub0, 0), + SDValue(sub0, 0), + // glue (or chain) input edge + SDValue(whereeq6, 1) + ); + +SDValue ct36 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R26 = 0; +// Instr #105 +SDNode *vload26 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct36, + SDValue(or1, 0), + // glue (or chain) input edge + SDValue(sub1, 1) + ); + +SDValue ct37 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R25 = R21 << 0; +// Instr #106 +SDNode *ishl1 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload20, 0), + ct37, + SDValue(vload19, 0), + // glue (or chain) input edge + SDValue(vload26, 1) + ); + +SDValue ct38 = 
CurDAG->getConstant(32000, DL, MVT::i16, true, false); +// R15 = 32000; +// Instr #107 +SDNode *vload27 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct38, + SDValue(sub1, 0), + // glue (or chain) input edge + SDValue(ishl1, 1) + ); + +// END_WHERE; +// Instr #108 +SDNode *endwhere6 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload27, 1) + ); + +SDValue ct39 = CurDAG->getConstant(-3, DL, MVT::i16, true, false); +// R08 = -3; +// Instr #109 +SDNode *vload28 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct39, + // glue (or chain) input edge + SDValue(endwhere6, 0) + ); + +// R09 = R15 < R08; +// Instr #110 +SDNode *lt3 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload27, 0), + SDValue(vload28, 0), + // glue (or chain) input edge + SDValue(vload28, 1) + ); + +// R09 = R09 & R14; +// Instr #111 +SDNode *and19 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt3, 0), + // glue (or chain) input edge + SDValue(lt3, 1) + ); + +// R09 = R09 == R30; +// Instr #112 +SDNode *eq18 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and19, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and19, 1) + ); + +SDValue ct40 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #113 +SDNode *nop7 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct40, + // glue (or chain) input edge + SDValue(eq18, 1) + ); + +// WHERE_EQ; +// Instr #114 +SDNode *whereeq7 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq18, 0), + // glue (or chain) input edge + SDValue(nop7, 0) + ); + +// R15 = R31 - R15; +// Instr #115 +SDNode *sub2 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, 
+ DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(vload27, 0), + SDValue(vload27, 0), + // glue (or chain) input edge + SDValue(whereeq7, 1) + ); + +// R26 = R26 >> R15; +// Instr #116 +SDNode *shr0 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload26, 0), + SDValue(sub2, 0), + SDValue(vload26, 0), + // glue (or chain) input edge + SDValue(sub2, 1) + ); + +SDValue ct41 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R25 = R21 << 0; +// Instr #117 +SDNode *ishl2 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload20, 0), + ct41, + SDValue(ishl1, 0), + // glue (or chain) input edge + SDValue(shr0, 1) + ); + +SDValue ct42 = CurDAG->getConstant(32000, DL, MVT::i16, true, false); +// R15 = 32000; +// Instr #118 +SDNode *vload29 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct42, + SDValue(sub2, 0), + // glue (or chain) input edge + SDValue(ishl2, 1) + ); + +// END_WHERE; +// Instr #119 +SDNode *endwhere7 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload29, 1) + ); + +// R09 = R15 < R31; +// Instr #120 +SDNode *lt4 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload29, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(endwhere7, 0) + ); + +// R09 = R09 & R14; +// Instr #121 +SDNode *and20 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt4, 0), + // glue (or chain) input edge + SDValue(lt4, 1) + ); + +// R09 = R09 == R30; +// Instr #122 +SDNode *eq19 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and20, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and20, 1) + ); + +SDValue ct43 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, 
MVT::i16, true, false); +// NOP; +// Instr #123 +SDNode *nop8 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct43, + // glue (or chain) input edge + SDValue(eq19, 1) + ); + +// WHERE_EQ; +// Instr #124 +SDNode *whereeq8 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq19, 0), + // glue (or chain) input edge + SDValue(nop8, 0) + ); + +// R15 = R31 - R15; +// Instr #125 +SDNode *sub3 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(vload29, 0), + SDValue(vload29, 0), + // glue (or chain) input edge + SDValue(whereeq8, 1) + ); + +// R22 = R22 << R15; +// Instr #126 +SDNode *shl0 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or3, 0), + SDValue(sub3, 0), + SDValue(or3, 0), + // glue (or chain) input edge + SDValue(sub3, 1) + ); + +SDValue ct44 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R21 = R25 << 0; +// Instr #127 +SDNode *ishl3 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl2, 0), + ct44, + SDValue(vload20, 0), + // glue (or chain) input edge + SDValue(shl0, 1) + ); + +SDValue ct45 = CurDAG->getConstant(32000, DL, MVT::i16, true, false); +// R15 = 32000; +// Instr #128 +SDNode *vload30 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct45, + SDValue(sub3, 0), + // glue (or chain) input edge + SDValue(ishl3, 1) + ); + +// END_WHERE; +// Instr #129 +SDNode *endwhere8 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload30, 1) + ); + +SDValue ct46 = CurDAG->getConstant(4, DL, MVT::i16, true, false); +// R08 = 4; +// Instr #130 +SDNode *vload31 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct46, + // glue (or chain) input edge + SDValue(endwhere8, 0) + ); + +// R09 = R15 
< R08; +// Instr #131 +SDNode *lt5 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload30, 0), + SDValue(vload31, 0), + // glue (or chain) input edge + SDValue(vload31, 1) + ); + +// R09 = R09 & R14; +// Instr #132 +SDNode *and21 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt5, 0), + // glue (or chain) input edge + SDValue(lt5, 1) + ); + +// R09 = R09 == R30; +// Instr #133 +SDNode *eq20 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and21, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and21, 1) + ); + +SDValue ct47 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #134 +SDNode *nop9 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct47, + // glue (or chain) input edge + SDValue(eq20, 1) + ); + +// WHERE_EQ; +// Instr #135 +SDNode *whereeq9 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq20, 0), + // glue (or chain) input edge + SDValue(nop9, 0) + ); + +// R26 = R26 << R15; +// Instr #136 +SDNode *shl1 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shr0, 0), + SDValue(vload30, 0), + SDValue(shr0, 0), + // glue (or chain) input edge + SDValue(whereeq9, 1) + ); + +SDValue ct48 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R25 = R21 << 0; +// Instr #137 +SDNode *ishl4 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl3, 0), + ct48, + SDValue(ishl2, 0), + // glue (or chain) input edge + SDValue(shl1, 1) + ); + +SDValue ct49 = CurDAG->getConstant(32000, DL, MVT::i16, true, false); +// R15 = 32000; +// Instr #138 +SDNode *vload32 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct49, + SDValue(vload30, 0), + // glue (or 
chain) input edge + SDValue(ishl4, 1) + ); + +// END_WHERE; +// Instr #139 +SDNode *endwhere9 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload32, 1) + ); + +// R09 = R15 < R29; +// Instr #140 +SDNode *lt6 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload32, 0), + SDValue(vload3, 0), + // glue (or chain) input edge + SDValue(endwhere9, 0) + ); + +// R09 = R09 & R14; +// Instr #141 +SDNode *and22 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt6, 0), + // glue (or chain) input edge + SDValue(lt6, 1) + ); + +// R09 = R09 == R30; +// Instr #142 +SDNode *eq21 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and22, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and22, 1) + ); + +SDValue ct50 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #143 +SDNode *nop10 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct50, + // glue (or chain) input edge + SDValue(eq21, 1) + ); + +// WHERE_EQ; +// Instr #144 +SDNode *whereeq10 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq21, 0), + // glue (or chain) input edge + SDValue(nop10, 0) + ); + +// R22 = R22 >> R15; +// Instr #145 +SDNode *shr1 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl0, 0), + SDValue(vload32, 0), + SDValue(shl0, 0), + // glue (or chain) input edge + SDValue(whereeq10, 1) + ); + +SDValue ct51 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R21 = R25 << 0; +// Instr #146 +SDNode *ishl5 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl4, 0), + ct51, + SDValue(ishl3, 0), + // glue (or chain) input edge + SDValue(shr1, 1) + ); + +SDValue ct52 = 
CurDAG->getConstant(32000, DL, MVT::i16, true, false); +// R15 = 32000; +// Instr #147 +SDNode *vload33 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct52, + SDValue(vload32, 0), + // glue (or chain) input edge + SDValue(ishl5, 1) + ); + +// END_WHERE; +// Instr #148 +SDNode *endwhere10 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload33, 1) + ); + +SDValue ct53 = CurDAG->getConstant(32, DL, MVT::i16, true, false); +// R08 = 32; +// Instr #149 +SDNode *vload34 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct53, + // glue (or chain) input edge + SDValue(endwhere10, 0) + ); + +// R09 = R15 < R08; +// Instr #150 +SDNode *lt7 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload33, 0), + SDValue(vload34, 0), + // glue (or chain) input edge + SDValue(vload34, 1) + ); + +// R09 = R09 & R14; +// Instr #151 +SDNode *and23 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt7, 0), + // glue (or chain) input edge + SDValue(lt7, 1) + ); + +// R09 = R09 == R30; +// Instr #152 +SDNode *eq22 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and23, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and23, 1) + ); + +SDValue ct54 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #153 +SDNode *nop11 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct54, + // glue (or chain) input edge + SDValue(eq22, 1) + ); + +// WHERE_EQ; +// Instr #154 +SDNode *whereeq11 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq22, 0), + // glue (or chain) input edge + SDValue(nop11, 0) + ); + +SDValue ct55 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R22 = 0; +// Instr #155 
+SDNode *vload35 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct55, + SDValue(shr1, 0), + // glue (or chain) input edge + SDValue(whereeq11, 1) + ); + +SDValue ct56 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R21 = R25 << 0; +// Instr #156 +SDNode *ishl6 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl4, 0), + ct56, + SDValue(ishl5, 0), + // glue (or chain) input edge + SDValue(vload35, 1) + ); + +// END_WHERE; +// Instr #157 +SDNode *endwhere11 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(ishl6, 1) + ); + +// R09 = R24 == R11; +// Instr #158 +SDNode *eq23 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and0, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(endwhere11, 0) + ); + +// R09 = R09 & R14; +// Instr #159 +SDNode *and24 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(eq23, 0), + // glue (or chain) input edge + SDValue(eq23, 1) + ); + +// R09 = R09 == R30; +// Instr #160 +SDNode *eq24 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and24, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and24, 1) + ); + +SDValue ct57 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #161 +SDNode *nop12 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct57, + // glue (or chain) input edge + SDValue(eq24, 1) + ); + +// WHERE_EQ; +// Instr #162 +SDNode *whereeq12 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq24, 0), + // glue (or chain) input edge + SDValue(nop12, 0) + ); + +// R26 = R31 - R26; +// Instr #163 +SDNode *sub4 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + 
MVT::Glue, + SDValue(vload2, 0), + SDValue(shl1, 0), + SDValue(shl1, 0), + // glue (or chain) input edge + SDValue(whereeq12, 1) + ); + +// END_WHERE; +// Instr #164 +SDNode *endwhere12 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sub4, 1) + ); + +// R09 = R20 == R31; +// Instr #165 +SDNode *eq25 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and4, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(endwhere12, 0) + ); + +// R09 = R09 & R14; +// Instr #166 +SDNode *and25 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(eq25, 0), + // glue (or chain) input edge + SDValue(eq25, 1) + ); + +// R09 = R09 == R30; +// Instr #167 +SDNode *eq26 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and25, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and25, 1) + ); + +SDValue ct58 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #168 +SDNode *nop13 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct58, + // glue (or chain) input edge + SDValue(eq26, 1) + ); + +// WHERE_EQ; +// Instr #169 +SDNode *whereeq13 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq26, 0), + // glue (or chain) input edge + SDValue(nop13, 0) + ); + +// R22 = R31 - R22; +// Instr #170 +SDNode *sub5 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(vload35, 0), + SDValue(vload35, 0), + // glue (or chain) input edge + SDValue(whereeq13, 1) + ); + +// END_WHERE; +// Instr #171 +SDNode *endwhere13 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sub5, 1) + ); + +// R09 = R14 == R30; +// Instr #172 +SDNode *eq27 = 
CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere13, 0) + ); + +SDValue ct59 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #173 +SDNode *nop14 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct59, + // glue (or chain) input edge + SDValue(eq27, 1) + ); + +// WHERE_EQ; +// Instr #174 +SDNode *whereeq14 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq27, 0), + // glue (or chain) input edge + SDValue(nop14, 0) + ); + +// R26 = R22 + R26; +// Instr #175 +SDNode *add0 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub4, 0), + SDValue(sub5, 0), + SDValue(sub4, 0), + // glue (or chain) input edge + SDValue(whereeq14, 1) + ); + +// END_WHERE; +// Instr #176 +SDNode *endwhere14 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(add0, 1) + ); + +// R24 = R26 & R11; +// Instr #177 +SDNode *and26 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload7, 0), + SDValue(add0, 0), + // glue (or chain) input edge + SDValue(endwhere14, 0) + ); + +// R09 = R24 == R11; +// Instr #178 +SDNode *eq28 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and26, 0), + SDValue(vload7, 0), + // glue (or chain) input edge + SDValue(and26, 1) + ); + +// R09 = R09 & R14; +// Instr #179 +SDNode *and27 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(eq28, 0), + // glue (or chain) input edge + SDValue(eq28, 1) + ); + +// R09 = R09 == R30; +// Instr #180 +SDNode *eq29 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and27, 0), + SDValue(vload1, 0), + // glue (or chain) 
input edge + SDValue(and27, 1) + ); + +SDValue ct60 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #181 +SDNode *nop15 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct60, + // glue (or chain) input edge + SDValue(eq29, 1) + ); + +// WHERE_EQ; +// Instr #182 +SDNode *whereeq15 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq29, 0), + // glue (or chain) input edge + SDValue(nop15, 0) + ); + +// R26 = R31 - R26; +// Instr #183 +SDNode *sub6 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(add0, 0), + SDValue(add0, 0), + // glue (or chain) input edge + SDValue(whereeq15, 1) + ); + +// END_WHERE; +// Instr #184 +SDNode *endwhere15 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sub6, 1) + ); + +SDValue ct61 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R05 = R26 << 0; +// Instr #185 +SDNode *ishl7 = CurDAG->getMachineNode( + Connex::ISHLV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub6, 0), + ct61, + // glue (or chain) input edge + SDValue(endwhere15, 0) + ); + +SDValue ct62 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R08 = R05 >> 1; +// Instr #186 +SDNode *ishr2 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl7, 0), + ct62, + // glue (or chain) input edge + SDValue(ishl7, 1) + ); + +// R05 = R05 | R08; +// Instr #187 +SDNode *or9 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr2, 0), + SDValue(ishl7, 0), + // glue (or chain) input edge + SDValue(ishr2, 1) + ); + +SDValue ct63 = CurDAG->getConstant(2, DL, MVT::i16, true, false); +// R08 = R05 >> 2; +// Instr #188 +SDNode *ishr3 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or9, 0), + ct63, + // glue 
(or chain) input edge + SDValue(or9, 1) + ); + +// R05 = R05 | R08; +// Instr #189 +SDNode *or10 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr3, 0), + SDValue(or9, 0), + // glue (or chain) input edge + SDValue(ishr3, 1) + ); + +SDValue ct64 = CurDAG->getConstant(4, DL, MVT::i16, true, false); +// R08 = R05 >> 4; +// Instr #190 +SDNode *ishr4 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or10, 0), + ct64, + // glue (or chain) input edge + SDValue(or10, 1) + ); + +// R05 = R05 | R08; +// Instr #191 +SDNode *or11 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr4, 0), + SDValue(or10, 0), + // glue (or chain) input edge + SDValue(ishr4, 1) + ); + +SDValue ct65 = CurDAG->getConstant(8, DL, MVT::i16, true, false); +// R08 = R05 >> 8; +// Instr #192 +SDNode *ishr5 = CurDAG->getMachineNode( + Connex::ISHRV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or11, 0), + ct65, + // glue (or chain) input edge + SDValue(or11, 1) + ); + +// R05 = R05 | R08; +// Instr #193 +SDNode *or12 = CurDAG->getMachineNode( + Connex::ORV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr5, 0), + SDValue(or11, 0), + // glue (or chain) input edge + SDValue(ishr5, 1) + ); + +// R05 = ~R05; +// Instr #194 +SDNode *not4 = CurDAG->getMachineNode( + Connex::NOT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or12, 0), + // glue (or chain) input edge + SDValue(or12, 1) + ); + +// R06 = POPCNT(R05); +// Instr #195 +SDNode *popcnt0 = CurDAG->getMachineNode( + Connex::POPCNT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(not4, 0), + // glue (or chain) input edge + SDValue(not4, 1) + ); + +// R06 = R29 - R06; +// Instr #196 +SDNode *sub7 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload3, 0), + SDValue(popcnt0, 0), + // glue (or chain) input edge + SDValue(popcnt0, 1) + ); + +SDValue ct66 = 
CurDAG->getConstant(11, DL, MVT::i16, true, false); +// R08 = 11; +// Instr #197 +SDNode *vload36 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct66, + // glue (or chain) input edge + SDValue(sub7, 1) + ); + +// R08 = R06 - R08; +// Instr #198 +SDNode *sub8 = CurDAG->getMachineNode( + Connex::SUBV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub7, 0), + SDValue(vload36, 0), + // glue (or chain) input edge + SDValue(vload36, 1) + ); + +// R09 = R31 < R08; +// Instr #199 +SDNode *lt8 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(sub8, 0), + // glue (or chain) input edge + SDValue(sub8, 1) + ); + +// R09 = R09 & R14; +// Instr #200 +SDNode *and28 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt8, 0), + // glue (or chain) input edge + SDValue(lt8, 1) + ); + +// R09 = R09 == R30; +// Instr #201 +SDNode *eq30 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and28, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and28, 1) + ); + +SDValue ct67 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #202 +SDNode *nop16 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct67, + // glue (or chain) input edge + SDValue(eq30, 1) + ); + +// WHERE_EQ; +// Instr #203 +SDNode *whereeq16 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq30, 0), + // glue (or chain) input edge + SDValue(nop16, 0) + ); + +SDValue ct68 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R62 = R26 << 0; +// Instr #204 +SDNode *ishl8 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub6, 0), + ct68, + SDValue(vload11, 0), + // glue (or chain) input edge + SDValue(whereeq16, 1) + ); + +// R09 = R29 - R08; +// 
Instr #205 +SDNode *sub9 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload3, 0), + SDValue(sub8, 0), + SDValue(eq30, 0), + // glue (or chain) input edge + SDValue(ishl8, 1) + ); + +// R62 = R62 << R09; +// Instr #206 +SDNode *shl2 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl8, 0), + SDValue(sub9, 0), + SDValue(ishl8, 0), + // glue (or chain) input edge + SDValue(sub9, 1) + ); + +// R62 = R62 >> R09; +// Instr #207 +SDNode *shr2 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl2, 0), + SDValue(sub9, 0), + SDValue(shl2, 0), + // glue (or chain) input edge + SDValue(shl2, 1) + ); + +SDValue ct69 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R61 = R08 << 0; +// Instr #208 +SDNode *ishl9 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub8, 0), + ct69, + SDValue(vload12, 0), + // glue (or chain) input edge + SDValue(shr2, 1) + ); + +// R26 = R26 >> R08; +// Instr #209 +SDNode *shr3 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub6, 0), + SDValue(sub8, 0), + SDValue(sub6, 0), + // glue (or chain) input edge + SDValue(ishl9, 1) + ); + +// R25 = R08 + R25; +// Instr #210 +SDNode *add1 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl4, 0), + SDValue(sub8, 0), + SDValue(ishl4, 0), + // glue (or chain) input edge + SDValue(shr3, 1) + ); + +// END_WHERE; +// Instr #211 +SDNode *endwhere16 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(add1, 1) + ); + +// R09 = R08 < R31; +// Instr #212 +SDNode *lt9 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub8, 0), + SDValue(vload2, 0), + // glue (or chain) input edge + SDValue(endwhere16, 0) + ); 
+ +// R09 = R09 & R14; +// Instr #213 +SDNode *and29 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt9, 0), + // glue (or chain) input edge + SDValue(lt9, 1) + ); + +// R09 = R09 == R30; +// Instr #214 +SDNode *eq31 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and29, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and29, 1) + ); + +SDValue ct70 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #215 +SDNode *nop17 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct70, + // glue (or chain) input edge + SDValue(eq31, 1) + ); + +// WHERE_EQ; +// Instr #216 +SDNode *whereeq17 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq31, 0), + // glue (or chain) input edge + SDValue(nop17, 0) + ); + +// R08 = R31 - R08; +// Instr #217 +SDNode *sub10 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload2, 0), + SDValue(sub8, 0), + SDValue(sub8, 0), + // glue (or chain) input edge + SDValue(whereeq17, 1) + ); + +// R26 = R26 << R08; +// Instr #218 +SDNode *shl3 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shr3, 0), + SDValue(sub10, 0), + SDValue(shr3, 0), + // glue (or chain) input edge + SDValue(sub10, 1) + ); + +// R25 = R25 - R08; +// Instr #219 +SDNode *sub11 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(add1, 0), + SDValue(sub10, 0), + SDValue(add1, 0), + // glue (or chain) input edge + SDValue(shl3, 1) + ); + +// END_WHERE; +// Instr #220 +SDNode *endwhere17 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(sub11, 1) + ); + +// R09 = R25 < R30; +// Instr #221 +SDNode *lt10 = CurDAG->getMachineNode( + Connex::LT_H, + DL, 
+ TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub11, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere17, 0) + ); + +// R09 = R09 & R14; +// Instr #222 +SDNode *and30 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt10, 0), + // glue (or chain) input edge + SDValue(lt10, 1) + ); + +// R09 = R09 == R30; +// Instr #223 +SDNode *eq32 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and30, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and30, 1) + ); + +SDValue ct71 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #224 +SDNode *nop18 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct71, + // glue (or chain) input edge + SDValue(eq32, 1) + ); + +// WHERE_EQ; +// Instr #225 +SDNode *whereeq18 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq32, 0), + // glue (or chain) input edge + SDValue(nop18, 0) + ); + +// R61 = R30 - R25; +// Instr #226 +SDNode *sub12 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(sub11, 0), + SDValue(ishl9, 0), + // glue (or chain) input edge + SDValue(whereeq18, 1) + ); + +SDValue ct72 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R25 = 1; +// Instr #227 +SDNode *vload37 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct72, + SDValue(sub11, 0), + // glue (or chain) input edge + SDValue(sub12, 1) + ); + +SDValue ct73 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R62 = R26 << 0; +// Instr #228 +SDNode *ishl10 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl3, 0), + ct73, + SDValue(shr2, 0), + // glue (or chain) input edge + SDValue(vload37, 1) + ); + +// R09 = R29 - R61; +// Instr #229 +SDNode 
*sub13 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload3, 0), + SDValue(sub12, 0), + SDValue(eq32, 0), + // glue (or chain) input edge + SDValue(ishl10, 1) + ); + +// R62 = R62 << R09; +// Instr #230 +SDNode *shl4 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishl10, 0), + SDValue(sub13, 0), + SDValue(ishl10, 0), + // glue (or chain) input edge + SDValue(sub13, 1) + ); + +// R62 = R62 >> R09; +// Instr #231 +SDNode *shr4 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl4, 0), + SDValue(sub13, 0), + SDValue(shl4, 0), + // glue (or chain) input edge + SDValue(shl4, 1) + ); + +// R26 = R26 >> R61; +// Instr #232 +SDNode *shr5 = CurDAG->getMachineNode( + Connex::SHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl3, 0), + SDValue(sub12, 0), + SDValue(shl3, 0), + // glue (or chain) input edge + SDValue(shr4, 1) + ); + +// END_WHERE; +// Instr #233 +SDNode *endwhere18 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(shr5, 1) + ); + +SDValue ct74 = CurDAG->getConstant(1024, DL, MVT::i16, true, false); +// R08 = 1024; +// Instr #234 +SDNode *vload38 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct74, + // glue (or chain) input edge + SDValue(endwhere18, 0) + ); + +// R08 = R26 < R08; +// Instr #235 +SDNode *lt11 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shr5, 0), + SDValue(vload38, 0), + // glue (or chain) input edge + SDValue(vload38, 1) + ); + +// R09 = R25 == R30; +// Instr #236 +SDNode *eq33 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload37, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(lt11, 1) + ); + +// R09 = R09 & R14; +// Instr #237 +SDNode *and31 = CurDAG->getMachineNode( + 
Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(eq33, 0), + // glue (or chain) input edge + SDValue(eq33, 1) + ); + +// R09 = R09 & R08; +// Instr #238 +SDNode *and32 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(lt11, 0), + SDValue(and31, 0), + // glue (or chain) input edge + SDValue(and31, 1) + ); + +// R09 = R09 == R30; +// Instr #239 +SDNode *eq34 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and32, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and32, 1) + ); + +SDValue ct75 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #240 +SDNode *nop19 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct75, + // glue (or chain) input edge + SDValue(eq34, 1) + ); + +// WHERE_EQ; +// Instr #241 +SDNode *whereeq19 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq34, 0), + // glue (or chain) input edge + SDValue(nop19, 0) + ); + +SDValue ct76 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R25 = 0; +// Instr #242 +SDNode *vload39 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct76, + SDValue(vload37, 0), + // glue (or chain) input edge + SDValue(whereeq19, 1) + ); + +// END_WHERE; +// Instr #243 +SDNode *endwhere19 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(vload39, 1) + ); + +// R26 = R26 & R13; +// Instr #244 +SDNode *and33 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload5, 0), + SDValue(shr5, 0), + // glue (or chain) input edge + SDValue(endwhere19, 0) + ); + +SDValue ct77 = CurDAG->getConstant(30, DL, MVT::i16, true, false); +// R09 = 30; +// Instr #245 +SDNode *vload40 = CurDAG->getMachineNode( + Connex::VLOAD_H, + DL, + TYPE_VECTOR_I16, + 
MVT::Glue, + ct77, + // glue (or chain) input edge + SDValue(and33, 1) + ); + +// R09 = R09 < R25; +// Instr #246 +SDNode *lt12 = CurDAG->getMachineNode( + Connex::LT_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload40, 0), + SDValue(vload39, 0), + // glue (or chain) input edge + SDValue(vload40, 1) + ); + +// R09 = R09 & R14; +// Instr #247 +SDNode *and34 = CurDAG->getMachineNode( + Connex::ANDV_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload24, 0), + SDValue(lt12, 0), + // glue (or chain) input edge + SDValue(lt12, 1) + ); + +// R09 = R09 == R30; +// Instr #248 +SDNode *eq35 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and34, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(and34, 1) + ); + +SDValue ct78 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #249 +SDNode *nop20 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct78, + // glue (or chain) input edge + SDValue(eq35, 1) + ); + +// WHERE_EQ; +// Instr #250 +SDNode *whereeq20 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq35, 0), + // glue (or chain) input edge + SDValue(nop20, 0) + ); + +SDValue ct79 = CurDAG->getConstant(0, DL, MVT::i16, true, false); +// R14 = 0; +// Instr #251 +SDNode *vload41 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct79, + SDValue(vload24, 0), + // glue (or chain) input edge + SDValue(whereeq20, 1) + ); + +SDValue ct80 = CurDAG->getConstant(31744, DL, MVT::i16, true, false); +// R19 = 31744; +// Instr #252 +SDNode *vload42 = CurDAG->getMachineNode( + Connex::VLOAD_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + ct80, + SDValue(xor0, 0), + // glue (or chain) input edge + SDValue(vload41, 1) + ); + +// R19 = R19 | R24; +// Instr #253 +SDNode *or13 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + 
SDValue(and26, 0), + SDValue(vload42, 0), + SDValue(vload42, 0), + // glue (or chain) input edge + SDValue(vload42, 1) + ); + +// END_WHERE; +// Instr #254 +SDNode *endwhere20 = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, + MVT::Glue, + // glue (or chain) input edge + SDValue(or13, 1) + ); + +// R08 = R14 == R30; +// Instr #255 +SDNode *eq36 = CurDAG->getMachineNode( + Connex::EQ_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload41, 0), + SDValue(vload1, 0), + // glue (or chain) input edge + SDValue(endwhere20, 0) + ); + +SDValue ct81 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false); +// NOP; +// Instr #256 +SDNode *nop21 = CurDAG->getMachineNode( + Connex::NOP_BPF, + DL, + MVT::Glue, + ct81, + // glue (or chain) input edge + SDValue(eq36, 1) + ); + +// WHERE_EQ; +// Instr #257 +SDNode *whereeq21 = CurDAG->getMachineNode( + Connex::WHEREEQ, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(eq36, 0), + // glue (or chain) input edge + SDValue(nop21, 0) + ); + +SDValue ct82 = CurDAG->getConstant(10, DL, MVT::i16, true, false); +// R19 = R25 << 10; +// Instr #258 +SDNode *ishl11 = CurDAG->getMachineNode( + Connex::ISHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload39, 0), + ct82, + SDValue(or13, 0), + // glue (or chain) input edge + SDValue(whereeq21, 1) + ); + +// R19 = R19 | R26; +// Instr #259 +SDNode *or14 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and33, 0), + SDValue(ishl11, 0), + SDValue(ishl11, 0), + // glue (or chain) input edge + SDValue(ishl11, 1) + ); + +// R04 = R26 & R30; +// Instr #260 +SDNode *and35 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(and33, 0), + SDValue(vload14, 0), + // glue (or chain) input edge + SDValue(or14, 1) + ); + +// R07 = R61 - R30; +// Instr #261 +SDNode *sub14 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + 
MVT::Glue, + SDValue(sub12, 0), + SDValue(vload1, 0), + SDValue(and17, 0), + // glue (or chain) input edge + SDValue(and35, 1) + ); + +// R08 = R30 << R08; +// Instr #262 +SDNode *shl5 = CurDAG->getMachineNode( + Connex::SHLV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq36, 0), + SDValue(eq36, 0), + // glue (or chain) input edge + SDValue(sub14, 1) + ); + +// R03 = R62 & R08; +// Instr #263 +SDNode *and36 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl5, 0), + SDValue(shr4, 0), + SDValue(vload15, 0), + // glue (or chain) input edge + SDValue(shl5, 1) + ); + +// R62 = R62 ^ R03; +// Instr #264 +SDNode *xor1 = CurDAG->getMachineNode( + Connex::XORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and36, 0), + SDValue(shr4, 0), + SDValue(shr4, 0), + // glue (or chain) input edge + SDValue(and36, 1) + ); + +// R03 = R03 == R31; +// Instr #265 +SDNode *eq37 = CurDAG->getMachineNode( + Connex::EQ_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and36, 0), + SDValue(vload2, 0), + SDValue(and36, 0), + // glue (or chain) input edge + SDValue(xor1, 1) + ); + +// R03 = R30 - R03; +// Instr #266 +SDNode *sub15 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq37, 0), + SDValue(eq37, 0), + // glue (or chain) input edge + SDValue(eq37, 1) + ); + +SDValue ct83 = CurDAG->getConstant(1, DL, MVT::i16, true, false); +// R08 = R08 >> 1; +// Instr #267 +SDNode *ishr6 = CurDAG->getMachineNode( + Connex::ISHRV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(shl5, 0), + ct83, + SDValue(shl5, 0), + // glue (or chain) input edge + SDValue(sub15, 1) + ); + +// R02 = R62 & R08; +// Instr #268 +SDNode *and37 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(ishr6, 0), + SDValue(xor1, 0), + SDValue(vload16, 0), + // glue (or chain) input edge + 
SDValue(ishr6, 1) + ); + +// R62 = R62 ^ R02; +// Instr #269 +SDNode *xor2 = CurDAG->getMachineNode( + Connex::XORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and37, 0), + SDValue(xor1, 0), + SDValue(xor1, 0), + // glue (or chain) input edge + SDValue(and37, 1) + ); + +// R02 = R02 == R31; +// Instr #270 +SDNode *eq38 = CurDAG->getMachineNode( + Connex::EQ_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and37, 0), + SDValue(vload2, 0), + SDValue(and37, 0), + // glue (or chain) input edge + SDValue(xor2, 1) + ); + +// R02 = R30 - R02; +// Instr #271 +SDNode *sub16 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq38, 0), + SDValue(eq38, 0), + // glue (or chain) input edge + SDValue(eq38, 1) + ); + +// R01 = R62 == R31; +// Instr #272 +SDNode *eq39 = CurDAG->getMachineNode( + Connex::EQ_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(xor2, 0), + SDValue(vload2, 0), + SDValue(vload17, 0), + // glue (or chain) input edge + SDValue(sub16, 1) + ); + +// R01 = R30 - R01; +// Instr #273 +SDNode *sub17 = CurDAG->getMachineNode( + Connex::SUBV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(vload1, 0), + SDValue(eq39, 0), + SDValue(eq39, 0), + // glue (or chain) input edge + SDValue(eq39, 1) + ); + +// R00 = R04 | R02; +// Instr #274 +SDNode *or15 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub16, 0), + SDValue(and35, 0), + SDValue(vload18, 0), + // glue (or chain) input edge + SDValue(sub17, 1) + ); + +// R00 = R00 | R01; +// Instr #275 +SDNode *or16 = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(sub17, 0), + SDValue(or15, 0), + SDValue(or15, 0), + // glue (or chain) input edge + SDValue(or15, 1) + ); + +// R00 = R00 & R03; +// Instr #276 +SDNode *and38 = CurDAG->getMachineNode( + Connex::ANDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + 
SDValue(sub15, 0), + SDValue(or16, 0), + SDValue(or16, 0), + // glue (or chain) input edge + SDValue(or16, 1) + ); + +// R19 = R00 + R19; +// Instr #277 +SDNode *add2 = CurDAG->getMachineNode( + Connex::ADDV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(or14, 0), + SDValue(and38, 0), + SDValue(or14, 0), + // glue (or chain) input edge + SDValue(and38, 1) + ); + +// R19 = R19 | R24; +// Instr #278 +SDNode *resF16 /*or17*/ = CurDAG->getMachineNode( + Connex::ORV_SPECIAL_H, + DL, + TYPE_VECTOR_I16, + MVT::Glue, + SDValue(and26, 0), + SDValue(add2, 0), + SDValue(add2, 0), + // glue (or chain) input edge + SDValue(add2, 1) + ); + +// END_WHERE; +// Instr #279 +SDNode *lastNode /*endwhere21*/ = CurDAG->getMachineNode( + Connex::END_WHERE, + DL, +// Alex: MVT::Glue, + MVT::Other, + // glue (or chain) input edge + SDValue(resF16 /*or17*/, 1) + ); + Index: lib/Target/Connex/Select_SUBi32_OpincaaCodeGen.h =================================================================== --- lib/Target/Connex/Select_SUBi32_OpincaaCodeGen.h +++ lib/Target/Connex/Select_SUBi32_OpincaaCodeGen.h @@ -0,0 +1,205 @@ +// From /home/asusu/LLVM/llvm38Nov2016/llvm/build40/bin/Tests/NEW_v128i16/opincaa_standalone_apps/Emulate_i32/SUB_i32_manual/DumpISel_OpincaaCodeGen_old110_400.cpp + +// R27 is REG_SRC1. It is represented by result of nodeOpSrcCast1. +// R28 is REG_SRC2. It is represented by result of nodeOpSrcCast2. + + + +// Code auto-generated by method Kernel::genLLVMISelManualCode() +// from Opincaa lib from kernel: simpleIoTest_allowOverwrite123456. +// It is important to put this code in the Select() method of the +// SelectionDAGISel class of your back end, after the ISelLowering pass, +// which contains the DAG Combiner, because the DAG Combiner can remove +// the getCopyToReg() we create, which can lead to the following error: +// <> assertion failed. +// Number of instructions generated: 15. 
+// Sequence below, per its own comments: R29 = R27 - R28; R23 = ADDC(0, 0);
+// inside WHERE_EQ on (INDEX & 1) == 1, R23 = 0; CELL_SHR(R23, 1); R23 =
+// SHIFT_REG; R29 = R29 - R23. Presumably this propagates a borrow between
+// adjacent i16 lanes to emulate i32 subtraction - TODO confirm.
+SDValue ct0 = CurDAG->getConstant(0, DL, MVT::i16, true, false);
+// R31 = 0;
+// Instr #0
+SDNode *vload0 = CurDAG->getMachineNode(
+    Connex::VLOAD_H,
+    DL,
+    TYPE_VECTOR_I16,
+    MVT::Glue,
+    ct0,
+    // glue (or chain) input edge
+    SDValue(nodeOpSrcCast2, 1)
+    );
+
+SDValue ct1 = CurDAG->getConstant(1, DL, MVT::i16, true, false);
+// R30 = 1;
+// Instr #1
+SDNode *vload1 = CurDAG->getMachineNode(
+    Connex::VLOAD_H,
+    DL,
+    TYPE_VECTOR_I16,
+    MVT::Glue,
+    ct1,
+    // glue (or chain) input edge
+    SDValue(vload0, 1)
+    );
+
+// R29 = R27 - R28;
+// Instr #2
+SDNode *sub0 = CurDAG->getMachineNode(
+    Connex::SUBV_H,
+    DL,
+    TYPE_VECTOR_I16,
+    MVT::Glue,
+    SDValue(nodeOpSrcCast1, 0),
+    SDValue(nodeOpSrcCast2, 0),
+    // glue (or chain) input edge
+    SDValue(vload1, 1)
+    );
+
+// R23 = ADDC(R31, R31);
+// Instr #3
+SDNode *addc0 = CurDAG->getMachineNode(
+    Connex::ADDCV_H,
+    DL,
+    TYPE_VECTOR_I16,
+    MVT::Glue,
+    SDValue(vload0, 0),
+    SDValue(vload0, 0),
+    SDValue(sub0, 0)
+    // no need for glue or chain input (since it normally consumes the output of the predecessor)
+    );
+
+// R26 = INDEX;
+// Instr #4
+SDNode *ldix0 = CurDAG->getMachineNode(
+    Connex::LDIX_H,
+    DL,
+    TYPE_VECTOR_I16,
+    MVT::Glue,
+    // glue (or chain) input edge
+    SDValue(addc0, 1)
+    );
+
+// R25 = R26 & R30;
+// Instr #5
+SDNode *and0 = CurDAG->getMachineNode(
+    Connex::ANDV_H,
+    DL,
+    TYPE_VECTOR_I16,
+    MVT::Glue,
+    SDValue(vload1, 0),
+    SDValue(ldix0, 0),
+    // glue (or chain) input edge
+    SDValue(ldix0, 1)
+    );
+
+// R24 = R25 == R30;
+// Instr #6
+SDNode *eq0 = CurDAG->getMachineNode(
+    Connex::EQ_H,
+    DL,
+    TYPE_VECTOR_I16,
+    MVT::Glue,
+    SDValue(and0, 0),
+    SDValue(vload1, 0),
+    // glue (or chain) input edge
+    SDValue(and0, 1)
+    );
+
+SDValue ct2 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false);
+// NOP;
+// Instr #7
+SDNode *nop0 = CurDAG->getMachineNode(
+    Connex::NOP_BPF,
+    DL,
+    MVT::Glue,
+    ct2,
+    // glue (or chain) input edge
+    SDValue(eq0, 1)
+    );
+
+// WHERE_EQ;
+// Instr #8
+SDNode *whereeq0 = CurDAG->getMachineNode(
+    Connex::WHEREEQ,
+    DL,
+    TYPE_VECTOR_I16,
+    MVT::Glue,
+    SDValue(eq0, 0),
+    // glue (or chain) input edge
+    SDValue(nop0, 0)
+    );
+
+SDValue ct3 = CurDAG->getConstant(0, DL, MVT::i16, true, false);
+// R23 = 0;
+// Instr #9
+SDNode *vload2 = CurDAG->getMachineNode(
+    Connex::VLOAD_SPECIAL_H,
+    DL,
+    TYPE_VECTOR_I16,
+    MVT::Glue,
+    ct3,
+    SDValue(addc0, 0),
+    // glue (or chain) input edge
+    SDValue(whereeq0, 1)
+    );
+
+// END_WHERE;
+// Instr #10
+SDNode *endwhere0 = CurDAG->getMachineNode(
+    Connex::END_WHERE,
+    DL,
+    MVT::Glue,
+    // glue (or chain) input edge
+    SDValue(vload2, 1)
+    );
+
+// CELL_SHR(R23, R30);
+// Instr #11
+SDNode *cellshr0 = CurDAG->getMachineNode(
+    Connex::CELLSHR_H,
+    DL,
+    MVT::Glue,
+    SDValue(vload2, 0),
+    SDValue(vload1, 0),
+    // glue (or chain) input edge
+    SDValue(endwhere0, 0)
+    );
+
+SDValue ct4 = CurDAG->getConstant(1 /* Num of cycles to NOP */, DL, MVT::i16, true, false);
+// NOP;
+// Instr #12
+SDNode *nop1 = CurDAG->getMachineNode(
+    Connex::NOP_BPF,
+    DL,
+    MVT::Glue,
+    ct4,
+    // glue (or chain) input edge
+    SDValue(cellshr0, 0)
+    );
+
+// R23 = SHIFT_REG;
+// Instr #13
+SDNode *ldsh0 = CurDAG->getMachineNode(
+    Connex::LDSH_H,
+    DL,
+    TYPE_VECTOR_I16,
+    MVT::Glue,
+    // glue (or chain) input edge
+    SDValue(nop1, 0)
+    );
+
+// R29 = R29 - R23;
+// Instr #14
+SDNode *resH /*sub1*/ = CurDAG->getMachineNode(
+    Connex::SUBV_H,
+    DL,
+    TYPE_VECTOR_I16,
+    MVT::Glue,
+    SDValue(sub0, 0),
+    SDValue(ldsh0, 0),
+    // glue (or chain) input edge
+    SDValue(ldsh0, 1)
+    );
+
+SDNode *lastNode = resH;
Index: lib/Target/Connex/TargetInfo/CMakeLists.txt
===================================================================
--- lib/Target/Connex/TargetInfo/CMakeLists.txt
+++ lib/Target/Connex/TargetInfo/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMConnexInfo
+  ConnexTargetInfo.cpp
+  )
Index: lib/Target/Connex/TargetInfo/ConnexTargetInfo.cpp
===================================================================
--- lib/Target/Connex/TargetInfo/ConnexTargetInfo.cpp
+++ lib/Target/Connex/TargetInfo/ConnexTargetInfo.cpp
@@ -0,0 +1,28 @@
+//===-- ConnexTargetInfo.cpp - Connex Target Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Connex.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+namespace llvm {
+Target TheConnexTarget;
+}
+
+/// Registers TheConnexTarget with the TargetRegistry under the name "connex".
+///
+/// The arch-match callback accepts Triple::connex so that the target can be
+/// found from a target triple as well as by name; with a callback that always
+/// returns false only name-based lookup could ever select it.
+extern "C" void LLVMInitializeConnexTargetInfo() {
+  TargetRegistry::RegisterTarget(
+      TheConnexTarget, "connex", "Connex", "Connex",
+      [](Triple::ArchType Arch) { return Arch == Triple::connex; },
+      /*HasJIT=*/true);
+}
Index: lib/Target/Connex/TargetInfo/LLVMBuild.txt
===================================================================
--- lib/Target/Connex/TargetInfo/LLVMBuild.txt
+++ lib/Target/Connex/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Connex/TargetInfo/LLVMBuild.txt ----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = ConnexInfo +parent = Connex +required_libraries = Support +add_to_library_groups = Connex Index: lib/Target/Connex/TargetInfo/Makefile =================================================================== --- lib/Target/Connex/TargetInfo/Makefile +++ lib/Target/Connex/TargetInfo/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/Connex/TargetInfo/Makefile ------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMConnexInfo + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+
+include $(LEVEL)/Makefile.common
Index: lib/Target/LLVMBuild.txt
===================================================================
--- lib/Target/LLVMBuild.txt
+++ lib/Target/LLVMBuild.txt
@@ -24,6 +24,7 @@
  AArch64
  AVR
  BPF
+ Connex
  Lanai
  Hexagon
  MSP430
Index: test/CodeGen/Connex/basictest.ll
===================================================================
--- test/CodeGen/Connex/basictest.ll
+++ test/CodeGen/Connex/basictest.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=bpfel | FileCheck %s
+; FIXME(review): this test lives under test/CodeGen/Connex but invokes llc with -march=bpfel, not connex - confirm the intended target.
+define i32 @test0(i32 %X) {
+  %tmp.1 = add i32 %X, 1
+  ret i32 %tmp.1
+; CHECK-LABEL: test0:
+; CHECK: addi r1, 1
+}
+
+; CHECK-LABEL: store_imm:
+; CHECK: stw 0(r1), r{{[03]}}
+; CHECK: stw 4(r2), r{{[03]}}
+define i32 @store_imm(i32* %a, i32* %b) {
+entry:
+  store i32 0, i32* %a, align 4
+  %0 = getelementptr inbounds i32, i32* %b, i32 1
+  store i32 0, i32* %0, align 4
+  ret i32 0
+}
+
+@G = external global i8
+define zeroext i8 @loadG() {
+  %tmp = load i8, i8* @G
+  ret i8 %tmp
+; CHECK-LABEL: loadG:
+; CHECK: ld_64 r1
+; CHECK: ldb r0, 0(r1)
+}
Index: test/CodeGen/Connex/lit.local.cfg
===================================================================
--- test/CodeGen/Connex/lit.local.cfg
+++ test/CodeGen/Connex/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'Connex' in config.root.targets:
+    config.unsupported = True