diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -311,6 +311,7 @@ EM_RISCV = 243, // RISC-V EM_LANAI = 244, // Lanai 32-bit processor EM_BPF = 247, // Linux kernel bpf virtual machine + EM_VE = 251, // NEC SX-Aurora VE }; // Object file classes. diff --git a/llvm/include/llvm/IR/CMakeLists.txt b/llvm/include/llvm/IR/CMakeLists.txt --- a/llvm/include/llvm/IR/CMakeLists.txt +++ b/llvm/include/llvm/IR/CMakeLists.txt @@ -15,6 +15,7 @@ tablegen(LLVM IntrinsicsR600.h -gen-intrinsic-enums -intrinsic-prefix=r600) tablegen(LLVM IntrinsicsRISCV.h -gen-intrinsic-enums -intrinsic-prefix=riscv) tablegen(LLVM IntrinsicsS390.h -gen-intrinsic-enums -intrinsic-prefix=s390) +tablegen(LLVM IntrinsicsVE.h -gen-intrinsic-enums -intrinsic-prefix=ve) tablegen(LLVM IntrinsicsWebAssembly.h -gen-intrinsic-enums -intrinsic-prefix=wasm) tablegen(LLVM IntrinsicsX86.h -gen-intrinsic-enums -intrinsic-prefix=x86) tablegen(LLVM IntrinsicsXCore.h -gen-intrinsic-enums -intrinsic-prefix=xcore) diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h --- a/llvm/include/llvm/IR/CallingConv.h +++ b/llvm/include/llvm/IR/CallingConv.h @@ -241,6 +241,14 @@ /// The remainder matches the regular calling convention. WASM_EmscriptenInvoke = 99, + /// Calling convention used for NEC SX-Aurora VE vec_expf intrinsic + /// function. + VE_VEC_EXPF = 100, + + /// Calling convention used for NEC SX-Aurora VE llvm_grow_stack intrinsic + /// function. + VE_LLVM_GROW_STACK = 101, + /// The highest possible calling convention ID. Must be some 2^k - 1. 
MaxID = 1023 }; diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1356,3 +1356,4 @@ include "llvm/IR/IntrinsicsSystemZ.td" include "llvm/IR/IntrinsicsWebAssembly.td" include "llvm/IR/IntrinsicsRISCV.td" +include "llvm/IR/IntrinsicsVE.td" diff --git a/llvm/include/llvm/IR/IntrinsicsVE.td b/llvm/include/llvm/IR/IntrinsicsVE.td new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/IR/IntrinsicsVE.td @@ -0,0 +1,8 @@ +let TargetPrefix = "ve" in { + + // fencem instructions + def int_ve_fencem1 : Intrinsic<[], [], []>; + def int_ve_fencem2 : Intrinsic<[], [], []>; + def int_ve_fencem3 : Intrinsic<[], [], []>; + +} diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -42,6 +42,7 @@ #include "llvm/IR/IntrinsicsR600.h" #include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/IR/IntrinsicsS390.h" +#include "llvm/IR/IntrinsicsVE.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/IntrinsicsXCore.h" diff --git a/llvm/lib/Target/VE/AsmParser/CMakeLists.txt b/llvm/lib/Target/VE/AsmParser/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/AsmParser/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_component_library(LLVMVEAsmParser + VEAsmParser.cpp + ) diff --git a/llvm/lib/Target/VE/MCTargetDesc/LLVMBuild.txt b/llvm/lib/Target/VE/AsmParser/LLVMBuild.txt copy from llvm/lib/Target/VE/MCTargetDesc/LLVMBuild.txt copy to llvm/lib/Target/VE/AsmParser/LLVMBuild.txt --- a/llvm/lib/Target/VE/MCTargetDesc/LLVMBuild.txt +++ b/llvm/lib/Target/VE/AsmParser/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/VE/MCTargetDesc/LLVMBuild.txt ---------------*- Conf -*--===; +;===- ./lib/Target/VE/AsmParser/LLVMBuild.txt ------------------*- Conf -*--===; ; ; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
; See https://llvm.org/LICENSE.txt for license information. @@ -16,7 +16,7 @@ [component_0] type = Library -name = VEDesc +name = VEAsmParser parent = VE -required_libraries = MC VEInfo Support +required_libraries = MC MCParser VEDesc VEInfo Support add_to_library_groups = VE diff --git a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp @@ -0,0 +1,832 @@ +//===-- VEAsmParser.cpp - Parse VE assembly to MCInst instructions --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/VEMCExpr.h" +#include "MCTargetDesc/VEMCTargetDesc.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include + +using namespace llvm; + +// The generated AsmMatcher VEGenAsmMatcher uses "VE" as the target +// namespace. 
+namespace llvm {
+namespace VE {
+
+// NOTE(review): this using-directive names the namespace it sits in, so it
+// looks like a no-op; kept as-is because the generated matcher's needs are
+// not visible from this file.
+using namespace VE;
+
+} // namespace VE
+} // namespace llvm
+
+namespace {
+
+class VEOperand;
+
+/// VEAsmParser - Target assembly parser for VE. It turns the token stream
+/// supplied by the generic MCAsmParser into VEOperand lists and hands them to
+/// the tblgen-generated matcher declared through VEGenAsmMatcher.inc.
+class VEAsmParser : public MCTargetAsmParser {
+  MCAsmParser &Parser;
+
+  /// @name Auto-generated Match Functions
+  /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "VEGenAsmMatcher.inc"
+
+  /// }
+
+  // public interface of the MCTargetAsmParser.
+  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+                               OperandVector &Operands, MCStreamer &Out,
+                               uint64_t &ErrorInfo,
+                               bool MatchingInlineAsm) override;
+  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+                        SMLoc NameLoc, OperandVector &Operands) override;
+  bool ParseDirective(AsmToken DirectiveID) override;
+
+  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
+                                      unsigned Kind) override;
+
+  // Custom parse functions for VE specific operands.
+  OperandMatchResultTy parseMEMOperand(OperandVector &Operands);
+
+  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Name);
+
+  // Parses one register, modifier expression, immediate or symbol operand
+  // into Operand. Operand is left null when nothing could be parsed.
+  OperandMatchResultTy parseVEAsmOperand(std::unique_ptr<VEOperand> &Operand,
+                                         bool isCall = false);
+
+  OperandMatchResultTy parseBranchModifiers(OperandVector &Operands);
+
+  // Helper function for dealing with %lo / %hi in PIC mode.
+  const VEMCExpr *adjustPICRelocation(VEMCExpr::VariantKind VK,
+                                      const MCExpr *subExpr);
+
+  // Returns true if Tok is matched to a register and returns register in
+  // RegNo (and its VEOperand::RegisterKind in RegKind).
+  bool matchRegisterName(const AsmToken &Tok, unsigned &RegNo,
+                         unsigned &RegKind);
+
+  bool matchVEAsmModifiers(const MCExpr *&EVal, SMLoc &EndLoc);
+  bool parseDirectiveWord(unsigned Size, SMLoc L);
+
+  /// Selects the width used by the pointer-sized data directives: ".nword"
+  /// emits 8 bytes and ".xword" is accepted only when this returns true.
+  /// VE is a 64-bit-only architecture, so this is unconditionally true. The
+  /// earlier comparison against Triple::sparcv9 was a leftover from the Sparc
+  /// parser and could never hold for a VE triple.
+  bool is64Bit() const { return true; }
+
+public:
+  VEAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser,
+              const MCInstrInfo &MII, const MCTargetOptions &Options)
+      : MCTargetAsmParser(Options, sti, MII), Parser(parser) {
+    // Initialize the set of available features.
+    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
+  }
+};
+
+} // end anonymous namespace
+
+// Maps the numeric suffix N of a "%sN" register name (0..63) to the
+// corresponding VE::SXN physical register.
+static const MCPhysReg IntRegs[64] = {
+    VE::SX0,  VE::SX1,  VE::SX2,  VE::SX3,  VE::SX4,  VE::SX5,  VE::SX6,
+    VE::SX7,  VE::SX8,  VE::SX9,  VE::SX10, VE::SX11, VE::SX12, VE::SX13,
+    VE::SX14, VE::SX15, VE::SX16, VE::SX17, VE::SX18, VE::SX19, VE::SX20,
+    VE::SX21, VE::SX22, VE::SX23, VE::SX24, VE::SX25, VE::SX26, VE::SX27,
+    VE::SX28, VE::SX29, VE::SX30, VE::SX31, VE::SX32, VE::SX33, VE::SX34,
+    VE::SX35, VE::SX36, VE::SX37, VE::SX38, VE::SX39, VE::SX40, VE::SX41,
+    VE::SX42, VE::SX43, VE::SX44, VE::SX45, VE::SX46, VE::SX47, VE::SX48,
+    VE::SX49, VE::SX50, VE::SX51, VE::SX52, VE::SX53, VE::SX54, VE::SX55,
+    VE::SX56, VE::SX57, VE::SX58, VE::SX59, VE::SX60, VE::SX61, VE::SX62,
+    VE::SX63};
+
+namespace {
+
+/// VEOperand - Instances of this class represent a parsed VE machine
+/// instruction.
+class VEOperand : public MCParsedAsmOperand {
+public:
+  // Register classes a parsed register operand can be tagged with. Within
+  // this file, matchRegisterName() only ever produces rk_IntReg and
+  // rk_Special.
+  enum RegisterKind {
+    rk_None,
+    rk_IntReg,
+    rk_IntPairReg,
+    rk_FloatReg,
+    rk_DoubleReg,
+    rk_QuadReg,
+    rk_CoprocReg,
+    rk_CoprocPairReg,
+    rk_Special,
+  };
+
+private:
+  // Discriminator for the anonymous union below.
+  enum KindTy {
+    k_Token,
+    k_Register,
+    k_Immediate,
+    k_MemoryReg,
+    k_MemoryImm
+  } Kind;
+
+  SMLoc StartLoc, EndLoc;
+
+  struct Token {
+    const char *Data;
+    unsigned Length;
+  };
+
+  struct RegOp {
+    unsigned RegNum;
+    RegisterKind Kind;
+  };
+
+  struct ImmOp {
+    const MCExpr *Val;
+  };
+
+  // Base register plus either an offset register (k_MemoryReg) or an offset
+  // expression (k_MemoryImm; a null Off stands for the constant 0).
+  struct MemOp {
+    unsigned Base;
+    unsigned OffsetReg;
+    const MCExpr *Off;
+  };
+
+  union {
+    struct Token Tok;
+    struct RegOp Reg;
+    struct ImmOp Imm;
+    struct MemOp Mem;
+  };
+
+public:
+  VEOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+
+  bool isToken() const override { return Kind == k_Token; }
+  bool isReg() const override { return Kind == k_Register; }
+  bool isImm() const override { return Kind == k_Immediate; }
+  bool isMem() const override { return isMEMrr() || isMEMri(); }
+  bool isMEMrr() const { return Kind == k_MemoryReg; }
+  bool isMEMri() const { return Kind == k_MemoryImm; }
+
+  bool isIntReg() const {
+    return (Kind == k_Register && Reg.Kind == rk_IntReg);
+  }
+
+  bool isFloatReg() const {
+    return (Kind == k_Register && Reg.Kind == rk_FloatReg);
+  }
+
+  bool isFloatOrDoubleReg() const {
+    return (Kind == k_Register &&
+            (Reg.Kind == rk_FloatReg || Reg.Kind == rk_DoubleReg));
+  }
+
+  bool isCoprocReg() const {
+    return (Kind == k_Register && Reg.Kind == rk_CoprocReg);
+  }
+
+  StringRef getToken() const {
+    assert(Kind == k_Token && "Invalid access!");
+    return StringRef(Tok.Data, Tok.Length);
+  }
+
+  unsigned getReg() const override {
+    assert((Kind == k_Register) && "Invalid access!");
+    return Reg.RegNum;
+  }
+
+  const MCExpr *getImm() const {
+    assert((Kind == k_Immediate) && "Invalid access!");
+    return Imm.Val;
+  }
+
+  unsigned getMemBase() const {
+    assert((Kind == k_MemoryReg || Kind == k_MemoryImm) && "Invalid access!");
+    return Mem.Base;
+  }
+
+  unsigned getMemOffsetReg() const {
+    assert((Kind == k_MemoryReg) && "Invalid access!");
+    return Mem.OffsetReg;
+  }
+
+  const MCExpr *getMemOff() const {
+    assert((Kind == k_MemoryImm) && "Invalid access!");
+    return Mem.Off;
+  }
+
+  /// getStartLoc - Get the location of the first token of this operand.
+  SMLoc getStartLoc() const override { return StartLoc; }
+  /// getEndLoc - Get the location of the last token of this operand.
+  SMLoc getEndLoc() const override { return EndLoc; }
+
+  /// Debug dump of the operand, one line per operand.
+  void print(raw_ostream &OS) const override {
+    switch (Kind) {
+    case k_Token:
+      OS << "Token: " << getToken() << "\n";
+      break;
+    case k_Register:
+      OS << "Reg: #" << getReg() << "\n";
+      break;
+    case k_Immediate:
+      // Print the expression itself; streaming the bare pointer would only
+      // print its address.
+      assert(getImm() != nullptr);
+      OS << "Imm: " << *getImm() << "\n";
+      break;
+    case k_MemoryReg:
+      OS << "Mem: " << getMemBase() << "+" << getMemOffsetReg() << "\n";
+      break;
+    case k_MemoryImm:
+      // A null offset expression denotes the constant 0 (see addExpr()).
+      OS << "Mem: " << getMemBase() << "+";
+      if (const MCExpr *Off = getMemOff())
+        OS << *Off;
+      else
+        OS << "0";
+      OS << "\n";
+      break;
+    }
+  }
+
+  void addRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getReg()));
+  }
+
+  void addImmOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCExpr *Expr = getImm();
+    addExpr(Inst, Expr);
+  }
+
+  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+    // Add as immediate when possible. Null MCExpr = 0.
+    if (!Expr)
+      Inst.addOperand(MCOperand::createImm(0));
+    else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+      Inst.addOperand(MCOperand::createImm(CE->getValue()));
+    else
+      Inst.addOperand(MCOperand::createExpr(Expr));
+  }
+
+  void addMEMrrOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 2 && "Invalid number of operands!");
+
+    Inst.addOperand(MCOperand::createReg(getMemBase()));
+
+    assert(getMemOffsetReg() != 0 && "Invalid offset");
+    Inst.addOperand(MCOperand::createReg(getMemOffsetReg()));
+  }
+
+  void addMEMriOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 2 && "Invalid number of operands!");
+
+    Inst.addOperand(MCOperand::createReg(getMemBase()));
+
+    const MCExpr *Expr = getMemOff();
+    addExpr(Inst, Expr);
+  }
+
+  // Note: the token keeps pointing into Str's storage; it does not copy it.
+  static std::unique_ptr<VEOperand> CreateToken(StringRef Str, SMLoc S) {
+    auto Op = std::make_unique<VEOperand>(k_Token);
+    Op->Tok.Data = Str.data();
+    Op->Tok.Length = Str.size();
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    return Op;
+  }
+
+  static std::unique_ptr<VEOperand> CreateReg(unsigned RegNum, unsigned Kind,
+                                              SMLoc S, SMLoc E) {
+    auto Op = std::make_unique<VEOperand>(k_Register);
+    Op->Reg.RegNum = RegNum;
+    Op->Reg.Kind = (VEOperand::RegisterKind)Kind;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
+  static std::unique_ptr<VEOperand> CreateImm(const MCExpr *Val, SMLoc S,
+                                              SMLoc E) {
+    auto Op = std::make_unique<VEOperand>(k_Immediate);
+    Op->Imm.Val = Val;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
+  // Turns a parsed register operand into a base+register memory operand.
+  static std::unique_ptr<VEOperand>
+  MorphToMEMrr(unsigned Base, std::unique_ptr<VEOperand> Op) {
+    unsigned offsetReg = Op->getReg();
+    Op->Kind = k_MemoryReg;
+    Op->Mem.Base = Base;
+    Op->Mem.OffsetReg = offsetReg;
+    Op->Mem.Off = nullptr;
+    return Op;
+  }
+
+  // Creates a memory operand that has only a base register. It is modelled
+  // as base + immediate 0 (k_MemoryImm with a null offset expression).
+  // Building it as k_MemoryReg with offset register 0 would trip the
+  // "Invalid offset" assertion in addMEMrrOperands().
+  static std::unique_ptr<VEOperand> CreateMEMr(unsigned Base, SMLoc S,
+                                               SMLoc E) {
+    auto Op = std::make_unique<VEOperand>(k_MemoryImm);
+    Op->Mem.Base = Base;
+    Op->Mem.OffsetReg = 0;
+    Op->Mem.Off = nullptr; // emitted as immediate 0 by addExpr()
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
+  // Turns a parsed immediate operand into a base+immediate memory operand.
+  static std::unique_ptr<VEOperand>
+  MorphToMEMri(unsigned Base, std::unique_ptr<VEOperand> Op) {
+    const MCExpr *Imm = Op->getImm();
+    Op->Kind = k_MemoryImm;
+    Op->Mem.Base = Base;
+    Op->Mem.OffsetReg = 0;
+    Op->Mem.Off = Imm;
+    return Op;
+  }
+};
+
+} // end anonymous namespace
+
+/// Runs the generated matcher over Operands and, on success, emits the
+/// matched instruction to Out. Returns false on success and true (after
+/// reporting a diagnostic) on failure.
+bool VEAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+                                          OperandVector &Operands,
+                                          MCStreamer &Out, uint64_t &ErrorInfo,
+                                          bool MatchingInlineAsm) {
+  MCInst Inst;
+  unsigned MatchResult =
+      MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
+  switch (MatchResult) {
+  case Match_Success:
+    // Emit the instruction the matcher produced. The previous code looped
+    // over a scratch vector that was never filled, so nothing was emitted.
+    Inst.setLoc(IDLoc);
+    Out.EmitInstruction(Inst, getSTI());
+    return false;
+
+  case Match_MissingFeature:
+    return Error(IDLoc,
+                 "instruction requires a CPU feature not currently enabled");
+
+  case Match_InvalidOperand: {
+    SMLoc ErrorLoc = IDLoc;
+    if (ErrorInfo != ~0ULL) {
+      if (ErrorInfo >= Operands.size())
+        return Error(IDLoc, "too few operands for instruction");
+
+      ErrorLoc = ((VEOperand &)*Operands[ErrorInfo]).getStartLoc();
+      if (ErrorLoc == SMLoc())
+        ErrorLoc = IDLoc;
+    }
+
+    return Error(ErrorLoc, "invalid operand for instruction");
+  }
+  case Match_MnemonicFail:
+    return Error(IDLoc, "invalid instruction mnemonic");
+  }
+  llvm_unreachable("Implement any new match types added!");
+}
+
+/// Parses "%<name>" into RegNo. Returns false without consuming anything
+/// (RegNo left at 0) when the current token is not '%', and reports an error
+/// when the name after '%' is not a known register.
+bool VEAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+                                SMLoc &EndLoc) {
+  // NOTE(review): Tok is a reference and is read again after Lex() below;
+  // presumably it aliases the parser's current token and therefore names the
+  // token following '%' at that point - confirm against MCAsmParser::getTok.
+  const AsmToken &Tok = Parser.getTok();
+  StartLoc = Tok.getLoc();
+  EndLoc = Tok.getEndLoc();
+  RegNo = 0;
+  if (getLexer().getKind() != AsmToken::Percent)
+    return false;
+  Parser.Lex();
+  unsigned regKind = VEOperand::rk_None;
+  if (matchRegisterName(Tok, RegNo, regKind)) {
+    Parser.Lex();
+    return false;
+  }
+
+  return Error(StartLoc, "invalid register name");
+}
+
+/// Parses the operands of one instruction statement into Operands, with the
+/// mnemonic as the first entry. Returns true after reporting an error.
+bool VEAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+                                   SMLoc NameLoc, OperandVector &Operands) {
+
+  // First operand in MCInst is instruction mnemonic.
+  Operands.push_back(VEOperand::CreateToken(Name, NameLoc));
+
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    // Read the first operand.
+    if (getLexer().is(AsmToken::Comma)) {
+      if (parseBranchModifiers(Operands) != MatchOperand_Success) {
+        SMLoc Loc = getLexer().getLoc();
+        return Error(Loc, "unexpected token");
+      }
+    }
+    if (parseOperand(Operands, Name) != MatchOperand_Success) {
+      SMLoc Loc = getLexer().getLoc();
+      return Error(Loc, "unexpected token");
+    }
+
+    while (getLexer().is(AsmToken::Comma) || getLexer().is(AsmToken::Plus)) {
+      if (getLexer().is(AsmToken::Plus)) {
+        // Plus tokens are significant in software_traps (p83, sparcv8.pdf). We
+        // must capture them.
+        Operands.push_back(
+            VEOperand::CreateToken("+", Parser.getTok().getLoc()));
+      }
+      Parser.Lex(); // Eat the comma or plus.
+      // Parse and remember the operand.
+      if (parseOperand(Operands, Name) != MatchOperand_Success) {
+        SMLoc Loc = getLexer().getLoc();
+        return Error(Loc, "unexpected token");
+      }
+    }
+  }
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    SMLoc Loc = getLexer().getLoc();
+    return Error(Loc, "unexpected token");
+  }
+  Parser.Lex(); // Consume the EndOfStatement.
+  return false;
+}
+
+/// Handles the target-specific data directives. Returns true when the
+/// directive is not recognised here or when parsing it failed.
+bool VEAsmParser::ParseDirective(AsmToken DirectiveID) {
+  StringRef IDVal = DirectiveID.getString();
+
+  if (IDVal == ".byte")
+    return parseDirectiveWord(1, DirectiveID.getLoc());
+
+  if (IDVal == ".half")
+    return parseDirectiveWord(2, DirectiveID.getLoc());
+
+  if (IDVal == ".word")
+    return parseDirectiveWord(4, DirectiveID.getLoc());
+
+  if (IDVal == ".nword")
+    return parseDirectiveWord(is64Bit() ? 8 : 4, DirectiveID.getLoc());
+
+  if (is64Bit() && IDVal == ".xword")
+    return parseDirectiveWord(8, DirectiveID.getLoc());
+
+  if (IDVal == ".register") {
+    // For now, ignore .register directive.
+    Parser.eatToEndOfStatement();
+    return false;
+  }
+  if (IDVal == ".proc") {
+    // For compatibility, ignore this directive.
+    // (It's supposed to be an "optimization" in the Sun assembler)
+    Parser.eatToEndOfStatement();
+    return false;
+  }
+
+  // Let the MC layer handle other directives.
+  return true;
+}
+
+/// Parses a comma-separated expression list and emits each value with the
+/// given Size in bytes. Returns true on a parse error.
+bool VEAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    while (true) {
+      const MCExpr *Value;
+      if (getParser().parseExpression(Value))
+        return true;
+
+      getParser().getStreamer().EmitValue(Value, Size);
+
+      if (getLexer().is(AsmToken::EndOfStatement))
+        break;
+
+      // FIXME: Improve diagnostic.
+      if (getLexer().isNot(AsmToken::Comma))
+        return Error(L, "unexpected token in directive");
+      Parser.Lex();
+    }
+  }
+  Parser.Lex();
+  return false;
+}
+
+/// Parses "%base", "%base+%reg", "%base+imm" or "%base-imm" and appends the
+/// resulting memory operand to Operands.
+OperandMatchResultTy VEAsmParser::parseMEMOperand(OperandVector &Operands) {
+  SMLoc S, E;
+  unsigned BaseReg = 0;
+
+  if (ParseRegister(BaseReg, S, E)) {
+    return MatchOperand_NoMatch;
+  }
+
+  switch (getLexer().getKind()) {
+  default:
+    return MatchOperand_NoMatch;
+
+  case AsmToken::Comma:
+  case AsmToken::RBrac:
+  case AsmToken::EndOfStatement:
+    Operands.push_back(VEOperand::CreateMEMr(BaseReg, S, E));
+    return MatchOperand_Success;
+
+  case AsmToken::Plus:
+    Parser.Lex(); // Eat the '+'
+    break;
+  case AsmToken::Minus:
+    // Keep the '-' so that it is parsed as part of the offset expression.
+    break;
+  }
+
+  std::unique_ptr<VEOperand> Offset;
+  OperandMatchResultTy ResTy = parseVEAsmOperand(Offset);
+  if (ResTy != MatchOperand_Success || !Offset)
+    return MatchOperand_NoMatch;
+
+  Operands.push_back(Offset->isImm()
+                         ? VEOperand::MorphToMEMri(BaseReg, std::move(Offset))
+                         : VEOperand::MorphToMEMrr(BaseReg, std::move(Offset)));
+
+  return MatchOperand_Success;
+}
+
+/// Parses one operand of instruction Mnemonic: first through the generated
+/// custom operand parsers, then as a bracketed memory operand, and finally as
+/// a plain register/immediate/symbol operand.
+OperandMatchResultTy VEAsmParser::parseOperand(OperandVector &Operands,
+                                               StringRef Mnemonic) {
+
+  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+
+  // If there wasn't a custom match, try the generic matcher below. Otherwise,
+  // there was a match, but an error occurred, in which case, just return that
+  // the operand parsing failed.
+  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
+    return ResTy;
+
+  if (getLexer().is(AsmToken::LBrac)) {
+    // Memory operand
+    Operands.push_back(VEOperand::CreateToken("[", Parser.getTok().getLoc()));
+    Parser.Lex(); // Eat the [
+
+    if (Mnemonic == "cas" || Mnemonic == "casx" || Mnemonic == "casa") {
+      SMLoc S = Parser.getTok().getLoc();
+      if (getLexer().getKind() != AsmToken::Percent)
+        return MatchOperand_NoMatch;
+      Parser.Lex(); // eat %
+
+      unsigned RegNo, RegKind;
+      if (!matchRegisterName(Parser.getTok(), RegNo, RegKind))
+        return MatchOperand_NoMatch;
+
+      Parser.Lex(); // Eat the identifier token.
+      SMLoc E =
+          SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+      Operands.push_back(VEOperand::CreateReg(RegNo, RegKind, S, E));
+      ResTy = MatchOperand_Success;
+    } else {
+      ResTy = parseMEMOperand(Operands);
+    }
+
+    if (ResTy != MatchOperand_Success)
+      return ResTy;
+
+    if (!getLexer().is(AsmToken::RBrac))
+      return MatchOperand_ParseFail;
+
+    Operands.push_back(VEOperand::CreateToken("]", Parser.getTok().getLoc()));
+    Parser.Lex(); // Eat the ]
+
+    // Parse an optional address-space identifier after the address.
+    if (getLexer().is(AsmToken::Integer)) {
+      std::unique_ptr<VEOperand> Op;
+      ResTy = parseVEAsmOperand(Op, false);
+      if (ResTy != MatchOperand_Success || !Op)
+        return MatchOperand_ParseFail;
+      Operands.push_back(std::move(Op));
+    }
+    return MatchOperand_Success;
+  }
+
+  std::unique_ptr<VEOperand> Op;
+
+  ResTy = parseVEAsmOperand(Op, (Mnemonic == "call"));
+  if (ResTy != MatchOperand_Success || !Op)
+    return MatchOperand_ParseFail;
+
+  // Push the parsed operand into the list of operands
+  Operands.push_back(std::move(Op));
+
+  return MatchOperand_Success;
+}
+
+/// Parses a single register ("%name"), modifier expression ("%mod(expr)"),
+/// immediate expression or symbol reference into Op. Op stays null and
+/// MatchOperand_ParseFail is returned when none of these could be parsed.
+/// isCall is currently not consulted.
+OperandMatchResultTy
+VEAsmParser::parseVEAsmOperand(std::unique_ptr<VEOperand> &Op, bool isCall) {
+  SMLoc S = Parser.getTok().getLoc();
+  SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+  const MCExpr *EVal = nullptr;
+
+  Op = nullptr;
+  switch (getLexer().getKind()) {
+  default:
+    break;
+
+  case AsmToken::Percent:
+    Parser.Lex(); // Eat the '%'.
+    unsigned RegNo;
+    unsigned RegKind;
+    if (matchRegisterName(Parser.getTok(), RegNo, RegKind)) {
+      Parser.Lex(); // Eat the identifier token.
+      E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+      Op = VEOperand::CreateReg(RegNo, RegKind, S, E);
+      break;
+    }
+    if (matchVEAsmModifiers(EVal, E)) {
+      E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+      Op = VEOperand::CreateImm(EVal, S, E);
+    }
+    break;
+
+  case AsmToken::Minus:
+  case AsmToken::Integer:
+  case AsmToken::LParen:
+  case AsmToken::Dot:
+    if (!getParser().parseExpression(EVal, E))
+      Op = VEOperand::CreateImm(EVal, S, E);
+    break;
+
+  case AsmToken::Identifier: {
+    StringRef Identifier;
+    if (!getParser().parseIdentifier(Identifier)) {
+      E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+      MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
+
+      const MCExpr *Res =
+          MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+      Op = VEOperand::CreateImm(Res, S, E);
+    }
+    break;
+  }
+  }
+  return (Op) ? MatchOperand_Success : MatchOperand_ParseFail;
+}
+
+/// Consumes a run of ",a" / ",pn" / ",pt" suffixes after the mnemonic and
+/// records each recognised modifier as a token operand.
+OperandMatchResultTy
+VEAsmParser::parseBranchModifiers(OperandVector &Operands) {
+  // parse (,a|,pn|,pt)+
+
+  while (getLexer().is(AsmToken::Comma)) {
+    Parser.Lex(); // Eat the comma
+
+    if (!getLexer().is(AsmToken::Identifier))
+      return MatchOperand_ParseFail;
+    StringRef modName = Parser.getTok().getString();
+    if (modName == "a" || modName == "pn" || modName == "pt") {
+      Operands.push_back(
+          VEOperand::CreateToken(modName, Parser.getTok().getLoc()));
+      Parser.Lex(); // eat the identifier.
+    }
+  }
+  return MatchOperand_Success;
+}
+
+/// Maps an identifier token ("fp", "sp", "s0".."s63", "usrcc") to a VE
+/// register. On success sets RegNo/RegKind and returns true; otherwise leaves
+/// RegNo at 0 and RegKind at rk_None and returns false.
+bool VEAsmParser::matchRegisterName(const AsmToken &Tok, unsigned &RegNo,
+                                    unsigned &RegKind) {
+  int64_t intVal = 0;
+  RegNo = 0;
+  RegKind = VEOperand::rk_None;
+  if (Tok.is(AsmToken::Identifier)) {
+    StringRef Name = Tok.getString();
+
+    // %fp
+    if (Name.equals("fp")) {
+      RegNo = VE::SX9;
+      RegKind = VEOperand::rk_IntReg;
+      return true;
+    }
+    // %sp
+    if (Name.equals("sp")) {
+      RegNo = VE::SX11;
+      RegKind = VEOperand::rk_IntReg;
+      return true;
+    }
+
+    // %s0 - %s63
+    // intVal is signed, so check both bounds before indexing IntRegs.
+    if (Name.substr(0, 1).equals_lower("s") &&
+        !Name.substr(1).getAsInteger(10, intVal) && intVal >= 0 &&
+        intVal < 64) {
+      RegNo = IntRegs[intVal];
+      RegKind = VEOperand::rk_IntReg;
+      return true;
+    }
+
+    if (Name.equals("usrcc")) {
+      RegNo = VE::UCC;
+      RegKind = VEOperand::rk_Special;
+      return true;
+    }
+  }
+  return false;
+}
+
+// Determine if an expression contains a reference to the symbol
+// "_GLOBAL_OFFSET_TABLE_".
+static bool hasGOTReference(const MCExpr *Expr) {
+  switch (Expr->getKind()) {
+  case MCExpr::Target:
+    if (const VEMCExpr *SE = dyn_cast<VEMCExpr>(Expr))
+      return hasGOTReference(SE->getSubExpr());
+    break;
+
+  case MCExpr::Constant:
+    break;
+
+  case MCExpr::Binary: {
+    const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
+    return hasGOTReference(BE->getLHS()) || hasGOTReference(BE->getRHS());
+  }
+
+  case MCExpr::SymbolRef: {
+    const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr);
+    return (SymRef.getSymbol().getName() == "_GLOBAL_OFFSET_TABLE_");
+  }
+
+  case MCExpr::Unary:
+    return hasGOTReference(cast<MCUnaryExpr>(Expr)->getSubExpr());
+  }
+  return false;
+}
+
+/// Wraps subExpr in a VEMCExpr of kind VK, first remapping the LO32/HI32
+/// kinds to their PC- or GOT-relative counterparts in PIC mode.
+const VEMCExpr *VEAsmParser::adjustPICRelocation(VEMCExpr::VariantKind VK,
+                                                 const MCExpr *subExpr) {
+  // When in PIC mode, "%lo(...)" and "%hi(...)" behave differently.
+  // If the expression contains _GLOBAL_OFFSET_TABLE_, it is actually a
+  // PC-relative relocation (VK_VE_PC_LO32 / VK_VE_PC_HI32). Otherwise, it is
+  // interpreted as a GOT relocation (VK_VE_GOT_LO32 / VK_VE_GOT_HI32).
+
+  if (getContext().getObjectFileInfo()->isPositionIndependent()) {
+    switch (VK) {
+    default:
+      break;
+    case VEMCExpr::VK_VE_LO32:
+      VK = (hasGOTReference(subExpr) ? VEMCExpr::VK_VE_PC_LO32
+                                     : VEMCExpr::VK_VE_GOT_LO32);
+      break;
+    case VEMCExpr::VK_VE_HI32:
+      VK = (hasGOTReference(subExpr) ? VEMCExpr::VK_VE_PC_HI32
+                                     : VEMCExpr::VK_VE_GOT_HI32);
+      break;
+    }
+  }
+
+  return VEMCExpr::create(VK, subExpr, getContext());
+}
+
+/// Parses "<modifier>(<expr>)" (the leading '%' has already been consumed)
+/// into EVal. Returns true on success. Note that on some failure paths the
+/// modifier identifier has already been consumed.
+bool VEAsmParser::matchVEAsmModifiers(const MCExpr *&EVal, SMLoc &EndLoc) {
+  AsmToken Tok = Parser.getTok();
+  if (!Tok.is(AsmToken::Identifier))
+    return false;
+
+  StringRef Name = Tok.getString();
+
+  VEMCExpr::VariantKind VK = VEMCExpr::parseVariantKind(Name);
+
+  if (VK == VEMCExpr::VK_VE_None)
+    return false;
+
+  Parser.Lex(); // Eat the identifier.
+  if (Parser.getTok().getKind() != AsmToken::LParen)
+    return false;
+
+  Parser.Lex(); // Eat the LParen token.
+  const MCExpr *subExpr;
+  if (Parser.parseParenExpression(subExpr, EndLoc))
+    return false;
+
+  EVal = adjustPICRelocation(VK, subExpr);
+  return true;
+}
+
+// Registers VEAsmParser as the assembly parser for the VE target.
+extern "C" void LLVMInitializeVEAsmParser() {
+  RegisterMCAsmParser<VEAsmParser> A(getTheVETarget());
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "VEGenAsmMatcher.inc"
+
+// Target hook for operand classes the generated matcher cannot decide on its
+// own. No class is handled yet, so every query yields Match_InvalidOperand.
+unsigned VEAsmParser::validateTargetOperandClass(MCParsedAsmOperand &GOp,
+                                                 unsigned Kind) {
+  VEOperand &Op = (VEOperand &)GOp;
+  if (Op.isFloatOrDoubleReg()) {
+    switch (Kind) {
+    default:
+      break;
+    }
+  }
+  return Match_InvalidOperand;
+}
diff --git a/llvm/lib/Target/VE/CMakeLists.txt b/llvm/lib/Target/VE/CMakeLists.txt
--- a/llvm/lib/Target/VE/CMakeLists.txt
+++ b/llvm/lib/Target/VE/CMakeLists.txt
@@ -1,8 +1,30 @@
 set(LLVM_TARGET_DEFINITIONS VE.td)
 
+tablegen(LLVM VEGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM VEGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM VEGenDisassemblerTables.inc -gen-disassembler)
+tablegen(LLVM VEGenMCCodeEmitter.inc -gen-emitter)
+tablegen(LLVM VEGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM VEGenAsmMatcher.inc -gen-asm-matcher) +tablegen(LLVM VEGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM VEGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM VEGenCallingConv.inc -gen-callingconv) +add_public_tablegen_target(VECommonTableGen) + add_llvm_target(VECodeGen + VEAsmPrinter.cpp + VEFrameLowering.cpp + VEISelDAGToDAG.cpp + VEISelLowering.cpp + VEInstrInfo.cpp + VEMachineFunctionInfo.cpp + VEMCInstLower.cpp + VERegisterInfo.cpp + VESubtarget.cpp VETargetMachine.cpp ) +add_subdirectory(AsmParser) +add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/llvm/lib/Target/VE/InstPrinter/CMakeLists.txt b/llvm/lib/Target/VE/InstPrinter/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/InstPrinter/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_component_library(LLVMVEAsmPrinter + VEInstPrinter.cpp + ) diff --git a/llvm/lib/Target/VE/MCTargetDesc/LLVMBuild.txt b/llvm/lib/Target/VE/InstPrinter/LLVMBuild.txt copy from llvm/lib/Target/VE/MCTargetDesc/LLVMBuild.txt copy to llvm/lib/Target/VE/InstPrinter/LLVMBuild.txt --- a/llvm/lib/Target/VE/MCTargetDesc/LLVMBuild.txt +++ b/llvm/lib/Target/VE/InstPrinter/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/VE/MCTargetDesc/LLVMBuild.txt ---------------*- Conf -*--===; +;===- ./lib/Target/VE/InstPrinter/LLVMBuild.txt ----------------*- Conf -*--===; ; ; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ; See https://llvm.org/LICENSE.txt for license information. 
@@ -16,7 +16,7 @@ [component_0] type = Library -name = VEDesc +name = VEAsmPrinter parent = VE -required_libraries = MC VEInfo Support +required_libraries = MC Support add_to_library_groups = VE diff --git a/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.h b/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.h @@ -0,0 +1,57 @@ +//===-- VEInstPrinter.h - Convert VE MCInst to assembly syntax ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class prints an VE MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_INSTPRINTER_VEINSTPRINTER_H +#define LLVM_LIB_TARGET_VE_INSTPRINTER_VEINSTPRINTER_H + +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { + +class VEInstPrinter : public MCInstPrinter { +public: + VEInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} + + void printRegName(raw_ostream &OS, unsigned RegNo) const override; + void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, + const MCSubtargetInfo &STI, raw_ostream &OS) override; + bool printVEAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &OS); + + // Autogenerated by tblgen. 
+ void printInstruction(const MCInst *, uint64_t, + const MCSubtargetInfo &, raw_ostream &); + bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); + void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, + unsigned PrintMethodIdx, + const MCSubtargetInfo &STI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + + void printOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, + raw_ostream &OS); + void printMemASXOperand(const MCInst *MI, int opNum, + const MCSubtargetInfo &STI, raw_ostream &OS, + const char *Modifier = nullptr); + void printMemASOperand(const MCInst *MI, int opNum, + const MCSubtargetInfo &STI, raw_ostream &OS, + const char *Modifier = nullptr); + void printCCOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, + raw_ostream &OS); + bool printGetGOT(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &OS); +}; +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp b/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp @@ -0,0 +1,133 @@ +//===-- VEInstPrinter.cpp - Convert VE MCInst to assembly syntax -----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This class prints an VE MCInst to a .s file. 
+// +//===----------------------------------------------------------------------===// + +#include "VEInstPrinter.h" +#include "VE.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +// The generated AsmMatcher VEGenAsmWriter uses "VE" as the target +// namespace. +namespace llvm { +namespace VE { +using namespace VE; +} +} // namespace llvm + +#define GET_INSTRUCTION_NAME +#define PRINT_ALIAS_INSTR +#include "VEGenAsmWriter.inc" + +void VEInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { + OS << '%' << StringRef(getRegisterName(RegNo)).lower(); +} + +void VEInstPrinter::printInst(const MCInst *MI, uint64_t Address, + StringRef Annot, const MCSubtargetInfo &STI, + raw_ostream &OS) { + if (!printAliasInstr(MI, STI, OS) && !printVEAliasInstr(MI, STI, OS)) + printInstruction(MI, Address, STI, OS); + printAnnotation(OS, Annot); +} + +bool VEInstPrinter::printVEAliasInstr(const MCInst *MI, + const MCSubtargetInfo &STI, + raw_ostream &O) { + switch (MI->getOpcode()) { + default: + return false; + } +} + +void VEInstPrinter::printOperand(const MCInst *MI, int opNum, + const MCSubtargetInfo &STI, raw_ostream &O) { + const MCOperand &MO = MI->getOperand(opNum); + + if (MO.isReg()) { + printRegName(O, MO.getReg()); + return; + } + + if (MO.isImm()) { + switch (MI->getOpcode()) { + default: + O << (int)MO.getImm(); + return; + } + } + + assert(MO.isExpr() && "Unknown operand kind in printOperand"); + MO.getExpr()->print(O, &MAI); +} + +void VEInstPrinter::printMemASXOperand(const MCInst *MI, int opNum, + const MCSubtargetInfo &STI, + raw_ostream &O, const char *Modifier) { + // If this is an ADD operand, emit it like normal operands. 
+ if (Modifier && !strcmp(Modifier, "arith")) { + printOperand(MI, opNum, STI, O); + O << ", "; + printOperand(MI, opNum + 1, STI, O); + return; + } + + const MCOperand &MO = MI->getOperand(opNum + 1); + if (MO.isImm() && MO.getImm() == 0) { + // don't print "+0" + } else { + printOperand(MI, opNum + 1, STI, O); + } + O << "(,"; + printOperand(MI, opNum, STI, O); + O << ")"; +} + +void VEInstPrinter::printMemASOperand(const MCInst *MI, int opNum, + const MCSubtargetInfo &STI, + raw_ostream &O, const char *Modifier) { + // If this is an ADD operand, emit it like normal operands. + if (Modifier && !strcmp(Modifier, "arith")) { + printOperand(MI, opNum, STI, O); + O << ", "; + printOperand(MI, opNum + 1, STI, O); + return; + } + + const MCOperand &MO = MI->getOperand(opNum + 1); + if (MO.isImm() && MO.getImm() == 0) { + // don't print "+0" + } else { + printOperand(MI, opNum + 1, STI, O); + } + O << "("; + printOperand(MI, opNum, STI, O); + O << ")"; +} + +void VEInstPrinter::printCCOperand(const MCInst *MI, int opNum, + const MCSubtargetInfo &STI, raw_ostream &O) { + int CC = (int)MI->getOperand(opNum).getImm(); + O << VECondCodeToString((VECC::CondCodes)CC); +} + +bool VEInstPrinter::printGetGOT(const MCInst *MI, unsigned opNum, + const MCSubtargetInfo &STI, raw_ostream &O) { + llvm_unreachable("FIXME: Implement VEInstPrinter::printGetGOT."); + return true; +} diff --git a/llvm/lib/Target/VE/LLVMBuild.txt b/llvm/lib/Target/VE/LLVMBuild.txt --- a/llvm/lib/Target/VE/LLVMBuild.txt +++ b/llvm/lib/Target/VE/LLVMBuild.txt @@ -15,19 +15,20 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = MCTargetDesc TargetInfo +subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo [component_0] type = TargetGroup name = VE parent = Target -has_asmparser = 0 -has_asmprinter = 0 +has_asmparser = 1 +has_asmprinter = 1 [component_1] type = Library name = VECodeGen parent = VE -required_libraries = Analysis AsmPrinter 
CodeGen Core MC SelectionDAG +required_libraries = Analysis AsmPrinter CodeGen Core + MC SelectionDAG VEAsmPrinter VEDesc VEInfo Support Target add_to_library_groups = VE diff --git a/llvm/lib/Target/VE/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/VE/MCTargetDesc/CMakeLists.txt --- a/llvm/lib/Target/VE/MCTargetDesc/CMakeLists.txt +++ b/llvm/lib/Target/VE/MCTargetDesc/CMakeLists.txt @@ -1,3 +1,6 @@ -add_llvm_library(LLVMVEDesc +add_llvm_component_library(LLVMVEDesc + VEMCAsmInfo.cpp + VEMCExpr.cpp VEMCTargetDesc.cpp + VETargetStreamer.cpp ) diff --git a/llvm/lib/Target/VE/MCTargetDesc/LLVMBuild.txt b/llvm/lib/Target/VE/MCTargetDesc/LLVMBuild.txt --- a/llvm/lib/Target/VE/MCTargetDesc/LLVMBuild.txt +++ b/llvm/lib/Target/VE/MCTargetDesc/LLVMBuild.txt @@ -18,5 +18,5 @@ type = Library name = VEDesc parent = VE -required_libraries = MC VEInfo Support +required_libraries = MC VEAsmPrinter VEInfo Support add_to_library_groups = VE diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h b/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h @@ -0,0 +1,73 @@ +//===-- VEFixupKinds.h - VE Specific Fixup Entries --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_MCTARGETDESC_VEFIXUPKINDS_H +#define LLVM_LIB_TARGET_VE_MCTARGETDESC_VEFIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace VE { +enum Fixups { + // fixup_ve_call30 - 30-bit PC relative relocation for call + fixup_ve_call30 = FirstTargetFixupKind, + + /// fixup_ve_br22 - 22-bit PC relative relocation for + /// branches + fixup_ve_br22, + + /// fixup_ve_br19 - 19-bit PC relative relocation for + /// branches on icc/xcc + fixup_ve_br19, + + /// fixup_ve_bpr - 16-bit fixup for bpr + fixup_ve_br16_2, + fixup_ve_br16_14, + + /// fixup_ve_hi32 - 32-bit fixup corresponding to foo@hi + fixup_ve_hi32, + + /// fixup_ve_lo32 - 32-bit fixup corresponding to foo@lo + fixup_ve_lo32, + + /// fixup_ve_pc_hi32 - 32-bit fixup corresponding to foo@pc_hi + fixup_ve_pc_hi32, + + /// fixup_ve_pc_lo32 - 32-bit fixup corresponding to foo@pc_lo + fixup_ve_pc_lo32, + + /// fixup_ve_got_hi32 - 32-bit fixup corresponding to foo@got_hi + fixup_ve_got_hi32, + + /// fixup_ve_got_lo32 - 32-bit fixup corresponding to foo@got_lo + fixup_ve_got_lo32, + + /// fixup_ve_gotoff_hi32 - 32-bit fixup corresponding to foo@gotoff_hi + fixup_ve_gotoff_hi32, + + /// fixup_ve_gotoff_lo32 - 32-bit fixup corresponding to foo@gotoff_lo + fixup_ve_gotoff_lo32, + + /// fixup_ve_plt_hi32/lo32 + fixup_ve_plt_hi32, + fixup_ve_plt_lo32, + + /// fixups for Thread Local Storage + fixup_ve_tls_gd_hi32, + fixup_ve_tls_gd_lo32, + fixup_ve_tpoff_hi32, + fixup_ve_tpoff_lo32, + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind +}; +} // namespace VE +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.h b/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.h @@ -0,0 
+1,37 @@ +//===- VEMCAsmInfo.h - VE asm properties -----------------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the VEMCAsmInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_MCTARGETDESC_VEMCASMINFO_H +#define LLVM_LIB_TARGET_VE_MCTARGETDESC_VEMCASMINFO_H + +#include "llvm/MC/MCAsmInfoELF.h" + +namespace llvm { + +class Triple; + +class VEELFMCAsmInfo : public MCAsmInfoELF { + void anchor() override; + +public: + explicit VEELFMCAsmInfo(const Triple &TheTriple); + + const MCExpr * + getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding, + MCStreamer &Streamer) const override; + const MCExpr *getExprForFDESymbol(const MCSymbol *Sym, unsigned Encoding, + MCStreamer &Streamer) const override; +}; + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_VE_MCTARGETDESC_VEMCASMINFO_H diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp @@ -0,0 +1,69 @@ +//===- VEMCAsmInfo.cpp - VE asm properties --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the VEMCAsmInfo properties. 
+// +//===----------------------------------------------------------------------===// + +#include "VEMCAsmInfo.h" +#include "VEMCExpr.h" +#include "llvm/ADT/Triple.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCTargetOptions.h" + +using namespace llvm; + +void VEELFMCAsmInfo::anchor() {} + +VEELFMCAsmInfo::VEELFMCAsmInfo(const Triple &TheTriple) { + + CodePointerSize = CalleeSaveStackSlotSize = 8; + MaxInstLength = MinInstAlignment = 8; + + // VE has ".zero" directive although it is not listed in assembler manual. + // ZeroDirective = nullptr; + + // VE uses ".*byte" directive for unaligned data. + Data8bitsDirective = "\t.byte\t"; + Data16bitsDirective = "\t.2byte\t"; + Data32bitsDirective = "\t.4byte\t"; + Data64bitsDirective = "\t.8byte\t"; + + // Uses '.section' before '.bss' directive. VE requires this although + // assembler manual says simple '.bss' is supported. + UsesELFSectionDirectiveForBSS = true; + + // ExceptionsType = ExceptionHandling::DwarfCFI; + SupportsDebugInformation = true; + // SunStyleELFSectionSwitchSyntax = true; + // UseIntegratedAssembler = true; +} + +const MCExpr *VEELFMCAsmInfo::getExprForPersonalitySymbol( + const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const { + if (Encoding & dwarf::DW_EH_PE_pcrel) { + MCContext &Ctx = Streamer.getContext(); + return VEMCExpr::create(VEMCExpr::VK_VE_R_DISP32, + MCSymbolRefExpr::create(Sym, Ctx), Ctx); + } + + return MCAsmInfo::getExprForPersonalitySymbol(Sym, Encoding, Streamer); +} + +const MCExpr *VEELFMCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym, + unsigned Encoding, + MCStreamer &Streamer) const { + if (Encoding & dwarf::DW_EH_PE_pcrel) { + MCContext &Ctx = Streamer.getContext(); + return VEMCExpr::create(VEMCExpr::VK_VE_R_DISP32, + MCSymbolRefExpr::create(Sym, Ctx), Ctx); + } + return MCAsmInfo::getExprForFDESymbol(Sym, Encoding, Streamer); +} diff --git
a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h @@ -0,0 +1,95 @@ +//====- VEMCExpr.h - VE specific MC expression classes --------*- C++ -*-=====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes VE-specific MCExprs, used for modifiers like +// "%hi" or "%lo" etc., +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_MCTARGETDESC_VEMCEXPR_H +#define LLVM_LIB_TARGET_VE_MCTARGETDESC_VEMCEXPR_H + +#include "VEFixupKinds.h" +#include "llvm/MC/MCExpr.h" + +namespace llvm { + +class StringRef; +class VEMCExpr : public MCTargetExpr { +public: + enum VariantKind { + VK_VE_None, + VK_VE_R_DISP32, + VK_VE_HI32, + VK_VE_LO32, + VK_VE_PC_HI32, + VK_VE_PC_LO32, + VK_VE_GOT_HI32, + VK_VE_GOT_LO32, + VK_VE_GOTOFF_HI32, + VK_VE_GOTOFF_LO32, + VK_VE_PLT_HI32, + VK_VE_PLT_LO32, + VK_VE_TLS_GD_HI32, + VK_VE_TLS_GD_LO32, + VK_VE_TPOFF_HI32, + VK_VE_TPOFF_LO32, + }; + +private: + const VariantKind Kind; + const MCExpr *Expr; + + explicit VEMCExpr(VariantKind Kind, const MCExpr *Expr) + : Kind(Kind), Expr(Expr) {} + +public: + /// @name Construction + /// @{ + + static const VEMCExpr *create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx); + /// @} + /// @name Accessors + /// @{ + + /// getOpcode - Get the kind of this expression. + VariantKind getKind() const { return Kind; } + + /// getSubExpr - Get the child of this expression. + const MCExpr *getSubExpr() const { return Expr; } + + /// getFixupKind - Get the fixup kind of this expression. 
+ VE::Fixups getFixupKind() const { return getFixupKind(Kind); } + + /// @} + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, + const MCFixup *Fixup) const override; + void visitUsedExpr(MCStreamer &Streamer) const override; + MCFragment *findAssociatedFragment() const override { + return getSubExpr()->findAssociatedFragment(); + } + + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Target; + } + + static bool classof(const VEMCExpr *) { return true; } + + static VariantKind parseVariantKind(StringRef name); + static bool printVariantKind(raw_ostream &OS, VariantKind Kind); + static void printVariantKindSuffix(raw_ostream &OS, VariantKind Kind); + static VE::Fixups getFixupKind(VariantKind Kind); +}; + +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp @@ -0,0 +1,229 @@ +//===-- VEMCExpr.cpp - VE specific MC expression classes ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the assembly expression modifiers +// accepted by the VE architecture (e.g. "%hi", "%lo", ...). 
+// +//===----------------------------------------------------------------------===// + +#include "VEMCExpr.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/Object/ELF.h" + +using namespace llvm; + +#define DEBUG_TYPE "vemcexpr" + +const VEMCExpr *VEMCExpr::create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx) { + return new (Ctx) VEMCExpr(Kind, Expr); +} + +void VEMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { + + bool closeParen = printVariantKind(OS, Kind); + + const MCExpr *Expr = getSubExpr(); + Expr->print(OS, MAI); + + if (closeParen) + OS << ')'; + printVariantKindSuffix(OS, Kind); +} + +bool VEMCExpr::printVariantKind(raw_ostream &OS, VariantKind Kind) { + bool closeParen = true; + switch (Kind) { + case VK_VE_None: + closeParen = false; + break; + case VK_VE_R_DISP32: + OS << "%r_disp32("; + break; + case VK_VE_HI32: + return false; // OS << "%hi("; break; + case VK_VE_LO32: + return false; // OS << "%lo("; break; + case VK_VE_PC_HI32: + return false; // OS << "%pc_hi("; break; + case VK_VE_PC_LO32: + return false; // OS << "%pc_lo("; break; + case VK_VE_GOT_HI32: + return false; // OS << "%got_hi("; break; + case VK_VE_GOT_LO32: + return false; // OS << "%got_lo("; break; + case VK_VE_GOTOFF_HI32: + return false; // OS << "%gotoff_hi("; break; + case VK_VE_GOTOFF_LO32: + return false; // OS << "%gotoff_lo("; break; + case VK_VE_PLT_HI32: + return false; // OS << "%plt_hi("; break; + case VK_VE_PLT_LO32: + return false; // OS << "%plt_lo("; break; + case VK_VE_TLS_GD_HI32: + return false; // OS << "%tls_gd_hi("; break; + case VK_VE_TLS_GD_LO32: + return false; // OS << "%tls_gd_lo("; break; + case VK_VE_TPOFF_HI32: + return false; // OS << "%tpoff_hi("; break; + case VK_VE_TPOFF_LO32: + return false; // OS << "%tpoff_lo("; break; + } + return closeParen; +} + +void VEMCExpr::printVariantKindSuffix(raw_ostream &OS, 
VariantKind Kind) { + switch (Kind) { + case VK_VE_None: + break; + case VK_VE_R_DISP32: + break; + case VK_VE_HI32: + OS << "@hi"; + break; + case VK_VE_LO32: + OS << "@lo"; + break; + case VK_VE_PC_HI32: + OS << "@pc_hi"; + break; + case VK_VE_PC_LO32: + OS << "@pc_lo"; + break; + case VK_VE_GOT_HI32: + OS << "@got_hi"; + break; + case VK_VE_GOT_LO32: + OS << "@got_lo"; + break; + case VK_VE_GOTOFF_HI32: + OS << "@gotoff_hi"; + break; + case VK_VE_GOTOFF_LO32: + OS << "@gotoff_lo"; + break; + case VK_VE_PLT_HI32: + OS << "@plt_hi"; + break; + case VK_VE_PLT_LO32: + OS << "@plt_lo"; + break; + case VK_VE_TLS_GD_HI32: + OS << "@tls_gd_hi"; + break; + case VK_VE_TLS_GD_LO32: + OS << "@tls_gd_lo"; + break; + case VK_VE_TPOFF_HI32: + OS << "@tpoff_hi"; + break; + case VK_VE_TPOFF_LO32: + OS << "@tpoff_lo"; + break; + } +} + +VEMCExpr::VariantKind VEMCExpr::parseVariantKind(StringRef name) { + return StringSwitch<VEMCExpr::VariantKind>(name) + .Case("r_disp32", VK_VE_R_DISP32) + .Case("hi", VK_VE_HI32) + .Case("lo", VK_VE_LO32) + .Case("pc_hi", VK_VE_PC_HI32) + .Case("pc_lo", VK_VE_PC_LO32) + .Case("got_hi", VK_VE_GOT_HI32) + .Case("got_lo", VK_VE_GOT_LO32) + .Case("gotoff_hi", VK_VE_GOTOFF_HI32) + .Case("gotoff_lo", VK_VE_GOTOFF_LO32) + .Case("plt_hi", VK_VE_PLT_HI32) + .Case("plt_lo", VK_VE_PLT_LO32) + .Case("tls_gd_hi", VK_VE_TLS_GD_HI32) + .Case("tls_gd_lo", VK_VE_TLS_GD_LO32) + .Case("tpoff_hi", VK_VE_TPOFF_HI32) + .Case("tpoff_lo", VK_VE_TPOFF_LO32) + .Default(VK_VE_None); +} + +VE::Fixups VEMCExpr::getFixupKind(VEMCExpr::VariantKind Kind) { + switch (Kind) { + default: + llvm_unreachable("Unhandled VEMCExpr::VariantKind"); + case VK_VE_HI32: + return VE::fixup_ve_hi32; + case VK_VE_LO32: + return VE::fixup_ve_lo32; + case VK_VE_PC_HI32: + return VE::fixup_ve_pc_hi32; + case VK_VE_PC_LO32: + return VE::fixup_ve_pc_lo32; + case VK_VE_GOT_HI32: + return VE::fixup_ve_got_hi32; + case VK_VE_GOT_LO32: + return VE::fixup_ve_got_lo32; + case VK_VE_GOTOFF_HI32: + return
VE::fixup_ve_gotoff_hi32; + case VK_VE_GOTOFF_LO32: + return VE::fixup_ve_gotoff_lo32; + case VK_VE_PLT_HI32: + return VE::fixup_ve_plt_hi32; + case VK_VE_PLT_LO32: + return VE::fixup_ve_plt_lo32; + case VK_VE_TLS_GD_HI32: + return VE::fixup_ve_tls_gd_hi32; + case VK_VE_TLS_GD_LO32: + return VE::fixup_ve_tls_gd_lo32; + case VK_VE_TPOFF_HI32: + return VE::fixup_ve_tpoff_hi32; + case VK_VE_TPOFF_LO32: + return VE::fixup_ve_tpoff_lo32; + } +} + +bool VEMCExpr::evaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout, + const MCFixup *Fixup) const { + return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup); +} + +static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { + switch (Expr->getKind()) { + case MCExpr::Target: + llvm_unreachable("Can't handle nested target expr!"); + break; + + case MCExpr::Constant: + break; + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr); + fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm); + fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm); + break; + } + + case MCExpr::SymbolRef: { + const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr); + cast<MCSymbolELF>(SymRef.getSymbol()).setType(ELF::STT_TLS); + break; + } + + case MCExpr::Unary: + fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm); + break; + } +} + +void VEMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { + fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); +} + +void VEMCExpr::visitUsedExpr(MCStreamer &Streamer) const { + Streamer.visitUsedExpr(*getSubExpr()); +} diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h --- a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h @@ -18,10 +18,36 @@ #include <memory> namespace llvm { - +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectWriter; +class MCRegisterInfo; +class MCSubtargetInfo; +class MCTargetOptions; class Target; +class Triple;
+class StringRef; +class raw_pwrite_stream; +class raw_ostream; + Target &getTheVETarget(); -} // end llvm namespace +} // end namespace llvm + +// Defines symbolic names for VE registers. This defines a mapping from +// register name to register number. +// +#define GET_REGINFO_ENUM +#include "VEGenRegisterInfo.inc" + +// Defines symbolic names for the VE instructions. +// +#define GET_INSTRINFO_ENUM +#include "VEGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "VEGenSubtargetInfo.inc" #endif diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp --- a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp @@ -11,9 +11,96 @@ //===----------------------------------------------------------------------===// #include "VEMCTargetDesc.h" +#include "InstPrinter/VEInstPrinter.h" +#include "VEMCAsmInfo.h" +#include "VETargetStreamer.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; +#define GET_INSTRINFO_MC_DESC +#include "VEGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "VEGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "VEGenRegisterInfo.inc" + +static MCAsmInfo *createVEMCAsmInfo(const MCRegisterInfo &MRI, const Triple &TT, + const MCTargetOptions &Options) { + MCAsmInfo *MAI = new VEELFMCAsmInfo(TT); + unsigned Reg = MRI.getDwarfRegNum(VE::SX11, true); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 0); + MAI->addInitialFrameState(Inst); + return MAI; +} + +static MCInstrInfo *createVEMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitVEMCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createVEMCRegisterInfo(const Triple &TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitVEMCRegisterInfo(X, VE::SX10); + 
return X; +} + +static MCSubtargetInfo *createVEMCSubtargetInfo(const Triple &TT, StringRef CPU, + StringRef FS) { + if (CPU.empty()) + CPU = "ve"; + return createVEMCSubtargetInfoImpl(TT, CPU, FS); +} + +static MCTargetStreamer * +createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { + return new VETargetELFStreamer(S); +} + +static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool isVerboseAsm) { + return new VETargetAsmStreamer(S, OS); +} + +static MCInstPrinter *createVEMCInstPrinter(const Triple &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { + return new VEInstPrinter(MAI, MII, MRI); +} + extern "C" void LLVMInitializeVETargetMC() { - // TODO + // Register the MC asm info. + RegisterMCAsmInfoFn X(getTheVETarget(), createVEMCAsmInfo); + + for (Target *T : {&getTheVETarget()}) { + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createVEMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(*T, createVEMCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(*T, createVEMCSubtargetInfo); + + // Register the object target streamer. + TargetRegistry::RegisterObjectTargetStreamer(*T, + createObjectTargetStreamer); + + // Register the asm streamer. + TargetRegistry::RegisterAsmTargetStreamer(*T, createTargetAsmStreamer); + + // Register the MCInstPrinter + TargetRegistry::RegisterMCInstPrinter(*T, createVEMCInstPrinter); + } } diff --git a/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.h b/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.h @@ -0,0 +1,47 @@ +//===-- VETargetStreamer.h - VE Target Streamer ----------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_MCTARGETDESC_VETARGETSTREAMER_H +#define LLVM_LIB_TARGET_VE_MCTARGETDESC_VETARGETSTREAMER_H + +#include "llvm/MC/MCELFStreamer.h" +#include "llvm/MC/MCStreamer.h" + +namespace llvm { +class VETargetStreamer : public MCTargetStreamer { + virtual void anchor(); + +public: + VETargetStreamer(MCStreamer &S); + /// Emit ".register <reg>, #ignore". + virtual void emitVERegisterIgnore(unsigned reg) = 0; + /// Emit ".register <reg>, #scratch". + virtual void emitVERegisterScratch(unsigned reg) = 0; +}; + +// This part is for ascii assembly output +class VETargetAsmStreamer : public VETargetStreamer { + formatted_raw_ostream &OS; + +public: + VETargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); + void emitVERegisterIgnore(unsigned reg) override; + void emitVERegisterScratch(unsigned reg) override; +}; + +// This part is for ELF object output +class VETargetELFStreamer : public VETargetStreamer { +public: + VETargetELFStreamer(MCStreamer &S); + MCELFStreamer &getStreamer(); + void emitVERegisterIgnore(unsigned reg) override {} + void emitVERegisterScratch(unsigned reg) override {} +}; +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.cpp b/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.cpp @@ -0,0 +1,44 @@ +//===-- VETargetStreamer.cpp - VE Target Streamer Methods -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides VE specific target streamer methods. +// +//===----------------------------------------------------------------------===// + +#include "VETargetStreamer.h" +#include "InstPrinter/VEInstPrinter.h" +#include "llvm/Support/FormattedStream.h" + +using namespace llvm; + +// pin vtable to this file +VETargetStreamer::VETargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} + +void VETargetStreamer::anchor() {} + +VETargetAsmStreamer::VETargetAsmStreamer(MCStreamer &S, + formatted_raw_ostream &OS) + : VETargetStreamer(S), OS(OS) {} + +void VETargetAsmStreamer::emitVERegisterIgnore(unsigned reg) { + OS << "\t.register " + << "%" << StringRef(VEInstPrinter::getRegisterName(reg)).lower() + << ", #ignore\n"; +} + +void VETargetAsmStreamer::emitVERegisterScratch(unsigned reg) { + OS << "\t.register " + << "%" << StringRef(VEInstPrinter::getRegisterName(reg)).lower() + << ", #scratch\n"; +} + +VETargetELFStreamer::VETargetELFStreamer(MCStreamer &S) : VETargetStreamer(S) {} + +MCELFStreamer &VETargetELFStreamer::getStreamer() { + return static_cast<MCELFStreamer &>(Streamer); +} diff --git a/llvm/lib/Target/VE/VE.h b/llvm/lib/Target/VE/VE.h --- a/llvm/lib/Target/VE/VE.h +++ b/llvm/lib/Target/VE/VE.h @@ -15,5 +15,114 @@ #define LLVM_LIB_TARGET_VE_VE_H #include "MCTargetDesc/VEMCTargetDesc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetMachine.h" +namespace llvm { +class FunctionPass; +class VETargetMachine; +class formatted_raw_ostream; +class AsmPrinter; +class MCInst; +class MachineInstr; + +FunctionPass *createVEISelDag(VETargetMachine &TM); +FunctionPass *createVEPromoteToI1Pass(); + +void LowerVEMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, + AsmPrinter &AP); +} // namespace llvm + +namespace llvm { +// Enums corresponding to VE condition codes, both icc's and fcc's.
These +// values must be kept in sync with the ones in the .td file. +namespace VECC { +enum CondCodes { + // Integer comparison + CC_IG = 0, // Greater + CC_IL = 1, // Less + CC_INE = 2, // Not Equal + CC_IEQ = 3, // Equal + CC_IGE = 4, // Greater or Equal + CC_ILE = 5, // Less or Equal + + // Floating point comparison + CC_AF = 0 + 6, // Never + CC_G = 1 + 6, // Greater + CC_L = 2 + 6, // Less + CC_NE = 3 + 6, // Not Equal + CC_EQ = 4 + 6, // Equal + CC_GE = 5 + 6, // Greater or Equal + CC_LE = 6 + 6, // Less or Equal + CC_NUM = 7 + 6, // Number + CC_NAN = 8 + 6, // NaN + CC_GNAN = 9 + 6, // Greater or NaN + CC_LNAN = 10 + 6, // Less or NaN + CC_NENAN = 11 + 6, // Not Equal or NaN + CC_EQNAN = 12 + 6, // Equal or NaN + CC_GENAN = 13 + 6, // Greater or Equal or NaN + CC_LENAN = 14 + 6, // Less or Equal or NaN + CC_AT = 15 + 6, // Always +}; +} + +inline static const char *VECondCodeToString(VECC::CondCodes CC) { + switch (CC) { + case VECC::CC_IG: + return "gt"; + case VECC::CC_IL: + return "lt"; + case VECC::CC_INE: + return "ne"; + case VECC::CC_IEQ: + return "eq"; + case VECC::CC_IGE: + return "ge"; + case VECC::CC_ILE: + return "le"; + case VECC::CC_AF: + return "af"; + case VECC::CC_G: + return "gt"; + case VECC::CC_L: + return "lt"; + case VECC::CC_NE: + return "ne"; + case VECC::CC_EQ: + return "eq"; + case VECC::CC_GE: + return "ge"; + case VECC::CC_LE: + return "le"; + case VECC::CC_NUM: + return "num"; + case VECC::CC_NAN: + return "nan"; + case VECC::CC_GNAN: + return "gtnan"; + case VECC::CC_LNAN: + return "ltnan"; + case VECC::CC_NENAN: + return "nenan"; + case VECC::CC_EQNAN: + return "eqnan"; + case VECC::CC_GENAN: + return "genan"; + case VECC::CC_LENAN: + return "lenan"; + case VECC::CC_AT: + return "at"; + } + llvm_unreachable("Invalid cond code"); +} + +inline static unsigned HI32(int64_t imm) { + return (unsigned)((imm >> 32) & 0xFFFFFFFF); +} + +inline static unsigned LO32(int64_t imm) { + return (unsigned)(imm & 0xFFFFFFFF); +} + +} // 
namespace llvm #endif diff --git a/llvm/lib/Target/VE/VE.td b/llvm/lib/Target/VE/VE.td new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VE.td @@ -0,0 +1,64 @@ +//===-- VE.td - Describe the VE Target Machine -------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// VE Subtarget features. +// + +//===----------------------------------------------------------------------===// +// Register File, Calling Conv, Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "VERegisterInfo.td" +include "VECallingConv.td" +include "VESchedule.td" +include "VEInstrInfo.td" + +def VEInstrInfo : InstrInfo; + +def VEAsmParser : AsmParser { + bit ShouldEmitMatchRegisterName = 0; +} + +//===----------------------------------------------------------------------===// +// VE processors supported. 
+//===----------------------------------------------------------------------===// + +class Proc Features> + : Processor; + +def : Proc<"ve", []>; +// def : Processor<"ve", VEItinerary, []>; +// def : ProcessorModel<"ve", VESchedModel, []>; + +//===----------------------------------------------------------------------===// +// Declare the target which we are implementing +//===----------------------------------------------------------------------===// + +def VEAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int PassSubtarget = 1; + int Variant = 0; +} + +def VE : Target { + // Pull in Instruction Info: + let InstructionSet = VEInstrInfo; + let AssemblyParsers = [VEAsmParser]; + let AssemblyWriters = [VEAsmWriter]; + let AllowRegisterRenaming = 1; +} diff --git a/llvm/lib/Target/VE/VEAsmPrinter.cpp b/llvm/lib/Target/VE/VEAsmPrinter.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VEAsmPrinter.cpp @@ -0,0 +1,550 @@ +//===-- VEAsmPrinter.cpp - VE LLVM assembly writer ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to GAS-format SPARC assembly language. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstPrinter/VEInstPrinter.h"
+#include "MCTargetDesc/VEMCExpr.h"
+#include "MCTargetDesc/VETargetStreamer.h"
+#include "VE.h"
+#include "VEInstrInfo.h"
+#include "VETargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+namespace {
+class VEAsmPrinter : public AsmPrinter {
+  VETargetStreamer &getTargetStreamer() {
+    return static_cast<VETargetStreamer &>(*OutStreamer->getTargetStreamer());
+  }
+
+public:
+  explicit VEAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+      : AsmPrinter(TM, std::move(Streamer)) {}
+
+  StringRef getPassName() const override { return "VE Assembly Printer"; }
+
+  void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
+  void printMemASXOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
+                          const char *Modifier = nullptr);
+  void printMemASOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
+                         const char *Modifier = nullptr);
+
+  void EmitFunctionBodyStart() override;
+  void EmitInstruction(const MachineInstr *MI) override;
+
+  static const char *getRegisterName(unsigned RegNo) {
+    return VEInstPrinter::getRegisterName(RegNo);
+  }
+
+  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                       const char *ExtraCode, raw_ostream &O) override;
+  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                             const char *ExtraCode, raw_ostream &O) override;
+
+  void LowerGETGOTAndEmitMCInsts(const MachineInstr *MI,
+                                 const MCSubtargetInfo &STI);
+  void LowerGETFunPLTAndEmitMCInsts(const MachineInstr *MI,
+                                    const MCSubtargetInfo &STI);
+  void LowerGETTLSAddrAndEmitMCInsts(const MachineInstr *MI,
+                                     const MCSubtargetInfo &STI);
+  void LowerEH_SJLJ_SETJMPAndEmitMCInsts(const MachineInstr *MI,
+                                         const MCSubtargetInfo &STI);
+  void LowerEH_SJLJ_LONGJMPAndEmitMCInsts(const MachineInstr *MI,
+                                          const MCSubtargetInfo &STI);
+};
+} // end of anonymous namespace
+
+static MCOperand createVEMCOperand(VEMCExpr::VariantKind Kind, MCSymbol *Sym,
+                                   MCContext &OutContext) {
+  const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::create(Sym, OutContext);
+  const VEMCExpr *expr = VEMCExpr::create(Kind, MCSym, OutContext);
+  return MCOperand::createExpr(expr);
+}
+
+static MCOperand createGOTRelExprOp(VEMCExpr::VariantKind Kind,
+                                    MCSymbol *GOTLabel, MCContext &OutContext) {
+  const MCSymbolRefExpr *GOT = MCSymbolRefExpr::create(GOTLabel, OutContext);
+  const VEMCExpr *expr = VEMCExpr::create(Kind, GOT, OutContext);
+  return MCOperand::createExpr(expr);
+}
+
+static void EmitSIC(MCStreamer &OutStreamer, MCOperand &RD,
+                    const MCSubtargetInfo &STI) {
+  MCInst SICInst;
+  SICInst.setOpcode(VE::SIC);
+  SICInst.addOperand(RD);
+  OutStreamer.EmitInstruction(SICInst, STI);
+}
+
+static void EmitBSIC(MCStreamer &OutStreamer, MCOperand &R1, MCOperand &R2,
+                     const MCSubtargetInfo &STI) {
+  MCInst BSICInst;
+  BSICInst.setOpcode(VE::BSIC);
+  BSICInst.addOperand(R1);
+  BSICInst.addOperand(R2);
+  OutStreamer.EmitInstruction(BSICInst, STI);
+}
+
+static void EmitLEAzzi(MCStreamer &OutStreamer, MCOperand &Imm, MCOperand &RD,
+                       const MCSubtargetInfo &STI) {
+  MCInst LEAInst;
+  LEAInst.setOpcode(VE::LEAzzi);
+  LEAInst.addOperand(RD);
+  LEAInst.addOperand(Imm);
+  OutStreamer.EmitInstruction(LEAInst, STI);
+}
+
+static void EmitLEASLzzi(MCStreamer &OutStreamer, MCOperand &Imm, MCOperand &RD,
+                         const MCSubtargetInfo &STI) {
+  MCInst LEASLInst;
+  LEASLInst.setOpcode(VE::LEASLzzi);
+  LEASLInst.addOperand(RD);
+  LEASLInst.addOperand(Imm);
+  OutStreamer.EmitInstruction(LEASLInst, STI);
+}
+
+static void EmitLEAzii(MCStreamer &OutStreamer, MCOperand &RS1, MCOperand &Imm,
+                       MCOperand &RD, const MCSubtargetInfo &STI) {
+  MCInst LEAInst;
+  LEAInst.setOpcode(VE::LEAzii);
+  LEAInst.addOperand(RD);
+  LEAInst.addOperand(RS1);
+  LEAInst.addOperand(Imm);
+  OutStreamer.EmitInstruction(LEAInst, STI);
+}
+
+static void EmitLEASLrri(MCStreamer &OutStreamer, MCOperand &RS1,
+                         MCOperand &RS2, MCOperand &Imm, MCOperand &RD,
+                         const MCSubtargetInfo &STI) {
+  MCInst LEASLInst;
+  LEASLInst.setOpcode(VE::LEASLrri);
+  LEASLInst.addOperand(RS1);
+  LEASLInst.addOperand(RS2);
+  LEASLInst.addOperand(RD);
+  LEASLInst.addOperand(Imm);
+  OutStreamer.EmitInstruction(LEASLInst, STI);
+}
+
+static void EmitBinary(MCStreamer &OutStreamer, unsigned Opcode, MCOperand &RS1,
+                       MCOperand &Src2, MCOperand &RD,
+                       const MCSubtargetInfo &STI) {
+  MCInst Inst;
+  Inst.setOpcode(Opcode);
+  Inst.addOperand(RD);
+  Inst.addOperand(RS1);
+  Inst.addOperand(Src2);
+  OutStreamer.EmitInstruction(Inst, STI);
+}
+
+static void EmitANDrm0(MCStreamer &OutStreamer, MCOperand &RS1, MCOperand &Imm,
+                       MCOperand &RD, const MCSubtargetInfo &STI) {
+  EmitBinary(OutStreamer, VE::ANDrm0, RS1, Imm, RD, STI);
+}
+
+static void EmitHiLo(MCStreamer &OutStreamer, MCSymbol *GOTSym,
+                     VEMCExpr::VariantKind HiKind, VEMCExpr::VariantKind LoKind,
+                     MCOperand &RD, MCContext &OutContext,
+                     const MCSubtargetInfo &STI) {
+
+  MCOperand hi = createVEMCOperand(HiKind, GOTSym, OutContext);
+  MCOperand lo = createVEMCOperand(LoKind, GOTSym, OutContext);
+  MCOperand ci32 = MCOperand::createImm(32);
+  EmitLEAzzi(OutStreamer, lo, RD, STI);
+  EmitANDrm0(OutStreamer, RD, ci32, RD, STI);
+  EmitLEASLzzi(OutStreamer, hi, RD, STI);
+}
+
+void VEAsmPrinter::LowerGETGOTAndEmitMCInsts(const MachineInstr *MI,
+                                             const MCSubtargetInfo &STI) {
+  MCSymbol *GOTLabel =
+      OutContext.getOrCreateSymbol(Twine("_GLOBAL_OFFSET_TABLE_"));
+
+  const MachineOperand &MO = MI->getOperand(0);
+  MCOperand MCRegOP = MCOperand::createReg(MO.getReg());
+
+  if (!isPositionIndependent()) {
+    // Just load the address of GOT to MCRegOP.
+    switch (TM.getCodeModel()) {
+    default:
+      llvm_unreachable("Unsupported absolute code model");
+    case CodeModel::Small:
+    case CodeModel::Medium:
+    case CodeModel::Large:
+      EmitHiLo(*OutStreamer, GOTLabel, VEMCExpr::VK_VE_HI32,
+               VEMCExpr::VK_VE_LO32, MCRegOP, OutContext, STI);
+      break;
+    }
+    return;
+  }
+
+  MCOperand RegGOT = MCOperand::createReg(VE::SX15); // GOT
+  MCOperand RegPLT = MCOperand::createReg(VE::SX16); // PLT
+
+  // lea %got, _GLOBAL_OFFSET_TABLE_@PC_LO(-24)
+  // and %got, %got, (32)0
+  // sic %plt
+  // lea.sl %got, _GLOBAL_OFFSET_TABLE_@PC_HI(%got, %plt)
+  MCOperand cim24 = MCOperand::createImm(-24);
+  MCOperand loImm =
+      createGOTRelExprOp(VEMCExpr::VK_VE_PC_LO32, GOTLabel, OutContext);
+  EmitLEAzii(*OutStreamer, cim24, loImm, MCRegOP, STI);
+  MCOperand ci32 = MCOperand::createImm(32);
+  EmitANDrm0(*OutStreamer, MCRegOP, ci32, MCRegOP, STI);
+  EmitSIC(*OutStreamer, RegPLT, STI);
+  MCOperand hiImm =
+      createGOTRelExprOp(VEMCExpr::VK_VE_PC_HI32, GOTLabel, OutContext);
+  EmitLEASLrri(*OutStreamer, RegGOT, RegPLT, hiImm, MCRegOP, STI);
+}
+
+void VEAsmPrinter::LowerGETFunPLTAndEmitMCInsts(const MachineInstr *MI,
+                                                const MCSubtargetInfo &STI) {
+  const MachineOperand &MO = MI->getOperand(0);
+  MCOperand MCRegOP = MCOperand::createReg(MO.getReg());
+  const MachineOperand &Addr = MI->getOperand(1);
+  MCSymbol *AddrSym = nullptr;
+
+  switch (Addr.getType()) {
+  default:
+    llvm_unreachable("");
+    return;
+  case MachineOperand::MO_MachineBasicBlock:
+    report_fatal_error("MBB is not supported yet");
+    return;
+  case MachineOperand::MO_ConstantPoolIndex:
+    report_fatal_error("ConstantPool is not supported yet");
+    return;
+  case MachineOperand::MO_ExternalSymbol:
+    AddrSym = GetExternalSymbolSymbol(Addr.getSymbolName());
+    break;
+  case MachineOperand::MO_GlobalAddress:
+    AddrSym = getSymbol(Addr.getGlobal());
+    break;
+  }
+
+  if (!isPositionIndependent()) {
+    llvm_unreachable("Unsupported uses of %plt in not PIC code");
+    return;
+  }
+
+  MCOperand RegPLT = MCOperand::createReg(VE::SX16); // PLT
+
+  // lea %dst, %plt_lo(func)(-24)
+  // and %dst, %dst, (32)0
+  // sic %plt                            ; FIXME: is it safe to use %plt here?
+  // lea.sl %dst, %plt_hi(func)(%dst, %plt)
+  MCOperand cim24 = MCOperand::createImm(-24);
+  MCOperand loImm =
+      createGOTRelExprOp(VEMCExpr::VK_VE_PLT_LO32, AddrSym, OutContext);
+  EmitLEAzii(*OutStreamer, cim24, loImm, MCRegOP, STI);
+  MCOperand ci32 = MCOperand::createImm(32);
+  EmitANDrm0(*OutStreamer, MCRegOP, ci32, MCRegOP, STI);
+  EmitSIC(*OutStreamer, RegPLT, STI);
+  MCOperand hiImm =
+      createGOTRelExprOp(VEMCExpr::VK_VE_PLT_HI32, AddrSym, OutContext);
+  EmitLEASLrri(*OutStreamer, MCRegOP, RegPLT, hiImm, MCRegOP, STI);
+}
+
+void VEAsmPrinter::LowerGETTLSAddrAndEmitMCInsts(const MachineInstr *MI,
+                                                 const MCSubtargetInfo &STI) {
+  const MachineOperand &Addr = MI->getOperand(0);
+  MCSymbol *AddrSym = nullptr;
+
+  switch (Addr.getType()) {
+  default:
+    llvm_unreachable("");
+    return;
+  case MachineOperand::MO_MachineBasicBlock:
+    report_fatal_error("MBB is not supported yet");
+    return;
+  case MachineOperand::MO_ConstantPoolIndex:
+    report_fatal_error("ConstantPool is not supported yet");
+    return;
+  case MachineOperand::MO_ExternalSymbol:
+    AddrSym = GetExternalSymbolSymbol(Addr.getSymbolName());
+    break;
+  case MachineOperand::MO_GlobalAddress:
+    AddrSym = getSymbol(Addr.getGlobal());
+    break;
+  }
+
+  MCOperand RegLR = MCOperand::createReg(VE::SX10);  // LR
+  MCOperand RegS0 = MCOperand::createReg(VE::SX0);   // S0
+  MCOperand RegS12 = MCOperand::createReg(VE::SX12); // S12
+  MCSymbol *GetTLSLabel = OutContext.getOrCreateSymbol(Twine("__tls_get_addr"));
+
+  // lea %s0, sym@tls_gd_lo(-24)
+  // and %s0, %s0, (32)0
+  // sic %lr
+  // lea.sl %s0, sym@tls_gd_hi(%s0, %lr)
+  // lea %s12, __tls_get_addr@plt_lo(8)
+  // and %s12, %s12, (32)0
+  // lea.sl %s12, __tls_get_addr@plt_hi(%s12, %lr)
+  // bsic %lr, (, %s12)
+  MCOperand cim24 = MCOperand::createImm(-24);
+  MCOperand loImm =
+      createGOTRelExprOp(VEMCExpr::VK_VE_TLS_GD_LO32, AddrSym, OutContext);
+  EmitLEAzii(*OutStreamer, cim24, loImm, RegS0, STI);
+  MCOperand ci32 = MCOperand::createImm(32);
+  EmitANDrm0(*OutStreamer, RegS0, ci32, RegS0, STI);
+  EmitSIC(*OutStreamer, RegLR, STI);
+  MCOperand hiImm =
+      createGOTRelExprOp(VEMCExpr::VK_VE_TLS_GD_HI32, AddrSym, OutContext);
+  EmitLEASLrri(*OutStreamer, RegS0, RegLR, hiImm, RegS0, STI);
+  MCOperand ci8 = MCOperand::createImm(8);
+  MCOperand loImm2 =
+      createGOTRelExprOp(VEMCExpr::VK_VE_PLT_LO32, GetTLSLabel, OutContext);
+  EmitLEAzii(*OutStreamer, ci8, loImm2, RegS12, STI);
+  EmitANDrm0(*OutStreamer, RegS12, ci32, RegS12, STI);
+  MCOperand hiImm2 =
+      createGOTRelExprOp(VEMCExpr::VK_VE_PLT_HI32, GetTLSLabel, OutContext);
+  EmitLEASLrri(*OutStreamer, RegS12, RegLR, hiImm2, RegS12, STI);
+  EmitBSIC(*OutStreamer, RegLR, RegS12, STI);
+}
+
+void VEAsmPrinter::LowerEH_SJLJ_SETJMPAndEmitMCInsts(
+    const MachineInstr *MI, const MCSubtargetInfo &STI) {
+  //   sic $dest
+  //   lea $dest, 32($dest)     // $dest points 0f
+  //   st $dest, 8(,$src)
+  //   lea $dest, 0
+  //   br.l 16                  // br 1f
+  // 0:
+  //   lea $dest, 1
+  // 1:
+
+  unsigned DestReg = MI->getOperand(0).getReg();
+  unsigned SrcReg = MI->getOperand(1).getReg();
+
+  EmitToStreamer(*OutStreamer, MCInstBuilder(VE::SIC).addReg(DestReg));
+
+  EmitToStreamer(
+      *OutStreamer,
+      MCInstBuilder(VE::LEArzi).addReg(DestReg).addReg(DestReg).addImm(32));
+
+  EmitToStreamer(
+      *OutStreamer,
+      MCInstBuilder(VE::STSri).addReg(SrcReg).addImm(8).addReg(DestReg));
+
+  EmitToStreamer(*OutStreamer,
+                 MCInstBuilder(VE::LEAzzi).addReg(DestReg).addImm(0));
+
+  EmitToStreamer(*OutStreamer, MCInstBuilder(VE::BCRLa).addImm(16));
+
+  EmitToStreamer(*OutStreamer,
+                 MCInstBuilder(VE::LEAzzi).addReg(DestReg).addImm(1));
+}
+
+void VEAsmPrinter::LowerEH_SJLJ_LONGJMPAndEmitMCInsts(
+    const MachineInstr *MI, const MCSubtargetInfo &STI) {
+  // ld %s9, (, $src)           // s9  = fp
+  // ld %s10, 8(, $src)         // s10 = lr
+  // ld %s11, 16(, $src)        // s11 = sp
+  // b.l (%s10)
+
+  unsigned SrcReg = MI->getOperand(0).getReg();
+
+  EmitToStreamer(
+      *OutStreamer,
+      MCInstBuilder(VE::LDSri).addReg(VE::SX9).addReg(SrcReg).addImm(0));
+
+  EmitToStreamer(
+      *OutStreamer,
+      MCInstBuilder(VE::LDSri).addReg(VE::SX10).addReg(SrcReg).addImm(8));
+
+  EmitToStreamer(
+      *OutStreamer,
+      MCInstBuilder(VE::LDSri).addReg(VE::SX11).addReg(SrcReg).addImm(16));
+
+  EmitToStreamer(*OutStreamer,
+                 MCInstBuilder(VE::BAri).addReg(VE::SX10).addImm(0));
+  return;
+}
+
+void VEAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+
+  switch (MI->getOpcode()) {
+  default:
+    break;
+  case TargetOpcode::DBG_VALUE:
+    // FIXME: Debug Value.
+    return;
+  case VE::GETGOT:
+    LowerGETGOTAndEmitMCInsts(MI, getSubtargetInfo());
+    return;
+  case VE::GETFUNPLT:
+    LowerGETFunPLTAndEmitMCInsts(MI, getSubtargetInfo());
+    return;
+  case VE::GETTLSADDR:
+    LowerGETTLSAddrAndEmitMCInsts(MI, getSubtargetInfo());
+    return;
+  // Emit nothing here but a comment if we can.
+  case VE::MEMBARRIER:
+    OutStreamer->emitRawComment("MEMBARRIER");
+    return;
+  case VE::EH_SjLj_SetJmp:
+    LowerEH_SJLJ_SETJMPAndEmitMCInsts(MI, getSubtargetInfo());
+    return;
+  case VE::EH_SjLj_LongJmp:
+    LowerEH_SJLJ_LONGJMPAndEmitMCInsts(MI, getSubtargetInfo());
+    return;
+  }
+  MachineBasicBlock::const_instr_iterator I = MI->getIterator();
+  MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+  do {
+    MCInst TmpInst;
+    LowerVEMachineInstrToMCInst(&*I, TmpInst, *this);
+    EmitToStreamer(*OutStreamer, TmpInst);
+  } while ((++I != E) && I->isInsideBundle()); // Also emit bundled instrs.
+}
+
+void VEAsmPrinter::EmitFunctionBodyStart() {}
+
+void VEAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+                                raw_ostream &O) {
+  const DataLayout &DL = getDataLayout();
+  const MachineOperand &MO = MI->getOperand(opNum);
+  VEMCExpr::VariantKind TF = (VEMCExpr::VariantKind)MO.getTargetFlags();
+
+  bool CloseParen = VEMCExpr::printVariantKind(O, TF);
+
+  switch (MO.getType()) {
+  case MachineOperand::MO_Register:
+    O << "%" << StringRef(getRegisterName(MO.getReg())).lower();
+    break;
+
+  case MachineOperand::MO_Immediate:
+    O << (int)MO.getImm();
+    break;
+  case MachineOperand::MO_MachineBasicBlock:
+    MO.getMBB()->getSymbol()->print(O, MAI);
+    return;
+  case MachineOperand::MO_GlobalAddress:
+    getSymbol(MO.getGlobal())->print(O, MAI);
+    break;
+  case MachineOperand::MO_BlockAddress:
+    O << GetBlockAddressSymbol(MO.getBlockAddress())->getName();
+    break;
+  case MachineOperand::MO_ExternalSymbol:
+    O << MO.getSymbolName();
+    break;
+  case MachineOperand::MO_ConstantPoolIndex:
+    O << DL.getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+      << MO.getIndex();
+    break;
+  case MachineOperand::MO_Metadata:
+    MO.getMetadata()->printAsOperand(O, MMI->getModule());
+    break;
+  default:
+    llvm_unreachable("");
+  }
+  if (CloseParen)
+    O << ")";
+  VEMCExpr::printVariantKindSuffix(O, TF);
+}
+
+void VEAsmPrinter::printMemASXOperand(const MachineInstr *MI, int opNum,
+                                      raw_ostream &O, const char *Modifier) {
+  // If this is an ADD operand, emit it like normal operands.
+  if (Modifier && !strcmp(Modifier, "arith")) {
+    printOperand(MI, opNum, O);
+    O << ", ";
+    printOperand(MI, opNum + 1, O);
+    return;
+  }
+
+  if (MI->getOperand(opNum + 1).isImm() &&
+      MI->getOperand(opNum + 1).getImm() == 0) {
+    // don't print "+0"
+  } else {
+    printOperand(MI, opNum + 1, O);
+  }
+  O << "(,";
+  printOperand(MI, opNum, O);
+  O << ")";
+}
+
+void VEAsmPrinter::printMemASOperand(const MachineInstr *MI, int opNum,
+                                     raw_ostream &O, const char *Modifier) {
+  // If this is an ADD operand, emit it like normal operands.
+  if (Modifier && !strcmp(Modifier, "arith")) {
+    printOperand(MI, opNum, O);
+    O << ", ";
+    printOperand(MI, opNum + 1, O);
+    return;
+  }
+
+  if (MI->getOperand(opNum + 1).isImm() &&
+      MI->getOperand(opNum + 1).getImm() == 0) {
+    // don't print "+0"
+  } else {
+    printOperand(MI, opNum + 1, O);
+  }
+  O << "(";
+  printOperand(MI, opNum, O);
+  O << ")";
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool VEAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                   const char *ExtraCode, raw_ostream &O) {
+  if (ExtraCode && ExtraCode[0]) {
+    if (ExtraCode[1] != 0)
+      return true; // Unknown modifier.
+
+    switch (ExtraCode[0]) {
+    default:
+      // See if this is a generic print operand
+      return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
+    case 'f':
+    case 'r':
+      break;
+    }
+  }
+
+  printOperand(MI, OpNo, O);
+
+  return false;
+}
+
+bool VEAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                                         const char *ExtraCode,
+                                         raw_ostream &O) {
+  if (ExtraCode && ExtraCode[0])
+    return true; // Unknown modifier
+
+  O << '[';
+  printMemASXOperand(MI, OpNo, O);
+  O << ']';
+
+  return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeVEAsmPrinter() {
+  RegisterAsmPrinter<VEAsmPrinter> X(getTheVETarget());
+}
diff --git a/llvm/lib/Target/VE/VECallingConv.td b/llvm/lib/Target/VE/VECallingConv.td
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/VE/VECallingConv.td
@@ -0,0 +1,116 @@
+//===-- VECallingConv.td - Calling Conventions VE ----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the VE architectures.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Aurora VE
+//===----------------------------------------------------------------------===//
+
+def CC_VE_C_Stack: CallingConv<[
+  // F128 are assigned to the stack in 16-byte aligned units
+  CCIfType<[f128], CCAssignToStackWithShadow<16, 16, [SX7]>>,
+
+  // float --> need special handling like below.
+  //    0      4
+  //    +------+------+
+  //    | empty| float|
+  //    +------+------+
+  CCIfType<[f32], CCCustom<"allocateFloat">>,
+
+  // All of the rest are assigned to the stack in 8-byte aligned units.
+  CCAssignToStack<0, 8>
+]>;
+
+def CC_VE : CallingConv<[
+  // All arguments get passed in generic registers if there is space.
+
+  // Promote i1/i8/i16 arguments to i32.
+  CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+  // bool, char, int, enum, long --> generic integer 32 bit registers
+  CCIfType<[i32], CCAssignToRegWithShadow<
+    [SW0, SW1, SW2, SW3, SW4, SW5, SW6, SW7],
+    [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+
+  // float --> generic floating point 32 bit registers
+  CCIfType<[f32], CCAssignToRegWithShadow<
+    [SF0, SF1, SF2, SF3, SF4, SF5, SF6, SF7],
+    [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+
+  // long long/double --> generic 64 bit registers
+  CCIfType<[i64, f64],
+           CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+
+  // long double --> pair of generic 64 bit registers
+  //
+  // NOTE: If Q1 is allocated while SX1 is free, llvm tries to allocate SX1 for
+  //       following operands, this masks SX1 to avoid such behavior.
+  CCIfType<[f128],
+           CCAssignToRegWithShadow<[Q0, Q1, Q2, Q3],
+                                   [SX0, SX1, SX3, SX5]>>,
+
+  // Alternatively, they are assigned to the stack in 8-byte aligned units.
+  CCDelegateTo<CC_VE_C_Stack>
+]>;
+
+// All arguments get passed in stack for varargs function or non-prototyped
+// function.
+def CC_VE2 : CallingConv<[
+  // F128 are assigned to the stack in 16-byte aligned units
+  CCIfType<[f128], CCAssignToStack<16, 16>>,
+
+  // float --> need special handling like below.
+  //    0      4
+  //    +------+------+
+  //    | empty| float|
+  //    +------+------+
+  CCIfType<[f32], CCCustom<"allocateFloat">>,
+
+  CCAssignToStack<0, 8>
+]>;
+
+def RetCC_VE : CallingConv<[
+  // Promote i1/i8/i16 return values to i32.
+  CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+  // bool, char, int, enum, long --> generic integer 32 bit registers
+  CCIfType<[i32], CCAssignToRegWithShadow<
+    [SW0, SW1, SW2, SW3, SW4, SW5, SW6, SW7],
+    [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+
+  // float --> generic floating point 32 bit registers
+  CCIfType<[f32], CCAssignToRegWithShadow<
+    [SF0, SF1, SF2, SF3, SF4, SF5, SF6, SF7],
+    [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+
+  // long long/double --> generic 64 bit registers
+  CCIfType<[i64, f64],
+           CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+
+  // long double --> pair of generic 64 bit registers
+  CCIfType<[f128],
+           CCAssignToRegWithShadow<[Q0, Q1, Q2, Q3],
+                                   [SX0, SX1, SX3, SX5]>>,
+]>;
+
+// Callee-saved registers
+def CSR : CalleeSavedRegs<(add (sequence "SX%u", 18, 33))>;
+def CSR_NoRegs : CalleeSavedRegs<(add)>;
+
+// vec_expf destroys s0, s1, s5, s6, s11, s61, s62, s63, v0-6, and vm6
+def CSR_vec_expf : CalleeSavedRegs<(add (sequence "SX%u", 2, 4),
+                                        (sequence "SX%u", 7, 10),
+                                        (sequence "SX%u", 12, 60)
+                                   )>;
+
+// llvm_grow_stack destroys s62 and s63
+def CSR_llvm_grow_stack : CalleeSavedRegs<(add (sequence "SX%u", 0, 61)
+                                   )>;
diff --git a/llvm/lib/Target/VE/VEFrameLowering.h b/llvm/lib/Target/VE/VEFrameLowering.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/VE/VEFrameLowering.h
@@ -0,0 +1,81 @@
+//===-- VEFrameLowering.h - Define frame lowering for VE --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the VE implementation of the TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_VE_VEFRAMELOWERING_H
+#define LLVM_LIB_TARGET_VE_VEFRAMELOWERING_H
+
+#include "VE.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+
+namespace llvm {
+
+class VESubtarget;
+class VEFrameLowering : public TargetFrameLowering {
+public:
+  explicit VEFrameLowering(const VESubtarget &ST);
+
+  /// emitPrologue/emitEpilogue - These methods insert prologue and epilogue
+  /// code into the function.
+  void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+  void emitPrologueInsns(MachineFunction &MF, MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator MBBI, int NumBytes,
+                         bool RequireFPUpdate) const;
+  void emitEpilogueInsns(MachineFunction &MF, MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator MBBI, int NumBytes,
+                         bool RequireFPUpdate) const;
+
+  MachineBasicBlock::iterator
+  eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator I) const override;
+
+  bool hasReservedCallFrame(const MachineFunction &MF) const override;
+  bool hasFP(const MachineFunction &MF) const override;
+  void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+                            RegScavenger *RS = nullptr) const override;
+
+  int getFrameIndexReference(const MachineFunction &MF, int FI,
+                             unsigned &FrameReg) const override;
+
+  const SpillSlot *
+  getCalleeSavedSpillSlots(unsigned &NumEntries) const override {
+    static const SpillSlot Offsets[] = {
+        {VE::SX17, 40},  {VE::SX18, 48},  {VE::SX19, 56},  {VE::SX20, 64},
+        {VE::SX21, 72},  {VE::SX22, 80},  {VE::SX23, 88},  {VE::SX24, 96},
+        {VE::SX25, 104}, {VE::SX26, 112}, {VE::SX27, 120}, {VE::SX28, 128},
+        {VE::SX29, 136}, {VE::SX30, 144}, {VE::SX31, 152}, {VE::SX32, 160},
+        {VE::SX33, 168}};
+    NumEntries = array_lengthof(Offsets);
+    return Offsets;
+  }
+
+  /// targetHandlesStackFrameRounding - Returns true if the target is
+  /// responsible for rounding up the stack frame (probably at emitPrologue
+  /// time).
+  bool targetHandlesStackFrameRounding() const override { return true; }
+
+private:
+  // Returns true if MF is a leaf procedure.
+  bool isLeafProc(MachineFunction &MF) const;
+
+  // Emits code for adjusting SP in function prologue/epilogue.
+  void emitSPAdjustment(MachineFunction &MF, MachineBasicBlock &MBB,
+                        MachineBasicBlock::iterator MBBI, int NumBytes) const;
+
+  // Emits code for extending SP in function prologue/epilogue.
+  void emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB,
+                    MachineBasicBlock::iterator MBBI, int NumBytes) const;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/VE/VEFrameLowering.cpp b/llvm/lib/Target/VE/VEFrameLowering.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/VE/VEFrameLowering.cpp
@@ -0,0 +1,380 @@
+//===-- VEFrameLowering.cpp - VE Frame Information ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the VE implementation of TargetFrameLowering class.
+// +//===----------------------------------------------------------------------===// + +#include "VEFrameLowering.h" +#include "VEInstrInfo.h" +#include "VEMachineFunctionInfo.h" +#include "VESubtarget.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetOptions.h" + +using namespace llvm; + +static cl::opt + DisableLeafProc("disable-ve-leaf-proc", cl::init(false), + cl::desc("Disable VE leaf procedure optimization."), + cl::Hidden); + +VEFrameLowering::VEFrameLowering(const VESubtarget &ST) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(16), 0, + Align(16)) {} + +void VEFrameLowering::emitPrologueInsns(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + int NumBytes, + bool RequireFPUpdate) const { + + DebugLoc dl; + const VEInstrInfo &TII = + *static_cast(MF.getSubtarget().getInstrInfo()); + // Insert the following code here as the prologue + // + // st %fp, 0(,%sp) + // st %lr, 8(,%sp) + // st %got, 24(,%sp) + // st %plt, 32(,%sp) + // or %fp, 0, %sp + + BuildMI(MBB, MBBI, dl, TII.get(VE::STSri)) + .addReg(VE::SX11) + .addImm(0) + .addReg(VE::SX9); + BuildMI(MBB, MBBI, dl, TII.get(VE::STSri)) + .addReg(VE::SX11) + .addImm(8) + .addReg(VE::SX10); + BuildMI(MBB, MBBI, dl, TII.get(VE::STSri)) + .addReg(VE::SX11) + .addImm(24) + .addReg(VE::SX15); + BuildMI(MBB, MBBI, dl, TII.get(VE::STSri)) + .addReg(VE::SX11) + .addImm(32) + .addReg(VE::SX16); + BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX9).addReg(VE::SX11).addImm(0); +} + +void VEFrameLowering::emitEpilogueInsns(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + int NumBytes, + 
bool RequireFPUpdate) const { + + DebugLoc dl; + const VEInstrInfo &TII = + *static_cast(MF.getSubtarget().getInstrInfo()); + // Insert the following code here as the epilogue + // + // or %sp, 0, %fp + // ld %plt, 32(,%sp) + // ld %got, 24(,%sp) + // ld %lr, 8(,%sp) + // ld %fp, 0(,%sp) + + BuildMI(MBB, MBBI, dl, TII.get(VE::ORri), VE::SX11).addReg(VE::SX9).addImm(0); + BuildMI(MBB, MBBI, dl, TII.get(VE::LDSri), VE::SX16) + .addReg(VE::SX11) + .addImm(32); + BuildMI(MBB, MBBI, dl, TII.get(VE::LDSri), VE::SX15) + .addReg(VE::SX11) + .addImm(24); + BuildMI(MBB, MBBI, dl, TII.get(VE::LDSri), VE::SX10) + .addReg(VE::SX11) + .addImm(8); + BuildMI(MBB, MBBI, dl, TII.get(VE::LDSri), VE::SX9) + .addReg(VE::SX11) + .addImm(0); +} + +void VEFrameLowering::emitSPAdjustment(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + int NumBytes) const { + DebugLoc dl; + const VEInstrInfo &TII = + *static_cast(MF.getSubtarget().getInstrInfo()); + + if (NumBytes >= -64 && NumBytes < 63) { // NOTE(review): lower bound includes -64 but upper bound excludes 63; confirm the intended immediate range + BuildMI(MBB, MBBI, dl, TII.get(VE::ADXri), VE::SX11) + .addReg(VE::SX11) + .addImm(NumBytes); + return; + } + + // Emit the following code. This clobbers SX13 which we always know is + // available here. + // lea %s13,%lo(NumBytes) + // and %s13,%s13,(32)0 + // lea.sl %sp,%hi(NumBytes)(%sp, %s13) + BuildMI(MBB, MBBI, dl, TII.get(VE::LEAzzi), VE::SX13).addImm(LO32(NumBytes)); + BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm0), VE::SX13) + .addReg(VE::SX13) + .addImm(32); + BuildMI(MBB, MBBI, dl, TII.get(VE::LEASLrri), VE::SX11) + .addReg(VE::SX11) + .addReg(VE::SX13) + .addImm(HI32(NumBytes)); +} + +void VEFrameLowering::emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + int NumBytes) const { + DebugLoc dl; + const VEInstrInfo &TII = + *static_cast(MF.getSubtarget().getInstrInfo()); + + // Emit the following code. 
It is not possible to insert multiple + // BasicBlocks in PEI pass, so we emit two pseudo instructions here. 
+ // + // EXTEND_STACK // pseudo instruction + // EXTEND_STACK_GUARD // pseudo instruction + // + // EXTEND_STACK pseudo will be converted by ExpandPostRA pass into + // following instructions with multiple basic blocks later. + // + // thisBB: + // brge.l.t %sp, %sl, sinkBB + // syscallBB: + // ld %s61, 0x18(, %tp) // load param area + // or %s62, 0, %s0 // spill the value of %s0 + // lea %s63, 0x13b // syscall # of grow + // shm.l %s63, 0x0(%s61) // store syscall # at addr:0 + // shm.l %sl, 0x8(%s61) // store old limit at addr:8 + // shm.l %sp, 0x10(%s61) // store new limit at addr:16 + // monc // call monitor + // or %s0, 0, %s62 // restore the value of %s0 + // sinkBB: + // + // EXTEND_STACK_GUARD pseudo will be simply eliminated by ExpandPostRA + // pass. This pseudo is required to immediately follow the EXTEND_STACK + // pseudo in order to protect the iteration loop in ExpandPostRA. + + BuildMI(MBB, MBBI, dl, TII.get(VE::EXTEND_STACK)); + BuildMI(MBB, MBBI, dl, TII.get(VE::EXTEND_STACK_GUARD)); +} + +void VEFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); + MachineFrameInfo &MFI = MF.getFrameInfo(); + const VESubtarget &Subtarget = MF.getSubtarget(); + const VEInstrInfo &TII = + *static_cast(Subtarget.getInstrInfo()); + const VERegisterInfo &RegInfo = + *static_cast(Subtarget.getRegisterInfo()); + MachineBasicBlock::iterator MBBI = MBB.begin(); + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc dl; + bool NeedsStackRealignment = RegInfo.needsStackRealignment(MF); + + // FIXME: unfortunately, returning false from canRealignStack + // actually just causes needsStackRealignment to return false, + // rather than reporting an error, as would be sensible. This is + // poor, but fixing that bogosity is going to be a large project. + // For now, just see if it's lied, and report an error here. 
+ if (!NeedsStackRealignment && MFI.getMaxAlignment() > getStackAlignment()) + report_fatal_error("Function \"" + Twine(MF.getName()) + + "\" required " + "stack re-alignment, but LLVM couldn't handle it " + "(probably because it has a dynamic alloca)."); + + // Get the number of bytes to allocate from the FrameInfo + int NumBytes = (int)MFI.getStackSize(); + // The SPARC ABI is a bit odd in that it requires a reserved 92-byte + // (128 in v9) area in the user's stack, starting at %sp. Thus, the + // first part of the stack that can actually be used is located at + // %sp + 92. + // NOTE(review): the figures above describe SPARC; confirm the VE equivalent. + // We therefore need to add that offset to the total stack size + // after all the stack objects are placed by + // PrologEpilogInserter calculateFrameObjectOffsets. However, since the stack + // needs to be aligned *after* the extra size is added, we need to disable + // calculateFrameObjectOffsets's built-in stack alignment, by having + // targetHandlesStackFrameRounding return true. + + // Add the extra call frame stack size, if needed. (This is the same + // code as in PrologEpilogInserter, but also gets disabled by + // targetHandlesStackFrameRounding) + if (MFI.adjustsStack() && hasReservedCallFrame(MF)) + NumBytes += MFI.getMaxCallFrameSize(); + + // Adds the subtarget-specific spill area to the stack + // size. Also ensures target-required alignment. + NumBytes = Subtarget.getAdjustedFrameSize(NumBytes); + + // Finally, ensure that the size is sufficiently aligned for the + // data on the stack. + if (MFI.getMaxAlignment() > 0) { + NumBytes = alignTo(NumBytes, MFI.getMaxAlignment()); + } + + // Update stack size with corrected value. 
+ MFI.setStackSize(NumBytes); + + // Emit prologue instructions that save %fp, %lr, %got and %plt and set up %fp + emitPrologueInsns(MF, MBB, MBBI, NumBytes, true); + + // Emit stack adjust instructions + emitSPAdjustment(MF, MBB, MBBI, -NumBytes); + + // Emit stack extend instructions + emitSPExtend(MF, MBB, MBBI, -NumBytes); + + unsigned regFP = RegInfo.getDwarfRegNum(VE::SX9, true); + + // Emit ".cfi_def_cfa_register" for %fp (SX9). + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, regFP)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + + // Emit ".cfi_window_save". + CFIIndex = MF.addFrameInst(MCCFIInstruction::createWindowSave(nullptr)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); +} + +MachineBasicBlock::iterator VEFrameLowering::eliminateCallFramePseudoInstr( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + if (!hasReservedCallFrame(MF)) { + MachineInstr &MI = *I; + int Size = MI.getOperand(0).getImm(); + if (MI.getOpcode() == VE::ADJCALLSTACKDOWN) + Size = -Size; + + if (Size) + emitSPAdjustment(MF, MBB, I, Size); + } + return MBB.erase(I); +} + +void VEFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + DebugLoc dl = MBBI->getDebugLoc(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + + int NumBytes = (int)MFI.getStackSize(); + + // Emit epilogue instructions that restore %sp, %plt, %got, %lr and %fp + emitEpilogueInsns(MF, MBB, MBBI, NumBytes, true); +} + +bool VEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + // Reserve call frame if there are no variable sized objects on the stack. 
+ return !MF.getFrameInfo().hasVarSizedObjects(); +} + +// hasFP - Return true if the specified function should have a dedicated frame +// pointer register: FP elimination is disabled, the stack needs realignment, +// or the function has variable sized allocas or takes its frame address. 
+bool VEFrameLowering::hasFP(const MachineFunction &MF) const { + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + + const MachineFrameInfo &MFI = MF.getFrameInfo(); + return MF.getTarget().Options.DisableFramePointerElim(MF) || + RegInfo->needsStackRealignment(MF) || MFI.hasVarSizedObjects() || + MFI.isFrameAddressTaken(); +} + +int VEFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const { + const VESubtarget &Subtarget = MF.getSubtarget(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const VERegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + const VEMachineFunctionInfo *FuncInfo = MF.getInfo(); + bool isFixed = MFI.isFixedObjectIndex(FI); + + // Addressable stack objects are accessed using negative offsets from + // %fp, or positive offsets from %sp. + bool UseFP; + + // VE uses FP-based references in general, even when "hasFP" is + // false. That function is rather a misnomer, because %fp is + // actually always available, unless isLeafProc. + if (FuncInfo->isLeafProc()) { + // If this is a leaf proc, all offsets need to be %sp-based, + // because we haven't caused %fp to actually point to our frame. + UseFP = false; + } else if (isFixed) { + // Otherwise, argument access should always use %fp. + UseFP = true; + } else if (RegInfo->needsStackRealignment(MF)) { + // If there is dynamic stack realignment, all local object + // references need to be via %sp, to take account of the + // re-alignment. + UseFP = false; + } else { + // Finally, default to using %fp. 
+ UseFP = true; + } + + int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI); + + if (UseFP) { + FrameReg = RegInfo->getFrameRegister(MF); + return FrameOffset; + } else { + FrameReg = VE::SX11; // %sp + return FrameOffset + MF.getFrameInfo().getStackSize(); + } +} + +static bool LLVM_ATTRIBUTE_UNUSED +verifyLeafProcRegUse(MachineRegisterInfo *MRI) { + + // If any of the parameter registers are used, this is not a leaf function. + for (unsigned reg = VE::SX0; reg <= VE::SX7; ++reg) + if (MRI->isPhysRegUsed(reg)) + return false; + + // If any of the callee-saved registers are used, this is not a leaf function. + for (unsigned reg = VE::SX18; reg <= VE::SX33; ++reg) + if (MRI->isPhysRegUsed(reg)) + return false; + + return true; +} + +bool VEFrameLowering::isLeafProc(MachineFunction &MF) const { + + MachineRegisterInfo &MRI = MF.getRegInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + + return !(MFI.hasCalls() // has calls + || MRI.isPhysRegUsed(VE::SX18) // Too many registers needed + // (s18 is first CSR) + || MRI.isPhysRegUsed(VE::SX11) // %sp is used + || hasFP(MF)); // need %fp +} + +void VEFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + + if (!DisableLeafProc && isLeafProc(MF)) { + VEMachineFunctionInfo *MFI = MF.getInfo(); + MFI->setLeafProc(true); + } +} diff --git a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp @@ -0,0 +1,275 @@ +//===-- VEISelDAGToDAG.cpp - A dag to dag inst selector for VE ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the VE target. +// +//===----------------------------------------------------------------------===// + +#include "VETargetMachine.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Instruction Selector Implementation +//===----------------------------------------------------------------------===// + +//===--------------------------------------------------------------------===// +/// VEDAGToDAGISel - VE specific code to select VE machine +/// instructions for SelectionDAG operations. +/// +namespace { +class VEDAGToDAGISel : public SelectionDAGISel { + /// Subtarget - Pointer to the VE Subtarget, set in runOnMachineFunction, so + /// that we can make the right decision when generating code for different targets. + const VESubtarget *Subtarget; + +public: + explicit VEDAGToDAGISel(VETargetMachine &tm) : SelectionDAGISel(tm) {} + + bool runOnMachineFunction(MachineFunction &MF) override { + Subtarget = &MF.getSubtarget(); + return SelectionDAGISel::runOnMachineFunction(MF); + } + + void Select(SDNode *N) override; + + // Complex Pattern Selectors. + bool SelectADDRrr(SDValue N, SDValue &R1, SDValue &R2); + bool SelectADDRri(SDValue N, SDValue &Base, SDValue &Offset); + + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for + /// inline asm expressions. 
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, + std::vector &OutOps) override; + + StringRef getPassName() const override { + return "VE DAG->DAG Pattern Instruction Selection"; + } + + // Include the pieces autogenerated from the target description. +#include "VEGenDAGISel.inc" + +private: + SDNode *getGlobalBaseReg(); + bool tryInlineAsm(SDNode *N); +}; +} // end anonymous namespace + +SDNode *VEDAGToDAGISel::getGlobalBaseReg() { + unsigned GlobalBaseReg = Subtarget->getInstrInfo()->getGlobalBaseReg(MF); + return CurDAG + ->getRegister(GlobalBaseReg, TLI->getPointerTy(CurDAG->getDataLayout())) + .getNode(); +} + +bool VEDAGToDAGISel::SelectADDRri(SDValue Addr, SDValue &Base, + SDValue &Offset) { + if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { + Base = CurDAG->getTargetFrameIndex( + FIN->getIndex(), TLI->getPointerTy(CurDAG->getDataLayout())); + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); + return true; + } + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress || + Addr.getOpcode() == ISD::TargetGlobalTLSAddress) + return false; // direct calls. + + if (Addr.getOpcode() == ISD::ADD) { + if (ConstantSDNode *CN = dyn_cast(Addr.getOperand(1))) { + if (isInt<13>(CN->getSExtValue())) { + if (FrameIndexSDNode *FIN = + dyn_cast(Addr.getOperand(0))) { + // Constant offset from frame ref. 
+ Base = CurDAG->getTargetFrameIndex( + FIN->getIndex(), TLI->getPointerTy(CurDAG->getDataLayout())); + } else { + Base = Addr.getOperand(0); + } + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), + MVT::i32); // NOTE(review): the range check uses getSExtValue() but the constant is built from getZExtValue(); confirm this is intended for negative offsets + return true; + } + } + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); + return true; +} + +bool VEDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) { + if (Addr.getOpcode() == ISD::FrameIndex) + return false; + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress || + Addr.getOpcode() == ISD::TargetGlobalTLSAddress) + return false; // direct calls. + + if (Addr.getOpcode() == ISD::ADD) { + if (ConstantSDNode *CN = dyn_cast(Addr.getOperand(1))) + if (isInt<13>(CN->getSExtValue())) + return false; // Let the reg+imm pattern catch this! + if (Addr.getOperand(0).getOpcode() == VEISD::Lo || + Addr.getOperand(1).getOpcode() == VEISD::Lo) + return false; // Let the reg+imm pattern catch this! + R1 = Addr.getOperand(0); + R2 = Addr.getOperand(1); + return true; + } + + return false; // Let the reg+imm pattern catch this! +} + +// Re-assemble i64 arguments split up in SelectionDAGBuilder's +// visitInlineAsm / GetRegistersForValue functions. +// +// Note: This function was copied from, and is essentially identical +// to ARMISelDAGToDAG::SelectInlineAsm. It is very unfortunate that +// such hacking-up is necessary; a rethink of how inline asm operands +// are handled may be in order to make doing this more sane. +// +// TODO: fix inline asm support so I can simply tell it that 'i64' +// inputs to asm need to be allocated to the IntPair register type, +// and have that work. Then, delete this function. 
+bool VEDAGToDAGISel::tryInlineAsm(SDNode *N) { + std::vector AsmNodeOperands; + unsigned Flag, Kind; + bool Changed = false; // NOTE(review): never assigned again in this function, so the rewrite path at the end looks unreachable; confirm + unsigned NumOps = N->getNumOperands(); + + // Normally, i64 data is bound to two arbitrary GPRs for "%r" + // constraint. 
However, some instructions (e.g. ldd/std) require + // (even/even+1) GPRs. + + // So, here, we check for this case, and mutate the inlineasm to use + // a single IntPair register instead, which guarantees such even/odd + // placement. + + SDLoc dl(N); + SDValue Glue = + N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue(nullptr, 0); + + SmallVector OpChanged; + // Glue node will be appended later. + for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; + ++i) { + SDValue op = N->getOperand(i); + AsmNodeOperands.push_back(op); + + if (i < InlineAsm::Op_FirstOperand) + continue; + + if (ConstantSDNode *C = dyn_cast(N->getOperand(i))) { + Flag = C->getZExtValue(); + Kind = InlineAsm::getKind(Flag); + } else + continue; + + // Immediate operands to inline asm in the SelectionDAG are modeled with + // two operands. The first is a constant of value InlineAsm::Kind_Imm, and + // the second is a constant with the value of the immediate. If we get here + // and we have a Kind_Imm, skip the next operand, and continue. + if (Kind == InlineAsm::Kind_Imm) { + SDValue op = N->getOperand(++i); + AsmNodeOperands.push_back(op); + continue; + } + + unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); + if (NumRegs) + OpChanged.push_back(false); + + unsigned DefIdx = 0; + bool IsTiedToChangedOp = false; + // If it's a use that is tied with a previous def, it has no + // reg class constraint. 
+ if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) + IsTiedToChangedOp = OpChanged[DefIdx]; + + if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef && + Kind != InlineAsm::Kind_RegDefEarlyClobber) + continue; + + unsigned RC; + bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); + if ((!IsTiedToChangedOp && (!HasRC || RC != VE::I64RegClassID)) || + NumRegs != 2) + continue; + + // No IntPairRegister on VE, so the operand is left unchanged. + continue; + } + + if (Glue.getNode()) + AsmNodeOperands.push_back(Glue); + if (!Changed) + return false; + + SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), + CurDAG->getVTList(MVT::Other, MVT::Glue), + AsmNodeOperands); + New->setNodeId(-1); + ReplaceNode(N, New.getNode()); + return true; +} + +void VEDAGToDAGISel::Select(SDNode *N) { + SDLoc dl(N); + if (N->isMachineOpcode()) { + N->setNodeId(-1); + return; // Already selected. + } + + switch (N->getOpcode()) { + default: + break; + case ISD::INLINEASM: { + if (tryInlineAsm(N)) + return; + break; + } + case VEISD::GLOBAL_BASE_REG: + ReplaceNode(N, getGlobalBaseReg()); + return; + } + + SelectCode(N); +} + +/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for +/// inline asm expressions. Returns false once OutOps is filled in, true for an unhandled constraint. +bool VEDAGToDAGISel::SelectInlineAsmMemoryOperand( + const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) { + SDValue Op0, Op1; + switch (ConstraintID) { + default: + return true; + case InlineAsm::Constraint_i: + case InlineAsm::Constraint_o: + case InlineAsm::Constraint_m: // memory + if (!SelectADDRrr(Op, Op0, Op1)) + SelectADDRri(Op, Op0, Op1); + break; + } + + OutOps.push_back(Op0); + OutOps.push_back(Op1); + return false; +} + +/// createVEISelDag - This pass converts a legalized DAG into a +/// VE-specific DAG, ready for instruction scheduling. 
+/// +FunctionPass *llvm::createVEISelDag(VETargetMachine &TM) { + return new VEDAGToDAGISel(TM); +} diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VEISelLowering.h @@ -0,0 +1,239 @@ +//===-- VEISelLowering.h - VE DAG Lowering Interface ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that VE uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_VEISELLOWERING_H +#define LLVM_LIB_TARGET_VE_VEISELLOWERING_H + +#include "VE.h" +#include "llvm/CodeGen/TargetLowering.h" + +namespace llvm { +class VESubtarget; + +namespace VEISD { +enum NodeType : unsigned { + FIRST_NUMBER = ISD::BUILTIN_OP_END, + CMPICC, // Compare two GPR operands, set icc+xcc. + CMPFCC, // Compare two FP operands, set fcc. + BRICC, // Branch to dest on icc condition + BRXCC, // Branch to dest on xcc condition (64-bit only). + BRFCC, // Branch to dest on fcc condition + SELECT, + SELECT_ICC, // Select between two values using the current ICC flags. + SELECT_XCC, // Select between two values using the current XCC flags. + SELECT_FCC, // Select between two values using the current FCC flags. + + EH_SJLJ_SETJMP, // SjLj exception handling setjmp. + EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. + EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch. + + Hi, + Lo, // Hi/Lo operations, typically on a global address. + + FTOI, // FP to Int within a FP register. + ITOF, // Int to FP within a FP register. + FTOX, // FP to Int64 within a FP register. 
+ XTOF, // Int64 to FP within a FP register. + + MAX, + MIN, + FMAX, + FMIN, + + GETFUNPLT, // load function address through %plt instruction + GETSTACKTOP, // retrieve address of stack top (first address of + // locals and temporaries) + GETTLSADDR, // load address for TLS access + + MEMBARRIER, // Compiler barrier only; generate a no-op. + + CALL, // A call instruction. + RET_FLAG, // Return with a flag operand. + GLOBAL_BASE_REG, // Global base reg for PIC. + FLUSHW, // FLUSH register windows to stack. + + /// A wrapper node for TargetConstantPool, TargetJumpTable, + /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress, + /// MCSymbol and TargetBlockAddress. + Wrapper, +}; +} // namespace VEISD + +class VETargetLowering : public TargetLowering { + const VESubtarget *Subtarget; + +public: + VETargetLowering(const TargetMachine &TM, const VESubtarget &STI); + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + + /// computeKnownBitsForTargetNode - Determine which of the bits of Op are + /// known to be either zero or one for the demanded elements and return + /// them in Known. 
+ void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth = 0) const override; + + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *MBB) const override; + + const char *getTargetNodeName(unsigned Opcode) const override; + + ConstraintType getConstraintType(StringRef Constraint) const override; + ConstraintWeight + getSingleConstraintMatchWeight(AsmOperandInfo &info, + const char *constraint) const override; + void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, + std::vector &Ops, + SelectionDAG &DAG) const override; + + unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override { + if (ConstraintCode == "o") + return InlineAsm::Constraint_o; + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); + } + + std::pair + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; + + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i32; + } + + Register getRegisterByName(const char *RegName, EVT VT, + const MachineFunction &MF) const override; + + /// Override to support customized stack guard loading. 
+ bool useLoadStackGuardNode() const override; + void insertSSPDeclarations(Module &M) const override; + + /// getSetCCResultType - Return the ISD::SETCC ValueType + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, + EVT VT) const override; + + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const override; + SDValue LowerFormalArguments_64(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const; + + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const override; + + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool isVarArg, + const SmallVectorImpl &ArgsFlags, + LLVMContext &Context) const override; + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SDLoc &dl, + SelectionDAG &DAG) const override; + + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerToTLSLocalExecModel(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const; + + unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const; + SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const; + SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF, + SelectionDAG &DAG) const; + 
SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; + + bool isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const override; + + bool ShouldShrinkFPConstant(EVT VT) const override { + // Do not shrink FP constpool if VT == MVT::f128. + // (ldd, call _Q_fdtoq) is more expensive than two ldds. + return VT != MVT::f128; + } + + /// Returns true if the target allows unaligned memory accesses of the + /// specified type. + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, + MachineMemOperand::Flags Flags, + bool *Fast) const override; + + bool mergeStoresAfterLegalization(EVT) const override { return true; } + + bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, + const SelectionDAG &DAG) const override; + + unsigned getJumpTableEncoding() const override; + + const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, + unsigned uid, + MCContext &Ctx) const override; + + bool shouldInsertFencesForAtomic(const Instruction *I) const override { + // VE uses Release consistency, so a fence is needed for each atomic operation. 
+ return true; + } + Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, + AtomicOrdering Ord) const override; + Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, + AtomicOrdering Ord) const override; + + AtomicExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + + void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const override; + + MachineBasicBlock *expandSelectCC(MachineInstr &MI, MachineBasicBlock *BB, + unsigned BROpcode) const; + MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, + MachineBasicBlock *MBB) const; + MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI, + MachineBasicBlock *MBB) const; + MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI, + MachineBasicBlock *BB) const; + void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, + MachineBasicBlock *DispatchBB, int FI) const; + void finalizeLowering(MachineFunction &MF) const override; + + // VE supports only vector FMA + bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + EVT VT) const override { + return VT.isVector(); + } +}; +} // namespace llvm + +#endif // LLVM_LIB_TARGET_VE_VEISELLOWERING_H diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -0,0 +1,2223 @@ +//===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the interfaces that VE uses to lower LLVM code into a +// selection DAG. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "VEISelLowering.h"
+#include "MCTargetDesc/VEMCExpr.h"
+#include "VEInstrBuilder.h"
+#include "VEMachineFunctionInfo.h"
+#include "VERegisterInfo.h"
+#include "VETargetMachine.h"
+// #include "VETargetObjectFile.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IntrinsicsVE.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "ve-lower"
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// Custom calling-convention handler that places an f32 value in the upper
+/// half of an 8-byte stack slot.
+///
+/// Returns true when the value was assigned (LocVT is f32) and false for every
+/// other type, so the generated calling convention can try its next rule.
+static bool allocateFloat(unsigned ValNo, MVT ValVT, MVT LocVT,
+                          CCValAssign::LocInfo LocInfo,
+                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
+  switch (LocVT.SimpleTy) {
+  case MVT::f32: {
+    // Allocate the stack slot like below:
+    //    0      4
+    //    +------+------+
+    //    | empty| float|
+    //    +------+------+
+    // The dummy area is allocated with align=8 so that the pair of areas
+    // starts on an 8-byte boundary.
+    State.AllocateStack(4, 8); // for empty area
+    // The value is allocated with align=4 so that it lands immediately after
+    // the dummy area.
+    unsigned Offset = State.AllocateStack(4, 4); // for float value area
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+    return true;
+  }
+  default:
+    return false;
+  }
+}
+
+#include "VEGenCallingConv.inc"
+
+/// Return true if every value in \p Outs can be returned according to
+/// RetCC_VE.
+bool VETargetLowering::CanLowerReturn(
+    CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
+    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
+  CCAssignFn *RetCC = RetCC_VE;
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
+  return CCInfo.CheckReturn(Outs, RetCC);
+}
+
+/// Lower a return: extend each outgoing value as RetCC_VE requires, copy it
+/// into its return register, and emit a VEISD::RET_FLAG node.
+SDValue
+VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+                              bool IsVarArg,
+                              const SmallVectorImpl<ISD::OutputArg> &Outs,
+                              const SmallVectorImpl<SDValue> &OutVals,
+                              const SDLoc &DL, SelectionDAG &DAG) const {
+  // CCValAssign - represent the assignment of the return value to locations.
+  SmallVector<CCValAssign, 16> RVLocs;
+
+  // CCState - Info about the registers and stack slot.
+  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
+                 *DAG.getContext());
+
+  // Analyze return values.
+  CCInfo.AnalyzeReturn(Outs, RetCC_VE);
+
+  SDValue Flag;
+  SmallVector<SDValue, 4> RetOps(1, Chain);
+
+  // Copy the result values into the output registers.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    assert(VA.isRegLoc() && "Can only return in registers!");
+    SDValue OutVal = OutVals[i];
+
+    // Integer return values must be sign or zero extended by the callee.
+    switch (VA.getLocInfo()) {
+    case CCValAssign::Full:
+      break;
+    case CCValAssign::SExt:
+      OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
+      break;
+    case CCValAssign::ZExt:
+      OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
+      break;
+    case CCValAssign::AExt:
+      OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
+      break;
+    default:
+      llvm_unreachable("Unknown loc info!");
+    }
+
+    // The custom bit on an i32 return value indicates that it should be passed
+    // in the high bits of the register.
+    if (VA.getValVT() == MVT::i32 && VA.needsCustom()) {
+      OutVal = DAG.getNode(ISD::SHL, DL, MVT::i64, OutVal,
+                           DAG.getConstant(32, DL, MVT::i32));
+
+      // The next value may go in the low bits of the same register.
+      // Handle both at once.
+      if (i + 1 < RVLocs.size() &&
+          RVLocs[i + 1].getLocReg() == VA.getLocReg()) {
+        SDValue NV =
+            DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, OutVals[i + 1]);
+        OutVal = DAG.getNode(ISD::OR, DL, MVT::i64, OutVal, NV);
+        // Skip the next value, it's already done.
+        ++i;
+      }
+    }
+
+    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);
+
+    // Guarantee that all emitted copies are stuck together with flags.
+    Flag = Chain.getValue(1);
+    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+  }
+
+  RetOps[0] = Chain; // Update chain.
+
+  // Add the flag if we have it.
+  if (Flag.getNode())
+    RetOps.push_back(Flag);
+
+  return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps);
+}
+
+/// Lower the incoming arguments of the current function according to CC_VE.
+///
+/// Register arguments are copied out of their live-in physical registers and
+/// truncated to their value type; stack arguments are loaded from fixed frame
+/// objects. One SDValue per argument is appended to \p InVals. For variadic
+/// functions the offset of the first variable argument is recorded in
+/// VEMachineFunctionInfo.
+SDValue VETargetLowering::LowerFormalArguments(
+    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
+    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  // Get the base offset of the incoming arguments stack space.
+  unsigned ArgsBaseOffset = 176;
+  // Get the size of the preserved arguments area
+  unsigned ArgsPreserved = 64;
+
+  // Analyze arguments according to CC_VE.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+  // Allocate the preserved area first.
+  CCInfo.AllocateStack(ArgsPreserved, 8);
+  // We already allocated the preserved area, so the stack offset computed
+  // by CC_VE would be correct now.
+  CCInfo.AnalyzeFormalArguments(Ins, CC_VE);
+
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    if (VA.isRegLoc()) {
+      // This argument is passed in a register.
+      // All integer register arguments are promoted by the caller to i64.
+
+      // Create a virtual register for the promoted live-in value.
+      unsigned VReg =
+          MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
+      SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
+
+      // Get the high bits for i32 struct elements.
+      if (VA.getValVT() == MVT::i32 && VA.needsCustom())
+        Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg,
+                          DAG.getConstant(32, DL, MVT::i32));
+
+      // The caller promoted the argument, so insert an Assert?ext SDNode so we
+      // won't promote the value again in this function.
+      switch (VA.getLocInfo()) {
+      case CCValAssign::SExt:
+        Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
+                          DAG.getValueType(VA.getValVT()));
+        break;
+      case CCValAssign::ZExt:
+        Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
+                          DAG.getValueType(VA.getValVT()));
+        break;
+      default:
+        break;
+      }
+
+      // Truncate the register down to the argument type.
+      if (VA.isExtInLoc())
+        Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
+
+      InVals.push_back(Arg);
+      continue;
+    }
+
+    // The registers are exhausted. This argument was passed on the stack.
+    assert(VA.isMemLoc());
+    // CC_VE computes stack offsets relative to the beginning of the
+    // arguments area at %fp+176.
+    unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
+    unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
+    // Adjust the offset for extended arguments so that the load reads the
+    // last ValSize bytes of the 8-byte slot. The caller will have written the
+    // full slot with extended bytes, but we prefer our own extending loads.
+    // NOTE(review): this adjustment was written for a big-endian slot layout
+    // (the original comment referred to SPARC) -- confirm it matches the VE
+    // ABI.
+    if (VA.isExtInLoc())
+      Offset += 8 - ValSize;
+    int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
+    InVals.push_back(
+        DAG.getLoad(VA.getValVT(), DL, Chain,
+                    DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
+                    MachinePointerInfo::getFixedStack(MF, FI)));
+  }
+
+  if (!IsVarArg)
+    return Chain;
+
+  // This function takes variable arguments, some of which may have been passed
+  // in registers %s0-%s8.
+  //
+  // The va_start intrinsic needs to know the offset to the first variable
+  // argument.
+  // TODO: need to calculate offset correctly once we support f128.
+  unsigned ArgOffset = ArgLocs.size() * 8;
+  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
+  // Skip the 176 bytes of register save area.
+  FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);
+
+  return Chain;
+}
+
+// FIXME? Maybe this could be a TableGen attribute on some registers and
+// this table could be generated automatically from RegInfo.
+/// Map a register name used by a named-register global variable to the
+/// corresponding VE register. Reports a fatal error for unknown names.
+Register VETargetLowering::getRegisterByName(const char *RegName, EVT VT,
+                                             const MachineFunction &MF) const {
+  unsigned Reg = StringSwitch<unsigned>(RegName)
+                     .Case("sp", VE::SX11)    // Stack pointer
+                     .Case("fp", VE::SX9)     // Frame pointer
+                     .Case("sl", VE::SX8)     // Stack limit
+                     .Case("lr", VE::SX10)    // Link register
+                     .Case("tp", VE::SX14)    // Thread pointer
+                     .Case("outer", VE::SX12) // Outer register
+                     .Case("info", VE::SX17)  // Info area register
+                     .Case("got", VE::SX15)   // Global offset table register
+                     .Case("plt", VE::SX16) // Procedure linkage table register
+                     .Case("usrcc", VE::UCC)  // User clock counter
+                     .Default(0);
+
+  if (Reg)
+    return Reg;
+
+  report_fatal_error("Invalid register name global variable");
+}
+
+// This function returns true if CalleeName is an ABI function that returns
+// a long double (fp128).
+static bool isFP128ABICall(const char *CalleeName) {
+  static const char *const ABICalls[] = {
+      "_Q_add",  "_Q_sub",  "_Q_mul",  "_Q_div",  "_Q_sqrt",  "_Q_neg",
+      "_Q_itoq", "_Q_stoq", "_Q_dtoq", "_Q_utoq", "_Q_lltoq", "_Q_ulltoq"};
+  for (const char *Name : ABICalls)
+    if (strcmp(CalleeName, Name) == 0)
+      return true;
+  return false;
+}
+
+/// Return the allocation size of the type pointed to by the callee's first
+/// parameter, 16 for the fp128 ABI helper calls that are not declared in the
+/// module, or 0 when the callee function cannot be determined.
+unsigned VETargetLowering::getSRetArgSize(SelectionDAG &DAG,
+                                          SDValue Callee) const {
+  const Function *CalleeFn = nullptr;
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+    CalleeFn = dyn_cast<Function>(G->getGlobal());
+  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+    const Function &F = DAG.getMachineFunction().getFunction();
+    const Module *M = F.getParent();
+    const char *CalleeName = E->getSymbol();
+    CalleeFn = M->getFunction(CalleeName);
+    if (!CalleeFn && isFP128ABICall(CalleeName))
+      return 16; // Return sizeof(fp128)
+  }
+
+  if (!CalleeFn)
+    return 0;
+
+  // It would be nice to check for the sret attribute on CalleeFn here,
+  // but since it is not part of the function type, any check will misfire.
+
+  // NOTE(review): this assumes the callee has at least one parameter and that
+  // it is a pointer; cast<> asserts otherwise -- confirm against callers.
+  PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType());
+  Type *ElementTy = Ty->getElementType();
+  return DAG.getDataLayout().getTypeAllocSize(ElementTy);
+}
+
+/// Lower an outgoing call.
+///
+/// Arguments are assigned by CC_VE; for variadic calls every register argument
+/// is additionally stored to the stack location chosen by CC_VE2. The callee
+/// address is materialized and passed in SX12. After the VEISD::CALL node the
+/// results are copied out of the registers chosen by RetCC_VE and appended to
+/// \p InVals. Returns the updated chain.
+SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+                                    SmallVectorImpl<SDValue> &InVals) const {
+  SelectionDAG &DAG = CLI.DAG;
+  SDLoc DL = CLI.DL;
+  SDValue Chain = CLI.Chain;
+  auto PtrVT = getPointerTy(DAG.getDataLayout());
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  // VE target does not yet support tail call optimization.
+  CLI.IsTailCall = false;
+
+  // Get the base offset of the outgoing arguments stack space.
+  unsigned ArgsBaseOffset = 176;
+  // Get the size of the preserved arguments area
+  unsigned ArgsPreserved = 8 * 8u;
+
+  // Analyze operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CLI.CallConv, CLI.IsVarArg, MF, ArgLocs, *DAG.getContext());
+  // Allocate the preserved area first.
+  CCInfo.AllocateStack(ArgsPreserved, 8);
+  // We already allocated the preserved area, so the stack offset computed
+  // by CC_VE would be correct now.
+  CCInfo.AnalyzeCallOperands(CLI.Outs, CC_VE);
+
+  // VE requires to use both register and stack for varargs or no-prototyped
+  // functions.  FIXME: How to check prototype here?
+  bool UseBoth = CLI.IsVarArg /* || CLI.NoProtoType */;
+
+  // Analyze operands again if it is required to store BOTH.
+  SmallVector<CCValAssign, 16> ArgLocs2;
+  CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, MF, ArgLocs2, *DAG.getContext());
+  if (UseBoth)
+    CCInfo2.AnalyzeCallOperands(CLI.Outs, CC_VE2);
+
+  // Get the size of the outgoing arguments stack space requirement.
+  unsigned ArgsSize = CCInfo.getNextStackOffset();
+
+  // Keep stack frames 16-byte aligned.
+  ArgsSize = alignTo(ArgsSize, 16);
+
+  // Adjust the stack pointer to make room for the arguments.
+  // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
+  // with more than 6 arguments.
+  Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);
+
+  // Collect the set of registers to pass to the function and their values.
+  // This will be emitted as a sequence of CopyToReg nodes glued to the call
+  // instruction.
+  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+
+  // Collect chains from all the memory operations that copy arguments to the
+  // stack. They must follow the stack pointer adjustment above and precede the
+  // call instruction itself.
+  SmallVector<SDValue, 8> MemOpChains;
+
+  // VE needs to get address of callee function in a register
+  // So, prepare to copy it to SX12 here.
+
+  // If the callee is a GlobalAddress node (quite common, every direct call is)
+  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+  // Likewise ExternalSymbol -> TargetExternalSymbol.
+  SDValue Callee = CLI.Callee;
+
+  bool IsPICCall = isPositionIndependent();
+
+  // PC-relative references to external symbols should go through $stub.
+  // If so, we need to prepare GlobalBaseReg first.
+  const TargetMachine &TM = DAG.getTarget();
+  const Module *Mod = MF.getFunction().getParent();
+  // GV stays null for callees that are not GlobalAddress nodes.
+  const GlobalValue *GV = nullptr;
+  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
+    GV = G->getGlobal();
+  bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
+  bool UsePlt = !Local;
+
+  // Turn GlobalAddress/ExternalSymbol node into a value node
+  // containing the address of them here.
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+    if (IsPICCall) {
+      if (UsePlt)
+        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
+      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT, 0, 0);
+      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
+    } else {
+      Callee =
+          makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
+    }
+  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+    if (IsPICCall) {
+      if (UsePlt)
+        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
+      Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
+      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
+    } else {
+      Callee =
+          makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
+    }
+  }
+
+  RegsToPass.push_back(std::make_pair(VE::SX12, Callee));
+
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    SDValue Arg = CLI.OutVals[i];
+
+    // Promote the value if needed.
+    switch (VA.getLocInfo()) {
+    default:
+      llvm_unreachable("Unknown location info!");
+    case CCValAssign::Full:
+      break;
+    case CCValAssign::SExt:
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::ZExt:
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::AExt:
+      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
+      break;
+    }
+
+    if (VA.isRegLoc()) {
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+      if (!UseBoth)
+        continue;
+      // Also store the value to the stack location chosen by the second
+      // analysis. Note that this assignment overwrites ArgLocs[i] through the
+      // reference VA.
+      VA = ArgLocs2[i];
+    }
+
+    assert(VA.isMemLoc());
+
+    // Create a store off the stack pointer for this argument.
+    SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
+    // The argument area starts at %fp+176 in the callee frame,
+    // %sp+176 in ours.
+    SDValue PtrOff =
+        DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
+    PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
+    MemOpChains.push_back(
+        DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
+  }
+
+  // Emit all stores, make sure they occur before the call.
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
+
+  // Build a sequence of CopyToReg nodes glued together with token chain and
+  // glue operands which copy the outgoing args into registers. The InGlue is
+  // necessary since all emitted instructions must be stuck together in order
+  // to pass the live physical registers.
+  SDValue InGlue;
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
+                             RegsToPass[i].second, InGlue);
+    InGlue = Chain.getValue(1);
+  }
+
+  // Build the operands for the call instruction itself.
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                  RegsToPass[i].second.getValueType()));
+
+  // Add a register mask operand representing the call-preserved registers.
+  const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CLI.CallConv);
+  assert(Mask && "Missing call preserved mask for calling convention");
+  Ops.push_back(DAG.getRegisterMask(Mask));
+
+  // Make sure the CopyToReg nodes are glued to the call instruction which
+  // consumes the registers.
+  if (InGlue.getNode())
+    Ops.push_back(InGlue);
+
+  // Now the call itself.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+  Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
+  InGlue = Chain.getValue(1);
+
+  // Revert the stack pointer immediately after the call.
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true),
+                             DAG.getIntPtrConstant(0, DL, true), InGlue, DL);
+  InGlue = Chain.getValue(1);
+
+  // Now extract the return values. This is more or less the same as
+  // LowerFormalArguments.
+
+  // Assign locations to each value returned by this call.
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState RVInfo(CLI.CallConv, CLI.IsVarArg, MF, RVLocs, *DAG.getContext());
+
+  // Set inreg flag manually for codegen generated library calls that
+  // return float.
+  if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CS)
+    CLI.Ins[0].Flags.setInReg();
+
+  RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_VE);
+
+  // Copy all of the result registers out of their specified physreg.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    unsigned Reg = VA.getLocReg();
+
+    // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
+    // reside in the same register in the high and low bits. Reuse the
+    // CopyFromReg previous node to avoid duplicate copies.
+    SDValue RV;
+    if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
+      if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
+        RV = Chain.getValue(0);
+
+    // But usually we'll create a new CopyFromReg for a different register.
+    if (!RV.getNode()) {
+      RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
+      Chain = RV.getValue(1);
+      InGlue = Chain.getValue(2);
+    }
+
+    // Get the high bits for i32 struct elements.
+    if (VA.getValVT() == MVT::i32 && VA.needsCustom())
+      RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV,
+                       DAG.getConstant(32, DL, MVT::i32));
+
+    // The callee promoted the return value, so insert an Assert?ext SDNode so
+    // we won't promote the value again in this function.
+    switch (VA.getLocInfo()) {
+    case CCValAssign::SExt:
+      RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
+                       DAG.getValueType(VA.getValVT()));
+      break;
+    case CCValAssign::ZExt:
+      RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
+                       DAG.getValueType(VA.getValVT()));
+      break;
+    default:
+      break;
+    }
+
+    // Truncate the register down to the return value type.
+    if (VA.isExtInLoc())
+      RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
+
+    InVals.push_back(RV);
+  }
+
+  return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+/// isFPImmLegal - Returns true if the target can instruction select the
+/// specified FP immediate natively. If false, the legalizer will
+/// materialize the FP immediate as a load from a constant pool.
+/// This implementation accepts every f32 and f64 immediate.
+bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
+                                    bool ForCodeSize) const {
+  return VT == MVT::f32 || VT == MVT::f64;
+}
+
+/// Determine if the target supports unaligned memory accesses.
+///
+/// This function returns true if the target allows unaligned memory accesses
+/// of the specified type in the given address space.  If true, it also returns
+/// whether the unaligned memory access is "fast" in the last argument by
+/// reference.  This is used, for example, in situations where an array
+/// copy/move/set is converted to a sequence of store operations.  Its use
+/// helps to ensure that such replacements don't generate code that causes an
+/// alignment error (trap) on the target machine.
+///
+/// This implementation always returns true and reports the access as fast.
+bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+                                                      unsigned AddrSpace,
+                                                      unsigned Align,
+                                                      MachineMemOperand::Flags,
+                                                      bool *Fast) const {
+  if (Fast) {
+    // It's fast anytime on VE
+    *Fast = true;
+  }
+  return true;
+}
+
+/// Return true if stores may be merged into a store of type \p MemVT.
+bool VETargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
+                                        const SelectionDAG &DAG) const {
+  // When the function has the noimplicitfloat attribute, do not merge stores
+  // into a type wider than the largest integer size (64 bits).
+  bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
+      Attribute::NoImplicitFloat);
+
+  if (NoFloat) {
+    unsigned MaxIntSize = 64;
+    return (MemVT.getSizeInBits() <= MaxIntSize);
+  }
+  return true;
+}
+
+/// Choose how an atomicrmw instruction is expanded in IR: an xchg that is at
+/// least as wide as the minimum cmpxchg size is left alone, everything else is
+/// expanded to a cmpxchg loop.
+TargetLowering::AtomicExpansionKind
+VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+  if (AI->getOperation() == AtomicRMWInst::Xchg) {
+    const DataLayout &DL = AI->getModule()->getDataLayout();
+    if (DL.getTypeStoreSize(AI->getValOperand()->getType()) <
+        (VETargetLowering::getMinCmpXchgSizeInBits() / 8))
+      return AtomicExpansionKind::CmpXChg; // Uses cas instruction for 1byte or
+                                           // 2byte atomic_swap
+    return AtomicExpansionKind::None;      // Uses ts1am instruction
+  }
+  return AtomicExpansionKind::CmpXChg;
+}
+
+/// Configure register classes and the legalization action of every operation
+/// for the VE target.
+VETargetLowering::VETargetLowering(const TargetMachine &TM,
+                                   const VESubtarget &STI)
+    : TargetLowering(TM), Subtarget(&STI) {
+  MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));
+
+  // Instructions which use registers as conditionals examine all the
+  // bits (as does the pseudo SELECT_CC expansion). I don't think it
+  // matters much whether it's ZeroOrOneBooleanContent, or
+  // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
+  // former.
+  setBooleanContents(ZeroOrOneBooleanContent);
+  setBooleanVectorContents(ZeroOrOneBooleanContent);
+
+  // Set up the register classes.
+  addRegisterClass(MVT::i32, &VE::I32RegClass);
+  addRegisterClass(MVT::i64, &VE::I64RegClass);
+  addRegisterClass(MVT::f32, &VE::F32RegClass);
+  addRegisterClass(MVT::f64, &VE::I64RegClass);
+  addRegisterClass(MVT::f128, &VE::F128RegClass);
+
+  // Turn FP extload into load/fpextend
+  for (MVT VT : MVT::fp_valuetypes()) {
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
+  }
+
+  // VE doesn't have i1 sign extending load
+  for (MVT VT : MVT::integer_valuetypes()) {
+    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
+    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
+    setTruncStoreAction(VT, MVT::i1, Expand);
+  }
+
+  // Turn FP truncstore into trunc + store.
+  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+  setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+
+  // Custom legalize GlobalAddress nodes into LO/HI parts.
+  setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
+  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
+  setOperationAction(ISD::ConstantPool, PtrVT, Custom);
+  setOperationAction(ISD::BlockAddress, PtrVT, Custom);
+
+  // VE has no REM or DIVREM operations.
+  for (MVT VT : MVT::integer_valuetypes()) {
+    setOperationAction(ISD::UREM, VT, Expand);
+    setOperationAction(ISD::SREM, VT, Expand);
+    setOperationAction(ISD::SDIVREM, VT, Expand);
+    setOperationAction(ISD::UDIVREM, VT, Expand);
+  }
+
+  // VE has instructions for fp<->sint, so use them.
+
+  // VE doesn't have instructions for fp<->uint, so expand them by llvm
+  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
+  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
+  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+
+  // VE doesn't have BRCOND
+  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+
+  // BRIND/BR_JT are not implemented yet.
+  // FIXME: BRIND instruction is implemented, but JumpTable is not yet.
+  setOperationAction(ISD::BRIND, MVT::Other, Expand);
+  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+
+  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+  setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
+  if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
+    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
+
+  setTargetDAGCombine(ISD::FADD);
+  // setTargetDAGCombine(ISD::FMA);
+
+  // ATOMICs.
+  // Atomics are supported on VE.
+  setMaxAtomicSizeInBitsSupported(64);
+  setMinCmpXchgSizeInBits(32);
+  setSupportsUnalignedAtomics(false);
+
+  // Use custom inserter, LowerATOMIC_FENCE, for ATOMIC_FENCE.
+  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+  for (MVT VT : MVT::integer_valuetypes()) {
+    // Several atomic operations are converted to VE instructions well.
+    // Additional memory fences are generated in emitLeadingfence and
+    // emitTrailingFence functions.
+    setOperationAction(ISD::ATOMIC_LOAD, VT, Legal);
+    setOperationAction(ISD::ATOMIC_STORE, VT, Legal);
+    setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Legal);
+    setOperationAction(ISD::ATOMIC_SWAP, VT, Legal);
+
+    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
+
+    // FIXME: "atmam" instructions are not supported yet
+    setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);
+
+    // VE doesn't have the following instructions
+    setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
+  }
+
+  // FIXME: VE's I128 stuff is not investigated yet. The SHL_I128, SRL_I128
+  // and SRA_I128 libcall names are deliberately left at their defaults; the
+  // code that cleared them was guarded by a condition that is never true.
+
+  for (MVT VT : MVT::fp_valuetypes()) {
+    // VE has no scalar FMA instruction
+    setOperationAction(ISD::FMA, VT, Expand);
+    setOperationAction(ISD::FMAD, VT, Expand);
+    setOperationAction(ISD::FREM, VT, Expand);
+    setOperationAction(ISD::FNEG, VT, Expand);
+    setOperationAction(ISD::FABS, VT, Expand);
+    setOperationAction(ISD::FSQRT, VT, Expand);
+    setOperationAction(ISD::FSIN, VT, Expand);
+    setOperationAction(ISD::FCOS, VT, Expand);
+    setOperationAction(ISD::FPOWI, VT, Expand);
+    setOperationAction(ISD::FPOW, VT, Expand);
+    setOperationAction(ISD::FLOG, VT, Expand);
+    setOperationAction(ISD::FLOG2, VT, Expand);
+    setOperationAction(ISD::FLOG10, VT, Expand);
+    setOperationAction(ISD::FEXP, VT, Expand);
+    setOperationAction(ISD::FEXP2, VT, Expand);
+    setOperationAction(ISD::FCEIL, VT, Expand);
+    setOperationAction(ISD::FTRUNC, VT, Expand);
+    setOperationAction(ISD::FRINT, VT, Expand);
+    setOperationAction(ISD::FNEARBYINT, VT, Expand);
+    setOperationAction(ISD::FROUND, VT, Expand);
+    setOperationAction(ISD::FFLOOR, VT, Expand);
+    setOperationAction(ISD::FMINNUM, VT, Expand);
+    setOperationAction(ISD::FMAXNUM, VT, Expand);
+    setOperationAction(ISD::FMINIMUM, VT, Expand);
+    setOperationAction(ISD::FMAXIMUM, VT, Expand);
+    setOperationAction(ISD::FSINCOS, VT, Expand);
+  }
+
+  // FIXME: VE's FCOPYSIGN is not investigated yet
+  setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
+  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+  // FIXME: VE's SHL_PARTS and others are not investigated yet.
+  setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+  setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+  setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
+  setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
+  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
+
+  // Expands to [SU]MUL_LOHI.
+  setOperationAction(ISD::MULHU, MVT::i32, Expand);
+  setOperationAction(ISD::MULHS, MVT::i32, Expand);
+  // setOperationAction(ISD::MUL, MVT::i32, Expand);
+
+  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+  setOperationAction(ISD::MULHU, MVT::i64, Expand);
+  setOperationAction(ISD::MULHS, MVT::i64, Expand);
+
+  setOperationAction(ISD::UMULO, MVT::i64, Custom);
+  setOperationAction(ISD::SMULO, MVT::i64, Custom);
+
+  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
+  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
+  setOperationAction(ISD::BSWAP, MVT::i32, Legal);
+  setOperationAction(ISD::BSWAP, MVT::i64, Legal);
+  setOperationAction(ISD::CTPOP, MVT::i32, Legal);
+  setOperationAction(ISD::CTPOP, MVT::i64, Legal);
+  setOperationAction(ISD::CTLZ, MVT::i32, Legal);
+  setOperationAction(ISD::CTLZ, MVT::i64, Legal);
+  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+  setOperationAction(ISD::CTTZ, MVT::i64, Expand);
+  setOperationAction(ISD::ROTL, MVT::i32, Expand);
+  setOperationAction(ISD::ROTL, MVT::i64, Expand);
+  setOperationAction(ISD::ROTR, MVT::i32, Expand);
+  setOperationAction(ISD::ROTR, MVT::i64, Expand);
+
+  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+  setOperationAction(ISD::VASTART, MVT::Other, Custom);
+  // VAARG needs to be lowered to access with 8 bytes alignment.
+  setOperationAction(ISD::VAARG, MVT::Other, Custom);
+
+  // Use the default implementation.
+  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+  setOperationAction(ISD::VAEND, MVT::Other, Expand);
+  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+
+  // Custom lower DYNAMIC_STACKALLOC
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
+
+  // LOAD/STORE for f128 needs to be custom lowered to expand two loads/stores
+  setOperationAction(ISD::LOAD, MVT::f128, Custom);
+  setOperationAction(ISD::STORE, MVT::f128, Custom);
+
+  // VE has FAQ, FSQ, FMQ, and FCQ
+  setOperationAction(ISD::FADD, MVT::f128, Legal);
+  setOperationAction(ISD::FSUB, MVT::f128, Legal);
+  setOperationAction(ISD::FMUL, MVT::f128, Legal);
+  setOperationAction(ISD::FDIV, MVT::f128, Expand);
+  setOperationAction(ISD::FSQRT, MVT::f128, Expand);
+  setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
+  setOperationAction(ISD::FP_ROUND, MVT::f128, Legal);
+
+  // Other configurations related to f128.
+  setOperationAction(ISD::SELECT, MVT::f128, Legal);
+  setOperationAction(ISD::SELECT_CC, MVT::f128, Legal);
+  setOperationAction(ISD::SETCC, MVT::f128, Legal);
+  setOperationAction(ISD::BR_CC, MVT::f128, Legal);
+
+  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+  // TRAP to expand (which turns it into abort).
+  setOperationAction(ISD::TRAP, MVT::Other, Expand);
+
+  // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand"
+  // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP.
+  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
+
+  setStackPointerRegisterToSaveRestore(VE::SX11);
+
+  // Set function alignment to 16 bytes
+  setMinFunctionAlignment(Align(16));
+
+  // VE stores all argument by 8 bytes alignment
+  setMinStackArgumentAlignment(Align(8));
+
+  computeRegisterProperties(Subtarget->getRegisterInfo());
+}
+
+/// Return the printable name of a VEISD node, or nullptr for
+/// VEISD::FIRST_NUMBER and for opcodes that are not listed.
+const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
+  switch ((VEISD::NodeType)Opcode) {
+  case VEISD::FIRST_NUMBER:
+    break;
+  case VEISD::CMPICC:
+    return "VEISD::CMPICC";
+  case VEISD::CMPFCC:
+    return "VEISD::CMPFCC";
+  case VEISD::BRICC:
+    return "VEISD::BRICC";
+  case VEISD::BRXCC:
+    return "VEISD::BRXCC";
+  case VEISD::BRFCC:
+    return "VEISD::BRFCC";
+  case VEISD::SELECT:
+    return "VEISD::SELECT";
+  case VEISD::SELECT_ICC:
+    return "VEISD::SELECT_ICC";
+  case VEISD::SELECT_XCC:
+    return "VEISD::SELECT_XCC";
+  case VEISD::SELECT_FCC:
+    return "VEISD::SELECT_FCC";
+  case VEISD::EH_SJLJ_SETJMP:
+    return "VEISD::EH_SJLJ_SETJMP";
+  case VEISD::EH_SJLJ_LONGJMP:
+    return "VEISD::EH_SJLJ_LONGJMP";
+  case VEISD::EH_SJLJ_SETUP_DISPATCH:
+    return "VEISD::EH_SJLJ_SETUP_DISPATCH";
+  case VEISD::Hi:
+    return "VEISD::Hi";
+  case VEISD::Lo:
+    return "VEISD::Lo";
+  case VEISD::FTOI:
+    return "VEISD::FTOI";
+  case VEISD::ITOF:
+    return "VEISD::ITOF";
+  case VEISD::FTOX:
+    return "VEISD::FTOX";
+  case VEISD::XTOF:
+    return "VEISD::XTOF";
+  case VEISD::MAX:
+    return "VEISD::MAX";
+  case VEISD::MIN:
+    return "VEISD::MIN";
+  case VEISD::FMAX:
+    return "VEISD::FMAX";
+  case VEISD::FMIN:
+    return "VEISD::FMIN";
+  case VEISD::GETFUNPLT:
+    return "VEISD::GETFUNPLT";
+  case VEISD::GETSTACKTOP:
+    return "VEISD::GETSTACKTOP";
+  case VEISD::GETTLSADDR:
+    return "VEISD::GETTLSADDR";
+  case VEISD::MEMBARRIER:
+    return "VEISD::MEMBARRIER";
+  case VEISD::CALL:
+    return "VEISD::CALL";
+  case VEISD::RET_FLAG:
+    return "VEISD::RET_FLAG";
+  case VEISD::GLOBAL_BASE_REG:
+    return "VEISD::GLOBAL_BASE_REG";
+  case VEISD::FLUSHW:
+    return "VEISD::FLUSHW";
+  case VEISD::Wrapper:
+    return "VEISD::Wrapper";
+  }
+  return nullptr;
+}
+
+/// Scalar comparisons produce i32; vector comparisons produce a vector with
+/// the same shape and integer elements.
+EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
+                                         EVT VT) const {
+  if (!VT.isVector())
+    return MVT::i32;
+  return VT.changeVectorElementTypeToInteger();
+}
+
+/// computeKnownBitsForTargetNode - Determine which bits of the target-specific
+/// node \p Op are known to be zero or one and return them in \p Known.
+/// For the VE select nodes a bit is known only when it is known, with the same
+/// value, in both operand 0 and operand 1. Every other opcode reports nothing
+/// known. \p DemandedElts is not used.
+void VETargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+                                                     KnownBits &Known,
+                                                     const APInt &DemandedElts,
+                                                     const SelectionDAG &DAG,
+                                                     unsigned Depth) const {
+  KnownBits Known2;
+  Known.resetAll();
+
+  switch (Op.getOpcode()) {
+  default:
+    break;
+  case VEISD::SELECT_ICC:
+  case VEISD::SELECT_XCC:
+  case VEISD::SELECT_FCC:
+    Known = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
+    Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
+
+    // Only known if known in both the LHS and RHS.
+    Known.One &= Known2.One;
+    Known.Zero &= Known2.Zero;
+    break;
+  }
+}
+
+// Convert to a target node and set target flags.
+// Handles GlobalAddress, ConstantPool, BlockAddress and ExternalSymbol nodes;
+// any other node kind is unreachable.
+SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
+                                          SelectionDAG &DAG) const {
+  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
+    return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
+                                      GA->getValueType(0), GA->getOffset(), TF);
+
+  if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
+    return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
+                                     CP->getAlignment(), CP->getOffset(), TF);
+
+  if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
+    return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
+                                     0, TF);
+
+  if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
+    return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
+                                       TF);
+
+  llvm_unreachable("Unhandled address SDNode");
+}
+
+// Split Op into high and low parts according to HiTF and LoTF.
+// Return an ADD node combining the parts.
+SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG)); + SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG)); + return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo); +} + +// Build SDNodes for producing an address from a GlobalAddress, ConstantPool, +// or ExternalSymbol SDNode. +SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT VT = getPointerTy(DAG.getDataLayout()); + + // Handle PIC mode first. Symbols without local linkage need a GOT load. + if (isPositionIndependent()) { + // GLOBAL_BASE_REG codegen'ed with call. Inform MFI that this + // function has calls. + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + MFI.setHasCalls(true); + + if (dyn_cast(Op) != nullptr || + (dyn_cast(Op) != nullptr && + dyn_cast(Op)->getGlobal()->hasLocalLinkage())) { + // Create following instructions for local linkage PIC code. + // lea %s35, %gotoff_lo(.LCPI0_0) + // and %s35, %s35, (32)0 + // lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35) + // adds.l %s35, %s15, %s35 ; %s15 is GOT + // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15) + SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32, + VEMCExpr::VK_VE_GOTOFF_LO32, DAG); + SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT); + return DAG.getNode(ISD::ADD, DL, VT, GlobalBase, HiLo); + } else { + // Create following instructions for not local linkage PIC code.
+ // lea %s35, %got_lo(.LCPI0_0) + // and %s35, %s35, (32)0 + // lea.sl %s35, %got_hi(.LCPI0_0)(%s35) + // adds.l %s35, %s15, %s35 ; %s15 is GOT + // ld %s35, (,%s35) + // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15) + SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32, + VEMCExpr::VK_VE_GOT_LO32, DAG); + SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT); + SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, VT, GlobalBase, HiLo); + return DAG.getLoad(VT, DL, DAG.getEntryNode(), AbsAddr, + MachinePointerInfo::getGOT(DAG.getMachineFunction())); + } + } + + // This is one of the absolute code models. + switch (getTargetMachine().getCodeModel()) { + default: + llvm_unreachable("Unsupported absolute code model"); + case CodeModel::Small: + case CodeModel::Medium: + case CodeModel::Large: + // abs64. + return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG); + } +} + +SDValue VETargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { + return makeAddress(Op, DAG); +} + +SDValue VETargetLowering::LowerConstantPool(SDValue Op, + SelectionDAG &DAG) const { + return makeAddress(Op, DAG); +} + +SDValue VETargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { + return makeAddress(Op, DAG); +} + +SDValue +VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + + // Generate following code: + // t1: ch,glue = callseq_start t0, 0, 0 + // t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1 + // t3: ch,glue = callseq_end t2, 0, 0, t2:2 + // t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1 + SDValue Label = withTargetFlags(Op, 0, DAG); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + // Lowering the machine isd will make sure everything is in the right + // location. 
+ SDValue Chain = DAG.getEntryNode(); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask( + DAG.getMachineFunction(), CallingConv::C); + Chain = DAG.getCALLSEQ_START(Chain, 64, 0, dl); + SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)}; + Chain = DAG.getNode(VEISD::GETTLSADDR, dl, NodeTys, Args); + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, dl, true), + DAG.getIntPtrConstant(0, dl, true), + Chain.getValue(1), dl); + Chain = DAG.getCopyFromReg(Chain, dl, VE::SX0, PtrVT, Chain.getValue(1)); + + // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls. + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + MFI.setHasCalls(true); + + // Also generate code to prepare a GOT register if it is PIC. + if (isPositionIndependent()) { + MachineFunction &MF = DAG.getMachineFunction(); + Subtarget->getInstrInfo()->getGlobalBaseReg(&MF); + } + + return Chain; +} + +SDValue VETargetLowering::LowerToTLSLocalExecModel(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + // Generate following code: + // lea %s0, Op@tpoff_lo + // and %s0, %s0, (32)0 + // lea.sl %s0, Op@tpoff_hi(%s0) + // add %s0, %s0, %tp + // FIXME: use lea.sl %s0, Op@tpoff_hi(%tp, %s0) for better performance + SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_TPOFF_HI32, + VEMCExpr::VK_VE_TPOFF_LO32, DAG); + return DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(VE::SX14, PtrVT), + HiLo); +} + +SDValue VETargetLowering::LowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { + // Current implementation of nld doesn't allow local exec model code + // described in VE-tls_v1.1.pdf (*1) as its input. The nld accept + // only general dynamic model and optimize it whenever. So, here + // we need to generate only general dynamic model code sequence. 
+ // + // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf + return LowerToTLSGeneralDynamicModel(Op, DAG); +} + +SDValue VETargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + return DAG.getNode(VEISD::EH_SJLJ_SETJMP, dl, + DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), + Op.getOperand(1)); +} + +SDValue VETargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + return DAG.getNode(VEISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), + Op.getOperand(1)); +} + +SDValue VETargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + return DAG.getNode(VEISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other, + Op.getOperand(0)); +} + +static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, + const VETargetLowering &TLI) { + MachineFunction &MF = DAG.getMachineFunction(); + VEMachineFunctionInfo *FuncInfo = MF.getInfo(); + auto PtrVT = TLI.getPointerTy(DAG.getDataLayout()); + + // Need frame address to find the address of VarArgsFrameIndex. + MF.getFrameInfo().setFrameAddressIsTaken(true); + + // vastart just stores the address of the VarArgsFrameIndex slot into the + // memory location argument. 
+ SDLoc DL(Op); + SDValue Offset = + DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT), + DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL)); + const Value *SV = cast(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1), + MachinePointerInfo(SV)); +} + +static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { + SDNode *Node = Op.getNode(); + EVT VT = Node->getValueType(0); + SDValue InChain = Node->getOperand(0); + SDValue VAListPtr = Node->getOperand(1); + EVT PtrVT = VAListPtr.getValueType(); + const Value *SV = cast(Node->getOperand(2))->getValue(); + SDLoc DL(Node); + SDValue VAList = + DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV)); + SDValue Chain = VAList.getValue(1); + SDValue NextPtr; + + if (VT == MVT::f128) { + // Alignment + int Align = 16; + VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, + DAG.getConstant(Align - 1, DL, PtrVT)); + VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList, + DAG.getConstant(-Align, DL, PtrVT)); + // Increment the pointer, VAList, by 16 to the next vaarg. + NextPtr = + DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(16, DL)); + } else if (VT == MVT::f32) { + // float --> need special handling like below. + // 0 4 + // +------+------+ + // | empty| float| + // +------+------+ + // Increment the pointer, VAList, by 8 to the next vaarg. + NextPtr = + DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL)); + // Then, adjust VAList. + unsigned InternalOffset = 4; + VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, + DAG.getConstant(InternalOffset, DL, PtrVT)); + } else { + // Increment the pointer, VAList, by 8 to the next vaarg. + NextPtr = + DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL)); + } + + // Store the incremented VAList to the legalized pointer. 
+ InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV)); + + // Load the actual argument out of the pointer VAList. + // We can't count on greater alignment than the word size. + return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(), + std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8); +} + +SDValue VETargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { + // Generate following code. + // (void)__llvm_grow_stack(size); + // ret = GETSTACKTOP; // pseudo instruction + SDLoc dl(Op); + + SDValue Size = Op.getOperand(1); // Legalize the size. + EVT VT = Size->getValueType(0); + + // Prepare arguments + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Size; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Args.push_back(Entry); + Type *RetTy = Type::getVoidTy(*DAG.getContext()); + + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Callee = DAG.getTargetExternalSymbol("__llvm_grow_stack", PtrVT, 0); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(DAG.getEntryNode()) + .setCallee(CallingConv::VE_LLVM_GROW_STACK, RetTy, Callee, + std::move(Args)) + .setDiscardResult(true); + std::pair pair = LowerCallTo(CLI); + SDValue Chain = pair.second; + SDValue Value = DAG.getNode(VEISD::GETSTACKTOP, dl, VT, Chain); + SDValue Ops[2] = {Value, Chain}; + return DAG.getMergeValues(Ops, dl); +} + +static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG, + const VETargetLowering &TLI, + const VESubtarget *Subtarget) { + SDLoc dl(Op); + unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + MFI.setFrameAddressIsTaken(true); + + EVT PtrVT = TLI.getPointerTy(MF.getDataLayout()); + + // Naked functions never have a frame pointer, and so we use r1. 
For all + // other functions, this decision must be delayed until during PEI. + const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + unsigned FrameReg = RegInfo->getFrameRegister(MF); + + SDValue FrameAddr = + DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); + while (Depth--) + FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), + FrameAddr, MachinePointerInfo()); + return FrameAddr; +} + +static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG, + const VETargetLowering &TLI, + const VESubtarget *Subtarget) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + MFI.setReturnAddressIsTaken(true); + + if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG)) + return SDValue(); + + SDLoc dl(Op); + unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + + auto PtrVT = TLI.getPointerTy(MF.getDataLayout()); + + if (Depth > 0) { + SDValue FrameAddr = LowerFRAMEADDR(Op, DAG, TLI, Subtarget); + SDValue Offset = DAG.getConstant(8, dl, MVT::i64); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), + MachinePointerInfo()); + } + + // Just load the return address off the stack. + SDValue RetAddrFI = DAG.getFrameIndex(1, PtrVT); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, + MachinePointerInfo()); +} + +// Lower a f128 load into two f64 loads. +static SDValue LowerF128Load(SDValue Op, SelectionDAG &DAG) { + SDLoc dl(Op); + LoadSDNode *LdNode = dyn_cast(Op.getNode()); + assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type"); + + SDValue BasePtr = LdNode->getBasePtr(); + if (dyn_cast(BasePtr.getNode())) { + // For the case of frame index, expanding it here cause dependency + // problem. 
So, treat it as a legal and expand it in eliminateFrameIndex + return Op; + } + + unsigned alignment = LdNode->getAlignment(); + if (alignment > 8) + alignment = 8; + + SDValue Lo64 = + DAG.getLoad(MVT::f64, dl, LdNode->getChain(), LdNode->getBasePtr(), + LdNode->getPointerInfo(), alignment, + LdNode->isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + EVT addrVT = LdNode->getBasePtr().getValueType(); + SDValue HiPtr = DAG.getNode(ISD::ADD, dl, addrVT, LdNode->getBasePtr(), + DAG.getConstant(8, dl, addrVT)); + SDValue Hi64 = + DAG.getLoad(MVT::f64, dl, LdNode->getChain(), HiPtr, + LdNode->getPointerInfo(), alignment, + LdNode->isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + + SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, dl, MVT::i32); + SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, dl, MVT::i32); + + // VE stores Hi64 to 8(addr) and Lo64 to 0(addr) + SDNode *InFP128 = + DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f128); + InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, MVT::f128, + SDValue(InFP128, 0), Hi64, SubRegEven); + InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, MVT::f128, + SDValue(InFP128, 0), Lo64, SubRegOdd); + SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1), + SDValue(Hi64.getNode(), 1)}; + SDValue OutChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); + SDValue Ops[2] = {SDValue(InFP128, 0), OutChain}; + return DAG.getMergeValues(Ops, dl); +} + +static SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) { + LoadSDNode *LdNode = cast(Op.getNode()); + + EVT MemVT = LdNode->getMemoryVT(); + if (MemVT == MVT::f128) + return LowerF128Load(Op, DAG); + + return Op; +} + +// Lower a f128 store into two f64 stores. 
+static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) { + SDLoc dl(Op); + StoreSDNode *StNode = dyn_cast(Op.getNode()); + assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type"); + + SDValue BasePtr = StNode->getBasePtr(); + if (dyn_cast(BasePtr.getNode())) { + // For the case of frame index, expanding it here cause dependency + // problem. So, treat it as a legal and expand it in eliminateFrameIndex + return Op; + } + + SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, dl, MVT::i32); + SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, dl, MVT::i32); + + SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i64, + StNode->getValue(), SubRegEven); + SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i64, + StNode->getValue(), SubRegOdd); + + unsigned alignment = StNode->getAlignment(); + if (alignment > 8) + alignment = 8; + + // VE stores Hi64 to 8(addr) and Lo64 to 0(addr) + SDValue OutChains[2]; + OutChains[0] = + DAG.getStore(StNode->getChain(), dl, SDValue(Lo64, 0), + StNode->getBasePtr(), MachinePointerInfo(), alignment, + StNode->isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + EVT addrVT = StNode->getBasePtr().getValueType(); + SDValue HiPtr = DAG.getNode(ISD::ADD, dl, addrVT, StNode->getBasePtr(), + DAG.getConstant(8, dl, addrVT)); + OutChains[1] = + DAG.getStore(StNode->getChain(), dl, SDValue(Hi64, 0), HiPtr, + MachinePointerInfo(), alignment, + StNode->isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); +} + +static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) { + SDLoc dl(Op); + StoreSDNode *St = cast(Op.getNode()); + + EVT MemVT = St->getMemoryVT(); + if (MemVT == MVT::f128) + return LowerF128Store(Op, DAG); + + // Otherwise, ask llvm to expand it. + return SDValue(); +} + +// Custom lower UMULO/SMULO for VE. 
This code is similar to ExpandNode() +// in LegalizeDAG.cpp except the order of arguments to the library function. +static SDValue LowerUMULO_SMULO(SDValue Op, SelectionDAG &DAG, + const VETargetLowering &TLI) { + unsigned opcode = Op.getOpcode(); + assert((opcode == ISD::UMULO || opcode == ISD::SMULO) && "Invalid Opcode."); + + bool isSigned = (opcode == ISD::SMULO); + EVT VT = MVT::i64; + EVT WideVT = MVT::i128; + SDLoc dl(Op); + SDValue LHS = Op.getOperand(0); + + if (LHS.getValueType() != VT) + return Op; + + SDValue ShiftAmt = DAG.getConstant(63, dl, VT); + + SDValue RHS = Op.getOperand(1); + SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, ShiftAmt); + SDValue HiRHS = DAG.getNode(ISD::SRA, dl, MVT::i64, RHS, ShiftAmt); + SDValue Args[] = {LHS, HiLHS, RHS, HiRHS}; + + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(isSigned); + SDValue MulResult = + TLI.makeLibCall(DAG, RTLIB::MUL_I128, WideVT, Args, CallOptions, dl) + .first; + SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, MulResult, + DAG.getIntPtrConstant(0, dl)); + SDValue TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, MulResult, + DAG.getIntPtrConstant(1, dl)); + if (isSigned) { + SDValue Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt); + TopHalf = DAG.getSetCC(dl, MVT::i32, TopHalf, Tmp1, ISD::SETNE); + } else { + TopHalf = DAG.getSetCC(dl, MVT::i32, TopHalf, DAG.getConstant(0, dl, VT), + ISD::SETNE); + } + // MulResult is a node with an illegal type. Because such things are not + // generally permitted during this phase of legalization, ensure that + // nothing is left using the node. The above EXTRACT_ELEMENT nodes should have + // been folded. 
+ assert(MulResult->use_empty() && "Illegally typed node still in use!"); + + SDValue Ops[2] = {BottomHalf, TopHalf}; + return DAG.getMergeValues(Ops, dl); +} + +SDValue VETargetLowering::LowerATOMIC_FENCE(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + AtomicOrdering FenceOrdering = static_cast( + cast(Op.getOperand(1))->getZExtValue()); + SyncScope::ID FenceSSID = static_cast( + cast(Op.getOperand(2))->getZExtValue()); + + // VE uses Release consistency, so need a fence instruction if it is a + // cross-thread fence. + if (FenceSSID == SyncScope::System) { + switch (FenceOrdering) { + case AtomicOrdering::NotAtomic: + case AtomicOrdering::Unordered: + case AtomicOrdering::Monotonic: + // No need to generate fencem instruction here. + break; + case AtomicOrdering::Acquire: + // Generate "fencem 2" as acquire fence. + return SDValue( + DAG.getMachineNode(VE::FENCEload, DL, MVT::Other, Op.getOperand(0)), + 0); + case AtomicOrdering::Release: + // Generate "fencem 1" as release fence. + return SDValue( + DAG.getMachineNode(VE::FENCEstore, DL, MVT::Other, Op.getOperand(0)), + 0); + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: + // Generate "fencem 3" as acq_rel and seq_cst fence. + // FIXME: "fencem 3" doesn't wait for PCIe device accesses, + // so seq_cst may require more instructions for them. + return SDValue(DAG.getMachineNode(VE::FENCEloadstore, DL, MVT::Other, + Op.getOperand(0)), + 0); + } + } + + // MEMBARRIER is a compiler barrier; it codegens to a no-op.
+ return DAG.getNode(VEISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); +} + +static Instruction *callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Function *Func = Intrinsic::getDeclaration(M, Id); + return Builder.CreateCall(Func, {}); +} + +Instruction *VETargetLowering::emitLeadingFence(IRBuilder<> &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { + switch (Ord) { + case AtomicOrdering::NotAtomic: + case AtomicOrdering::Unordered: + llvm_unreachable("Invalid fence: unordered/non-atomic"); + case AtomicOrdering::Monotonic: + case AtomicOrdering::Acquire: + return nullptr; // Nothing to do + case AtomicOrdering::Release: + case AtomicOrdering::AcquireRelease: + return callIntrinsic(Builder, Intrinsic::ve_fencem1); + case AtomicOrdering::SequentiallyConsistent: + if (!Inst->hasAtomicStore()) + return nullptr; // Nothing to do + return callIntrinsic(Builder, Intrinsic::ve_fencem3); + } + llvm_unreachable("Unknown fence ordering in emitLeadingFence"); +} + +Instruction *VETargetLowering::emitTrailingFence(IRBuilder<> &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { + switch (Ord) { + case AtomicOrdering::NotAtomic: + case AtomicOrdering::Unordered: + llvm_unreachable("Invalid fence: unordered/not-atomic"); + case AtomicOrdering::Monotonic: + case AtomicOrdering::Release: + return nullptr; // Nothing to do + case AtomicOrdering::Acquire: + case AtomicOrdering::AcquireRelease: + return callIntrinsic(Builder, Intrinsic::ve_fencem2); + case AtomicOrdering::SequentiallyConsistent: + return callIntrinsic(Builder, Intrinsic::ve_fencem3); + } + llvm_unreachable("Unknown fence ordering in emitTrailingFence"); +} + +SDValue VETargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); + switch (IntNo) { + default: + return SDValue(); // Don't custom lower most intrinsics. 
+ case Intrinsic::thread_pointer: { + report_fatal_error("Intrinsic::thread_point is not implemented yet"); + } + case Intrinsic::eh_sjlj_lsda: { + MachineFunction &MF = DAG.getMachineFunction(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); + const VETargetMachine *TM = + static_cast(&DAG.getTarget()); + + // Create the GCC_except_tableXX string. The real symbol for that will be + // generated in EHStreamer::emitExceptionTable() later. So, we just + // borrow its name here. + TM->getStrList()->push_back(std::string( + (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str())); + SDValue Addr = + DAG.getTargetExternalSymbol(TM->getStrList()->back().c_str(), PtrVT, 0); + if (isPositionIndependent()) { + Addr = makeHiLoPair(Addr, VEMCExpr::VK_VE_GOTOFF_HI32, + VEMCExpr::VK_VE_GOTOFF_LO32, DAG); + SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, dl, PtrVT); + return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalBase, Addr); + } else { + return makeHiLoPair(Addr, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, + DAG); + } + } + } +} + +SDValue VETargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); + switch (IntNo) { + default: + return SDValue(); // Don't custom lower most intrinsics. + } +} + +SDValue VETargetLowering::LowerINTRINSIC_VOID(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); + switch (IntNo) { + default: + return SDValue(); // Don't custom lower most intrinsics.
+ } +} + +SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + + switch (Op.getOpcode()) { + default: + llvm_unreachable("Should not custom lower this!"); + + case ISD::RETURNADDR: + return LowerRETURNADDR(Op, DAG, *this, Subtarget); + case ISD::FRAMEADDR: + return LowerFRAMEADDR(Op, DAG, *this, Subtarget); + case ISD::GlobalTLSAddress: + return LowerGlobalTLSAddress(Op, DAG); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG); + case ISD::BlockAddress: + return LowerBlockAddress(Op, DAG); + case ISD::ConstantPool: + return LowerConstantPool(Op, DAG); + case ISD::EH_SJLJ_SETJMP: + return LowerEH_SJLJ_SETJMP(Op, DAG); + case ISD::EH_SJLJ_LONGJMP: + return LowerEH_SJLJ_LONGJMP(Op, DAG); + case ISD::EH_SJLJ_SETUP_DISPATCH: + return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG); + case ISD::VASTART: + return LowerVASTART(Op, DAG, *this); + case ISD::VAARG: + return LowerVAARG(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: + return LowerDYNAMIC_STACKALLOC(Op, DAG); + + case ISD::LOAD: + return LowerLOAD(Op, DAG); + case ISD::STORE: + return LowerSTORE(Op, DAG); + case ISD::UMULO: + case ISD::SMULO: + return LowerUMULO_SMULO(Op, DAG, *this); + case ISD::ATOMIC_FENCE: + return LowerATOMIC_FENCE(Op, DAG); + case ISD::INTRINSIC_VOID: + return LowerINTRINSIC_VOID(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: + return LowerINTRINSIC_W_CHAIN(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: + return LowerINTRINSIC_WO_CHAIN(Op, DAG); + } +} + +/// Return the entry encoding for a jump table in the +/// current function. The returned value is a member of the +/// MachineJumpTableInfo::JTEntryKind enum. +unsigned VETargetLowering::getJumpTableEncoding() const { + // VE doesn't support GOT32 style of labels in the current version of nas. + // So, we generates a following entry for each jump table. + // .4bytes .LBB0_2- + if (isPositionIndependent()) + return MachineJumpTableInfo::EK_Custom32; + + // Otherwise, use the normal jump table encoding heuristics. 
+ return TargetLowering::getJumpTableEncoding(); +} + +const MCExpr *VETargetLowering::LowerCustomJumpTableEntry( + const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, + unsigned uid, MCContext &Ctx) const { + assert(isPositionIndependent()); + // VE doesn't support GOT32 style of labels in the current version of nas. + // So, we generates a following entry for each jump table. + // .4bytes .LBB0_2- + auto Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx); + MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName().data()); + auto Base = MCSymbolRefExpr::create(Sym, Ctx); + return MCBinaryExpr::createSub(Value, Base, Ctx); +} + +void VETargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, + MachineBasicBlock *MBB, + MachineBasicBlock *DispatchBB, + int FI) const { + DebugLoc DL = MI.getDebugLoc(); + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + const VEInstrInfo *TII = Subtarget->getInstrInfo(); + + const TargetRegisterClass *TRC = &VE::I64RegClass; + unsigned Tmp1 = MRI->createVirtualRegister(TRC); + unsigned Tmp2 = MRI->createVirtualRegister(TRC); + unsigned VR = MRI->createVirtualRegister(TRC); + unsigned Op = VE::STSri; + + if (isPositionIndependent()) { + // Create following instructions for local linkage PIC code. 
+ // lea %Tmp1, DispatchBB@gotoff_lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %Tmp3, DispatchBB@gotoff_hi(%Tmp2) + // adds.l %VR, %s15, %Tmp3 ; %s15 is GOT + // FIXME: use lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) + unsigned Tmp3 = MRI->createVirtualRegister(&VE::I64RegClass); + BuildMI(*MBB, MI, DL, TII->get(VE::LEAzzi), Tmp1) + .addMBB(DispatchBB, VEMCExpr::VK_VE_GOTOFF_LO32); + BuildMI(*MBB, MI, DL, TII->get(VE::ANDrm0), Tmp2).addReg(Tmp1).addImm(32); + BuildMI(*MBB, MI, DL, TII->get(VE::LEASLrzi), Tmp3) + .addReg(Tmp2) + .addMBB(DispatchBB, VEMCExpr::VK_VE_GOTOFF_HI32); + BuildMI(*MBB, MI, DL, TII->get(VE::ADXrr), VR) + .addReg(VE::SX15) + .addReg(Tmp3); + } else { + // lea %Tmp1, DispatchBB@lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %VR, DispatchBB@hi(%Tmp2) + BuildMI(*MBB, MI, DL, TII->get(VE::LEAzzi), Tmp1) + .addMBB(DispatchBB, VEMCExpr::VK_VE_LO32); + BuildMI(*MBB, MI, DL, TII->get(VE::ANDrm0), Tmp2).addReg(Tmp1).addImm(32); + BuildMI(*MBB, MI, DL, TII->get(VE::LEASLrzi), VR) + .addReg(Tmp2) + .addMBB(DispatchBB, VEMCExpr::VK_VE_HI32); + } + + MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(Op)); + addFrameReference(MIB, FI, 56 + 16); + MIB.addReg(VR); +} + +MachineBasicBlock * +VETargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, + MachineBasicBlock *BB) const { + DebugLoc DL = MI.getDebugLoc(); + MachineFunction *MF = BB->getParent(); + MachineFrameInfo &MFI = MF->getFrameInfo(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + const VEInstrInfo *TII = Subtarget->getInstrInfo(); + int FI = MFI.getFunctionContextIndex(); + + // Get a mapping of the call site numbers to all of the landing pads they're + // associated with. 
+ DenseMap> CallSiteNumToLPad; + unsigned MaxCSNum = 0; + for (auto &MBB : *MF) { + if (!MBB.isEHPad()) + continue; + + MCSymbol *Sym = nullptr; + for (const auto &MI : MBB) { + if (MI.isDebugInstr()) + continue; + + assert(MI.isEHLabel() && "expected EH_LABEL"); + Sym = MI.getOperand(0).getMCSymbol(); + break; + } + + if (!MF->hasCallSiteLandingPad(Sym)) + continue; + + for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) { + CallSiteNumToLPad[CSI].push_back(&MBB); + MaxCSNum = std::max(MaxCSNum, CSI); + } + } + + // Get an ordered list of the machine basic blocks for the jump table. + std::vector LPadList; + SmallPtrSet InvokeBBs; + LPadList.reserve(CallSiteNumToLPad.size()); + + for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) { + for (auto &LP : CallSiteNumToLPad[CSI]) { + LPadList.push_back(LP); + InvokeBBs.insert(LP->pred_begin(), LP->pred_end()); + } + } + + assert(!LPadList.empty() && + "No landing pad destinations for the dispatch jump table!"); + + // Create the MBBs for the dispatch code. + + // Shove the dispatch's address into the return slot in the function context. + MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock(); + DispatchBB->setIsEHPad(true); + + MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); + BuildMI(TrapBB, DL, TII->get(VE::TRAP)); + BuildMI(TrapBB, DL, TII->get(VE::NOP)); + DispatchBB->addSuccessor(TrapBB); + + MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock(); + DispatchBB->addSuccessor(DispContBB); + + // Insert MBBs. + MF->push_back(DispatchBB); + MF->push_back(DispContBB); + MF->push_back(TrapBB); + + // Insert code into the entry block that creates and registers the function + // context. 
+ SetupEntryBlockForSjLj(MI, BB, DispatchBB, FI); + + // Create the jump table and associated information + unsigned JTE = getJumpTableEncoding(); + MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE); + unsigned MJTI = JTI->createJumpTableIndex(LPadList); + + const VERegisterInfo &RI = TII->getRegisterInfo(); + // Add a register mask with no preserved registers. This results in all + // registers being marked as clobbered. + BuildMI(DispatchBB, DL, TII->get(VE::NOP)) + .addRegMask(RI.getNoPreservedMask()); + + if (isPositionIndependent()) { + // Force to generate GETGOT, since current implementation doesn't recover + // GOT register correctly. + BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15); + } + + // IReg is used as an index in a memory operand and therefore can't be SP + unsigned IReg = MRI->createVirtualRegister(&VE::I64RegClass); + addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLUri), IReg), FI, 8); + if (LPadList.size() < 63) { + BuildMI(DispatchBB, DL, TII->get(VE::BCRLir)) + .addImm(VECC::CC_ILE) + .addImm(LPadList.size()) + .addReg(IReg) + .addMBB(TrapBB); + } else { + assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!"); + unsigned TmpReg = MRI->createVirtualRegister(&VE::I64RegClass); + BuildMI(DispatchBB, DL, TII->get(VE::LEAzzi), TmpReg) + .addImm(LPadList.size()); + BuildMI(DispatchBB, DL, TII->get(VE::BCRLrr)) + .addImm(VECC::CC_ILE) + .addReg(TmpReg) + .addReg(IReg) + .addMBB(TrapBB); + } + + unsigned BReg = MRI->createVirtualRegister(&VE::I64RegClass); + + unsigned Tmp1 = MRI->createVirtualRegister(&VE::I64RegClass); + unsigned Tmp2 = MRI->createVirtualRegister(&VE::I64RegClass); + + if (isPositionIndependent()) { + // Create following instructions for local linkage PIC code. 
+ // lea %Tmp1, .LJTI0_0@gotoff_lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %Tmp3, .LJTI0_0@gotoff_hi(%Tmp2) + // adds.l %BReg, %s15, %Tmp3 ; %s15 is GOT + // FIXME: use lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) + unsigned Tmp3 = MRI->createVirtualRegister(&VE::I64RegClass); + BuildMI(DispContBB, DL, TII->get(VE::LEAzzi), Tmp1) + .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_LO32); + BuildMI(DispContBB, DL, TII->get(VE::ANDrm0), Tmp2).addReg(Tmp1).addImm(32); + BuildMI(DispContBB, DL, TII->get(VE::LEASLrzi), Tmp3) + .addReg(Tmp2) + .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_HI32); + BuildMI(DispContBB, DL, TII->get(VE::ADXrr), BReg) + .addReg(VE::SX15) + .addReg(Tmp3); + } else { + // lea %Tmp1, .LJTI0_0@lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %BReg, .LJTI0_0@hi(%Tmp2) + BuildMI(DispContBB, DL, TII->get(VE::LEAzzi), Tmp1) + .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_LO32); + BuildMI(DispContBB, DL, TII->get(VE::ANDrm0), Tmp2).addReg(Tmp1).addImm(32); + BuildMI(DispContBB, DL, TII->get(VE::LEASLrzi), BReg) + .addReg(Tmp2) + .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_HI32); + } + + switch (JTE) { + case MachineJumpTableInfo::EK_BlockAddress: { + // Generate simple block address code for no-PIC model. 
+ + unsigned TReg = MRI->createVirtualRegister(&VE::I64RegClass); + unsigned Tmp1 = MRI->createVirtualRegister(&VE::I64RegClass); + unsigned Tmp2 = MRI->createVirtualRegister(&VE::I64RegClass); + + // sll Tmp1, IReg, 3 + BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1).addReg(IReg).addImm(3); + // FIXME: combine these add and lds into "lds TReg, *(BReg, Tmp1)" + // adds.l Tmp2, BReg, Tmp1 + BuildMI(DispContBB, DL, TII->get(VE::ADXrr), Tmp2) + .addReg(Tmp1) + .addReg(BReg); + // lds TReg, *(Tmp2) + BuildMI(DispContBB, DL, TII->get(VE::LDSri), TReg).addReg(Tmp2).addImm(0); + + // jmpq *(TReg) + BuildMI(DispContBB, DL, TII->get(VE::BAri)).addReg(TReg).addImm(0); + break; + } + case MachineJumpTableInfo::EK_Custom32: { + // for the case of PIC, generates these codes + + assert(isPositionIndependent()); + unsigned OReg = MRI->createVirtualRegister(&VE::I64RegClass); + unsigned TReg = MRI->createVirtualRegister(&VE::I64RegClass); + + unsigned Tmp1 = MRI->createVirtualRegister(&VE::I64RegClass); + unsigned Tmp2 = MRI->createVirtualRegister(&VE::I64RegClass); + + // sll Tmp1, IReg, 2 + BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1).addReg(IReg).addImm(2); + // FIXME: combine these add and ldl into "ldl.zx OReg, *(BReg, Tmp1)" + // add Tmp2, BReg, Tmp1 + BuildMI(DispContBB, DL, TII->get(VE::ADXrr), Tmp2) + .addReg(Tmp1) + .addReg(BReg); + // ldl.zx OReg, *(Tmp2) + BuildMI(DispContBB, DL, TII->get(VE::LDLUri), OReg).addReg(Tmp2).addImm(0); + + // Create following instructions for local linkage PIC code. 
+ // lea %Tmp3, fun@gotoff_lo + // and %Tmp4, %Tmp3, (32)0 + // lea.sl %Tmp5, fun@gotoff_hi(%Tmp4) + // adds.l %BReg2, %s15, %Tmp5 ; %s15 is GOT + // FIXME: use lea.sl %BReg2, fun@gotoff_hi(%Tmp4, %s15) + unsigned Tmp3 = MRI->createVirtualRegister(&VE::I64RegClass); + unsigned Tmp4 = MRI->createVirtualRegister(&VE::I64RegClass); + unsigned Tmp5 = MRI->createVirtualRegister(&VE::I64RegClass); + unsigned BReg2 = MRI->createVirtualRegister(&VE::I64RegClass); + const char *FunName = DispContBB->getParent()->getName().data(); + BuildMI(DispContBB, DL, TII->get(VE::LEAzzi), Tmp3) + .addExternalSymbol(FunName, VEMCExpr::VK_VE_GOTOFF_LO32); + BuildMI(DispContBB, DL, TII->get(VE::ANDrm0), Tmp4).addReg(Tmp3).addImm(32); + BuildMI(DispContBB, DL, TII->get(VE::LEASLrzi), Tmp5) + .addReg(Tmp4) + .addExternalSymbol(FunName, VEMCExpr::VK_VE_GOTOFF_HI32); + BuildMI(DispContBB, DL, TII->get(VE::ADXrr), BReg2) + .addReg(VE::SX15) + .addReg(Tmp5); + + // adds.l TReg, BReg2, OReg + BuildMI(DispContBB, DL, TII->get(VE::ADXrr), TReg) + .addReg(OReg) + .addReg(BReg2); + // jmpq *(TReg) + BuildMI(DispContBB, DL, TII->get(VE::BAri)).addReg(TReg).addImm(0); + break; + } + default: + llvm_unreachable("Unexpected jump table encoding"); + } + + // Add the jump table entries as successors to the MBB. + SmallPtrSet SeenMBBs; + for (auto &LP : LPadList) + if (SeenMBBs.insert(LP).second) + DispContBB->addSuccessor(LP); + + // N.B. the order the invoke BBs are processed in doesn't matter here. + SmallVector MBBLPads; + const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs(); + for (MachineBasicBlock *MBB : InvokeBBs) { + // Remove the landing pad successor from the invoke block and replace it + // with the new dispatch block. + // Keep a copy of Successors since it's modified inside the loop. + SmallVector Successors(MBB->succ_rbegin(), + MBB->succ_rend()); + // FIXME: Avoid quadratic complexity. 
+ for (auto MBBS : Successors) { + if (MBBS->isEHPad()) { + MBB->removeSuccessor(MBBS); + MBBLPads.push_back(MBBS); + } + } + + MBB->addSuccessor(DispatchBB); + + // Find the invoke call and mark all of the callee-saved registers as + // 'implicit defined' so that they're spilled. This prevents code from + // moving instructions to before the EH block, where they will never be + // executed. + for (auto &II : reverse(*MBB)) { + if (!II.isCall()) + continue; + + DenseMap DefRegs; + for (auto &MOp : II.operands()) + if (MOp.isReg()) + DefRegs[MOp.getReg()] = true; + + MachineInstrBuilder MIB(*MF, &II); + for (unsigned RI = 0; SavedRegs[RI]; ++RI) { + unsigned Reg = SavedRegs[RI]; + if (!DefRegs[Reg]) + MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead); + } + + break; + } + } + + // Mark all former landing pads as non-landing pads. The dispatch is the only + // landing pad now. + for (auto &LP : MBBLPads) + LP->setIsEHPad(false); + + // The instruction is gone now. + MI.eraseFromParent(); + return BB; +} + +MachineBasicBlock * +VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const { + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unknown Custom Instruction!"); + case VE::EH_SjLj_Setup_Dispatch: + return EmitSjLjDispatchBlock(MI, BB); + } +} + +//===----------------------------------------------------------------------===// +// VE Inline Assembly Support +//===----------------------------------------------------------------------===// + +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. 
+VETargetLowering::ConstraintType
+VETargetLowering::getConstraintType(StringRef Constraint) const {
+  // Only single-letter constraints get VE-specific treatment; anything else
+  // is classified by the generic implementation.
+  if (Constraint.size() != 1)
+    return TargetLowering::getConstraintType(Constraint);
+
+  switch (Constraint[0]) {
+  case 'r':
+  case 'f':
+  case 'e':
+    return C_RegisterClass;
+  case 'I': // SIMM13
+    return C_Other;
+  default:
+    return TargetLowering::getConstraintType(Constraint);
+  }
+}
+
+/// getSingleConstraintMatchWeight - Rate how well the call operand recorded in
+/// \p info matches the constraint letter pointed to by \p constraint.
+///
+/// A missing operand is rated CW_Default.  'I' is rated CW_Constant for a
+/// ConstantInt that fits a signed 13-bit immediate and CW_Invalid otherwise.
+/// Every other letter is rated by the generic implementation.
+TargetLowering::ConstraintWeight
+VETargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &info,
+                                                 const char *constraint) const {
+  // If we don't have a value, we can't do a match,
+  // but allow it at the lowest weight.
+  if (!info.CallOperandVal)
+    return CW_Default;
+
+  // Look at the constraint type.
+  switch (*constraint) {
+  default:
+    return TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+  case 'I': // SIMM13
+    if (const auto *C = dyn_cast<ConstantInt>(info.CallOperandVal))
+      if (isInt<13>(C->getSExtValue()))
+        return CW_Constant;
+    return CW_Invalid;
+  }
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector.  If it is invalid, don't add anything to Ops.
+///
+/// For 'I' a constant operand is added as a target constant when it fits a
+/// signed 13-bit immediate and rejected (nothing added) when it does not.
+/// Non-constant 'I' operands and all other letters go to the generic
+/// implementation.
+void VETargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+                                                    std::string &Constraint,
+                                                    std::vector<SDValue> &Ops,
+                                                    SelectionDAG &DAG) const {
+  // Only support length 1 constraints for now.
+  if (Constraint.length() > 1)
+    return;
+
+  if (Constraint[0] == 'I') {
+    if (const auto *C = dyn_cast<ConstantSDNode>(Op)) {
+      // An out-of-range constant is invalid for SIMM13: add nothing.
+      if (!isInt<13>(C->getSExtValue()))
+        return;
+      Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
+                                          Op.getValueType()));
+      return;
+    }
+    // A non-constant operand is handed to the generic code below.
+  }
+
+  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+/// getRegForInlineAsmConstraint - Map an inline-asm register constraint to a
+/// register class, or to a specific register for the "{...}" form.
+///
+/// 'r' selects I64RegClass.  'f' and 'e' select I64RegClass for f32/f64 and
+/// F128RegClass for f128.  Any other value type with 'f'/'e' is passed on to
+/// the generic implementation rather than treated as unreachable: both the
+/// constraint and the operand type come from user-written inline assembly, so
+/// this path can be reached with valid compiler input.
+std::pair<unsigned, const TargetRegisterClass *>
+VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+                                               StringRef Constraint,
+                                               MVT VT) const {
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    case 'r':
+      return std::make_pair(0U, &VE::I64RegClass);
+    case 'f':
+    case 'e':
+      if (VT == MVT::f32 || VT == MVT::f64)
+        return std::make_pair(0U, &VE::I64RegClass);
+      if (VT == MVT::f128)
+        return std::make_pair(0U, &VE::F128RegClass);
+      // No VE register class for this value type; let the generic lookup at
+      // the end of the function decide.
+      break;
+    default:
+      break;
+    }
+  } else if (!Constraint.empty() && Constraint.size() <= 5 &&
+             Constraint.front() == '{' && Constraint.back() == '}') {
+    // constraint = '{r}'
+    // Remove the braces from around the name.
+    StringRef name = Constraint.slice(1, Constraint.size() - 1);
+    // Handle register aliases:
+    //       r0-r7   -> g0-g7
+    //       r8-r15  -> o0-o7
+    //       r16-r23 -> l0-l7
+    //       r24-r31 -> i0-i7
+    // NOTE(review): whether the VE register file defines g/o/l/i names is not
+    // visible from this file -- confirm this alias table applies to VE.
+    uint64_t intVal = 0;
+    if (name.startswith("r") && !name.substr(1).getAsInteger(10, intVal) &&
+        intVal <= 31) {
+      const char regTypes[] = {'g', 'o', 'l', 'i'};
+      char regType = regTypes[intVal / 8];
+      char regIdx = '0' + (intVal % 8);
+      char tmp[] = {'{', regType, regIdx, '}', 0};
+      std::string newConstraint = std::string(tmp);
+      return TargetLowering::getRegForInlineAsmConstraint(TRI, newConstraint,
+                                                          VT);
+    }
+  }
+
+  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+}
+
+/// isOffsetFoldingLegal - Never fold an offset into a global address node.
+bool VETargetLowering::isOffsetFoldingLegal(
+    const GlobalAddressSDNode *GA) const {
+  // The VE target isn't yet aware of offsets.
+  return false;
+}
+
+/// ReplaceNodeResults - Custom type legalization hook.  No opcode is handled
+/// yet, so every call dumps the node (debug builds) and is unreachable.
+void VETargetLowering::ReplaceNodeResults(SDNode *N,
+                                          SmallVectorImpl<SDValue> &Results,
+                                          SelectionDAG &DAG) const {
+  switch (N->getOpcode()) {
+  default:
+    LLVM_DEBUG(N->dumpr(&DAG));
+    llvm_unreachable("Do not know how to custom type legalize this operation!");
+  }
+}
+
+// Override to enable LOAD_STACK_GUARD lowering on Linux.
+bool VETargetLowering::useLoadStackGuardNode() const {
+  // Linux always uses the node; other targets keep the generic answer.
+  return Subtarget->isTargetLinux() || TargetLowering::useLoadStackGuardNode();
+}
+
+// Override to disable global variable loading on Linux.
+void VETargetLowering::insertSSPDeclarations(Module &M) const { + if (!Subtarget->isTargetLinux()) + return TargetLowering::insertSSPDeclarations(M); +} + +void VETargetLowering::finalizeLowering(MachineFunction &MF) const { + TargetLoweringBase::finalizeLowering(MF); +} diff --git a/llvm/lib/Target/VE/VEInstrBuilder.h b/llvm/lib/Target/VE/VEInstrBuilder.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VEInstrBuilder.h @@ -0,0 +1,39 @@ +//===-- VEInstrBuilder.h - Aides for building VE insts ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file exposes functions that may be used with BuildMI from the +// MachineInstrBuilder.h file to simplify generating frame and constant pool +// references. +// +// For reference, the order of operands for memory references is: +// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate +// Displacement. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_VEINSTRBUILDER_H +#define LLVM_LIB_TARGET_VE_VEINSTRBUILDER_H + +#include "llvm/CodeGen/MachineInstrBuilder.h" + +namespace llvm { + +/// addFrameReference - This function is used to add a reference to the base of +/// an abstract object on the stack frame of the current function. This +/// reference has base register as the FrameIndex offset until it is resolved. +/// This allows a constant offset to be specified as well... 
+/// +static inline const MachineInstrBuilder & +addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0, + bool mem = true) { + return MIB.addFrameIndex(FI).addImm(Offset); +} + +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/VE/VEInstrFormats.td b/llvm/lib/Target/VE/VEInstrFormats.td new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VEInstrFormats.td @@ -0,0 +1,103 @@ +//===-- VEInstrFormats.td - VE Instruction Formats ---------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +class InstVE pattern, + InstrItinClass itin = NoItinerary> + : Instruction { + field bits<64> Inst; + + let Namespace = "VE"; + let Size = 8; + + bits<8> op; + let Inst{0-7} = op; + + dag OutOperandList = outs; + dag InOperandList = ins; + let AsmString = asmstr; + let Pattern = pattern; + + let DecoderNamespace = "VE"; + field bits<64> SoftFail = 0; + + let Itinerary = itin; +} + +class RMopVal, dag outs, dag ins, string asmstr, list pattern, + InstrItinClass itin = NoItinerary> + : InstVE { + bits<1> cx = 0; + bits<7> sx; + bits<1> cy = 0; + bits<7> sy; + bits<1> cz = 0; + bits<7> sz; + bits<32> imm32 = 0; + let op = opVal; + let Inst{15} = cx; + let Inst{14-8} = sx; + let Inst{23} = cy; + let Inst{22-16} = sy; + let Inst{31} = cz; + let Inst{30-24} = sz; + let Inst{63-32} = imm32; +} + +class RRopVal, dag outs, dag ins, string asmstr, list pattern, + InstrItinClass itin = NoItinerary> + : RM { + bits<1> cw = 0; + bits<1> cw2 = 0; + bits<4> cfw = 0; + let imm32{0-23} = 0; + let imm32{24} = cw; + let imm32{25} = cw2; + let imm32{26-27} = 0; + let imm32{28-31} = cfw; +} + +class RRFENCEopVal, dag outs, dag ins, string asmstr, list pattern, + InstrItinClass itin = NoItinerary> + : InstVE { 
+ bits<1> avo = 0; + bits<1> lf = 0; + bits<1> sf = 0; + bits<1> c2 = 0; + bits<1> c1 = 0; + bits<1> c0 = 0; + let op = opVal; + let Inst{15} = avo; + let Inst{14-10} = 0; + let Inst{9} = lf; + let Inst{8} = sf; + let Inst{23-19} = 0; + let Inst{18} = c2; + let Inst{17} = c1; + let Inst{16} = c0; + let Inst{31-24} = 0; + let Inst{63-32} = 0; +} + +class CFopVal, dag outs, dag ins, string asmstr, list pattern, + InstrItinClass itin = NoItinerary> + : RM { + bits<1> cx2; + bits<2> bpf; + bits<4> cf; + let cx = 0; + let sx{6} = cx2; + let sx{5-4} = bpf; + let sx{3-0} = cf; +} + +// Pseudo instructions. +class Pseudo pattern> + : InstVE { + let isCodeGenOnly = 1; + let isPseudo = 1; +} diff --git a/llvm/lib/Target/VE/VEInstrInfo.h b/llvm/lib/Target/VE/VEInstrInfo.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VEInstrInfo.h @@ -0,0 +1,103 @@ +//===-- VEInstrInfo.h - VE Instruction Information --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the VE implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_VEINSTRINFO_H +#define LLVM_LIB_TARGET_VE_VEINSTRINFO_H + +#include "VERegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + +#define GET_INSTRINFO_HEADER +#include "VEGenInstrInfo.inc" + +namespace llvm { + +class VESubtarget; + +class VEInstrInfo : public VEGenInstrInfo { + const VERegisterInfo RI; + const VESubtarget &Subtarget; + virtual void anchor(); + +public: + explicit VEInstrInfo(VESubtarget &ST); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. 
As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + const VERegisterInfo &getRegisterInfo() const { return RI; } + + /// isLoadFromStackSlot - If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + unsigned isLoadFromStackSlot(const MachineInstr &MI, + int &FrameIndex) const override; + + /// isStoreToStackSlot - If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. 
+ unsigned isStoreToStackSlot(const MachineInstr &MI, + int &FrameIndex) const override; + + bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify = false) const override; + + unsigned removeBranch(MachineBasicBlock &MBB, + int *BytesRemoved = nullptr) const override; + + unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, ArrayRef Cond, + const DebugLoc &DL, + int *BytesAdded = nullptr) const override; + + bool + reverseBranchCondition(SmallVectorImpl &Cond) const override; + + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, + bool KillSrc) const override; + void copyPhysSubRegs(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, + bool KillSrc, const MCInstrDesc &MCID, + unsigned int numSubRegs, + const unsigned *subRegIdx) const; + + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, unsigned SrcReg, + bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, unsigned DestReg, + int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + + unsigned getGlobalBaseReg(MachineFunction *MF) const; + + // Lower pseudo instructions after register allocation. 
+ bool expandPostRAPseudo(MachineInstr &MI) const override; + + bool expandExtendStackPseudo(MachineInstr &MI) const; + bool expandGetStackTopPseudo(MachineInstr &MI) const; +}; + +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VEInstrInfo.cpp @@ -0,0 +1,628 @@ +//===-- VEInstrInfo.cpp - VE Instruction Information ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the VE implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "VEInstrInfo.h" +#include "VE.h" +#include "VEMachineFunctionInfo.h" +#include "VESubtarget.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" + +#define DEBUG_TYPE "ve" + +using namespace llvm; + +#define GET_INSTRINFO_CTOR_DTOR +#include "VEGenInstrInfo.inc" + +// Pin the vtable to this file. +void VEInstrInfo::anchor() {} + +VEInstrInfo::VEInstrInfo(VESubtarget &ST) + : VEGenInstrInfo(VE::ADJCALLSTACKDOWN, VE::ADJCALLSTACKUP), RI(), + Subtarget(ST) {} + +/// isLoadFromStackSlot - If the specified machine instruction is a direct +/// load from a stack slot, return the virtual or physical register number of +/// the destination along with the FrameIndex of the loaded stack slot. 
+/// If not, return 0.  This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned VEInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
+                                          int &FrameIndex) const {
+  const unsigned Opc = MI.getOpcode();
+  const bool IsLoad = Opc == VE::LDSri || // I64
+                      Opc == VE::LDLri || // I32
+                      Opc == VE::LDUri || // F32
+                      Opc == VE::LDQri;   // F128 (pseudo)
+  // Only a load addressed as exactly "frame index + 0" qualifies.
+  if (IsLoad && MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
+      MI.getOperand(2).getImm() == 0) {
+    FrameIndex = MI.getOperand(1).getIndex();
+    return MI.getOperand(0).getReg();
+  }
+  return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stored stack slot.  If
+/// not, return 0.  This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned VEInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
+                                         int &FrameIndex) const {
+  const unsigned Opc = MI.getOpcode();
+  const bool IsStore = Opc == VE::STSri || // I64
+                       Opc == VE::STLri || // I32
+                       Opc == VE::STUri || // F32
+                       Opc == VE::STQri;   // F128 (pseudo)
+  // Stores carry the address first: (frame index, 0, source register).
+  if (IsStore && MI.getOperand(0).isFI() && MI.getOperand(1).isImm() &&
+      MI.getOperand(1).getImm() == 0) {
+    FrameIndex = MI.getOperand(0).getIndex();
+    return MI.getOperand(2).getReg();
+  }
+  return 0;
+}
+
+/// Condition codes ordered before CC_AF are the integer ones.
+static bool IsIntegerCC(unsigned CC) { return (CC < VECC::CC_AF); }
+
+/// Return the condition code that is true exactly when \p CC is false.
+static VECC::CondCodes GetOppositeBranchCondition(VECC::CondCodes CC) {
+  switch (CC) {
+  case VECC::CC_IG:
+    return VECC::CC_ILE;
+  case VECC::CC_IL:
+    return VECC::CC_IGE;
+  case VECC::CC_INE:
+    return VECC::CC_IEQ;
+  case VECC::CC_IEQ:
+    return VECC::CC_INE;
+  case VECC::CC_IGE:
+    return VECC::CC_IL;
+  case VECC::CC_ILE:
+    return VECC::CC_IG;
+  case VECC::CC_AF:
+    return VECC::CC_AT;
+  case VECC::CC_G:
+    return VECC::CC_LENAN;
+  case VECC::CC_L:
+    return VECC::CC_GENAN;
+  case VECC::CC_NE:
+    return VECC::CC_EQNAN;
+  case VECC::CC_EQ:
+    return VECC::CC_NENAN;
+  case VECC::CC_GE:
+    return VECC::CC_LNAN;
+  case VECC::CC_LE:
+    return VECC::CC_GNAN;
+  case VECC::CC_NUM:
+    return VECC::CC_NAN;
+  case VECC::CC_NAN:
+    return VECC::CC_NUM;
+  case VECC::CC_GNAN:
+    return VECC::CC_LE;
+  case VECC::CC_LNAN:
+    return VECC::CC_GE;
+  case VECC::CC_NENAN:
+    return VECC::CC_EQ;
+  case VECC::CC_EQNAN:
+    return VECC::CC_NE;
+  case VECC::CC_GENAN:
+    return VECC::CC_L;
+  case VECC::CC_LENAN:
+    return VECC::CC_G;
+  case VECC::CC_AT:
+    return VECC::CC_AF;
+  }
+  llvm_unreachable("Invalid cond code");
+}
+
+// Treat br.l [BCR AT] as unconditional branch
+static bool isUncondBranchOpcode(int Opc) {
+  return Opc == VE::BCRLa || Opc == VE::BCRWa || Opc == VE::BCRDa ||
+         Opc == VE::BCRSa;
+}
+
+/// True for every compare-and-branch (BCR L/W/D/S) form that takes a
+/// condition code.
+static bool isCondBranchOpcode(int Opc) {
+  switch (Opc) {
+  case VE::BCRLrr:
+  case VE::BCRLir:
+  case VE::BCRLrm0:
+  case VE::BCRLrm1:
+  case VE::BCRLim0:
+  case VE::BCRLim1:
+  case VE::BCRWrr:
+  case VE::BCRWir:
+  case VE::BCRWrm0:
+  case VE::BCRWrm1:
+  case VE::BCRWim0:
+  case VE::BCRWim1:
+  case VE::BCRDrr:
+  case VE::BCRDir:
+  case VE::BCRDrm0:
+  case VE::BCRDrm1:
+  case VE::BCRDim0:
+  case VE::BCRDim1:
+  case VE::BCRSrr:
+  case VE::BCRSir:
+  case VE::BCRSrm0:
+  case VE::BCRSrm1:
+  case VE::BCRSim0:
+  case VE::BCRSim1:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// True for register-indirect branches.
+///
+/// This used to abort with report_fatal_error, which made analyzeBranch kill
+/// the compilation for any two-terminator block it could not classify instead
+/// of reporting it as unanalyzable.
+static bool isIndirectBranchOpcode(int Opc) {
+  // BAri is the branch emitted for "jmpq *(TReg)" by the SjLj dispatch code.
+  // NOTE(review): extend this if other indirect branch opcodes exist.
+  return Opc == VE::BAri;
+}
+
+/// Split the conditional branch \p LastInst into its destination block
+/// (\p Target) and its three condition operands (\p Cond: condition code
+/// followed by the two compared operands).
+static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
+                            SmallVectorImpl<MachineOperand> &Cond) {
+  Cond.push_back(MachineOperand::CreateImm(LastInst->getOperand(0).getImm()));
+  Cond.push_back(LastInst->getOperand(1));
+  Cond.push_back(LastInst->getOperand(2));
+  Target = LastInst->getOperand(3).getMBB();
+}
+
+/// analyzeBranch - Classify the terminators of \p MBB.
+///
+/// Returns false and fills \p TBB / \p FBB / \p Cond when the block falls
+/// through, ends in one unconditional or conditional branch, or ends in a
+/// conditional branch followed by an unconditional one.  Returns true when
+/// the terminators cannot be understood.  With \p AllowModify set, branches
+/// that can never execute are erased along the way.
+bool VEInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                                MachineBasicBlock *&FBB,
+                                SmallVectorImpl<MachineOperand> &Cond,
+                                bool AllowModify) const {
+  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+  if (I == MBB.end())
+    return false;
+
+  if (!isUnpredicatedTerminator(*I))
+    return false;
+
+  // Get the last instruction in the block.
+  MachineInstr *LastInst = &*I;
+  unsigned LastOpc = LastInst->getOpcode();
+
+  // If there is only one terminator instruction, process it.
+  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
+    if (isUncondBranchOpcode(LastOpc)) {
+      TBB = LastInst->getOperand(0).getMBB();
+      return false;
+    }
+    if (isCondBranchOpcode(LastOpc)) {
+      // Block ends with fall-through condbranch.
+      parseCondBranch(LastInst, TBB, Cond);
+      return false;
+    }
+    return true; // Can't handle indirect branch.
+  }
+
+  // Get the instruction before it if it is a terminator.
+  MachineInstr *SecondLastInst = &*I;
+  unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+  // If AllowModify is true and the block ends with two or more unconditional
+  // branches, delete all but the first unconditional branch.
+  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
+    while (isUncondBranchOpcode(SecondLastOpc)) {
+      LastInst->eraseFromParent();
+      LastInst = SecondLastInst;
+      LastOpc = LastInst->getOpcode();
+      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
+        // Return now the only terminator is an unconditional branch.
+        TBB = LastInst->getOperand(0).getMBB();
+        return false;
+      } else {
+        SecondLastInst = &*I;
+        SecondLastOpc = SecondLastInst->getOpcode();
+      }
+    }
+  }
+
+  // If there are three terminators, we don't know what sort of block this is.
+  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
+    return true;
+
+  // If the block ends with a B and a Bcc, handle it.
+  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+    parseCondBranch(SecondLastInst, TBB, Cond);
+    FBB = LastInst->getOperand(0).getMBB();
+    return false;
+  }
+
+  // If the block ends with two unconditional branches, handle it.  The second
+  // one is not executed.
+  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+    TBB = SecondLastInst->getOperand(0).getMBB();
+    return false;
+  }
+
+  // ...likewise if it ends with an indirect branch followed by an unconditional
+  // branch.
+  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+    I = LastInst;
+    if (AllowModify)
+      I->eraseFromParent();
+    return true;
+  }
+
+  // Otherwise, can't handle this.
+  return true;
+}
+
+/// insertBranch - Append a branch to \p TBB at the end of \p MBB: an
+/// unconditional one when \p Cond is empty, otherwise a compare-and-branch
+/// built from \p Cond, followed by an unconditional branch to \p FBB when
+/// \p FBB is given.  Returns the number of instructions inserted.
+unsigned VEInstrInfo::insertBranch(MachineBasicBlock &MBB,
+                                   MachineBasicBlock *TBB,
+                                   MachineBasicBlock *FBB,
+                                   ArrayRef<MachineOperand> Cond,
+                                   const DebugLoc &DL, int *BytesAdded) const {
+  assert(TBB && "insertBranch must not be told to insert a fallthrough");
+  assert((Cond.size() == 3 || Cond.size() == 0) &&
+         "VE branch conditions should have three component!");
+  assert(!BytesAdded && "code size not handled");
+  if (Cond.empty()) {
+    // Unconditional branch
+    assert(!FBB && "Unconditional branch with multiple successors!");
+    BuildMI(&MBB, DL, get(VE::BCRLa)).addMBB(TBB);
+    return 1;
+  }
+
+  // Conditional branch
+  //   (BCRir CC sy sz addr)
+
+  assert(Cond[0].isImm() && Cond[2].isReg() && "not implemented");
+
+  // opc[0] is the immediate-lhs form, opc[1] the register-lhs form.  The
+  // family depends on integer vs. floating-point condition and on the width
+  // of the rhs register.
+  unsigned opc[2];
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  MachineFunction *MF = MBB.getParent();
+  const MachineRegisterInfo &MRI = MF->getRegInfo();
+  unsigned Reg = Cond[2].getReg();
+  if (IsIntegerCC(Cond[0].getImm())) {
+    if (TRI->getRegSizeInBits(Reg, MRI) == 32) {
+      opc[0] = VE::BCRWir;
+      opc[1] = VE::BCRWrr;
+    } else {
+      opc[0] = VE::BCRLir;
+      opc[1] = VE::BCRLrr;
+    }
+  } else {
+    if (TRI->getRegSizeInBits(Reg, MRI) == 32) {
+      opc[0] = VE::BCRSir;
+      opc[1] = VE::BCRSrr;
+    } else {
+      opc[0] = VE::BCRDir;
+      opc[1] = VE::BCRDrr;
+    }
+  }
+  BuildMI(&MBB, DL, get(Cond[1].isImm() ? opc[0] : opc[1]))
+      .add(Cond[0]) // condition code
+      .add(Cond[1]) // lhs
+      .add(Cond[2]) // rhs
+      .addMBB(TBB);
+
+  if (!FBB)
+    return 1;
+  BuildMI(&MBB, DL, get(VE::BCRLa)).addMBB(FBB);
+  return 2;
+}
+
+/// removeBranch - Erase the conditional and unconditional branches at the end
+/// of \p MBB (skipping debug values) and return how many were removed.
+unsigned VEInstrInfo::removeBranch(MachineBasicBlock &MBB,
+                                   int *BytesRemoved) const {
+  assert(!BytesRemoved && "code size not handled");
+
+  MachineBasicBlock::iterator I = MBB.end();
+  unsigned Count = 0;
+  while (I != MBB.begin()) {
+    --I;
+
+    if (I->isDebugValue())
+      continue;
+
+    if (!isUncondBranchOpcode(I->getOpcode()) &&
+        !isCondBranchOpcode(I->getOpcode()))
+      break; // Not a branch
+
+    // Erasing invalidates I, so restart the scan from the new end.
+    I->eraseFromParent();
+    I = MBB.end();
+    ++Count;
+  }
+  return Count;
+}
+
+/// reverseBranchCondition - Replace the condition code in \p Cond[0] with its
+/// opposite.  Always succeeds (returns false).
+bool VEInstrInfo::reverseBranchCondition(
+    SmallVectorImpl<MachineOperand> &Cond) const {
+  VECC::CondCodes CC = static_cast<VECC::CondCodes>(Cond[0].getImm());
+  Cond[0].setImm(GetOppositeBranchCondition(CC));
+  return false;
+}
+
+/// copyPhysSubRegs - Copy \p SrcReg to \p DestReg one sub-register at a time,
+/// using the \p numSubRegs indices in \p subRegIdx.  Only VE::ORri is
+/// supported as the copy instruction \p MCID.
+void VEInstrInfo::copyPhysSubRegs(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
+    unsigned DestReg, unsigned SrcReg, bool KillSrc, const MCInstrDesc &MCID,
+    unsigned int numSubRegs, const unsigned *subRegIdx) const {
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  MachineInstr *MovMI = nullptr;
+
+  for (unsigned i = 0; i != numSubRegs; ++i) {
+    unsigned SubDest = TRI->getSubReg(DestReg, subRegIdx[i]);
+    unsigned SubSrc = TRI->getSubReg(SrcReg, subRegIdx[i]);
+    assert(SubDest && SubSrc && "Bad sub-register");
+
+    if (MCID.getOpcode() == VE::ORri) {
+      // generate "ORri, dest, src, 0" instruction.
+      MachineInstrBuilder MIB =
+          BuildMI(MBB, I, DL, MCID, SubDest).addReg(SubSrc).addImm(0);
+      MovMI = MIB.getInstr();
+    } else {
+      llvm_unreachable("Unexpected reg-to-reg copy instruction");
+    }
+  }
+
+  // With no sub-registers nothing was emitted, so there is no instruction to
+  // attach the super-register operands to.
+  if (!MovMI)
+    return;
+
+  // Add implicit super-register defs and kills to the last MovMI.
+  MovMI->addRegisterDefined(DestReg, TRI);
+  if (KillSrc)
+    MovMI->addRegisterKilled(SrcReg, TRI);
+}
+
+/// copyPhysReg - Emit a copy from \p SrcReg to \p DestReg before \p I.
+/// Scalar registers are copied with one ORri, F128 registers with one ORri per
+/// half; any other combination is a fatal internal error.
+void VEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I, const DebugLoc &DL,
+                              MCRegister DestReg, MCRegister SrcReg,
+                              bool KillSrc) const {
+  // For the case of VE, I32, I64, and F32 uses the identical
+  // registers %s0-%s63, so any scalar can be copied to any scalar.
+  const auto IsScalarReg = [](MCRegister Reg) {
+    return VE::I32RegClass.contains(Reg) || VE::F32RegClass.contains(Reg) ||
+           VE::I64RegClass.contains(Reg);
+  };
+
+  if (IsScalarReg(SrcReg) && IsScalarReg(DestReg)) {
+    BuildMI(MBB, I, DL, get(VE::ORri), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc))
+        .addImm(0);
+  } else if (VE::F128RegClass.contains(DestReg, SrcReg)) {
+    // Use two instructions.
+    const unsigned subRegIdx[] = {VE::sub_even, VE::sub_odd};
+    unsigned int numSubRegs = 2;
+    copyPhysSubRegs(MBB, I, DL, DestReg, SrcReg, KillSrc, get(VE::ORri),
+                    numSubRegs, subRegIdx);
+  } else {
+    const TargetRegisterInfo *TRI = &getRegisterInfo();
+    dbgs() << "Impossible reg-to-reg copy from " << printReg(SrcReg, TRI)
+           << " to " << printReg(DestReg, TRI) << "\n";
+    llvm_unreachable("Impossible reg-to-reg copy");
+  }
+}
+
+/// storeRegToStackSlot - Spill \p SrcReg of class \p RC to frame index \p FI.
+/// Aborts with a fatal error for register classes without a store opcode.
+void VEInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator I,
+                                      unsigned SrcReg, bool isKill, int FI,
+                                      const TargetRegisterClass *RC,
+                                      const TargetRegisterInfo *TRI) const {
+  DebugLoc DL;
+  if (I != MBB.end())
+    DL = I->getDebugLoc();
+
+  MachineFunction *MF = MBB.getParent();
+  const MachineFrameInfo &MFI = MF->getFrameInfo();
+  MachineMemOperand *MMO = MF->getMachineMemOperand(
+      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
+      MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+
+  // Pick the store opcode for the register class being spilled.
+  unsigned Opc;
+  if (RC == &VE::I64RegClass)
+    Opc = VE::STSri;
+  else if (RC == &VE::I32RegClass)
+    Opc = VE::STLri;
+  else if (RC == &VE::F32RegClass)
+    Opc = VE::STUri;
+  else if (VE::F128RegClass.hasSubClassEq(RC))
+    Opc = VE::STQri;
+  else
+    report_fatal_error("Can't store this register to stack slot");
+
+  // On the order of operands here: think "[FrameIdx + 0] = SrcReg".
+  BuildMI(MBB, I, DL, get(Opc))
+      .addFrameIndex(FI)
+      .addImm(0)
+      .addReg(SrcReg, getKillRegState(isKill))
+      .addMemOperand(MMO);
+}
+
+/// loadRegFromStackSlot - Reload \p DestReg of class \p RC from frame index
+/// \p FI.  Aborts with a fatal error for register classes without a load
+/// opcode.
+void VEInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator I,
+                                       unsigned DestReg, int FI,
+                                       const TargetRegisterClass *RC,
+                                       const TargetRegisterInfo *TRI) const {
+  DebugLoc DL;
+  if (I != MBB.end())
+    DL = I->getDebugLoc();
+
+  MachineFunction *MF = MBB.getParent();
+  const MachineFrameInfo &MFI = MF->getFrameInfo();
+  MachineMemOperand *MMO = MF->getMachineMemOperand(
+      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
+      MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+
+  // Pick the load opcode for the register class being reloaded.
+  unsigned Opc;
+  if (RC == &VE::I64RegClass)
+    Opc = VE::LDSri;
+  else if (RC == &VE::I32RegClass)
+    Opc = VE::LDLri;
+  else if (RC == &VE::F32RegClass)
+    Opc = VE::LDUri;
+  else if (VE::F128RegClass.hasSubClassEq(RC))
+    Opc = VE::LDQri;
+  else
+    report_fatal_error("Can't load this register from stack slot");
+
+  BuildMI(MBB, I, DL, get(Opc), DestReg)
+      .addFrameIndex(FI)
+      .addImm(0)
+      .addMemOperand(MMO);
+}
+
+/// getGlobalBaseReg - Return the register holding the GOT address for \p MF.
+/// The first request inserts a GETGOT pseudo at the top of the entry block and
+/// records the register in the function info; later requests reuse it.
+unsigned VEInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
+  VEMachineFunctionInfo *VEFI = MF->getInfo<VEMachineFunctionInfo>();
+  unsigned GlobalBaseReg = VEFI->getGlobalBaseReg();
+  if (GlobalBaseReg != 0)
+    return GlobalBaseReg;
+
+  // We use %s15 (%got) as a global base register
+  GlobalBaseReg = VE::SX15;
+
+  // Insert a pseudo instruction to set the GlobalBaseReg into the first
+  // MBB of the function
+  MachineBasicBlock &FirstMBB = MF->front();
+  MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+  DebugLoc dl;
+  BuildMI(FirstMBB, MBBI, dl, get(VE::GETGOT), GlobalBaseReg);
+  VEFI->setGlobalBaseReg(GlobalBaseReg);
+  return GlobalBaseReg;
+}
+
+/// expandPostRAPseudo - Expand the VE pseudo \p MI after register allocation.
+/// Returns true when \p MI was expanded or erased, false when it is not a
+/// pseudo handled here.
+bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+  switch (MI.getOpcode()) {
+  case VE::EXTEND_STACK: {
+    return expandExtendStackPseudo(MI);
+  }
+  case VE::EXTEND_STACK_GUARD: {
+    MI.eraseFromParent(); // The pseudo instruction is gone now.
+    return true;
+  }
+  case TargetOpcode::LOAD_STACK_GUARD: {
+    assert(Subtarget.isTargetLinux() &&
+           "Only Linux target is expected to contain LOAD_STACK_GUARD");
+    report_fatal_error(
+        "expandPostRAPseudo for LOAD_STACK_GUARD is not implemented yet");
+  }
+  case VE::GETSTACKTOP: {
+    return expandGetStackTopPseudo(MI);
+  }
+  }
+  return false;
+}
+
+bool VEInstrInfo::expandExtendStackPseudo(MachineInstr &MI) const {
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  const VEInstrInfo &TII =
+      *static_cast<const VEInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  DebugLoc dl = MBB.findDebugLoc(MI);
+
+  // Create following instructions and multiple basic blocks.
+ // + // thisBB: + // brge.l.t %sp, %sl, sinkBB + // syscallBB: + // ld %s61, 0x18(, %tp) // load param area + // or %s62, 0, %s0 // spill the value of %s0 + // lea %s63, 0x13b // syscall # of grow + // shm.l %s63, 0x0(%s61) // store syscall # at addr:0 + // shm.l %sl, 0x8(%s61) // store old limit at addr:8 + // shm.l %sp, 0x10(%s61) // store new limit at addr:16 + // monc // call monitor + // or %s0, 0, %s62 // restore the value of %s0 + // sinkBB: + + // Create new MBB + MachineBasicBlock *BB = &MBB; + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineBasicBlock *syscallMBB = MF.CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = MF.CreateMachineBasicBlock(LLVM_BB); + MachineFunction::iterator It = ++(BB->getIterator()); + MF.insert(It, syscallMBB); + MF.insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + std::next(std::next(MachineBasicBlock::iterator(MI))), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. 
+ BB->addSuccessor(syscallMBB); + BB->addSuccessor(sinkMBB); + BuildMI(BB, dl, TII.get(VE::BCRLrr)) + .addImm(VECC::CC_IGE) + .addReg(VE::SX11) // %sp + .addReg(VE::SX8) // %sl + .addMBB(sinkMBB); + + BB = syscallMBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + BuildMI(BB, dl, TII.get(VE::LDSri), VE::SX61).addReg(VE::SX14).addImm(0x18); + BuildMI(BB, dl, TII.get(VE::ORri), VE::SX62).addReg(VE::SX0).addImm(0); + BuildMI(BB, dl, TII.get(VE::LEAzzi), VE::SX63).addImm(0x13b); + BuildMI(BB, dl, TII.get(VE::SHMri)) + .addReg(VE::SX61) + .addImm(0) + .addReg(VE::SX63); + BuildMI(BB, dl, TII.get(VE::SHMri)) + .addReg(VE::SX61) + .addImm(8) + .addReg(VE::SX8); + BuildMI(BB, dl, TII.get(VE::SHMri)) + .addReg(VE::SX61) + .addImm(16) + .addReg(VE::SX11); + BuildMI(BB, dl, TII.get(VE::MONC)); + + BuildMI(BB, dl, TII.get(VE::ORri), VE::SX0).addReg(VE::SX62).addImm(0); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return true; +} + +bool VEInstrInfo::expandGetStackTopPseudo(MachineInstr &MI) const { + MachineBasicBlock *MBB = MI.getParent(); + MachineFunction &MF = *MBB->getParent(); + const VEInstrInfo &TII = + *static_cast(MF.getSubtarget().getInstrInfo()); + DebugLoc dl = MBB->findDebugLoc(MI); + + // Create following instruction + // + // dst = %sp + stack_size + + const MachineFrameInfo &MFI = MF.getFrameInfo(); + + const TargetFrameLowering *TFL = MF.getSubtarget().getFrameLowering(); + unsigned NumBytes = 176; + if (MFI.adjustsStack() && TFL->hasReservedCallFrame(MF)) + NumBytes += MFI.getMaxCallFrameSize(); + + BuildMI(*MBB, MI, dl, TII.get(VE::LEArzi)) + .addDef(MI.getOperand(0).getReg()) + .addReg(VE::SX11) + .addImm(NumBytes); + + MI.eraseFromParent(); // The pseudo instruction is gone now. 
+ return true; +} diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VEInstrInfo.td @@ -0,0 +1,2270 @@ +//===-- VEInstrInfo.td - Target Description for VE Target -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the VE instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction format superclass +//===----------------------------------------------------------------------===// + +include "VEInstrFormats.td" + +//===----------------------------------------------------------------------===// +// Feature predicates. 
+//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction Pattern Stuff +//===----------------------------------------------------------------------===// + +def simm7 : PatLeaf<(imm), [{ return isInt<7>(N->getSExtValue()); }]>; +def simm32 : PatLeaf<(imm), [{ return isInt<32>(N->getSExtValue()); }]>; +def uimm32 : PatLeaf<(imm), [{ return isUInt<32>(N->getZExtValue()); }]>; +def uimm6 : PatLeaf<(imm), [{ return isUInt<6>(N->getZExtValue()); }]>; +def uimm7 : PatLeaf<(imm), [{ return isUInt<7>(N->getZExtValue()); }]>; +def zero : PatLeaf<(imm), [{ return N->getSExtValue() == 0; }]>; +def lomsbzero : PatLeaf<(imm), [{ return (N->getZExtValue() & 0x80000000) + == 0; }]>; +def lozero : PatLeaf<(imm), [{ return (N->getZExtValue() & 0xffffffff) + == 0; }]>; +def fplomsbzero : PatLeaf<(fpimm), [{ return (N->getValueAPF().bitcastToAPInt() + .getZExtValue() & 0x80000000) == 0; }]>; +def fplozero : PatLeaf<(fpimm), [{ return (N->getValueAPF().bitcastToAPInt() + .getZExtValue() & 0xffffffff) == 0; }]>; + +def CCSIOp : PatLeaf<(cond), [{ + switch (N->get()) { + default: return true; + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETUGT: + case ISD::SETUGE: return false; + } +}]>; + +def CCUIOp : PatLeaf<(cond), [{ + switch (N->get()) { + default: return true; + case ISD::SETLT: + case ISD::SETLE: + case ISD::SETGT: + case ISD::SETGE: return false; + } +}]>; + +def GetVL : SDNodeXFormgetMachineFunction(); + unsigned VLReg = MF.getSubtarget().getInstrInfo()->getVectorLengthReg(&MF); + return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(N), VLReg, MVT::i32); +}]>; + +def LOFP32 : SDNodeXFormgetValueAPF().bitcastToAPInt(); + return CurDAG->getTargetConstant((unsigned)(imm.getZExtValue() & 0xffffffff), + SDLoc(N), MVT::i64); +}]>; + +def HIFP32 : SDNodeXFormgetValueAPF().bitcastToAPInt(); + return 
CurDAG->getTargetConstant((unsigned)(imm.getZExtValue() >> 32), + SDLoc(N), MVT::i64); +}]>; + +def LO32 : SDNodeXFormgetTargetConstant((unsigned)(N->getZExtValue() & 0xffffffff), + SDLoc(N), MVT::i64); +}]>; + +def HI32 : SDNodeXFormgetTargetConstant((unsigned)(N->getZExtValue() >> 32), + SDLoc(N), MVT::i64); +}]>; + +def LEASLimm : PatLeaf<(imm), [{ + return isShiftedUInt<32, 32>(N->getZExtValue()); +}], HI32>; + +def trunc_imm : SDNodeXFormgetTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32); +}]>; + +def sext_imm : SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64); +}]>; + +def zext_imm : SDNodeXFormgetTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i64); +}]>; + +def icond2cc : SDNodeXFormget()) { + default: llvm_unreachable("Unknown integer condition code!"); + case ISD::SETEQ: cc = VECC::CC_IEQ; break; + case ISD::SETNE: cc = VECC::CC_INE; break; + case ISD::SETLT: cc = VECC::CC_IL; break; + case ISD::SETGT: cc = VECC::CC_IG; break; + case ISD::SETLE: cc = VECC::CC_ILE; break; + case ISD::SETGE: cc = VECC::CC_IGE; break; + case ISD::SETULT: cc = VECC::CC_IL; break; + case ISD::SETULE: cc = VECC::CC_ILE; break; + case ISD::SETUGT: cc = VECC::CC_IG; break; + case ISD::SETUGE: cc = VECC::CC_IGE; break; + } + return CurDAG->getTargetConstant(cc, SDLoc(N), MVT::i32); +}]>; + +def fcond2cc : SDNodeXFormget()) { + default: llvm_unreachable("Unknown float condition code!"); + case ISD::SETFALSE: cc = VECC::CC_AF; break; + case ISD::SETEQ: + case ISD::SETOEQ: cc = VECC::CC_EQ; break; + case ISD::SETNE: + case ISD::SETONE: cc = VECC::CC_NE; break; + case ISD::SETLT: + case ISD::SETOLT: cc = VECC::CC_L; break; + case ISD::SETGT: + case ISD::SETOGT: cc = VECC::CC_G; break; + case ISD::SETLE: + case ISD::SETOLE: cc = VECC::CC_LE; break; + case ISD::SETGE: + case ISD::SETOGE: cc = VECC::CC_GE; break; + case ISD::SETO: cc = VECC::CC_NUM; break; + case ISD::SETUO: cc = VECC::CC_NAN; break; + case ISD::SETUEQ: cc = VECC::CC_EQNAN; break; + case 
ISD::SETUNE: cc = VECC::CC_NENAN; break; + case ISD::SETULT: cc = VECC::CC_LNAN; break; + case ISD::SETUGT: cc = VECC::CC_GNAN; break; + case ISD::SETULE: cc = VECC::CC_LENAN; break; + case ISD::SETUGE: cc = VECC::CC_GENAN; break; + case ISD::SETTRUE: cc = VECC::CC_AT; break; + } + return CurDAG->getTargetConstant(cc, SDLoc(N), MVT::i32); +}]>; + +// Addressing modes. +def ADDRrr : ComplexPattern; +def ADDRri : ComplexPattern; + +// Address operands +def VEMEMrrAsmOperand : AsmOperandClass { + let Name = "MEMrr"; + let ParserMethod = "parseMEMOperand"; +} + +def VEMEMriAsmOperand : AsmOperandClass { + let Name = "MEMri"; + let ParserMethod = "parseMEMOperand"; +} + +// ASX format of memory address + +def MEMrr : Operand { + let PrintMethod = "printMemASXOperand"; + let MIOperandInfo = (ops ptr_rc, ptr_rc); + let ParserMatchClass = VEMEMrrAsmOperand; +} + +def MEMri : Operand { + let PrintMethod = "printMemASXOperand"; + let MIOperandInfo = (ops ptr_rc, i64imm); + let ParserMatchClass = VEMEMriAsmOperand; +} + +// AS format of memory address + +def MEMASri : Operand { + let PrintMethod = "printMemASOperand"; + let MIOperandInfo = (ops ptr_rc, i64imm); + let ParserMatchClass = VEMEMriAsmOperand; +} + +// Branch targets have OtherVT type. +def brtarget32 : Operand { + let EncoderMethod = "getBranchTarget32OpValue"; +} + +def TLSSym : Operand; + +// Branch targets have OtherVT type. 
+def brtarget : Operand { + let EncoderMethod = "getBranchTargetOpValue"; +} + +def calltarget : Operand { + let EncoderMethod = "getCallTargetOpValue"; + let DecoderMethod = "DecodeCall"; +} + +def simm7Op32 : Operand { + let DecoderMethod = "DecodeSIMM7"; +} + +def simm7Op64 : Operand { + let DecoderMethod = "DecodeSIMM7"; +} + +def simm7Op128 : Operand { + let DecoderMethod = "DecodeSIMM7"; +} + +def simm32Op32 : Operand { + let DecoderMethod = "DecodeSIMM32"; +} + +def simm32Op64 : Operand { + let DecoderMethod = "DecodeSIMM32"; +} + +def uimm7Op32 : Operand { + let DecoderMethod = "DecodeUIMM6"; +} + +def uimm6Op32 : Operand { + let DecoderMethod = "DecodeUIMM6"; +} + +def uimm6Op64 : Operand { + let DecoderMethod = "DecodeUIMM6"; +} + +def uimm6Op128 : Operand { + let DecoderMethod = "DecodeUIMM6"; +} + +// Operand for printing out a condition code. +let PrintMethod = "printCCOperand" in + def CCOp : Operand; + +def VEhi : SDNode<"VEISD::Hi", SDTIntUnaryOp>; +def VElo : SDNode<"VEISD::Lo", SDTIntUnaryOp>; + +// These are target-independent nodes, but have target-specific formats. 
+def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i64>, + SDTCisVT<1, i64> ]>; +def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i64>, + SDTCisVT<1, i64> ]>; + +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart, + [SDNPHasChain, SDNPOutGlue]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + +def SDT_SPCall : SDTypeProfile<0, -1, [SDTCisVT<0, i64>]>; +def call : SDNode<"VEISD::CALL", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; + +def retflag : SDNode<"VEISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +def getGOT : Operand { + let PrintMethod = "printGetGOT"; +} +def getFunPLT : Operand { + let PrintMethod = "printGetFunPLT"; +} + +def VEmax : SDNode<"VEISD::MAX", SDTIntBinOp>; +def VEmin : SDNode<"VEISD::MIN", SDTIntBinOp>; +def VEfmax : SDNode<"VEISD::FMAX", SDTFPBinOp>; +def VEfmin : SDNode<"VEISD::FMIN", SDTFPBinOp>; + +def VEeh_sjlj_setjmp: SDNode<"VEISD::EH_SJLJ_SETJMP", + SDTypeProfile<1, 1, [SDTCisInt<0>, + SDTCisPtrTy<1>]>, + [SDNPHasChain, SDNPSideEffect]>; +def VEeh_sjlj_longjmp: SDNode<"VEISD::EH_SJLJ_LONGJMP", + SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPSideEffect]>; +def VEeh_sjlj_setup_dispatch: SDNode<"VEISD::EH_SJLJ_SETUP_DISPATCH", + SDTypeProfile<0, 0, []>, + [SDNPHasChain, SDNPSideEffect]>; + +// GETFUNPLT for PIC +def GetFunPLT : SDNode<"VEISD::GETFUNPLT", SDTIntUnaryOp>; + +// GETTLSADDR for TLS +def GetTLSAddr : SDNode<"VEISD::GETTLSADDR", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; + +// GETSTACKTOP +def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone, + [SDNPHasChain, SDNPSideEffect]>; + +// MEMBARRIER +def MemBarrier : SDNode<"VEISD::MEMBARRIER", SDTNone, + [SDNPHasChain, SDNPSideEffect]>; + +//===----------------------------------------------------------------------===// +// VE Flag Conditions 
+//===----------------------------------------------------------------------===// + +// Note that these values must be kept in sync with the CCOp::CondCode enum +// values. +class CC_VAL : PatLeaf<(i32 N)>; +def CC_IG : CC_VAL< 0>; // Greater +def CC_IL : CC_VAL< 1>; // Less +def CC_INE : CC_VAL< 2>; // Not Equal +def CC_IEQ : CC_VAL< 3>; // Equal +def CC_IGE : CC_VAL< 4>; // Greater or Equal +def CC_ILE : CC_VAL< 5>; // Less or Equal +def CC_AF : CC_VAL< 6>; // Always false +def CC_G : CC_VAL< 7>; // Greater +def CC_L : CC_VAL< 8>; // Less +def CC_NE : CC_VAL< 9>; // Not Equal +def CC_EQ : CC_VAL<10>; // Equal +def CC_GE : CC_VAL<11>; // Greater or Equal +def CC_LE : CC_VAL<12>; // Less or Equal +def CC_NUM : CC_VAL<13>; // Number +def CC_NAN : CC_VAL<14>; // NaN +def CC_GNAN : CC_VAL<15>; // Greater or NaN +def CC_LNAN : CC_VAL<16>; // Less or NaN +def CC_NENAN : CC_VAL<17>; // Not Equal or NaN +def CC_EQNAN : CC_VAL<18>; // Equal or NaN +def CC_GENAN : CC_VAL<19>; // Greater or Equal or NaN +def CC_LENAN : CC_VAL<20>; // Less or Equal or NaN +def CC_AT : CC_VAL<21>; // Always true + +//===----------------------------------------------------------------------===// +// VE Multiclasses for common instruction formats +//===----------------------------------------------------------------------===// + +multiclass RMmopc, SDNode OpNode, + RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> { + def rri : RM< + opc, (outs RC:$sx), (ins RC:$sy, RC:$sz, immOp2:$imm32), + !strconcat(opcStr, " $sx, ${imm32}($sy, ${sz})"), + [(set Ty:$sx, (OpNode (OpNode Ty:$sy, Ty:$sz), (Ty simm32:$imm32)))]> { + let cy = 1; + let cz = 1; + let hasSideEffects = 0; + } + def rii : RM< + opc, (outs RC:$sx), (ins RC:$sz, immOp:$sy, immOp2:$imm32), + !strconcat(opcStr, " $sx, ${imm32}($sy, ${sz})"), + [/* No DAG pattern is defined here so that LLVM does not select LEArii for add + instructions. 
+ (set Ty:$sx, (OpNode (OpNode Ty:$sz, (Ty simm7:$sy)), (Ty simm32:$imm32)))*/]> { + let cy = 0; + let cz = 1; + let hasSideEffects = 0; + } + def rzi : RM< + opc, (outs RC:$sx), (ins RC:$sz, immOp2:$imm32), + !strconcat(opcStr, " $sx, ${imm32}(${sz})"), + [(set Ty:$sx, (OpNode Ty:$sz, (Ty simm32:$imm32)))]> { + let cy = 0; + let sy = 0; + let cz = 1; + let hasSideEffects = 0; + } + def zii : RM< + opc, (outs RC:$sx), (ins immOp:$sy, immOp2:$imm32), + !strconcat(opcStr, " $sx, ${imm32}(${sy})"), + [/* No DAG pattern is defined here so that LLVM does not select LEAzii for all add + instructions. + (set Ty:$sx, (OpNode (Ty simm7:$sy), (Ty simm32:$imm32))) */]> { + let cy = 0; + let cz = 0; + let sz = 0; + let hasSideEffects = 0; + } + def zzi : RM< + opc, (outs RC:$sx), (ins immOp2:$imm32), + !strconcat(opcStr, " $sx, $imm32"), + [/* No DAG pattern is defined here so that LLVM does not select LEAzzi for all set + instructions. + (set Ty:$sx, (Ty simm32:$imm32)) */]> { + let cy = 0; + let sy = 0; + let cz = 0; + let sz = 0; + let hasSideEffects = 0; + } +} + +// RMNDm is similar to RMm without DAG patterns. 
+ +multiclass RMNDmopc, SDNode OpNode, + RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> { + def rri : RM< + opc, (outs RC:$sx), (ins RC:$sy, RC:$sz, immOp2:$imm32), + !strconcat(opcStr, " $sx, ${imm32}($sy, ${sz})"), []> { + let cy = 1; + let cz = 1; + let hasSideEffects = 0; + } + def rii : RM< + opc, (outs RC:$sx), (ins RC:$sz, immOp:$sy, immOp2:$imm32), + !strconcat(opcStr, " $sx, ${imm32}($sy, ${sz})"), []> { + let cy = 0; + let cz = 1; + let hasSideEffects = 0; + } + def rzi : RM< + opc, (outs RC:$sx), (ins RC:$sz, immOp2:$imm32), + !strconcat(opcStr, " $sx, ${imm32}(${sz})"), []> { + let cy = 0; + let sy = 0; + let hasSideEffects = 0; + let cz = 1; + } + def zii : RM< + opc, (outs RC:$sx), (ins immOp:$sy, immOp2:$imm32), + !strconcat(opcStr, " $sx, ${imm32}(${sy})"), []> { + let cy = 0; + let cz = 0; + let sz = 0; + let hasSideEffects = 0; + } + def zzi : RM< + opc, (outs RC:$sx), (ins immOp2:$imm32), + !strconcat(opcStr, " $sx, $imm32"), []> { + let cy = 0; + let sy = 0; + let cz = 0; + let sz = 0; + let hasSideEffects = 0; + } +} + +let Constraints = "$sx = $sd", DisableEncoding = "$sd" in +multiclass RRCASmopc, SDNode OpNode, + RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> { + def asr : RM< + opc, (outs RC:$sx), (ins MEMASri:$addr, RC:$sy, RC:$sd), + !strconcat(opcStr, " $sx, $addr, $sy"), []> { + let cy = 1; + let cz = 1; + let hasSideEffects = 0; + } + def asi : RM< + opc, (outs RC:$sx), (ins MEMASri:$addr, immOp:$sy, RC:$sd), + !strconcat(opcStr, " $sx, $addr, $sy"), []> { + let cy = 0; + let cz = 1; + let hasSideEffects = 0; + } + def rir : RM< + opc, (outs RC:$sx), (ins RC:$sz, immOp2:$imm32, RC:$sy, RC:$sd), + !strconcat(opcStr, " $sx, ${imm32}(${sz}), $sy"), []> { + let cy = 1; + let cz = 1; + let isCodeGenOnly = 1; + let hasSideEffects = 0; + } + def rii : RM< + opc, (outs RC:$sx), (ins RC:$sz, immOp2:$imm32, immOp:$sy, RC:$sd), + !strconcat(opcStr, " $sx, ${imm32}(${sz}), $sy"), []> { + let cy = 0; + let cz = 1; + 
let isCodeGenOnly = 1; + let hasSideEffects = 0; + } + def zii : RM< + opc, (outs RC:$sx), (ins immOp2:$imm32, immOp:$sy, RC:$sd), + !strconcat(opcStr, " $sx, $imm32, $sy"), []> { + let cy = 0; + let cz = 0; + let sz = 0; + let hasSideEffects = 0; + } +} + +// Multiclass for RR type instructions + +// First, defines components +// Named like RRm if each has their own DAG pattern +// Named like RRNDm if each doesn't have their own DAG pattern + +multiclass RRmrropc, SDNode OpNode, + RegisterClass RCo, ValueType Tyo, + RegisterClass RCi, ValueType Tyi> { + def rr : RR + { let cy = 1; let cz = 1; let hasSideEffects = 0; } +} + +multiclass RRNDmrropc, SDNode OpNode, + RegisterClass RCo, ValueType Tyo, + RegisterClass RCi, ValueType Tyi> { + def rr : RR + { let cy = 1; let cz = 1; let hasSideEffects = 0; } +} + +multiclass RRmriopc, SDNode OpNode, + RegisterClass RCo, ValueType Tyo, + RegisterClass RCi, ValueType Tyi, Operand immOp> { + // VE calculates (OpNode $sy, $sz), but llvm requires to have immediate + // in RHS, so we use following definition. 
+ def ri : RR + { let cy = 0; let cz = 1; let hasSideEffects = 0; } +} + +multiclass RRmiropc, SDNode OpNode, + RegisterClass RCo, ValueType Tyo, + RegisterClass RCi, ValueType Tyi, Operand immOp> { + def ri : RR + { let cy = 0; let cz = 1; let hasSideEffects = 0; } +} + +multiclass RRNDmiropc, SDNode OpNode, + RegisterClass RCo, ValueType Tyo, + RegisterClass RCi, ValueType Tyi, Operand immOp> { + def ri : RR + { let cy = 0; let cz = 1; let hasSideEffects = 0; } +} + +multiclass RRmizopc, SDNode OpNode, + RegisterClass RCo, ValueType Tyo, + RegisterClass RCi, ValueType Tyi, Operand immOp> { + def zi : RR + { let cy = 0; let cz = 0; let sz = 0; let hasSideEffects = 0; } +} + +multiclass RRNDmizopc, SDNode OpNode, + RegisterClass RCo, ValueType Tyo, + RegisterClass RCi, ValueType Tyi, Operand immOp> { + def zi : RR + { let cy = 0; let cz = 0; let sz = 0; let hasSideEffects = 0; } +} + +multiclass RRNDmrmopc, SDNode OpNode, + RegisterClass RCo, ValueType Tyo, + RegisterClass RCi, ValueType Tyi, Operand immOp2> { + def rm0 : RR { + let cy = 1; + let cz = 0; + let sz{6} = 1; + // (guess) tblgen conservatively assumes hasSideEffects when + // it fails to infer from a pattern. + let hasSideEffects = 0; + } + def rm1 : RR { + let cy = 1; + let cz = 0; + let hasSideEffects = 0; + } +} + +multiclass RRNDmimopc, SDNode OpNode, + RegisterClass RCo, ValueType Tyo, + RegisterClass RCi, ValueType Tyi, + Operand immOp, Operand immOp2> { + def im0 : RR { + let cy = 0; + let cz = 0; + let sz{6} = 1; + // (guess) tblgen conservatively assumes hasSideEffects when + // it fails to infer from a pattern. 
+ let hasSideEffects = 0; + } + def im1 : RR { + let cy = 0; + let cz = 0; + let hasSideEffects = 0; + } +} + +// Used by add, mul, div, and similar commutative instructions +// The order of operands are "$sx, $sy, $sz" + +multiclass RRmopc, SDNode OpNode, + RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> : + RRmrr, + RRmri, + RRmiz, + RRNDmrm, + RRNDmim; + +// Used by sub, and similar not commutative instructions +// The order of operands are "$sx, $sy, $sz" + +multiclass RRNCmopc, SDNode OpNode, + RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> : + RRmrr, + RRmir, + RRmiz, + RRNDmrm, + RRNDmim; + +// Used by fadd, fsub, and similar floating point instructions +// The order of operands are "$sx, $sy, $sz" + +multiclass RRFmopc, SDNode OpNode, + RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> : + RRmrr, + RRNDmir, + RRNDmiz, + RRNDmrm, + RRNDmim; + +// Used by cmp instruction +// The order of operands are "$sx, $sy, $sz" + +multiclass RRNDmopc, SDNode OpNode, + RegisterClass RC, ValueType Ty, + Operand immOp, Operand immOp2> : + RRNDmrr, + RRNDmir, + RRNDmiz, + RRNDmrm, + RRNDmim; + +// Used by fcq instruction like "F64 <- cmp F128, F128" +// The order of operands are "$sx, $sy, $sz" + +multiclass RRFCQmopc, SDNode OpNode, + RegisterClass RC, ValueType Ty, + Operand immOp, Operand immOp2> : + RRNDmrr, + RRNDmir, + RRNDmiz, + RRNDmrm, + RRNDmim; + +// Multiclass for RR type instructions +// Used by sra, sla, sll, and similar instructions +// The order of operands are "$sx, $sz, $sy" + +multiclass RRImopc, SDNode OpNode, + RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> { + def rr : RR< + opc, (outs RC:$sx), (ins RC:$sz, I32:$sy), + !strconcat(opcStr, " $sx, $sz, $sy"), + [(set Ty:$sx, (OpNode Ty:$sz, i32:$sy))]> { + let cy = 1; + let cz = 1; + let hasSideEffects = 0; + } + def ri : RR< + opc, (outs RC:$sx), (ins RC:$sz, immOp:$sy), + !strconcat(opcStr, " $sx, $sz, $sy"), + [(set Ty:$sx, (OpNode Ty:$sz, (i32 
simm7:$sy)))]> { + let cy = 0; + let cz = 1; + let hasSideEffects = 0; + } + def rm0 : RR< + opc, (outs RC:$sx), (ins immOp2:$sz, I32:$sy), + !strconcat(opcStr, " $sx, (${sz})0, $sy"), + []> { + let cy = 1; + let cz = 0; + let sz{6} = 1; + // (guess) tblgen conservatively assumes hasSideEffects when it fails to infer from a pattern. + let hasSideEffects = 0; + } + def rm1 : RR< + opc, (outs RC:$sx), (ins immOp2:$sz, I32:$sy), + !strconcat(opcStr, " $sx, (${sz})1, $sy"), + []> { + let cy = 1; + let cz = 0; + let hasSideEffects = 0; + } + def im0 : RR< + opc, (outs RC:$sx), (ins immOp2:$sz, immOp:$sy), + !strconcat(opcStr, " $sx, (${sz})0, $sy"), + []> { + let cy = 0; + let cz = 0; + let sz{6} = 1; + let hasSideEffects = 0; + } + def im1 : RR< + opc, (outs RC:$sx), (ins immOp2:$sz, immOp:$sy), + !strconcat(opcStr, " $sx, (${sz})1, $sy"), + []> { + let cy = 0; + let cz = 0; + let hasSideEffects = 0; + } + def zi : RR< + opc, (outs RC:$sx), (ins immOp:$sy), + !strconcat(opcStr, " $sx, $sy"), + [(set Ty:$sx, (OpNode 0, (i32 simm7:$sy)))]> { + let cy = 0; + let cz = 0; + let sz = 0; + let hasSideEffects = 0; + } +} + +// Multiclass for RR type instructions without dag pattern +// Used by sra.w.zx, sla.w.zx, and others + +multiclass RRINDmopc, SDNode OpNode, + RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> { + def rr : RR< + opc, (outs RC:$sx), (ins RC:$sz, I32:$sy), + !strconcat(opcStr, " $sx, $sz, $sy"), + []> { + let cy = 1; + let cz = 1; + let hasSideEffects = 0; + } + def ri : RR< + opc, (outs RC:$sx), (ins RC:$sz, immOp:$sy), + !strconcat(opcStr, " $sx, $sz, $sy"), + []> { + let cy = 0; + let cz = 1; + let hasSideEffects = 0; + } + def rm0 : RR< + opc, (outs RC:$sx), (ins immOp2:$sz, I32:$sy), + !strconcat(opcStr, " $sx, (${sz})0, $sy"), + []> { + let cy = 1; + let cz = 0; + let sz{6} = 1; + // (guess) tblgen conservatively assumes hasSideEffects when it fails to infer from a pattern. 
+ let hasSideEffects = 0; + } + def rm1 : RR< + opc, (outs RC:$sx), (ins immOp2:$sz, I32:$sy), + !strconcat(opcStr, " $sx, (${sz})1, $sy"), + []> { + let cy = 1; + let cz = 0; + let hasSideEffects = 0; + } + def im0 : RR< + opc, (outs RC:$sx), (ins immOp2:$sz, immOp:$sy), + !strconcat(opcStr, " $sx, (${sz})0, $sy"), + []> { + let cy = 0; + let cz = 0; + let sz{6} = 1; + let hasSideEffects = 0; + } + def im1 : RR< + opc, (outs RC:$sx), (ins immOp2:$sz, immOp:$sy), + !strconcat(opcStr, " $sx, (${sz})1, $sy"), + []> { + let cy = 0; + let cz = 0; + let hasSideEffects = 0; + } + def zi : RR< + opc, (outs RC:$sx), (ins immOp:$sy), + !strconcat(opcStr, " $sx, $sy"), + []> { + let cy = 0; + let cz = 0; + let sz = 0; + let hasSideEffects = 0; + } +} + +// Multiclass for RR type instructions +// Used by cmov instruction + +let Constraints = "$sx = $sd", DisableEncoding = "$sd" in +multiclass RRCMOVmopc, + RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> { + def rr : RR< + opc, (outs I64:$sx), (ins CCOp:$cf, RC:$sy, I64:$sz, I64:$sd), + !strconcat(opcStr, " $sx, $sz, $sy"), + []> { + let cy = 1; + let cz = 1; + let hasSideEffects = 0; + } + def ri : RR< + opc, (outs I64:$sx), (ins CCOp:$cf, I64:$sz, immOp:$sy, I64:$sd), + !strconcat(opcStr, " $sx, $sz, $sy"), + []> { + let cy = 0; + let cz = 1; + let hasSideEffects = 0; + } + def rm0 : RR< + opc, (outs I64:$sx), (ins CCOp:$cf, RC:$sy, immOp2:$sz, I64:$sd), + !strconcat(opcStr, " $sx, (${sz})0, $sy"), + []> { + let cy = 1; + let cz = 0; + let sz{6} = 1; + // (guess) tblgen conservatively assumes hasSideEffects when it fails to infer from a pattern. 
+ let hasSideEffects = 0; + } + def rm1 : RR< + opc, (outs I64:$sx), (ins CCOp:$cf, RC:$sy, immOp2:$sz, I64:$sd), + !strconcat(opcStr, " $sx, (${sz})1, $sy"), + []> { + let cy = 1; + let cz = 0; + let hasSideEffects = 0; + } + def im0 : RR< + opc, (outs I64:$sx), (ins CCOp:$cf, immOp:$sy, immOp2:$sz, I64:$sd), + !strconcat(opcStr, " $sx, (${sz})0, $sy"), + []> { + let cy = 0; + let cz = 0; + let sz{6} = 1; + let hasSideEffects = 0; + } + def im1 : RR< + opc, (outs I64:$sx), (ins CCOp:$cf, immOp:$sy, immOp2:$sz, I64:$sd), + !strconcat(opcStr, " $sx, (${sz})1, $sy"), + []> { + let cy = 0; + let cz = 0; + let hasSideEffects = 0; + } +} + +// Multiclass for RR type instructions with only 2 operands +// Used by pcnt, brv + +multiclass RRI2mopc, SDNode OpNode, + RegisterClass RC, ValueType Ty, Operand immOp2> { + def r : RR< + opc, (outs RC:$sx), (ins RC:$sz), + !strconcat(opcStr, " $sx, $sz"), + [(set Ty:$sx, (OpNode Ty:$sz))]> { + let cy = 1; + let cz = 1; + let hasSideEffects = 0; + } + def i : RR< + opc, (outs RC:$sx), (ins RC:$sz), + !strconcat(opcStr, " $sx, $sz"), + [(set Ty:$sx, (OpNode Ty:$sz))]> { + let cy = 0; + let cz = 1; + let hasSideEffects = 0; + } + def m0 : RR< + opc, (outs RC:$sx), (ins immOp2:$sz), + !strconcat(opcStr, " $sx, (${sz})0"), + []> { + let cy = 1; + let cz = 0; + let sz{6} = 1; + // (guess) tblgen conservatively assumes hasSideEffects when it fails to infer from a pattern. 
+ let hasSideEffects = 0; + } + def m1 : RR< + opc, (outs RC:$sx), (ins immOp2:$sz), + !strconcat(opcStr, " $sx, (${sz})1"), + []> { + let cy = 1; + let cz = 0; + let hasSideEffects = 0; + } +} + + +// Branch multiclass +let isBranch = 1, isTerminator = 1, hasDelaySlot = 1 in +multiclass BCRm opc, + RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> { + def rr : CF< + opc, (outs), + (ins CCOp:$cf, RC:$sy, RC:$sz, brtarget32:$imm32), + !strconcat(opcStr, " $sy, $sz, $imm32"), []> { + let cy = 1; + let cz = 1; + let hasSideEffects = 0; + } + def ir : CF< + opc, (outs), + (ins CCOp:$cf, immOp:$sy, RC:$sz, brtarget32:$imm32), + !strconcat(opcStr, " $sy, $sz, $imm32"), []> { + let cy = 0; + let cz = 1; + let hasSideEffects = 0; + } + def rm0 : CF< + opc, (outs), (ins CCOp:$cf, RC:$sy, immOp2:$sz, brtarget32:$imm32), + !strconcat(opcStr, " $sy, (${sz})0, $imm32"), []> { + let cy = 1; + let cz = 0; + let sz{6} = 1; + // (guess) tblgen conservatively assumes hasSideEffects when it fails to infer from a pattern. + let hasSideEffects = 0; + } + def rm1 : CF< + opc, (outs), (ins CCOp:$cf, RC:$sy, immOp2:$sz, brtarget32:$imm32), + !strconcat(opcStr, " $sy, (${sz})1, $imm32"), []> { + let cy = 1; + let cz = 0; + let hasSideEffects = 0; + } + def im0 : CF< + opc, (outs), (ins CCOp:$cf, immOp:$sy, immOp2:$sz, brtarget32:$imm32), + !strconcat(opcStr, " $sy, (${sz})0, $imm32"), []> { + let cy = 0; + let cz = 0; + let sz{6} = 1; + let hasSideEffects = 0; + } + def im1 : CF< + opc, (outs), (ins CCOp:$cf, immOp:$sy, immOp2:$sz, brtarget32:$imm32), + !strconcat(opcStr, " $sy, (${sz})1, $imm32"), []> { + let cy = 0; + let cz = 0; + let hasSideEffects = 0; + } + def a : CF< + opc, (outs), (ins brtarget32:$imm32), + !strconcat(opcStrAt, " $imm32"), []> { + let cy = 0; + let sy = 0; + let cz = 0; + let sz = 0; + let cf = 15; /* AT */ + let isBarrier = 1; + let hasSideEffects = 0; + } +} + +// Multiclass for floating point conversion instructions. 
+// Used by CVS/CVD/FLT and others +multiclass CVTm opc, SDNode OpNode, + RegisterClass RCo, ValueType Tyo, + RegisterClass RCi, ValueType Tyi, Operand immOp> { + def r : RR { + let cy = 1; + let hasSideEffects = 0; + } + def i : RR { + let cy = 0; + let hasSideEffects = 0; + } +} + + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + +// CMOV instructions +let cx = 0, cw = 0, cw2 = 0 in +defm CMOVL : RRCMOVm<"cmov.l.${cf}", 0x3B, I64, i64, simm7Op64, uimm6Op64>; + +let cx = 0, cw = 1, cw2 = 0 in +defm CMOVW : RRCMOVm<"cmov.w.${cf}", 0x3B, I32, i32, simm7Op64, uimm6Op32>; + +let cx = 0, cw = 0, cw2 = 1 in +defm CMOVD : RRCMOVm<"cmov.d.${cf}", 0x3B, I64, f64, simm7Op64, uimm6Op64>; + +let cx = 0, cw = 1, cw2 = 1 in +defm CMOVS : RRCMOVm<"cmov.s.${cf}", 0x3B, F32, f32, simm7Op64, uimm6Op32>; + +// NOP instruction +let cx = 0, sx = 0, cy = 0, sy = 0, cz = 0, sz = 0, imm32 = 0, hasSideEffects = 0 in +def NOP : RR<0x79, (outs), (ins), "nop", []>; + +// LEA and LEASL instruction (load 32 bit imm to low or high part) +let cx = 0 in +defm LEA : RMm<"lea", 0x06, add, I64, i64, simm7Op64, simm32Op64>; +let cx = 1 in +defm LEASL : RMNDm<"lea.sl", 0x06, add, I64, i64, simm7Op64, simm32Op64>; +let isCodeGenOnly = 1 in { +let cx = 0 in +defm LEA32 : RMm<"lea", 0x06, add, I32, i32, simm7Op32, simm32Op32>; +let cx = 1 in +defm LEASL32 : RMNDm<"lea.sl", 0x06, add, I32, i32, simm7Op32, simm32Op32>; +} + +let cx = 0, cy = 1, cz = 0, sz = 0, hasSideEffects = 0 in { + def LEAasx : RM< + 0x06, (outs I64:$sx), (ins MEMri:$addr), + "lea $sx,$addr", [(set iPTR:$sx, ADDRri:$addr)]>; +} + +// 5.3.2.2. 
Fixed-Point Arithmetic Operation Instructions
+
+// In this section cx selects between the two variants that share an opcode:
+// cx = 0 is the 64-bit (".l") or sign-extending 32-bit (".w.sx") form, and
+// cx = 1 is the 32-bit unsigned (".w") or zero-extending (".w.zx") form.
+
+// ADD instruction
+let cx = 0 in
+defm ADD : RRNDm<"addu.l", 0x48, add, I64, i64, simm7Op64, uimm6Op64>;
+let cx = 1 in
+defm ADDUW : RRNDm<"addu.w", 0x48, add, I32, i32, simm7Op32, uimm6Op32>;
+
+// ADS instruction
+let cx = 0 in
+defm ADS : RRm<"adds.w.sx", 0x4A, add, I32, i32, simm7Op32, uimm6Op32>;
+let cx = 1 in
+defm ADSU : RRNDm<"adds.w.zx", 0x4A, add, I32, i32, simm7Op32, uimm6Op32>;
+
+// ADX instruction
+let cx = 0 in
+defm ADX : RRm<"adds.l", 0x59, add, I64, i64, simm7Op64, uimm6Op64>;
+
+// SUB instruction
+let cx = 0 in
+defm SUB : RRNDm<"subu.l", 0x58, sub, I64, i64, simm7Op64, uimm6Op64>;
+let cx = 1 in
+defm SUBUW : RRNDm<"subu.w", 0x58, sub, I32, i32, simm7Op32, uimm6Op32>;
+
+// SBS instruction
+let cx = 0 in
+defm SBS : RRNCm<"subs.w.sx", 0x5A, sub, I32, i32, simm7Op32, uimm6Op32>;
+let cx = 1 in
+defm SBSU : RRNDm<"subs.w.zx", 0x5A, sub, I32, i32, simm7Op32, uimm6Op32>;
+
+// SBX instruction
+let cx = 0 in
+defm SBX : RRNCm<"subs.l", 0x5B, sub, I64, i64, simm7Op64, uimm6Op64>;
+
+// MPY instruction
+let cx = 0 in
+defm MPY : RRNDm<"mulu.l", 0x49, mul, I64, i64, simm7Op64, uimm6Op64>;
+let cx = 1 in
+defm MPYUW : RRNDm<"mulu.w", 0x49, mul, I32, i32, simm7Op32, uimm6Op32>;
+
+// MPS instruction
+let cx = 0 in
+defm MPS : RRm<"muls.w.sx", 0x4B, mul, I32, i32, simm7Op32, uimm6Op32>;
+let cx = 1 in
+defm MPSU : RRNDm<"muls.w.zx", 0x4B, mul, I32, i32, simm7Op32, uimm6Op32>;
+
+// MPX instruction
+let cx = 0 in
+defm MPX : RRm<"muls.l", 0x6E, mul, I64, i64, simm7Op64, uimm6Op64>;
+
+// DIV instruction
+let cx = 0 in
+defm DIV : RRNCm<"divu.l", 0x6F, udiv, I64, i64, simm7Op64, uimm6Op64>;
+let cx = 1 in
+defm DIVUW : RRNCm<"divu.w", 0x6F, udiv, I32, i32, simm7Op32, uimm6Op32>;
+
+// DVS instruction
+let cx = 0 in
+defm DVS : RRNCm<"divs.w.sx", 0x7B, sdiv, I32, i32, simm7Op32, uimm6Op32>;
+let cx = 1 in
+defm DVSU : RRNDm<"divs.w.zx", 0x7B, sdiv, I32, i32, simm7Op32, uimm6Op32>;
+
+// DVX instruction
+let cx = 0 in
+defm DVX : RRNCm<"divs.l", 0x7F, sdiv, I64, i64, simm7Op64, uimm6Op64>;
+
+// CMP instruction
+let cx = 0 in
+defm CMP : RRNDm<"cmpu.l", 0x55, setcc, I64, i64, simm7Op64, uimm6Op64>;
+let cx = 1 in
+defm CMPUW : RRNDm<"cmpu.w", 0x55, setcc, I32, i32, simm7Op32, uimm6Op32>;
+
+// CPS instruction
+let cx = 0 in
+defm CPS : RRNDm<"cmps.w.sx", 0x7A, setcc, I32, i32, simm7Op32, uimm6Op32>;
+let cx = 1 in
+defm CPSU : RRNDm<"cmps.w.zx", 0x7A, setcc, I32, i32, simm7Op32, uimm6Op32>;
+
+// CPX instruction
+let cx = 0 in
+defm CPX : RRNDm<"cmps.l", 0x6A, setcc, I64, i64, simm7Op64, uimm6Op64>;
+
+// cx: sx/zx, cw: max/min
+//
+// cw = 0 selects max and cw = 1 selects min (see CMXa / CMXi). Both 32-bit
+// forms print the ".zx" suffix, which every other definition in this section
+// pairs with cx = 1, so they use cx = 1 here as well. CMSi previously had
+// cw = 0, which gave it the same encoding bits as a max instruction.
+
+let cw = 0 in defm CMXa :
+  RRm<"maxs.l", 0x68, VEmax, I64, i64, simm7Op64, uimm6Op64>;
+
+let cx = 1, cw = 0 in defm CMSa :
+  RRm<"maxs.w.zx", 0x78, VEmax, I32, i32, simm7Op32, uimm6Op32>;
+
+let cw = 1 in defm CMXi :
+  RRm<"mins.l", 0x68, VEmin, I64, i64, simm7Op64, uimm6Op64>;
+
+let cx = 1, cw = 1 in defm CMSi :
+  RRm<"mins.w.zx", 0x78, VEmin, I32, i32, simm7Op32, uimm6Op32>;
+
+// 5.3.2.3.
Logical Arithmetic Operation Instructions + +// AND, OR, XOR, EQV, NND, and MRG instruction +let cx = 0 in { + defm AND : RRm<"and", 0x44, and, I64, i64, simm7Op64, uimm6Op64>; + defm OR : RRm<"or", 0x45, or, I64, i64, simm7Op64, uimm6Op64>; + defm XOR : RRm<"xor", 0x46, xor, I64, i64, simm7Op64, uimm6Op64>; + let isCodeGenOnly = 1 in { + defm AND32 : RRm<"and", 0x44, and, I32, i32, simm7Op32, uimm6Op32>; + defm OR32 : RRm<"or", 0x45, or, I32, i32, simm7Op32, uimm6Op32>; + defm XOR32 : RRm<"xor", 0x46, xor, I32, i32, simm7Op32, uimm6Op32>; + } + /* + defm EQV : RRm<"eqv", 0x47, eqv, I64, i64, simm7Op64, uimm6Op64>; + defm NND : RRm<"nnd", 0x54, nnd, I64, i64, simm7Op64, uimm6Op64>; + defm MRG : RRm<"mrg", 0x56, mrg, I64, i64, simm7Op64, uimm6Op64>; + */ +} + +// Bits operations + +let cx = 0 in { +defm PCNT : RRI2m<"pcnt", 0x38, ctpop, I64, i64, uimm6Op64>; +defm BRV : RRI2m<"brv", 0x39, bitreverse, I64, i64, uimm6Op64>; +defm LDZ : RRI2m<"ldz", 0x67, ctlz, I64, i64, uimm6Op64>; +defm BSWP : RRINDm<"bswp", 0x2B, bswap, I64, i64, simm7Op64, uimm6Op64>; +} + + +// 5.3.2.4 Shift Instructions + +let cx = 0 in +defm SRAX : RRIm<"sra.l", 0x77, sra, I64, i64, simm7Op32, uimm6Op64>; +let cx = 0 in +defm SRA : RRIm<"sra.w.sx", 0x76, sra, I32, i32, simm7Op32, uimm6Op32>; +let cx = 1 in +defm SRAU : RRINDm<"sra.w.zx", 0x76, sra, I32, i32, simm7Op32, uimm6Op32>; + +let cx = 0 in +defm SLL : RRIm<"sll", 0x65, shl, I64, i64, simm7Op32, uimm6Op64>; +let cx = 0 in +defm SLA : RRIm<"sla.w.sx", 0x66, shl, I32, i32, simm7Op32, uimm6Op32>; +let cx = 1 in +defm SLAU : RRINDm<"sla.w.zx", 0x66, shl, I32, i32, simm7Op32, uimm6Op32>; + +let cx = 0 in +defm SRL : RRIm<"srl", 0x75, srl, I64, i64, simm7Op32, uimm6Op64>; + +def : Pat<(i32 (srl i32:$src, (i32 simm7:$val))), + (EXTRACT_SUBREG (SRLri (ANDrm0 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + $src, sub_i32), 32), imm:$val), sub_i32)>; +def : Pat<(i32 (srl i32:$src, i32:$val)), + (EXTRACT_SUBREG (SRLrr (ANDrm0 (INSERT_SUBREG (i64 
(IMPLICIT_DEF)), + $src, sub_i32), 32), $val), sub_i32)>; + +// 5.3.2.5. Floating-point Arithmetic Operation Instructions +let cx = 0 in +defm FAD : RRFm<"fadd.d", 0x4C, fadd, I64, f64, simm7Op64, uimm6Op64>; +let cx = 1 in +defm FADS : RRFm<"fadd.s", 0x4C, fadd, F32, f32, simm7Op32, uimm6Op32>; +let cx = 0 in +defm FAQ : RRFm<"fadd.q", 0x6C, fadd, F128, f128, simm7Op128, uimm6Op128>; + +let cx = 0 in +defm FSB : RRFm<"fsub.d", 0x5C, fsub, I64, f64, simm7Op64, uimm6Op64>; +let cx = 1 in +defm FSBS : RRFm<"fsub.s", 0x5C, fsub, F32, f32, simm7Op32, uimm6Op32>; +let cx = 0 in +defm FSQ : RRFm<"fsub.q", 0x7C, fsub, F128, f128, simm7Op128, uimm6Op128>; + +let cx = 0 in +defm FMP : RRFm<"fmul.d", 0x4D, fmul, I64, f64, simm7Op64, uimm6Op64>; +let cx = 1 in +defm FMPS : RRFm<"fmul.s", 0x4D, fmul, F32, f32, simm7Op32, uimm6Op32>; +let cx = 0 in +defm FMQ : RRFm<"fmul.q", 0x6D, fmul, F128, f128, simm7Op128, uimm6Op128>; + +let cx = 0 in +defm FDV : RRFm<"fdiv.d", 0x5D, fdiv, I64, f64, simm7Op64, uimm6Op64>; +let cx = 1 in +defm FDVS : RRFm<"fdiv.s", 0x5D, fdiv, F32, f32, simm7Op32, uimm6Op32>; + +// FCP instruction +let cx = 0 in +defm FCP : RRNDm<"fcmp.d", 0x7E, setcc, I64, f64, simm7Op64, uimm6Op64>; +let cx = 1 in +defm FCPS : RRNDm<"fcmp.s", 0x7E, setcc, F32, f32, simm7Op32, uimm6Op32>; +let cx = 0 in +defm FCQ : RRFCQm<"fcmp.q", 0x7D, setcc, F128, f128, simm7Op128, uimm6Op128>; + +// FCM +let cw = 0 in { + let cx = 0 in + defm FCMA : RRNDm<"fmax.d", 0x3E, VEfmax, I64, f64, simm7Op64, uimm6Op64>; + let cx = 1 in + defm FCMAS : RRNDm<"fmax.s", 0x3E, VEfmax, F32, f32, simm7Op32, uimm6Op32>; +} +let cw = 1 in { + let cx = 0 in + defm FCMI : RRNDm<"fmin.d", 0x3E, VEfmin, I64, f64, simm7Op64, uimm6Op64>; + let cx = 1 in + defm FCMIS : RRNDm<"fmin.s", 0x3E, VEfmin, F32, f32, simm7Op32, uimm6Op32>; +} + +let cx = 0, cw = 0 /* sign extend */, cz = 1, sz = 0 /* round toward zero */ in +defm FIX : CVTm<"cvt.w.d.sx.rz", 0x4E, fp_to_sint, I32, i32, I64, f64, simm7Op32>; +let cx = 1, 
cw = 0 /* sign extend */, cz = 1, sz = 0 /* round toward zero */ in +defm FIXS : CVTm<"cvt.w.s.sx.rz", 0x4E, fp_to_sint, I32, i32, F32, f32, simm7Op32>; +let cx = 0, cz = 1, sz = 0 /* round toward zero */ in +defm FIXX : CVTm<"cvt.l.d.rz", 0x4F, fp_to_sint, I64, i64, I64, f64, simm7Op64>; +let cz = 0, sz = 0 in { + let cx = 0 in + defm FLT : CVTm<"cvt.d.w", 0x5E, sint_to_fp, I64, f64, I32, i32, simm7Op32>; + let cx = 1 in + defm FLTS : CVTm<"cvt.s.w", 0x5E, sint_to_fp, F32, f32, I32, i32, simm7Op32>; + let cx = 0 in + defm FLTX : CVTm<"cvt.d.l", 0x5F, sint_to_fp, I64, f64, I64, i64, simm7Op64>; + let cx = 0 in + defm CVS : CVTm<"cvt.s.d", 0x1F, fpround, F32, f32, I64, f64, simm7Op64>; + let cx = 1 in + defm CVSQ : CVTm<"cvt.s.q", 0x1F, fpround, F32, f32, F128, f128, simm7Op128>; + let cx = 0 in + defm CVD : CVTm<"cvt.d.s", 0x0F, fpextend, I64, f64, F32, f32, simm7Op32>; + let cx = 1 in + defm CVDQ : CVTm<"cvt.d.q", 0x0F, fpround, I64, f64, F128, f128, simm7Op128>; + let cx = 0 in + defm CVQ : CVTm<"cvt.q.d", 0x2D, fpextend, F128, f128, I64, f64, simm7Op64>; + let cx = 1 in + defm CVQS : CVTm<"cvt.q.s", 0x2D, fpextend, F128, f128, F32, f32, simm7Op32>; +} + +// Load and Store instructions +// As 1st step, only uses sz and imm32 to represent $addr +let mayLoad = 1, hasSideEffects = 0 in { +let cy = 0, sy = 0, cz = 1 in { +let cx = 0 in +def LDSri : RM< + 0x01, (outs I64:$sx), (ins MEMri:$addr), + "ld $sx, $addr", + [(set i64:$sx, (load ADDRri:$addr))]>; +let cx = 0 in +def LDUri : RM< + 0x02, (outs F32:$sx), (ins MEMri:$addr), + "ldu $sx, $addr", + [(set f32:$sx, (load ADDRri:$addr))]>; +let cx = 0 in +def LDLri : RM< + 0x03, (outs I32:$sx), (ins MEMri:$addr), + "ldl.sx $sx, $addr", + [(set i32:$sx, (load ADDRri:$addr))]>; +let cx = 1 in +def LDLUri : RM< + 0x03, (outs I32:$sx), (ins MEMri:$addr), + "ldl.zx $sx, $addr", + [(set i32:$sx, (load ADDRri:$addr))]>; +let cx = 0 in +def LD2Bri : RM< + 0x04, (outs I32:$sx), (ins MEMri:$addr), + "ld2b.sx $sx, $addr", + [(set 
i32:$sx, (sextloadi16 ADDRri:$addr))]>; +let cx = 1 in +def LD2BUri : RM< + 0x04, (outs I32:$sx), (ins MEMri:$addr), + "ld2b.zx $sx, $addr", + [(set i32:$sx, (zextloadi16 ADDRri:$addr))]>; +let cx = 0 in +def LD1Bri : RM< + 0x05, (outs I32:$sx), (ins MEMri:$addr), + "ld1b.sx $sx, $addr", + [(set i32:$sx, (sextloadi8 ADDRri:$addr))]>; +let cx = 1 in +def LD1BUri : RM< + 0x05, (outs I32:$sx), (ins MEMri:$addr), + "ld1b.zx $sx, $addr", + [(set i32:$sx, (zextloadi8 ADDRri:$addr))]>; +} +def LDQri : Pseudo< + (outs F128:$sx), (ins MEMri:$addr), + "# pseudo ldq $sx, $addr", + [(set f128:$sx, (load ADDRri:$addr))]>; +} + +let mayStore = 1, hasSideEffects = 0 in { +let cx = 0, cy = 0, sy = 0, cz = 1 in { +def STSri : RM< + 0x11, (outs), (ins MEMri:$addr, I64:$sx), + "st $sx, $addr", + [(store i64:$sx, ADDRri:$addr)]>; +def STUri : RM< + 0x12, (outs), (ins MEMri:$addr, F32:$sx), + "stu $sx, $addr", + [(store f32:$sx, ADDRri:$addr)]>; +def STLri : RM< + 0x13, (outs), (ins MEMri:$addr, I32:$sx), + "stl $sx, $addr", + [(store i32:$sx, ADDRri:$addr)]>; +def ST2Bri : RM< + 0x14, (outs), (ins MEMri:$addr, I32:$sx), + "st2b $sx, $addr", + [(truncstorei16 i32:$sx, ADDRri:$addr)]>; +def ST1Bri : RM< + 0x15, (outs), (ins MEMri:$addr, I32:$sx), + "st1b $sx, $addr", + [(truncstorei8 i32:$sx, ADDRri:$addr)]>; +} +def STQri : Pseudo< + (outs), (ins MEMri:$addr, F128:$sx), + "# pseudo stq $sx, $addr", + [(store f128:$sx, ADDRri:$addr)]>; +} + +def : Pat<(f64 (load ADDRri:$addr)), (LDSri ADDRri:$addr)>; +def : Pat<(store f64:$sx, ADDRri:$addr), (STSri ADDRri:$addr, $sx)>; + +// Patterns for unaligned load + +def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() < 4; +}]>; +def unaligned8load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() < 8; +}]>; +def : Pat<(i32 (unaligned4load ADDRri:$addr)), + (LDLri MEMri:$addr)>; +def : Pat<(f32 (unaligned4load ADDRri:$addr)), + (LDUri MEMri:$addr)>; +def : Pat<(i64 
(unaligned8load ADDRri:$addr)), + (LDSri ADDRri:$addr)>; +def : Pat<(f64 (unaligned8load ADDRri:$addr)), + (LDSri ADDRri:$addr)>; + +// Patterns for unaligned store + +def unaligned4store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() < 4; +}]>; +def unaligned8store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() < 8; +}]>; +def : Pat<(unaligned4store i32:$sx, ADDRri:$addr), + (STLri ADDRri:$addr, $sx)>; +def : Pat<(unaligned4store f32:$sx, ADDRri:$addr), + (STUri ADDRri:$addr, $sx)>; +def : Pat<(unaligned8store i64:$sx, ADDRri:$addr), + (STSri ADDRri:$addr, $sx)>; +def : Pat<(unaligned8store f64:$sx, ADDRri:$addr), + (STSri ADDRri:$addr, $sx)>; + +// Patterns for unaligned sextload/zextload/extload + +def unaligned2extloadi16 : PatFrag<(ops node:$ptr), (extloadi16 node:$ptr), [{ + return cast(N)->getAlignment() < 2; +}]>; +def unaligned2sextloadi16 : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{ + return cast(N)->getAlignment() < 2; +}]>; +def unaligned2zextloadi16 : PatFrag<(ops node:$ptr), (zextloadi16 node:$ptr), [{ + return cast(N)->getAlignment() < 2; +}]>; +def unaligned4extloadi32 : PatFrag<(ops node:$ptr), (extloadi32 node:$ptr), [{ + return cast(N)->getAlignment() < 4; +}]>; +def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return cast(N)->getAlignment() < 4; +}]>; +def unaligned4zextloadi32 : PatFrag<(ops node:$ptr), (zextloadi32 node:$ptr), [{ + return cast(N)->getAlignment() < 4; +}]>; +def : Pat<(i64 (unaligned2sextloadi16 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LD2Bri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (unaligned2zextloadi16 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LD2BUri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (unaligned2extloadi16 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LD2BUri MEMri:$addr), sub_i32)>; +def : Pat<(i64 
(unaligned4sextloadi32 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LDLri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (unaligned4zextloadi32 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LDLUri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (unaligned4extloadi32 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LDLri MEMri:$addr), sub_i32)>; + +// Patterns for unaligned truncstore + +def unaligned4truncstorei16 : PatFrag<(ops node:$val, node:$ptr), + (truncstorei16 node:$val, node:$ptr), [{ + return cast(N)->getAlignment() < 4; +}]>; +def unaligned8truncstorei32 : PatFrag<(ops node:$val, node:$ptr), + (truncstorei32 node:$val, node:$ptr), [{ + return cast(N)->getAlignment() < 8; +}]>; +def : Pat<(unaligned4truncstorei16 i64:$sx, ADDRri:$addr), + (ST2Bri ADDRri:$addr, (EXTRACT_SUBREG $sx, sub_i32))>; +def : Pat<(unaligned8truncstorei32 i64:$sx, ADDRri:$addr), + (STLri ADDRri:$addr, (EXTRACT_SUBREG $sx, sub_i32))>; + +// Jump instruction +let cx = 0, cx2 = 0, bpf = 0 /* NONE */, cy = 1, cz = 1, + isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasSideEffects = 0 in +def BC : CF< + 0x19, (outs), (ins CCOp:$cf, I64:$sy, brtarget32:$imm32), + "b.${cf}.l $sy, $imm32", + []>; + +// Jump always instruction is treated as a special case of jump in order +// to make finding unconditional jump easy. +let cx = 0, cx2 = 0, bpf = 0 /* NONE */, cf = 15 /* AT */, cy = 0, sy = 0, + cz = 1, + isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1, + hasDelaySlot = 1, isCodeGenOnly = 1, hasSideEffects = 0 in { +def BArr : CF< + 0x19, (outs), (ins MEMrr:$addr), + "b.l $addr", + [(brind ADDRrr:$addr)]>; +def BAri : CF< + 0x19, (outs), (ins MEMri:$addr), + "b.l $addr", + [(brind ADDRri:$addr)]>; +} + +// Jump never instruction is also a special case of jump. 
+let cx = 0, cx2 = 0, bpf = 0 /* NONE */, cf = 0 /* AF */, cy = 1, sy = 0,
+    cz = 1,
+    isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasSideEffects = 0 in
+def BN : CF<
+    0x19, (outs), (ins brtarget32:$imm32),
+    "b.af.l $imm32",
+    []>;
+
+// Return instruction is also a special case of jump.
+// It branches through %lr (SX10). The register number is 10, matching the
+// `sx = 10` used for %lr by CALL/CALLr below; the previous value 0x10 is 16.
+let cx = 0, cx2 = 0, bpf = 0 /* NONE */, cf = 15 /* AT */, cy = 0, sy = 0,
+    cz = 1, sz = 10 /* SX10 */, imm32 = 0, Uses = [SX10],
+    isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1,
+    isCodeGenOnly = 1, hasSideEffects = 0 in
+def RET : CF<
+    0x19, (outs), (ins),
+    "b.l (,%lr)",
+    [(retflag)]>;
+
+// Branch and Save IC
+// cy = 0 / sy = 0 leave the sy slot as a zero immediate, the same way CALLr
+// (same opcode 0x08) does. `cy` used to be listed twice and `sy` not at all.
+
+let cx = 0, cy = 0, sy = 0, cz = 1, hasSideEffects = 0 /* , Uses = [IC] */ in
+def BSIC : RM<0x08, (outs), (ins I64:$sx, I64:$sz), "bsic $sx, (, ${sz})", []>;
+
+// Branch instruction
+// cx / cx2 select the compared type: .l (0,0), .w (1,0), .d (0,1), .s (1,1).
+let cx = 0, cx2 = 0, bpf = 0 /* NONE */ in
+defm BCRL : BCRm<"br${cf}.l", "br.l", 0x18, I64, i64, simm7Op64, uimm6Op64>;
+let cx = 1, cx2 = 0, bpf = 0 /* NONE */ in
+defm BCRW : BCRm<"br${cf}.w", "br.w", 0x18, I32, i32, simm7Op32, uimm6Op32>;
+let cx = 0, cx2 = 1, bpf = 0 /* NONE */ in
+defm BCRD : BCRm<"br${cf}.d", "br.d", 0x18, I64, f64, simm7Op64, uimm6Op64>;
+let cx = 1, cx2 = 1, bpf = 0 /* NONE */ in
+defm BCRS : BCRm<"br${cf}.s", "br.s", 0x18, F32, f32, simm7Op32, uimm6Op32>;
+
+// Load and Store host memory instructions
+// sy selects the access width: 3 = .l, 2 = .w, 1 = .h, 0 = .b.
+// These have no selection pattern, so mayLoad / mayStore are stated
+// explicitly, as the ordinary load/store groups in this file do.
+let cx = 0, cy = 0, cz = 1, mayLoad = 1, hasSideEffects = 0 in {
+let sy = 3 in
+def LHMri : RM<
+    0x21, (outs I64:$sx), (ins MEMASri:$addr),
+    "lhm.l $sx, $addr",
+    []>;
+let sy = 2 in
+def LHMLri : RM<
+    0x21, (outs I32:$sx), (ins MEMASri:$addr),
+    "lhm.w $sx, $addr",
+    []>;
+let sy = 1 in
+def LHM2Bri : RM<
+    0x21, (outs I16:$sx), (ins MEMASri:$addr),
+    "lhm.h $sx, $addr",
+    []>;
+let sy = 0 in
+def LHM1Bri : RM<
+    0x21, (outs I8:$sx), (ins MEMASri:$addr),
+    "lhm.b $sx, $addr",
+    []>;
+}
+
+// The store mnemonics carry the same width suffix as the matching lhm.*
+// instruction; previously every variant printed "shm.l".
+let cx = 0, cy = 0, cz = 1, mayStore = 1, hasSideEffects = 0 in {
+let sy = 3 in
+def SHMri : RM<
+    0x31, (outs), (ins MEMASri:$addr, I64:$sx),
+    "shm.l $sx, $addr",
+    []>;
+let sy = 2 in
+def SHMLri : RM<
+    0x31, (outs), (ins MEMASri:$addr, I32:$sx),
+    "shm.w $sx, $addr",
+    []>;
+let sy = 1 in
+def SHM2Bri : RM<
+    0x31, (outs), (ins MEMASri:$addr, I16:$sx),
+    "shm.h $sx, $addr",
+    []>;
+let sy = 0 in
+def SHM1Bri : RM<
+    0x31, (outs), (ins MEMASri:$addr, I8:$sx),
+    "shm.b $sx, $addr",
+    []>;
+}
+
+let cx = 0, sx = 0, cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0 in
+def MONC : RR<
+    0x3F, (outs), (ins),
+    "monc",
+    []>;
+
+let cx = 1, sx = 0, cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0 in
+def MONCT : RR<
+    0x3F, (outs), (ins),
+    "monc.hdb",
+    []>;
+
+// Save Instruction Counter
+
+let cx = 0, cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0 /* , Uses = [IC] */ in
+def SIC : RR<0x28, (outs I32:$sx), (ins), "sic $sx", []>;
+
+// Test and Set 1 AM (multiple length swap)
+
+let cx = 0 in
+defm TS1AML : RRCASm<"ts1am.l", 0x42, add, I64, i64, simm7Op64, uimm6Op64>;
+let cx = 1 in
+defm TS1AMW : RRCASm<"ts1am.w", 0x42, add, I32, i32, simm7Op32, uimm6Op32>;
+
+// Atomic swap
+// FIXME: Assign 4 byte align address to $src
+// def : Pat<(i32 (atomic_swap_8 ADDRri:$src, i32:$new)),
+//           (TS1AMWasi MEMASri:$src, 1, $new)>;
+// def : Pat<(i32 (atomic_swap_16 ADDRri:$src, i32:$new)),
+//           (TS1AMWasi MEMASri:$src, 3, $new)>;
+def : Pat<(i32 (atomic_swap_32 ADDRri:$src, i32:$new)),
+          (TS1AMWasi MEMASri:$src, 15, $new)>;
+def : Pat<(i64 (atomic_swap_64 ADDRri:$src, i64:$new)),
+          (TS1AMLasi MEMASri:$src, 127, $new)>;
+
+// Compare and Swap
+
+let cx = 0 in
+defm CASL : RRCASm<"cas.l", 0x62, add, I64, i64, simm7Op64, uimm6Op64>;
+let cx = 1 in
+defm CASW : RRCASm<"cas.w", 0x62, add, I32, i32, simm7Op32, uimm6Op32>;
+
+// Atomic cmp and swap
+def : Pat<(i32 (atomic_cmp_swap_32 ADDRri:$src, i32:$cmp, i32:$new)),
+          (CASWasr MEMASri:$src, $cmp, $new)>;
+def : Pat<(i64 (atomic_cmp_swap_64 ADDRri:$src, i64:$cmp, i64:$new)),
+          (CASLasr MEMASri:$src, $cmp, $new)>;
+
+// Transfer Control Instruction
+// lf / sf choose which fence is emitted; the printed operand is 1 for
+// sf only, 2 for lf only and 3 for both.
+
+let avo = 0, c2 = 0, c1 = 0, c0 = 0, hasSideEffects = 1 in {
+  let lf = 0, sf = 1 in
+  def FENCEstore : RRFENCE<0x20, (outs), (ins), "fencem 1", []>;
+  let lf = 1, sf = 0 in
+  def FENCEload : RRFENCE<0x20, (outs), (ins), "fencem 2", []>;
+  let lf = 1, sf = 1 in
+  def FENCEloadstore : RRFENCE<0x20, (outs), (ins), "fencem 3", []>;
+}
+
+def : Pat<(int_ve_fencem1), (FENCEstore)>;
+def : Pat<(int_ve_fencem2), (FENCEload)>;
+def : Pat<(int_ve_fencem3), (FENCEloadstore)>;
+
+// Set Vector Out-of-order memory access Boundary
+
+let sx = 0, sy = 0, sz = 0, hasSideEffects = 1 in
+def SVOB : RR<0x30, (outs), (ins), "svob", []>;
+
+// MEMBARRIER
+let hasSideEffects = 1 in
+def MEMBARRIER : Pseudo<(outs), (ins), "# MEMBARRIER",
+                        [(MemBarrier)] >;
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
+  def EH_SjLj_SetJmp : Pseudo<(outs I32:$dst), (ins I64:$buf),
+                              "# EH_SJLJ_SETJMP",
+                              [(set I32:$dst, (VEeh_sjlj_setjmp I64:$buf))]>;
+  let isTerminator = 1 in {
+  def EH_SjLj_LongJmp : Pseudo<(outs), (ins I64:$buf),
+                               "# EH_SJLJ_LONGJMP",
+                               [(VEeh_sjlj_longjmp I64:$buf)]>;
+  }
+}
+
+let isBarrier = 1, hasSideEffects = 1, usesCustomInserter = 1 in
+  def EH_SjLj_Setup_Dispatch : Pseudo<(outs), (ins), "# EH_SJLJ_SETUP_DISPATCH",
+                                      [(VEeh_sjlj_setup_dispatch)]>;
+
+//===----------------------------------------------------------------------===//
+// Dummy instruction for CPU flow control
+//===----------------------------------------------------------------------===//
+
+let mayLoad = 1, mayStore = 0, hasSideEffects = 1, isTrap = 1 in {
+  def TRAP : Pseudo<(outs), (ins), "# TRAP", [(trap)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions for CodeGenOnly
+//===----------------------------------------------------------------------===//
+
+let isCodeGenOnly = 1 in {
+
+// Call instruction
+// Both forms are printed as "bsic %lr, ..." with sx fixed to 10 and SX10
+// listed in Defs.
+let Defs = [SX10], Uses = [SX11], hasDelaySlot = 1, isCall = 1, hasSideEffects = 0 in {
+let cx = 0, sx = 10, cy = 0, sy = 0, cz = 0, sz = 0 in
+def CALL : RM<
+    0x08, (outs), (ins calltarget:$imm32, variable_ops),
+    "bsic %lr, $imm32",
+    []>;
+/*
+// use sy and sz to represent 2 registers
+let cx = 0, sx = 10, cy = 1, cz = 1, imm32 = 0 in
+def CALLrr : RM<
+    0x08, (outs), (ins MEMrr:$ptr, variable_ops),
+    "bsic %lr, $ptr",
+    [(call ADDRrr:$ptr)]>;
+// use sz to represent a register, and use imm32 to represent immediate value
+let cx = 0, sx = 10, cy = 0, sy = 0, cz = 1 in
+def CALLri : RM<
+    0x08, (outs), (ins MEMri:$ptr, variable_ops),
+    "bsic %lr, $ptr",
+    [(call ADDRri:$ptr)]>;
+*/
+// use sz to represent a register
+let cx = 0, sx = 10, cy = 0, sy = 0, cz = 1, imm32 = 0 in
+def CALLr : RM<
+    0x08, (outs), (ins I64:$sz, variable_ops),
+    "bsic %lr, (,$sz)",
+    []>;
+}
+
+}
+
+//===----------------------------------------------------------------------===//
+// Pattern Matchings
+//===----------------------------------------------------------------------===//
+
+// Small immediates.
+def : Pat<(i32 simm7:$val), (OR32im1 imm:$val, 0)>;
+def : Pat<(i64 simm7:$val), (ORim1 imm:$val, 0)>;
+// Medium immediates.
+def : Pat<(i32 simm32:$val), (LEA32zzi imm:$val)>;
+def : Pat<(i64 simm32:$val), (LEAzzi imm:$val)>;
+def : Pat<(i64 uimm32:$val), (ANDrm0 (LEAzzi imm:$val), 32)>;
+// Arbitrary immediates.
+def : Pat<(i64 lozero:$val), + (LEASLzzi (HI32 imm:$val))>; +def : Pat<(i64 lomsbzero:$val), + (LEASLrzi (LEAzzi (LO32 imm:$val)), (HI32 imm:$val))>; +def : Pat<(i64 imm:$val), + (LEASLrzi (ANDrm0 (LEAzzi (LO32 imm:$val)), 32), + (HI32 imm:$val))>; + +// floating point +def : Pat<(f32 fpimm:$val), + (COPY_TO_REGCLASS (LEASLzzi (LOFP32 $val)), F32)>; +def : Pat<(f64 fplozero:$val), + (LEASLzzi (HIFP32 $val))>; +def : Pat<(f64 fplomsbzero:$val), + (LEASLrzi (LEAzzi (LOFP32 $val)), (HIFP32 $val))>; +def : Pat<(f64 fpimm:$val), + (LEASLrzi (ANDrm0 (LEAzzi (LOFP32 $val)), 32), + (HIFP32 $val))>; + +// The same integer registers are used for i32 and i64 values. +// When registers hold i32 values, the high bits are don't care. + +// Cast to i1 +def : Pat<(sext_inreg I32:$src, i1), + (SRAri (SLAri $src, 31), 31)>; +def : Pat<(sext_inreg I64:$src, i1), + (SRAXri (SLLri $src, 63), 63)>; + +// Cast to i8 +def : Pat<(sext_inreg I32:$src, i8), + (SRAri (SLAri $src, 24), 24)>; +def : Pat<(sext_inreg I64:$src, i8), + (SRAXri (SLLri $src, 56), 56)>; +def : Pat<(sext_inreg (i32 (trunc i64:$src)), i8), + (EXTRACT_SUBREG (SRAXri (SLLri $src, 56), 56), sub_i32)>; +def : Pat<(and (trunc i64:$src), 0xff), + (AND32rm0 (EXTRACT_SUBREG $src, sub_i32), 56)>; + +// Cast to i16 +def : Pat<(sext_inreg I32:$src, i16), + (SRAri (SLAri $src, 16), 16)>; +def : Pat<(sext_inreg I64:$src, i16), + (SRAXri (SLLri $src, 48), 48)>; +def : Pat<(sext_inreg (i32 (trunc i64:$src)), i16), + (EXTRACT_SUBREG (SRAXri (SLLri $src, 48), 48), sub_i32)>; +def : Pat<(and (trunc i64:$src), 0xffff), + (AND32rm0 (EXTRACT_SUBREG $src, sub_i32), 48)>; + +// Cast to i32 +def : Pat<(i32 (trunc i64:$src)), + (ADSrm1 (EXTRACT_SUBREG $src, sub_i32), 0)>; +def : Pat<(i32 (fp_to_sint f128:$sy)), (FIXr (CVDQr $sy))>; + +// Cast to i64 +def : Pat<(sext_inreg I64:$src, i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (ADSrm1 (EXTRACT_SUBREG $src, sub_i32), 0), sub_i32)>; +def : Pat<(i64 (sext i32:$sy)), + (INSERT_SUBREG (i64 
(IMPLICIT_DEF)), (ADSrm1 $sy, 0), sub_i32)>; +def : Pat<(i64 (zext i32:$sy)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADSUrm1 $sy, 0), sub_i32)>; +def : Pat<(i64 (fp_to_sint f32:$sy)), (FIXXr (CVDr $sy))>; +def : Pat<(i64 (fp_to_sint f128:$sy)), (FIXXr (CVDQr $sy))>; + +// Cast to f32 +def : Pat<(f32 (sint_to_fp i64:$sy)), (CVSr (FLTXr i64:$sy))>; + +// Cast to f64 + +// Cast to f128 +def : Pat<(f128 (sint_to_fp i32:$sy)), (CVQr (FLTr $sy))>; +def : Pat<(f128 (sint_to_fp i64:$sy)), (CVQr (FLTXr $sy))>; + +def : Pat<(i64 (anyext i32:$sy)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $sy, sub_i32)>; + +// extload, sextload and zextload stuff +def : Pat<(i64 (sextloadi8 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LD1Bri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (zextloadi8 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LD1BUri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (sextloadi16 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LD2Bri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (zextloadi16 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LD2BUri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (sextloadi32 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LDLri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (zextloadi32 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LDLUri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (extloadi8 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LD1BUri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (extloadi16 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LD2BUri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (extloadi32 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LDLri MEMri:$addr), sub_i32)>; + +// anyextload +def : Pat<(extloadi8 ADDRri:$addr), (LD1BUri MEMri:$addr)>; +def : Pat<(extloadi16 ADDRri:$addr), (LD2BUri MEMri:$addr)>; + +// truncstore +def : Pat<(truncstorei8 i64:$src, ADDRri:$addr), + (ST1Bri MEMri:$addr, (EXTRACT_SUBREG $src, sub_i32))>; +def : Pat<(truncstorei16 i64:$src, 
ADDRri:$addr), + (ST2Bri MEMri:$addr, (EXTRACT_SUBREG $src, sub_i32))>; +def : Pat<(truncstorei32 i64:$src, ADDRri:$addr), + (STLri MEMri:$addr, (EXTRACT_SUBREG $src, sub_i32))>; + +// Atomic loads +def : Pat<(atomic_load_8 ADDRri:$src), (LD1BUri MEMri:$src)>; +def : Pat<(atomic_load_16 ADDRri:$src), (LD2BUri MEMri:$src)>; +def : Pat<(atomic_load_32 ADDRri:$src), (LDLUri MEMri:$src)>; +def : Pat<(atomic_load_64 ADDRri:$src), (LDSri MEMri:$src)>; + +// Atomic stores +def : Pat<(atomic_store_8 ADDRri:$ptr, i32:$val), + (ST1Bri MEMri:$ptr, $val)>; +def : Pat<(atomic_store_16 ADDRri:$ptr, i32:$val), + (ST2Bri MEMri:$ptr, $val)>; +def : Pat<(atomic_store_32 ADDRri:$ptr, i32:$val), + (STLri MEMri:$ptr, $val)>; +def : Pat<(atomic_store_64 ADDRri:$ptr, i64:$val), + (STSri MEMri:$ptr, $val)>; + +// Address calculation and its optimization +def : Pat<(VEhi tglobaladdr:$in), (LEASLzzi tglobaladdr:$in)>; +def : Pat<(VElo tglobaladdr:$in), (ANDrm0 (LEAzzi tglobaladdr:$in), 32)>; +def : Pat<(add (VEhi tglobaladdr:$in1), (VElo tglobaladdr:$in2)), + (LEASLrzi (ANDrm0 (LEAzzi tglobaladdr:$in2), 32), + (tglobaladdr:$in1))>; + +// GlobalTLS address calculation and its optimization +def : Pat<(VEhi tglobaltlsaddr:$in), (LEASLzzi tglobaltlsaddr:$in)>; +def : Pat<(VElo tglobaltlsaddr:$in), (ANDrm0 (LEAzzi tglobaltlsaddr:$in), 32)>; +def : Pat<(add (VEhi tglobaltlsaddr:$in1), (VElo tglobaltlsaddr:$in2)), + (LEASLrzi (ANDrm0 (LEAzzi tglobaltlsaddr:$in2), 32), + (tglobaltlsaddr:$in1))>; + +// Address calculation and its optimization +def : Pat<(VEhi tconstpool:$in), (LEASLzzi tconstpool:$in)>; +def : Pat<(VElo tconstpool:$in), (ANDrm0 (LEAzzi tconstpool:$in), 32)>; +def : Pat<(add (VEhi tconstpool:$in1), (VElo tconstpool:$in2)), + (LEASLrzi (ANDrm0 (LEAzzi tconstpool:$in2), 32), + (tconstpool:$in1))>; + +// Address calculation and its optimization +def : Pat<(VEhi texternalsym:$in), (LEASLzzi texternalsym:$in)>; +def : Pat<(VElo texternalsym:$in), (ANDrm0 (LEAzzi texternalsym:$in), 32)>; 
+def : Pat<(add (VEhi texternalsym:$in1), (VElo texternalsym:$in2)),
+          (LEASLrzi (ANDrm0 (LEAzzi texternalsym:$in2), 32),
+                    (texternalsym:$in1))>;
+
+// Address store of mcsym
+def : Pat<(store (i64 mcsym:$src), ADDRri:$dst),
+          (STSri ADDRri:$dst, (LEASLrzi (ANDrm0 (LEAzzi mcsym:$src), 32),
+                                        (mcsym:$src)))>;
+
+// Calls
+// Direct calls to a global or external symbol use CALL; a call through an
+// i64 value uses the register form CALLr.
+def : Pat<(call tglobaladdr:$dst),
+          (CALL tglobaladdr:$dst)>;
+def : Pat<(call texternalsym:$dst),
+          (CALL texternalsym:$dst)>;
+def : Pat<(call i64:$dst),
+          (CALLr i64:$dst)>;
+
+// Branches
+def : Pat<(br bb:$addr), (BCRLa bb:$addr)>;
+
+// brcc
+// Signed integer and f32/f64 compares branch directly on the two operands
+// (rr form). Unsigned integer and f128 compares first materialise a compare
+// result and then branch on "0 <cond> result"; the literal 0 occupies the
+// $sy slot, so those patterns need the immediate ("ir") branch form.
+def : Pat<(brcc CCSIOp:$cond, i32:$l, i32:$r, bb:$addr),
+          (BCRWrr (icond2cc $cond), $l, $r, bb:$addr)>;
+def : Pat<(brcc CCUIOp:$cond, i32:$l, i32:$r, bb:$addr),
+          (BCRWir (icond2cc $cond), 0, (CMPUWrr $r, $l), bb:$addr)>;
+def : Pat<(brcc CCSIOp:$cond, i64:$l, i64:$r, bb:$addr),
+          (BCRLrr (icond2cc $cond), $l, $r, bb:$addr)>;
+def : Pat<(brcc CCUIOp:$cond, i64:$l, i64:$r, bb:$addr),
+          (BCRLir (icond2cc $cond), 0, (CMPrr $r, $l), bb:$addr)>;
+def : Pat<(brcc cond:$cond, f32:$l, f32:$r, bb:$addr),
+          (BCRSrr (fcond2cc $cond), $l, $r, bb:$addr)>;
+def : Pat<(brcc cond:$cond, f64:$l, f64:$r, bb:$addr),
+          (BCRDrr (fcond2cc $cond), $l, $r, bb:$addr)>;
+// Was BCRDrr, whose $sy operand is a register and cannot take the literal 0.
+def : Pat<(brcc cond:$cond, f128:$l, f128:$r, bb:$addr),
+          (BCRDir (fcond2cc $cond), 0, (FCQrr $r, $l), bb:$addr)>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+// GETGOT for PIC
+let Defs = [SX15 /* %got */, SX16 /* %plt */], hasSideEffects = 0 in {
+  def GETGOT : Pseudo<(outs getGOT:$getpcseq), (ins), "$getpcseq", [] >;
+}
+
+// GETFUNPLT for PIC
+let hasSideEffects = 0 in
+def GETFUNPLT : Pseudo<(outs I64:$dst), (ins i64imm:$addr),
+                       "$dst, $addr",
+                       [(set iPTR:$dst, (GetFunPLT tglobaladdr:$addr))] >;
+
+def : Pat<(GetFunPLT tglobaladdr:$dst),
+          (GETFUNPLT tglobaladdr:$dst)>;
+def : Pat<(GetFunPLT texternalsym:$dst),
+          (GETFUNPLT texternalsym:$dst)>;
+
+// GETTLSADDR for TLS
+let Defs = [SX0, SX10, SX12], hasSideEffects = 0 in
+def GETTLSADDR : Pseudo<(outs), (ins i64imm:$addr),
+                        "# GETTLSADDR $addr",
+                        [(GetTLSAddr tglobaltlsaddr:$addr)] >;
+
+def : Pat<(GetTLSAddr tglobaltlsaddr:$dst),
+          (GETTLSADDR tglobaltlsaddr:$dst)>;
+
+// Call-frame setup / teardown markers; both read and write SX11.
+let Defs = [SX11], Uses = [SX11], hasSideEffects = 0 in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt, i64imm:$amt2),
+                              "# ADJCALLSTACKDOWN $amt, $amt2",
+                              [(callseq_start timm:$amt, timm:$amt2)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
+                            "# ADJCALLSTACKUP $amt1",
+                            [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+let Defs = [SX8], Uses = [SX8, SX11], hasSideEffects = 0 in
+def EXTEND_STACK : Pseudo<(outs), (ins),
+                          "# EXTEND STACK",
+                          []>;
+let hasSideEffects = 0 in
+def EXTEND_STACK_GUARD : Pseudo<(outs), (ins),
+                                "# EXTEND STACK GUARD",
+                                []>;
+
+// Dynamic stack allocation yields a __llvm_grow_stack for VE targets.
+// These calls are needed to probe the stack when allocating more over
+// %s8 (%sl - stack limit).
+
+// Pseudo that yields a pointer-typed value in an I64 register from the
+// GetStackTop node; it is declared as reading SX11.
+let Uses = [SX11], hasSideEffects = 1 in
+def GETSTACKTOP : Pseudo<(outs I64:$dst), (ins),
+                         "# GET STACK TOP",
+                         [(set iPTR:$dst, (GetStackTop))]>;
+
+// SETCC pattern matches
+//
+//   CMP  %tmp, lhs, rhs     ; compare lhs and rhs
+//   or   %res, 0, (0)1      ; initialize by 0
+//   CMOV %res, (63)0, %tmp  ; set 1 if %tmp is true
+//
+// Every pattern below has the same shape: a compare feeds a conditional move
+// whose fall-back value is (ORim1 0, 0) and whose moved operand is 63 in the
+// "m0" form, and the i32 result is taken as the sub_i32 sub-register.
+// Only the compare instruction and the CMOV flavour differ per type.
+
+// i64, signed condition codes: CPX compare, CMOVL.
+def : Pat<(i32 (setcc i64:$LHS, i64:$RHS, CCSIOp:$cond)),
+          (EXTRACT_SUBREG
+              (CMOVLrm0 (icond2cc $cond),
+                        (CPXrr i64:$LHS, i64:$RHS),
+                        63,
+                        (ORim1 0, 0)), sub_i32)>;
+
+// i64, unsigned condition codes: CMP compare, CMOVL.
+def : Pat<(i32 (setcc i64:$LHS, i64:$RHS, CCUIOp:$cond)),
+          (EXTRACT_SUBREG
+              (CMOVLrm0 (icond2cc $cond),
+                        (CMPrr i64:$LHS, i64:$RHS),
+                        63,
+                        (ORim1 0, 0)), sub_i32)>;
+
+// i32, signed condition codes: CPS compare, CMOVW.
+def : Pat<(i32 (setcc i32:$LHS, i32:$RHS, CCSIOp:$cond)),
+          (EXTRACT_SUBREG
+              (CMOVWrm0 (icond2cc $cond),
+                        (CPSrr i32:$LHS, i32:$RHS),
+                        63,
+                        (ORim1 0, 0)), sub_i32)>;
+
+// i32, unsigned condition codes: CMPUW compare, CMOVW.
+def : Pat<(i32 (setcc i32:$LHS, i32:$RHS, CCUIOp:$cond)),
+          (EXTRACT_SUBREG
+              (CMOVWrm0 (icond2cc $cond),
+                        (CMPUWrr i32:$LHS, i32:$RHS),
+                        63,
+                        (ORim1 0, 0)), sub_i32)>;
+
+// f128: FCQ compare, CMOVD.
+def : Pat<(i32 (setcc f128:$LHS, f128:$RHS, cond:$cond)),
+          (EXTRACT_SUBREG
+              (CMOVDrm0 (fcond2cc $cond),
+                        (FCQrr f128:$LHS, f128:$RHS),
+                        63,
+                        (ORim1 0, 0)), sub_i32)>;
+
+// f64: FCP compare, CMOVD.
+def : Pat<(i32 (setcc f64:$LHS, f64:$RHS, cond:$cond)),
+          (EXTRACT_SUBREG
+              (CMOVDrm0 (fcond2cc $cond),
+                        (FCPrr f64:$LHS, f64:$RHS),
+                        63,
+                        (ORim1 0, 0)), sub_i32)>;
+
+// f32: FCPS compare, CMOVS.
+def : Pat<(i32 (setcc f32:$LHS, f32:$RHS, cond:$cond)),
+          (EXTRACT_SUBREG
+              (CMOVSrm0 (fcond2cc $cond),
+                        (FCPSrr f32:$LHS, f32:$RHS),
+                        63,
+                        (ORim1 0, 0)), sub_i32)>;
+
+// Special SELECTCC pattern matches
+// Use min/max for better performance.
+// +// MAX/MIN %res, %lhs, %rhs + +def : Pat<(f64 (selectcc f64:$LHS, f64:$RHS, f64:$LHS, f64:$RHS, SETOGT)), + (FCMArr $LHS, $RHS)>; +def : Pat<(f32 (selectcc f32:$LHS, f32:$RHS, f32:$LHS, f32:$RHS, SETOGT)), + (FCMASrr $LHS, $RHS)>; +def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETGT)), + (CMXarr $LHS, $RHS)>; +def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETGT)), + (CMSarr $LHS, $RHS)>; +def : Pat<(f64 (selectcc f64:$LHS, f64:$RHS, f64:$LHS, f64:$RHS, SETOGE)), + (FCMArr $LHS, $RHS)>; +def : Pat<(f32 (selectcc f32:$LHS, f32:$RHS, f32:$LHS, f32:$RHS, SETOGE)), + (FCMASrr $LHS, $RHS)>; +def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETGE)), + (CMXarr $LHS, $RHS)>; +def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETGE)), + (CMSarr $LHS, $RHS)>; + +def : Pat<(f64 (selectcc f64:$LHS, f64:$RHS, f64:$LHS, f64:$RHS, SETOLT)), + (FCMIrr $LHS, $RHS)>; +def : Pat<(f32 (selectcc f32:$LHS, f32:$RHS, f32:$LHS, f32:$RHS, SETOLT)), + (FCMISrr $LHS, $RHS)>; +def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETLT)), + (CMXirr $LHS, $RHS)>; +def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETLT)), + (CMSirr $LHS, $RHS)>; +def : Pat<(f64 (selectcc f64:$LHS, f64:$RHS, f64:$LHS, f64:$RHS, SETOLE)), + (FCMIrr $LHS, $RHS)>; +def : Pat<(f32 (selectcc f32:$LHS, f32:$RHS, f32:$LHS, f32:$RHS, SETOLE)), + (FCMISrr $LHS, $RHS)>; +def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETLE)), + (CMXirr $LHS, $RHS)>; +def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETLE)), + (CMSirr $LHS, $RHS)>; + +// Generic SELECTCC pattern matches +// +// CMP %tmp, %l, %r ; compare %l and %r +// or %res, %f, (0)1 ; initialize by %f +// CMOV %res, %t, %tmp ; set %t if %tmp is true + +// selectcc for i64 result +def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCSIOp:$cond)), + (CMOVWrr (icond2cc $cond), (CPSrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc i32:$l, 
i32:$r, i64:$t, i64:$f, CCUIOp:$cond)), + (CMOVWrr (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCSIOp:$cond)), + (CMOVLrr (icond2cc $cond), (CPXrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCUIOp:$cond)), + (CMOVLrr (icond2cc $cond), (CMPrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc f32:$l, f32:$r, i64:$t, i64:$f, cond:$cond)), + (CMOVSrr (fcond2cc $cond), (FCPSrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc f64:$l, f64:$r, i64:$t, i64:$f, cond:$cond)), + (CMOVDrr (fcond2cc $cond), (FCPrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc f128:$l, f128:$r, i64:$t, i64:$f, cond:$cond)), + (CMOVDrr (fcond2cc $cond), (FCQrr $l, $r), $t, $f)>; + +// selectcc for i32 result +def : Pat<(i32 (selectcc i32:$l, i32:$r, i32:$t, i32:$f, CCSIOp:$cond)), + (EXTRACT_SUBREG + (CMOVWrr (icond2cc $cond), + (CPSrr $l, $r), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; +def : Pat<(i32 (selectcc i32:$l, i32:$r, i32:$t, i32:$f, CCUIOp:$cond)), + (EXTRACT_SUBREG + (CMOVWrr (icond2cc $cond), + (CMPUWrr $l, $r), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; +def : Pat<(i32 (selectcc i64:$l, i64:$r, i32:$t, i32:$f, CCSIOp:$cond)), + (EXTRACT_SUBREG + (CMOVLrr (icond2cc $cond), + (CPXrr $l, $r), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; +def : Pat<(i32 (selectcc i64:$l, i64:$r, i32:$t, i32:$f, CCUIOp:$cond)), + (EXTRACT_SUBREG + (CMOVLrr (icond2cc $cond), + (CMPrr $l, $r), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; +def : Pat<(i32 (selectcc f32:$l, f32:$r, i32:$t, i32:$f, cond:$cond)), + (EXTRACT_SUBREG + (CMOVSrr (fcond2cc $cond), + (FCPSrr $l, $r), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + 
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; +def : Pat<(i32 (selectcc f64:$l, f64:$r, i32:$t, i32:$f, cond:$cond)), + (EXTRACT_SUBREG + (CMOVDrr (fcond2cc $cond), + (FCPrr $l, $r), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; +def : Pat<(i32 (selectcc f128:$l, f128:$r, i32:$t, i32:$f, cond:$cond)), + (EXTRACT_SUBREG + (CMOVDrr (fcond2cc $cond), + (FCQrr $l, $r), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; + +// selectcc for f128 result +def : Pat<(f128 (selectcc i32:$l, i32:$r, f128:$t, f128:$f, CCSIOp:$cond)), + (INSERT_SUBREG + (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + (CMOVWrr (icond2cc $cond), (CPSrr $l, $r), + (EXTRACT_SUBREG $t, sub_odd), + (EXTRACT_SUBREG $f, sub_odd)), sub_odd), + (CMOVWrr (icond2cc $cond), (CPSrr $l, $r), + (EXTRACT_SUBREG $t, sub_even), + (EXTRACT_SUBREG $f, sub_even)), sub_even)>; +def : Pat<(f128 (selectcc i32:$l, i32:$r, f128:$t, f128:$f, CCUIOp:$cond)), + (INSERT_SUBREG + (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + (CMOVWrr (icond2cc $cond), (CMPUWrr $l, $r), + (EXTRACT_SUBREG $t, sub_odd), + (EXTRACT_SUBREG $f, sub_odd)), sub_odd), + (CMOVWrr (icond2cc $cond), (CMPUWrr $l, $r), + (EXTRACT_SUBREG $t, sub_even), + (EXTRACT_SUBREG $f, sub_even)), sub_even)>; +def : Pat<(f128 (selectcc i64:$l, i64:$r, f128:$t, f128:$f, CCSIOp:$cond)), + (INSERT_SUBREG + (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + (CMOVLrr (icond2cc $cond), (CPXrr $l, $r), + (EXTRACT_SUBREG $t, sub_odd), + (EXTRACT_SUBREG $f, sub_odd)), sub_odd), + (CMOVLrr (icond2cc $cond), (CPXrr $l, $r), + (EXTRACT_SUBREG $t, sub_even), + (EXTRACT_SUBREG $f, sub_even)), sub_even)>; +def : Pat<(f128 (selectcc i64:$l, i64:$r, f128:$t, f128:$f, CCUIOp:$cond)), + (INSERT_SUBREG + (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + (CMOVLrr (icond2cc $cond), (CMPrr $l, $r), + (EXTRACT_SUBREG $t, sub_odd), + (EXTRACT_SUBREG $f, sub_odd)), 
sub_odd), + (CMOVLrr (icond2cc $cond), (CMPrr $l, $r), + (EXTRACT_SUBREG $t, sub_even), + (EXTRACT_SUBREG $f, sub_even)), sub_even)>; +def : Pat<(f128 (selectcc f32:$l, f32:$r, f128:$t, f128:$f, cond:$cond)), + (INSERT_SUBREG + (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + (CMOVSrr (fcond2cc $cond), (FCPSrr $l, $r), + (EXTRACT_SUBREG $t, sub_odd), + (EXTRACT_SUBREG $f, sub_odd)), sub_odd), + (CMOVSrr (fcond2cc $cond), (FCPSrr $l, $r), + (EXTRACT_SUBREG $t, sub_even), + (EXTRACT_SUBREG $f, sub_even)), sub_even)>; +def : Pat<(f128 (selectcc f64:$l, f64:$r, f128:$t, f128:$f, cond:$cond)), + (INSERT_SUBREG + (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + (CMOVDrr (fcond2cc $cond), (FCPrr $l, $r), + (EXTRACT_SUBREG $t, sub_odd), + (EXTRACT_SUBREG $f, sub_odd)), sub_odd), + (CMOVDrr (fcond2cc $cond), (FCPrr $l, $r), + (EXTRACT_SUBREG $t, sub_even), + (EXTRACT_SUBREG $f, sub_even)), sub_even)>; +def : Pat<(f128 (selectcc f128:$l, f128:$r, f128:$t, f128:$f, cond:$cond)), + (INSERT_SUBREG + (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + (CMOVDrr (fcond2cc $cond), (FCQrr $l, $r), + (EXTRACT_SUBREG $t, sub_odd), + (EXTRACT_SUBREG $f, sub_odd)), sub_odd), + (CMOVDrr (fcond2cc $cond), (FCQrr $l, $r), + (EXTRACT_SUBREG $t, sub_even), + (EXTRACT_SUBREG $f, sub_even)), sub_even)>; + +// selectcc for f64 result +def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCSIOp:$cond)), + (CMOVWrr (icond2cc $cond), (CPSrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCUIOp:$cond)), + (CMOVWrr (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCSIOp:$cond)), + (CMOVLrr (icond2cc $cond), (CPXrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCUIOp:$cond)), + (CMOVLrr (icond2cc $cond), (CMPrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc f32:$l, f32:$r, f64:$t, f64:$f, cond:$cond)), + (CMOVSrr (fcond2cc $cond), (FCPSrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc f64:$l, f64:$r, f64:$t, f64:$f, 
cond:$cond)), + (CMOVDrr (fcond2cc $cond), (FCPrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc f128:$l, f128:$r, f64:$t, f64:$f, cond:$cond)), + (CMOVDrr (fcond2cc $cond), (FCQrr $l, $r), $t, $f)>; + +// selectcc for f32 result +def : Pat<(f32 (selectcc i32:$l, i32:$r, f32:$t, f32:$f, CCSIOp:$cond)), + (EXTRACT_SUBREG + (CMOVWrr (icond2cc $cond), + (CPSrr $l, $r), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; +def : Pat<(f32 (selectcc i32:$l, i32:$r, f32:$t, f32:$f, CCUIOp:$cond)), + (EXTRACT_SUBREG + (CMOVWrr (icond2cc $cond), + (CMPUWrr $l, $r), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; +def : Pat<(f32 (selectcc i64:$l, i64:$r, f32:$t, f32:$f, CCSIOp:$cond)), + (EXTRACT_SUBREG + (CMOVLrr (icond2cc $cond), + (CPXrr $l, $r), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; +def : Pat<(f32 (selectcc i64:$l, i64:$r, f32:$t, f32:$f, CCUIOp:$cond)), + (EXTRACT_SUBREG + (CMOVLrr (icond2cc $cond), + (CMPrr $l, $r), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; +def : Pat<(f32 (selectcc f32:$l, f32:$r, f32:$t, f32:$f, cond:$cond)), + (EXTRACT_SUBREG + (CMOVSrr (fcond2cc $cond), + (FCPSrr $l, $r), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; +def : Pat<(f32 (selectcc f64:$l, f64:$r, f32:$t, f32:$f, cond:$cond)), + (EXTRACT_SUBREG + (CMOVDrr (fcond2cc $cond), + (FCPrr $l, $r), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; +def : Pat<(f32 (selectcc f128:$l, f128:$r, f32:$t, f32:$f, cond:$cond)), + (EXTRACT_SUBREG + (CMOVDrr (fcond2cc $cond), + (FCQrr $l, $r), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (f64 
(IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; + +// Generic SELECT pattern matches +// Use cmov.w for all cases since %pred holds i32. +// +// CMOV.w.ne %res, %tval, %tmp ; set tval if %tmp is true + +def : Pat<(i64 (select i32:$pred, i64:$t, i64:$f)), + (CMOVWrr CC_INE, $pred, $t, $f)>; + +def : Pat<(i32 (select i32:$pred, i32:$t, i32:$f)), + (EXTRACT_SUBREG + (CMOVWrr CC_INE, $pred, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; + +def : Pat<(f128 (select i32:$pred, f128:$t, f128:$f)), + (INSERT_SUBREG + (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + (CMOVWrr CC_INE, $pred, + (EXTRACT_SUBREG $t, sub_odd), + (EXTRACT_SUBREG $f, sub_odd)), sub_odd), + (CMOVWrr CC_INE, $pred, + (EXTRACT_SUBREG $t, sub_even), + (EXTRACT_SUBREG $f, sub_even)), sub_even)>; + +def : Pat<(f64 (select i32:$pred, f64:$t, f64:$f)), + (CMOVWrr CC_INE, $pred, $t, $f)>; + +def : Pat<(f32 (select i32:$pred, f32:$t, f32:$f)), + (EXTRACT_SUBREG + (CMOVWrr CC_INE, $pred, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; + +// bitconvert +def : Pat<(f64 (bitconvert i64:$src)), (COPY_TO_REGCLASS $src, I64)>; +def : Pat<(i64 (bitconvert f64:$src)), (COPY_TO_REGCLASS $src, I64)>; + +def : Pat<(i32 (bitconvert f32:$op)), + (EXTRACT_SUBREG (SRAXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + $op, sub_f32), 32), sub_i32)>; +def : Pat<(f32 (bitconvert i32:$op)), + (EXTRACT_SUBREG (SLLri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + $op, sub_i32), 32), sub_f32)>; + +// Bits operations pattern matchings. 
+def : Pat<(i32 (ctpop i32:$src)), + (EXTRACT_SUBREG (PCNTr (ANDrm0 (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), $src, sub_i32), 32)), sub_i32)>; +def : Pat<(i32 (bitreverse i32:$src)), + (EXTRACT_SUBREG (SRLri (BRVr (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), $src, sub_i32)), 32), sub_i32)>; +def : Pat<(i32 (ctlz i32:$src)), + (EXTRACT_SUBREG (LDZr (SLLri (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), $src, sub_i32), 32)), sub_i32)>; +def : Pat<(i64 (bswap i64:$src)), + (BSWPri $src, 0)>; +def : Pat<(i32 (bswap i32:$src)), + (EXTRACT_SUBREG (BSWPri (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), $src, sub_i32), 1), sub_i32)>; + +// Several special pattern matches to optimize code + +def : Pat<(i32 (and i32:$lhs, 0xff)), + (AND32rm0 $lhs, 56)>; +def : Pat<(i32 (and i32:$lhs, 0xffff)), + (AND32rm0 $lhs, 48)>; +def : Pat<(i32 (and i32:$lhs, 0xffffffff)), + (AND32rm0 $lhs, 32)>; diff --git a/llvm/lib/Target/VE/VEMCInstLower.cpp b/llvm/lib/Target/VE/VEMCInstLower.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VEMCInstLower.cpp @@ -0,0 +1,116 @@ +//===-- VEMCInstLower.cpp - Convert VE MachineInstr to MCInst -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower VE MachineInstrs to their corresponding +// MCInst records. 
+// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/VEMCExpr.h" +#include "VE.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/Mangler.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" + +using namespace llvm; + +static MCOperand LowerSymbolOperand(const MachineInstr *MI, + const MachineOperand &MO, + const MCSymbol *Symbol, AsmPrinter &AP) { + + VEMCExpr::VariantKind Kind = (VEMCExpr::VariantKind)MO.getTargetFlags(); + + const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::create(Symbol, AP.OutContext); + const VEMCExpr *expr = VEMCExpr::create(Kind, MCSym, AP.OutContext); + return MCOperand::createExpr(expr); +} + +static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO, + AsmPrinter &AP) { + switch (MO.getType()) { + default: + report_fatal_error("unknown operand type"); + break; + case MachineOperand::MO_CImmediate: + report_fatal_error("unsupported MO_CImmediate operand type"); + break; + case MachineOperand::MO_FPImmediate: + report_fatal_error("unsupported MO_FPImmediate operand type"); + break; + case MachineOperand::MO_FrameIndex: + report_fatal_error("unsupported MO_FrameIndex operand type"); + break; + case MachineOperand::MO_TargetIndex: + report_fatal_error("unsupported MO_TargetIndex operand type"); + break; + case MachineOperand::MO_JumpTableIndex: + return LowerSymbolOperand(MI, MO, AP.GetJTISymbol(MO.getIndex()), AP); + case MachineOperand::MO_RegisterLiveOut: + report_fatal_error("unsupported MO_RegistrLiveOut operand type"); + break; + case MachineOperand::MO_Metadata: + report_fatal_error("unsupported MO_Metadata operand type"); + break; + case MachineOperand::MO_MCSymbol: + return LowerSymbolOperand(MI, MO, MO.getMCSymbol(), AP); + break; + case 
MachineOperand::MO_CFIIndex: + report_fatal_error("unsupported MO_CFIIndex operand type"); + break; + case MachineOperand::MO_IntrinsicID: + report_fatal_error("unsupported MO_IntrinsicID operand type"); + break; + case MachineOperand::MO_Predicate: + report_fatal_error("unsupported MO_Predicate operand type"); + break; + + case MachineOperand::MO_Register: + if (MO.isImplicit()) + break; + return MCOperand::createReg(MO.getReg()); + + case MachineOperand::MO_Immediate: + return MCOperand::createImm(MO.getImm()); + + case MachineOperand::MO_MachineBasicBlock: + return LowerSymbolOperand(MI, MO, MO.getMBB()->getSymbol(), AP); + case MachineOperand::MO_GlobalAddress: + return LowerSymbolOperand(MI, MO, AP.getSymbol(MO.getGlobal()), AP); + case MachineOperand::MO_BlockAddress: + return LowerSymbolOperand( + MI, MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), AP); + case MachineOperand::MO_ExternalSymbol: + return LowerSymbolOperand( + MI, MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP); + case MachineOperand::MO_ConstantPoolIndex: + return LowerSymbolOperand(MI, MO, AP.GetCPISymbol(MO.getIndex()), AP); + + case MachineOperand::MO_RegisterMask: + break; + } + return MCOperand(); +} + +void llvm::LowerVEMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, + AsmPrinter &AP) { + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + MCOperand MCOp = LowerOperand(MI, MO, AP); + + if (MCOp.isValid()) + OutMI.addOperand(MCOp); + } +} diff --git a/llvm/lib/Target/VE/VEMachineFunctionInfo.h b/llvm/lib/Target/VE/VEMachineFunctionInfo.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VEMachineFunctionInfo.h @@ -0,0 +1,63 @@ +//===- VEMachineFunctionInfo.h - VE Machine Function Info -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares VE specific per-machine-function information. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_VE_VEMACHINEFUNCTIONINFO_H +#define LLVM_LIB_TARGET_VE_VEMACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +class VEMachineFunctionInfo : public MachineFunctionInfo { + virtual void anchor(); + +private: + unsigned GlobalBaseReg; + + /// VectorLengthReg - Holds the virtual register for VL register. + unsigned VectorLengthReg; + + /// VarArgsFrameOffset - Frame offset to start of varargs area. + int VarArgsFrameOffset; + + /// SRetReturnReg - Holds the virtual register into which the sret + /// argument is passed. + unsigned SRetReturnReg; + + /// IsLeafProc - True if the function is a leaf procedure. + bool IsLeafProc; + +public: + VEMachineFunctionInfo() + : GlobalBaseReg(0), VectorLengthReg(0), VarArgsFrameOffset(0), + SRetReturnReg(0), IsLeafProc(false) {} + explicit VEMachineFunctionInfo(MachineFunction &MF) + : GlobalBaseReg(0), VectorLengthReg(0), VarArgsFrameOffset(0), + SRetReturnReg(0), IsLeafProc(false) {} + + unsigned getGlobalBaseReg() const { return GlobalBaseReg; } + void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } + + unsigned getVectorLengthReg() const { return VectorLengthReg; } + void setVectorLengthReg(unsigned Reg) { VectorLengthReg = Reg; } + + int getVarArgsFrameOffset() const { return VarArgsFrameOffset; } + void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; } + + unsigned getSRetReturnReg() const { return SRetReturnReg; } + void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } + + void setLeafProc(bool rhs) { IsLeafProc = rhs; } + bool isLeafProc() const { return IsLeafProc; } +}; +} // namespace llvm + +#endif diff --git 
a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp b/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp copy from llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp copy to llvm/lib/Target/VE/VEMachineFunctionInfo.cpp --- a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp +++ b/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp @@ -1,19 +1,13 @@ -//===-- VEMCTargetDesc.cpp - VE Target Descriptions -----------------------===// +//===-- VEMachineFunctionInfo.cpp - VE Machine Function Info --------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file provides VE specific target descriptions. -// -//===----------------------------------------------------------------------===// -#include "VEMCTargetDesc.h" +#include "VEMachineFunctionInfo.h" using namespace llvm; -extern "C" void LLVMInitializeVETargetMC() { - // TODO -} +void VEMachineFunctionInfo::anchor() {} diff --git a/llvm/lib/Target/VE/VERegisterInfo.h b/llvm/lib/Target/VE/VERegisterInfo.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VERegisterInfo.h @@ -0,0 +1,59 @@ +//===-- VERegisterInfo.h - VE Register Information Impl ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the VE implementation of the TargetRegisterInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_VEREGISTERINFO_H +#define LLVM_LIB_TARGET_VE_VEREGISTERINFO_H + +#include "llvm/CodeGen/TargetRegisterInfo.h" + +#define GET_REGINFO_HEADER +#include "VEGenRegisterInfo.inc" + +namespace llvm { +struct VERegisterInfo : public VEGenRegisterInfo { +private: + // VLS register class's Pressure Set ID. + unsigned VLSPSetID; + +public: + VERegisterInfo(); + + /// Code Generation virtual methods... + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const uint32_t *getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const override; + const uint32_t *getNoPreservedMask() const override; + + BitVector getReservedRegs(const MachineFunction &MF) const override; + bool isConstantPhysReg(unsigned PhysReg) const override; + + const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, + unsigned Kind) const override; + + bool requiresRegisterScavenging(const MachineFunction &MF) const override; + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; + + void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, + unsigned FIOperandNum, + RegScavenger *RS = nullptr) const override; + + Register getFrameRegister(const MachineFunction &MF) const override; + + bool canRealignStack(const MachineFunction &MF) const override; + + unsigned getRegPressureSetLimit(const MachineFunction &MF, + unsigned Idx) const override; +}; + +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/VE/VERegisterInfo.cpp b/llvm/lib/Target/VE/VERegisterInfo.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VERegisterInfo.cpp @@ -0,0 +1,292 @@ +//===-- VERegisterInfo.cpp - VE Register Information ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the VE implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#include "VERegisterInfo.h" +#include "VE.h" +#include "VEMachineFunctionInfo.h" +#include "VESubtarget.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +#define GET_REGINFO_TARGET_DESC +#include "VEGenRegisterInfo.inc" + +// VE uses %s10 == %lp to keep return address +VERegisterInfo::VERegisterInfo() : VEGenRegisterInfo(VE::SX10) {} + +bool VERegisterInfo::requiresRegisterScavenging( + const MachineFunction &MF) const { + return true; +} + +bool VERegisterInfo::requiresFrameIndexScavenging( + const MachineFunction &MF) const { + return true; +} + +const MCPhysReg * +VERegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + const Function &F = MF->getFunction(); + CallingConv::ID CC = F.getCallingConv(); + + switch (CC) { + default: + return CSR_SaveList; + } +} + +const uint32_t *VERegisterInfo::getCallPreservedMask(const MachineFunction &MF, + CallingConv::ID CC) const { + switch (CC) { + case CallingConv::VE_VEC_EXPF: + return CSR_vec_expf_RegMask; + case CallingConv::VE_LLVM_GROW_STACK: + return CSR_llvm_grow_stack_RegMask; + default: + return CSR_RegMask; + } +} + +const uint32_t *VERegisterInfo::getNoPreservedMask() const { + return CSR_NoRegs_RegMask; +} + +BitVector VERegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector 
Reserved(getNumRegs()); + Reserved.set(VE::SX8); // stack limit + Reserved.set(VE::SX9); // frame pointer + Reserved.set(VE::SX10); // link register (return address) + Reserved.set(VE::SX11); // stack pointer + + // FIXME: these may not need to be reserved + Reserved.set(VE::SX12); // outer register + Reserved.set(VE::SX13); // id register for dynamic linker + + Reserved.set(VE::SX14); // thread pointer + Reserved.set(VE::SX15); // global offset table register + Reserved.set(VE::SX16); // procedure linkage table register + Reserved.set(VE::SX17); // linkage-area register + + // Also reserve the register pair aliases covering the above + // registers, with the same conditions. This is required since + // LiveIntervals treat a register as a non reserved register if any + // of its aliases are not reserved. + Reserved.set(VE::Q4); // SX8_SX9 + Reserved.set(VE::Q5); // SX10_SX11 + Reserved.set(VE::Q6); // SX12_SX13 + Reserved.set(VE::Q7); // SX14_SX15 + Reserved.set(VE::Q8); // SX16_SX17 + + // Also reserve the integer 32 bit registers covering the above registers. + Reserved.set(VE::SW8); + Reserved.set(VE::SW9); + Reserved.set(VE::SW10); + Reserved.set(VE::SW11); + Reserved.set(VE::SW12); + Reserved.set(VE::SW13); + Reserved.set(VE::SW14); + Reserved.set(VE::SW15); + Reserved.set(VE::SW16); + Reserved.set(VE::SW17); + + // Also reserve the floating point 32 bit registers covering the above + // registers. + Reserved.set(VE::SF8); + Reserved.set(VE::SF9); + Reserved.set(VE::SF10); + Reserved.set(VE::SF11); + Reserved.set(VE::SF12); + Reserved.set(VE::SF13); + Reserved.set(VE::SF14); + Reserved.set(VE::SF15); + Reserved.set(VE::SF16); + Reserved.set(VE::SF17); + + // Also reserve the integer 16 bit registers covering the above registers. 
+ Reserved.set(VE::SH8); + Reserved.set(VE::SH9); + Reserved.set(VE::SH10); + Reserved.set(VE::SH11); + Reserved.set(VE::SH12); + Reserved.set(VE::SH13); + Reserved.set(VE::SH14); + Reserved.set(VE::SH15); + Reserved.set(VE::SH16); + Reserved.set(VE::SH17); + + // Also reserve the integer 8 bit registers covering the above registers. + Reserved.set(VE::SB8); + Reserved.set(VE::SB9); + Reserved.set(VE::SB10); + Reserved.set(VE::SB11); + Reserved.set(VE::SB12); + Reserved.set(VE::SB13); + Reserved.set(VE::SB14); + Reserved.set(VE::SB15); + Reserved.set(VE::SB16); + Reserved.set(VE::SB17); + + // VL register is reserved + // Reserved.set(VE::VL); + + // Other Misc registers are reserved + Reserved.set(VE::UCC); + Reserved.set(VE::PSW); + Reserved.set(VE::SAR); + Reserved.set(VE::PMMR); + Reserved.set(VE::PMCR0); + Reserved.set(VE::PMCR1); + Reserved.set(VE::PMCR2); + Reserved.set(VE::PMCR3); + Reserved.set(VE::PMC0); + Reserved.set(VE::PMC1); + Reserved.set(VE::PMC2); + Reserved.set(VE::PMC3); + Reserved.set(VE::PMC4); + Reserved.set(VE::PMC5); + Reserved.set(VE::PMC6); + Reserved.set(VE::PMC7); + Reserved.set(VE::PMC8); + Reserved.set(VE::PMC9); + Reserved.set(VE::PMC10); + Reserved.set(VE::PMC11); + Reserved.set(VE::PMC12); + Reserved.set(VE::PMC13); + Reserved.set(VE::PMC14); + + // sx18-sx33 are callee-saved registers + // sx34-sx63 are temporary registers + + return Reserved; +} + +bool VERegisterInfo::isConstantPhysReg(unsigned PhysReg) const { + switch (PhysReg) { + default: + return false; + } +} + +const TargetRegisterClass * +VERegisterInfo::getPointerRegClass(const MachineFunction &MF, + unsigned Kind) const { + return &VE::I64RegClass; +} + +#define DEBUG_TYPE "ve" + +static void replaceFI(MachineFunction &MF, MachineBasicBlock::iterator II, + MachineInstr &MI, const DebugLoc &dl, + unsigned FIOperandNum, int Offset, unsigned FramePtr) { + if (1) { + LLVM_DEBUG(dbgs() << "replaceFI: "; MI.dump()); + } + + // Replace frame index with a frame pointer 
reference directly. + // VE has 32 bit offset field, so no need to expand a target instruction. + // Directly encode it. + MI.getOperand(FIOperandNum).ChangeToRegister(FramePtr, false); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); +} + +void VERegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { + assert(SPAdj == 0 && "Unexpected"); + + MachineInstr &MI = *II; + DebugLoc dl = MI.getDebugLoc(); + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); + MachineFunction &MF = *MI.getParent()->getParent(); + const VESubtarget &Subtarget = MF.getSubtarget(); + const VEFrameLowering *TFI = getFrameLowering(MF); + + unsigned FrameReg; + int Offset; + Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg); + + Offset += MI.getOperand(FIOperandNum + 1).getImm(); + + if (MI.getOpcode() == VE::STQri) { + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); + unsigned SrcReg = MI.getOperand(2).getReg(); + unsigned SrcHiReg = getSubReg(SrcReg, VE::sub_even); + unsigned SrcLoReg = getSubReg(SrcReg, VE::sub_odd); + // VE stores HiReg to 8(addr) and LoReg to 0(addr) + MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(VE::STSri)) + .addReg(FrameReg) + .addImm(0) + .addReg(SrcLoReg); + replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg); + MI.setDesc(TII.get(VE::STSri)); + MI.getOperand(2).setReg(SrcHiReg); + Offset += 8; + } else if (MI.getOpcode() == VE::LDQri) { + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned DestHiReg = getSubReg(DestReg, VE::sub_even); + unsigned DestLoReg = getSubReg(DestReg, VE::sub_odd); + // VE loads HiReg from 8(addr) and LoReg from 0(addr) + MachineInstr *StMI = + BuildMI(*MI.getParent(), II, dl, TII.get(VE::LDSri), DestLoReg) + .addReg(FrameReg) + .addImm(0); + replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg); + MI.setDesc(TII.get(VE::LDSri)); + 
MI.getOperand(0).setReg(DestHiReg); + Offset += 8; + } + + replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FrameReg); +} + +unsigned VERegisterInfo::getRegPressureSetLimit(const MachineFunction &MF, + unsigned Idx) const { + return VEGenRegisterInfo::getRegPressureSetLimit(MF, Idx); +} + +Register VERegisterInfo::getFrameRegister(const MachineFunction &MF) const { + return VE::SX9; +} + +// VE has no architectural need for stack realignment support, +// except that LLVM unfortunately currently implements overaligned +// stack objects by depending upon stack realignment support. +// If that ever changes, this can probably be deleted. +bool VERegisterInfo::canRealignStack(const MachineFunction &MF) const { + if (!TargetRegisterInfo::canRealignStack(MF)) + return false; + + // VE always has a fixed frame pointer register, so we don't need to + // worry about needing to reserve it. [even if we don't have a frame + // pointer for our frame, it still cannot be used for other things, + // or register window traps will be SADNESS.] + + // If there's a reserved call frame, we can use SP to access locals. + if (getFrameLowering(MF)->hasReservedCallFrame(MF)) + return true; + + // Otherwise, we'd need a base pointer, but those aren't implemented + // for VE at the moment. + + return false; +} diff --git a/llvm/lib/Target/VE/VERegisterInfo.td b/llvm/lib/Target/VE/VERegisterInfo.td new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VERegisterInfo.td @@ -0,0 +1,127 @@ +//===-- VERegisterInfo.td - VE Register defs ---------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the VE register file +//===----------------------------------------------------------------------===// + +class VEReg Enc, string n> : Register { + let HWEncoding{15-7} = 0; + let HWEncoding{6-0} = Enc; + let Namespace = "VE"; +} + +class VEMiscReg Enc, string n>: Register { + let HWEncoding{15-5} = 0; + let HWEncoding{4-0} = Enc; + let Namespace = "VE"; +} + +let Namespace = "VE" in { + def sub_i8 : SubRegIndex<8, 56>; // Low 8 bit (56..63) + def sub_i16 : SubRegIndex<16, 48>; // Low 16 bit (48..63) + def sub_i32 : SubRegIndex<32, 32>; // Low 32 bit (32..63) + def sub_f32 : SubRegIndex<32>; // High 32 bit (0..31) + def sub_even : SubRegIndex<64>; // High 64 bit (0..63) + def sub_odd : SubRegIndex<64, 64>; // Low 64 bit (64..127) + def sub_vm_even : SubRegIndex<256>; // High 256 bit (0..255) + def sub_vm_odd : SubRegIndex<256, 256>; // Low 256 bit (256..511) +} + +// Registers are identified with 7-bit ID numbers. +// R - 64-bit integer or floating-point registers +class R Enc, string n, list subregs = [], + list aliases = []>: VEReg { + let SubRegs = subregs; + let Aliases = aliases; +} + +// Rq - Slots in the register file for 128-bit floating-point values. 
+class Rq Enc, string n, list subregs> : VEReg { + let SubRegs = subregs; + let SubRegIndices = [sub_even, sub_odd]; + let CoveredBySubRegs = 1; +} + +// Miscellaneous Registers +def UCC : VEMiscReg<0, "UCC">; // User clock counter +def PSW : VEMiscReg<1, "PSW">; // Program status word +def SAR : VEMiscReg<2, "SAR">; // Store address register +def PMMR : VEMiscReg<7, "PMMR">; // Performance monitor mode register + +// Performance monitor configuration registers +foreach I = 0-3 in + def PMCR#I : VEMiscReg; + +// Performance monitor counter +foreach I = 0-14 in + def PMC#I : VEMiscReg; + +// Generic integer registers - 8 bits wide +foreach I = 0-63 in + def SB#I : R, DwarfRegNum<[I]>; + +// Generic integer registers - 16 bits wide +let SubRegIndices = [sub_i8] in +foreach I = 0-63 in + def SH#I : R("SB"#I)]>, DwarfRegNum<[I]>; + +// Generic integer registers - 32 bits wide +let SubRegIndices = [sub_i16] in +foreach I = 0-63 in + def SW#I : R("SH"#I)]>, DwarfRegNum<[I]>; + +// Generic floating point registers - 32 bits wide +// NOTE: Mark SF#I as alias of SW#I temporarily to avoid register allocation +// problem. +foreach I = 0-63 in + def SF#I : R("SW"#I)]>, DwarfRegNum<[I]>; + +// Generic integer registers - 64 bits wide +let SubRegIndices = [sub_i32, sub_f32], CoveredBySubRegs = 1 in +foreach I = 0-63 in + def SX#I : R("SW"#I), !cast("SF"#I)]>, + DwarfRegNum<[I]>; + +// Aliases of the S* registers used to hold 128-bit values (long doubles). +// Following foreach represents something like: +// def Q0 : Rq<0, "S0", [S0, S1]>; +// def Q1 : Rq<2, "S2", [S2, S3]>; +// ... +foreach I = 0-31 in + def Q#I : Rq("SX"#!shl(I,1)), + !cast("SX"#!add(!shl(I,1),1))]>; + +// Register classes. +// +// The register order is defined in terms of the preferred +// allocation order.
+def I8 : RegisterClass<"VE", [i8], 8, + (add (sequence "SB%u", 34, 63), + (sequence "SB%u", 0, 33))>; +def I16 : RegisterClass<"VE", [i16], 16, + (add (sequence "SH%u", 34, 63), + (sequence "SH%u", 0, 33))>; +def I32 : RegisterClass<"VE", [i32], 32, + (add (sequence "SW%u", 34, 63), + (sequence "SW%u", 0, 33))>; +def I64 : RegisterClass<"VE", [i64, f64], 64, + (add (sequence "SX%u", 34, 63), + (sequence "SX%u", 0, 33))>; +def F32 : RegisterClass<"VE", [f32], 32, + (add (sequence "SF%u", 34, 63), + (sequence "SF%u", 0, 33))>; +def F128 : RegisterClass<"VE", [f128], 128, + (add (sequence "Q%u", 17, 31), + (sequence "Q%u", 0, 16))>; + +def Misc : RegisterClass<"VE", [i64], 64, + (add UCC, PSW, SAR, PMMR, + (sequence "PMCR%u", 0, 3), + (sequence "PMC%u", 0, 14))>; diff --git a/llvm/lib/Target/VE/VESchedule.td b/llvm/lib/Target/VE/VESchedule.td new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VESchedule.td @@ -0,0 +1,27 @@ +//===-- VESchedule.td - Describe the VE Itineraries --------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +/* +def ALU_FU : FuncUnit; +def LDST_FU : FuncUnit; + +def VE_ALU : InstrItinClass; +def VE_LD : InstrItinClass; +def VE_ST : InstrItinClass; + +def VEItinerary : ProcessorItineraries<[ALU_FU, LDST_FU],[],[ + // InstrItinData]>, + // InstrItinData]>, + // InstrItinData]>, + // InstrItinData]>, + InstrItinData]> +]>; +*/ diff --git a/llvm/lib/Target/VE/VESubtarget.h b/llvm/lib/Target/VE/VESubtarget.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VESubtarget.h @@ -0,0 +1,73 @@ +//===-- VESubtarget.h - Define Subtarget for the VE -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the VE specific subclass of TargetSubtargetInfo. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_VESUBTARGET_H +#define LLVM_LIB_TARGET_VE_VESUBTARGET_H + +#include "VEFrameLowering.h" +#include "VEISelLowering.h" +#include "VEInstrInfo.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DataLayout.h" +#include + +#define GET_SUBTARGETINFO_HEADER +#include "VEGenSubtargetInfo.inc" + +namespace llvm { +class StringRef; + +class VESubtarget : public VEGenSubtargetInfo { + Triple TargetTriple; + virtual void anchor(); + + VEInstrInfo InstrInfo; + VETargetLowering TLInfo; + SelectionDAGTargetInfo TSInfo; + VEFrameLowering FrameLowering; + +public: + VESubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, + const TargetMachine &TM); + + const VEInstrInfo *getInstrInfo() const override { return &InstrInfo; } + const TargetFrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + const VERegisterInfo *getRegisterInfo() const override { + return &InstrInfo.getRegisterInfo(); + } + const VETargetLowering *getTargetLowering() const override { return &TLInfo; } + const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + + bool enableMachineScheduler() const override; + + /// ParseSubtargetFeatures - Parses features string setting specified + /// subtarget options. Definition of function is auto generated by tblgen. + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + VESubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); + + /// Given an actual stack size as determined by FrameInfo, this function + /// returns it plus 176 bytes for the register save area, return address, + /// and frame pointer, rounded up to a multiple of 16 bytes.
+ int getAdjustedFrameSize(int stackSize) const; + + bool isTargetLinux() const { return TargetTriple.isOSLinux(); } +}; + +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/VE/VESubtarget.cpp b/llvm/lib/Target/VE/VESubtarget.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VESubtarget.cpp @@ -0,0 +1,99 @@ +//===-- VESubtarget.cpp - VE Subtarget Information ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the VE specific subclass of TargetSubtargetInfo. +// +//===----------------------------------------------------------------------===// + +#include "VESubtarget.h" +#include "VE.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +#define DEBUG_TYPE "ve-subtarget" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "VEGenSubtargetInfo.inc" + +void VESubtarget::anchor() {} + +VESubtarget &VESubtarget::initializeSubtargetDependencies(StringRef CPU, + StringRef FS) { + // Determine default and user specified characteristics + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = "ve"; + + // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS); + + return *this; +} + +VESubtarget::VESubtarget(const Triple &TT, const std::string &CPU, + const std::string &FS, const TargetMachine &TM) + : VEGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT), + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), + FrameLowering(*this) {} + +int VESubtarget::getAdjustedFrameSize(int frameSize) const { + + // VE stack frame: + // + // +----------------------------------------+ + // | Locals and temporaries | + // +----------------------------------------+ + // | Parameter area for callee | + // 176(fp) | | + // +----------------------------------------+ + // | Register save area (RSA) for callee | + // | | + // 16(fp) | 20 * 8 bytes | + // +----------------------------------------+ + // 8(fp) | Return address | + // +----------------------------------------+ + // 0(fp) | Frame pointer of caller | + // --------+----------------------------------------+-------- + // | Locals and temporaries for callee | + // +----------------------------------------+ + // | Parameter area for callee of callee | + // +----------------------------------------+ + // 16(sp) | RSA for callee of callee | + // +----------------------------------------+ + // 8(sp) | Return address | + // +----------------------------------------+ + // 0(sp) | Frame pointer of callee | + // +----------------------------------------+ + + // RSA frame: + // +----------------------------------------------+ + // 168(fp) | %s33 | + // +----------------------------------------------+ + // | %s19...%s32 | + // +----------------------------------------------+ + // 48(fp) | %s18 | + // +----------------------------------------------+ + // 40(fp) | Linkage area register (%s17) | + // +----------------------------------------------+ + // 32(fp) | Procedure linkage table register (%plt=%s16) | + // +----------------------------------------------+ + // 24(fp) | Global offset table register (%got=%s15) | + // 
+----------------------------------------------+ + // 16(fp) | Thread pointer register (%tp=%s14) | + // +----------------------------------------------+ + + frameSize += 176; // for RSA, RA, and FP + frameSize = alignTo(frameSize, 16); // requires 16 bytes alignment + + return frameSize; +} + +bool VESubtarget::enableMachineScheduler() const { return true; } diff --git a/llvm/lib/Target/VE/VETargetMachine.h b/llvm/lib/Target/VE/VETargetMachine.h --- a/llvm/lib/Target/VE/VETargetMachine.h +++ b/llvm/lib/Target/VE/VETargetMachine.h @@ -13,17 +13,43 @@ #ifndef LLVM_LIB_TARGET_VE_VETARGETMACHINE_H #define LLVM_LIB_TARGET_VE_VETARGETMACHINE_H +#include "VEInstrInfo.h" +#include "VESubtarget.h" #include "llvm/Target/TargetMachine.h" namespace llvm { class VETargetMachine : public LLVMTargetMachine { + std::unique_ptr TLOF; + VESubtarget Subtarget; + // Hold Strings that can be free'd all together with VETargetMachine + // e.g.: "GCC_except_tableXX" string. + std::list StrList; + public: VETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional RM, Optional CM, CodeGenOpt::Level OL, bool JIT); ~VETargetMachine() override; + + const VESubtarget *getSubtargetImpl() const { return &Subtarget; } + const VESubtarget *getSubtargetImpl(const Function &) const override { + return &Subtarget; + } + std::list *getStrList() const { + return const_cast *>(&StrList); + } + + // Pass Pipeline Configuration + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + TargetLoweringObjectFile *getObjFileLowering() const override { + return TLOF.get(); + } + + bool isMachineVerifierClean() const override { return false; } + + TargetTransformInfo getTargetTransformInfo(const Function &F) override; }; } // namespace llvm diff --git a/llvm/lib/Target/VE/VETargetMachine.cpp b/llvm/lib/Target/VE/VETargetMachine.cpp --- a/llvm/lib/Target/VE/VETargetMachine.cpp +++ b/llvm/lib/Target/VE/VETargetMachine.cpp @@ -11,6 +11,11 @@ 
#include "VETargetMachine.h" #include "VE.h" +#include "VETargetTransformInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -47,16 +52,66 @@ return *RM; } +class VEELFTargetObjectFile : public TargetLoweringObjectFileELF { + void Initialize(MCContext &Ctx, const TargetMachine &TM) override { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); + } +}; + +static std::unique_ptr createTLOF() { + return std::make_unique(); +} + /// Create an Aurora VE architecture model -VETargetMachine::VETargetMachine( - const Target &T, const Triple &TT, StringRef CPU, StringRef FS, - const TargetOptions &Options, Optional RM, - Optional CM, CodeGenOpt::Level OL, bool JIT) - : LLVMTargetMachine( - T, computeDataLayout(TT), TT, CPU, FS, Options, - getEffectiveRelocModel(RM), - getEffectiveCodeModel(CM, CodeModel::Small), - OL) -{} +VETargetMachine::VETargetMachine(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Optional RM, + Optional CM, + CodeGenOpt::Level OL, bool JIT) + : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, + getEffectiveRelocModel(RM), + getEffectiveCodeModel(CM, CodeModel::Small), OL), + TLOF(createTLOF()), Subtarget(TT, CPU, FS, *this) { + initAsmInfo(); +} VETargetMachine::~VETargetMachine() {} + +TargetTransformInfo VETargetMachine::getTargetTransformInfo(const Function &F) { + return TargetTransformInfo(VETTIImpl(this, F)); +} + +namespace { +/// VE Code Generator Pass Configuration Options. 
+class VEPassConfig : public TargetPassConfig { +public: + VEPassConfig(VETargetMachine &TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + VETargetMachine &getVETargetMachine() const { + return getTM(); + } + + void addIRPasses() override; + bool addInstSelector() override; + void addPreEmitPass() override; +}; +} // namespace + +TargetPassConfig *VETargetMachine::createPassConfig(PassManagerBase &PM) { + return new VEPassConfig(*this, PM); +} + +void VEPassConfig::addIRPasses() { + addPass(createAtomicExpandPass()); + TargetPassConfig::addIRPasses(); +} + +bool VEPassConfig::addInstSelector() { + addPass(createVEISelDag(getVETargetMachine())); + return false; +} + +void VEPassConfig::addPreEmitPass() {} diff --git a/llvm/lib/Target/VE/VETargetTransformInfo.h b/llvm/lib/Target/VE/VETargetTransformInfo.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VETargetTransformInfo.h @@ -0,0 +1,55 @@ +//===- VETargetTransformInfo.h - VE specific TTI ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines a TargetTransformInfo::Concept conforming object specific +/// to the VE target machine. It uses the target's detailed information to +/// provide more precise answers to certain TTI queries, while letting the +/// target independent and default TTI implementations handle the rest.
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_VETARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_VE_VETARGETTRANSFORMINFO_H + +#include "VE.h" +#include "VETargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" + +namespace llvm { + +class VETTIImpl : public BasicTTIImplBase { + using BaseT = BasicTTIImplBase; + friend BaseT; + + const VESubtarget *ST; + const VETargetLowering *TLI; + + const VESubtarget *getST() const { return ST; } + const VETargetLowering *getTLI() const { return TLI; } + +public: + explicit VETTIImpl(const VETargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + + unsigned getNumberOfRegisters(unsigned ClassID) const { return 64; } + + unsigned getRegisterBitWidth(bool Vector) const { return 64; } + + unsigned getMinVectorRegisterBitWidth() const { return 64; } + + bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) { return false; } + bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) { return false; } + bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) { return false; }; + bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) { return false; }; +}; + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_VE_VETARGETTRANSFORMINFO_H diff --git a/llvm/test/CodeGen/VE/add.ll b/llvm/test/CodeGen/VE/add.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/add.ll @@ -0,0 +1,9 @@ +; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s + +define i32 @sample_add(i32, i32) { +; CHECK-LABEL: sample_add: +; CHECK: .LBB0_2: +; CHECK-NEXT: adds.w.sx %s0, %s1, %s0 + %3 = add nsw i32 %1, %0 + ret i32 %3 +} diff --git a/llvm/test/CodeGen/VE/addition.ll b/llvm/test/CodeGen/VE/addition.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/addition.ll @@ -0,0 +1,262 @@ +; RUN: llc < %s 
-mtriple=ve-unknown-unknown | FileCheck %s + +define signext i8 @func1(i8 signext, i8 signext) { +; CHECK-LABEL: func1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s34, %s1, %s0 + %3 = add i8 %1, %0 + ret i8 %3 +} + +define signext i16 @func2(i16 signext, i16 signext) { +; CHECK-LABEL: func2: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s34, %s1, %s0 + %3 = add i16 %1, %0 + ret i16 %3 +} + +define i32 @func3(i32, i32) { +; CHECK-LABEL: func3: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s1, %s0 + %3 = add nsw i32 %1, %0 + ret i32 %3 +} + +define i64 @func4(i64, i64) { +; CHECK-LABEL: func4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.l %s0, %s1, %s0 + %3 = add nsw i64 %1, %0 + ret i64 %3 +} + +define i128 @func5(i128, i128) { +; CHECK-LABEL: func5: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.l %s34, %s3, %s1 +; CHECK-NEXT: adds.l %s0, %s2, %s0 +; CHECK-NEXT: cmpu.l %s35, %s0, %s2 +; CHECK-NEXT: or %s36, 0, (0)1 +; CHECK-NEXT: cmov.l.lt %s36, (63)0, %s35 +; CHECK-NEXT: adds.w.zx %s35, %s36, (0)1 +; CHECK-NEXT: adds.l %s1, %s34, %s35 + %3 = add nsw i128 %1, %0 + ret i128 %3 +} + +define zeroext i8 @func6(i8 zeroext, i8 zeroext) { +; CHECK-LABEL: func6: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s34, %s1, %s0 + %3 = add i8 %1, %0 + ret i8 %3 +} + +define zeroext i16 @func7(i16 zeroext, i16 zeroext) { +; CHECK-LABEL: func7: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s34, %s1, %s0 + %3 = add i16 %1, %0 + ret i16 %3 +} + +define i32 @func8(i32, i32) { +; CHECK-LABEL: func8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s1, %s0 + %3 = add i32 %1, %0 + ret i32 %3 +} + +define i64 @func9(i64, i64) { +; CHECK-LABEL: func9: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.l %s0, %s1, %s0 + %3 = add i64 %1, %0 + ret i64 %3 +} + +define i128 @func10(i128, i128) { +; CHECK-LABEL: func10: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.l %s34, %s3, %s1 +; CHECK-NEXT: adds.l %s0, %s2, %s0 +; CHECK-NEXT: 
cmpu.l %s35, %s0, %s2 +; CHECK-NEXT: or %s36, 0, (0)1 +; CHECK-NEXT: cmov.l.lt %s36, (63)0, %s35 +; CHECK-NEXT: adds.w.zx %s35, %s36, (0)1 +; CHECK-NEXT: adds.l %s1, %s34, %s35 + %3 = add i128 %1, %0 + ret i128 %3 +} + +define float @func11(float, float) { +; CHECK-LABEL: func11: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fadd.s %s0, %s0, %s1 + %3 = fadd float %0, %1 + ret float %3 +} + +define double @func12(double, double) { +; CHECK-LABEL: func12: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fadd.d %s0, %s0, %s1 + %3 = fadd double %0, %1 + ret double %3 +} + +define signext i8 @func13(i8 signext) { +; CHECK-LABEL: func13: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, 5(%s0) +; CHECK-NEXT: sla.w.sx %s34, %s34, 24 +; CHECK-NEXT: sra.w.sx %s0, %s34, 24 + %2 = add i8 %0, 5 + ret i8 %2 +} + +define signext i16 @func14(i16 signext) { +; CHECK-LABEL: func14: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, 5(%s0) +; CHECK-NEXT: sla.w.sx %s34, %s34, 16 +; CHECK-NEXT: sra.w.sx %s0, %s34, 16 + %2 = add i16 %0, 5 + ret i16 %2 +} + +define i32 @func15(i32) { +; CHECK-LABEL: func15: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 5(%s0) + %2 = add nsw i32 %0, 5 + ret i32 %2 +} + +define i64 @func16(i64) { +; CHECK-LABEL: func16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 5(%s0) + %2 = add nsw i64 %0, 5 + ret i64 %2 +} + +define i128 @func17(i128) { +; CHECK-LABEL: func17: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, 5(%s0) +; CHECK-NEXT: cmpu.l %s35, %s34, %s0 +; CHECK-NEXT: or %s36, 0, (0)1 +; CHECK-NEXT: cmov.l.lt %s36, (63)0, %s35 +; CHECK-NEXT: adds.w.zx %s35, %s36, (0)1 +; CHECK-NEXT: adds.l %s1, %s1, %s35 +; CHECK-NEXT: or %s0, 0, %s34 + %2 = add nsw i128 %0, 5 + ret i128 %2 +} + +define zeroext i8 @func18(i8 zeroext) { +; CHECK-LABEL: func18: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, 5(%s0) +; CHECK-NEXT: and %s0, %s34, (56)0 + %2 = add i8 %0, 5 + ret i8 %2 +} + +define zeroext i16 @func19(i16 zeroext) { +; CHECK-LABEL: func19: 
+; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, 5(%s0) +; CHECK-NEXT: and %s0, %s34, (48)0 + %2 = add i16 %0, 5 + ret i16 %2 +} + +define i32 @func20(i32) { +; CHECK-LABEL: func20: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 5(%s0) + %2 = add i32 %0, 5 + ret i32 %2 +} + +define i64 @func21(i64) { +; CHECK-LABEL: func21: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 5(%s0) + %2 = add i64 %0, 5 + ret i64 %2 +} + +define i128 @func22(i128) { +; CHECK-LABEL: func22: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, 5(%s0) +; CHECK-NEXT: cmpu.l %s35, %s34, %s0 +; CHECK-NEXT: or %s36, 0, (0)1 +; CHECK-NEXT: cmov.l.lt %s36, (63)0, %s35 +; CHECK-NEXT: adds.w.zx %s35, %s36, (0)1 +; CHECK-NEXT: adds.l %s1, %s1, %s35 +; CHECK-NEXT: or %s0, 0, %s34 + %2 = add i128 %0, 5 + ret i128 %2 +} + +define float @func23(float) { +; CHECK-LABEL: func23: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea.sl %s34, 1084227584 +; CHECK-NEXT: or %s34, 0, %s34 +; CHECK-NEXT: fadd.s %s0, %s0, %s34 + %2 = fadd float %0, 5.000000e+00 + ret float %2 +} + +define double @func24(double) { +; CHECK-LABEL: func24: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea.sl %s34, 1075052544 +; CHECK-NEXT: fadd.d %s0, %s0, %s34 + %2 = fadd double %0, 5.000000e+00 + ret double %2 +} + +define i32 @func25(i32) { +; CHECK-LABEL: func25: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, -2147483648 +; CHECK-NEXT: xor %s0, %s0, %s34 + %2 = xor i32 %0, -2147483648 + ret i32 %2 +} + +define i64 @func26(i64) { +; CHECK-LABEL: func26: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, -2147483648 +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: adds.l %s0, %s0, %s34 + %2 = add nsw i64 %0, 2147483648 + ret i64 %2 +} + +define i128 @func27(i128) { +; CHECK-LABEL: func27: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, -2147483648 +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: adds.l %s34, %s0, %s34 +; CHECK-NEXT: cmpu.l %s35, %s34, %s0 +; CHECK-NEXT: or %s36, 0, (0)1 +; CHECK-NEXT: 
cmov.l.lt %s36, (63)0, %s35 +; CHECK-NEXT: adds.w.zx %s35, %s36, (0)1 +; CHECK-NEXT: adds.l %s1, %s1, %s35 +; CHECK-NEXT: or %s0, 0, %s34 + %2 = add nsw i128 %0, 2147483648 + ret i128 %2 +} + diff --git a/llvm/test/CodeGen/VE/alloca.ll b/llvm/test/CodeGen/VE/alloca.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/alloca.ll @@ -0,0 +1,47 @@ +; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s + +@buf = external global i8*, align 8 + +; Function Attrs: nounwind +define void @test(i32) { +; CHECK-LABEL: test: +; CHECK: .LBB0_2: +; CHECK-NEXT: adds.w.sx %s2, %s0, (0)1 +; CHECK-NEXT: lea %s34, 15(%s2) +; CHECK-NEXT: and %s0, -16, %s34 +; CHECK-NEXT: adds.l %s11, -64, %s11 +; CHECK-NEXT: lea %s34, __llvm_grow_stack@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __llvm_grow_stack@hi(%s34) +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: lea %s13, 64 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, 0(%s11, %s13) +; CHECK-NEXT: lea %s1, 176(%s11) +; CHECK-NEXT: lea %s34, buf@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, buf@hi(%s34) +; CHECK-NEXT: ld %s0, (,%s34) +; CHECK-NEXT: adds.l %s11, -64, %s11 +; CHECK-NEXT: lea %s34, memcpy@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, memcpy@hi(%s34) +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: lea %s13, 64 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, 0(%s11, %s13) +; CHECK-NEXT: or %s11, 0, %s9 + %2 = sext i32 %0 to i64 + %3 = alloca i8, i64 %2, align 8 + %4 = load i8*, i8** @buf, align 8, !tbaa !2 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %4, i8* nonnull align 8 %3, i64 %2, i1 false) + ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) + +!2 = !{!3, !3, i64 0} +!3 = !{!"any pointer", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} diff --git 
a/llvm/test/CodeGen/VE/atomic.ll b/llvm/test/CodeGen/VE/atomic.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/atomic.ll @@ -0,0 +1,1869 @@ +; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s + +@c = common global i8 0, align 32 +@s = common global i16 0, align 32 +@i = common global i32 0, align 32 +@l = common global i64 0, align 32 +@it= common global i128 0, align 32 +@ui = common global i32 0, align 32 + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_1() { +; CHECK-LABEL: test_atomic_store_1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: or %s35, 12, (0)1 +; CHECK-NEXT: st1b %s35, (,%s34) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 12, i8* @c release, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_1seq() { +; CHECK-LABEL: test_atomic_store_1seq: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: or %s35, 12, (0)1 +; CHECK-NEXT: st1b %s35, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 12, i8* @c seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_2() { +; CHECK-LABEL: test_atomic_store_2: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: or %s35, 12, (0)1 +; CHECK-NEXT: st2b %s35, (,%s34) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i16 12, i16* @s release, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_2seq() { +; CHECK-LABEL: test_atomic_store_2seq: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, s@lo +; 
CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: or %s35, 12, (0)1 +; CHECK-NEXT: st2b %s35, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i16 12, i16* @s seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_4() { +; CHECK-LABEL: test_atomic_store_4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: or %s35, 12, (0)1 +; CHECK-NEXT: stl %s35, (,%s34) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i32 12, i32* @i release, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_4cst() { +; CHECK-LABEL: test_atomic_store_4cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: or %s35, 12, (0)1 +; CHECK-NEXT: stl %s35, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i32 12, i32* @i seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_8() { +; CHECK-LABEL: test_atomic_store_8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: or %s35, 12, (0)1 +; CHECK-NEXT: st %s35, (,%s34) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i64 12, i64* @l release, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_8cst() { +; CHECK-LABEL: test_atomic_store_8cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: or %s35, 12, (0)1 +; CHECK-NEXT: st %s35, (,%s34) +; CHECK-NEXT: fencem 3 +; 
CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i64 12, i64* @l seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_16() { +; CHECK-LABEL: test_atomic_store_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, __atomic_store_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_store_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 3, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i128 12, i128* @it release, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_16cst() { +; CHECK-LABEL: test_atomic_store_16cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, __atomic_store_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_store_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i128 12, i128* @it seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_load_1() { +; CHECK-LABEL: test_atomic_load_1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: ld1b.zx %s34, (,%s34) +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: sla.w.sx %s34, %s34, 24 +; CHECK-NEXT: sra.w.sx %s0, %s34, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i8, i8* @c acquire, align 32 + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_load_1cst() { +; CHECK-LABEL: test_atomic_load_1cst: +; CHECK: .LBB{{[0-9]+}}_2: +;
CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: ld1b.zx %s34, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 24 +; CHECK-NEXT: sra.w.sx %s0, %s34, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i8, i8* @c seq_cst, align 32 + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_load_2() { +; CHECK-LABEL: test_atomic_load_2: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: ld2b.zx %s34, (,%s34) +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: sla.w.sx %s34, %s34, 16 +; CHECK-NEXT: sra.w.sx %s0, %s34, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i16, i16* @s acquire, align 32 + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_load_2cst() { +; CHECK-LABEL: test_atomic_load_2cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: ld2b.zx %s34, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 16 +; CHECK-NEXT: sra.w.sx %s0, %s34, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i16, i16* @s seq_cst, align 32 + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_load_4() { +; CHECK-LABEL: test_atomic_load_4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: ldl.zx %s0, (,%s34) +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i32, i32* @i acquire, align 32 + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_load_4cst() { +; CHECK-LABEL: test_atomic_load_4cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, 
i@hi(%s34) +; CHECK-NEXT: ldl.zx %s0, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i32, i32* @i seq_cst, align 32 + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_load_8() { +; CHECK-LABEL: test_atomic_load_8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: ld %s0, (,%s34) +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i64, i64* @l acquire, align 32 + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_load_8cst() { +; CHECK-LABEL: test_atomic_load_8cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: ld %s0, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i64, i64* @l seq_cst, align 32 + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_load_16() { +; CHECK-LABEL: test_atomic_load_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, __atomic_load_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_load_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: or %s1, 2, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i128, i128* @it acquire, align 32 + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_load_16cst() { +; CHECK-LABEL: test_atomic_load_16cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, __atomic_load_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_load_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: or %s1, 5, (0)1 +; CHECK-NEXT: bsic %lr, 
(,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i128, i128* @it seq_cst, align 32 + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_exchange_1() { +; CHECK-LABEL: test_atomic_exchange_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: and %s34, -4, %s34 +; CHECK-NEXT: ldl.sx %s35, (,%s34) +; CHECK-NEXT: lea %s36, -256 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s37, 0, %s35 +; CHECK-NEXT: and %s35, %s35, %s36 +; CHECK-NEXT: or %s35, 10, %s35 +; CHECK-NEXT: cas.w %s35, (%s34), %s37 +; CHECK-NEXT: brne.w %s35, %s37, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s35, 24 +; CHECK-NEXT: sra.w.sx %s0, %s34, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i8* @c, i8 10 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_exchange_2() { +; CHECK-LABEL: test_atomic_exchange_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: and %s34, -4, %s34 +; CHECK-NEXT: ldl.sx %s35, (,%s34) +; CHECK-NEXT: lea %s36, -65536 +; CHECK-NEXT: lea %s37, 28672 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s38, 0, %s35 +; CHECK-NEXT: and %s35, %s35, %s36 +; CHECK-NEXT: or %s35, %s35, %s37 +; CHECK-NEXT: cas.w %s35, (%s34), %s38 +; CHECK-NEXT: brne.w %s35, %s38, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s35, 16 +; CHECK-NEXT: sra.w.sx %s0, %s34, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i16* @s, i16 28672 seq_cst + ret i16 %0 +} + +; Function 
Attrs: norecurse nounwind +define i32 @test_atomic_exchange_4() { +; CHECK-LABEL: test_atomic_exchange_4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: lea %s0, 1886417008 +; CHECK-NEXT: ts1am.w %s0, (%s34), 15 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i32* @i, i32 1886417008 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_exchange_8() { +; CHECK-LABEL: test_atomic_exchange_8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: lea %s35, 1886417008 +; CHECK-NEXT: lea.sl %s0, 1886417008(%s35) +; CHECK-NEXT: ts1am.l %s0, (%s34), 127 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i64* @l, i64 8102099357864587376 acquire + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_exchange_16() { +; CHECK-LABEL: test_atomic_exchange_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, __atomic_exchange_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_exchange_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: lea %s34, 1886417008 +; CHECK-NEXT: lea.sl %s1, 1886417008(%s34) +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 2, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i128* @it, i128 8102099357864587376 acquire + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_compare_exchange_1(i8, i8) { +; CHECK-LABEL: test_atomic_compare_exchange_1: +; CHECK: .LBB{{[0-9]+}}_5: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; 
CHECK-NEXT: and %s34, -4, %s34 +; CHECK-NEXT: ldl.sx %s38, (,%s34) +; CHECK-NEXT: and %s35, %s1, (56)0 +; CHECK-NEXT: and %s36, %s0, (56)0 +; CHECK-NEXT: lea %s37, -256 +; CHECK-NEXT: and %s41, %s38, %s37 +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s38, %s41, %s35 +; CHECK-NEXT: or %s39, %s41, %s36 +; CHECK-NEXT: cas.w %s38, (%s34), %s39 +; CHECK-NEXT: breq.w %s38, %s39, .LBB{{[0-9]+}}_3 +; CHECK-NEXT: # %partword.cmpxchg.failure +; CHECK-NEXT: # in Loop: Header=BB{{[0-9]+}}_1 Depth=1 +; CHECK-NEXT: or %s40, 0, %s41 +; CHECK-NEXT: and %s41, %s38, %s37 +; CHECK-NEXT: brne.w %s40, %s41, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: .LBB{{[0-9]+}}_3: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmps.w.sx %s34, %s38, %s39 +; CHECK-NEXT: cmov.w.eq %s0, (63)0, %s34 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i8* @c, i8 %0, i8 %1 seq_cst seq_cst + %3 = extractvalue { i8, i1 } %2, 1 + %frombool = zext i1 %3 to i8 + ret i8 %frombool +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_compare_exchange_2(i16, i16) { +; CHECK-LABEL: test_atomic_compare_exchange_2: +; CHECK: .LBB{{[0-9]+}}_5: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: and %s34, -4, %s34 +; CHECK-NEXT: or %s35, 2, %s34 +; FIXME: following ld2b.zx should be ldl.sx... 
+; CHECK-NEXT: ld2b.zx %s37, (,%s35) +; CHECK-NEXT: and %s35, %s1, (48)0 +; CHECK-NEXT: and %s36, %s0, (48)0 +; CHECK-NEXT: sla.w.sx %s41, %s37, 16 +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: lea %s39, -65536 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s37, %s41, %s35 +; CHECK-NEXT: or %s38, %s41, %s36 +; CHECK-NEXT: cas.w %s37, (%s34), %s38 +; CHECK-NEXT: breq.w %s37, %s38, .LBB{{[0-9]+}}_3 +; CHECK-NEXT: # %partword.cmpxchg.failure +; CHECK-NEXT: # in Loop: Header=BB{{[0-9]+}}_1 Depth=1 +; CHECK-NEXT: or %s40, 0, %s41 +; CHECK-NEXT: and %s41, %s37, %s39 +; CHECK-NEXT: brne.w %s40, %s41, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: .LBB{{[0-9]+}}_3: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmps.w.sx %s34, %s37, %s38 +; CHECK-NEXT: cmov.w.eq %s0, (63)0, %s34 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i16* @s, i16 %0, i16 %1 seq_cst seq_cst + %3 = extractvalue { i16, i1 } %2, 1 + %conv = zext i1 %3 to i16 + ret i16 %conv +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_compare_exchange_4(i32, i32) { +; CHECK-LABEL: test_atomic_compare_exchange_4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: cas.w %s1, (%s34), %s0 +; CHECK-NEXT: cmps.w.sx %s34, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: cmov.w.eq %s0, (63)0, %s34 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i32* @i, i32 %0, i32 %1 seq_cst seq_cst + %3 = extractvalue { i32, i1 } %2, 1 + %conv = zext i1 %3 to i32 + ret i32 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, 
%s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: %2 = cmpxchg i64* @l, i64 %0, i64 %1 seq_cst seq_cst + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_compare_exchange_16(i128, i128) { +; CHECK-LABEL: test_atomic_compare_exchange_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: st %s1, -8(,%s9) +; CHECK-NEXT: st %s0, -16(,%s9) +; CHECK-NEXT: lea %s34, __atomic_compare_exchange_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_compare_exchange_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: lea %s1,-16(,%s9) +; CHECK-NEXT: or %s4, 5, (0)1 +; CHECK-NEXT: or %s5, 0, %s4 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i128* @it, i128 %0, i128 %1 seq_cst seq_cst + %3 = extractvalue { i128, i1 } %2, 1 + %conv = zext i1 %3 to i128 + ret i128 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_relaxed(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_relaxed: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* @l, i64 %0, i64 %1 monotonic monotonic + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 
%conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_consume(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_consume: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* @l, i64 %0, i64 %1 acquire acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_acquire(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_acquire: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* @l, i64 %0, i64 %1 acquire acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_release(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_release: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* @l, i64 %0, i64 %1 release monotonic + %3 = 
extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_acq_rel(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_acq_rel: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* @l, i64 %0, i64 %1 acq_rel acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 seq_cst seq_cst + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak_relaxed(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak_relaxed: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; 
CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 monotonic monotonic + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak_consume(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak_consume: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 acquire acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak_acquire(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak_acquire: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 acquire acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak_release(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak_release: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; 
CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 release monotonic + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak_acq_rel(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak_acq_rel: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 acq_rel acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind readnone +define void @test_atomic_fence_relaxed() { +; CHECK-LABEL: test_atomic_fence_relaxed: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s11, 0, %s9 +entry: + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_fence_consume() { +; CHECK-LABEL: test_atomic_fence_consume: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + fence acquire + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_fence_acquire() { +; CHECK-LABEL: test_atomic_fence_acquire: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + fence acquire + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_fence_release() { +; CHECK-LABEL: test_atomic_fence_release: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + fence release + ret void +} + +; 
Function Attrs: norecurse nounwind +define void @test_atomic_fence_acq_rel() { +; CHECK-LABEL: test_atomic_fence_acq_rel: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + fence acq_rel + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_fence_seq_cst() { +; CHECK-LABEL: test_atomic_fence_seq_cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + fence seq_cst + ret void +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_add_1() { +; CHECK-LABEL: test_atomic_fetch_add_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: and %s34, -4, %s34 +; CHECK-NEXT: ldl.sx %s36, (,%s34) +; CHECK-NEXT: lea %s35, -256 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s37, 0, %s36 +; CHECK-NEXT: lea %s36, 1(%s36) +; CHECK-NEXT: and %s36, %s36, (56)0 +; CHECK-NEXT: and %s38, %s37, %s35 +; CHECK-NEXT: or %s36, %s38, %s36 +; CHECK-NEXT: cas.w %s36, (%s34), %s37 +; CHECK-NEXT: brne.w %s36, %s37, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s36, 24 +; CHECK-NEXT: sra.w.sx %s0, %s34, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw add i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_add_2() { +; CHECK-LABEL: test_atomic_fetch_add_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: and %s34, -4, %s34 +; CHECK-NEXT: ldl.sx %s36, (,%s34) +; CHECK-NEXT: lea %s35, -65536 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s37, 0, %s36 +; CHECK-NEXT: lea %s36, 1(%s36) +; 
CHECK-NEXT: and %s36, %s36, (48)0 +; CHECK-NEXT: and %s38, %s37, %s35 +; CHECK-NEXT: or %s36, %s38, %s36 +; CHECK-NEXT: cas.w %s36, (%s34), %s37 +; CHECK-NEXT: brne.w %s36, %s37, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s36, 16 +; CHECK-NEXT: sra.w.sx %s0, %s34, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw add i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_add_4() { +; CHECK-LABEL: test_atomic_fetch_add_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: ldl.sx %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: lea %s0, 1(%s0) +; CHECK-NEXT: cas.w %s0, (%s34), %s35 +; CHECK-NEXT: brne.w %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw add i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_add_8() { +; CHECK-LABEL: test_atomic_fetch_add_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: ld %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: lea %s0, 1(%s0) +; CHECK-NEXT: cas.l %s0, (%s34), %s35 +; CHECK-NEXT: brne.l %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw add i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_add_16() { +; CHECK-LABEL: test_atomic_fetch_add_16: +; CHECK: .LBB{{[0-9]+}}_2: +; 
CHECK-NEXT: lea %s34, __atomic_fetch_add_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_add_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw add i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_sub_1() { +; CHECK-LABEL: test_atomic_fetch_sub_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: and %s34, -4, %s34 +; CHECK-NEXT: ldl.sx %s36, (,%s34) +; CHECK-NEXT: lea %s35, -256 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s37, 0, %s36 +; CHECK-NEXT: lea %s36, -1(%s36) +; CHECK-NEXT: and %s36, %s36, (56)0 +; CHECK-NEXT: and %s38, %s37, %s35 +; CHECK-NEXT: or %s36, %s38, %s36 +; CHECK-NEXT: cas.w %s36, (%s34), %s37 +; CHECK-NEXT: brne.w %s36, %s37, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s36, 24 +; CHECK-NEXT: sra.w.sx %s0, %s34, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw sub i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_sub_2() { +; CHECK-LABEL: test_atomic_fetch_sub_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: and %s34, -4, %s34 +; CHECK-NEXT: ldl.sx %s36, (,%s34) +; CHECK-NEXT: lea %s35, -65536 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s37, 0, %s36 +; CHECK-NEXT: lea %s36, -1(%s36) +; CHECK-NEXT: 
and %s36, %s36, (48)0 +; CHECK-NEXT: and %s38, %s37, %s35 +; CHECK-NEXT: or %s36, %s38, %s36 +; CHECK-NEXT: cas.w %s36, (%s34), %s37 +; CHECK-NEXT: brne.w %s36, %s37, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s36, 16 +; CHECK-NEXT: sra.w.sx %s0, %s34, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw sub i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_sub_4() { +; CHECK-LABEL: test_atomic_fetch_sub_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: ldl.sx %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: lea %s0, -1(%s0) +; CHECK-NEXT: cas.w %s0, (%s34), %s35 +; CHECK-NEXT: brne.w %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw sub i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_sub_8() { +; CHECK-LABEL: test_atomic_fetch_sub_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: ld %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: lea %s0, -1(%s0) +; CHECK-NEXT: cas.l %s0, (%s34), %s35 +; CHECK-NEXT: brne.l %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw sub i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_sub_16() { +; CHECK-LABEL: test_atomic_fetch_sub_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea 
%s34, __atomic_fetch_sub_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_sub_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw sub i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_and_1() { +; CHECK-LABEL: test_atomic_fetch_and_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: and %s35, -4, %s34 +; CHECK-NEXT: ldl.sx %s34, (,%s35) +; CHECK-NEXT: lea %s36, -255 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s37, 0, %s34 +; CHECK-NEXT: and %s34, %s34, %s36 +; CHECK-NEXT: cas.w %s34, (%s35), %s37 +; CHECK-NEXT: brne.w %s34, %s37, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 24 +; CHECK-NEXT: sra.w.sx %s0, %s34, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw and i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_and_2() { +; CHECK-LABEL: test_atomic_fetch_and_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: and %s35, -4, %s34 +; CHECK-NEXT: ldl.sx %s34, (,%s35) +; CHECK-NEXT: lea %s36, -65535 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s37, 0, %s34 +; CHECK-NEXT: and %s34, %s34, %s36 +; CHECK-NEXT: cas.w %s34, (%s35), %s37 +; CHECK-NEXT: brne.w %s34, %s37, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; 
CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 16 +; CHECK-NEXT: sra.w.sx %s0, %s34, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw and i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_and_4() { +; CHECK-LABEL: test_atomic_fetch_and_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: ldl.sx %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: and %s0, 1, %s0 +; CHECK-NEXT: cas.w %s0, (%s34), %s35 +; CHECK-NEXT: brne.w %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw and i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_and_8() { +; CHECK-LABEL: test_atomic_fetch_and_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: ld %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: and %s0, 1, %s0 +; CHECK-NEXT: cas.l %s0, (%s34), %s35 +; CHECK-NEXT: brne.l %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw and i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_and_16() { +; CHECK-LABEL: test_atomic_fetch_and_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, __atomic_fetch_and_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_and_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, 
it@hi(%s34) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw and i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_or_1() { +; CHECK-LABEL: test_atomic_fetch_or_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: and %s35, -4, %s34 +; CHECK-NEXT: ldl.sx %s34, (,%s35) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s36, 0, %s34 +; CHECK-NEXT: or %s34, 1, %s34 +; CHECK-NEXT: cas.w %s34, (%s35), %s36 +; CHECK-NEXT: brne.w %s34, %s36, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 24 +; CHECK-NEXT: sra.w.sx %s0, %s34, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw or i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_or_2() { +; CHECK-LABEL: test_atomic_fetch_or_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: and %s35, -4, %s34 +; CHECK-NEXT: ldl.sx %s34, (,%s35) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s36, 0, %s34 +; CHECK-NEXT: or %s34, 1, %s34 +; CHECK-NEXT: cas.w %s34, (%s35), %s36 +; CHECK-NEXT: brne.w %s34, %s36, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 16 +; CHECK-NEXT: sra.w.sx %s0, %s34, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw or i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_or_4() { +; CHECK-LABEL: 
test_atomic_fetch_or_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: ldl.sx %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: or %s0, 1, %s0 +; CHECK-NEXT: cas.w %s0, (%s34), %s35 +; CHECK-NEXT: brne.w %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw or i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_or_8() { +; CHECK-LABEL: test_atomic_fetch_or_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: ld %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: or %s0, 1, %s0 +; CHECK-NEXT: cas.l %s0, (%s34), %s35 +; CHECK-NEXT: brne.l %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw or i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_or_16() { +; CHECK-LABEL: test_atomic_fetch_or_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, __atomic_fetch_or_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_or_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw or i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 
@test_atomic_fetch_xor_1() { +; CHECK-LABEL: test_atomic_fetch_xor_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: and %s35, -4, %s34 +; CHECK-NEXT: ldl.sx %s34, (,%s35) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s36, 0, %s34 +; CHECK-NEXT: xor %s34, 1, %s34 +; CHECK-NEXT: cas.w %s34, (%s35), %s36 +; CHECK-NEXT: brne.w %s34, %s36, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 24 +; CHECK-NEXT: sra.w.sx %s0, %s34, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xor i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_xor_2() { +; CHECK-LABEL: test_atomic_fetch_xor_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: and %s35, -4, %s34 +; CHECK-NEXT: ldl.sx %s34, (,%s35) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s36, 0, %s34 +; CHECK-NEXT: xor %s34, 1, %s34 +; CHECK-NEXT: cas.w %s34, (%s35), %s36 +; CHECK-NEXT: brne.w %s34, %s36, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 16 +; CHECK-NEXT: sra.w.sx %s0, %s34, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xor i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_xor_4() { +; CHECK-LABEL: test_atomic_fetch_xor_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: ldl.sx %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; 
CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: xor %s0, 1, %s0 +; CHECK-NEXT: cas.w %s0, (%s34), %s35 +; CHECK-NEXT: brne.w %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xor i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_xor_8() { +; CHECK-LABEL: test_atomic_fetch_xor_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: ld %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: xor %s0, 1, %s0 +; CHECK-NEXT: cas.l %s0, (%s34), %s35 +; CHECK-NEXT: brne.l %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xor i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_xor_16() { +; CHECK-LABEL: test_atomic_fetch_xor_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, __atomic_fetch_xor_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_xor_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xor i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_nand_1() { +; CHECK-LABEL: test_atomic_fetch_nand_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: and %s34, -4, %s34 +; CHECK-NEXT: ldl.sx %s37, (,%s34) 
+; CHECK-NEXT: lea %s35, 254 +; CHECK-NEXT: lea %s36, -256 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s38, 0, %s37 +; CHECK-NEXT: xor %s37, -1, %s37 +; CHECK-NEXT: or %s37, %s37, %s35 +; CHECK-NEXT: and %s37, %s37, (56)0 +; CHECK-NEXT: and %s39, %s38, %s36 +; CHECK-NEXT: or %s37, %s39, %s37 +; CHECK-NEXT: cas.w %s37, (%s34), %s38 +; CHECK-NEXT: brne.w %s37, %s38, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s37, 24 +; CHECK-NEXT: sra.w.sx %s0, %s34, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw nand i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_nand_2() { +; CHECK-LABEL: test_atomic_fetch_nand_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: and %s34, -4, %s34 +; CHECK-NEXT: ldl.sx %s37, (,%s34) +; CHECK-NEXT: lea %s35, 65534 +; CHECK-NEXT: lea %s36, -65536 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s38, 0, %s37 +; CHECK-NEXT: xor %s37, -1, %s37 +; CHECK-NEXT: or %s37, %s37, %s35 +; CHECK-NEXT: and %s37, %s37, (48)0 +; CHECK-NEXT: and %s39, %s38, %s36 +; CHECK-NEXT: or %s37, %s39, %s37 +; CHECK-NEXT: cas.w %s37, (%s34), %s38 +; CHECK-NEXT: brne.w %s37, %s38, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s37, 16 +; CHECK-NEXT: sra.w.sx %s0, %s34, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw nand i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_nand_4() { +; CHECK-LABEL: test_atomic_fetch_nand_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; 
CHECK-NEXT: ldl.sx %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: xor %s36, -1, %s0 +; CHECK-NEXT: or %s0, -2, %s36 +; CHECK-NEXT: cas.w %s0, (%s34), %s35 +; CHECK-NEXT: brne.w %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw nand i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_nand_8() { +; CHECK-LABEL: test_atomic_fetch_nand_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: ld %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: xor %s36, -1, %s0 +; CHECK-NEXT: or %s0, -2, %s36 +; CHECK-NEXT: cas.l %s0, (%s34), %s35 +; CHECK-NEXT: brne.l %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw nand i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_nand_16() { +; CHECK-LABEL: test_atomic_fetch_nand_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, __atomic_fetch_nand_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_nand_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw nand i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_max_4() { +; CHECK-LABEL: test_atomic_fetch_max_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: 
fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: ldl.sx %s0, (,%s34) +; CHECK-NEXT: or %s35, 1, (0)1 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s36, 0, %s0 +; CHECK-NEXT: maxs.w.zx %s0, %s0, %s35 +; CHECK-NEXT: cas.w %s0, (%s34), %s36 +; CHECK-NEXT: brne.w %s0, %s36, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw max i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_min_4() { +; CHECK-LABEL: test_atomic_fetch_min_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: ldl.sx %s0, (,%s34) +; CHECK-NEXT: or %s35, 1, (0)1 +; CHECK-NEXT: or %s36, 2, (0)1 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s37, 0, %s0 +; CHECK-NEXT: cmps.w.sx %s38, %s0, %s36 +; CHECK-NEXT: or %s0, 0, %s35 +; CHECK-NEXT: cmov.w.lt %s0, %s37, %s38 +; CHECK-NEXT: cas.w %s0, (%s34), %s37 +; CHECK-NEXT: brne.w %s0, %s37, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw min i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_umax_4() { +; CHECK-LABEL: test_atomic_fetch_umax_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, ui@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, ui@hi(%s34) +; CHECK-NEXT: ldl.sx %s0, (,%s34) +; CHECK-NEXT: or %s35, 1, (0)1 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s36, 0, %s0 +; CHECK-NEXT: cmpu.w %s37, %s0, %s35 +; 
CHECK-NEXT: or %s0, 0, %s35 +; CHECK-NEXT: cmov.w.gt %s0, %s36, %s37 +; CHECK-NEXT: cas.w %s0, (%s34), %s36 +; CHECK-NEXT: brne.w %s0, %s36, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw umax i32* @ui, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_umin_4() { +; CHECK-LABEL: test_atomic_fetch_umin_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, ui@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, ui@hi(%s34) +; CHECK-NEXT: ldl.sx %s0, (,%s34) +; CHECK-NEXT: or %s35, 1, (0)1 +; CHECK-NEXT: or %s36, 2, (0)1 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s37, 0, %s0 +; CHECK-NEXT: cmpu.w %s38, %s0, %s36 +; CHECK-NEXT: or %s0, 0, %s35 +; CHECK-NEXT: cmov.w.lt %s0, %s37, %s38 +; CHECK-NEXT: cas.w %s0, (%s34), %s37 +; CHECK-NEXT: brne.w %s0, %s37, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw umin i32* @ui, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_1() { +; CHECK-LABEL: test_atomic_clear_1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: st1b %s35, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 0, i8* @c seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_2() { +; CHECK-LABEL: test_atomic_clear_2: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl 
%s34, s@hi(%s34) +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: st1b %s35, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 0, i8* bitcast (i16* @s to i8*) seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_4() { +; CHECK-LABEL: test_atomic_clear_4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: st1b %s35, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 0, i8* bitcast (i32* @i to i8*) seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_8() { +; CHECK-LABEL: test_atomic_clear_8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: st1b %s35, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 0, i8* bitcast (i64* @l to i8*) seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_16() { +; CHECK-LABEL: test_atomic_clear_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, it@hi(%s34) +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: st1b %s35, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 0, i8* bitcast (i128* @it to i8*) seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8stk(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8stk: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cas.l %s1, {{[0-9]+}}(%s11), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, 
(0)1 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = alloca i64, align 32 + %3 = cmpxchg i64* %2, i64 %0, i64 %1 seq_cst seq_cst + %4 = extractvalue { i64, i1 } %3, 1 + %conv = zext i1 %4 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_8stk() { +; CHECK-LABEL: test_atomic_clear_8stk: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s34, 0, (0)1 +; CHECK-NEXT: st1b %s34, {{[0-9]+}}(,%s11) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = alloca i64, align 32 + %1 = bitcast i64* %0 to i8* + store atomic i8 0, i8* %1 seq_cst, align 32 + ret void +} diff --git a/llvm/test/CodeGen/VE/atomic_unaligned.ll b/llvm/test/CodeGen/VE/atomic_unaligned.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/atomic_unaligned.ll @@ -0,0 +1,2180 @@ +; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s + +%struct.sci = type <{ i8, i32 }> +%struct.scl = type <{ i8, i64 }> +%struct.sil = type <{ i32, i64 }> + +@c = common global i8 0, align 1 +@s = common global i16 0, align 1 +@i = common global i32 0, align 1 +@l = common global i64 0, align 1 +@it= common global i128 0, align 1 +@ui = common global i32 0, align 1 +@sci1 = common global %struct.sci <{ i8 0, i32 0 }>, align 1 +@scl1 = common global %struct.scl <{ i8 0, i64 0 }>, align 1 +@sil1 = common global %struct.sil <{ i32 0, i64 0 }>, align 1 + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_1() { +; CHECK-LABEL: test_atomic_store_1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: or %s35, 12, (0)1 +; CHECK-NEXT: st1b %s35, (,%s34) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 12, i8* @c release, align 32 + ret void +} + +; Function Attrs: norecurse nounwind 
+define void @test_atomic_store_1seq() { +; CHECK-LABEL: test_atomic_store_1seq: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: or %s35, 12, (0)1 +; CHECK-NEXT: st1b %s35, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 12, i8* @c seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_2() { +; CHECK-LABEL: test_atomic_store_2: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: or %s35, 12, (0)1 +; CHECK-NEXT: st2b %s35, (,%s34) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i16 12, i16* @s release, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_2seq() { +; CHECK-LABEL: test_atomic_store_2seq: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: or %s35, 12, (0)1 +; CHECK-NEXT: st2b %s35, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i16 12, i16* @s seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_4() { +; CHECK-LABEL: test_atomic_store_4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: or %s35, 12, (0)1 +; CHECK-NEXT: stl %s35, (,%s34) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i32 12, i32* @i release, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_4cst() { +; CHECK-LABEL: test_atomic_store_4cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; 
CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: or %s35, 12, (0)1 +; CHECK-NEXT: stl %s35, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i32 12, i32* @i seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_8() { +; CHECK-LABEL: test_atomic_store_8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: or %s35, 12, (0)1 +; CHECK-NEXT: st %s35, (,%s34) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i64 12, i64* @l release, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_8cst() { +; CHECK-LABEL: test_atomic_store_8cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: or %s35, 12, (0)1 +; CHECK-NEXT: st %s35, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i64 12, i64* @l seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_16() { +; CHECK-LABEL: test_atomic_store_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, __atomic_store_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_store_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 3, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i128 12, i128* @it release, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_16cst() { +; CHECK-LABEL: test_atomic_store_16cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, __atomic_store_16@lo +;
CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_store_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i128 12, i128* @it seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_load_1() { +; CHECK-LABEL: test_atomic_load_1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: ld1b.zx %s34, (,%s34) +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: sla.w.sx %s34, %s34, 24 +; CHECK-NEXT: sra.w.sx %s0, %s34, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i8, i8* @c acquire, align 32 + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_load_1cst() { +; CHECK-LABEL: test_atomic_load_1cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: ld1b.zx %s34, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 24 +; CHECK-NEXT: sra.w.sx %s0, %s34, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i8, i8* @c seq_cst, align 32 + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_load_2() { +; CHECK-LABEL: test_atomic_load_2: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: ld2b.zx %s34, (,%s34) +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: sla.w.sx %s34, %s34, 16 +; CHECK-NEXT: sra.w.sx %s0, %s34, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i16, i16* @s acquire, align 32 + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 
@test_atomic_load_2cst() { +; CHECK-LABEL: test_atomic_load_2cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: ld2b.zx %s34, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 16 +; CHECK-NEXT: sra.w.sx %s0, %s34, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i16, i16* @s seq_cst, align 32 + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_load_4() { +; CHECK-LABEL: test_atomic_load_4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: ldl.zx %s0, (,%s34) +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i32, i32* @i acquire, align 32 + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_load_4cst() { +; CHECK-LABEL: test_atomic_load_4cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: ldl.zx %s0, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i32, i32* @i seq_cst, align 32 + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_load_8() { +; CHECK-LABEL: test_atomic_load_8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: ld %s0, (,%s34) +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i64, i64* @l acquire, align 32 + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_load_8cst() { +; CHECK-LABEL: test_atomic_load_8cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: ld %s0, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or 
%s11, 0, %s9 +entry: + %0 = load atomic i64, i64* @l seq_cst, align 32 + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_load_16() { +; CHECK-LABEL: test_atomic_load_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, __atomic_load_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_load_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: or %s1, 2, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i128, i128* @it acquire, align 32 + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_load_16cst() { +; CHECK-LABEL: test_atomic_load_16cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, __atomic_load_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_load_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: or %s1, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i128, i128* @it seq_cst, align 32 + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_exchange_1() { +; CHECK-LABEL: test_atomic_exchange_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: and %s35, -4, %s34 +; CHECK-NEXT: ldl.sx %s38, (,%s35) +; CHECK-NEXT: lea %s36, 255 +; CHECK-NEXT: or %s37, 10, (0)1 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s39, 0, %s38 +; CHECK-NEXT: adds.w.sx %s38, %s34, (0)1 +; CHECK-NEXT: and %s38, 3, %s38 +; CHECK-NEXT: sla.w.sx %s38, %s38, 3 +; CHECK-NEXT: sla.w.sx %s40, %s36, %s38 +; CHECK-NEXT: xor %s40, -1, %s40 +; CHECK-NEXT: and %s40, %s39, %s40 +; 
CHECK-NEXT: sla.w.sx %s38, %s37, %s38 +; CHECK-NEXT: or %s38, %s40, %s38 +; CHECK-NEXT: cas.w %s38, (%s35), %s39 +; CHECK-NEXT: brne.w %s38, %s39, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s35, %s38, (32)0 +; CHECK-NEXT: adds.w.sx %s34, %s34, (0)1 +; CHECK-NEXT: and %s34, 3, %s34 +; CHECK-NEXT: sla.w.sx %s34, %s34, 3 +; CHECK-NEXT: srl %s34, %s35, %s34 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 24 +; CHECK-NEXT: sra.w.sx %s0, %s34, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i8* @c, i8 10 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_exchange_2() { +; CHECK-LABEL: test_atomic_exchange_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: and %s35, -4, %s34 +; CHECK-NEXT: ldl.sx %s38, (,%s35) +; CHECK-NEXT: lea %s36, 65535 +; CHECK-NEXT: lea %s37, 28672 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s39, 0, %s38 +; CHECK-NEXT: adds.w.sx %s38, %s34, (0)1 +; CHECK-NEXT: and %s38, 3, %s38 +; CHECK-NEXT: sla.w.sx %s38, %s38, 3 +; CHECK-NEXT: sla.w.sx %s40, %s36, %s38 +; CHECK-NEXT: xor %s40, -1, %s40 +; CHECK-NEXT: and %s40, %s39, %s40 +; CHECK-NEXT: sla.w.sx %s38, %s37, %s38 +; CHECK-NEXT: or %s38, %s40, %s38 +; CHECK-NEXT: cas.w %s38, (%s35), %s39 +; CHECK-NEXT: brne.w %s38, %s39, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s35, %s38, (32)0 +; CHECK-NEXT: adds.w.sx %s34, %s34, (0)1 +; CHECK-NEXT: and %s34, 3, %s34 +; CHECK-NEXT: sla.w.sx %s34, %s34, 3 +; CHECK-NEXT: srl %s34, %s35, %s34 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 16 +; CHECK-NEXT: sra.w.sx %s0, %s34, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i16* @s, i16 28672 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse 
nounwind +define i32 @test_atomic_exchange_4() { +; CHECK-LABEL: test_atomic_exchange_4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: lea %s0, 1886417008 +; CHECK-NEXT: ts1am.w %s0, (%s34), 15 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i32* @i, i32 1886417008 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_exchange_4_align1() { +; CHECK-LABEL: test_atomic_exchange_4_align1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, sci1@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, sci1@hi(%s34) +; CHECK-NEXT: lea %s0, 1886417008 +; FIXME: Bus Error occurred due to unaligned ts1am instruction +; CHECK-NEXT: ts1am.w %s0, 1(%s34), 15 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i32* getelementptr inbounds (%struct.sci, %struct.sci* @sci1, i32 0, i32 1), i32 1886417008 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_exchange_8() { +; CHECK-LABEL: test_atomic_exchange_8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: lea %s35, 1886417008 +; CHECK-NEXT: lea.sl %s0, 1886417008(%s35) +; CHECK-NEXT: ts1am.l %s0, (%s34), 127 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i64* @l, i64 8102099357864587376 acquire + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_exchange_8_align1() { +; CHECK-LABEL: test_atomic_exchange_8_align1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, scl1@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, scl1@hi(%s34) +; CHECK-NEXT: lea %s35, 1886417008 +; CHECK-NEXT: lea.sl %s0, 1886417008(%s35) +; FIXME: Bus Error occurred due to
unaligned ts1am instruction +; CHECK-NEXT: ts1am.l %s0, 1(%s34), 127 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i64* getelementptr inbounds (%struct.scl, %struct.scl* @scl1, i32 0, i32 1), i64 8102099357864587376 acquire + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_exchange_8_align4() { +; CHECK-LABEL: test_atomic_exchange_8_align4 +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, sil1@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, sil1@hi(%s34) +; CHECK-NEXT: lea %s35, 1886417008 +; CHECK-NEXT: lea.sl %s0, 1886417008(%s35) +; FIXME: Bus Error occurred due to unaligned ts1am instruction +; CHECK-NEXT: ts1am.l %s0, 4(%s34), 127 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i64* getelementptr inbounds (%struct.sil, %struct.sil* @sil1, i32 0, i32 1), i64 8102099357864587376 acquire + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_exchange_16() { +; CHECK-LABEL: test_atomic_exchange_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, __atomic_exchange_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_exchange_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: lea %s34, 1886417008 +; CHECK-NEXT: lea.sl %s1, 1886417008(%s34) +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 2, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i128* @it, i128 8102099357864587376 acquire + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_compare_exchange_1(i8, i8) { +; CHECK-LABEL: test_atomic_compare_exchange_1: +; CHECK: .LBB{{[0-9]+}}_5: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: and %s35, %s1, (56)0 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; 
CHECK-NEXT: adds.w.sx %s36, %s34, (0)1 +; CHECK-NEXT: and %s36, 3, %s36 +; CHECK-NEXT: sla.w.sx %s39, %s36, 3 +; CHECK-NEXT: sla.w.sx %s35, %s35, %s39 +; CHECK-NEXT: and %s36, %s0, (56)0 +; CHECK-NEXT: sla.w.sx %s36, %s36, %s39 +; CHECK-NEXT: and %s37, -4, %s34 +; CHECK-NEXT: ldl.sx %s40, (,%s37) +; CHECK-NEXT: lea %s38, 255 +; CHECK-NEXT: sla.w.sx %s39, %s38, %s39 +; CHECK-NEXT: xor %s39, -1, %s39 +; CHECK-NEXT: and %s42, %s40, %s39 +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: # %partword.cmpxchg.loop +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s39, %s42, %s35 +; CHECK-NEXT: or %s40, %s42, %s36 +; CHECK-NEXT: cas.w %s39, (%s37), %s40 +; CHECK-NEXT: breq.w %s39, %s40, .LBB{{[0-9]+}}_3 +; CHECK-NEXT: # %bb.2: # %partword.cmpxchg.failure +; CHECK-NEXT: # in Loop: Header=BB{{[0-9]+}}_1 Depth=1 +; CHECK-NEXT: or %s41, 0, %s42 +; CHECK-NEXT: adds.w.sx %s42, %s34, (0)1 +; CHECK-NEXT: and %s42, 3, %s42 +; CHECK-NEXT: sla.w.sx %s42, %s42, 3 +; CHECK-NEXT: sla.w.sx %s42, %s38, %s42 +; CHECK-NEXT: xor %s42, -1, %s42 +; CHECK-NEXT: and %s42, %s39, %s42 +; CHECK-NEXT: brne.w %s41, %s42, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: .LBB{{[0-9]+}}_3: # %partword.cmpxchg.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmps.w.sx %s34, %s39, %s40 +; CHECK-NEXT: cmov.w.eq %s0, (63)0, %s34 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i8* @c, i8 %0, i8 %1 seq_cst seq_cst + %3 = extractvalue { i8, i1 } %2, 1 + %frombool = zext i1 %3 to i8 + ret i8 %frombool +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_compare_exchange_2(i16, i16) { +; CHECK-LABEL: test_atomic_compare_exchange_2: +; CHECK: .LBB{{[0-9]+}}_5: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: and %s35, %s1, (48)0 +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: adds.w.sx %s36, %s34, (0)1 +; CHECK-NEXT: and %s36, 3, %s36 +; CHECK-NEXT: 
sla.w.sx %s39, %s36, 3 +; CHECK-NEXT: sla.w.sx %s35, %s35, %s39 +; CHECK-NEXT: and %s36, %s0, (48)0 +; CHECK-NEXT: sla.w.sx %s36, %s36, %s39 +; CHECK-NEXT: and %s37, -4, %s34 +; CHECK-NEXT: ldl.sx %s40, (,%s37) +; CHECK-NEXT: lea %s38, 65535 +; CHECK-NEXT: sla.w.sx %s39, %s38, %s39 +; CHECK-NEXT: xor %s39, -1, %s39 +; CHECK-NEXT: and %s42, %s40, %s39 +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: # %partword.cmpxchg.loop +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s39, %s42, %s35 +; CHECK-NEXT: or %s40, %s42, %s36 +; CHECK-NEXT: cas.w %s39, (%s37), %s40 +; CHECK-NEXT: breq.w %s39, %s40, .LBB{{[0-9]+}}_3 +; CHECK-NEXT: # %bb.2: # %partword.cmpxchg.failure +; CHECK-NEXT: # in Loop: Header=BB{{[0-9]+}}_1 Depth=1 +; CHECK-NEXT: or %s41, 0, %s42 +; CHECK-NEXT: adds.w.sx %s42, %s34, (0)1 +; CHECK-NEXT: and %s42, 3, %s42 +; CHECK-NEXT: sla.w.sx %s42, %s42, 3 +; CHECK-NEXT: sla.w.sx %s42, %s38, %s42 +; CHECK-NEXT: xor %s42, -1, %s42 +; CHECK-NEXT: and %s42, %s39, %s42 +; CHECK-NEXT: brne.w %s41, %s42, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: .LBB{{[0-9]+}}_3: # %partword.cmpxchg.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmps.w.sx %s34, %s39, %s40 +; CHECK-NEXT: cmov.w.eq %s0, (63)0, %s34 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i16* @s, i16 %0, i16 %1 seq_cst seq_cst + %3 = extractvalue { i16, i1 } %2, 1 + %conv = zext i1 %3 to i16 + ret i16 %conv +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_compare_exchange_4(i32, i32) { +; CHECK-LABEL: test_atomic_compare_exchange_4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: cas.w %s1, (%s34), %s0 +; CHECK-NEXT: cmps.w.sx %s34, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: cmov.w.eq %s0, (63)0, %s34 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed 
$sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i32* @i, i32 %0, i32 %1 seq_cst seq_cst + %3 = extractvalue { i32, i1 } %2, 1 + %conv = zext i1 %3 to i32 + ret i32 %conv +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_compare_exchange_4_align1(i32, i32) { +; CHECK-LABEL: test_atomic_compare_exchange_4_align1 +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, sci1@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, sci1@hi(%s34) +; FIXME: Bus Error occurred due to unaligned cas instruction +; CHECK-NEXT: cas.w %s1, 1(%s34), %s0 +; CHECK-NEXT: cmps.w.sx %s34, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: cmov.w.eq %s0, (63)0, %s34 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i32* getelementptr inbounds (%struct.sci, %struct.sci* @sci1, i32 0, i32 1), i32 %0, i32 %1 seq_cst seq_cst + %3 = extractvalue { i32, i1 } %2, 1 + %conv = zext i1 %3 to i32 + ret i32 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: %2 = cmpxchg i64* @l, i64 %0, i64 %1 seq_cst seq_cst + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_align1(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_align1 +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, scl1@lo +; CHECK-NEXT: 
and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, scl1@hi(%s34) +; FIXME: Bus Error occurred due to unaligned cas instruction +; CHECK-NEXT: cas.l %s1, 1(%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* getelementptr inbounds (%struct.scl, %struct.scl* @scl1, i32 0, i32 1), i64 %0, i64 %1 seq_cst seq_cst + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_align4(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_align4 +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, sil1@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, sil1@hi(%s34) +; FIXME: Bus Error occurred due to unaligned cas instruction +; CHECK-NEXT: cas.l %s1, 4(%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* getelementptr inbounds (%struct.sil, %struct.sil* @sil1, i32 0, i32 1), i64 %0, i64 %1 seq_cst seq_cst + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_compare_exchange_16(i128, i128) { +; CHECK-LABEL: test_atomic_compare_exchange_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: st %s1, -8(,%s9) +; CHECK-NEXT: st %s0, -16(,%s9) +; CHECK-NEXT: lea %s34, __atomic_compare_exchange_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_compare_exchange_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: lea %s1,-16(,%s9) +; CHECK-NEXT: or %s4, 
5, (0)1 +; CHECK-NEXT: or %s5, 0, %s4 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i128* @it, i128 %0, i128 %1 seq_cst seq_cst + %3 = extractvalue { i128, i1 } %2, 1 + %conv = zext i1 %3 to i128 + ret i128 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_relaxed(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_relaxed: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* @l, i64 %0, i64 %1 monotonic monotonic + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_consume(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_consume: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* @l, i64 %0, i64 %1 acquire acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_acquire(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_acquire: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l 
%s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* @l, i64 %0, i64 %1 acquire acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_release(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_release: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* @l, i64 %0, i64 %1 release monotonic + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_acq_rel(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_acq_rel: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* @l, i64 %0, i64 %1 acq_rel acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea 
%s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 seq_cst seq_cst + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak_relaxed(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak_relaxed: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 monotonic monotonic + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak_consume(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak_consume: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 acquire acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak_acquire(i64, i64) { +; CHECK-LABEL: 
test_atomic_compare_exchange_8_weak_acquire: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 acquire acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak_release(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak_release: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 release monotonic + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak_acq_rel(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak_acq_rel: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: cas.l %s1, (%s34), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 acq_rel acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret 
i64 %conv +} + +; Function Attrs: norecurse nounwind readnone +define void @test_atomic_fence_relaxed() { +; CHECK-LABEL: test_atomic_fence_relaxed: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s11, 0, %s9 +entry: + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_fence_consume() { +; CHECK-LABEL: test_atomic_fence_consume: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + fence acquire + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_fence_acquire() { +; CHECK-LABEL: test_atomic_fence_acquire: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + fence acquire + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_fence_release() { +; CHECK-LABEL: test_atomic_fence_release: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + fence release + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_fence_acq_rel() { +; CHECK-LABEL: test_atomic_fence_acq_rel: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + fence acq_rel + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_fence_seq_cst() { +; CHECK-LABEL: test_atomic_fence_seq_cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + fence seq_cst + ret void +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_add_1() { +; CHECK-LABEL: test_atomic_fetch_add_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: and %s35, -4, %s34 +; CHECK-NEXT: ldl.sx %s38, (,%s35) +; CHECK-NEXT: or %s36, 1, (0)1 +; CHECK-NEXT: lea %s37, 255 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; 
CHECK-NEXT: or %s39, 0, %s38 +; CHECK-NEXT: adds.w.sx %s38, %s34, (0)1 +; CHECK-NEXT: and %s38, 3, %s38 +; CHECK-NEXT: sla.w.sx %s38, %s38, 3 +; CHECK-NEXT: sla.w.sx %s40, %s36, %s38 +; CHECK-NEXT: adds.w.sx %s40, %s39, %s40 +; CHECK-NEXT: sla.w.sx %s38, %s37, %s38 +; CHECK-NEXT: and %s40, %s40, %s38 +; CHECK-NEXT: xor %s38, -1, %s38 +; CHECK-NEXT: and %s38, %s39, %s38 +; CHECK-NEXT: or %s38, %s38, %s40 +; CHECK-NEXT: cas.w %s38, (%s35), %s39 +; CHECK-NEXT: brne.w %s38, %s39, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s35, %s38, (32)0 +; CHECK-NEXT: adds.w.sx %s34, %s34, (0)1 +; CHECK-NEXT: and %s34, 3, %s34 +; CHECK-NEXT: sla.w.sx %s34, %s34, 3 +; CHECK-NEXT: srl %s34, %s35, %s34 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 24 +; CHECK-NEXT: sra.w.sx %s0, %s34, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw add i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_add_2() { +; CHECK-LABEL: test_atomic_fetch_add_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: and %s35, -4, %s34 +; CHECK-NEXT: ldl.sx %s38, (,%s35) +; CHECK-NEXT: or %s36, 1, (0)1 +; CHECK-NEXT: lea %s37, 65535 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s39, 0, %s38 +; CHECK-NEXT: adds.w.sx %s38, %s34, (0)1 +; CHECK-NEXT: and %s38, 3, %s38 +; CHECK-NEXT: sla.w.sx %s38, %s38, 3 +; CHECK-NEXT: sla.w.sx %s40, %s36, %s38 +; CHECK-NEXT: adds.w.sx %s40, %s39, %s40 +; CHECK-NEXT: sla.w.sx %s38, %s37, %s38 +; CHECK-NEXT: and %s40, %s40, %s38 +; CHECK-NEXT: xor %s38, -1, %s38 +; CHECK-NEXT: and %s38, %s39, %s38 +; CHECK-NEXT: or %s38, %s38, %s40 +; CHECK-NEXT: cas.w %s38, (%s35), %s39 +; CHECK-NEXT: brne.w %s38, %s39, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; 
CHECK-NEXT: and %s35, %s38, (32)0 +; CHECK-NEXT: adds.w.sx %s34, %s34, (0)1 +; CHECK-NEXT: and %s34, 3, %s34 +; CHECK-NEXT: sla.w.sx %s34, %s34, 3 +; CHECK-NEXT: srl %s34, %s35, %s34 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 16 +; CHECK-NEXT: sra.w.sx %s0, %s34, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw add i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_add_4() { +; CHECK-LABEL: test_atomic_fetch_add_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: ldl.sx %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: lea %s0, 1(%s0) +; CHECK-NEXT: cas.w %s0, (%s34), %s35 +; CHECK-NEXT: brne.w %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw add i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_add_8() { +; CHECK-LABEL: test_atomic_fetch_add_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: ld %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: lea %s0, 1(%s0) +; CHECK-NEXT: cas.l %s0, (%s34), %s35 +; CHECK-NEXT: brne.l %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw add i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_add_16() { +; CHECK-LABEL: test_atomic_fetch_add_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, __atomic_fetch_add_16@lo +; 
CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_add_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw add i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_sub_1() { +; CHECK-LABEL: test_atomic_fetch_sub_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: and %s35, -4, %s34 +; CHECK-NEXT: ldl.sx %s38, (,%s35) +; CHECK-NEXT: or %s36, 1, (0)1 +; CHECK-NEXT: lea %s37, 255 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s39, 0, %s38 +; CHECK-NEXT: adds.w.sx %s38, %s34, (0)1 +; CHECK-NEXT: and %s38, 3, %s38 +; CHECK-NEXT: sla.w.sx %s38, %s38, 3 +; CHECK-NEXT: sla.w.sx %s40, %s36, %s38 +; CHECK-NEXT: subs.w.sx %s40, %s39, %s40 +; CHECK-NEXT: sla.w.sx %s38, %s37, %s38 +; CHECK-NEXT: and %s40, %s40, %s38 +; CHECK-NEXT: xor %s38, -1, %s38 +; CHECK-NEXT: and %s38, %s39, %s38 +; CHECK-NEXT: or %s38, %s38, %s40 +; CHECK-NEXT: cas.w %s38, (%s35), %s39 +; CHECK-NEXT: brne.w %s38, %s39, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s35, %s38, (32)0 +; CHECK-NEXT: adds.w.sx %s34, %s34, (0)1 +; CHECK-NEXT: and %s34, 3, %s34 +; CHECK-NEXT: sla.w.sx %s34, %s34, 3 +; CHECK-NEXT: srl %s34, %s35, %s34 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 24 +; CHECK-NEXT: sra.w.sx %s0, %s34, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw sub i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_sub_2() { +; CHECK-LABEL: 
test_atomic_fetch_sub_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: and %s35, -4, %s34 +; CHECK-NEXT: ldl.sx %s38, (,%s35) +; CHECK-NEXT: or %s36, 1, (0)1 +; CHECK-NEXT: lea %s37, 65535 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s39, 0, %s38 +; CHECK-NEXT: adds.w.sx %s38, %s34, (0)1 +; CHECK-NEXT: and %s38, 3, %s38 +; CHECK-NEXT: sla.w.sx %s38, %s38, 3 +; CHECK-NEXT: sla.w.sx %s40, %s36, %s38 +; CHECK-NEXT: subs.w.sx %s40, %s39, %s40 +; CHECK-NEXT: sla.w.sx %s38, %s37, %s38 +; CHECK-NEXT: and %s40, %s40, %s38 +; CHECK-NEXT: xor %s38, -1, %s38 +; CHECK-NEXT: and %s38, %s39, %s38 +; CHECK-NEXT: or %s38, %s38, %s40 +; CHECK-NEXT: cas.w %s38, (%s35), %s39 +; CHECK-NEXT: brne.w %s38, %s39, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s35, %s38, (32)0 +; CHECK-NEXT: adds.w.sx %s34, %s34, (0)1 +; CHECK-NEXT: and %s34, 3, %s34 +; CHECK-NEXT: sla.w.sx %s34, %s34, 3 +; CHECK-NEXT: srl %s34, %s35, %s34 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 16 +; CHECK-NEXT: sra.w.sx %s0, %s34, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw sub i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_sub_4() { +; CHECK-LABEL: test_atomic_fetch_sub_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: ldl.sx %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: lea %s0, -1(%s0) +; CHECK-NEXT: cas.w %s0, (%s34), %s35 +; CHECK-NEXT: brne.w %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = 
atomicrmw sub i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_sub_8() { +; CHECK-LABEL: test_atomic_fetch_sub_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: ld %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: lea %s0, -1(%s0) +; CHECK-NEXT: cas.l %s0, (%s34), %s35 +; CHECK-NEXT: brne.l %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw sub i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_sub_16() { +; CHECK-LABEL: test_atomic_fetch_sub_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, __atomic_fetch_sub_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_sub_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw sub i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_and_1() { +; CHECK-LABEL: test_atomic_fetch_and_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: and %s35, -4, %s34 +; CHECK-NEXT: ldl.sx %s38, (,%s35) +; CHECK-NEXT: or %s36, 1, (0)1 +; CHECK-NEXT: lea %s37, 255 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s39, 0, %s38 +; CHECK-NEXT: adds.w.sx %s38, %s34, (0)1 +; CHECK-NEXT: 
and %s38, 3, %s38 +; CHECK-NEXT: sla.w.sx %s38, %s38, 3 +; CHECK-NEXT: sla.w.sx %s40, %s36, %s38 +; CHECK-NEXT: sla.w.sx %s38, %s37, %s38 +; CHECK-NEXT: xor %s38, -1, %s38 +; CHECK-NEXT: or %s38, %s38, %s40 +; CHECK-NEXT: and %s38, %s39, %s38 +; CHECK-NEXT: cas.w %s38, (%s35), %s39 +; CHECK-NEXT: brne.w %s38, %s39, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s35, %s38, (32)0 +; CHECK-NEXT: adds.w.sx %s34, %s34, (0)1 +; CHECK-NEXT: and %s34, 3, %s34 +; CHECK-NEXT: sla.w.sx %s34, %s34, 3 +; CHECK-NEXT: srl %s34, %s35, %s34 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 24 +; CHECK-NEXT: sra.w.sx %s0, %s34, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw and i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_and_2() { +; CHECK-LABEL: test_atomic_fetch_and_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: and %s35, -4, %s34 +; CHECK-NEXT: ldl.sx %s38, (,%s35) +; CHECK-NEXT: or %s36, 1, (0)1 +; CHECK-NEXT: lea %s37, 65535 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s39, 0, %s38 +; CHECK-NEXT: adds.w.sx %s38, %s34, (0)1 +; CHECK-NEXT: and %s38, 3, %s38 +; CHECK-NEXT: sla.w.sx %s38, %s38, 3 +; CHECK-NEXT: sla.w.sx %s40, %s36, %s38 +; CHECK-NEXT: sla.w.sx %s38, %s37, %s38 +; CHECK-NEXT: xor %s38, -1, %s38 +; CHECK-NEXT: or %s38, %s38, %s40 +; CHECK-NEXT: and %s38, %s39, %s38 +; CHECK-NEXT: cas.w %s38, (%s35), %s39 +; CHECK-NEXT: brne.w %s38, %s39, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s35, %s38, (32)0 +; CHECK-NEXT: adds.w.sx %s34, %s34, (0)1 +; CHECK-NEXT: and %s34, 3, %s34 +; CHECK-NEXT: sla.w.sx %s34, %s34, 3 +; CHECK-NEXT: srl %s34, %s35, %s34 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 16 +; 
CHECK-NEXT: sra.w.sx %s0, %s34, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw and i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_and_4() { +; CHECK-LABEL: test_atomic_fetch_and_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: ldl.sx %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: and %s0, 1, %s0 +; CHECK-NEXT: cas.w %s0, (%s34), %s35 +; CHECK-NEXT: brne.w %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw and i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_and_8() { +; CHECK-LABEL: test_atomic_fetch_and_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: ld %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: and %s0, 1, %s0 +; CHECK-NEXT: cas.l %s0, (%s34), %s35 +; CHECK-NEXT: brne.l %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw and i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_and_16() { +; CHECK-LABEL: test_atomic_fetch_and_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, __atomic_fetch_and_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_and_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 
0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw and i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_or_1() { +; CHECK-LABEL: test_atomic_fetch_or_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: and %s35, -4, %s34 +; CHECK-NEXT: ldl.sx %s37, (,%s35) +; CHECK-NEXT: or %s36, 1, (0)1 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s38, 0, %s37 +; CHECK-NEXT: adds.w.sx %s37, %s34, (0)1 +; CHECK-NEXT: and %s37, 3, %s37 +; CHECK-NEXT: sla.w.sx %s37, %s37, 3 +; CHECK-NEXT: sla.w.sx %s37, %s36, %s37 +; CHECK-NEXT: or %s37, %s38, %s37 +; CHECK-NEXT: cas.w %s37, (%s35), %s38 +; CHECK-NEXT: brne.w %s37, %s38, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s35, %s37, (32)0 +; CHECK-NEXT: adds.w.sx %s34, %s34, (0)1 +; CHECK-NEXT: and %s34, 3, %s34 +; CHECK-NEXT: sla.w.sx %s34, %s34, 3 +; CHECK-NEXT: srl %s34, %s35, %s34 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 24 +; CHECK-NEXT: sra.w.sx %s0, %s34, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw or i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_or_2() { +; CHECK-LABEL: test_atomic_fetch_or_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: and %s35, -4, %s34 +; CHECK-NEXT: ldl.sx %s37, (,%s35) +; CHECK-NEXT: or %s36, 1, (0)1 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s38, 0, %s37 +; CHECK-NEXT: adds.w.sx %s37, %s34, (0)1 +; CHECK-NEXT: and %s37, 3, %s37 
+; CHECK-NEXT: sla.w.sx %s37, %s37, 3 +; CHECK-NEXT: sla.w.sx %s37, %s36, %s37 +; CHECK-NEXT: or %s37, %s38, %s37 +; CHECK-NEXT: cas.w %s37, (%s35), %s38 +; CHECK-NEXT: brne.w %s37, %s38, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s35, %s37, (32)0 +; CHECK-NEXT: adds.w.sx %s34, %s34, (0)1 +; CHECK-NEXT: and %s34, 3, %s34 +; CHECK-NEXT: sla.w.sx %s34, %s34, 3 +; CHECK-NEXT: srl %s34, %s35, %s34 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 16 +; CHECK-NEXT: sra.w.sx %s0, %s34, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw or i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_or_4() { +; CHECK-LABEL: test_atomic_fetch_or_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: ldl.sx %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: or %s0, 1, %s0 +; CHECK-NEXT: cas.w %s0, (%s34), %s35 +; CHECK-NEXT: brne.w %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw or i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_or_8() { +; CHECK-LABEL: test_atomic_fetch_or_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: ld %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: or %s0, 1, %s0 +; CHECK-NEXT: cas.l %s0, (%s34), %s35 +; CHECK-NEXT: brne.l %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw or i64* @l, 
i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_or_16() { +; CHECK-LABEL: test_atomic_fetch_or_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, __atomic_fetch_or_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_or_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw or i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_xor_1() { +; CHECK-LABEL: test_atomic_fetch_xor_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: and %s35, -4, %s34 +; CHECK-NEXT: ldl.sx %s37, (,%s35) +; CHECK-NEXT: or %s36, 1, (0)1 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s38, 0, %s37 +; CHECK-NEXT: adds.w.sx %s37, %s34, (0)1 +; CHECK-NEXT: and %s37, 3, %s37 +; CHECK-NEXT: sla.w.sx %s37, %s37, 3 +; CHECK-NEXT: sla.w.sx %s37, %s36, %s37 +; CHECK-NEXT: xor %s37, %s38, %s37 +; CHECK-NEXT: cas.w %s37, (%s35), %s38 +; CHECK-NEXT: brne.w %s37, %s38, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s35, %s37, (32)0 +; CHECK-NEXT: adds.w.sx %s34, %s34, (0)1 +; CHECK-NEXT: and %s34, 3, %s34 +; CHECK-NEXT: sla.w.sx %s34, %s34, 3 +; CHECK-NEXT: srl %s34, %s35, %s34 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 24 +; CHECK-NEXT: sra.w.sx %s0, %s34, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xor i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_xor_2() { +; CHECK-LABEL: 
test_atomic_fetch_xor_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: and %s35, -4, %s34 +; CHECK-NEXT: ldl.sx %s37, (,%s35) +; CHECK-NEXT: or %s36, 1, (0)1 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s38, 0, %s37 +; CHECK-NEXT: adds.w.sx %s37, %s34, (0)1 +; CHECK-NEXT: and %s37, 3, %s37 +; CHECK-NEXT: sla.w.sx %s37, %s37, 3 +; CHECK-NEXT: sla.w.sx %s37, %s36, %s37 +; CHECK-NEXT: xor %s37, %s38, %s37 +; CHECK-NEXT: cas.w %s37, (%s35), %s38 +; CHECK-NEXT: brne.w %s37, %s38, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s35, %s37, (32)0 +; CHECK-NEXT: adds.w.sx %s34, %s34, (0)1 +; CHECK-NEXT: and %s34, 3, %s34 +; CHECK-NEXT: sla.w.sx %s34, %s34, 3 +; CHECK-NEXT: srl %s34, %s35, %s34 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 16 +; CHECK-NEXT: sra.w.sx %s0, %s34, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xor i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_xor_4() { +; CHECK-LABEL: test_atomic_fetch_xor_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: ldl.sx %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: xor %s0, 1, %s0 +; CHECK-NEXT: cas.w %s0, (%s34), %s35 +; CHECK-NEXT: brne.w %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xor i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_xor_8() { +; CHECK-LABEL: test_atomic_fetch_xor_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: 
fencem 3 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: ld %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: xor %s0, 1, %s0 +; CHECK-NEXT: cas.l %s0, (%s34), %s35 +; CHECK-NEXT: brne.l %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xor i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_xor_16() { +; CHECK-LABEL: test_atomic_fetch_xor_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, __atomic_fetch_xor_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_xor_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xor i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_nand_1() { +; CHECK-LABEL: test_atomic_fetch_nand_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: and %s35, -4, %s34 +; CHECK-NEXT: ldl.sx %s38, (,%s35) +; CHECK-NEXT: or %s36, 1, (0)1 +; CHECK-NEXT: lea %s37, 255 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s39, 0, %s38 +; CHECK-NEXT: adds.w.sx %s38, %s34, (0)1 +; CHECK-NEXT: and %s38, 3, %s38 +; CHECK-NEXT: sla.w.sx %s38, %s38, 3 +; CHECK-NEXT: sla.w.sx %s40, %s36, %s38 +; CHECK-NEXT: and %s40, %s39, %s40 +; CHECK-NEXT: xor %s40, -1, %s40 +; CHECK-NEXT: sla.w.sx %s38, %s37, %s38 +; 
CHECK-NEXT: and %s40, %s40, %s38 +; CHECK-NEXT: xor %s38, -1, %s38 +; CHECK-NEXT: and %s38, %s39, %s38 +; CHECK-NEXT: or %s38, %s38, %s40 +; CHECK-NEXT: cas.w %s38, (%s35), %s39 +; CHECK-NEXT: brne.w %s38, %s39, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s35, %s38, (32)0 +; CHECK-NEXT: adds.w.sx %s34, %s34, (0)1 +; CHECK-NEXT: and %s34, 3, %s34 +; CHECK-NEXT: sla.w.sx %s34, %s34, 3 +; CHECK-NEXT: srl %s34, %s35, %s34 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 24 +; CHECK-NEXT: sra.w.sx %s0, %s34, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw nand i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_nand_2() { +; CHECK-LABEL: test_atomic_fetch_nand_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: and %s35, -4, %s34 +; CHECK-NEXT: ldl.sx %s38, (,%s35) +; CHECK-NEXT: or %s36, 1, (0)1 +; CHECK-NEXT: lea %s37, 65535 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s39, 0, %s38 +; CHECK-NEXT: adds.w.sx %s38, %s34, (0)1 +; CHECK-NEXT: and %s38, 3, %s38 +; CHECK-NEXT: sla.w.sx %s38, %s38, 3 +; CHECK-NEXT: sla.w.sx %s40, %s36, %s38 +; CHECK-NEXT: and %s40, %s39, %s40 +; CHECK-NEXT: xor %s40, -1, %s40 +; CHECK-NEXT: sla.w.sx %s38, %s37, %s38 +; CHECK-NEXT: and %s40, %s40, %s38 +; CHECK-NEXT: xor %s38, -1, %s38 +; CHECK-NEXT: and %s38, %s39, %s38 +; CHECK-NEXT: or %s38, %s38, %s40 +; CHECK-NEXT: cas.w %s38, (%s35), %s39 +; CHECK-NEXT: brne.w %s38, %s39, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s35, %s38, (32)0 +; CHECK-NEXT: adds.w.sx %s34, %s34, (0)1 +; CHECK-NEXT: and %s34, 3, %s34 +; CHECK-NEXT: sla.w.sx %s34, %s34, 3 +; CHECK-NEXT: srl %s34, %s35, %s34 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s34, %s34, 16 
+; CHECK-NEXT: sra.w.sx %s0, %s34, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw nand i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_nand_4() { +; CHECK-LABEL: test_atomic_fetch_nand_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: ldl.sx %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: xor %s36, -1, %s0 +; CHECK-NEXT: or %s0, -2, %s36 +; CHECK-NEXT: cas.w %s0, (%s34), %s35 +; CHECK-NEXT: brne.w %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw nand i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_nand_8() { +; CHECK-LABEL: test_atomic_fetch_nand_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: ld %s0, (,%s34) +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s35, 0, %s0 +; CHECK-NEXT: xor %s36, -1, %s0 +; CHECK-NEXT: or %s0, -2, %s36 +; CHECK-NEXT: cas.l %s0, (%s34), %s35 +; CHECK-NEXT: brne.l %s0, %s35, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw nand i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_nand_16() { +; CHECK-LABEL: test_atomic_fetch_nand_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s34, __atomic_fetch_nand_16@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_nand_16@hi(%s34) +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; 
CHECK-NEXT: lea.sl %s0, it@hi(%s34) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw nand i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_max_4() { +; CHECK-LABEL: test_atomic_fetch_max_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: ldl.sx %s0, (,%s34) +; CHECK-NEXT: or %s35, 1, (0)1 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s36, 0, %s0 +; CHECK-NEXT: maxs.w.zx %s0, %s0, %s35 +; CHECK-NEXT: cas.w %s0, (%s34), %s36 +; CHECK-NEXT: brne.w %s0, %s36, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw max i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_min_4() { +; CHECK-LABEL: test_atomic_fetch_min_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: ldl.sx %s0, (,%s34) +; CHECK-NEXT: or %s35, 1, (0)1 +; CHECK-NEXT: or %s36, 2, (0)1 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s37, 0, %s0 +; CHECK-NEXT: cmps.w.sx %s38, %s0, %s36 +; CHECK-NEXT: or %s0, 0, %s35 +; CHECK-NEXT: cmov.w.lt %s0, %s37, %s38 +; CHECK-NEXT: cas.w %s0, (%s34), %s37 +; CHECK-NEXT: brne.w %s0, %s37, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw min i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_umax_4() 
{ +; CHECK-LABEL: test_atomic_fetch_umax_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, ui@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, ui@hi(%s34) +; CHECK-NEXT: ldl.sx %s0, (,%s34) +; CHECK-NEXT: or %s35, 1, (0)1 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s36, 0, %s0 +; CHECK-NEXT: cmpu.w %s37, %s0, %s35 +; CHECK-NEXT: or %s0, 0, %s35 +; CHECK-NEXT: cmov.w.gt %s0, %s36, %s37 +; CHECK-NEXT: cas.w %s0, (%s34), %s36 +; CHECK-NEXT: brne.w %s0, %s36, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw umax i32* @ui, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_umin_4() { +; CHECK-LABEL: test_atomic_fetch_umin_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, ui@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, ui@hi(%s34) +; CHECK-NEXT: ldl.sx %s0, (,%s34) +; CHECK-NEXT: or %s35, 1, (0)1 +; CHECK-NEXT: or %s36, 2, (0)1 +; CHECK-NEXT: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s37, 0, %s0 +; CHECK-NEXT: cmpu.w %s38, %s0, %s36 +; CHECK-NEXT: or %s0, 0, %s35 +; CHECK-NEXT: cmov.w.lt %s0, %s37, %s38 +; CHECK-NEXT: cas.w %s0, (%s34), %s37 +; CHECK-NEXT: brne.w %s0, %s37, .LBB{{[0-9]+}}_1 +; CHECK-NEXT: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw umin i32* @ui, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_1() { +; CHECK-LABEL: test_atomic_clear_1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, c@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, c@hi(%s34) +; CHECK-NEXT: or 
%s35, 0, (0)1 +; CHECK-NEXT: st1b %s35, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 0, i8* @c seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_2() { +; CHECK-LABEL: test_atomic_clear_2: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, s@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, s@hi(%s34) +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: st1b %s35, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 0, i8* bitcast (i16* @s to i8*) seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_4() { +; CHECK-LABEL: test_atomic_clear_4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, i@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, i@hi(%s34) +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: st1b %s35, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 0, i8* bitcast (i32* @i to i8*) seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_8() { +; CHECK-LABEL: test_atomic_clear_8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, l@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, l@hi(%s34) +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: st1b %s35, (,%s34) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 0, i8* bitcast (i64* @l to i8*) seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_16() { +; CHECK-LABEL: test_atomic_clear_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s34, it@lo +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: lea.sl %s34, it@hi(%s34) +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: st1b %s35, (,%s34) +; CHECK-NEXT: fencem 3 +; 
CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 0, i8* bitcast (i128* @it to i8*) seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8stk(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8stk: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cas.l %s1, {{[0-9]+}}(%s11), %s0 +; CHECK-NEXT: cmps.l %s34, %s1, %s0 +; CHECK-NEXT: or %s35, 0, (0)1 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmov.l.eq %s35, (63)0, %s34 +; CHECK-NEXT: adds.w.zx %s0, %s35, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = alloca i64, align 32 + %3 = cmpxchg i64* %2, i64 %0, i64 %1 seq_cst seq_cst + %4 = extractvalue { i64, i1 } %3, 1 + %conv = zext i1 %4 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_8stk() { +; CHECK-LABEL: test_atomic_clear_8stk: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s34, 0, (0)1 +; CHECK-NEXT: st1b %s34, {{[0-9]+}}(,%s11) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = alloca i64, align 32 + %1 = bitcast i64* %0 to i8* + store atomic i8 0, i8* %1 seq_cst, align 32 + ret void +} diff --git a/llvm/test/CodeGen/VE/bitcast.ll b/llvm/test/CodeGen/VE/bitcast.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/bitcast.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s + +; Function Attrs: noinline nounwind optnone +define dso_local i64 @bitcastd2l(double) { +; CHECK-LABEL: bitcastd2l: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s16, 32(,%s11) +; CHECK-NEXT: ld %s15, 24(,%s11) +; CHECK-NEXT: ld %s10, 8(,%s11) +; CHECK-NEXT: ld %s9, (,%s11) +; CHECK-NEXT: b.l (,%lr) + %2 = bitcast double %0 to i64 + ret i64 %2 +} + +; Function Attrs: noinline nounwind optnone +define dso_local double @bitcastl2d(i64) { +; CHECK-LABEL: bitcastl2d: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s16, 
32(,%s11) +; CHECK-NEXT: ld %s15, 24(,%s11) +; CHECK-NEXT: ld %s10, 8(,%s11) +; CHECK-NEXT: ld %s9, (,%s11) +; CHECK-NEXT: b.l (,%lr) + %2 = bitcast i64 %0 to double + ret double %2 +} diff --git a/llvm/test/CodeGen/VE/bitreverse.ll b/llvm/test/CodeGen/VE/bitreverse.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/bitreverse.ll @@ -0,0 +1,103 @@ +; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s + +define i64 @func1(i64) { +; CHECK-LABEL: func1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: brv %s0, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call i64 @llvm.bitreverse.i64(i64 %0) + ret i64 %2 +} + +declare i64 @llvm.bitreverse.i64(i64) + +define i32 @func2(i32) { +; CHECK-LABEL: func2: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: brv %s34, %s0 +; CHECK-NEXT: srl %s0, %s34, 32 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call i32 @llvm.bitreverse.i32(i32 %0) + ret i32 %2 +} + +declare i32 @llvm.bitreverse.i32(i32) + +define signext i16 @func3(i16 signext) { +; CHECK-LABEL: func3: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: brv %s34, %s0 +; CHECK-NEXT: srl %s34, %s34, 32 +; CHECK-NEXT: sra.w.sx %s0, %s34, 16 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call i16 @llvm.bitreverse.i16(i16 %0) + ret i16 %2 +} + +declare i16 @llvm.bitreverse.i16(i16) + +define signext i8 @func4(i8 signext) { +; CHECK-LABEL: func4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: brv %s34, %s0 +; CHECK-NEXT: srl %s34, %s34, 32 +; CHECK-NEXT: sra.w.sx %s0, %s34, 24 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call i8 @llvm.bitreverse.i8(i8 %0) + ret i8 %2 +} + +declare i8 @llvm.bitreverse.i8(i8) + +define i64 @func5(i64) { +; CHECK-LABEL: func5: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: brv %s0, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call i64 
@llvm.bitreverse.i64(i64 %0) + ret i64 %2 +} + +define i32 @func6(i32) { +; CHECK-LABEL: func6: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: brv %s34, %s0 +; CHECK-NEXT: srl %s0, %s34, 32 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 + %2 = tail call i32 @llvm.bitreverse.i32(i32 %0) + ret i32 %2 +} + +define zeroext i16 @func7(i16 zeroext) { +; CHECK-LABEL: func7: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: brv %s34, %s0 +; CHECK-NEXT: srl %s34, %s34, 32 +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: srl %s0, %s34, 16 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call i16 @llvm.bitreverse.i16(i16 %0) + ret i16 %2 +} + +define zeroext i8 @func8(i8 zeroext) { +; CHECK-LABEL: func8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: brv %s34, %s0 +; CHECK-NEXT: srl %s34, %s34, 32 +; CHECK-NEXT: and %s34, %s34, (32)0 +; CHECK-NEXT: srl %s0, %s34, 24 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call i8 @llvm.bitreverse.i8(i8 %0) + ret i8 %2 +} + diff --git a/llvm/test/CodeGen/VE/branch1.ll b/llvm/test/CodeGen/VE/branch1.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/branch1.ll @@ -0,0 +1,237 @@ +; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s + +define signext i8 @func1(i8 signext, i8 signext) { +; CHECK-LABEL: func1: +; CHECK: .LBB{{[0-9]+}}_5: +; CHECK-NEXT: brle.w %s0, %s1, .LBB{{[0-9]+}}_1 + %3 = icmp sgt i8 %0, %1 + br i1 %3, label %4, label %7 + +;