diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -311,6 +311,7 @@ EM_RISCV = 243, // RISC-V EM_LANAI = 244, // Lanai 32-bit processor EM_BPF = 247, // Linux kernel bpf virtual machine + EM_VE = 251, // NEC SX-Aurora VE }; // Object file classes. diff --git a/llvm/include/llvm/IR/CMakeLists.txt b/llvm/include/llvm/IR/CMakeLists.txt --- a/llvm/include/llvm/IR/CMakeLists.txt +++ b/llvm/include/llvm/IR/CMakeLists.txt @@ -15,6 +15,7 @@ tablegen(LLVM IntrinsicsR600.h -gen-intrinsic-enums -intrinsic-prefix=r600) tablegen(LLVM IntrinsicsRISCV.h -gen-intrinsic-enums -intrinsic-prefix=riscv) tablegen(LLVM IntrinsicsS390.h -gen-intrinsic-enums -intrinsic-prefix=s390) +tablegen(LLVM IntrinsicsVE.h -gen-intrinsic-enums -intrinsic-prefix=ve) tablegen(LLVM IntrinsicsWebAssembly.h -gen-intrinsic-enums -intrinsic-prefix=wasm) tablegen(LLVM IntrinsicsX86.h -gen-intrinsic-enums -intrinsic-prefix=x86) tablegen(LLVM IntrinsicsXCore.h -gen-intrinsic-enums -intrinsic-prefix=xcore) diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h --- a/llvm/include/llvm/IR/CallingConv.h +++ b/llvm/include/llvm/IR/CallingConv.h @@ -241,6 +241,14 @@ /// The remainder matches the regular calling convention. WASM_EmscriptenInvoke = 99, + /// Calling convention used for NEC SX-Aurora VE vec_expf intrinsic + /// function. + VE_VEC_EXPF = 100, + + /// Calling convention used for NEC SX-Aurora VE llvm_grow_stack intrinsic + /// function. + VE_LLVM_GROW_STACK = 101, + /// The highest possible calling convention ID. Must be some 2^k - 1. MaxID = 1023 }; diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1359,3 +1359,4 @@ include "llvm/IR/IntrinsicsSystemZ.td" include "llvm/IR/IntrinsicsWebAssembly.td" include "llvm/IR/IntrinsicsRISCV.td" +include "llvm/IR/IntrinsicsVE.td" diff --git a/llvm/include/llvm/IR/IntrinsicsVE.td b/llvm/include/llvm/IR/IntrinsicsVE.td new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/IR/IntrinsicsVE.td @@ -0,0 +1,8 @@ +let TargetPrefix = "ve" in { + + // fencem instructions + def int_ve_fencem1 : Intrinsic<[], [], []>; + def int_ve_fencem2 : Intrinsic<[], [], []>; + def int_ve_fencem3 : Intrinsic<[], [], []>; + +} diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -42,6 +42,7 @@ #include "llvm/IR/IntrinsicsR600.h" #include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/IR/IntrinsicsS390.h" +#include "llvm/IR/IntrinsicsVE.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/IntrinsicsXCore.h" diff --git a/llvm/lib/Target/VE/AsmParser/CMakeLists.txt b/llvm/lib/Target/VE/AsmParser/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/AsmParser/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_component_library(LLVMVEAsmParser + VEAsmParser.cpp + ) diff --git a/llvm/lib/Target/VE/LLVMBuild.txt b/llvm/lib/Target/VE/AsmParser/LLVMBuild.txt copy from llvm/lib/Target/VE/LLVMBuild.txt copy to llvm/lib/Target/VE/AsmParser/LLVMBuild.txt --- a/llvm/lib/Target/VE/LLVMBuild.txt +++ b/llvm/lib/Target/VE/AsmParser/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/VE/LLVMBuild.txt ----------------------------*- Conf -*--===; +;===- ./lib/Target/VE/AsmParser/LLVMBuild.txt ------------------*- Conf -*--===; ; ; 
Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ; See https://llvm.org/LICENSE.txt for license information. @@ -14,21 +14,9 @@ ; ;===------------------------------------------------------------------------===; -[common] -subdirectories = InstPrinter MCTargetDesc TargetInfo - [component_0] -type = TargetGroup -name = VE -parent = Target -has_asmparser = 0 -has_asmprinter = 1 - -[component_1] type = Library -name = VECodeGen +name = VEAsmParser parent = VE -required_libraries = Analysis AsmPrinter CodeGen Core - MC SelectionDAG VEAsmPrinter - VEDesc VEInfo Support Target +required_libraries = MC MCParser VEDesc VEInfo Support add_to_library_groups = VE diff --git a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp @@ -0,0 +1,820 @@ +//===-- VEAsmParser.cpp - Parse VE assembly to MCInst instructions --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/VEMCExpr.h" +#include "MCTargetDesc/VEMCTargetDesc.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include + +using namespace llvm; + +// The generated AsmMatcher VEGenAsmMatcher uses "VE" as the target +// namespace. +namespace llvm { +namespace VE { + +using namespace VE; + +} // namespace VE +} // namespace llvm + +namespace { + +class VEOperand; + +class VEAsmParser : public MCTargetAsmParser { + MCAsmParser &Parser; + + /// @name Auto-generated Match Functions + /// { + +#define GET_ASSEMBLER_HEADER +#include "VEGenAsmMatcher.inc" + + /// } + + // public interface of the MCTargetAsmParser. + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, MCStreamer &Out, + uint64_t &ErrorInfo, + bool MatchingInlineAsm) override; + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override; + bool ParseDirective(AsmToken DirectiveID) override; + + unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, + unsigned Kind) override; + + // Custom parse functions for VE specific operands. 
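+  // These hooks are reached from ParseInstruction()/parseOperand() below and,
+  // for operands declared with custom parsers, presumably from the
+  // TableGen'erated MatchOperandParserImpl() in VEGenAsmMatcher.inc.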
+ OperandMatchResultTy parseMEMOperand(OperandVector &Operands); + + OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Name); + + OperandMatchResultTy parseVEAsmOperand(std::unique_ptr &Operand, + bool isCall = false); + + OperandMatchResultTy parseBranchModifiers(OperandVector &Operands); + + // Helper function for dealing with %lo / %hi in PIC mode. + const VEMCExpr *adjustPICRelocation(VEMCExpr::VariantKind VK, + const MCExpr *subExpr); + + // returns true if Tok is matched to a register and returns register in RegNo. + bool matchRegisterName(const AsmToken &Tok, unsigned &RegNo, + unsigned &RegKind); + + bool matchVEAsmModifiers(const MCExpr *&EVal, SMLoc &EndLoc); + bool parseDirectiveWord(unsigned Size, SMLoc L); + + bool is64Bit() const { + return getSTI().getTargetTriple().getArch() == Triple::sparcv9; + } + +public: + VEAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser, + const MCInstrInfo &MII, const MCTargetOptions &Options) + : MCTargetAsmParser(Options, sti, MII), Parser(parser) { + // Initialize the set of available features. + setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); + } +}; + +} // end anonymous namespace + +namespace { + +/// VEOperand - Instances of this class represent a parsed VE machine +/// instruction. +class VEOperand : public MCParsedAsmOperand { +public: + enum RegisterKind { + rk_None, + rk_IntReg, + rk_IntPairReg, + rk_FloatReg, + rk_DoubleReg, + rk_QuadReg, + rk_CoprocReg, + rk_CoprocPairReg, + rk_Special, + }; + +private: + enum KindTy { + k_Token, + k_Register, + k_Immediate, + k_MemoryReg, + k_MemoryImm + } Kind; + + SMLoc StartLoc, EndLoc; + + struct Token { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNum; + RegisterKind Kind; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct MemOp { + unsigned Base; + unsigned OffsetReg; + const MCExpr *Off; + }; + + union { + struct Token Tok; + struct RegOp Reg; + struct ImmOp Imm; + struct MemOp Mem; + }; + +public: + VEOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + + bool isToken() const override { return Kind == k_Token; } + bool isReg() const override { return Kind == k_Register; } + bool isImm() const override { return Kind == k_Immediate; } + bool isMem() const override { return isMEMrr() || isMEMri(); } + bool isMEMrr() const { return Kind == k_MemoryReg; } + bool isMEMri() const { return Kind == k_MemoryImm; } + + bool isIntReg() const { + return (Kind == k_Register && Reg.Kind == rk_IntReg); + } + + bool isFloatReg() const { + return (Kind == k_Register && Reg.Kind == rk_FloatReg); + } + + bool isFloatOrDoubleReg() const { + return (Kind == k_Register && + (Reg.Kind == rk_FloatReg || Reg.Kind == rk_DoubleReg)); + } + + bool isCoprocReg() const { + return (Kind == k_Register && Reg.Kind == rk_CoprocReg); + } + + StringRef getToken() const { + assert(Kind == k_Token && "Invalid access!"); + return StringRef(Tok.Data, Tok.Length); + } + + unsigned getReg() const override { + assert((Kind == k_Register) && "Invalid access!"); + return Reg.RegNum; + } + + const MCExpr *getImm() const { + assert((Kind == k_Immediate) && "Invalid access!"); + return Imm.Val; + } + + unsigned getMemBase() const { + assert((Kind == k_MemoryReg || Kind == k_MemoryImm) && "Invalid access!"); + return Mem.Base; + } + + unsigned getMemOffsetReg() const { + assert((Kind == k_MemoryReg) && "Invalid access!"); + return Mem.OffsetReg; + } + + const MCExpr *getMemOff() const { + assert((Kind == k_MemoryImm) && "Invalid access!"); + 
return Mem.Off; + } + + /// getStartLoc - Get the location of the first token of this operand. + SMLoc getStartLoc() const override { return StartLoc; } + /// getEndLoc - Get the location of the last token of this operand. + SMLoc getEndLoc() const override { return EndLoc; } + + void print(raw_ostream &OS) const override { + switch (Kind) { + case k_Token: + OS << "Token: " << getToken() << "\n"; + break; + case k_Register: + OS << "Reg: #" << getReg() << "\n"; + break; + case k_Immediate: + OS << "Imm: " << getImm() << "\n"; + break; + case k_MemoryReg: + OS << "Mem: " << getMemBase() << "+" << getMemOffsetReg() << "\n"; + break; + case k_MemoryImm: + assert(getMemOff() != nullptr); + OS << "Mem: " << getMemBase() << "+" << *getMemOff() << "\n"; + break; + } + } + + void addRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getReg())); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCExpr *Expr = getImm(); + addExpr(Inst, Expr); + } + + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediate when possible. Null MCExpr = 0. + if (!Expr) + Inst.addOperand(MCOperand::createImm(0)); + else if (const MCConstantExpr *CE = dyn_cast(Expr)) + Inst.addOperand(MCOperand::createImm(CE->getValue())); + else + Inst.addOperand(MCOperand::createExpr(Expr)); + } + + void addMEMrrOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::createReg(getMemBase())); + + assert(getMemOffsetReg() != 0 && "Invalid offset"); + Inst.addOperand(MCOperand::createReg(getMemOffsetReg())); + } + + void addMEMriOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::createReg(getMemBase())); + + const MCExpr *Expr = getMemOff(); + addExpr(Inst, Expr); + } + + static std::unique_ptr CreateToken(StringRef Str, SMLoc S) { + auto Op = std::make_unique(k_Token); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + + static std::unique_ptr CreateReg(unsigned RegNum, unsigned Kind, + SMLoc S, SMLoc E) { + auto Op = std::make_unique(k_Register); + Op->Reg.RegNum = RegNum; + Op->Reg.Kind = (VEOperand::RegisterKind)Kind; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static std::unique_ptr CreateImm(const MCExpr *Val, SMLoc S, + SMLoc E) { + auto Op = std::make_unique(k_Immediate); + Op->Imm.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static std::unique_ptr + MorphToMEMrr(unsigned Base, std::unique_ptr Op) { + unsigned offsetReg = Op->getReg(); + Op->Kind = k_MemoryReg; + Op->Mem.Base = Base; + Op->Mem.OffsetReg = offsetReg; + Op->Mem.Off = nullptr; + return Op; + } + + static std::unique_ptr CreateMEMr(unsigned Base, SMLoc S, + SMLoc E) { + auto Op = std::make_unique(k_MemoryReg); + Op->Mem.Base = Base; + Op->Mem.OffsetReg = 0; // always 0 + Op->Mem.Off = nullptr; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + + static std::unique_ptr + MorphToMEMri(unsigned Base, std::unique_ptr Op) { + const MCExpr *Imm = Op->getImm(); + Op->Kind = k_MemoryImm; + Op->Mem.Base = Base; + Op->Mem.OffsetReg = 0; + Op->Mem.Off = Imm; + return Op; + } +}; + +} // end anonymous namespace + +bool VEAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, uint64_t &ErrorInfo, + 
bool MatchingInlineAsm) { + MCInst Inst; + SmallVector Instructions; + unsigned MatchResult = + MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); + switch (MatchResult) { + case Match_Success: { + for (const MCInst &I : Instructions) { + Out.EmitInstruction(I, getSTI()); + } + return false; + } + + case Match_MissingFeature: + return Error(IDLoc, + "instruction requires a CPU feature not currently enabled"); + + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0ULL) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = ((VEOperand &)*Operands[ErrorInfo]).getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + } + + return Error(ErrorLoc, "invalid operand for instruction"); + } + case Match_MnemonicFail: + return Error(IDLoc, "invalid instruction mnemonic"); + } + llvm_unreachable("Implement any new match types added!"); +} + +bool VEAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, + SMLoc &EndLoc) { + const AsmToken &Tok = Parser.getTok(); + StartLoc = Tok.getLoc(); + EndLoc = Tok.getEndLoc(); + RegNo = 0; + if (getLexer().getKind() != AsmToken::Percent) + return false; + Parser.Lex(); + unsigned regKind = VEOperand::rk_None; + if (matchRegisterName(Tok, RegNo, regKind)) { + Parser.Lex(); + return false; + } + + return Error(StartLoc, "invalid register name"); +} + +bool VEAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) { + + // First operand in MCInst is instruction mnemonic. + Operands.push_back(VEOperand::CreateToken(Name, NameLoc)); + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + // Read the first operand. + if (getLexer().is(AsmToken::Comma)) { + if (parseBranchModifiers(Operands) != MatchOperand_Success) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token"); + } + } + if (parseOperand(Operands, Name) != MatchOperand_Success) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token"); + } + + while (getLexer().is(AsmToken::Comma) || getLexer().is(AsmToken::Plus)) { + if (getLexer().is(AsmToken::Plus)) { + // Plus tokens are significant in software_traps (p83, sparcv8.pdf). We + // must capture them. + Operands.push_back( + VEOperand::CreateToken("+", Parser.getTok().getLoc())); + } + Parser.Lex(); // Eat the comma or plus. + // Parse and remember the operand. + if (parseOperand(Operands, Name) != MatchOperand_Success) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token"); + } + } + } + if (getLexer().isNot(AsmToken::EndOfStatement)) { + SMLoc Loc = getLexer().getLoc(); + return Error(Loc, "unexpected token"); + } + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + +bool VEAsmParser::ParseDirective(AsmToken DirectiveID) { + StringRef IDVal = DirectiveID.getString(); + + if (IDVal == ".byte") + return parseDirectiveWord(1, DirectiveID.getLoc()); + + if (IDVal == ".half") + return parseDirectiveWord(2, DirectiveID.getLoc()); + + if (IDVal == ".word") + return parseDirectiveWord(4, DirectiveID.getLoc()); + + if (IDVal == ".nword") + return parseDirectiveWord(is64Bit() ? 8 : 4, DirectiveID.getLoc()); + + if (is64Bit() && IDVal == ".xword") + return parseDirectiveWord(8, DirectiveID.getLoc()); + + if (IDVal == ".register") { + // For now, ignore .register directive. + Parser.eatToEndOfStatement(); + return false; + } + if (IDVal == ".proc") { + // For compatibility, ignore this directive. 
+ // (It's supposed to be an "optimization" in the Sun assembler) + Parser.eatToEndOfStatement(); + return false; + } + + // Let the MC layer to handle other directives. + return true; +} + +bool VEAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + while (true) { + const MCExpr *Value; + if (getParser().parseExpression(Value)) + return true; + + getParser().getStreamer().EmitValue(Value, Size); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + // FIXME: Improve diagnostic. + if (getLexer().isNot(AsmToken::Comma)) + return Error(L, "unexpected token in directive"); + Parser.Lex(); + } + } + Parser.Lex(); + return false; +} + +OperandMatchResultTy VEAsmParser::parseMEMOperand(OperandVector &Operands) { + SMLoc S, E; + unsigned BaseReg = 0; + + if (ParseRegister(BaseReg, S, E)) { + return MatchOperand_NoMatch; + } + + switch (getLexer().getKind()) { + default: + return MatchOperand_NoMatch; + + case AsmToken::Comma: + case AsmToken::RBrac: + case AsmToken::EndOfStatement: + Operands.push_back(VEOperand::CreateMEMr(BaseReg, S, E)); + return MatchOperand_Success; + + case AsmToken::Plus: + Parser.Lex(); // Eat the '+' + break; + case AsmToken::Minus: + break; + } + + std::unique_ptr Offset; + OperandMatchResultTy ResTy = parseVEAsmOperand(Offset); + if (ResTy != MatchOperand_Success || !Offset) + return MatchOperand_NoMatch; + + Operands.push_back(Offset->isImm() + ? VEOperand::MorphToMEMri(BaseReg, std::move(Offset)) + : VEOperand::MorphToMEMrr(BaseReg, std::move(Offset))); + + return MatchOperand_Success; +} + +OperandMatchResultTy VEAsmParser::parseOperand(OperandVector &Operands, + StringRef Mnemonic) { + + OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + + // If there wasn't a custom match, try the generic matcher below. Otherwise, + // there was a match, but an error occurred, in which case, just return that + // the operand parsing failed. + if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail) + return ResTy; + + if (getLexer().is(AsmToken::LBrac)) { + // Memory operand + Operands.push_back(VEOperand::CreateToken("[", Parser.getTok().getLoc())); + Parser.Lex(); // Eat the [ + + if (Mnemonic == "cas" || Mnemonic == "casx" || Mnemonic == "casa") { + SMLoc S = Parser.getTok().getLoc(); + if (getLexer().getKind() != AsmToken::Percent) + return MatchOperand_NoMatch; + Parser.Lex(); // eat % + + unsigned RegNo, RegKind; + if (!matchRegisterName(Parser.getTok(), RegNo, RegKind)) + return MatchOperand_NoMatch; + + Parser.Lex(); // Eat the identifier token. + SMLoc E = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(VEOperand::CreateReg(RegNo, RegKind, S, E)); + ResTy = MatchOperand_Success; + } else { + ResTy = parseMEMOperand(Operands); + } + + if (ResTy != MatchOperand_Success) + return ResTy; + + if (!getLexer().is(AsmToken::RBrac)) + return MatchOperand_ParseFail; + + Operands.push_back(VEOperand::CreateToken("]", Parser.getTok().getLoc())); + Parser.Lex(); // Eat the ] + + // Parse an optional address-space identifier after the address. 
+ if (getLexer().is(AsmToken::Integer)) { + std::unique_ptr Op; + ResTy = parseVEAsmOperand(Op, false); + if (ResTy != MatchOperand_Success || !Op) + return MatchOperand_ParseFail; + Operands.push_back(std::move(Op)); + } + return MatchOperand_Success; + } + + std::unique_ptr Op; + + ResTy = parseVEAsmOperand(Op, (Mnemonic == "call")); + if (ResTy != MatchOperand_Success || !Op) + return MatchOperand_ParseFail; + + // Push the parsed operand into the list of operands + Operands.push_back(std::move(Op)); + + return MatchOperand_Success; +} + +OperandMatchResultTy +VEAsmParser::parseVEAsmOperand(std::unique_ptr &Op, bool isCall) { + SMLoc S = Parser.getTok().getLoc(); + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + const MCExpr *EVal; + + Op = nullptr; + switch (getLexer().getKind()) { + default: + break; + + case AsmToken::Percent: + Parser.Lex(); // Eat the '%'. + unsigned RegNo; + unsigned RegKind; + if (matchRegisterName(Parser.getTok(), RegNo, RegKind)) { + Parser.Lex(); // Eat the identifier token. + E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + switch (RegNo) { + default: + Op = VEOperand::CreateReg(RegNo, RegKind, S, E); + break; + } + break; + } + if (matchVEAsmModifiers(EVal, E)) { + E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Op = VEOperand::CreateImm(EVal, S, E); + } + break; + + case AsmToken::Minus: + case AsmToken::Integer: + case AsmToken::LParen: + case AsmToken::Dot: + if (!getParser().parseExpression(EVal, E)) + Op = VEOperand::CreateImm(EVal, S, E); + break; + + case AsmToken::Identifier: { + StringRef Identifier; + if (!getParser().parseIdentifier(Identifier)) { + E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); + + const MCExpr *Res = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + Op = VEOperand::CreateImm(Res, S, E); + } + break; + } + } + return (Op) ? MatchOperand_Success : MatchOperand_ParseFail; +} + +OperandMatchResultTy +VEAsmParser::parseBranchModifiers(OperandVector &Operands) { + // parse (,a|,pn|,pt)+ + + while (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); // Eat the comma + + if (!getLexer().is(AsmToken::Identifier)) + return MatchOperand_ParseFail; + StringRef modName = Parser.getTok().getString(); + if (modName == "a" || modName == "pn" || modName == "pt") { + Operands.push_back( + VEOperand::CreateToken(modName, Parser.getTok().getLoc())); + Parser.Lex(); // eat the identifier. + } + } + return MatchOperand_Success; +} + +bool VEAsmParser::matchRegisterName(const AsmToken &Tok, unsigned &RegNo, + unsigned &RegKind) { + int64_t intVal = 0; + RegNo = 0; + RegKind = VEOperand::rk_None; + if (Tok.is(AsmToken::Identifier)) { + StringRef Name = Tok.getString(); + + // %fp + if (Name.equals("fp")) { + RegNo = VE::SX9; + RegKind = VEOperand::rk_IntReg; + return true; + } + // %sp + if (Name.equals("sp")) { + RegNo = VE::SX11; + RegKind = VEOperand::rk_IntReg; + return true; + } + + // %s0 - %s63 + if (Name.substr(0, 1).equals_lower("s") && + !Name.substr(1).getAsInteger(10, intVal) && intVal < 64) { + RegNo = VEMCRegisterClasses[VE::I64RegClassID].getRegister(intVal); + RegKind = VEOperand::rk_IntReg; + return true; + } + + if (Name.equals("usrcc")) { + RegNo = VE::UCC; + RegKind = VEOperand::rk_Special; + return true; + } + } + return false; +} + +// Determine if an expression contains a reference to the symbol +// "_GLOBAL_OFFSET_TABLE_". 
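+// It is used by adjustPICRelocation() below to choose between the
+// PC-relative and GOT-relative variant kinds when assembling PIC code.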
+static bool hasGOTReference(const MCExpr *Expr) {
+  switch (Expr->getKind()) {
+  case MCExpr::Target:
+    if (const VEMCExpr *SE = dyn_cast<VEMCExpr>(Expr))
+      return hasGOTReference(SE->getSubExpr());
+    break;
+
+  case MCExpr::Constant:
+    break;
+
+  case MCExpr::Binary: {
+    const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
+    return hasGOTReference(BE->getLHS()) || hasGOTReference(BE->getRHS());
+  }
+
+  case MCExpr::SymbolRef: {
+    const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr);
+    return (SymRef.getSymbol().getName() == "_GLOBAL_OFFSET_TABLE_");
+  }
+
+  case MCExpr::Unary:
+    return hasGOTReference(cast<MCUnaryExpr>(Expr)->getSubExpr());
+  }
+  return false;
+}
+
+const VEMCExpr *VEAsmParser::adjustPICRelocation(VEMCExpr::VariantKind VK,
+                                                 const MCExpr *subExpr) {
+  // When in PIC mode, "%lo(...)" and "%hi(...)" behave differently.
+  // If the expression contains _GLOBAL_OFFSET_TABLE_, they are actually
+  // %pc_lo32 or %pc_hi32 relocations. Otherwise, they are interpreted
+  // as %got_lo32 or %got_hi32 relocations.
+
+  if (getContext().getObjectFileInfo()->isPositionIndependent()) {
+    switch (VK) {
+    default:
+      break;
+    case VEMCExpr::VK_VE_LO32:
+      VK = (hasGOTReference(subExpr) ? VEMCExpr::VK_VE_PC_LO32
+                                     : VEMCExpr::VK_VE_GOT_LO32);
+      break;
+    case VEMCExpr::VK_VE_HI32:
+      VK = (hasGOTReference(subExpr) ? VEMCExpr::VK_VE_PC_HI32
+                                     : VEMCExpr::VK_VE_GOT_HI32);
+      break;
+    }
+  }
+
+  return VEMCExpr::create(VK, subExpr, getContext());
+}
+
+bool VEAsmParser::matchVEAsmModifiers(const MCExpr *&EVal, SMLoc &EndLoc) {
+  AsmToken Tok = Parser.getTok();
+  if (!Tok.is(AsmToken::Identifier))
+    return false;
+
+  StringRef Name = Tok.getString();
+
+  VEMCExpr::VariantKind VK = VEMCExpr::parseVariantKind(Name);
+
+  if (VK == VEMCExpr::VK_VE_None)
+    return false;
+
+  Parser.Lex(); // Eat the identifier.
+  if (Parser.getTok().getKind() != AsmToken::LParen)
+    return false;
+
+  Parser.Lex(); // Eat the LParen token.
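+  // Parse the parenthesized sub-expression and wrap it in a VEMCExpr,
+  // letting adjustPICRelocation() pick the PIC variant where needed.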
+ const MCExpr *subExpr; + if (Parser.parseParenExpression(subExpr, EndLoc)) + return false; + + EVal = adjustPICRelocation(VK, subExpr); + return true; +} + +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVEAsmParser() { + RegisterMCAsmParser A(getTheVETarget()); +} + +#define GET_REGISTER_MATCHER +#define GET_MATCHER_IMPLEMENTATION +#include "VEGenAsmMatcher.inc" + +unsigned VEAsmParser::validateTargetOperandClass(MCParsedAsmOperand &GOp, + unsigned Kind) { + VEOperand &Op = (VEOperand &)GOp; + if (Op.isFloatOrDoubleReg()) { + switch (Kind) { + default: + break; + } + } + return Match_InvalidOperand; +} diff --git a/llvm/lib/Target/VE/CMakeLists.txt b/llvm/lib/Target/VE/CMakeLists.txt --- a/llvm/lib/Target/VE/CMakeLists.txt +++ b/llvm/lib/Target/VE/CMakeLists.txt @@ -1,11 +1,14 @@ set(LLVM_TARGET_DEFINITIONS VE.td) -tablegen(LLVM VEGenRegisterInfo.inc -gen-register-info) -tablegen(LLVM VEGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM VEGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM VEGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM VEGenCallingConv.inc -gen-callingconv) tablegen(LLVM VEGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM VEGenDisassemblerTables.inc -gen-disassembler) +tablegen(LLVM VEGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM VEGenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM VEGenRegisterInfo.inc -gen-register-info) tablegen(LLVM VEGenSubtargetInfo.inc -gen-subtarget) -tablegen(LLVM VEGenCallingConv.inc -gen-callingconv) add_public_tablegen_target(VECommonTableGen) add_llvm_target(VECodeGen @@ -14,12 +17,14 @@ VEISelDAGToDAG.cpp VEISelLowering.cpp VEInstrInfo.cpp + VEMachineFunctionInfo.cpp VEMCInstLower.cpp VERegisterInfo.cpp VESubtarget.cpp VETargetMachine.cpp ) +add_subdirectory(AsmParser) add_subdirectory(InstPrinter) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) diff --git a/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.h b/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.h --- a/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.h +++ b/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.h @@ -31,6 +31,12 @@ bool printAliasInstr(const MCInst *, const MCSubtargetInfo &, raw_ostream &); void printInstruction(const MCInst *, uint64_t, const MCSubtargetInfo &, raw_ostream &); + bool printVEAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &OS); + + void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, + unsigned PrintMethodIdx, + const MCSubtargetInfo &STI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); void printOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, @@ -43,6 +49,8 @@ const char *Modifier = nullptr); void printCCOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI, raw_ostream &OS); + bool printGetGOT(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, + raw_ostream &OS); }; } // namespace llvm diff --git a/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp b/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp --- a/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp +++ b/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp @@ -39,12 +39,17 @@ OS << '%' << StringRef(getRegisterName(RegNo)).lower(); } -void VEInstPrinter::printInst(const MCInst *MI, uint64_t Address, - StringRef Annot, const MCSubtargetInfo &STI, - raw_ostream &OS) { - if (!printAliasInstr(MI, STI, OS)) - printInstruction(MI, Address, STI, OS); - printAnnotation(OS, Annot); +void VEInstPrinter::printInst(const MCInst *MI, uint64_t Address, StringRef Annot, + const MCSubtargetInfo &STI, raw_ostream &O) { + if 
(!printAliasInstr(MI, STI, O) && !printVEAliasInstr(MI, STI, O)) + printInstruction(MI, Address, STI, O); + printAnnotation(O, Annot); +} + +bool VEInstPrinter::printVEAliasInstr(const MCInst *MI, + const MCSubtargetInfo &STI, + raw_ostream &O) { + return false; } void VEInstPrinter::printOperand(const MCInst *MI, int opNum, @@ -115,3 +120,9 @@ int CC = (int)MI->getOperand(opNum).getImm(); O << VECondCodeToString((VECC::CondCodes)CC); } + +bool VEInstPrinter::printGetGOT(const MCInst *MI, unsigned opNum, + const MCSubtargetInfo &STI, raw_ostream &O) { + llvm_unreachable("FIXME: Implement VEInstPrinter::printGetGOT."); + return true; +} diff --git a/llvm/lib/Target/VE/LLVMBuild.txt b/llvm/lib/Target/VE/LLVMBuild.txt --- a/llvm/lib/Target/VE/LLVMBuild.txt +++ b/llvm/lib/Target/VE/LLVMBuild.txt @@ -15,13 +15,13 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = InstPrinter MCTargetDesc TargetInfo +subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo [component_0] type = TargetGroup name = VE parent = Target -has_asmparser = 0 +has_asmparser = 1 has_asmprinter = 1 [component_1] diff --git a/llvm/lib/Target/VE/MCTargetDesc/CMakeLists.txt b/llvm/lib/Target/VE/MCTargetDesc/CMakeLists.txt --- a/llvm/lib/Target/VE/MCTargetDesc/CMakeLists.txt +++ b/llvm/lib/Target/VE/MCTargetDesc/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_component_library(LLVMVEDesc VEMCAsmInfo.cpp + VEMCExpr.cpp VEMCTargetDesc.cpp VETargetStreamer.cpp ) diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h b/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h @@ -0,0 +1,73 @@ +//===-- VEFixupKinds.h - VE Specific Fixup Entries --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_MCTARGETDESC_VEFIXUPKINDS_H +#define LLVM_LIB_TARGET_VE_MCTARGETDESC_VEFIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace VE { +enum Fixups { + // fixup_ve_call30 - 30-bit PC relative relocation for call + fixup_ve_call30 = FirstTargetFixupKind, + + /// fixup_ve_br22 - 22-bit PC relative relocation for + /// branches + fixup_ve_br22, + + /// fixup_ve_br19 - 19-bit PC relative relocation for + /// branches on icc/xcc + fixup_ve_br19, + + /// fixup_ve_bpr - 16-bit fixup for bpr + fixup_ve_br16_2, + fixup_ve_br16_14, + + /// fixup_ve_hi32 - 32-bit fixup corresponding to foo@hi + fixup_ve_hi32, + + /// fixup_ve_lo32 - 32-bit fixup corresponding to foo@lo + fixup_ve_lo32, + + /// fixup_ve_pc_hi32 - 32-bit fixup corresponding to foo@pc_hi + fixup_ve_pc_hi32, + + /// fixup_ve_pc_lo32 - 32-bit fixup corresponding to foo@pc_lo + fixup_ve_pc_lo32, + + /// fixup_ve_got_hi32 - 32-bit fixup corresponding to foo@got_hi + fixup_ve_got_hi32, + + /// fixup_ve_got_lo32 - 32-bit fixup corresponding to foo@got_lo + fixup_ve_got_lo32, + + /// fixup_ve_gotoff_hi32 - 32-bit fixup corresponding to foo@gotoff_hi + fixup_ve_gotoff_hi32, + + /// fixup_ve_gotoff_lo32 - 32-bit fixup corresponding to foo@gotoff_lo + fixup_ve_gotoff_lo32, + + /// fixup_ve_plt_hi32/lo32 + fixup_ve_plt_hi32, + fixup_ve_plt_lo32, + + /// fixups for Thread Local Storage + fixup_ve_tls_gd_hi32, + fixup_ve_tls_gd_lo32, + fixup_ve_tpoff_hi32, + fixup_ve_tpoff_lo32, + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind +}; +} // namespace VE +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.h b/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.h --- a/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.h +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.h @@ -24,6 +24,12 @@ public: explicit VEELFMCAsmInfo(const Triple &TheTriple); + + const MCExpr * + getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding, + MCStreamer &Streamer) const override; + const MCExpr *getExprForFDESymbol(const MCSymbol *Sym, unsigned Encoding, + MCStreamer &Streamer) const override; }; } // namespace llvm diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp --- a/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "VEMCAsmInfo.h" +#include "VEMCExpr.h" #include "llvm/ADT/Triple.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/MC/MCExpr.h" @@ -26,6 +27,9 @@ CodePointerSize = CalleeSaveStackSlotSize = 8; MaxInstLength = MinInstAlignment = 8; + // VE has ".zero" directive although it is not listed in assembler manual. + // ZeroDirective = nullptr; + // VE uses ".*byte" directive for unaligned data. Data8bitsDirective = "\t.byte\t"; Data16bitsDirective = "\t.2byte\t"; @@ -36,5 +40,30 @@ // assembler manual says sinple '.bss' is supported. 
UsesELFSectionDirectiveForBSS = true; + // ExceptionsType = ExceptionHandling::DwarfCFI; SupportsDebugInformation = true; + // SunStyleELFSectionSwitchSyntax = true; + // UseIntegratedAssembler = true; +} + +const MCExpr *VEELFMCAsmInfo::getExprForPersonalitySymbol( + const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const { + if (Encoding & dwarf::DW_EH_PE_pcrel) { + MCContext &Ctx = Streamer.getContext(); + return VEMCExpr::create(VEMCExpr::VK_VE_R_DISP32, + MCSymbolRefExpr::create(Sym, Ctx), Ctx); + } + + return MCAsmInfo::getExprForPersonalitySymbol(Sym, Encoding, Streamer); +} + +const MCExpr *VEELFMCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym, + unsigned Encoding, + MCStreamer &Streamer) const { + if (Encoding & dwarf::DW_EH_PE_pcrel) { + MCContext &Ctx = Streamer.getContext(); + return VEMCExpr::create(VEMCExpr::VK_VE_R_DISP32, + MCSymbolRefExpr::create(Sym, Ctx), Ctx); + } + return MCAsmInfo::getExprForFDESymbol(Sym, Encoding, Streamer); } diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h @@ -0,0 +1,95 @@ +//====- VEMCExpr.h - VE specific MC expression classes --------*- C++ -*-=====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes VE-specific MCExprs, used for modifiers like +// "%hi" or "%lo" etc., +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_MCTARGETDESC_VEMCEXPR_H +#define LLVM_LIB_TARGET_VE_MCTARGETDESC_VEMCEXPR_H + +#include "VEFixupKinds.h" +#include "llvm/MC/MCExpr.h" + +namespace llvm { + +class StringRef; +class VEMCExpr : public MCTargetExpr { +public: + enum VariantKind { + VK_VE_None, + VK_VE_R_DISP32, + VK_VE_HI32, + VK_VE_LO32, + VK_VE_PC_HI32, + VK_VE_PC_LO32, + VK_VE_GOT_HI32, + VK_VE_GOT_LO32, + VK_VE_GOTOFF_HI32, + VK_VE_GOTOFF_LO32, + VK_VE_PLT_HI32, + VK_VE_PLT_LO32, + VK_VE_TLS_GD_HI32, + VK_VE_TLS_GD_LO32, + VK_VE_TPOFF_HI32, + VK_VE_TPOFF_LO32, + }; + +private: + const VariantKind Kind; + const MCExpr *Expr; + + explicit VEMCExpr(VariantKind Kind, const MCExpr *Expr) + : Kind(Kind), Expr(Expr) {} + +public: + /// @name Construction + /// @{ + + static const VEMCExpr *create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx); + /// @} + /// @name Accessors + /// @{ + + /// getOpcode - Get the kind of this expression. + VariantKind getKind() const { return Kind; } + + /// getSubExpr - Get the child of this expression. + const MCExpr *getSubExpr() const { return Expr; } + + /// getFixupKind - Get the fixup kind of this expression. 
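+  /// For example, VK_VE_GOT_LO32 (printed as the "@got_lo" suffix) is
+  /// mapped to VE::fixup_ve_got_lo32 by the table in VEMCExpr.cpp.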
+ VE::Fixups getFixupKind() const { return getFixupKind(Kind); } + + /// @} + void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, + const MCFixup *Fixup) const override; + void visitUsedExpr(MCStreamer &Streamer) const override; + MCFragment *findAssociatedFragment() const override { + return getSubExpr()->findAssociatedFragment(); + } + + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Target; + } + + static bool classof(const VEMCExpr *) { return true; } + + static VariantKind parseVariantKind(StringRef name); + static bool printVariantKind(raw_ostream &OS, VariantKind Kind); + static void printVariantKindSuffix(raw_ostream &OS, VariantKind Kind); + static VE::Fixups getFixupKind(VariantKind Kind); +}; + +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp @@ -0,0 +1,229 @@ +//===-- VEMCExpr.cpp - VE specific MC expression classes ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the assembly expression modifiers +// accepted by the VE architecture (e.g. "%hi", "%lo", ...). +// +//===----------------------------------------------------------------------===// + +#include "VEMCExpr.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/Object/ELF.h" + +using namespace llvm; + +#define DEBUG_TYPE "vemcexpr" + +const VEMCExpr *VEMCExpr::create(VariantKind Kind, const MCExpr *Expr, + MCContext &Ctx) { + return new (Ctx) VEMCExpr(Kind, Expr); +} + +void VEMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { + + bool closeParen = printVariantKind(OS, Kind); + + const MCExpr *Expr = getSubExpr(); + Expr->print(OS, MAI); + + if (closeParen) + OS << ')'; + printVariantKindSuffix(OS, Kind); +} + +bool VEMCExpr::printVariantKind(raw_ostream &OS, VariantKind Kind) { + bool closeParen = true; + switch (Kind) { + case VK_VE_None: + closeParen = false; + break; + case VK_VE_R_DISP32: + OS << "%r_disp32("; + break; + case VK_VE_HI32: + return false; // OS << "%hi("; break; + case VK_VE_LO32: + return false; // OS << "%lo("; break; + case VK_VE_PC_HI32: + return false; // OS << "%pc_hi("; break; + case VK_VE_PC_LO32: + return false; // OS << "%pc_lo("; break; + case VK_VE_GOT_HI32: + return false; // OS << "%got_hi("; break; + case VK_VE_GOT_LO32: + return false; // OS << "%got_lo("; break; + case VK_VE_GOTOFF_HI32: + return false; // OS << "%gotoff_hi("; break; + case VK_VE_GOTOFF_LO32: + return false; // OS << "%gotoff_lo("; break; + case VK_VE_PLT_HI32: + return false; // OS << "%plt_hi("; break; + case VK_VE_PLT_LO32: + return false; // OS << "%plt_lo("; break; + case VK_VE_TLS_GD_HI32: + return false; // OS << "%tls_gd_hi("; break; + case VK_VE_TLS_GD_LO32: + return false; // OS << "%tls_gd_lo("; break; + case VK_VE_TPOFF_HI32: + return false; // OS << "%tpoff_hi("; break; + case VK_VE_TPOFF_LO32: + return false; // OS << 
"%tpoff_lo("; break; + } + return closeParen; +} + +void VEMCExpr::printVariantKindSuffix(raw_ostream &OS, VariantKind Kind) { + switch (Kind) { + case VK_VE_None: + break; + case VK_VE_R_DISP32: + break; + case VK_VE_HI32: + OS << "@hi"; + break; + case VK_VE_LO32: + OS << "@lo"; + break; + case VK_VE_PC_HI32: + OS << "@pc_hi"; + break; + case VK_VE_PC_LO32: + OS << "@pc_lo"; + break; + case VK_VE_GOT_HI32: + OS << "@got_hi"; + break; + case VK_VE_GOT_LO32: + OS << "@got_lo"; + break; + case VK_VE_GOTOFF_HI32: + OS << "@gotoff_hi"; + break; + case VK_VE_GOTOFF_LO32: + OS << "@gotoff_lo"; + break; + case VK_VE_PLT_HI32: + OS << "@plt_hi"; + break; + case VK_VE_PLT_LO32: + OS << "@plt_lo"; + break; + case VK_VE_TLS_GD_HI32: + OS << "@tls_gd_hi"; + break; + case VK_VE_TLS_GD_LO32: + OS << "@tls_gd_lo"; + break; + case VK_VE_TPOFF_HI32: + OS << "@tpoff_hi"; + break; + case VK_VE_TPOFF_LO32: + OS << "@tpoff_lo"; + break; + } +} + +VEMCExpr::VariantKind VEMCExpr::parseVariantKind(StringRef name) { + return StringSwitch(name) + .Case("r_disp32", VK_VE_R_DISP32) + .Case("hi", VK_VE_HI32) + .Case("lo", VK_VE_LO32) + .Case("pc_hi", VK_VE_PC_HI32) + .Case("pc_lo", VK_VE_PC_LO32) + .Case("got_hi", VK_VE_GOT_HI32) + .Case("got_lo", VK_VE_GOT_LO32) + .Case("gotoff_hi", VK_VE_GOTOFF_HI32) + .Case("gotoff_lo", VK_VE_GOTOFF_LO32) + .Case("plt_hi", VK_VE_PLT_HI32) + .Case("plt_lo", VK_VE_PLT_LO32) + .Case("tls_gd_hi", VK_VE_TLS_GD_HI32) + .Case("tls_gd_lo", VK_VE_TLS_GD_LO32) + .Case("tpoff_hi", VK_VE_TPOFF_HI32) + .Case("tpoff_lo", VK_VE_TPOFF_LO32) + .Default(VK_VE_None); +} + +VE::Fixups VEMCExpr::getFixupKind(VEMCExpr::VariantKind Kind) { + switch (Kind) { + default: + llvm_unreachable("Unhandled VEMCExpr::VariantKind"); + case VK_VE_HI32: + return VE::fixup_ve_hi32; + case VK_VE_LO32: + return VE::fixup_ve_lo32; + case VK_VE_PC_HI32: + return VE::fixup_ve_pc_hi32; + case VK_VE_PC_LO32: + return VE::fixup_ve_pc_lo32; + case VK_VE_GOT_HI32: + return VE::fixup_ve_got_hi32; + case VK_VE_GOT_LO32: + return VE::fixup_ve_got_lo32; + case VK_VE_GOTOFF_HI32: + return VE::fixup_ve_gotoff_hi32; + case VK_VE_GOTOFF_LO32: + return VE::fixup_ve_gotoff_lo32; + case VK_VE_PLT_HI32: + return VE::fixup_ve_plt_hi32; + case VK_VE_PLT_LO32: + return VE::fixup_ve_plt_lo32; + case VK_VE_TLS_GD_HI32: + return VE::fixup_ve_tls_gd_hi32; + case VK_VE_TLS_GD_LO32: + return VE::fixup_ve_tls_gd_lo32; + case VK_VE_TPOFF_HI32: + return VE::fixup_ve_tpoff_hi32; + case VK_VE_TPOFF_LO32: + return VE::fixup_ve_tpoff_lo32; + } +} + +bool VEMCExpr::evaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout, + const MCFixup *Fixup) const { + return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup); +} + +static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { + switch (Expr->getKind()) { + case MCExpr::Target: + llvm_unreachable("Can't handle nested target expr!"); + break; + + case MCExpr::Constant: + break; + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast(Expr); + fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm); + fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm); + break; + } + + case MCExpr::SymbolRef: { + const MCSymbolRefExpr &SymRef = *cast(Expr); + cast(SymRef.getSymbol()).setType(ELF::STT_TLS); + break; + } + + case MCExpr::Unary: + fixELFSymbolsInTLSFixupsImpl(cast(Expr)->getSubExpr(), Asm); + break; + } +} + +void VEMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { + fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); +} + +void VEMCExpr::visitUsedExpr(MCStreamer &Streamer) 
const { + Streamer.visitUsedExpr(*getSubExpr()); +} diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp --- a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp @@ -79,7 +79,7 @@ return new VEInstPrinter(MAI, MII, MRI); } -extern "C" void LLVMInitializeVETargetMC() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVETargetMC() { // Register the MC asm info. RegisterMCAsmInfoFn X(getTheVETarget(), createVEMCAsmInfo); diff --git a/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp b/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp --- a/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp +++ b/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp @@ -17,7 +17,7 @@ return TheVETarget; } -extern "C" void LLVMInitializeVETargetInfo() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVETargetInfo() { RegisterTarget X(getTheVETarget(), "ve", "VE", "VE"); } diff --git a/llvm/lib/Target/VE/VE.td b/llvm/lib/Target/VE/VE.td --- a/llvm/lib/Target/VE/VE.td +++ b/llvm/lib/Target/VE/VE.td @@ -25,10 +25,15 @@ include "VERegisterInfo.td" include "VECallingConv.td" +include "VESchedule.td" include "VEInstrInfo.td" def VEInstrInfo : InstrInfo; +def VEAsmParser : AsmParser { + bit ShouldEmitMatchRegisterName = 0; +} + //===----------------------------------------------------------------------===// // VE processors supported. //===----------------------------------------------------------------------===// @@ -51,6 +56,7 @@ def VE : Target { // Pull in Instruction Info: let InstructionSet = VEInstrInfo; + let AssemblyParsers = [VEAsmParser]; let AssemblyWriters = [VEAsmWriter]; let AllowRegisterRenaming = 1; } diff --git a/llvm/lib/Target/VE/VEAsmPrinter.cpp b/llvm/lib/Target/VE/VEAsmPrinter.cpp --- a/llvm/lib/Target/VE/VEAsmPrinter.cpp +++ b/llvm/lib/Target/VE/VEAsmPrinter.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "InstPrinter/VEInstPrinter.h" +#include "MCTargetDesc/VEMCExpr.h" #include "MCTargetDesc/VETargetStreamer.h" #include "VE.h" #include "VEInstrInfo.h" @@ -46,14 +47,343 @@ StringRef getPassName() const override { return "VE Assembly Printer"; } + void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS); + void printMemASXOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, + const char *Modifier = nullptr); + void printMemASOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, + const char *Modifier = nullptr); + + void EmitFunctionBodyStart() override; void EmitInstruction(const MachineInstr *MI) override; static const char *getRegisterName(unsigned RegNo) { return VEInstPrinter::getRegisterName(RegNo); } + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &O) override; + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &O) override; + + void LowerGETGOTAndEmitMCInsts(const MachineInstr *MI, + const MCSubtargetInfo &STI); + void LowerGETFunPLTAndEmitMCInsts(const MachineInstr *MI, + const MCSubtargetInfo &STI); + void LowerGETTLSAddrAndEmitMCInsts(const MachineInstr *MI, + const MCSubtargetInfo &STI); + void LowerEH_SJLJ_SETJMPAndEmitMCInsts(const MachineInstr *MI, + const MCSubtargetInfo &STI); + void LowerEH_SJLJ_LONGJMPAndEmitMCInsts(const MachineInstr *MI, + const MCSubtargetInfo &STI); }; } // end of anonymous namespace +static MCOperand createVEMCOperand(VEMCExpr::VariantKind Kind, 
MCSymbol *Sym, + MCContext &OutContext) { + const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::create(Sym, OutContext); + const VEMCExpr *expr = VEMCExpr::create(Kind, MCSym, OutContext); + return MCOperand::createExpr(expr); +} + +static MCOperand createGOTRelExprOp(VEMCExpr::VariantKind Kind, + MCSymbol *GOTLabel, MCContext &OutContext) { + const MCSymbolRefExpr *GOT = MCSymbolRefExpr::create(GOTLabel, OutContext); + const VEMCExpr *expr = VEMCExpr::create(Kind, GOT, OutContext); + return MCOperand::createExpr(expr); +} + +static void EmitSIC(MCStreamer &OutStreamer, MCOperand &RD, + const MCSubtargetInfo &STI) { + MCInst SICInst; + SICInst.setOpcode(VE::SIC); + SICInst.addOperand(RD); + OutStreamer.EmitInstruction(SICInst, STI); +} + +static void EmitBSIC(MCStreamer &OutStreamer, MCOperand &R1, MCOperand &R2, + const MCSubtargetInfo &STI) { + MCInst BSICInst; + BSICInst.setOpcode(VE::BSIC); + BSICInst.addOperand(R1); + BSICInst.addOperand(R2); + OutStreamer.EmitInstruction(BSICInst, STI); +} + +static void EmitLEAzzi(MCStreamer &OutStreamer, MCOperand &Imm, MCOperand &RD, + const MCSubtargetInfo &STI) { + MCInst LEAInst; + LEAInst.setOpcode(VE::LEAzzi); + LEAInst.addOperand(RD); + LEAInst.addOperand(Imm); + OutStreamer.EmitInstruction(LEAInst, STI); +} + +static void EmitLEASLzzi(MCStreamer &OutStreamer, MCOperand &Imm, MCOperand &RD, + const MCSubtargetInfo &STI) { + MCInst LEASLInst; + LEASLInst.setOpcode(VE::LEASLzzi); + LEASLInst.addOperand(RD); + LEASLInst.addOperand(Imm); + OutStreamer.EmitInstruction(LEASLInst, STI); +} + +static void EmitLEAzii(MCStreamer &OutStreamer, MCOperand &RS1, MCOperand &Imm, + MCOperand &RD, const MCSubtargetInfo &STI) { + MCInst LEAInst; + LEAInst.setOpcode(VE::LEAzii); + LEAInst.addOperand(RD); + LEAInst.addOperand(RS1); + LEAInst.addOperand(Imm); + OutStreamer.EmitInstruction(LEAInst, STI); +} + +static void EmitLEASLrri(MCStreamer &OutStreamer, MCOperand &RS1, + MCOperand &RS2, MCOperand &Imm, MCOperand &RD, + const MCSubtargetInfo &STI) { + MCInst LEASLInst; + LEASLInst.setOpcode(VE::LEASLrri); + LEASLInst.addOperand(RS1); + LEASLInst.addOperand(RS2); + LEASLInst.addOperand(RD); + LEASLInst.addOperand(Imm); + OutStreamer.EmitInstruction(LEASLInst, STI); +} + +static void EmitBinary(MCStreamer &OutStreamer, unsigned Opcode, MCOperand &RS1, + MCOperand &Src2, MCOperand &RD, + const MCSubtargetInfo &STI) { + MCInst Inst; + Inst.setOpcode(Opcode); + Inst.addOperand(RD); + Inst.addOperand(RS1); + Inst.addOperand(Src2); + OutStreamer.EmitInstruction(Inst, STI); +} + +static void EmitANDrm0(MCStreamer &OutStreamer, MCOperand &RS1, MCOperand &Imm, + MCOperand &RD, const MCSubtargetInfo &STI) { + EmitBinary(OutStreamer, VE::ANDrm0, RS1, Imm, RD, STI); +} + +static void EmitHiLo(MCStreamer &OutStreamer, MCSymbol *GOTSym, + VEMCExpr::VariantKind HiKind, VEMCExpr::VariantKind LoKind, + MCOperand &RD, MCContext &OutContext, + const MCSubtargetInfo &STI) { + + MCOperand hi = createVEMCOperand(HiKind, GOTSym, OutContext); + MCOperand lo = createVEMCOperand(LoKind, GOTSym, OutContext); + MCOperand ci32 = MCOperand::createImm(32); + EmitLEAzzi(OutStreamer, lo, RD, STI); + EmitANDrm0(OutStreamer, RD, ci32, RD, STI); + EmitLEASLzzi(OutStreamer, hi, RD, STI); +} + +void VEAsmPrinter::LowerGETGOTAndEmitMCInsts(const MachineInstr *MI, + const MCSubtargetInfo &STI) { + MCSymbol *GOTLabel = + OutContext.getOrCreateSymbol(Twine("_GLOBAL_OFFSET_TABLE_")); + + const MachineOperand &MO = MI->getOperand(0); + MCOperand MCRegOP = MCOperand::createReg(MO.getReg()); + + if 
(!isPositionIndependent()) {
+    // Just load the address of GOT to MCRegOP.
+    switch (TM.getCodeModel()) {
+    default:
+      llvm_unreachable("Unsupported absolute code model");
+    case CodeModel::Small:
+    case CodeModel::Medium:
+    case CodeModel::Large:
+      EmitHiLo(*OutStreamer, GOTLabel, VEMCExpr::VK_VE_HI32,
+               VEMCExpr::VK_VE_LO32, MCRegOP, OutContext, STI);
+      break;
+    }
+    return;
+  }
+
+  MCOperand RegGOT = MCOperand::createReg(VE::SX15); // GOT
+  MCOperand RegPLT = MCOperand::createReg(VE::SX16); // PLT
+
+  // lea %got, _GLOBAL_OFFSET_TABLE_@PC_LO(-24)
+  // and %got, %got, (32)0
+  // sic %plt
+  // lea.sl %got, _GLOBAL_OFFSET_TABLE_@PC_HI(%got, %plt)
+  MCOperand cim24 = MCOperand::createImm(-24);
+  MCOperand loImm =
+      createGOTRelExprOp(VEMCExpr::VK_VE_PC_LO32, GOTLabel, OutContext);
+  EmitLEAzii(*OutStreamer, cim24, loImm, MCRegOP, STI);
+  MCOperand ci32 = MCOperand::createImm(32);
+  EmitANDrm0(*OutStreamer, MCRegOP, ci32, MCRegOP, STI);
+  EmitSIC(*OutStreamer, RegPLT, STI);
+  MCOperand hiImm =
+      createGOTRelExprOp(VEMCExpr::VK_VE_PC_HI32, GOTLabel, OutContext);
+  EmitLEASLrri(*OutStreamer, RegGOT, RegPLT, hiImm, MCRegOP, STI);
+}
+
+void VEAsmPrinter::LowerGETFunPLTAndEmitMCInsts(const MachineInstr *MI,
+                                                const MCSubtargetInfo &STI) {
+  const MachineOperand &MO = MI->getOperand(0);
+  MCOperand MCRegOP = MCOperand::createReg(MO.getReg());
+  const MachineOperand &Addr = MI->getOperand(1);
+  MCSymbol *AddrSym = nullptr;
+
+  switch (Addr.getType()) {
+  default:
+    llvm_unreachable("");
+    return;
+  case MachineOperand::MO_MachineBasicBlock:
+    report_fatal_error("MBB is not supported yet");
+    return;
+  case MachineOperand::MO_ConstantPoolIndex:
+    report_fatal_error("ConstantPool is not supported yet");
+    return;
+  case MachineOperand::MO_ExternalSymbol:
+    AddrSym = GetExternalSymbolSymbol(Addr.getSymbolName());
+    break;
+  case MachineOperand::MO_GlobalAddress:
+    AddrSym = getSymbol(Addr.getGlobal());
+    break;
+  }
+
+  if (!isPositionIndependent()) {
+    llvm_unreachable("Unsupported use of %plt in non-PIC code");
+    return;
+  }
+
+  MCOperand RegPLT = MCOperand::createReg(VE::SX16); // PLT
+
+  // lea %dst, %plt_lo(func)(-24)
+  // and %dst, %dst, (32)0
+  // sic %plt                     ; FIXME: is it safe to use %plt here?
+  // lea.sl %dst, %plt_hi(func)(%dst, %plt)
+  MCOperand cim24 = MCOperand::createImm(-24);
+  MCOperand loImm =
+      createGOTRelExprOp(VEMCExpr::VK_VE_PLT_LO32, AddrSym, OutContext);
+  EmitLEAzii(*OutStreamer, cim24, loImm, MCRegOP, STI);
+  MCOperand ci32 = MCOperand::createImm(32);
+  EmitANDrm0(*OutStreamer, MCRegOP, ci32, MCRegOP, STI);
+  EmitSIC(*OutStreamer, RegPLT, STI);
+  MCOperand hiImm =
+      createGOTRelExprOp(VEMCExpr::VK_VE_PLT_HI32, AddrSym, OutContext);
+  EmitLEASLrri(*OutStreamer, MCRegOP, RegPLT, hiImm, MCRegOP, STI);
+}
+
+void VEAsmPrinter::LowerGETTLSAddrAndEmitMCInsts(const MachineInstr *MI,
+                                                 const MCSubtargetInfo &STI) {
+  const MachineOperand &Addr = MI->getOperand(0);
+  MCSymbol *AddrSym = nullptr;
+
+  switch (Addr.getType()) {
+  default:
+    llvm_unreachable("");
+    return;
+  case MachineOperand::MO_MachineBasicBlock:
+    report_fatal_error("MBB is not supported yet");
+    return;
+  case MachineOperand::MO_ConstantPoolIndex:
+    report_fatal_error("ConstantPool is not supported yet");
+    return;
+  case MachineOperand::MO_ExternalSymbol:
+    AddrSym = GetExternalSymbolSymbol(Addr.getSymbolName());
+    break;
+  case MachineOperand::MO_GlobalAddress:
+    AddrSym = getSymbol(Addr.getGlobal());
+    break;
+  }
+
+  MCOperand RegLR = MCOperand::createReg(VE::SX10);  // LR
+  MCOperand RegS0 = MCOperand::createReg(VE::SX0);   // S0
+  MCOperand RegS12 = MCOperand::createReg(VE::SX12); // S12
+  MCSymbol *GetTLSLabel = OutContext.getOrCreateSymbol(Twine("__tls_get_addr"));
+
+  // lea %s0, sym@tls_gd_lo(-24)
+  // and %s0, %s0, (32)0
+  // sic %lr
+  // lea.sl %s0, sym@tls_gd_hi(%s0, %lr)
+  // lea %s12, __tls_get_addr@plt_lo(8)
+  // and %s12, %s12, (32)0
+  // lea.sl %s12, __tls_get_addr@plt_hi(%s12, %lr)
+  // bsic %lr, (, %s12)
+  MCOperand cim24 = MCOperand::createImm(-24);
+  MCOperand loImm =
+      createGOTRelExprOp(VEMCExpr::VK_VE_TLS_GD_LO32, AddrSym, OutContext);
+  EmitLEAzii(*OutStreamer, cim24, loImm, RegS0, STI);
+  MCOperand ci32 = MCOperand::createImm(32);
+  EmitANDrm0(*OutStreamer, RegS0, ci32, RegS0, STI);
+  EmitSIC(*OutStreamer, RegLR, STI);
+  MCOperand hiImm =
+      createGOTRelExprOp(VEMCExpr::VK_VE_TLS_GD_HI32, AddrSym, OutContext);
+  EmitLEASLrri(*OutStreamer, RegS0, RegLR, hiImm, RegS0, STI);
+  MCOperand ci8 = MCOperand::createImm(8);
+  MCOperand loImm2 =
+      createGOTRelExprOp(VEMCExpr::VK_VE_PLT_LO32, GetTLSLabel, OutContext);
+  EmitLEAzii(*OutStreamer, ci8, loImm2, RegS12, STI);
+  EmitANDrm0(*OutStreamer, RegS12, ci32, RegS12, STI);
+  MCOperand hiImm2 =
+      createGOTRelExprOp(VEMCExpr::VK_VE_PLT_HI32, GetTLSLabel, OutContext);
+  EmitLEASLrri(*OutStreamer, RegS12, RegLR, hiImm2, RegS12, STI);
+  EmitBSIC(*OutStreamer, RegLR, RegS12, STI);
+}
+
+void VEAsmPrinter::LowerEH_SJLJ_SETJMPAndEmitMCInsts(
+    const MachineInstr *MI, const MCSubtargetInfo &STI) {
+  // sic $dest
+  // lea $dest, 32($dest)   // $dest points 0f
+  // st $dest, 8(,$src)
+  // lea $dest, 0
+  // br.l 16                // br 1f
+  // 0:
+  // lea $dest, 1
+  // 1:
+
+  unsigned DestReg = MI->getOperand(0).getReg();
+  unsigned SrcReg = MI->getOperand(1).getReg();
+
+  EmitToStreamer(*OutStreamer, MCInstBuilder(VE::SIC).addReg(DestReg));
+
+  EmitToStreamer(
+      *OutStreamer,
+      MCInstBuilder(VE::LEArzi).addReg(DestReg).addReg(DestReg).addImm(32));
+
+  EmitToStreamer(
+      *OutStreamer,
+      MCInstBuilder(VE::STSri).addReg(SrcReg).addImm(8).addReg(DestReg));
+
+  EmitToStreamer(*OutStreamer,
+                 MCInstBuilder(VE::LEAzzi).addReg(DestReg).addImm(0));
+
+  EmitToStreamer(*OutStreamer, MCInstBuilder(VE::BCRLa).addImm(16));
+
+  EmitToStreamer(*OutStreamer,
MCInstBuilder(VE::LEAzzi).addReg(DestReg).addImm(1)); +} + +void VEAsmPrinter::LowerEH_SJLJ_LONGJMPAndEmitMCInsts( + const MachineInstr *MI, const MCSubtargetInfo &STI) { + // ld %s9, (, $src) // s9 = fp + // ld %s10, 8(, $src) // s10 = lr + // ld %s11, 16(, $src) // s11 = sp + // b.l (%s10) + + unsigned SrcReg = MI->getOperand(0).getReg(); + + EmitToStreamer( + *OutStreamer, + MCInstBuilder(VE::LDSri).addReg(VE::SX9).addReg(SrcReg).addImm(0)); + + EmitToStreamer( + *OutStreamer, + MCInstBuilder(VE::LDSri).addReg(VE::SX10).addReg(SrcReg).addImm(8)); + + EmitToStreamer( + *OutStreamer, + MCInstBuilder(VE::LDSri).addReg(VE::SX11).addReg(SrcReg).addImm(16)); + + EmitToStreamer(*OutStreamer, + MCInstBuilder(VE::BAri).addReg(VE::SX10).addImm(0)); + return; +} + void VEAsmPrinter::EmitInstruction(const MachineInstr *MI) { switch (MI->getOpcode()) { @@ -62,6 +392,25 @@ case TargetOpcode::DBG_VALUE: // FIXME: Debug Value. return; + case VE::GETGOT: + LowerGETGOTAndEmitMCInsts(MI, getSubtargetInfo()); + return; + case VE::GETFUNPLT: + LowerGETFunPLTAndEmitMCInsts(MI, getSubtargetInfo()); + return; + case VE::GETTLSADDR: + LowerGETTLSAddrAndEmitMCInsts(MI, getSubtargetInfo()); + return; + // Emit nothing here but a comment if we can. + case VE::MEMBARRIER: + OutStreamer->emitRawComment("MEMBARRIER"); + return; + case VE::EH_SjLj_SetJmp: + LowerEH_SJLJ_SETJMPAndEmitMCInsts(MI, getSubtargetInfo()); + return; + case VE::EH_SjLj_LongJmp: + LowerEH_SJLJ_LONGJMPAndEmitMCInsts(MI, getSubtargetInfo()); + return; } MachineBasicBlock::const_instr_iterator I = MI->getIterator(); MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); @@ -72,7 +421,130 @@ } while ((++I != E) && I->isInsideBundle()); // Delay slot check. } +void VEAsmPrinter::EmitFunctionBodyStart() {} + +void VEAsmPrinter::printOperand(const MachineInstr *MI, int opNum, + raw_ostream &O) { + const DataLayout &DL = getDataLayout(); + const MachineOperand &MO = MI->getOperand(opNum); + VEMCExpr::VariantKind TF = (VEMCExpr::VariantKind)MO.getTargetFlags(); + + bool CloseParen = VEMCExpr::printVariantKind(O, TF); + + switch (MO.getType()) { + case MachineOperand::MO_Register: + O << "%" << StringRef(getRegisterName(MO.getReg())).lower(); + break; + + case MachineOperand::MO_Immediate: + O << (int)MO.getImm(); + break; + case MachineOperand::MO_MachineBasicBlock: + MO.getMBB()->getSymbol()->print(O, MAI); + return; + case MachineOperand::MO_GlobalAddress: + getSymbol(MO.getGlobal())->print(O, MAI); + break; + case MachineOperand::MO_BlockAddress: + O << GetBlockAddressSymbol(MO.getBlockAddress())->getName(); + break; + case MachineOperand::MO_ExternalSymbol: + O << MO.getSymbolName(); + break; + case MachineOperand::MO_ConstantPoolIndex: + O << DL.getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" + << MO.getIndex(); + break; + case MachineOperand::MO_Metadata: + MO.getMetadata()->printAsOperand(O, MMI->getModule()); + break; + default: + llvm_unreachable(""); + } + if (CloseParen) + O << ")"; + VEMCExpr::printVariantKindSuffix(O, TF); +} + +void VEAsmPrinter::printMemASXOperand(const MachineInstr *MI, int opNum, + raw_ostream &O, const char *Modifier) { + // If this is an ADD operand, emit it like normal operands. 
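+  // Illustrative output of this routine (derived from the code below): an ASX
+  // memory operand with base register B and a non-zero offset 8 prints as
+  // "8(,B)", while a zero immediate offset is omitted, giving "(,B)".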
+ if (Modifier && !strcmp(Modifier, "arith")) { + printOperand(MI, opNum, O); + O << ", "; + printOperand(MI, opNum + 1, O); + return; + } + + if (MI->getOperand(opNum + 1).isImm() && + MI->getOperand(opNum + 1).getImm() == 0) { + // don't print "+0" + } else { + printOperand(MI, opNum + 1, O); + } + O << "(,"; + printOperand(MI, opNum, O); + O << ")"; +} + +void VEAsmPrinter::printMemASOperand(const MachineInstr *MI, int opNum, + raw_ostream &O, const char *Modifier) { + // If this is an ADD operand, emit it like normal operands. + if (Modifier && !strcmp(Modifier, "arith")) { + printOperand(MI, opNum, O); + O << ", "; + printOperand(MI, opNum + 1, O); + return; + } + + if (MI->getOperand(opNum + 1).isImm() && + MI->getOperand(opNum + 1).getImm() == 0) { + // don't print "+0" + } else { + printOperand(MI, opNum + 1, O); + } + O << "("; + printOperand(MI, opNum, O); + O << ")"; +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool VEAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) + return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: + // See if this is a generic print operand + return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O); + case 'f': + case 'r': + break; + } + } + + printOperand(MI, OpNo, O); + + return false; +} + +bool VEAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, + raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier + + O << '['; + printMemASXOperand(MI, OpNo, O); + O << ']'; + + return false; +} + // Force static initialization. -extern "C" void LLVMInitializeVEAsmPrinter() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVEAsmPrinter() { RegisterAsmPrinter X(getTheVETarget()); } diff --git a/llvm/lib/Target/VE/VECallingConv.td b/llvm/lib/Target/VE/VECallingConv.td --- a/llvm/lib/Target/VE/VECallingConv.td +++ b/llvm/lib/Target/VE/VECallingConv.td @@ -14,20 +14,102 @@ // Aurora VE //===----------------------------------------------------------------------===// +def CC_VE_C_Stack: CallingConv<[ + // F128 are assigned to the stack in 16-byte aligned units + CCIfType<[f128], CCAssignToStackWithShadow<16, 16, [SX7]>>, + + // float --> need special handling like below. + // 0 4 + // +------+------+ + // | empty| float| + // +------+------+ + CCIfType<[f32], CCCustom<"allocateFloat">>, + + // All of the rest are assigned to the stack in 8-byte aligned units. + CCAssignToStack<0, 8> +]>; + def CC_VE : CallingConv<[ // All arguments get passed in generic registers if there is space. + // Promote i1/i8/i16 arguments to i32. + CCIfType<[i1, i8, i16], CCPromoteToType>, + + // bool, char, int, enum, long --> generic integer 32 bit registers + CCIfType<[i32], CCAssignToRegWithShadow< + [SW0, SW1, SW2, SW3, SW4, SW5, SW6, SW7], + [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + + // float --> generic floating point 32 bit registers + CCIfType<[f32], CCAssignToRegWithShadow< + [SF0, SF1, SF2, SF3, SF4, SF5, SF6, SF7], + [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + // long long/double --> generic 64 bit registers - CCIfType<[i64], + CCIfType<[i64, f64], CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + + // long double --> pair of generic 64 bit registers + // + // NOTE: If Q1 is allocated while SX1 is free, llvm tries to allocate SX1 for + // following operands, this masks SX1 to avoid such behavior. 
+ CCIfType<[f128], + CCAssignToRegWithShadow<[Q0, Q1, Q2, Q3], + [SX0, SX1, SX3, SX5]>>, + + // Alternatively, they are assigned to the stack in 8-byte aligned units. + CCDelegateTo +]>; + +// All arguments get passed in stack for varargs function or non-prototyped +// function. +def CC_VE2 : CallingConv<[ + // F128 are assigned to the stack in 16-byte aligned units + CCIfType<[f128], CCAssignToStack<16, 16>>, + + // float --> need special handling like below. + // 0 4 + // +------+------+ + // | empty| float| + // +------+------+ + CCIfType<[f32], CCCustom<"allocateFloat">>, + + CCAssignToStack<0, 8> ]>; def RetCC_VE : CallingConv<[ + // Promote i1/i8/i16 arguments to i32. + CCIfType<[i1, i8, i16], CCPromoteToType>, + + // bool, char, int, enum, long --> generic integer 32 bit registers + CCIfType<[i32], CCAssignToRegWithShadow< + [SW0, SW1, SW2, SW3, SW4, SW5, SW6, SW7], + [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + + // float --> generic floating point 32 bit registers + CCIfType<[f32], CCAssignToRegWithShadow< + [SF0, SF1, SF2, SF3, SF4, SF5, SF6, SF7], + [SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + // long long/double --> generic 64 bit registers - CCIfType<[i64], + CCIfType<[i64, f64], CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + + // long double --> pair of generic 64 bit registers + CCIfType<[f128], + CCAssignToRegWithShadow<[Q0, Q1, Q2, Q3], + [SX0, SX1, SX3, SX5]>>, ]>; // Callee-saved registers def CSR : CalleeSavedRegs<(add (sequence "SX%u", 18, 33))>; def CSR_NoRegs : CalleeSavedRegs<(add)>; + +// vec_expf destroys s0, s1, s5, s6, s11, s61, s62, s63, v0-6, and vm6 +def CSR_vec_expf : CalleeSavedRegs<(add (sequence "SX%u", 2, 4), + (sequence "SX%u", 7, 10), + (sequence "SX%u", 12, 60) + )>; + +// llvm_grow_stack destroys s62 and s63 +def CSR_llvm_grow_stack : CalleeSavedRegs<(add (sequence "SX%u", 0, 61))>; diff --git a/llvm/lib/Target/VE/VEFrameLowering.cpp b/llvm/lib/Target/VE/VEFrameLowering.cpp --- a/llvm/lib/Target/VE/VEFrameLowering.cpp +++ b/llvm/lib/Target/VE/VEFrameLowering.cpp @@ -12,6 +12,7 @@ #include "VEFrameLowering.h" #include "VEInstrInfo.h" +#include "VEMachineFunctionInfo.h" #include "VESubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -27,6 +28,11 @@ using namespace llvm; +static cl::opt + DisableLeafProc("disable-ve-leaf-proc", cl::init(false), + cl::desc("Disable VE leaf procedure optimization."), + cl::Hidden); + VEFrameLowering::VEFrameLowering(const VESubtarget &ST) : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(16), 0, Align(16)) {} @@ -297,11 +303,61 @@ int VEFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { + const VESubtarget &Subtarget = MF.getSubtarget(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const VERegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + const VEMachineFunctionInfo *FuncInfo = MF.getInfo(); + bool isFixed = MFI.isFixedObjectIndex(FI); + // Addressable stack objects are accessed using neg. offsets from // %fp, or positive offsets from %sp. + bool UseFP; + + // VE uses FP-based references in general, even when "hasFP" is + // false. That function is rather a misnomer, because %fp is + // actually always available, unless isLeafProc. + if (FuncInfo->isLeafProc()) { + // If there's a leaf proc, all offsets need to be %sp-based, + // because we haven't caused %fp to actually point to our frame. 
+ UseFP = false; + } else if (isFixed) { + // Otherwise, argument access should always use %fp. + UseFP = true; + } else if (RegInfo->needsStackRealignment(MF)) { + // If there is dynamic stack realignment, all local object + // references need to be via %sp, to take account of the + // re-alignment. + UseFP = false; + } else { + // Finally, default to using %fp. + UseFP = true; + } + int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI); - FrameReg = VE::SX11; // %sp - return FrameOffset + MF.getFrameInfo().getStackSize(); + + if (UseFP) { + FrameReg = RegInfo->getFrameRegister(MF); + return FrameOffset; + } else { + FrameReg = VE::SX11; // %sp + return FrameOffset + MF.getFrameInfo().getStackSize(); + } +} + +static bool LLVM_ATTRIBUTE_UNUSED +verifyLeafProcRegUse(MachineRegisterInfo *MRI) { + + // If any of parameter registers are used, this is not leaf function. + for (unsigned reg = VE::SX0; reg <= VE::SX7; ++reg) + if (MRI->isPhysRegUsed(reg)) + return false; + + // If any of callee-saved registers are used, this is not leaf function. + for (unsigned reg = VE::SX18; reg <= VE::SX33; ++reg) + if (MRI->isPhysRegUsed(reg)) + return false; + + return true; } bool VEFrameLowering::isLeafProc(MachineFunction &MF) const { @@ -321,5 +377,8 @@ RegScavenger *RS) const { TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); - assert(isLeafProc(MF) && "TODO implement for non-leaf procs"); + if (!DisableLeafProc && isLeafProc(MF)) { + VEMachineFunctionInfo *MFI = MF.getInfo(); + MFI->setLeafProc(true); + } } diff --git a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp --- a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp +++ b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp @@ -43,15 +43,92 @@ void Select(SDNode *N) override; + // Complex Pattern Selectors. + bool SelectADDRrr(SDValue N, SDValue &R1, SDValue &R2); + bool SelectADDRri(SDValue N, SDValue &Base, SDValue &Offset); + + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for + /// inline asm expressions. + bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, + std::vector &OutOps) override; + StringRef getPassName() const override { return "VE DAG->DAG Pattern Instruction Selection"; } // Include the pieces autogenerated from the target description. #include "VEGenDAGISel.inc" + +private: + SDNode *getGlobalBaseReg(); }; } // end anonymous namespace +SDNode *VEDAGToDAGISel::getGlobalBaseReg() { + unsigned GlobalBaseReg = Subtarget->getInstrInfo()->getGlobalBaseReg(MF); + return CurDAG + ->getRegister(GlobalBaseReg, TLI->getPointerTy(CurDAG->getDataLayout())) + .getNode(); +} + +bool VEDAGToDAGISel::SelectADDRri(SDValue Addr, SDValue &Base, + SDValue &Offset) { + if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { + Base = CurDAG->getTargetFrameIndex( + FIN->getIndex(), TLI->getPointerTy(CurDAG->getDataLayout())); + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); + return true; + } + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress || + Addr.getOpcode() == ISD::TargetGlobalTLSAddress) + return false; // direct calls. + + if (Addr.getOpcode() == ISD::ADD) { + if (ConstantSDNode *CN = dyn_cast(Addr.getOperand(1))) { + if (isInt<13>(CN->getSExtValue())) { + if (FrameIndexSDNode *FIN = + dyn_cast(Addr.getOperand(0))) { + // Constant offset from frame ref. 
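+          // For example (illustrative): an address of the form
+          // (add FrameIndex:i64<1>, Constant:i64<8>) is selected here as
+          // Base = TargetFrameIndex<1> with Offset = 8, so it is matched by
+          // the reg+imm addressing patterns.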
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), + FIN->getValueType(0)); + } else { + Base = Addr.getOperand(0); + } + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr), + MVT::i32); + return true; + } + } + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); + return true; +} + +bool VEDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) { + if (Addr.getOpcode() == ISD::FrameIndex) + return false; + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress || + Addr.getOpcode() == ISD::TargetGlobalTLSAddress) + return false; // direct calls. + + if (CurDAG->isBaseWithConstantOffset(Addr)) { + if (ConstantSDNode *CN = dyn_cast(Addr.getOperand(1))) + if (isInt<13>(CN->getSExtValue())) + return false; // Let the reg+imm pattern catch this! + if (Addr.getOperand(0).getOpcode() == VEISD::Lo || + Addr.getOperand(1).getOpcode() == VEISD::Lo) + return false; // Let the reg+imm pattern catch this! + R1 = Addr.getOperand(0); + R2 = Addr.getOperand(1); + return true; + } + + return false; // Let the reg+imm pattern catch this! +} + void VEDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); if (N->isMachineOpcode()) { @@ -59,9 +136,38 @@ return; // Already selected. } + switch (N->getOpcode()) { + default: + break; + case VEISD::GLOBAL_BASE_REG: + ReplaceNode(N, getGlobalBaseReg()); + return; + } + SelectCode(N); } +/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for +/// inline asm expressions. +bool VEDAGToDAGISel::SelectInlineAsmMemoryOperand( + const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) { + SDValue Op0, Op1; + switch (ConstraintID) { + default: + return true; + case InlineAsm::Constraint_i: + case InlineAsm::Constraint_o: + case InlineAsm::Constraint_m: // memory + if (!SelectADDRrr(Op, Op0, Op1)) + SelectADDRri(Op, Op0, Op1); + break; + } + + OutOps.push_back(Op0); + OutOps.push_back(Op1); + return false; +} + /// createVEISelDag - This pass converts a legalized DAG into a /// VE-specific DAG, ready for instruction scheduling. /// diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h --- a/llvm/lib/Target/VE/VEISelLowering.h +++ b/llvm/lib/Target/VE/VEISelLowering.h @@ -23,7 +23,49 @@ namespace VEISD { enum NodeType : unsigned { FIRST_NUMBER = ISD::BUILTIN_OP_END, - RET_FLAG, // Return with a flag operand. + CMPICC, // Compare two GPR operands, set icc+xcc. + CMPFCC, // Compare two FP operands, set fcc. + BRICC, // Branch to dest on icc condition + BRXCC, // Branch to dest on xcc condition (64-bit only). + BRFCC, // Branch to dest on fcc condition + SELECT, + SELECT_ICC, // Select between two values using the current ICC flags. + SELECT_XCC, // Select between two values using the current XCC flags. + SELECT_FCC, // Select between two values using the current FCC flags. + + EH_SJLJ_SETJMP, // SjLj exception handling setjmp. + EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. + EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch. + + Hi, + Lo, // Hi/Lo operations, typically on a global address. + + FTOI, // FP to Int within a FP register. + ITOF, // Int to FP within a FP register. + FTOX, // FP to Int64 within a FP register. + XTOF, // Int64 to FP within a FP register. 
+ + MAX, + MIN, + FMAX, + FMIN, + + GETFUNPLT, // load function address through %plt insturction + GETSTACKTOP, // retrieve address of stack top (first address of + // locals and temporaries) + GETTLSADDR, // load address for TLS access + + MEMBARRIER, // Compiler barrier only; generate a no-op. + + CALL, // A call instruction. + RET_FLAG, // Return with a flag operand. + GLOBAL_BASE_REG, // Global base reg for PIC. + FLUSHW, // FLUSH register windows to stack. + + /// A wrapper node for TargetConstantPool, TargetJumpTable, + /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress, + /// MCSymbol and TargetBlockAddress. + Wrapper, }; } @@ -32,12 +74,52 @@ public: VETargetLowering(const TargetMachine &TM, const VESubtarget &STI); + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + + /// computeKnownBitsForTargetNode - Determine which of the bits specified + /// in Mask are known to be either zero or one and return them in the + /// KnownZero/KnownOne bitsets. + void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth = 0) const override; + + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *MBB) const override; const char *getTargetNodeName(unsigned Opcode) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; + ConstraintWeight + getSingleConstraintMatchWeight(AsmOperandInfo &info, + const char *constraint) const override; + void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, + std::vector &Ops, + SelectionDAG &DAG) const override; + + unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override { + if (ConstraintCode == "o") + return InlineAsm::Constraint_o; + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); + } + + std::pair + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; + + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i32; + } + Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override; + /// Override to support customized stack guard loading. 
+ bool useLoadStackGuardNode() const override; + void insertSSPDeclarations(Module &M) const override; + /// getSetCCResultType - Return the ISD::SETCC ValueType EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; @@ -47,6 +129,14 @@ const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const override; + SDValue LowerFormalArguments_64(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const; + + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const override; bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, @@ -56,6 +146,91 @@ const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &dl, SelectionDAG &DAG) const override; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerToTLSLocalExecModel(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const; + + SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const; + SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF, + SelectionDAG &DAG) const; + SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; + + bool isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const override; + + bool ShouldShrinkFPConstant(EVT VT) const override { + // Do not shrink FP constpool if VT == MVT::f128. + // (ldd, call _Q_fdtoq) is more expensive than two ldds. + return VT != MVT::f128; + } + + /// Returns true if the target allows unaligned memory accesses of the + /// specified type. + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, + MachineMemOperand::Flags Flags, + bool *Fast) const override; + + bool mergeStoresAfterLegalization(EVT) const override { return true; } + + bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, + const SelectionDAG &DAG) const override; + + unsigned getJumpTableEncoding() const override; + + const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, + unsigned uid, + MCContext &Ctx) const override; + + bool shouldInsertFencesForAtomic(const Instruction *I) const override { + // VE uses Release consistency, so need fence for each atomics. 
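+    // Because this returns true, the AtomicExpand pass inserts those fences
+    // through emitLeadingFence()/emitTrailingFence() declared below.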
+ return true; + } + Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, + AtomicOrdering Ord) const override; + Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, + AtomicOrdering Ord) const override; + + AtomicExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + + void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const override; + + MachineBasicBlock *expandSelectCC(MachineInstr &MI, MachineBasicBlock *BB, + unsigned BROpcode) const; + MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, + MachineBasicBlock *MBB) const; + MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI, + MachineBasicBlock *MBB) const; + MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI, + MachineBasicBlock *BB) const; + void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, + MachineBasicBlock *DispatchBB, int FI) const; + void finalizeLowering(MachineFunction &MF) const override; + + // VE supports only vector FMA + bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, + EVT VT) const override { + return VT.isVector(); + } }; } // namespace llvm diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -12,19 +12,25 @@ //===----------------------------------------------------------------------===// #include "VEISelLowering.h" +#include "MCTargetDesc/VEMCExpr.h" +#include "VEInstrBuilder.h" +#include "VEMachineFunctionInfo.h" #include "VERegisterInfo.h" #include "VETargetMachine.h" +// #include "VETargetObjectFile.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicsVE.h" #include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" @@ -36,6 +42,28 @@ // Calling Convention Implementation //===----------------------------------------------------------------------===// +static bool allocateFloat(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + switch (LocVT.SimpleTy) { + case MVT::f32: { + // Allocate stack like below + // 0 4 + // +------+------+ + // | empty| float| + // +------+------+ + // Use align=8 for dummy area to align the beginning of these 2 area. + State.AllocateStack(4, 8); // for empty area + // Use align=4 for value to place it at just after the dummy area. + unsigned Offset = State.AllocateStack(4, 4); // for float value area + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return true; + } + default: + return false; + } +} + #include "VEGenCallingConv.inc" bool VETargetLowering::CanLowerReturn( @@ -89,6 +117,24 @@ llvm_unreachable("Unknown loc info!"); } + // The custom bit on an i32 return value indicates that it should be passed + // in the high bits of the register. 
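+    // For example (illustrative), returning an inreg {i32, i32} pair packs the
+    // first element into bits 63..32 and the second into bits 31..0 of one
+    // 64-bit register, via the SHL/OR sequence below.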
+ if (VA.getValVT() == MVT::i32 && VA.needsCustom()) { + OutVal = DAG.getNode(ISD::SHL, DL, MVT::i64, OutVal, + DAG.getConstant(32, DL, MVT::i32)); + + // The next value may go in the low bits of the same register. + // Handle both at once. + if (i + 1 < RVLocs.size() && + RVLocs[i + 1].getLocReg() == VA.getLocReg()) { + SDValue NV = + DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, OutVals[i + 1]); + OutVal = DAG.getNode(ISD::OR, DL, MVT::i64, OutVal, NV); + // Skip the next value, it's already done. + ++i; + } + } + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag); // Guarantee that all emitted copies are stuck together with flags. @@ -111,6 +157,8 @@ SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); + // Get the base offset of the incoming arguments stack space. + unsigned ArgsBaseOffset = 176; // Get the size of the preserved arguments area unsigned ArgsPreserved = 64; @@ -126,7 +174,6 @@ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - assert(VA.isRegLoc() && "TODO implement argument passing on stack"); if (VA.isRegLoc()) { // This argument is passed in a register. // All integer register arguments are promoted by the caller to i64. @@ -136,8 +183,10 @@ MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT())); SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT()); - assert((VA.getValVT() == MVT::i64) && - "TODO implement other argument types than i64"); + // Get the high bits for i32 struct elements. + if (VA.getValVT() == MVT::i32 && VA.needsCustom()) + Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg, + DAG.getConstant(32, DL, MVT::i32)); // The caller promoted the argument, so insert an Assert?ext SDNode so we // won't promote the value again in this function. @@ -161,9 +210,34 @@ InVals.push_back(Arg); continue; } + + // The registers are exhausted. This argument was passed on the stack. + assert(VA.isMemLoc()); + // The CC_VE_Full/Half functions compute stack offsets relative to the + // beginning of the arguments area at %fp+176. + unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset; + unsigned ValSize = VA.getValVT().getSizeInBits() / 8; + int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true); + InVals.push_back( + DAG.getLoad(VA.getValVT(), DL, Chain, + DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())), + MachinePointerInfo::getFixedStack(MF, FI))); } - assert(!IsVarArg && "TODO implement var args"); + if (!IsVarArg) + return Chain; + + // This function takes variable arguments, some of which may have been passed + // in registers %s0-%s8. + // + // The va_start intrinsic needs to know the offset to the first variable + // argument. + // TODO: need to calculate offset correctly once we support f128. + unsigned ArgOffset = ArgLocs.size() * 8; + VEMachineFunctionInfo *FuncInfo = MF.getInfo(); + // Skip the 176 bytes of register save area. 
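+  // Illustrative example (assuming all named arguments take 8-byte slots, per
+  // the TODO above): with two named arguments, ArgOffset is 16, so the first
+  // variable argument is found at %fp + 176 + 16.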
+ FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset); + return Chain; } @@ -180,7 +254,8 @@ .Case("outer", VE::SX12) // Outer regiser .Case("info", VE::SX17) // Info area register .Case("got", VE::SX15) // Global offset table register - .Case("plt", VE::SX16) // Procedure linkage table register + .Case("plt", VE::SX16) // Procedure linkage table register + .Case("usrcc", VE::UCC) // User clock counter .Default(0); if (Reg) @@ -189,13 +264,325 @@ report_fatal_error("Invalid register name global variable"); } +SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const { + SelectionDAG &DAG = CLI.DAG; + SDLoc DL = CLI.DL; + SDValue Chain = CLI.Chain; + auto PtrVT = getPointerTy(DAG.getDataLayout()); + + // VE target does not yet support tail call optimization. + CLI.IsTailCall = false; + + // Get the base offset of the outgoing arguments stack space. + unsigned ArgsBaseOffset = 176; + // Get the size of the preserved arguments area + unsigned ArgsPreserved = 8 * 8u; + + // Analyze operands of the call, assigning locations to each operand. + SmallVector ArgLocs; + CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext()); + // Allocate the preserved area first. + CCInfo.AllocateStack(ArgsPreserved, 8); + // We already allocated the preserved area, so the stack offset computed + // by CC_VE would be correct now. + CCInfo.AnalyzeCallOperands(CLI.Outs, CC_VE); + + // VE requires to use both register and stack for varargs or no-prototyped + // functions. FIXME: How to check prototype here? + bool UseBoth = CLI.IsVarArg /* || CLI.NoProtoType */; + + // Analyze operands again if it is required to store BOTH. + SmallVector ArgLocs2; + CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), + ArgLocs2, *DAG.getContext()); + if (UseBoth) + CCInfo2.AnalyzeCallOperands(CLI.Outs, CC_VE2); + + // Get the size of the outgoing arguments stack space requirement. + unsigned ArgsSize = CCInfo.getNextStackOffset(); + + // Keep stack frames 16-byte aligned. + ArgsSize = alignTo(ArgsSize, 16); + + // Adjust the stack pointer to make room for the arguments. + // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls + // with more than 6 arguments. + Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL); + + // Collect the set of registers to pass to the function and their values. + // This will be emitted as a sequence of CopyToReg nodes glued to the call + // instruction. + SmallVector, 8> RegsToPass; + + // Collect chains from all the memory opeations that copy arguments to the + // stack. They must follow the stack pointer adjustment above and precede the + // call instruction itself. + SmallVector MemOpChains; + + // VE needs to get address of callee function in a register + // So, prepare to copy it to SX12 here. + + // If the callee is a GlobalAddress node (quite common, every direct call is) + // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. + // Likewise ExternalSymbol -> TargetExternalSymbol. + SDValue Callee = CLI.Callee; + + bool IsPICCall = isPositionIndependent(); + + // PC-relative references to external symbols should go through $stub. + // If so, we need to prepare GlobalBaseReg first. 
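+  // Illustrative summary of the code below: in PIC mode the callee address is
+  // loaded through VEISD::GETFUNPLT (preparing the global base register first
+  // when the callee is not known to be dso_local), while non-PIC code builds
+  // the absolute address from a VK_VE_HI32/VK_VE_LO32 pair.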
+ const TargetMachine &TM = DAG.getTarget(); + const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); + const GlobalValue *GV = nullptr; + if (auto *G = dyn_cast(Callee)) + GV = G->getGlobal(); + bool Local = TM.shouldAssumeDSOLocal(*Mod, GV); + bool UsePlt = !Local; + MachineFunction &MF = DAG.getMachineFunction(); + + // Turn GlobalAddress/ExternalSymbol node into a value node + // containing the address of them here. + if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + if (IsPICCall) { + if (UsePlt) + Subtarget->getInstrInfo()->getGlobalBaseReg(&MF); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT, 0, 0); + Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee); + } else { + Callee = + makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG); + } + } else if (ExternalSymbolSDNode *E = dyn_cast(Callee)) { + if (IsPICCall) { + if (UsePlt) + Subtarget->getInstrInfo()->getGlobalBaseReg(&MF); + Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0); + Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee); + } else { + Callee = + makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG); + } + } + + RegsToPass.push_back(std::make_pair(VE::SX12, Callee)); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue Arg = CLI.OutVals[i]; + + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown location info!"); + case CCValAssign::Full: + break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); + break; + } + + if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + if (!UseBoth) + continue; + VA = ArgLocs2[i]; + } + + assert(VA.isMemLoc()); + + // Create a store off the stack pointer for this argument. + SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT); + // The argument area starts at %fp+176 in the callee frame, + // %sp+176 in ours. + SDValue PtrOff = + DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL); + PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); + MemOpChains.push_back( + DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo())); + } + + // Emit all stores, make sure they occur before the call. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); + + // Build a sequence of CopyToReg nodes glued together with token chain and + // glue operands which copy the outgoing args into registers. The InGlue is + // necessary since all emitted instructions must be stuck together in order + // to pass the live physical registers. + SDValue InGlue; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first, + RegsToPass[i].second, InGlue); + InGlue = Chain.getValue(1); + } + + // Build the operands for the call instruction itself. + SmallVector Ops; + Ops.push_back(Chain); + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + // Add a register mask operand representing the call-preserved registers. 
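+  // The mask comes from VERegisterInfo::getCallPreservedMask() below,
+  // presumably derived from the CalleeSavedRegs definitions (CSR,
+  // CSR_vec_expf, CSR_llvm_grow_stack) in VECallingConv.td.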
+ const VERegisterInfo *TRI = Subtarget->getRegisterInfo(); + const uint32_t *Mask = + TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + + // Make sure the CopyToReg nodes are glued to the call instruction which + // consumes the registers. + if (InGlue.getNode()) + Ops.push_back(InGlue); + + // Now the call itself. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops); + InGlue = Chain.getValue(1); + + // Revert the stack pointer immediately after the call. + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true), + DAG.getIntPtrConstant(0, DL, true), InGlue, DL); + InGlue = Chain.getValue(1); + + // Now extract the return values. This is more or less the same as + // LowerFormalArguments. + + // Assign locations to each value returned by this call. + SmallVector RVLocs; + CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); + + // Set inreg flag manually for codegen generated library calls that + // return float. + if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CS) + CLI.Ins[0].Flags.setInReg(); + + RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_VE); + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + unsigned Reg = VA.getLocReg(); + + // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can + // reside in the same register in the high and low bits. Reuse the + // CopyFromReg previous node to avoid duplicate copies. + SDValue RV; + if (RegisterSDNode *SrcReg = dyn_cast(Chain.getOperand(1))) + if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg) + RV = Chain.getValue(0); + + // But usually we'll create a new CopyFromReg for a different register. + if (!RV.getNode()) { + RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue); + Chain = RV.getValue(1); + InGlue = Chain.getValue(2); + } + + // Get the high bits for i32 struct elements. + if (VA.getValVT() == MVT::i32 && VA.needsCustom()) + RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV, + DAG.getConstant(32, DL, MVT::i32)); + + // The callee promoted the return value, so insert an Assert?ext SDNode so + // we won't promote the value again in this function. + switch (VA.getLocInfo()) { + case CCValAssign::SExt: + RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV, + DAG.getValueType(VA.getValVT())); + break; + case CCValAssign::ZExt: + RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV, + DAG.getValueType(VA.getValVT())); + break; + default: + break; + } + + // Truncate the register down to the return value type. + if (VA.isExtInLoc()) + RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV); + + InVals.push_back(RV); + } + + return Chain; +} + //===----------------------------------------------------------------------===// // TargetLowering Implementation //===----------------------------------------------------------------------===// +/// isFPImmLegal - Returns true if the target can instruction select the +/// specified FP immediate natively. If false, the legalizer will +/// materialize the FP immediate as a load from a constant pool. 
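+/// For VE, the implementation below accepts f32 and f64 immediates; f128
+/// constants are still materialized from the constant pool.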
+bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const { + return VT == MVT::f32 || VT == MVT::f64; +} + +/// Determine if the target supports unaligned memory accesses. +/// +/// This function returns true if the target allows unaligned memory accesses +/// of the specified type in the given address space. If true, it also returns +/// whether the unaligned memory access is "fast" in the last argument by +/// reference. This is used, for example, in situations where an array +/// copy/move/set is converted to a sequence of store operations. Its use +/// helps to ensure that such replacements don't generate code that causes an +/// alignment error (trap) on the target machine. +bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned AddrSpace, + unsigned Align, + MachineMemOperand::Flags, + bool *Fast) const { + if (Fast) { + // It's fast anytime on VE + *Fast = true; + } + return true; +} + +bool VETargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT, + const SelectionDAG &DAG) const { + // Do not merge to float value size (128 bytes) if no implicit + // float attribute is set. + bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute( + Attribute::NoImplicitFloat); + + if (NoFloat) { + unsigned MaxIntSize = 64; + return (MemVT.getSizeInBits() <= MaxIntSize); + } + return true; +} + +TargetLowering::AtomicExpansionKind +VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { + if (AI->getOperation() == AtomicRMWInst::Xchg) { + const DataLayout &DL = AI->getModule()->getDataLayout(); + if (DL.getTypeStoreSize(AI->getValOperand()->getType()) < + (VETargetLowering::getMinCmpXchgSizeInBits() / 8)) + return AtomicExpansionKind::CmpXChg; // Uses cas instruction for 1byte or + // 2byte atomic_swap + return AtomicExpansionKind::None; // Uses ts1am instruction + } + return AtomicExpansionKind::CmpXChg; +} + VETargetLowering::VETargetLowering(const TargetMachine &TM, const VESubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { + MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0)); + // Instructions which use registers as conditionals examine all the // bits (as does the pseudo SELECT_CC expansion). I don't think it // matters much whether it's ZeroOrOneBooleanContent, or @@ -205,7 +592,233 @@ setBooleanVectorContents(ZeroOrOneBooleanContent); // Set up the register classes. + addRegisterClass(MVT::i32, &VE::I32RegClass); addRegisterClass(MVT::i64, &VE::I64RegClass); + addRegisterClass(MVT::f32, &VE::F32RegClass); + addRegisterClass(MVT::f64, &VE::I64RegClass); + addRegisterClass(MVT::f128, &VE::F128RegClass); + + // Turn FP extload into load/fpextend + for (MVT VT : MVT::fp_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand); + } + + // VE doesn't have i1 sign extending load + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + setTruncStoreAction(VT, MVT::i1, Expand); + } + + // Turn FP truncstore into trunc + store. + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f128, MVT::f32, Expand); + setTruncStoreAction(MVT::f128, MVT::f64, Expand); + + // Custom legalize GlobalAddress nodes into LO/HI parts. 
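+  // These nodes are handled by makeAddress()/makeHiLoPair() later in this
+  // file: a %got/%gotoff based access in PIC mode, or an absolute
+  // VK_VE_HI32/VK_VE_LO32 pair otherwise.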
+ setOperationAction(ISD::GlobalAddress, PtrVT, Custom); + setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); + setOperationAction(ISD::ConstantPool, PtrVT, Custom); + setOperationAction(ISD::BlockAddress, PtrVT, Custom); + + // VE has no REM or DIVREM operations. + for (MVT VT : MVT::integer_valuetypes()) { + setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::SDIVREM, VT, Expand); + setOperationAction(ISD::UDIVREM, VT, Expand); + } + + // VE has instructions for fp<->sint, so use them. + + // VE doesn't have instructions for fp<->uint, so expand them by llvm + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64 + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64 + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + + // VE doesn't have BRCOND + setOperationAction(ISD::BRCOND, MVT::Other, Expand); + + // BRIND/BR_JT are not implemented yet. + // FIXME: BRIND instruction is implemented, but JumpTable is not yet. + setOperationAction(ISD::BRIND, MVT::Other, Expand); + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + + setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); + setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); + setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); + if (TM.Options.ExceptionModel == ExceptionHandling::SjLj) + setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); + + setTargetDAGCombine(ISD::FADD); + // setTargetDAGCombine(ISD::FMA); + + // ATOMICs. + // Atomics are supported on VE. + setMaxAtomicSizeInBitsSupported(64); + setMinCmpXchgSizeInBits(32); + setSupportsUnalignedAtomics(false); + + // Use custom inserter, LowerATOMIC_FENCE, for ATOMIC_FENCE. + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + + for (MVT VT : MVT::integer_valuetypes()) { + // Several atomic operations are converted to VE instructions well. + // Additional memory fences are generated in emitLeadingfence and + // emitTrailingFence functions. + setOperationAction(ISD::ATOMIC_LOAD, VT, Legal); + setOperationAction(ISD::ATOMIC_STORE, VT, Legal); + setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Legal); + setOperationAction(ISD::ATOMIC_SWAP, VT, Legal); + + setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand); + + // FIXME: not supported "atmam" isntructions yet + setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand); + + // VE doesn't have follwing instructions + setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand); + } + + // FIXME: VE's I128 stuff is not investigated yet +#if 0 + // These libcalls are not available in 32-bit. 
+ setLibcallName(RTLIB::SHL_I128, nullptr); + setLibcallName(RTLIB::SRL_I128, nullptr); + setLibcallName(RTLIB::SRA_I128, nullptr); +#endif + + for (MVT VT : MVT::fp_valuetypes()) { + // VE has no sclar FMA instruction + setOperationAction(ISD::FMA, VT, Expand); + setOperationAction(ISD::FMAD, VT, Expand); + setOperationAction(ISD::FREM, VT, Expand); + setOperationAction(ISD::FNEG, VT, Expand); + setOperationAction(ISD::FABS, VT, Expand); + setOperationAction(ISD::FSQRT, VT, Expand); + setOperationAction(ISD::FSIN, VT, Expand); + setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FPOWI, VT, Expand); + setOperationAction(ISD::FPOW, VT, Expand); + setOperationAction(ISD::FLOG, VT, Expand); + setOperationAction(ISD::FLOG2, VT, Expand); + setOperationAction(ISD::FLOG10, VT, Expand); + setOperationAction(ISD::FEXP, VT, Expand); + setOperationAction(ISD::FEXP2, VT, Expand); + setOperationAction(ISD::FCEIL, VT, Expand); + setOperationAction(ISD::FTRUNC, VT, Expand); + setOperationAction(ISD::FRINT, VT, Expand); + setOperationAction(ISD::FNEARBYINT, VT, Expand); + setOperationAction(ISD::FROUND, VT, Expand); + setOperationAction(ISD::FFLOOR, VT, Expand); + setOperationAction(ISD::FMINNUM, VT, Expand); + setOperationAction(ISD::FMAXNUM, VT, Expand); + setOperationAction(ISD::FMINIMUM, VT, Expand); + setOperationAction(ISD::FMAXIMUM, VT, Expand); + setOperationAction(ISD::FSINCOS, VT, Expand); + } + + // FIXME: VE's FCOPYSIGN is not investivated yet + setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + + // FIXME: VE's SHL_PARTS and others are not investigated yet. + setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); + if (1) { + setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); + } + + // Expands to [SU]MUL_LOHI. + setOperationAction(ISD::MULHU, MVT::i32, Expand); + setOperationAction(ISD::MULHS, MVT::i32, Expand); + // setOperationAction(ISD::MUL, MVT::i32, Expand); + + if (1) { + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i64, Expand); + setOperationAction(ISD::MULHS, MVT::i64, Expand); + + setOperationAction(ISD::UMULO, MVT::i64, Custom); + setOperationAction(ISD::SMULO, MVT::i64, Custom); + } + + setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); + setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); + setOperationAction(ISD::BSWAP, MVT::i32, Legal); + setOperationAction(ISD::BSWAP, MVT::i64, Legal); + setOperationAction(ISD::CTPOP, MVT::i32, Legal); + setOperationAction(ISD::CTPOP, MVT::i64, Legal); + setOperationAction(ISD::CTLZ, MVT::i32, Legal); + setOperationAction(ISD::CTLZ, MVT::i64, Legal); + setOperationAction(ISD::CTTZ, MVT::i32, Expand); + setOperationAction(ISD::CTTZ, MVT::i64, Expand); + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i64, Expand); + setOperationAction(ISD::ROTR, MVT::i32, Expand); + setOperationAction(ISD::ROTR, MVT::i64, Expand); + + // VASTART needs to be custom lowered to use the VarArgsFrameIndex. + setOperationAction(ISD::VASTART, MVT::Other, Custom); + // VAARG needs to be lowered to access with 8 bytes alignment. 
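+  // See LowerVAARG() below: f32 values are read from offset 4 inside their
+  // 8-byte slot, and f128 values are realigned to 16 bytes first.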
+ setOperationAction(ISD::VAARG, MVT::Other, Custom); + + // Use the default implementation. + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + + // Expand DYNAMIC_STACKALLOC + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); + + // LOAD/STORE for f128 needs to be custom lowered to expand two loads/stores + setOperationAction(ISD::LOAD, MVT::f128, Custom); + setOperationAction(ISD::STORE, MVT::f128, Custom); + + // VE has FAQ, FSQ, FMQ, and FCQ + setOperationAction(ISD::FADD, MVT::f128, Legal); + setOperationAction(ISD::FSUB, MVT::f128, Legal); + setOperationAction(ISD::FMUL, MVT::f128, Legal); + setOperationAction(ISD::FDIV, MVT::f128, Expand); + setOperationAction(ISD::FSQRT, MVT::f128, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal); + setOperationAction(ISD::FP_ROUND, MVT::f128, Legal); + + // Other configurations related to f128. + setOperationAction(ISD::SELECT, MVT::f128, Legal); + setOperationAction(ISD::SELECT_CC, MVT::f128, Legal); + setOperationAction(ISD::SETCC, MVT::f128, Legal); + setOperationAction(ISD::BR_CC, MVT::f128, Legal); + + setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + + // TRAP to expand (which turns it into abort). + setOperationAction(ISD::TRAP, MVT::Other, Expand); + + // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand" + // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP. 
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); setStackPointerRegisterToSaveRestore(VE::SX11); @@ -222,13 +835,1345 @@ switch ((VEISD::NodeType)Opcode) { case VEISD::FIRST_NUMBER: break; + case VEISD::CMPICC: + return "VEISD::CMPICC"; + case VEISD::CMPFCC: + return "VEISD::CMPFCC"; + case VEISD::BRICC: + return "VEISD::BRICC"; + case VEISD::BRXCC: + return "VEISD::BRXCC"; + case VEISD::BRFCC: + return "VEISD::BRFCC"; + case VEISD::SELECT: + return "VEISD::SELECT"; + case VEISD::SELECT_ICC: + return "VEISD::SELECT_ICC"; + case VEISD::SELECT_XCC: + return "VEISD::SELECT_XCC"; + case VEISD::SELECT_FCC: + return "VEISD::SELECT_FCC"; + case VEISD::EH_SJLJ_SETJMP: + return "VEISD::EH_SJLJ_SETJMP"; + case VEISD::EH_SJLJ_LONGJMP: + return "VEISD::EH_SJLJ_LONGJMP"; + case VEISD::EH_SJLJ_SETUP_DISPATCH: + return "VEISD::EH_SJLJ_SETUP_DISPATCH"; + case VEISD::Hi: + return "VEISD::Hi"; + case VEISD::Lo: + return "VEISD::Lo"; + case VEISD::FTOI: + return "VEISD::FTOI"; + case VEISD::ITOF: + return "VEISD::ITOF"; + case VEISD::FTOX: + return "VEISD::FTOX"; + case VEISD::XTOF: + return "VEISD::XTOF"; + case VEISD::MAX: + return "VEISD::MAX"; + case VEISD::MIN: + return "VEISD::MIN"; + case VEISD::FMAX: + return "VEISD::FMAX"; + case VEISD::FMIN: + return "VEISD::FMIN"; + case VEISD::GETFUNPLT: + return "VEISD::GETFUNPLT"; + case VEISD::GETSTACKTOP: + return "VEISD::GETSTACKTOP"; + case VEISD::GETTLSADDR: + return "VEISD::GETTLSADDR"; + case VEISD::MEMBARRIER: + return "VEISD::MEMBARRIER"; + case VEISD::CALL: + return "VEISD::CALL"; case VEISD::RET_FLAG: return "VEISD::RET_FLAG"; + case VEISD::GLOBAL_BASE_REG: + return "VEISD::GLOBAL_BASE_REG"; + case VEISD::FLUSHW: + return "VEISD::FLUSHW"; + case VEISD::Wrapper: + return "VEISD::Wrapper"; } return nullptr; } EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, EVT VT) const { - return MVT::i64; + if (!VT.isVector()) + return MVT::i32; + return VT.changeVectorElementTypeToInteger(); +} + +/// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to +/// be zero. Op is expected to be a target specific node. Used by DAG +/// combiner. +void VETargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth) const { + KnownBits Known2; + Known.resetAll(); + + switch (Op.getOpcode()) { + default: + break; + case VEISD::SELECT_ICC: + case VEISD::SELECT_XCC: + case VEISD::SELECT_FCC: + Known = DAG.computeKnownBits(Op.getOperand(1), Depth + 1); + Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); + + // Only known if known in both the LHS and RHS. + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + break; + } +} + +// Convert to a target node and set target flags. 
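+// Illustrative example: wrapping a GlobalAddressSDNode for @sym with
+// VEMCExpr::VK_VE_HI32 yields a TargetGlobalAddress carrying that flag, so
+// later MC lowering emits the matching relocation modifier.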
+SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF, + SelectionDAG &DAG) const { + if (const GlobalAddressSDNode *GA = dyn_cast(Op)) + return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), + GA->getValueType(0), GA->getOffset(), TF); + + if (const ConstantPoolSDNode *CP = dyn_cast(Op)) + return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0), + CP->getAlignment(), CP->getOffset(), TF); + + if (const BlockAddressSDNode *BA = dyn_cast(Op)) + return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(), + 0, TF); + + if (const ExternalSymbolSDNode *ES = dyn_cast(Op)) + return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0), + TF); + + llvm_unreachable("Unhandled address SDNode"); +} + +// Split Op into high and low parts according to HiTF and LoTF. +// Return an ADD node combining the parts. +SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG)); + SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG)); + return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo); +} + +// Build SDNodes for producing an address from a GlobalAddress, ConstantPool, +// or ExternalSymbol SDNode. +SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT VT = getPointerTy(DAG.getDataLayout()); + + // Handle PIC mode first. VE needs a got load for every variable! + if (isPositionIndependent()) { + // GLOBAL_BASE_REG codegen'ed with call. Inform MFI that this + // function has calls. + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + MFI.setHasCalls(true); + + if (dyn_cast(Op) != nullptr || + (dyn_cast(Op) != nullptr && + dyn_cast(Op)->getGlobal()->hasLocalLinkage())) { + // Create following instructions for local linkage PIC code. + // lea %s35, %gotoff_lo(.LCPI0_0) + // and %s35, %s35, (32)0 + // lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35) + // adds.l %s35, %s15, %s35 ; %s15 is GOT + // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15) + SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32, + VEMCExpr::VK_VE_GOTOFF_LO32, DAG); + SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT); + return DAG.getNode(ISD::ADD, DL, VT, GlobalBase, HiLo); + } + // Create following instructions for not local linkage PIC code. + // lea %s35, %got_lo(.LCPI0_0) + // and %s35, %s35, (32)0 + // lea.sl %s35, %got_hi(.LCPI0_0)(%s35) + // adds.l %s35, %s15, %s35 ; %s15 is GOT + // ld %s35, (,%s35) + // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15) + SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32, + VEMCExpr::VK_VE_GOT_LO32, DAG); + SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT); + SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, VT, GlobalBase, HiLo); + return DAG.getLoad(VT, DL, DAG.getEntryNode(), AbsAddr, + MachinePointerInfo::getGOT(DAG.getMachineFunction())); + } + + // This is one of the absolute code models. + switch (getTargetMachine().getCodeModel()) { + default: + llvm_unreachable("Unsupported absolute code model"); + case CodeModel::Small: + case CodeModel::Medium: + case CodeModel::Large: + // abs64. 
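+    // Assumed expansion, mirroring the sequences shown in comments elsewhere
+    // in this patch:
+    //   lea    %reg, sym@lo
+    //   and    %reg, %reg, (32)0
+    //   lea.sl %reg, sym@hi(%reg)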
+ return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG); + } +} + +SDValue VETargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { + return makeAddress(Op, DAG); +} + +SDValue VETargetLowering::LowerConstantPool(SDValue Op, + SelectionDAG &DAG) const { + return makeAddress(Op, DAG); +} + +SDValue VETargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { + return makeAddress(Op, DAG); +} + +SDValue +VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + + // Generate following code: + // t1: ch,glue = callseq_start t0, 0, 0 + // t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1 + // t3: ch,glue = callseq_end t2, 0, 0, t2:2 + // t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1 + SDValue Label = withTargetFlags(Op, 0, DAG); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + // Lowering the machine isd will make sure everything is in the right + // location. + SDValue Chain = DAG.getEntryNode(); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask( + DAG.getMachineFunction(), CallingConv::C); + Chain = DAG.getCALLSEQ_START(Chain, 64, 0, dl); + SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)}; + Chain = DAG.getNode(VEISD::GETTLSADDR, dl, NodeTys, Args); + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, dl, true), + DAG.getIntPtrConstant(0, dl, true), + Chain.getValue(1), dl); + Chain = DAG.getCopyFromReg(Chain, dl, VE::SX0, PtrVT, Chain.getValue(1)); + + // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls. + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + MFI.setHasCalls(true); + + // Also generate code to prepare a GOT register if it is PIC. + if (isPositionIndependent()) { + MachineFunction &MF = DAG.getMachineFunction(); + Subtarget->getInstrInfo()->getGlobalBaseReg(&MF); + } + + return Chain; +} + +SDValue VETargetLowering::LowerToTLSLocalExecModel(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + + // Generate following code: + // lea %s0, Op@tpoff_lo + // and %s0, %s0, (32)0 + // lea.sl %s0, Op@tpoff_hi(%s0) + // add %s0, %s0, %tp + // FIXME: use lea.sl %s0, Op@tpoff_hi(%tp, %s0) for better performance + SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_TPOFF_HI32, + VEMCExpr::VK_VE_TPOFF_LO32, DAG); + return DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(VE::SX14, PtrVT), + HiLo); +} + +SDValue VETargetLowering::LowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { + // Current implementation of nld doesn't allow local exec model code + // described in VE-tls_v1.1.pdf (*1) as its input. The nld accept + // only general dynamic model and optimize it whenever. So, here + // we need to generate only general dynamic model code sequence. 
+ // + // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf + return LowerToTLSGeneralDynamicModel(Op, DAG); +} + +SDValue VETargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + return DAG.getNode(VEISD::EH_SJLJ_SETJMP, dl, + DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), + Op.getOperand(1)); +} + +SDValue VETargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + return DAG.getNode(VEISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), + Op.getOperand(1)); +} + +SDValue VETargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + return DAG.getNode(VEISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other, + Op.getOperand(0)); +} + +static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, + const VETargetLowering &TLI) { + MachineFunction &MF = DAG.getMachineFunction(); + VEMachineFunctionInfo *FuncInfo = MF.getInfo(); + auto PtrVT = TLI.getPointerTy(DAG.getDataLayout()); + + // Need frame address to find the address of VarArgsFrameIndex. + MF.getFrameInfo().setFrameAddressIsTaken(true); + + // vastart just stores the address of the VarArgsFrameIndex slot into the + // memory location argument. + SDLoc DL(Op); + SDValue Offset = + DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT), + DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL)); + const Value *SV = cast(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1), + MachinePointerInfo(SV)); +} + +static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { + SDNode *Node = Op.getNode(); + EVT VT = Node->getValueType(0); + SDValue InChain = Node->getOperand(0); + SDValue VAListPtr = Node->getOperand(1); + EVT PtrVT = VAListPtr.getValueType(); + const Value *SV = cast(Node->getOperand(2))->getValue(); + SDLoc DL(Node); + SDValue VAList = + DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV)); + SDValue Chain = VAList.getValue(1); + SDValue NextPtr; + + if (VT == MVT::f128) { + // Alignment + int Align = 16; + VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, + DAG.getConstant(Align - 1, DL, PtrVT)); + VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList, + DAG.getConstant(-Align, DL, PtrVT)); + // Increment the pointer, VAList, by 16 to the next vaarg. + NextPtr = + DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(16, DL)); + } else if (VT == MVT::f32) { + // float --> need special handling like below. + // 0 4 + // +------+------+ + // | empty| float| + // +------+------+ + // Increment the pointer, VAList, by 8 to the next vaarg. + NextPtr = + DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL)); + // Then, adjust VAList. + unsigned InternalOffset = 4; + VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, + DAG.getConstant(InternalOffset, DL, PtrVT)); + } else { + // Increment the pointer, VAList, by 8 to the next vaarg. + NextPtr = + DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL)); + } + + // Store the incremented VAList to the legalized pointer. + InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV)); + + // Load the actual argument out of the pointer VAList. + // We can't count on greater alignment than the word size. 
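+  // Worked example: with the va_list pointer P 8-byte aligned, an i64 or
+  // double is loaded from P and P advances by 8; a float is loaded from
+  // P + 4 (the upper half of the slot, see the layout above) while P still
+  // advances by 8; an f128 first rounds P up to 16 and then advances by 16.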
+ return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(), + std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8); +} + +SDValue VETargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { + // Generate following code. + // (void)__llvm_grow_stack(size); + // ret = GETSTACKTOP; // pseudo instruction + SDLoc dl(Op); + + SDValue Size = Op.getOperand(1); // Legalize the size. + EVT VT = Size->getValueType(0); + + // Prepare arguments + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Size; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Args.push_back(Entry); + Type *RetTy = Type::getVoidTy(*DAG.getContext()); + + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Callee = DAG.getTargetExternalSymbol("__llvm_grow_stack", PtrVT, 0); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(DAG.getEntryNode()) + .setCallee(CallingConv::VE_LLVM_GROW_STACK, RetTy, Callee, + std::move(Args)) + .setDiscardResult(true); + std::pair pair = LowerCallTo(CLI); + SDValue Chain = pair.second; + SDValue Value = DAG.getNode(VEISD::GETSTACKTOP, dl, VT, Chain); + SDValue Ops[2] = {Value, Chain}; + return DAG.getMergeValues(Ops, dl); +} + +static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG, + const VETargetLowering &TLI, + const VESubtarget *Subtarget) { + SDLoc dl(Op); + unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + MFI.setFrameAddressIsTaken(true); + + EVT PtrVT = Op.getValueType(); + + // Naked functions never have a frame pointer, and so we use r1. For all + // other functions, this decision must be delayed until during PEI. + const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + Register FrameReg = RegInfo->getFrameRegister(MF); + + SDValue FrameAddr = + DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); + while (Depth--) + FrameAddr = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), + FrameAddr, MachinePointerInfo()); + return FrameAddr; +} + +static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG, + const VETargetLowering &TLI, + const VESubtarget *Subtarget) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + MFI.setReturnAddressIsTaken(true); + + if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG)) + return SDValue(); + + SDLoc dl(Op); + unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + + auto PtrVT = TLI.getPointerTy(MF.getDataLayout()); + + if (Depth > 0) { + SDValue FrameAddr = LowerFRAMEADDR(Op, DAG, TLI, Subtarget); + SDValue Offset = DAG.getConstant(8, dl, MVT::i64); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), + MachinePointerInfo()); + } + + // Just load the return address off the stack. + SDValue RetAddrFI = DAG.getFrameIndex(1, PtrVT); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, + MachinePointerInfo()); +} + +// Lower a f128 load into two f64 loads. +static SDValue LowerF128Load(SDValue Op, SelectionDAG &DAG) { + SDLoc dl(Op); + LoadSDNode *LdNode = dyn_cast(Op.getNode()); + assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type"); + + SDValue BasePtr = LdNode->getBasePtr(); + if (dyn_cast(BasePtr.getNode())) { + // For the case of frame index, expanding it here cause dependency + // problem. 
So, treat it as a legal and expand it in eliminateFrameIndex + return Op; + } + + unsigned alignment = LdNode->getAlignment(); + if (alignment > 8) + alignment = 8; + + SDValue Lo64 = + DAG.getLoad(MVT::f64, dl, LdNode->getChain(), LdNode->getBasePtr(), + LdNode->getPointerInfo(), alignment, + LdNode->isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + EVT addrVT = LdNode->getBasePtr().getValueType(); + SDValue HiPtr = DAG.getNode(ISD::ADD, dl, addrVT, LdNode->getBasePtr(), + DAG.getConstant(8, dl, addrVT)); + SDValue Hi64 = + DAG.getLoad(MVT::f64, dl, LdNode->getChain(), HiPtr, + LdNode->getPointerInfo(), alignment, + LdNode->isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + + SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, dl, MVT::i32); + SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, dl, MVT::i32); + + // VE stores Hi64 to 8(addr) and Lo64 to 0(addr) + SDNode *InFP128 = + DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f128); + InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, MVT::f128, + SDValue(InFP128, 0), Hi64, SubRegEven); + InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, MVT::f128, + SDValue(InFP128, 0), Lo64, SubRegOdd); + SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1), + SDValue(Hi64.getNode(), 1)}; + SDValue OutChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); + SDValue Ops[2] = {SDValue(InFP128, 0), OutChain}; + return DAG.getMergeValues(Ops, dl); +} + +static SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) { + LoadSDNode *LdNode = cast(Op.getNode()); + + EVT MemVT = LdNode->getMemoryVT(); + if (MemVT == MVT::f128) + return LowerF128Load(Op, DAG); + + return Op; +} + +// Lower a f128 store into two f64 stores. +static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) { + SDLoc dl(Op); + StoreSDNode *StNode = dyn_cast(Op.getNode()); + assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type"); + + SDValue BasePtr = StNode->getBasePtr(); + if (dyn_cast(BasePtr.getNode())) { + // For the case of frame index, expanding it here cause dependency + // problem. So, treat it as a legal and expand it in eliminateFrameIndex + return Op; + } + + SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, dl, MVT::i32); + SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, dl, MVT::i32); + + SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i64, + StNode->getValue(), SubRegEven); + SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i64, + StNode->getValue(), SubRegOdd); + + unsigned alignment = StNode->getAlignment(); + if (alignment > 8) + alignment = 8; + + // VE stores Hi64 to 8(addr) and Lo64 to 0(addr) + SDValue OutChains[2]; + OutChains[0] = + DAG.getStore(StNode->getChain(), dl, SDValue(Lo64, 0), + StNode->getBasePtr(), MachinePointerInfo(), alignment, + StNode->isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + EVT addrVT = StNode->getBasePtr().getValueType(); + SDValue HiPtr = DAG.getNode(ISD::ADD, dl, addrVT, StNode->getBasePtr(), + DAG.getConstant(8, dl, addrVT)); + OutChains[1] = + DAG.getStore(StNode->getChain(), dl, SDValue(Hi64, 0), HiPtr, + MachinePointerInfo(), alignment, + StNode->isVolatile() ? 
MachineMemOperand::MOVolatile
+                                        : MachineMemOperand::MONone);
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+}
+
+static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) {
+  SDLoc dl(Op);
+  StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
+
+  EVT MemVT = St->getMemoryVT();
+  if (MemVT == MVT::f128)
+    return LowerF128Store(Op, DAG);
+
+  // Otherwise, ask llvm to expand it.
+  return SDValue();
+}
+
+// Custom lower UMULO/SMULO for VE. This code is similar to ExpandNode()
+// in LegalizeDAG.cpp except the order of arguments to the library function.
+static SDValue LowerUMULO_SMULO(SDValue Op, SelectionDAG &DAG,
+                                const VETargetLowering &TLI) {
+  unsigned opcode = Op.getOpcode();
+  assert((opcode == ISD::UMULO || opcode == ISD::SMULO) && "Invalid Opcode.");
+
+  bool isSigned = (opcode == ISD::SMULO);
+  EVT VT = MVT::i64;
+  EVT WideVT = MVT::i128;
+  SDLoc dl(Op);
+  SDValue LHS = Op.getOperand(0);
+
+  if (LHS.getValueType() != VT)
+    return Op;
+
+  SDValue ShiftAmt = DAG.getConstant(63, dl, VT);
+
+  SDValue RHS = Op.getOperand(1);
+  SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, ShiftAmt);
+  SDValue HiRHS = DAG.getNode(ISD::SRA, dl, MVT::i64, RHS, ShiftAmt);
+  SDValue Args[] = {LHS, HiLHS, RHS, HiRHS};
+
+  TargetLowering::MakeLibCallOptions CallOptions;
+  CallOptions.setSExt(isSigned);
+  SDValue MulResult =
+      TLI.makeLibCall(DAG, RTLIB::MUL_I128, WideVT, Args, CallOptions, dl)
+          .first;
+  SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, MulResult,
+                                   DAG.getIntPtrConstant(0, dl));
+  SDValue TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, MulResult,
+                                DAG.getIntPtrConstant(1, dl));
+  if (isSigned) {
+    SDValue Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
+    TopHalf = DAG.getSetCC(dl, MVT::i32, TopHalf, Tmp1, ISD::SETNE);
+  } else {
+    TopHalf = DAG.getSetCC(dl, MVT::i32, TopHalf, DAG.getConstant(0, dl, VT),
+                           ISD::SETNE);
+  }
+  // MulResult is a node with an illegal type. Because such things are not
+  // generally permitted during this phase of legalization, ensure that
+  // nothing is left using the node. The above EXTRACT_ELEMENT nodes should have
+  // been folded.
+  assert(MulResult->use_empty() && "Illegally typed node still in use!");
+
+  SDValue Ops[2] = {BottomHalf, TopHalf};
+  return DAG.getMergeValues(Ops, dl);
+}
+
+SDValue VETargetLowering::LowerATOMIC_FENCE(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
+      cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
+  SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
+      cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
+
+  // VE uses Release consistency, so need a fence instruction if it is a
+  // cross-thread fence.
+  if (FenceSSID == SyncScope::System) {
+    switch (FenceOrdering) {
+    case AtomicOrdering::NotAtomic:
+    case AtomicOrdering::Unordered:
+    case AtomicOrdering::Monotonic:
+      // No need to generate fencem instruction here.
+      break;
+    case AtomicOrdering::Acquire:
+      // Generate "fencem 2" as acquire fence.
+      return SDValue(
+          DAG.getMachineNode(VE::FENCEload, DL, MVT::Other, Op.getOperand(0)),
+          0);
+    case AtomicOrdering::Release:
+      // Generate "fencem 1" as release fence.
+      return SDValue(
+          DAG.getMachineNode(VE::FENCEstore, DL, MVT::Other, Op.getOperand(0)),
+          0);
+    case AtomicOrdering::AcquireRelease:
+    case AtomicOrdering::SequentiallyConsistent:
+      // Generate "fencem 3" as acq_rel and seq_cst fence.
+      // FIXME: "fencem 3" doesn't wait for PCIe device accesses,
+      // so seq_cst may require more instructions for them.
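+      // For example, an IR "fence acquire" (std::atomic_thread_fence with
+      // memory_order_acquire) takes the Acquire case above and becomes a
+      // single "fencem 2"; acq_rel and seq_cst fences end up here as
+      // "fencem 3".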
+ return SDValue(DAG.getMachineNode(VE::FENCEloadstore, DL, MVT::Other, + Op.getOperand(0)), + 0); + } + } + + // MEMBARRIER is a compiler barrier; it codegens to a no-op. + return DAG.getNode(VEISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); +} + +static Instruction *callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Function *Func = Intrinsic::getDeclaration(M, Id); + return Builder.CreateCall(Func, {}); +} + +Instruction *VETargetLowering::emitLeadingFence(IRBuilder<> &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { + switch (Ord) { + case AtomicOrdering::NotAtomic: + case AtomicOrdering::Unordered: + llvm_unreachable("Invalid fence: unordered/non-atomic"); + case AtomicOrdering::Monotonic: + case AtomicOrdering::Acquire: + return nullptr; // Nothing to do + case AtomicOrdering::Release: + case AtomicOrdering::AcquireRelease: + return callIntrinsic(Builder, Intrinsic::ve_fencem1); + case AtomicOrdering::SequentiallyConsistent: + if (!Inst->hasAtomicStore()) + return nullptr; // Nothing to do + return callIntrinsic(Builder, Intrinsic::ve_fencem3); + } + llvm_unreachable("Unknown fence ordering in emitLeadingFence"); +} + +Instruction *VETargetLowering::emitTrailingFence(IRBuilder<> &Builder, + Instruction *Inst, + AtomicOrdering Ord) const { + switch (Ord) { + case AtomicOrdering::NotAtomic: + case AtomicOrdering::Unordered: + llvm_unreachable("Invalid fence: unordered/not-atomic"); + case AtomicOrdering::Monotonic: + case AtomicOrdering::Release: + return nullptr; // Nothing to do + case AtomicOrdering::Acquire: + case AtomicOrdering::AcquireRelease: + return callIntrinsic(Builder, Intrinsic::ve_fencem2); + case AtomicOrdering::SequentiallyConsistent: + return callIntrinsic(Builder, Intrinsic::ve_fencem3); + } + llvm_unreachable("Unknown fence ordering in emitTrailingFence"); +} + +SDValue VETargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); + switch (IntNo) { + default: + return SDValue(); // Don't custom lower most intrinsics. + case Intrinsic::thread_pointer: { + report_fatal_error("Intrinsic::thread_point is not implemented yet"); + } + case Intrinsic::eh_sjlj_lsda: { + MachineFunction &MF = DAG.getMachineFunction(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); + const VETargetMachine *TM = + static_cast(&DAG.getTarget()); + + // Creat GCC_except_tableXX string. The real symbol for that will be + // generated in EHStreamer::emitExceptionTable() later. So, we just + // borrow it's name here. + TM->getStrList()->push_back(std::string( + (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str())); + SDValue Addr = + DAG.getTargetExternalSymbol(TM->getStrList()->back().c_str(), PtrVT, 0); + if (isPositionIndependent()) { + Addr = makeHiLoPair(Addr, VEMCExpr::VK_VE_GOTOFF_HI32, + VEMCExpr::VK_VE_GOTOFF_LO32, DAG); + SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, dl, PtrVT); + return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalBase, Addr); + } else { + return makeHiLoPair(Addr, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, + DAG); + } + } + } +} + +SDValue VETargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); + switch (IntNo) { + default: + return SDValue(); // Don't custom lower most intrinsics. 
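+    // (Returning an empty SDValue here hands the intrinsic back to the
+    // generic legalizer and the TableGen selection patterns.)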
+ } +} + +SDValue VETargetLowering::LowerINTRINSIC_VOID(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); + switch (IntNo) { + default: + return SDValue(); // Don't custom lower most intrinsics. + } +} + +SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + + switch (Op.getOpcode()) { + default: + llvm_unreachable("Should not custom lower this!"); + + case ISD::RETURNADDR: + return LowerRETURNADDR(Op, DAG, *this, Subtarget); + case ISD::FRAMEADDR: + return LowerFRAMEADDR(Op, DAG, *this, Subtarget); + case ISD::GlobalTLSAddress: + return LowerGlobalTLSAddress(Op, DAG); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG); + case ISD::BlockAddress: + return LowerBlockAddress(Op, DAG); + case ISD::ConstantPool: + return LowerConstantPool(Op, DAG); + case ISD::EH_SJLJ_SETJMP: + return LowerEH_SJLJ_SETJMP(Op, DAG); + case ISD::EH_SJLJ_LONGJMP: + return LowerEH_SJLJ_LONGJMP(Op, DAG); + case ISD::EH_SJLJ_SETUP_DISPATCH: + return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG); + case ISD::VASTART: + return LowerVASTART(Op, DAG, *this); + case ISD::VAARG: + return LowerVAARG(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: + return LowerDYNAMIC_STACKALLOC(Op, DAG); + + case ISD::LOAD: + return LowerLOAD(Op, DAG); + case ISD::STORE: + return LowerSTORE(Op, DAG); + case ISD::UMULO: + case ISD::SMULO: + return LowerUMULO_SMULO(Op, DAG, *this); + case ISD::ATOMIC_FENCE: + return LowerATOMIC_FENCE(Op, DAG); + case ISD::INTRINSIC_VOID: + return LowerINTRINSIC_VOID(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: + return LowerINTRINSIC_W_CHAIN(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: + return LowerINTRINSIC_WO_CHAIN(Op, DAG); + } +} + +/// Return the entry encoding for a jump table in the +/// current function. The returned value is a member of the +/// MachineJumpTableInfo::JTEntryKind enum. +unsigned VETargetLowering::getJumpTableEncoding() const { + // VE doesn't support GOT32 style of labels in the current version of nas. + // So, we generates a following entry for each jump table. + // .4bytes .LBB0_2- + if (isPositionIndependent()) + return MachineJumpTableInfo::EK_Custom32; + + // Otherwise, use the normal jump table encoding heuristics. + return TargetLowering::getJumpTableEncoding(); +} + +const MCExpr *VETargetLowering::LowerCustomJumpTableEntry( + const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, + unsigned uid, MCContext &Ctx) const { + assert(isPositionIndependent()); + // VE doesn't support GOT32 style of labels in the current version of nas. + // So, we generates a following entry for each jump table. 
+ // .4bytes .LBB0_2- + auto Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx); + MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName().data()); + auto Base = MCSymbolRefExpr::create(Sym, Ctx); + return MCBinaryExpr::createSub(Value, Base, Ctx); +} + +void VETargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, + MachineBasicBlock *MBB, + MachineBasicBlock *DispatchBB, + int FI) const { + DebugLoc DL = MI.getDebugLoc(); + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + const VEInstrInfo *TII = Subtarget->getInstrInfo(); + + const TargetRegisterClass *TRC = &VE::I64RegClass; + Register Tmp1 = MRI->createVirtualRegister(TRC); + Register Tmp2 = MRI->createVirtualRegister(TRC); + Register VR = MRI->createVirtualRegister(TRC); + unsigned Op = VE::STSri; + + if (isPositionIndependent()) { + // Create following instructions for local linkage PIC code. + // lea %Tmp1, DispatchBB@gotoff_lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %Tmp3, DispatchBB@gotoff_hi(%Tmp2) + // adds.l %VR, %s15, %Tmp3 ; %s15 is GOT + // FIXME: use lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) + Register Tmp3 = MRI->createVirtualRegister(&VE::I64RegClass); + BuildMI(*MBB, MI, DL, TII->get(VE::LEAzzi), Tmp1) + .addMBB(DispatchBB, VEMCExpr::VK_VE_GOTOFF_LO32); + BuildMI(*MBB, MI, DL, TII->get(VE::ANDrm0), Tmp2).addReg(Tmp1).addImm(32); + BuildMI(*MBB, MI, DL, TII->get(VE::LEASLrzi), Tmp3) + .addReg(Tmp2) + .addMBB(DispatchBB, VEMCExpr::VK_VE_GOTOFF_HI32); + BuildMI(*MBB, MI, DL, TII->get(VE::ADXrr), VR) + .addReg(VE::SX15) + .addReg(Tmp3); + } else { + // lea %Tmp1, DispatchBB@lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %VR, DispatchBB@hi(%Tmp2) + BuildMI(*MBB, MI, DL, TII->get(VE::LEAzzi), Tmp1) + .addMBB(DispatchBB, VEMCExpr::VK_VE_LO32); + BuildMI(*MBB, MI, DL, TII->get(VE::ANDrm0), Tmp2).addReg(Tmp1).addImm(32); + BuildMI(*MBB, MI, DL, TII->get(VE::LEASLrzi), VR) + .addReg(Tmp2) + .addMBB(DispatchBB, VEMCExpr::VK_VE_HI32); + } + + MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(Op)); + addFrameReference(MIB, FI, 56 + 16); + MIB.addReg(VR); +} + +MachineBasicBlock * +VETargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, + MachineBasicBlock *BB) const { + DebugLoc DL = MI.getDebugLoc(); + MachineFunction *MF = BB->getParent(); + MachineFrameInfo &MFI = MF->getFrameInfo(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + const VEInstrInfo *TII = Subtarget->getInstrInfo(); + int FI = MFI.getFunctionContextIndex(); + + // Get a mapping of the call site numbers to all of the landing pads they're + // associated with. + DenseMap> CallSiteNumToLPad; + unsigned MaxCSNum = 0; + for (auto &MBB : *MF) { + if (!MBB.isEHPad()) + continue; + + MCSymbol *Sym = nullptr; + for (const auto &MI : MBB) { + if (MI.isDebugInstr()) + continue; + + assert(MI.isEHLabel() && "expected EH_LABEL"); + Sym = MI.getOperand(0).getMCSymbol(); + break; + } + + if (!MF->hasCallSiteLandingPad(Sym)) + continue; + + for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) { + CallSiteNumToLPad[CSI].push_back(&MBB); + MaxCSNum = std::max(MaxCSNum, CSI); + } + } + + // Get an ordered list of the machine basic blocks for the jump table. 
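+  // A landing pad may serve several call sites and can therefore appear in
+  // the list more than once; the dispatch code below reloads the call-site
+  // value from the function context and uses it to index the jump table.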
+ std::vector LPadList; + SmallPtrSet InvokeBBs; + LPadList.reserve(CallSiteNumToLPad.size()); + + for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) { + for (auto &LP : CallSiteNumToLPad[CSI]) { + LPadList.push_back(LP); + InvokeBBs.insert(LP->pred_begin(), LP->pred_end()); + } + } + + assert(!LPadList.empty() && + "No landing pad destinations for the dispatch jump table!"); + + // Create the MBBs for the dispatch code. + + // Shove the dispatch's address into the return slot in the function context. + MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock(); + DispatchBB->setIsEHPad(true); + + MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); + BuildMI(TrapBB, DL, TII->get(VE::TRAP)); + BuildMI(TrapBB, DL, TII->get(VE::NOP)); + DispatchBB->addSuccessor(TrapBB); + + MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock(); + DispatchBB->addSuccessor(DispContBB); + + // Insert MBBs. + MF->push_back(DispatchBB); + MF->push_back(DispContBB); + MF->push_back(TrapBB); + + // Insert code into the entry block that creates and registers the function + // context. + SetupEntryBlockForSjLj(MI, BB, DispatchBB, FI); + + // Create the jump table and associated information + unsigned JTE = getJumpTableEncoding(); + MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE); + unsigned MJTI = JTI->createJumpTableIndex(LPadList); + + const VERegisterInfo &RI = TII->getRegisterInfo(); + // Add a register mask with no preserved registers. This results in all + // registers being marked as clobbered. + BuildMI(DispatchBB, DL, TII->get(VE::NOP)) + .addRegMask(RI.getNoPreservedMask()); + + if (isPositionIndependent()) { + // Force to generate GETGOT, since current implementation doesn't recover + // GOT register correctly. + BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15); + } + + // IReg is used as an index in a memory operand and therefore can't be SP + Register IReg = MRI->createVirtualRegister(&VE::I64RegClass); + addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLUri), IReg), FI, 8); + if (LPadList.size() < 63) { + BuildMI(DispatchBB, DL, TII->get(VE::BCRLir)) + .addImm(VECC::CC_ILE) + .addImm(LPadList.size()) + .addReg(IReg) + .addMBB(TrapBB); + } else { + assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!"); + Register TmpReg = MRI->createVirtualRegister(&VE::I64RegClass); + BuildMI(DispatchBB, DL, TII->get(VE::LEAzzi), TmpReg) + .addImm(LPadList.size()); + BuildMI(DispatchBB, DL, TII->get(VE::BCRLrr)) + .addImm(VECC::CC_ILE) + .addReg(TmpReg) + .addReg(IReg) + .addMBB(TrapBB); + } + + Register BReg = MRI->createVirtualRegister(&VE::I64RegClass); + + Register Tmp1 = MRI->createVirtualRegister(&VE::I64RegClass); + Register Tmp2 = MRI->createVirtualRegister(&VE::I64RegClass); + + if (isPositionIndependent()) { + // Create following instructions for local linkage PIC code. 
+ // lea %Tmp1, .LJTI0_0@gotoff_lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %Tmp3, .LJTI0_0@gotoff_hi(%Tmp2) + // adds.l %BReg, %s15, %Tmp3 ; %s15 is GOT + // FIXME: use lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) + Register Tmp3 = MRI->createVirtualRegister(&VE::I64RegClass); + BuildMI(DispContBB, DL, TII->get(VE::LEAzzi), Tmp1) + .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_LO32); + BuildMI(DispContBB, DL, TII->get(VE::ANDrm0), Tmp2).addReg(Tmp1).addImm(32); + BuildMI(DispContBB, DL, TII->get(VE::LEASLrzi), Tmp3) + .addReg(Tmp2) + .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_HI32); + BuildMI(DispContBB, DL, TII->get(VE::ADXrr), BReg) + .addReg(VE::SX15) + .addReg(Tmp3); + } else { + // lea %Tmp1, .LJTI0_0@lo + // and %Tmp2, %Tmp1, (32)0 + // lea.sl %BReg, .LJTI0_0@hi(%Tmp2) + BuildMI(DispContBB, DL, TII->get(VE::LEAzzi), Tmp1) + .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_LO32); + BuildMI(DispContBB, DL, TII->get(VE::ANDrm0), Tmp2).addReg(Tmp1).addImm(32); + BuildMI(DispContBB, DL, TII->get(VE::LEASLrzi), BReg) + .addReg(Tmp2) + .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_HI32); + } + + switch (JTE) { + case MachineJumpTableInfo::EK_BlockAddress: { + // Generate simple block address code for no-PIC model. + + Register TReg = MRI->createVirtualRegister(&VE::I64RegClass); + Register Tmp1 = MRI->createVirtualRegister(&VE::I64RegClass); + Register Tmp2 = MRI->createVirtualRegister(&VE::I64RegClass); + + // sll Tmp1, IReg, 3 + BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1).addReg(IReg).addImm(3); + // FIXME: combine these add and lds into "lds TReg, *(BReg, Tmp1)" + // adds.l Tmp2, BReg, Tmp1 + BuildMI(DispContBB, DL, TII->get(VE::ADXrr), Tmp2) + .addReg(Tmp1) + .addReg(BReg); + // lds TReg, *(Tmp2) + BuildMI(DispContBB, DL, TII->get(VE::LDSri), TReg).addReg(Tmp2).addImm(0); + + // jmpq *(TReg) + BuildMI(DispContBB, DL, TII->get(VE::BAri)).addReg(TReg).addImm(0); + break; + } + case MachineJumpTableInfo::EK_Custom32: { + // for the case of PIC, generates these codes + + assert(isPositionIndependent()); + Register OReg = MRI->createVirtualRegister(&VE::I64RegClass); + Register TReg = MRI->createVirtualRegister(&VE::I64RegClass); + + Register Tmp1 = MRI->createVirtualRegister(&VE::I64RegClass); + Register Tmp2 = MRI->createVirtualRegister(&VE::I64RegClass); + + // sll Tmp1, IReg, 2 + BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1).addReg(IReg).addImm(2); + // FIXME: combine these add and ldl into "ldl.zx OReg, *(BReg, Tmp1)" + // add Tmp2, BReg, Tmp1 + BuildMI(DispContBB, DL, TII->get(VE::ADXrr), Tmp2) + .addReg(Tmp1) + .addReg(BReg); + // ldl.zx OReg, *(Tmp2) + BuildMI(DispContBB, DL, TII->get(VE::LDLUri), OReg).addReg(Tmp2).addImm(0); + + // Create following instructions for local linkage PIC code. 
+ // lea %Tmp3, fun@gotoff_lo + // and %Tmp4, %Tmp3, (32)0 + // lea.sl %Tmp5, fun@gotoff_hi(%Tmp4) + // adds.l %BReg2, %s15, %Tmp5 ; %s15 is GOT + // FIXME: use lea.sl %BReg2, fun@gotoff_hi(%Tmp4, %s15) + Register Tmp3 = MRI->createVirtualRegister(&VE::I64RegClass); + Register Tmp4 = MRI->createVirtualRegister(&VE::I64RegClass); + Register Tmp5 = MRI->createVirtualRegister(&VE::I64RegClass); + Register BReg2 = MRI->createVirtualRegister(&VE::I64RegClass); + const char *FunName = DispContBB->getParent()->getName().data(); + BuildMI(DispContBB, DL, TII->get(VE::LEAzzi), Tmp3) + .addExternalSymbol(FunName, VEMCExpr::VK_VE_GOTOFF_LO32); + BuildMI(DispContBB, DL, TII->get(VE::ANDrm0), Tmp4).addReg(Tmp3).addImm(32); + BuildMI(DispContBB, DL, TII->get(VE::LEASLrzi), Tmp5) + .addReg(Tmp4) + .addExternalSymbol(FunName, VEMCExpr::VK_VE_GOTOFF_HI32); + BuildMI(DispContBB, DL, TII->get(VE::ADXrr), BReg2) + .addReg(VE::SX15) + .addReg(Tmp5); + + // adds.l TReg, BReg2, OReg + BuildMI(DispContBB, DL, TII->get(VE::ADXrr), TReg) + .addReg(OReg) + .addReg(BReg2); + // jmpq *(TReg) + BuildMI(DispContBB, DL, TII->get(VE::BAri)).addReg(TReg).addImm(0); + break; + } + default: + llvm_unreachable("Unexpected jump table encoding"); + } + + // Add the jump table entries as successors to the MBB. + SmallPtrSet SeenMBBs; + for (auto &LP : LPadList) + if (SeenMBBs.insert(LP).second) + DispContBB->addSuccessor(LP); + + // N.B. the order the invoke BBs are processed in doesn't matter here. + SmallVector MBBLPads; + const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs(); + for (MachineBasicBlock *MBB : InvokeBBs) { + // Remove the landing pad successor from the invoke block and replace it + // with the new dispatch block. + // Keep a copy of Successors since it's modified inside the loop. + SmallVector Successors(MBB->succ_rbegin(), + MBB->succ_rend()); + // FIXME: Avoid quadratic complexity. + for (auto MBBS : Successors) { + if (MBBS->isEHPad()) { + MBB->removeSuccessor(MBBS); + MBBLPads.push_back(MBBS); + } + } + + MBB->addSuccessor(DispatchBB); + + // Find the invoke call and mark all of the callee-saved registers as + // 'implicit defined' so that they're spilled. This prevents code from + // moving instructions to before the EH block, where they will never be + // executed. + for (auto &II : reverse(*MBB)) { + if (!II.isCall()) + continue; + + DenseMap DefRegs; + for (auto &MOp : II.operands()) + if (MOp.isReg()) + DefRegs[MOp.getReg()] = true; + + MachineInstrBuilder MIB(*MF, &II); + for (unsigned RI = 0; SavedRegs[RI]; ++RI) { + Register Reg = SavedRegs[RI]; + if (!DefRegs[Reg]) + MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead); + } + + break; + } + } + + // Mark all former landing pads as non-landing pads. The dispatch is the only + // landing pad now. + for (auto &LP : MBBLPads) + LP->setIsEHPad(false); + + // The instruction is gone now. + MI.eraseFromParent(); + return BB; +} + +MachineBasicBlock * +VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const { + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unknown Custom Instruction!"); + case VE::EH_SjLj_Setup_Dispatch: + return EmitSjLjDispatchBlock(MI, BB); + } +} + +//===----------------------------------------------------------------------===// +// VE Inline Assembly Support +//===----------------------------------------------------------------------===// + +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. 
+VETargetLowering::ConstraintType +VETargetLowering::getConstraintType(StringRef Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: + break; + case 'r': + case 'f': + case 'e': + return C_RegisterClass; + case 'I': // SIMM13 + return C_Other; + } + } + + return TargetLowering::getConstraintType(Constraint); +} + +TargetLowering::ConstraintWeight +VETargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &info, + const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (!CallOperandVal) + return CW_Default; + + // Look at the constraint type. + switch (*constraint) { + default: + weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + break; + case 'I': // SIMM13 + if (ConstantInt *C = dyn_cast(info.CallOperandVal)) { + if (isInt<13>(C->getSExtValue())) + weight = CW_Constant; + } + break; + } + return weight; +} + +/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops +/// vector. If it is invalid, don't add anything to Ops. +void VETargetLowering::LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector &Ops, + SelectionDAG &DAG) const { + SDValue Result(nullptr, 0); + + // Only support length 1 constraints for now. + if (Constraint.length() > 1) + return; + + char ConstraintLetter = Constraint[0]; + switch (ConstraintLetter) { + default: + break; + case 'I': + if (ConstantSDNode *C = dyn_cast(Op)) { + if (isInt<13>(C->getSExtValue())) { + Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), + Op.getValueType()); + break; + } + return; + } + } + + if (Result.getNode()) { + Ops.push_back(Result); + return; + } + TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); +} + +std::pair +VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, + MVT VT) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': + return std::make_pair(0U, &VE::I64RegClass); + case 'f': + if (VT == MVT::f32 || VT == MVT::f64) + return std::make_pair(0U, &VE::I64RegClass); + else if (VT == MVT::f128) + return std::make_pair(0U, &VE::F128RegClass); + llvm_unreachable("Unknown ValueType for f-register-type!"); + break; + case 'e': + if (VT == MVT::f32 || VT == MVT::f64) + return std::make_pair(0U, &VE::I64RegClass); + else if (VT == MVT::f128) + return std::make_pair(0U, &VE::F128RegClass); + llvm_unreachable("Unknown ValueType for e-register-type!"); + break; + } + } else if (!Constraint.empty() && Constraint.size() <= 5 && + Constraint[0] == '{' && *(Constraint.end() - 1) == '}') { + // constraint = '{r}' + // Remove the braces from around the name. 
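+    // e.g. an explicit register constraint written as "{r5}" reaches this
+    // branch; the braces are stripped and the alias table below rewrites the
+    // name before retrying the generic lookup.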
+ StringRef name(Constraint.data() + 1, Constraint.size() - 2); + // Handle register aliases: + // r0-r7 -> g0-g7 + // r8-r15 -> o0-o7 + // r16-r23 -> l0-l7 + // r24-r31 -> i0-i7 + uint64_t intVal = 0; + if (name.substr(0, 1).equals("r") && + !name.substr(1).getAsInteger(10, intVal) && intVal <= 31) { + const char regTypes[] = {'g', 'o', 'l', 'i'}; + char regType = regTypes[intVal / 8]; + char regIdx = '0' + (intVal % 8); + char tmp[] = {'{', regType, regIdx, '}', 0}; + std::string newConstraint = std::string(tmp); + return TargetLowering::getRegForInlineAsmConstraint(TRI, newConstraint, + VT); + } + } + + return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); +} + +bool VETargetLowering::isOffsetFoldingLegal( + const GlobalAddressSDNode *GA) const { + // The VE target isn't yet aware of offsets. + return false; +} + +void VETargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl &Results, + SelectionDAG &DAG) const { + + SDLoc dl(N); + + switch (N->getOpcode()) { + default: + LLVM_DEBUG(N->dumpr(&DAG)); + llvm_unreachable("Do not know how to custom type legalize this operation!"); + } +} + +// Override to enable LOAD_STACK_GUARD lowering on Linux. +bool VETargetLowering::useLoadStackGuardNode() const { + if (!Subtarget->isTargetLinux()) + return TargetLowering::useLoadStackGuardNode(); + return true; +} + +// Override to disable global variable loading on Linux. +void VETargetLowering::insertSSPDeclarations(Module &M) const { + if (!Subtarget->isTargetLinux()) + return TargetLowering::insertSSPDeclarations(M); +} + +void VETargetLowering::finalizeLowering(MachineFunction &MF) const { + TargetLoweringBase::finalizeLowering(MF); } diff --git a/llvm/lib/Target/VE/VEInstrBuilder.h b/llvm/lib/Target/VE/VEInstrBuilder.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VEInstrBuilder.h @@ -0,0 +1,39 @@ +//===-- VEInstrBuilder.h - Aides for building VE insts ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file exposes functions that may be used with BuildMI from the +// MachineInstrBuilder.h file to simplify generating frame and constant pool +// references. +// +// For reference, the order of operands for memory references is: +// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate +// Displacement. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_VE_VEINSTRBUILDER_H +#define LLVM_LIB_TARGET_VE_VEINSTRBUILDER_H + +#include "llvm/CodeGen/MachineInstrBuilder.h" + +namespace llvm { + +/// addFrameReference - This function is used to add a reference to the base of +/// an abstract object on the stack frame of the current function. This +/// reference has base register as the FrameIndex offset until it is resolved. +/// This allows a constant offset to be specified as well... 
+/// +static inline const MachineInstrBuilder & +addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0, + bool mem = true) { + return MIB.addFrameIndex(FI).addImm(Offset); +} + +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/VE/VEInstrFormats.td b/llvm/lib/Target/VE/VEInstrFormats.td --- a/llvm/lib/Target/VE/VEInstrFormats.td +++ b/llvm/lib/Target/VE/VEInstrFormats.td @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// -class InstVE pattern> +class InstVE pattern, + InstrItinClass itin = NoItinerary> : Instruction { field bits<64> Inst; @@ -23,10 +24,13 @@ let DecoderNamespace = "VE"; field bits<64> SoftFail = 0; + + let Itinerary = itin; } -class RMopVal, dag outs, dag ins, string asmstr, list pattern=[]> - : InstVE { +class RMopVal, dag outs, dag ins, string asmstr, list pattern, + InstrItinClass itin = NoItinerary> + : InstVE { bits<1> cx = 0; bits<7> sx; bits<1> cy = 0; @@ -44,8 +48,9 @@ let Inst{63-32} = imm32; } -class RRopVal, dag outs, dag ins, string asmstr> - : RM { +class RRopVal, dag outs, dag ins, string asmstr, list pattern, + InstrItinClass itin = NoItinerary> + : RM { bits<1> cw = 0; bits<1> cw2 = 0; bits<4> cfw = 0; @@ -56,8 +61,31 @@ let imm32{28-31} = cfw; } -class CFopVal, dag outs, dag ins, string asmstr, list pattern=[]> - : RM { +class RRFENCEopVal, dag outs, dag ins, string asmstr, list pattern, + InstrItinClass itin = NoItinerary> + : InstVE { + bits<1> avo = 0; + bits<1> lf = 0; + bits<1> sf = 0; + bits<1> c2 = 0; + bits<1> c1 = 0; + bits<1> c0 = 0; + let op = opVal; + let Inst{15} = avo; + let Inst{14-10} = 0; + let Inst{9} = lf; + let Inst{8} = sf; + let Inst{23-19} = 0; + let Inst{18} = c2; + let Inst{17} = c1; + let Inst{16} = c0; + let Inst{31-24} = 0; + let Inst{63-32} = 0; +} + +class CFopVal, dag outs, dag ins, string asmstr, list pattern, + InstrItinClass itin = NoItinerary> + : RM { bits<1> cx2; bits<2> bpf; bits<4> cf; @@ -68,7 +96,7 @@ } // Pseudo instructions. -class Pseudo pattern=[]> +class Pseudo pattern> : InstVE { let isCodeGenOnly = 1; let isPseudo = 1; diff --git a/llvm/lib/Target/VE/VEInstrInfo.h b/llvm/lib/Target/VE/VEInstrInfo.h --- a/llvm/lib/Target/VE/VEInstrInfo.h +++ b/llvm/lib/Target/VE/VEInstrInfo.h @@ -40,11 +40,62 @@ void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override; + void copyPhysSubRegs(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, + bool KillSrc, const MCInstrDesc &MCID, + unsigned int numSubRegs, + const unsigned *subRegIdx) const; + + /// isLoadFromStackSlot - If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + unsigned isLoadFromStackSlot(const MachineInstr &MI, + int &FrameIndex) const override; + + /// isStoreToStackSlot - If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. 
+ unsigned isStoreToStackSlot(const MachineInstr &MI, + int &FrameIndex) const override; + + bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify = false) const override; + + unsigned removeBranch(MachineBasicBlock &MBB, + int *BytesRemoved = nullptr) const override; + + unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, ArrayRef Cond, + const DebugLoc &DL, + int *BytesAdded = nullptr) const override; + + bool + reverseBranchCondition(SmallVectorImpl &Cond) const override; + + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, unsigned SrcReg, + bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, unsigned DestReg, + int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + + unsigned getGlobalBaseReg(MachineFunction *MF) const; // Lower pseudo instructions after register allocation. bool expandPostRAPseudo(MachineInstr &MI) const override; bool expandExtendStackPseudo(MachineInstr &MI) const; + bool expandGetStackTopPseudo(MachineInstr &MI) const; }; } // namespace llvm diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp --- a/llvm/lib/Target/VE/VEInstrInfo.cpp +++ b/llvm/lib/Target/VE/VEInstrInfo.cpp @@ -12,6 +12,7 @@ #include "VEInstrInfo.h" #include "VE.h" +#include "VEMachineFunctionInfo.h" #include "VESubtarget.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -38,15 +39,361 @@ : VEGenInstrInfo(VE::ADJCALLSTACKDOWN, VE::ADJCALLSTACKUP), RI(), Subtarget(ST) {} +/// isLoadFromStackSlot - If the specified machine instruction is a direct +/// load from a stack slot, return the virtual or physical register number of +/// the destination along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than loading from the stack slot. +unsigned VEInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, + int &FrameIndex) const { + if (MI.getOpcode() == VE::LDSri || // I64 + MI.getOpcode() == VE::LDLri || // I32 + MI.getOpcode() == VE::LDUri || // F32 + MI.getOpcode() == VE::LDQri // F128 (pseudo) + ) { + if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && + MI.getOperand(2).getImm() == 0) { + FrameIndex = MI.getOperand(1).getIndex(); + return MI.getOperand(0).getReg(); + } + } + return 0; +} + +/// isStoreToStackSlot - If the specified machine instruction is a direct +/// store to a stack slot, return the virtual or physical register number of +/// the source reg along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than storing to the stack slot. 
+unsigned VEInstrInfo::isStoreToStackSlot(const MachineInstr &MI, + int &FrameIndex) const { + if (MI.getOpcode() == VE::STSri || // I64 + MI.getOpcode() == VE::STLri || // I32 + MI.getOpcode() == VE::STUri || // F32 + MI.getOpcode() == VE::STQri // F128 (pseudo) + ) { + if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() && + MI.getOperand(1).getImm() == 0) { + FrameIndex = MI.getOperand(0).getIndex(); + return MI.getOperand(2).getReg(); + } + } + return 0; +} + +static bool IsIntegerCC(unsigned CC) { return (CC < VECC::CC_AF); } + +static VECC::CondCodes GetOppositeBranchCondition(VECC::CondCodes CC) { + switch (CC) { + case VECC::CC_IG: + return VECC::CC_ILE; + case VECC::CC_IL: + return VECC::CC_IGE; + case VECC::CC_INE: + return VECC::CC_IEQ; + case VECC::CC_IEQ: + return VECC::CC_INE; + case VECC::CC_IGE: + return VECC::CC_IL; + case VECC::CC_ILE: + return VECC::CC_IG; + case VECC::CC_AF: + return VECC::CC_AT; + case VECC::CC_G: + return VECC::CC_LENAN; + case VECC::CC_L: + return VECC::CC_GENAN; + case VECC::CC_NE: + return VECC::CC_EQNAN; + case VECC::CC_EQ: + return VECC::CC_NENAN; + case VECC::CC_GE: + return VECC::CC_LNAN; + case VECC::CC_LE: + return VECC::CC_GNAN; + case VECC::CC_NUM: + return VECC::CC_NAN; + case VECC::CC_NAN: + return VECC::CC_NUM; + case VECC::CC_GNAN: + return VECC::CC_LE; + case VECC::CC_LNAN: + return VECC::CC_GE; + case VECC::CC_NENAN: + return VECC::CC_EQ; + case VECC::CC_EQNAN: + return VECC::CC_NE; + case VECC::CC_GENAN: + return VECC::CC_L; + case VECC::CC_LENAN: + return VECC::CC_G; + case VECC::CC_AT: + return VECC::CC_AF; + } + llvm_unreachable("Invalid cond code"); +} + +// Treat br.l [BCR AT] as unconditional branch +static bool isUncondBranchOpcode(int Opc) { + return Opc == VE::BCRLa || Opc == VE::BCRWa || Opc == VE::BCRDa || + Opc == VE::BCRSa; +} + +static bool isCondBranchOpcode(int Opc) { + return Opc == VE::BCRLrr || Opc == VE::BCRLir || Opc == VE::BCRLrm0 || + Opc == VE::BCRLrm1 || Opc == VE::BCRLim0 || Opc == VE::BCRLim1 || + Opc == VE::BCRWrr || Opc == VE::BCRWir || Opc == VE::BCRWrm0 || + Opc == VE::BCRWrm1 || Opc == VE::BCRWim0 || Opc == VE::BCRWim1 || + Opc == VE::BCRDrr || Opc == VE::BCRDir || Opc == VE::BCRDrm0 || + Opc == VE::BCRDrm1 || Opc == VE::BCRDim0 || Opc == VE::BCRDim1 || + Opc == VE::BCRSrr || Opc == VE::BCRSir || Opc == VE::BCRSrm0 || + Opc == VE::BCRSrm1 || Opc == VE::BCRSim0 || Opc == VE::BCRSim1; +} + +static bool isIndirectBranchOpcode(int Opc) { + report_fatal_error("isIndirectBranchOpcode is not implemented yet"); +} + +static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, + SmallVectorImpl &Cond) { + Cond.push_back(MachineOperand::CreateImm(LastInst->getOperand(0).getImm())); + Cond.push_back(LastInst->getOperand(1)); + Cond.push_back(LastInst->getOperand(2)); + Target = LastInst->getOperand(3).getMBB(); +} + +bool VEInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const { + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I == MBB.end()) + return false; + + if (!isUnpredicatedTerminator(*I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = &*I; + unsigned LastOpc = LastInst->getOpcode(); + + // If there is only one terminator instruction, process it. 
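+  // For a block ending in a lone "br.l target" this sets TBB and leaves Cond
+  // empty; a lone conditional BCR sets TBB and fills Cond with the three
+  // operands (condition code, lhs, rhs) that insertBranch expects.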
+ if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { + if (isUncondBranchOpcode(LastOpc)) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } + if (isCondBranchOpcode(LastOpc)) { + // Block ends with fall-through condbranch. + parseCondBranch(LastInst, TBB, Cond); + return false; + } + return true; // Can't handle indirect branch. + } + + // Get the instruction before it if it is a terminator. + MachineInstr *SecondLastInst = &*I; + unsigned SecondLastOpc = SecondLastInst->getOpcode(); + + // If AllowModify is true and the block ends with two or more unconditional + // branches, delete all but the first unconditional branch. + if (AllowModify && isUncondBranchOpcode(LastOpc)) { + while (isUncondBranchOpcode(SecondLastOpc)) { + LastInst->eraseFromParent(); + LastInst = SecondLastInst; + LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) { + // Return now the only terminator is an unconditional branch. + TBB = LastInst->getOperand(0).getMBB(); + return false; + } else { + SecondLastInst = &*I; + SecondLastOpc = SecondLastInst->getOpcode(); + } + } + } + + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I)) + return true; + + // If the block ends with a B and a Bcc, handle it. + if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { + parseCondBranch(SecondLastInst, TBB, Cond); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // If the block ends with two unconditional branches, handle it. The second + // one is not executed. + if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + return false; + } + + // ...likewise if it ends with an indirect branch followed by an unconditional + // branch. + if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return true; + } + + // Otherwise, can't handle this. 
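+  // (Returning true reports that the terminator sequence could not be
+  // analyzed.)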
+ return true; +} + +unsigned VEInstrInfo::insertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef Cond, + const DebugLoc &DL, int *BytesAdded) const { + assert(TBB && "insertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 3 || Cond.size() == 0) && + "VE branch conditions should have three component!"); + assert(!BytesAdded && "code size not handled"); + if (Cond.empty()) { + // Uncondition branch + assert(!FBB && "Unconditional branch with multiple successors!"); + BuildMI(&MBB, DL, get(VE::BCRLa)).addMBB(TBB); + return 1; + } + + // Conditional branch + // (BCRir CC sy sz addr) + + assert(Cond[0].isImm() && Cond[2].isReg() && "not implemented"); + + unsigned opc[2]; + const TargetRegisterInfo *TRI = &getRegisterInfo(); + MachineFunction *MF = MBB.getParent(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); + unsigned Reg = Cond[2].getReg(); + if (IsIntegerCC(Cond[0].getImm())) { + if (TRI->getRegSizeInBits(Reg, MRI) == 32) { + opc[0] = VE::BCRWir; + opc[1] = VE::BCRWrr; + } else { + opc[0] = VE::BCRLir; + opc[1] = VE::BCRLrr; + } + } else { + if (TRI->getRegSizeInBits(Reg, MRI) == 32) { + opc[0] = VE::BCRSir; + opc[1] = VE::BCRSrr; + } else { + opc[0] = VE::BCRDir; + opc[1] = VE::BCRDrr; + } + } + if (Cond[1].isImm()) { + BuildMI(&MBB, DL, get(opc[0])) + .add(Cond[0]) // condition code + .add(Cond[1]) // lhs + .add(Cond[2]) // rhs + .addMBB(TBB); + } else { + BuildMI(&MBB, DL, get(opc[1])) + .add(Cond[0]) + .add(Cond[1]) + .add(Cond[2]) + .addMBB(TBB); + } + + if (!FBB) + return 1; + BuildMI(&MBB, DL, get(VE::BCRLa)).addMBB(FBB); + return 2; +} + +unsigned VEInstrInfo::removeBranch(MachineBasicBlock &MBB, + int *BytesRemoved) const { + assert(!BytesRemoved && "code size not handled"); + + MachineBasicBlock::iterator I = MBB.end(); + unsigned Count = 0; + while (I != MBB.begin()) { + --I; + + if (I->isDebugValue()) + continue; + + if (!isUncondBranchOpcode(I->getOpcode()) && + !isCondBranchOpcode(I->getOpcode())) + break; // Not a branch + + I->eraseFromParent(); + I = MBB.end(); + ++Count; + } + return Count; + + // report_fatal_error("removeBranch is not implemented yet"); +} + +bool VEInstrInfo::reverseBranchCondition( + SmallVectorImpl &Cond) const { + VECC::CondCodes CC = static_cast(Cond[0].getImm()); + Cond[0].setImm(GetOppositeBranchCondition(CC)); + return false; +} + +void VEInstrInfo::copyPhysSubRegs( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, + unsigned DestReg, unsigned SrcReg, bool KillSrc, const MCInstrDesc &MCID, + unsigned int numSubRegs, const unsigned *subRegIdx) const { + const TargetRegisterInfo *TRI = &getRegisterInfo(); + MachineInstr *MovMI = nullptr; + + for (unsigned i = 0; i != numSubRegs; ++i) { + unsigned SubDest = TRI->getSubReg(DestReg, subRegIdx[i]); + unsigned SubSrc = TRI->getSubReg(SrcReg, subRegIdx[i]); + assert(SubDest && SubSrc && "Bad sub-register"); + + if (MCID.getOpcode() == VE::ORri) { + // generate "ORri, dest, src, 0" instruction. + MachineInstrBuilder MIB = + BuildMI(MBB, I, DL, MCID, SubDest).addReg(SubSrc).addImm(0); + MovMI = MIB.getInstr(); + } else { + llvm_unreachable("Unexpected reg-to-reg copy instruction"); + } + } + // Add implicit super-register defs and kills to the last MovMI. 
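+  // Marking only the last copy keeps liveness exact: the sub-register copies
+  // together define the whole destination register, and (when KillSrc is set)
+  // the source is killed no earlier than the final copy.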
+  MovMI->addRegisterDefined(DestReg, TRI);
+  if (KillSrc)
+    MovMI->addRegisterKilled(SrcReg, TRI);
+}
+
 void VEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I, const DebugLoc &DL,
                               MCRegister DestReg, MCRegister SrcReg,
                               bool KillSrc) const {
-  if (VE::I64RegClass.contains(SrcReg) && VE::I64RegClass.contains(DestReg)) {
+  // For the case of VE, I32, I64, and F32 use the identical
+  // registers %s0-%s63, so no need to check other register classes
+  // here.
+  if (VE::I32RegClass.contains(DestReg, SrcReg))
     BuildMI(MBB, I, DL, get(VE::ORri), DestReg)
         .addReg(SrcReg, getKillRegState(KillSrc))
         .addImm(0);
+  // any scalar to any scalar
+  else if ((VE::I32RegClass.contains(SrcReg) ||
+            VE::F32RegClass.contains(SrcReg) ||
+            VE::I64RegClass.contains(SrcReg)) &&
+           (VE::I32RegClass.contains(DestReg) ||
+            VE::F32RegClass.contains(DestReg) ||
+            VE::I64RegClass.contains(DestReg)))
+    BuildMI(MBB, I, DL, get(VE::ORri), DestReg)
+        .addReg(SrcReg, getKillRegState(KillSrc))
+        .addImm(0);
+  else if (VE::F128RegClass.contains(DestReg, SrcReg)) {
+    // Use two instructions.
+    const unsigned subRegIdx[] = {VE::sub_even, VE::sub_odd};
+    unsigned int numSubRegs = 2;
+    copyPhysSubRegs(MBB, I, DL, DestReg, SrcReg, KillSrc, get(VE::ORri),
+                    numSubRegs, subRegIdx);
   } else {
     const TargetRegisterInfo *TRI = &getRegisterInfo();
     dbgs() << "Impossible reg-to-reg copy from " << printReg(SrcReg, TRI)
@@ -55,6 +402,108 @@
   }
 }
 
+void VEInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator I,
+                                      unsigned SrcReg, bool isKill, int FI,
+                                      const TargetRegisterClass *RC,
+                                      const TargetRegisterInfo *TRI) const {
+  DebugLoc DL;
+  if (I != MBB.end())
+    DL = I->getDebugLoc();
+
+  MachineFunction *MF = MBB.getParent();
+  const MachineFrameInfo &MFI = MF->getFrameInfo();
+  MachineMemOperand *MMO = MF->getMachineMemOperand(
+      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
+      MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+
+  // On the order of operands here: think "[FrameIdx + 0] = SrcReg".
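+  // e.g. an I64 spill becomes STSri with operands (FrameIndex, 0, SrcReg).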
+ if (RC == &VE::I64RegClass) + BuildMI(MBB, I, DL, get(VE::STSri)) + .addFrameIndex(FI) + .addImm(0) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO); + else if (RC == &VE::I32RegClass) + BuildMI(MBB, I, DL, get(VE::STLri)) + .addFrameIndex(FI) + .addImm(0) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO); + else if (RC == &VE::F32RegClass) + BuildMI(MBB, I, DL, get(VE::STUri)) + .addFrameIndex(FI) + .addImm(0) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO); + else if (VE::F128RegClass.hasSubClassEq(RC)) + BuildMI(MBB, I, DL, get(VE::STQri)) + .addFrameIndex(FI) + .addImm(0) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO); + else + report_fatal_error("Can't store this register to stack slot"); +} + +void VEInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL; + if (I != MBB.end()) + DL = I->getDebugLoc(); + + MachineFunction *MF = MBB.getParent(); + const MachineFrameInfo &MFI = MF->getFrameInfo(); + MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); + + if (RC == &VE::I64RegClass) + BuildMI(MBB, I, DL, get(VE::LDSri), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO); + else if (RC == &VE::I32RegClass) + BuildMI(MBB, I, DL, get(VE::LDLri), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO); + else if (RC == &VE::F32RegClass) + BuildMI(MBB, I, DL, get(VE::LDUri), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO); + else if (VE::F128RegClass.hasSubClassEq(RC)) + BuildMI(MBB, I, DL, get(VE::LDQri), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO); + else + report_fatal_error("Can't load this register from stack slot"); +} + +unsigned VEInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { + VEMachineFunctionInfo *VEFI = MF->getInfo(); + unsigned GlobalBaseReg = VEFI->getGlobalBaseReg(); + if (GlobalBaseReg != 0) + return GlobalBaseReg; + + // We use %s15 (%got) as a global base register + GlobalBaseReg = VE::SX15; + + // Insert a pseudo instruction to set the GlobalBaseReg into the first + // MBB of the function + MachineBasicBlock &FirstMBB = MF->front(); + MachineBasicBlock::iterator MBBI = FirstMBB.begin(); + DebugLoc dl; + BuildMI(FirstMBB, MBBI, dl, get(VE::GETGOT), GlobalBaseReg); + VEFI->setGlobalBaseReg(GlobalBaseReg); + return GlobalBaseReg; +} + bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { switch (MI.getOpcode()) { case VE::EXTEND_STACK: { @@ -64,6 +513,15 @@ MI.eraseFromParent(); // The pseudo instruction is gone now. return true; } + case TargetOpcode::LOAD_STACK_GUARD: { + assert(Subtarget.isTargetLinux() && + "Only Linux target is expected to contain LOAD_STACK_GUARD"); + report_fatal_error( + "expandPostRAPseudo for LOAD_STACK_GUARD is not implemented yet"); + } + case VE::GETSTACKTOP: { + return expandGetStackTopPseudo(MI); + } } return false; } @@ -148,3 +606,30 @@ MI.eraseFromParent(); // The pseudo instruction is gone now. 
return true; } + +bool VEInstrInfo::expandGetStackTopPseudo(MachineInstr &MI) const { + MachineBasicBlock *MBB = MI.getParent(); + MachineFunction &MF = *MBB->getParent(); + const VEInstrInfo &TII = + *static_cast(MF.getSubtarget().getInstrInfo()); + DebugLoc dl = MBB->findDebugLoc(MI); + + // Create following instruction + // + // dst = %sp + stack_size + + const MachineFrameInfo &MFI = MF.getFrameInfo(); + + const TargetFrameLowering *TFL = MF.getSubtarget().getFrameLowering(); + unsigned NumBytes = 176; + if (MFI.adjustsStack() && TFL->hasReservedCallFrame(MF)) + NumBytes += MFI.getMaxCallFrameSize(); + + BuildMI(*MBB, MI, dl, TII.get(VE::LEArzi)) + .addDef(MI.getOperand(0).getReg()) + .addReg(VE::SX11) + .addImm(NumBytes); + + MI.eraseFromParent(); // The pseudo instruction is gone now. + return true; +} diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td --- a/llvm/lib/Target/VE/VEInstrInfo.td +++ b/llvm/lib/Target/VE/VEInstrInfo.td @@ -28,10 +28,57 @@ def simm32 : PatLeaf<(imm), [{ return isInt<32>(N->getSExtValue()); }]>; def uimm32 : PatLeaf<(imm), [{ return isUInt<32>(N->getZExtValue()); }]>; def uimm6 : PatLeaf<(imm), [{ return isUInt<6>(N->getZExtValue()); }]>; +def uimm7 : PatLeaf<(imm), [{ return isUInt<7>(N->getZExtValue()); }]>; +def zero : PatLeaf<(imm), [{ return N->getSExtValue() == 0; }]>; def lomsbzero : PatLeaf<(imm), [{ return (N->getZExtValue() & 0x80000000) == 0; }]>; def lozero : PatLeaf<(imm), [{ return (N->getZExtValue() & 0xffffffff) == 0; }]>; +def fplomsbzero : PatLeaf<(fpimm), [{ return (N->getValueAPF().bitcastToAPInt() + .getZExtValue() & 0x80000000) == 0; }]>; +def fplozero : PatLeaf<(fpimm), [{ return (N->getValueAPF().bitcastToAPInt() + .getZExtValue() & 0xffffffff) == 0; }]>; + +def CCSIOp : PatLeaf<(cond), [{ + switch (N->get()) { + default: return true; + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETUGT: + case ISD::SETUGE: return false; + } +}]>; + +def CCUIOp : PatLeaf<(cond), [{ + switch (N->get()) { + default: return true; + case ISD::SETLT: + case ISD::SETLE: + case ISD::SETGT: + case ISD::SETGE: return false; + } +}]>; + +def GetVL : SDNodeXFormgetMachineFunction(); + unsigned VLReg = MF.getSubtarget().getInstrInfo()->getVectorLengthReg(&MF); + return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(N), VLReg, MVT::i32); +}]>; + +def LOFP32 : SDNodeXFormgetValueAPF().bitcastToAPInt(); + return CurDAG->getTargetConstant(Lo_32(imm.getZExtValue() & 0xffffffff), + SDLoc(N), MVT::i64); +}]>; + +def HIFP32 : SDNodeXFormgetValueAPF().bitcastToAPInt(); + return CurDAG->getTargetConstant(Hi_32(imm.getZExtValue()), + SDLoc(N), MVT::i64); +}]>; def LO32 : SDNodeXFormgetTargetConstant(Lo_32(N->getZExtValue()), @@ -44,16 +91,103 @@ SDLoc(N), MVT::i32); }]>; +def LEASLimm : PatLeaf<(imm), [{ + return isShiftedUInt<32, 32>(N->getZExtValue()); +}], HI32>; + +def trunc_imm : SDNodeXFormgetTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32); +}]>; + +def sext_imm : SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64); +}]>; + +def zext_imm : SDNodeXFormgetTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i64); +}]>; + +def icond2cc : SDNodeXFormget()) { + default: llvm_unreachable("Unknown integer condition code!"); + case ISD::SETEQ: cc = VECC::CC_IEQ; break; + case ISD::SETNE: cc = VECC::CC_INE; break; + case ISD::SETLT: cc = VECC::CC_IL; break; + case ISD::SETGT: cc = VECC::CC_IG; break; + case ISD::SETLE: cc = VECC::CC_ILE; break; + case ISD::SETGE: cc = VECC::CC_IGE; break; + case ISD::SETULT: cc = 
VECC::CC_IL; break; + case ISD::SETULE: cc = VECC::CC_ILE; break; + case ISD::SETUGT: cc = VECC::CC_IG; break; + case ISD::SETUGE: cc = VECC::CC_IGE; break; + } + return CurDAG->getTargetConstant(cc, SDLoc(N), MVT::i32); +}]>; + +def fcond2cc : SDNodeXFormget()) { + default: llvm_unreachable("Unknown float condition code!"); + case ISD::SETFALSE: cc = VECC::CC_AF; break; + case ISD::SETEQ: + case ISD::SETOEQ: cc = VECC::CC_EQ; break; + case ISD::SETNE: + case ISD::SETONE: cc = VECC::CC_NE; break; + case ISD::SETLT: + case ISD::SETOLT: cc = VECC::CC_L; break; + case ISD::SETGT: + case ISD::SETOGT: cc = VECC::CC_G; break; + case ISD::SETLE: + case ISD::SETOLE: cc = VECC::CC_LE; break; + case ISD::SETGE: + case ISD::SETOGE: cc = VECC::CC_GE; break; + case ISD::SETO: cc = VECC::CC_NUM; break; + case ISD::SETUO: cc = VECC::CC_NAN; break; + case ISD::SETUEQ: cc = VECC::CC_EQNAN; break; + case ISD::SETUNE: cc = VECC::CC_NENAN; break; + case ISD::SETULT: cc = VECC::CC_LNAN; break; + case ISD::SETUGT: cc = VECC::CC_GNAN; break; + case ISD::SETULE: cc = VECC::CC_LENAN; break; + case ISD::SETUGE: cc = VECC::CC_GENAN; break; + case ISD::SETTRUE: cc = VECC::CC_AT; break; + } + return CurDAG->getTargetConstant(cc, SDLoc(N), MVT::i32); +}]>; + +// Addressing modes. +def ADDRrr : ComplexPattern; +def ADDRri : ComplexPattern; + +// Address operands +def VEMEMrrAsmOperand : AsmOperandClass { + let Name = "MEMrr"; + let ParserMethod = "parseMEMOperand"; +} + +def VEMEMriAsmOperand : AsmOperandClass { + let Name = "MEMri"; + let ParserMethod = "parseMEMOperand"; +} + // ASX format of memory address +def MEMrr : Operand { + let PrintMethod = "printMemASXOperand"; + let MIOperandInfo = (ops ptr_rc, ptr_rc); + let ParserMatchClass = VEMEMrrAsmOperand; +} + def MEMri : Operand { let PrintMethod = "printMemASXOperand"; let MIOperandInfo = (ops ptr_rc, i64imm); + let ParserMatchClass = VEMEMriAsmOperand; } // AS format of memory address def MEMASri : Operand { let PrintMethod = "printMemASOperand"; let MIOperandInfo = (ops ptr_rc, i64imm); + let ParserMatchClass = VEMEMriAsmOperand; } // Branch targets have OtherVT type. @@ -61,22 +195,61 @@ let EncoderMethod = "getBranchTarget32OpValue"; } +def TLSSym : Operand; + +// Branch targets have OtherVT type. +def brtarget : Operand { + let EncoderMethod = "getBranchTargetOpValue"; +} + +def calltarget : Operand { + let EncoderMethod = "getCallTargetOpValue"; + let DecoderMethod = "DecodeCall"; +} + +def simm7Op32 : Operand { + let DecoderMethod = "DecodeSIMM7"; +} + def simm7Op64 : Operand { let DecoderMethod = "DecodeSIMM7"; } +def simm7Op128 : Operand { + let DecoderMethod = "DecodeSIMM7"; +} + +def simm32Op32 : Operand { + let DecoderMethod = "DecodeSIMM32"; +} + def simm32Op64 : Operand { let DecoderMethod = "DecodeSIMM32"; } +def uimm7Op32 : Operand { + let DecoderMethod = "DecodeUIMM6"; +} + +def uimm6Op32 : Operand { + let DecoderMethod = "DecodeUIMM6"; +} + def uimm6Op64 : Operand { let DecoderMethod = "DecodeUIMM6"; } +def uimm6Op128 : Operand { + let DecoderMethod = "DecodeUIMM6"; +} + // Operand for printing out a condition code. let PrintMethod = "printCCOperand" in def CCOp : Operand; +def VEhi : SDNode<"VEISD::Hi", SDTIntUnaryOp>; +def VElo : SDNode<"VEISD::Lo", SDTIntUnaryOp>; + // These are target-independent nodes, but have target-specific formats. 
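+// The CALLSEQ_START/CALLSEQ_END pair below brackets every call so
+// frame lowering can size the outgoing argument area, while VEhi/VElo
+// above carry the two 32-bit halves of a symbol or constant so a
+// 64-bit address can be assembled with a lea / lea.sl pair (see the
+// address patterns near the end of this file).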
def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i64>, SDTCisVT<1, i64> ]>; @@ -88,10 +261,53 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -// def SDT_SPCall : SDTypeProfile<0, -1, [SDTCisVT<0, i64>]>; +def SDT_SPCall : SDTypeProfile<0, -1, [SDTCisVT<0, i64>]>; +def call : SDNode<"VEISD::CALL", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def retflag : SDNode<"VEISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +def getGOT : Operand { + let PrintMethod = "printGetGOT"; +} +def getFunPLT : Operand { + let PrintMethod = "printGetFunPLT"; +} + +def VEmax : SDNode<"VEISD::MAX", SDTIntBinOp>; +def VEmin : SDNode<"VEISD::MIN", SDTIntBinOp>; +def VEfmax : SDNode<"VEISD::FMAX", SDTFPBinOp>; +def VEfmin : SDNode<"VEISD::FMIN", SDTFPBinOp>; + +def VEeh_sjlj_setjmp: SDNode<"VEISD::EH_SJLJ_SETJMP", + SDTypeProfile<1, 1, [SDTCisInt<0>, + SDTCisPtrTy<1>]>, + [SDNPHasChain, SDNPSideEffect]>; +def VEeh_sjlj_longjmp: SDNode<"VEISD::EH_SJLJ_LONGJMP", + SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPSideEffect]>; +def VEeh_sjlj_setup_dispatch: SDNode<"VEISD::EH_SJLJ_SETUP_DISPATCH", + SDTypeProfile<0, 0, []>, + [SDNPHasChain, SDNPSideEffect]>; + +// GETFUNPLT for PIC +def GetFunPLT : SDNode<"VEISD::GETFUNPLT", SDTIntUnaryOp>; + +// GETTLSADDR for TLS +def GetTLSAddr : SDNode<"VEISD::GETTLSADDR", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; + +// GETSTACKTOP +def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone, + [SDNPHasChain, SDNPSideEffect]>; + +// MEMBARRIER +def MemBarrier : SDNode<"VEISD::MEMBARRIER", SDTNone, + [SDNPHasChain, SDNPSideEffect]>; + //===----------------------------------------------------------------------===// // VE Flag Conditions //===----------------------------------------------------------------------===// @@ -126,26 +342,97 @@ // VE Multiclasses for common instruction formats //===----------------------------------------------------------------------===// -multiclass RMmopc, +multiclass RMmopc, SDNode OpNode, + RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> { + def rri : RM< + opc, (outs RC:$sx), (ins RC:$sy, RC:$sz, immOp2:$imm32), + !strconcat(opcStr, " $sx, ${imm32}($sy, ${sz})"), + [(set Ty:$sx, (OpNode (OpNode Ty:$sy, Ty:$sz), (Ty simm32:$imm32)))]> { + let cy = 1; + let cz = 1; + let hasSideEffects = 0; + } + def rii : RM< + opc, (outs RC:$sx), (ins RC:$sz, immOp:$sy, immOp2:$imm32), + !strconcat(opcStr, " $sx, ${imm32}($sy, ${sz})"), + [/* Not define DAG pattern here to avoid llvm uses LEArii for add + instructions. + (set Ty:$sx, (OpNode (OpNode Ty:$sz, (Ty simm7:$sy)), (Ty simm32:$imm32)))*/]> { + let cy = 0; + let cz = 1; + let hasSideEffects = 0; + } + def rzi : RM< + opc, (outs RC:$sx), (ins RC:$sz, immOp2:$imm32), + !strconcat(opcStr, " $sx, ${imm32}(${sz})"), + [(set Ty:$sx, (OpNode Ty:$sz, (Ty simm32:$imm32)))]> { + let cy = 0; + let sy = 0; + let cz = 1; + let hasSideEffects = 0; + } + def zii : RM< + opc, (outs RC:$sx), (ins immOp:$sy, immOp2:$imm32), + !strconcat(opcStr, " $sx, ${imm32}(${sy})"), + [/* Not define DAG pattern here to avoid llvm uses LEAzii for all add + instructions. 
+ (set Ty:$sx, (OpNode (Ty simm7:$sy), (Ty simm32:$imm32))) */]> { + let cy = 0; + let cz = 0; + let sz = 0; + let hasSideEffects = 0; + } + def zzi : RM< + opc, (outs RC:$sx), (ins immOp2:$imm32), + !strconcat(opcStr, " $sx, $imm32"), + [/* Not define DAG pattern here to avoid llvm uses LEAzzi for all set + instructions. + (set Ty:$sx, (Ty simm32:$imm32)) */]> { + let cy = 0; + let sy = 0; + let cz = 0; + let sz = 0; + let hasSideEffects = 0; + } +} + +// RRNDm is similar to RRm without DAG patterns. + +multiclass RMNDmopc, SDNode OpNode, RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> { def rri : RM< opc, (outs RC:$sx), (ins RC:$sy, RC:$sz, immOp2:$imm32), - !strconcat(opcStr, " $sx, ${imm32}($sy, ${sz})")> { + !strconcat(opcStr, " $sx, ${imm32}($sy, ${sz})"), []> { let cy = 1; let cz = 1; let hasSideEffects = 0; } + def rii : RM< + opc, (outs RC:$sx), (ins RC:$sz, immOp:$sy, immOp2:$imm32), + !strconcat(opcStr, " $sx, ${imm32}($sy, ${sz})"), []> { + let cy = 0; + let cz = 1; + let hasSideEffects = 0; + } def rzi : RM< opc, (outs RC:$sx), (ins RC:$sz, immOp2:$imm32), - !strconcat(opcStr, " $sx, ${imm32}(${sz})")> { + !strconcat(opcStr, " $sx, ${imm32}(${sz})"), []> { let cy = 0; let sy = 0; + let hasSideEffects = 0; let cz = 1; + } + def zii : RM< + opc, (outs RC:$sx), (ins immOp:$sy, immOp2:$imm32), + !strconcat(opcStr, " $sx, ${imm32}(${sy})"), []> { + let cy = 0; + let cz = 0; + let sz = 0; let hasSideEffects = 0; } def zzi : RM< opc, (outs RC:$sx), (ins immOp2:$imm32), - !strconcat(opcStr, " $sx, $imm32")> { + !strconcat(opcStr, " $sx, $imm32"), []> { let cy = 0; let sy = 0; let cz = 0; @@ -154,39 +441,122 @@ } } +let Constraints = "$sx = $sd", DisableEncoding = "$sd" in +multiclass RRCASmopc, SDNode OpNode, + RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> { + def asr : RM< + opc, (outs RC:$sx), (ins MEMASri:$addr, RC:$sy, RC:$sd), + !strconcat(opcStr, " $sx, $addr, $sy"), []> { + let cy = 1; + let cz = 1; + let hasSideEffects = 0; + } + def asi : RM< + opc, (outs RC:$sx), (ins MEMASri:$addr, immOp:$sy, RC:$sd), + !strconcat(opcStr, " $sx, $addr, $sy"), []> { + let cy = 0; + let cz = 1; + let hasSideEffects = 0; + } + def rir : RM< + opc, (outs RC:$sx), (ins RC:$sz, immOp2:$imm32, RC:$sy, RC:$sd), + !strconcat(opcStr, " $sx, ${imm32}(${sz}), $sy"), []> { + let cy = 1; + let cz = 1; + let isCodeGenOnly = 1; + let hasSideEffects = 0; + } + def rii : RM< + opc, (outs RC:$sx), (ins RC:$sz, immOp2:$imm32, immOp:$sy, RC:$sd), + !strconcat(opcStr, " $sx, ${imm32}(${sz}), $sy"), []> { + let cy = 0; + let cz = 1; + let isCodeGenOnly = 1; + let hasSideEffects = 0; + } + def zii : RM< + opc, (outs RC:$sx), (ins immOp2:$imm32, immOp:$sy, RC:$sd), + !strconcat(opcStr, " $sx, $imm32, $sy"), []> { + let cy = 0; + let cz = 0; + let sz = 0; + let hasSideEffects = 0; + } +} + // Multiclass for RR type instructions -multiclass RRmrropc, +// First, defines components +// Named like RRm if each has their own DAG pattern +// Named like RRNDm if each doesn't have their own DAG pattern + +multiclass RRmrropc, SDNode OpNode, + RegisterClass RCo, ValueType Tyo, + RegisterClass RCi, ValueType Tyi> { + def rr : RR + { let cy = 1; let cz = 1; let hasSideEffects = 0; } +} + +multiclass RRNDmrropc, SDNode OpNode, RegisterClass RCo, ValueType Tyo, RegisterClass RCi, ValueType Tyi> { def rr : RR + !strconcat(opcStr, " $sx, $sy, $sz"), []> { let cy = 1; let cz = 1; let hasSideEffects = 0; } } -multiclass RRmriopc, +multiclass RRmriopc, SDNode OpNode, RegisterClass RCo, ValueType Tyo, 
RegisterClass RCi, ValueType Tyi, Operand immOp> { // VE calculates (OpNode $sy, $sz), but llvm requires to have immediate // in RHS, so we use following definition. def ri : RR + !strconcat(opcStr, " $sx, $sy, $sz"), + [(set Tyo:$sx, (OpNode Tyi:$sz, (Tyi simm7:$sy)))]> { let cy = 0; let cz = 1; let hasSideEffects = 0; } } -multiclass RRmizopc, +multiclass RRmiropc, SDNode OpNode, RegisterClass RCo, ValueType Tyo, RegisterClass RCi, ValueType Tyi, Operand immOp> { + def ri : RR + { let cy = 0; let cz = 1; let hasSideEffects = 0; } +} + +multiclass RRNDmiropc, SDNode OpNode, + RegisterClass RCo, ValueType Tyo, + RegisterClass RCi, ValueType Tyi, Operand immOp> { + def ri : RR + { let cy = 0; let cz = 1; let hasSideEffects = 0; } +} + +multiclass RRmizopc, SDNode OpNode, + RegisterClass RCo, ValueType Tyo, + RegisterClass RCi, ValueType Tyi, Operand immOp> { + def zi : RR + { let cy = 0; let cz = 0; let sz = 0; let hasSideEffects = 0; } +} + +multiclass RRNDmizopc, SDNode OpNode, + RegisterClass RCo, ValueType Tyo, + RegisterClass RCi, ValueType Tyi, Operand immOp> { def zi : RR + !strconcat(opcStr, " $sx, $sy"), []> { let cy = 0; let cz = 0; let sz = 0; let hasSideEffects = 0; } } -multiclass RRNDmrmopc, +multiclass RRNDmrmopc, SDNode OpNode, RegisterClass RCo, ValueType Tyo, RegisterClass RCi, ValueType Tyi, Operand immOp2> { def rm0 : RR { + !strconcat(opcStr, " $sx, $sy, (${sz})0"), []> { let cy = 1; let cz = 0; let sz{6} = 1; @@ -194,14 +564,29 @@ // it fails to infer from a pattern. let hasSideEffects = 0; } + def rm1 : RR { + let cy = 1; + let cz = 0; + let hasSideEffects = 0; + } } -multiclass RRNDmimopc, +multiclass RRNDmimopc, SDNode OpNode, RegisterClass RCo, ValueType Tyo, RegisterClass RCi, ValueType Tyi, Operand immOp, Operand immOp2> { + def im0 : RR { + let cy = 0; + let cz = 0; + let sz{6} = 1; + // (guess) tblgen conservatively assumes hasSideEffects when + // it fails to infer from a pattern. 
+ let hasSideEffects = 0; + } def im1 : RR { + !strconcat(opcStr, " $sx, $sy, (${sz})1"), []> { let cy = 0; let cz = 0; let hasSideEffects = 0; @@ -211,96 +596,1250 @@ // Used by add, mul, div, and similar commutative instructions // The order of operands are "$sx, $sy, $sz" -multiclass RRmopc, +multiclass RRmopc, SDNode OpNode, RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> : - RRmrr, - RRmri, - RRmiz, - RRNDmrm, - RRNDmim; + RRmrr, + RRmri, + RRmiz, + RRNDmrm, + RRNDmim; -// Branch multiclass -let isBranch = 1, isTerminator = 1, hasDelaySlot = 1 in -multiclass BCRm opc, - RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> { - def rr : CF< - opc, (outs), - (ins CCOp:$cf, RC:$sy, RC:$sz, brtarget32:$imm32), - !strconcat(opcStr, " $sy, $sz, $imm32")> { - let cy = 1; - let cz = 1; - let hasSideEffects = 0; - } -} +// Used by sub, and similar not commutative instructions +// The order of operands are "$sx, $sy, $sz" +multiclass RRNCmopc, SDNode OpNode, + RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> : + RRmrr, + RRmir, + RRmiz, + RRNDmrm, + RRNDmim; -//===----------------------------------------------------------------------===// -// Instructions -//===----------------------------------------------------------------------===// +// Used by fadd, fsub, and similar floating point instructions +// The order of operands are "$sx, $sy, $sz" -// LEA and LEASL instruction (load 32 bit imm to low or high part) -let cx = 0 in -defm LEA : RMm<"lea", 0x06, I64, i64, simm7Op64, simm32Op64>; -let cx = 1 in -defm LEASL : RMm<"lea.sl", 0x06, I64, i64, simm7Op64, simm32Op64>; +multiclass RRFmopc, SDNode OpNode, + RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> : + RRmrr, + RRNDmir, + RRNDmiz, + RRNDmrm, + RRNDmim; -// 5.3.2.2. Fixed-Point Arithmetic Operation Instructions +// Used by cmp instruction +// The order of operands are "$sx, $sy, $sz" -// ADX instruction -let cx = 0 in -defm ADX : RRm<"adds.l", 0x59, I64, i64, simm7Op64, uimm6Op64>; +multiclass RRNDmopc, SDNode OpNode, + RegisterClass RC, ValueType Ty, + Operand immOp, Operand immOp2> : + RRNDmrr, + RRNDmir, + RRNDmiz, + RRNDmrm, + RRNDmim; -// 5.3.2.3. 
Logical Arithmetic Operation Instructions +// Used by fcq instruction like "F64 <- cmp F128, F128" +// The order of operands are "$sx, $sy, $sz" -let cx = 0 in { - defm AND : RRm<"and", 0x44, I64, i64, simm7Op64, uimm6Op64>; - defm OR : RRm<"or", 0x45, I64, i64, simm7Op64, uimm6Op64>; -} +multiclass RRFCQmopc, SDNode OpNode, + RegisterClass RC, ValueType Ty, + Operand immOp, Operand immOp2> : + RRNDmrr, + RRNDmir, + RRNDmiz, + RRNDmrm, + RRNDmim; -// Load and Store instructions -// As 1st step, only uses sz and imm32 to represent $addr -let mayLoad = 1, hasSideEffects = 0 in { -let cy = 0, sy = 0, cz = 1 in { -let cx = 0 in -def LDSri : RM< - 0x01, (outs I64:$sx), (ins MEMri:$addr), - "ld $sx, $addr">; -} -} +// Multiclass for RR type instructions +// Used by sra, sla, sll, and similar instructions +// The order of operands are "$sx, $sz, $sy" -let mayStore = 1, hasSideEffects = 0 in { -let cx = 0, cy = 0, sy = 0, cz = 1 in { -def STSri : RM< - 0x11, (outs), (ins MEMri:$addr, I64:$sx), - "st $sx, $addr">; -} +multiclass RRImopc, SDNode OpNode, + RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> { + def rr : RR< + opc, (outs RC:$sx), (ins RC:$sz, I32:$sy), + !strconcat(opcStr, " $sx, $sz, $sy"), + [(set Ty:$sx, (OpNode Ty:$sz, i32:$sy))]> { + let cy = 1; + let cz = 1; + let hasSideEffects = 0; + } + def ri : RR< + opc, (outs RC:$sx), (ins RC:$sz, immOp:$sy), + !strconcat(opcStr, " $sx, $sz, $sy"), + [(set Ty:$sx, (OpNode Ty:$sz, (i32 simm7:$sy)))]> { + let cy = 0; + let cz = 1; + let hasSideEffects = 0; + } + def rm0 : RR< + opc, (outs RC:$sx), (ins immOp2:$sz, I32:$sy), + !strconcat(opcStr, " $sx, (${sz})0, $sy"), + []> { + let cy = 1; + let cz = 0; + let sz{6} = 1; + // (guess) tblgen conservatively assumes hasSideEffects when it fails to infer from a pattern. + let hasSideEffects = 0; + } + def rm1 : RR< + opc, (outs RC:$sx), (ins immOp2:$sz, I32:$sy), + !strconcat(opcStr, " $sx, (${sz})1, $sy"), + []> { + let cy = 1; + let cz = 0; + let hasSideEffects = 0; + } + def im0 : RR< + opc, (outs RC:$sx), (ins immOp2:$sz, immOp:$sy), + !strconcat(opcStr, " $sx, (${sz})0, $sy"), + []> { + let cy = 0; + let cz = 0; + let sz{6} = 1; + let hasSideEffects = 0; + } + def im1 : RR< + opc, (outs RC:$sx), (ins immOp2:$sz, immOp:$sy), + !strconcat(opcStr, " $sx, (${sz})1, $sy"), + []> { + let cy = 0; + let cz = 0; + let hasSideEffects = 0; + } + def zi : RR< + opc, (outs RC:$sx), (ins immOp:$sy), + !strconcat(opcStr, " $sx, $sy"), + [(set Ty:$sx, (OpNode 0, (i32 simm7:$sy)))]> { + let cy = 0; + let cz = 0; + let sz = 0; + let hasSideEffects = 0; + } } -// Return instruction is also a special case of jump. 
-let cx = 0, cx2 = 0, bpf = 0 /* NONE */, cf = 15 /* AT */, cy = 0, sy = 0, - cz = 1, sz = 0x10 /* SX10 */, imm32 = 0, Uses = [SX10], - isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1, - isCodeGenOnly = 1, hasSideEffects = 0 in -def RET : CF< - 0x19, (outs), (ins), - "b.l (,%lr)", - [(retflag)]>; +// Multiclass for RR type instructions without dag pattern +// Used by sra.w.zx, sla.w.zx, and others -// Branch instruction -let cx = 0, cx2 = 0, bpf = 0 /* NONE */ in -defm BCRL : BCRm<"br${cf}.l", "br.l", 0x18, I64, i64, simm7Op64, uimm6Op64>; +multiclass RRINDmopc, SDNode OpNode, + RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> { + def rr : RR< + opc, (outs RC:$sx), (ins RC:$sz, I32:$sy), + !strconcat(opcStr, " $sx, $sz, $sy"), + []> { + let cy = 1; + let cz = 1; + let hasSideEffects = 0; + } + def ri : RR< + opc, (outs RC:$sx), (ins RC:$sz, immOp:$sy), + !strconcat(opcStr, " $sx, $sz, $sy"), + []> { + let cy = 0; + let cz = 1; + let hasSideEffects = 0; + } + def rm0 : RR< + opc, (outs RC:$sx), (ins immOp2:$sz, I32:$sy), + !strconcat(opcStr, " $sx, (${sz})0, $sy"), + []> { + let cy = 1; + let cz = 0; + let sz{6} = 1; + // (guess) tblgen conservatively assumes hasSideEffects when it fails to infer from a pattern. + let hasSideEffects = 0; + } + def rm1 : RR< + opc, (outs RC:$sx), (ins immOp2:$sz, I32:$sy), + !strconcat(opcStr, " $sx, (${sz})1, $sy"), + []> { + let cy = 1; + let cz = 0; + let hasSideEffects = 0; + } + def im0 : RR< + opc, (outs RC:$sx), (ins immOp2:$sz, immOp:$sy), + !strconcat(opcStr, " $sx, (${sz})0, $sy"), + []> { + let cy = 0; + let cz = 0; + let sz{6} = 1; + let hasSideEffects = 0; + } + def im1 : RR< + opc, (outs RC:$sx), (ins immOp2:$sz, immOp:$sy), + !strconcat(opcStr, " $sx, (${sz})1, $sy"), + []> { + let cy = 0; + let cz = 0; + let hasSideEffects = 0; + } + def zi : RR< + opc, (outs RC:$sx), (ins immOp:$sy), + !strconcat(opcStr, " $sx, $sy"), + []> { + let cy = 0; + let cz = 0; + let sz = 0; + let hasSideEffects = 0; + } +} + +// Multiclass for RR type instructions +// Used by cmov instruction + +let Constraints = "$sx = $sd", DisableEncoding = "$sd" in +multiclass RRCMOVmopc, + RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> { + def rr : RR< + opc, (outs I64:$sx), (ins CCOp:$cf, RC:$sy, I64:$sz, I64:$sd), + !strconcat(opcStr, " $sx, $sz, $sy"), + []> { + let cy = 1; + let cz = 1; + let hasSideEffects = 0; + } + def ri : RR< + opc, (outs I64:$sx), (ins CCOp:$cf, I64:$sz, immOp:$sy, I64:$sd), + !strconcat(opcStr, " $sx, $sz, $sy"), + []> { + let cy = 0; + let cz = 1; + let hasSideEffects = 0; + } + def rm0 : RR< + opc, (outs I64:$sx), (ins CCOp:$cf, RC:$sy, immOp2:$sz, I64:$sd), + !strconcat(opcStr, " $sx, (${sz})0, $sy"), + []> { + let cy = 1; + let cz = 0; + let sz{6} = 1; + // (guess) tblgen conservatively assumes hasSideEffects when it fails to infer from a pattern. 
+ let hasSideEffects = 0; + } + def rm1 : RR< + opc, (outs I64:$sx), (ins CCOp:$cf, RC:$sy, immOp2:$sz, I64:$sd), + !strconcat(opcStr, " $sx, (${sz})1, $sy"), + []> { + let cy = 1; + let cz = 0; + let hasSideEffects = 0; + } + def im0 : RR< + opc, (outs I64:$sx), (ins CCOp:$cf, immOp:$sy, immOp2:$sz, I64:$sd), + !strconcat(opcStr, " $sx, (${sz})0, $sy"), + []> { + let cy = 0; + let cz = 0; + let sz{6} = 1; + let hasSideEffects = 0; + } + def im1 : RR< + opc, (outs I64:$sx), (ins CCOp:$cf, immOp:$sy, immOp2:$sz, I64:$sd), + !strconcat(opcStr, " $sx, (${sz})1, $sy"), + []> { + let cy = 0; + let cz = 0; + let hasSideEffects = 0; + } +} + +// Multiclass for RR type instructions with only 2 operands +// Used by pcnt, brv + +multiclass RRI2mopc, SDNode OpNode, + RegisterClass RC, ValueType Ty, Operand immOp2> { + def r : RR< + opc, (outs RC:$sx), (ins RC:$sz), + !strconcat(opcStr, " $sx, $sz"), + [(set Ty:$sx, (OpNode Ty:$sz))]> { + let cy = 1; + let cz = 1; + let hasSideEffects = 0; + } + def i : RR< + opc, (outs RC:$sx), (ins RC:$sz), + !strconcat(opcStr, " $sx, $sz"), + [(set Ty:$sx, (OpNode Ty:$sz))]> { + let cy = 0; + let cz = 1; + let hasSideEffects = 0; + } + def m0 : RR< + opc, (outs RC:$sx), (ins immOp2:$sz), + !strconcat(opcStr, " $sx, (${sz})0"), + []> { + let cy = 1; + let cz = 0; + let sz{6} = 1; + // (guess) tblgen conservatively assumes hasSideEffects when it fails to infer from a pattern. + let hasSideEffects = 0; + } + def m1 : RR< + opc, (outs RC:$sx), (ins immOp2:$sz), + !strconcat(opcStr, " $sx, (${sz})1"), + []> { + let cy = 1; + let cz = 0; + let hasSideEffects = 0; + } +} + + +// Branch multiclass +let isBranch = 1, isTerminator = 1, hasDelaySlot = 1 in +multiclass BCRm opc, + RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> { + def rr : CF< + opc, (outs), + (ins CCOp:$cf, RC:$sy, RC:$sz, brtarget32:$imm32), + !strconcat(opcStr, " $sy, $sz, $imm32"), []> { + let cy = 1; + let cz = 1; + let hasSideEffects = 0; + } + def ir : CF< + opc, (outs), + (ins CCOp:$cf, immOp:$sy, RC:$sz, brtarget32:$imm32), + !strconcat(opcStr, " $sy, $sz, $imm32"), []> { + let cy = 0; + let cz = 1; + let hasSideEffects = 0; + } + def rm0 : CF< + opc, (outs), (ins CCOp:$cf, RC:$sy, immOp2:$sz, brtarget32:$imm32), + !strconcat(opcStr, " $sy, (${sz})0, $imm32"), []> { + let cy = 1; + let cz = 0; + let sz{6} = 1; + // (guess) tblgen conservatively assumes hasSideEffects when it fails to infer from a pattern. + let hasSideEffects = 0; + } + def rm1 : CF< + opc, (outs), (ins CCOp:$cf, RC:$sy, immOp2:$sz, brtarget32:$imm32), + !strconcat(opcStr, " $sy, (${sz})1, $imm32"), []> { + let cy = 1; + let cz = 0; + let hasSideEffects = 0; + } + def im0 : CF< + opc, (outs), (ins CCOp:$cf, immOp:$sy, immOp2:$sz, brtarget32:$imm32), + !strconcat(opcStr, " $sy, (${sz})0, $imm32"), []> { + let cy = 0; + let cz = 0; + let sz{6} = 1; + let hasSideEffects = 0; + } + def im1 : CF< + opc, (outs), (ins CCOp:$cf, immOp:$sy, immOp2:$sz, brtarget32:$imm32), + !strconcat(opcStr, " $sy, (${sz})1, $imm32"), []> { + let cy = 0; + let cz = 0; + let hasSideEffects = 0; + } + def a : CF< + opc, (outs), (ins brtarget32:$imm32), + !strconcat(opcStrAt, " $imm32"), []> { + let cy = 0; + let sy = 0; + let cz = 0; + let sz = 0; + let cf = 15; /* AT */ + let isBarrier = 1; + let hasSideEffects = 0; + } +} + +// Multiclass for floating point conversion instructions. 
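+// Each conversion gets a register form (r) and a simm7 immediate form
+// (i); the instantiation site fixes cx/cw/cz/sz with "let" as needed to
+// pick the precise variant, e.g. the later
+//   defm FIX : CVTm<"cvt.w.d.sx.rz", 0x4E, fp_to_sint, ...>
+// is the f64 to i32 conversion with sign extension and round-toward-zero.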
+// Used by CVS/CVD/FLT and others +multiclass CVTm opc, SDNode OpNode, + RegisterClass RCo, ValueType Tyo, + RegisterClass RCi, ValueType Tyi, Operand immOp> { + def r : RR { + let cy = 1; + let hasSideEffects = 0; + } + def i : RR { + let cy = 0; + let hasSideEffects = 0; + } +} + + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + +// CMOV instructions +let cx = 0, cw = 0, cw2 = 0 in +defm CMOVL : RRCMOVm<"cmov.l.${cf}", 0x3B, I64, i64, simm7Op64, uimm6Op64>; + +let cx = 0, cw = 1, cw2 = 0 in +defm CMOVW : RRCMOVm<"cmov.w.${cf}", 0x3B, I32, i32, simm7Op64, uimm6Op32>; + +let cx = 0, cw = 0, cw2 = 1 in +defm CMOVD : RRCMOVm<"cmov.d.${cf}", 0x3B, I64, f64, simm7Op64, uimm6Op64>; + +let cx = 0, cw = 1, cw2 = 1 in +defm CMOVS : RRCMOVm<"cmov.s.${cf}", 0x3B, F32, f32, simm7Op64, uimm6Op32>; + +// NOP instruction +let cx = 0, sx = 0, cy = 0, sy = 0, cz = 0, sz = 0, imm32 = 0, hasSideEffects = 0 in +def NOP : RR<0x79, (outs), (ins), "nop", []>; + +// LEA and LEASL instruction (load 32 bit imm to low or high part) +let cx = 0 in +defm LEA : RMm<"lea", 0x06, add, I64, i64, simm7Op64, simm32Op64>; +let cx = 1 in +defm LEASL : RMNDm<"lea.sl", 0x06, add, I64, i64, simm7Op64, simm32Op64>; +let isCodeGenOnly = 1 in { +let cx = 0 in +defm LEA32 : RMm<"lea", 0x06, add, I32, i32, simm7Op32, simm32Op32>; +let cx = 1 in +defm LEASL32 : RMNDm<"lea.sl", 0x06, add, I32, i32, simm7Op32, simm32Op32>; +} + +let cx = 0, cy = 1, cz = 0, sz = 0, hasSideEffects = 0 in { + def LEAasx : RM< + 0x06, (outs I64:$sx), (ins MEMri:$addr), + "lea $sx,$addr", [(set iPTR:$sx, ADDRri:$addr)]>; +} + +// 5.3.2.2. Fixed-Point Arithmetic Operation Instructions + +// ADD instruction +let cx = 0 in +defm ADD : RRNDm<"addu.l", 0x48, add, I64, i64, simm7Op64, uimm6Op64>; +let cx = 1 in +defm ADDUW : RRNDm<"addu.w", 0x48, add, I32, i32, simm7Op32, uimm6Op32>; + +// ADS instruction +let cx = 0 in +defm ADS : RRm<"adds.w.sx", 0x4A, add, I32, i32, simm7Op32, uimm6Op32>; +let cx = 1 in +defm ADSU : RRNDm<"adds.w.zx", 0x4A, add, I32, i32, simm7Op32, uimm6Op32>; + +// ADX instruction +let cx = 0 in +defm ADX : RRm<"adds.l", 0x59, add, I64, i64, simm7Op64, uimm6Op64>; + +// SUB instruction +let cx = 0 in +defm SUB : RRNDm<"subu.l", 0x58, sub, I64, i64, simm7Op64, uimm6Op64>; +let cx = 1 in +defm SUBUW : RRNDm<"subu.w", 0x58, sub, I32, i32, simm7Op32, uimm6Op32>; + +// SBS instruction +let cx = 0 in +defm SBS : RRNCm<"subs.w.sx", 0x5A, sub, I32, i32, simm7Op32, uimm6Op32>; +let cx = 1 in +defm SBSU : RRNDm<"subs.w.zx", 0x5A, sub, I32, i32, simm7Op32, uimm6Op32>; + +// SBX instruction +let cx = 0 in +defm SBX : RRNCm<"subs.l", 0x5B, sub, I64, i64, simm7Op64, uimm6Op64>; + +// MPY instruction +let cx = 0 in +defm MPY : RRNDm<"mulu.l", 0x49, mul, I64, i64, simm7Op64, uimm6Op64>; +let cx = 1 in +defm MPYUW : RRNDm<"mulu.w", 0x49, mul, I32, i32, simm7Op32, uimm6Op32>; + +// MPS instruction +let cx = 0 in +defm MPS : RRm<"muls.w.sx", 0x4B, mul, I32, i32, simm7Op32, uimm6Op32>; +let cx = 1 in +defm MPSU : RRNDm<"muls.w.zx", 0x4B, mul, I32, i32, simm7Op32, uimm6Op32>; + +// MPX instruction +let cx = 0 in +defm MPX : RRm<"muls.l", 0x6E, mul, I64, i64, simm7Op64, uimm6Op64>; + +// DIV instruction +let cx = 0 in +defm DIV : RRNCm<"divu.l", 0x6F, udiv, I64, i64, simm7Op64, uimm6Op64>; +let cx = 1 in +defm DIVUW : RRNCm<"divu.w", 0x6F, udiv, I32, i32, simm7Op32, uimm6Op32>; + +// DVS instruction +let cx = 0 in +defm DVS : 
RRNCm<"divs.w.sx", 0x7B, sdiv, I32, i32, simm7Op32, uimm6Op32>; +let cx = 1 in +defm DVSU : RRNDm<"divs.w.zx", 0x7B, sdiv, I32, i32, simm7Op32, uimm6Op32>; + +// DVX instruction +let cx = 0 in +defm DVX : RRNCm<"divs.l", 0x7F, sdiv, I64, i64, simm7Op64, uimm6Op64>; + +// CMP instruction +let cx = 0 in +defm CMP : RRNDm<"cmpu.l", 0x55, setcc, I64, i64, simm7Op64, uimm6Op64>; +let cx = 1 in +defm CMPUW : RRNDm<"cmpu.w", 0x55, setcc, I32, i32, simm7Op32, uimm6Op32>; + +// CPS instruction +let cx = 0 in +defm CPS : RRNDm<"cmps.w.sx", 0x7A, setcc, I32, i32, simm7Op32, uimm6Op32>; +let cx = 1 in +defm CPSU : RRNDm<"cmps.w.zx", 0x7A, setcc, I32, i32, simm7Op32, uimm6Op32>; + +// CPX instruction +let cx = 0 in +defm CPX : RRNDm<"cmps.l", 0x6A, setcc, I64, i64, simm7Op64, uimm6Op64>; + +// cx: sx/zx, cw: max/min + +let cw = 0 in defm CMXa : + RRm<"maxs.l", 0x68, VEmax, I64, i64, simm7Op64, uimm6Op64>; + +let cx = 0, cw = 0 in defm CMSa : + RRm<"maxs.w.zx", 0x78, VEmax, I32, i32, simm7Op32, uimm6Op32>; + +let cw = 1 in defm CMXi : + RRm<"mins.l", 0x68, VEmin, I64, i64, simm7Op64, uimm6Op64>; + +let cx = 1, cw = 0 in defm CMSi : + RRm<"mins.w.zx", 0x78, VEmin, I32, i32, simm7Op32, uimm6Op32>; + +// 5.3.2.3. Logical Arithmetic Operation Instructions + +// AND, OR, XOR, EQV, NND, and MRG instruction +let cx = 0 in { + defm AND : RRm<"and", 0x44, and, I64, i64, simm7Op64, uimm6Op64>; + defm OR : RRm<"or", 0x45, or, I64, i64, simm7Op64, uimm6Op64>; + defm XOR : RRm<"xor", 0x46, xor, I64, i64, simm7Op64, uimm6Op64>; + let isCodeGenOnly = 1 in { + defm AND32 : RRm<"and", 0x44, and, I32, i32, simm7Op32, uimm6Op32>; + defm OR32 : RRm<"or", 0x45, or, I32, i32, simm7Op32, uimm6Op32>; + defm XOR32 : RRm<"xor", 0x46, xor, I32, i32, simm7Op32, uimm6Op32>; + } + /* + defm EQV : RRm<"eqv", 0x47, eqv, I64, i64, simm7Op64, uimm6Op64>; + defm NND : RRm<"nnd", 0x54, nnd, I64, i64, simm7Op64, uimm6Op64>; + defm MRG : RRm<"mrg", 0x56, mrg, I64, i64, simm7Op64, uimm6Op64>; + */ +} + +// Bits operations + +let cx = 0 in { +defm PCNT : RRI2m<"pcnt", 0x38, ctpop, I64, i64, uimm6Op64>; +defm BRV : RRI2m<"brv", 0x39, bitreverse, I64, i64, uimm6Op64>; +defm LDZ : RRI2m<"ldz", 0x67, ctlz, I64, i64, uimm6Op64>; +defm BSWP : RRINDm<"bswp", 0x2B, bswap, I64, i64, simm7Op64, uimm6Op64>; +} + + +// 5.3.2.4 Shift Instructions + +let cx = 0 in +defm SRAX : RRIm<"sra.l", 0x77, sra, I64, i64, simm7Op32, uimm6Op64>; +let cx = 0 in +defm SRA : RRIm<"sra.w.sx", 0x76, sra, I32, i32, simm7Op32, uimm6Op32>; +let cx = 1 in +defm SRAU : RRINDm<"sra.w.zx", 0x76, sra, I32, i32, simm7Op32, uimm6Op32>; + +let cx = 0 in +defm SLL : RRIm<"sll", 0x65, shl, I64, i64, simm7Op32, uimm6Op64>; +let cx = 0 in +defm SLA : RRIm<"sla.w.sx", 0x66, shl, I32, i32, simm7Op32, uimm6Op32>; +let cx = 1 in +defm SLAU : RRINDm<"sla.w.zx", 0x66, shl, I32, i32, simm7Op32, uimm6Op32>; + +let cx = 0 in +defm SRL : RRIm<"srl", 0x75, srl, I64, i64, simm7Op32, uimm6Op64>; + +def : Pat<(i32 (srl i32:$src, (i32 simm7:$val))), + (EXTRACT_SUBREG (SRLri (ANDrm0 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + $src, sub_i32), 32), imm:$val), sub_i32)>; +def : Pat<(i32 (srl i32:$src, i32:$val)), + (EXTRACT_SUBREG (SRLrr (ANDrm0 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + $src, sub_i32), 32), $val), sub_i32)>; + +// 5.3.2.5. 
Floating-point Arithmetic Operation Instructions +let cx = 0 in +defm FAD : RRFm<"fadd.d", 0x4C, fadd, I64, f64, simm7Op64, uimm6Op64>; +let cx = 1 in +defm FADS : RRFm<"fadd.s", 0x4C, fadd, F32, f32, simm7Op32, uimm6Op32>; +let cx = 0 in +defm FAQ : RRFm<"fadd.q", 0x6C, fadd, F128, f128, simm7Op128, uimm6Op128>; + +let cx = 0 in +defm FSB : RRFm<"fsub.d", 0x5C, fsub, I64, f64, simm7Op64, uimm6Op64>; +let cx = 1 in +defm FSBS : RRFm<"fsub.s", 0x5C, fsub, F32, f32, simm7Op32, uimm6Op32>; +let cx = 0 in +defm FSQ : RRFm<"fsub.q", 0x7C, fsub, F128, f128, simm7Op128, uimm6Op128>; + +let cx = 0 in +defm FMP : RRFm<"fmul.d", 0x4D, fmul, I64, f64, simm7Op64, uimm6Op64>; +let cx = 1 in +defm FMPS : RRFm<"fmul.s", 0x4D, fmul, F32, f32, simm7Op32, uimm6Op32>; +let cx = 0 in +defm FMQ : RRFm<"fmul.q", 0x6D, fmul, F128, f128, simm7Op128, uimm6Op128>; + +let cx = 0 in +defm FDV : RRFm<"fdiv.d", 0x5D, fdiv, I64, f64, simm7Op64, uimm6Op64>; +let cx = 1 in +defm FDVS : RRFm<"fdiv.s", 0x5D, fdiv, F32, f32, simm7Op32, uimm6Op32>; + +// FCP instruction +let cx = 0 in +defm FCP : RRNDm<"fcmp.d", 0x7E, setcc, I64, f64, simm7Op64, uimm6Op64>; +let cx = 1 in +defm FCPS : RRNDm<"fcmp.s", 0x7E, setcc, F32, f32, simm7Op32, uimm6Op32>; +let cx = 0 in +defm FCQ : RRFCQm<"fcmp.q", 0x7D, setcc, F128, f128, simm7Op128, uimm6Op128>; + +// FCM +let cw = 0 in { + let cx = 0 in + defm FCMA : RRNDm<"fmax.d", 0x3E, VEfmax, I64, f64, simm7Op64, uimm6Op64>; + let cx = 1 in + defm FCMAS : RRNDm<"fmax.s", 0x3E, VEfmax, F32, f32, simm7Op32, uimm6Op32>; +} +let cw = 1 in { + let cx = 0 in + defm FCMI : RRNDm<"fmin.d", 0x3E, VEfmin, I64, f64, simm7Op64, uimm6Op64>; + let cx = 1 in + defm FCMIS : RRNDm<"fmin.s", 0x3E, VEfmin, F32, f32, simm7Op32, uimm6Op32>; +} + +let cx = 0, cw = 0 /* sign extend */, cz = 1, sz = 0 /* round toward zero */ in +defm FIX : CVTm<"cvt.w.d.sx.rz", 0x4E, fp_to_sint, I32, i32, I64, f64, simm7Op32>; +let cx = 1, cw = 0 /* sign extend */, cz = 1, sz = 0 /* round toward zero */ in +defm FIXS : CVTm<"cvt.w.s.sx.rz", 0x4E, fp_to_sint, I32, i32, F32, f32, simm7Op32>; +let cx = 0, cz = 1, sz = 0 /* round toward zero */ in +defm FIXX : CVTm<"cvt.l.d.rz", 0x4F, fp_to_sint, I64, i64, I64, f64, simm7Op64>; +let cz = 0, sz = 0 in { + let cx = 0 in + defm FLT : CVTm<"cvt.d.w", 0x5E, sint_to_fp, I64, f64, I32, i32, simm7Op32>; + let cx = 1 in + defm FLTS : CVTm<"cvt.s.w", 0x5E, sint_to_fp, F32, f32, I32, i32, simm7Op32>; + let cx = 0 in + defm FLTX : CVTm<"cvt.d.l", 0x5F, sint_to_fp, I64, f64, I64, i64, simm7Op64>; + let cx = 0 in + defm CVS : CVTm<"cvt.s.d", 0x1F, fpround, F32, f32, I64, f64, simm7Op64>; + let cx = 1 in + defm CVSQ : CVTm<"cvt.s.q", 0x1F, fpround, F32, f32, F128, f128, simm7Op128>; + let cx = 0 in + defm CVD : CVTm<"cvt.d.s", 0x0F, fpextend, I64, f64, F32, f32, simm7Op32>; + let cx = 1 in + defm CVDQ : CVTm<"cvt.d.q", 0x0F, fpround, I64, f64, F128, f128, simm7Op128>; + let cx = 0 in + defm CVQ : CVTm<"cvt.q.d", 0x2D, fpextend, F128, f128, I64, f64, simm7Op64>; + let cx = 1 in + defm CVQS : CVTm<"cvt.q.s", 0x2D, fpextend, F128, f128, F32, f32, simm7Op32>; +} + +// Load and Store instructions +// As 1st step, only uses sz and imm32 to represent $addr +let mayLoad = 1, hasSideEffects = 0 in { +let cy = 0, sy = 0, cz = 1 in { +let cx = 0 in +def LDSri : RM< + 0x01, (outs I64:$sx), (ins MEMri:$addr), + "ld $sx, $addr", + [(set i64:$sx, (load ADDRri:$addr))]>; +let cx = 0 in +def LDUri : RM< + 0x02, (outs F32:$sx), (ins MEMri:$addr), + "ldu $sx, $addr", + [(set f32:$sx, (load ADDRri:$addr))]>; +let cx = 0 in 
+def LDLri : RM< + 0x03, (outs I32:$sx), (ins MEMri:$addr), + "ldl.sx $sx, $addr", + [(set i32:$sx, (load ADDRri:$addr))]>; +let cx = 1 in +def LDLUri : RM< + 0x03, (outs I32:$sx), (ins MEMri:$addr), + "ldl.zx $sx, $addr", + [(set i32:$sx, (load ADDRri:$addr))]>; +let cx = 0 in +def LD2Bri : RM< + 0x04, (outs I32:$sx), (ins MEMri:$addr), + "ld2b.sx $sx, $addr", + [(set i32:$sx, (sextloadi16 ADDRri:$addr))]>; +let cx = 1 in +def LD2BUri : RM< + 0x04, (outs I32:$sx), (ins MEMri:$addr), + "ld2b.zx $sx, $addr", + [(set i32:$sx, (zextloadi16 ADDRri:$addr))]>; +let cx = 0 in +def LD1Bri : RM< + 0x05, (outs I32:$sx), (ins MEMri:$addr), + "ld1b.sx $sx, $addr", + [(set i32:$sx, (sextloadi8 ADDRri:$addr))]>; +let cx = 1 in +def LD1BUri : RM< + 0x05, (outs I32:$sx), (ins MEMri:$addr), + "ld1b.zx $sx, $addr", + [(set i32:$sx, (zextloadi8 ADDRri:$addr))]>; +} +def LDQri : Pseudo< + (outs F128:$sx), (ins MEMri:$addr), + "# pseudo ldq $sx, $addr", + [(set f128:$sx, (load ADDRri:$addr))]>; +} + +let mayStore = 1, hasSideEffects = 0 in { +let cx = 0, cy = 0, sy = 0, cz = 1 in { +def STSri : RM< + 0x11, (outs), (ins MEMri:$addr, I64:$sx), + "st $sx, $addr", + [(store i64:$sx, ADDRri:$addr)]>; +def STUri : RM< + 0x12, (outs), (ins MEMri:$addr, F32:$sx), + "stu $sx, $addr", + [(store f32:$sx, ADDRri:$addr)]>; +def STLri : RM< + 0x13, (outs), (ins MEMri:$addr, I32:$sx), + "stl $sx, $addr", + [(store i32:$sx, ADDRri:$addr)]>; +def ST2Bri : RM< + 0x14, (outs), (ins MEMri:$addr, I32:$sx), + "st2b $sx, $addr", + [(truncstorei16 i32:$sx, ADDRri:$addr)]>; +def ST1Bri : RM< + 0x15, (outs), (ins MEMri:$addr, I32:$sx), + "st1b $sx, $addr", + [(truncstorei8 i32:$sx, ADDRri:$addr)]>; +} +def STQri : Pseudo< + (outs), (ins MEMri:$addr, F128:$sx), + "# pseudo stq $sx, $addr", + [(store f128:$sx, ADDRri:$addr)]>; +} + +def : Pat<(f64 (load ADDRri:$addr)), (LDSri ADDRri:$addr)>; +def : Pat<(store f64:$sx, ADDRri:$addr), (STSri ADDRri:$addr, $sx)>; + +// Patterns for unaligned load + +def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() < 4; +}]>; +def unaligned8load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() < 8; +}]>; +def : Pat<(i32 (unaligned4load ADDRri:$addr)), + (LDLri MEMri:$addr)>; +def : Pat<(f32 (unaligned4load ADDRri:$addr)), + (LDUri MEMri:$addr)>; +def : Pat<(i64 (unaligned8load ADDRri:$addr)), + (LDSri ADDRri:$addr)>; +def : Pat<(f64 (unaligned8load ADDRri:$addr)), + (LDSri ADDRri:$addr)>; + +// Patterns for unaligned store + +def unaligned4store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() < 4; +}]>; +def unaligned8store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() < 8; +}]>; +def : Pat<(unaligned4store i32:$sx, ADDRri:$addr), + (STLri ADDRri:$addr, $sx)>; +def : Pat<(unaligned4store f32:$sx, ADDRri:$addr), + (STUri ADDRri:$addr, $sx)>; +def : Pat<(unaligned8store i64:$sx, ADDRri:$addr), + (STSri ADDRri:$addr, $sx)>; +def : Pat<(unaligned8store f64:$sx, ADDRri:$addr), + (STSri ADDRri:$addr, $sx)>; + +// Patterns for unaligned sextload/zextload/extload + +def unaligned2extloadi16 : PatFrag<(ops node:$ptr), (extloadi16 node:$ptr), [{ + return cast(N)->getAlignment() < 2; +}]>; +def unaligned2sextloadi16 : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{ + return cast(N)->getAlignment() < 2; +}]>; +def unaligned2zextloadi16 : PatFrag<(ops node:$ptr), (zextloadi16 node:$ptr), [{ + return 
cast(N)->getAlignment() < 2; +}]>; +def unaligned4extloadi32 : PatFrag<(ops node:$ptr), (extloadi32 node:$ptr), [{ + return cast(N)->getAlignment() < 4; +}]>; +def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return cast(N)->getAlignment() < 4; +}]>; +def unaligned4zextloadi32 : PatFrag<(ops node:$ptr), (zextloadi32 node:$ptr), [{ + return cast(N)->getAlignment() < 4; +}]>; +def : Pat<(i64 (unaligned2sextloadi16 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LD2Bri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (unaligned2zextloadi16 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LD2BUri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (unaligned2extloadi16 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LD2BUri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (unaligned4sextloadi32 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LDLri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (unaligned4zextloadi32 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LDLUri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (unaligned4extloadi32 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LDLri MEMri:$addr), sub_i32)>; + +// Patterns for unaligned truncstore + +def unaligned4truncstorei16 : PatFrag<(ops node:$val, node:$ptr), + (truncstorei16 node:$val, node:$ptr), [{ + return cast(N)->getAlignment() < 4; +}]>; +def unaligned8truncstorei32 : PatFrag<(ops node:$val, node:$ptr), + (truncstorei32 node:$val, node:$ptr), [{ + return cast(N)->getAlignment() < 8; +}]>; +def : Pat<(unaligned4truncstorei16 i64:$sx, ADDRri:$addr), + (ST2Bri ADDRri:$addr, (EXTRACT_SUBREG $sx, sub_i32))>; +def : Pat<(unaligned8truncstorei32 i64:$sx, ADDRri:$addr), + (STLri ADDRri:$addr, (EXTRACT_SUBREG $sx, sub_i32))>; + +// Jump instruction +let cx = 0, cx2 = 0, bpf = 0 /* NONE */, cy = 1, cz = 1, + isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasSideEffects = 0 in +def BC : CF< + 0x19, (outs), (ins CCOp:$cf, I64:$sy, brtarget32:$imm32), + "b.${cf}.l $sy, $imm32", + []>; + +// Jump always instruction is treated as a special case of jump in order +// to make finding unconditional jump easy. +let cx = 0, cx2 = 0, bpf = 0 /* NONE */, cf = 15 /* AT */, cy = 0, sy = 0, + cz = 1, + isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1, + hasDelaySlot = 1, isCodeGenOnly = 1, hasSideEffects = 0 in { +def BArr : CF< + 0x19, (outs), (ins MEMrr:$addr), + "b.l $addr", + [(brind ADDRrr:$addr)]>; +def BAri : CF< + 0x19, (outs), (ins MEMri:$addr), + "b.l $addr", + [(brind ADDRri:$addr)]>; +} + +// Jump never instruction is also a special case of jump. +let cx = 0, cx2 = 0, bpf = 0 /* NONE */, cf = 0 /* AF */, cy = 1, sy = 0, + cz = 1, + isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasSideEffects = 0 in +def BN : CF< + 0x19, (outs), (ins brtarget32:$imm32), + "b.af.l $imm32", + []>; + +// Return instruction is also a special case of jump. 
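+// It is encoded as an always-taken "b.l (, %s10)" through the link
+// register (%s10, alias %lr), and isReturn/isBarrier below make codegen
+// treat it as a block terminator rather than an ordinary indirect
+// branch.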
+let cx = 0, cx2 = 0, bpf = 0 /* NONE */, cf = 15 /* AT */, cy = 0, sy = 0, + cz = 1, sz = 0x10 /* SX10 */, imm32 = 0, Uses = [SX10], + isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1, + isCodeGenOnly = 1, hasSideEffects = 0 in +def RET : CF< + 0x19, (outs), (ins), + "b.l (,%lr)", + [(retflag)]>; + +// Branch and Save IC + +let cx = 0, cy = 0, cy = 0, cz = 1, hasSideEffects = 0 /* , Uses = [IC] */ in +def BSIC : RM<0x08, (outs), (ins I64:$sx, I64:$sz), "bsic $sx, (, ${sz})", []>; + +// Branch instruction +let cx = 0, cx2 = 0, bpf = 0 /* NONE */ in +defm BCRL : BCRm<"br${cf}.l", "br.l", 0x18, I64, i64, simm7Op64, uimm6Op64>; +let cx = 1, cx2 = 0, bpf = 0 /* NONE */ in +defm BCRW : BCRm<"br${cf}.w", "br.w", 0x18, I32, i32, simm7Op32, uimm6Op32>; +let cx = 0, cx2 = 1, bpf = 0 /* NONE */ in +defm BCRD : BCRm<"br${cf}.d", "br.d", 0x18, I64, f64, simm7Op64, uimm6Op64>; +let cx = 1, cx2 = 1, bpf = 0 /* NONE */ in +defm BCRS : BCRm<"br${cf}.s", "br.s", 0x18, F32, f32, simm7Op32, uimm6Op32>; + +// Load and Store host memory instructions +let cx = 0, cy = 0, cz = 1, hasSideEffects = 0 in { +let sy = 3 in +def LHMri : RM< + 0x21, (outs I64:$sx), (ins MEMASri:$addr), + "lhm.l $sx, $addr", + []>; +let sy = 2 in +def LHMLri : RM< + 0x21, (outs I32:$sx), (ins MEMASri:$addr), + "lhm.w $sx, $addr", + []>; +let sy = 1 in +def LHM2Bri : RM< + 0x21, (outs I16:$sx), (ins MEMASri:$addr), + "lhm.h $sx, $addr", + []>; +let sy = 0 in +def LHM1Bri : RM< + 0x21, (outs I8:$sx), (ins MEMASri:$addr), + "lhm.b $sx, $addr", + []>; +} let cx = 0, cy = 0, cz = 1, hasSideEffects = 0 in { let sy = 3 in def SHMri : RM< 0x31, (outs), (ins MEMASri:$addr, I64:$sx), - "shm.l $sx, $addr">; + "shm.l $sx, $addr", + []>; +let sy = 2 in +def SHMLri : RM< + 0x31, (outs), (ins MEMASri:$addr, I32:$sx), + "shm.l $sx, $addr", + []>; +let sy = 1 in +def SHM2Bri : RM< + 0x31, (outs), (ins MEMASri:$addr, I16:$sx), + "shm.l $sx, $addr", + []>; +let sy = 0 in +def SHM1Bri : RM< + 0x31, (outs), (ins MEMASri:$addr, I8:$sx), + "shm.l $sx, $addr", + []>; } let cx = 0, sx = 0, cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0 in def MONC : RR< 0x3F, (outs), (ins), - "monc">; + "monc", + []>; + +let cx = 1, sx = 0, cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0 in +def MONCT : RR< + 0x3F, (outs), (ins), + "monc.hdb", + []>; + +// Save Instruction Counter + +let cx = 0, cy = 0, sy = 0, cz = 0, sz = 0, hasSideEffects = 0 /* , Uses = [IC] */ in +def SIC : RR<0x28, (outs I32:$sx), (ins), "sic $sx", []>; + +// Test and Set 1 AM (multiple length swap) + +let cx = 0 in +defm TS1AML : RRCASm<"ts1am.l", 0x42, add, I64, i64, simm7Op64, uimm6Op64>; +let cx = 1 in +defm TS1AMW : RRCASm<"ts1am.w", 0x42, add, I32, i32, simm7Op32, uimm6Op32>; + +// Atomic swap +// FIXME: Assign 4 byte align address to $src +// def : Pat<(i32 (atomic_swap_8 ADDRri:$src, i32:$new)), +// (TS1AMWasi MEMASri:$src, 1, $new)>; +// def : Pat<(i32 (atomic_swap_16 ADDRri:$src, i32:$new)), +// (TS1AMWasi MEMASri:$src, 3, $new)>; +def : Pat<(i32 (atomic_swap_32 ADDRri:$src, i32:$new)), + (TS1AMWasi MEMASri:$src, 15, $new)>; +def : Pat<(i64 (atomic_swap_64 ADDRri:$src, i64:$new)), + (TS1AMLasi MEMASri:$src, 127, $new)>; + +// Compare and Swap + +let cx = 0 in +defm CASL : RRCASm<"cas.l", 0x62, add, I64, i64, simm7Op64, uimm6Op64>; +let cx = 1 in +defm CASW : RRCASm<"cas.w", 0x62, add, I32, i32, simm7Op32, uimm6Op32>; + +// Atomic cmp and swap +def : Pat<(i32 (atomic_cmp_swap_32 ADDRri:$src, i32:$cmp, i32:$new)), + (CASWasr MEMASri:$src, $cmp, $new)>; +def : Pat<(i64 
(atomic_cmp_swap_64 ADDRri:$src, i64:$cmp, i64:$new)), + (CASLasr MEMASri:$src, $cmp, $new)>; + +// Transfer Control Instruction + +let avo = 0, c2 = 0, c1 = 0, c0 = 0, hasSideEffects = 1 in { + let lf = 0, sf = 1 in + def FENCEstore : RRFENCE<0x20, (outs), (ins), "fencem 1", []>; + let lf = 1, sf = 0 in + def FENCEload : RRFENCE<0x20, (outs), (ins), "fencem 2", []>; + let lf = 1, sf = 1 in + def FENCEloadstore : RRFENCE<0x20, (outs), (ins), "fencem 3", []>; +} + +def : Pat<(int_ve_fencem1), (FENCEstore)>; +def : Pat<(int_ve_fencem2), (FENCEload)>; +def : Pat<(int_ve_fencem3), (FENCEloadstore)>; + +// Set Vector Out-of-order memory access Boundary + +let sx = 0, sy = 0, sz = 0, hasSideEffects = 1 in +def SVOB : RR<0x30, (outs), (ins), "svob", []>; + +// MEMBARRIER +let hasSideEffects = 1 in +def MEMBARRIER : Pseudo<(outs), (ins), "# MEMBARRIER", + [(MemBarrier)] >; + +//===----------------------------------------------------------------------===// +// SJLJ Exception handling intrinsics +//===----------------------------------------------------------------------===// + +let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in { + def EH_SjLj_SetJmp : Pseudo<(outs I32:$dst), (ins I64:$buf), + "# EH_SJLJ_SETJMP", + [(set I32:$dst, (VEeh_sjlj_setjmp I64:$buf))]>; + let isTerminator = 1 in { + def EH_SjLj_LongJmp : Pseudo<(outs), (ins I64:$buf), + "# EH_SJLJ_LONGJMP", + [(VEeh_sjlj_longjmp I64:$buf)]>; + } +} + +let isBarrier = 1, hasSideEffects = 1, usesCustomInserter = 1 in + def EH_SjLj_Setup_Dispatch : Pseudo<(outs), (ins), "# EH_SJLJ_SETUP_DISPATCH", + [(VEeh_sjlj_setup_dispatch)]>; + +//===----------------------------------------------------------------------===// +// Dummy instruction for CPU flow control +//===----------------------------------------------------------------------===// + +let mayLoad = 1, mayStore = 0, hasSideEffects = 1, isTrap = 1 in { + def TRAP : Pseudo<(outs), (ins), "# TRAP", [(trap)]>; +} + +//===----------------------------------------------------------------------===// +// Instructions for CodeGenOnly +//===----------------------------------------------------------------------===// + +let isCodeGenOnly = 1 in { + +// Call instruction +let Defs = [SX10], Uses = [SX11], hasDelaySlot = 1, isCall = 1, hasSideEffects = 0 in { +let cx = 0, sx = 10, cy = 0, sy = 0, cz = 0, sz = 0 in +def CALL : RM< + 0x08, (outs), (ins calltarget:$imm32, variable_ops), + "bsic %lr, $imm32", + []>; +/* +// use sy and sz to represent 2 registers +let cx = 0, sx = 10, cy = 1, cz = 1, imm32 = 0 in +def CALLrr : RM< + 0x08, (outs), (ins MEMrr:$ptr, variable_ops), + "bsic %lr, $ptr", + [(call ADDRrr:$ptr)]>; +// use sz to represent a register, and use imm32 to represent immediate value +let cx = 0, sx = 10, cy = 0, sy = 0, cz = 1 in +def CALLri : RM< + 0x08, (outs), (ins MEMri:$ptr, variable_ops), + "bsic %lr, $ptr", + [(call ADDRri:$ptr)]>; +*/ +// use sz to represent a register +let cx = 0, sx = 10, cy = 0, sy = 0, cz = 1, imm32 = 0 in +def CALLr : RM< + 0x08, (outs), (ins I64:$sz, variable_ops), + "bsic %lr, (,$sz)", + []>; +} + +} + +//===----------------------------------------------------------------------===// +// Pattern Matchings +//===----------------------------------------------------------------------===// + +// Small immediates. +def : Pat<(i32 simm7:$val), (OR32im1 imm:$val, 0)>; +def : Pat<(i64 simm7:$val), (ORim1 imm:$val, 0)>; +// Medium immediates. 
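+// lea sign-extends its 32-bit immediate, so any simm32 needs a single
+// LEA, while an unsigned 32-bit value also clears the upper half with
+// "and ..., (32)0" ((m)0 is VE mask notation: m leading zero bits
+// followed by ones). A general 64-bit constant is then built low half
+// first, roughly (illustrative assembly, not exact operand syntax):
+//   lea    %s0, <low 32 bits>
+//   and    %s0, %s0, (32)0
+//   lea.sl %s0, <high 32 bits>(, %s0)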
+def : Pat<(i32 simm32:$val), (LEA32zzi imm:$val)>; +def : Pat<(i64 simm32:$val), (LEAzzi imm:$val)>; +def : Pat<(i64 uimm32:$val), (ANDrm0 (LEAzzi imm:$val), 32)>; +// Arbitrary immediates. +def : Pat<(i64 lozero:$val), + (LEASLzzi (HI32 imm:$val))>; +def : Pat<(i64 lomsbzero:$val), + (LEASLrzi (LEAzzi (LO32 imm:$val)), (HI32 imm:$val))>; +def : Pat<(i64 imm:$val), + (LEASLrzi (ANDrm0 (LEAzzi (LO32 imm:$val)), 32), + (HI32 imm:$val))>; + +// floating point +def : Pat<(f32 fpimm:$val), + (COPY_TO_REGCLASS (LEASLzzi (LOFP32 $val)), F32)>; +def : Pat<(f64 fplozero:$val), + (LEASLzzi (HIFP32 $val))>; +def : Pat<(f64 fplomsbzero:$val), + (LEASLrzi (LEAzzi (LOFP32 $val)), (HIFP32 $val))>; +def : Pat<(f64 fpimm:$val), + (LEASLrzi (ANDrm0 (LEAzzi (LOFP32 $val)), 32), + (HIFP32 $val))>; + +// The same integer registers are used for i32 and i64 values. +// When registers hold i32 values, the high bits are don't care. + +// Cast to i1 +def : Pat<(sext_inreg I32:$src, i1), + (SRAri (SLAri $src, 31), 31)>; +def : Pat<(sext_inreg I64:$src, i1), + (SRAXri (SLLri $src, 63), 63)>; + +// Cast to i8 +def : Pat<(sext_inreg I32:$src, i8), + (SRAri (SLAri $src, 24), 24)>; +def : Pat<(sext_inreg I64:$src, i8), + (SRAXri (SLLri $src, 56), 56)>; +def : Pat<(sext_inreg (i32 (trunc i64:$src)), i8), + (EXTRACT_SUBREG (SRAXri (SLLri $src, 56), 56), sub_i32)>; +def : Pat<(and (trunc i64:$src), 0xff), + (AND32rm0 (EXTRACT_SUBREG $src, sub_i32), 56)>; + +// Cast to i16 +def : Pat<(sext_inreg I32:$src, i16), + (SRAri (SLAri $src, 16), 16)>; +def : Pat<(sext_inreg I64:$src, i16), + (SRAXri (SLLri $src, 48), 48)>; +def : Pat<(sext_inreg (i32 (trunc i64:$src)), i16), + (EXTRACT_SUBREG (SRAXri (SLLri $src, 48), 48), sub_i32)>; +def : Pat<(and (trunc i64:$src), 0xffff), + (AND32rm0 (EXTRACT_SUBREG $src, sub_i32), 48)>; + +// Cast to i32 +def : Pat<(i32 (trunc i64:$src)), + (ADSrm1 (EXTRACT_SUBREG $src, sub_i32), 0)>; +def : Pat<(i32 (fp_to_sint f128:$sy)), (FIXr (CVDQr $sy))>; + +// Cast to i64 +def : Pat<(sext_inreg I64:$src, i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (ADSrm1 (EXTRACT_SUBREG $src, sub_i32), 0), sub_i32)>; +def : Pat<(i64 (sext i32:$sy)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADSrm1 $sy, 0), sub_i32)>; +def : Pat<(i64 (zext i32:$sy)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (ADSUrm1 $sy, 0), sub_i32)>; +def : Pat<(i64 (fp_to_sint f32:$sy)), (FIXXr (CVDr $sy))>; +def : Pat<(i64 (fp_to_sint f128:$sy)), (FIXXr (CVDQr $sy))>; + +// Cast to f32 +def : Pat<(f32 (sint_to_fp i64:$sy)), (CVSr (FLTXr i64:$sy))>; + +// Cast to f64 + +// Cast to f128 +def : Pat<(f128 (sint_to_fp i32:$sy)), (CVQr (FLTr $sy))>; +def : Pat<(f128 (sint_to_fp i64:$sy)), (CVQr (FLTXr $sy))>; + +def : Pat<(i64 (anyext i32:$sy)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $sy, sub_i32)>; + +// extload, sextload and zextload stuff +def : Pat<(i64 (sextloadi8 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LD1Bri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (zextloadi8 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LD1BUri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (sextloadi16 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LD2Bri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (zextloadi16 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LD2BUri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (sextloadi32 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LDLri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (zextloadi32 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LDLUri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (extloadi8 
ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LD1BUri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (extloadi16 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LD2BUri MEMri:$addr), sub_i32)>; +def : Pat<(i64 (extloadi32 ADDRri:$addr)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (LDLri MEMri:$addr), sub_i32)>; + +// anyextload +def : Pat<(extloadi8 ADDRri:$addr), (LD1BUri MEMri:$addr)>; +def : Pat<(extloadi16 ADDRri:$addr), (LD2BUri MEMri:$addr)>; + +// truncstore +def : Pat<(truncstorei8 i64:$src, ADDRri:$addr), + (ST1Bri MEMri:$addr, (EXTRACT_SUBREG $src, sub_i32))>; +def : Pat<(truncstorei16 i64:$src, ADDRri:$addr), + (ST2Bri MEMri:$addr, (EXTRACT_SUBREG $src, sub_i32))>; +def : Pat<(truncstorei32 i64:$src, ADDRri:$addr), + (STLri MEMri:$addr, (EXTRACT_SUBREG $src, sub_i32))>; + +// Atomic loads +def : Pat<(atomic_load_8 ADDRri:$src), (LD1BUri MEMri:$src)>; +def : Pat<(atomic_load_16 ADDRri:$src), (LD2BUri MEMri:$src)>; +def : Pat<(atomic_load_32 ADDRri:$src), (LDLUri MEMri:$src)>; +def : Pat<(atomic_load_64 ADDRri:$src), (LDSri MEMri:$src)>; + +// Atomic stores +def : Pat<(atomic_store_8 ADDRri:$ptr, i32:$val), + (ST1Bri MEMri:$ptr, $val)>; +def : Pat<(atomic_store_16 ADDRri:$ptr, i32:$val), + (ST2Bri MEMri:$ptr, $val)>; +def : Pat<(atomic_store_32 ADDRri:$ptr, i32:$val), + (STLri MEMri:$ptr, $val)>; +def : Pat<(atomic_store_64 ADDRri:$ptr, i64:$val), + (STSri MEMri:$ptr, $val)>; + +// Address calculation and its optimization +def : Pat<(VEhi tglobaladdr:$in), (LEASLzzi tglobaladdr:$in)>; +def : Pat<(VElo tglobaladdr:$in), (ANDrm0 (LEAzzi tglobaladdr:$in), 32)>; +def : Pat<(add (VEhi tglobaladdr:$in1), (VElo tglobaladdr:$in2)), + (LEASLrzi (ANDrm0 (LEAzzi tglobaladdr:$in2), 32), + (tglobaladdr:$in1))>; + +// GlobalTLS address calculation and its optimization +def : Pat<(VEhi tglobaltlsaddr:$in), (LEASLzzi tglobaltlsaddr:$in)>; +def : Pat<(VElo tglobaltlsaddr:$in), (ANDrm0 (LEAzzi tglobaltlsaddr:$in), 32)>; +def : Pat<(add (VEhi tglobaltlsaddr:$in1), (VElo tglobaltlsaddr:$in2)), + (LEASLrzi (ANDrm0 (LEAzzi tglobaltlsaddr:$in2), 32), + (tglobaltlsaddr:$in1))>; + +// Address calculation and its optimization +def : Pat<(VEhi tconstpool:$in), (LEASLzzi tconstpool:$in)>; +def : Pat<(VElo tconstpool:$in), (ANDrm0 (LEAzzi tconstpool:$in), 32)>; +def : Pat<(add (VEhi tconstpool:$in1), (VElo tconstpool:$in2)), + (LEASLrzi (ANDrm0 (LEAzzi tconstpool:$in2), 32), + (tconstpool:$in1))>; + +// Address calculation and its optimization +def : Pat<(VEhi texternalsym:$in), (LEASLzzi texternalsym:$in)>; +def : Pat<(VElo texternalsym:$in), (ANDrm0 (LEAzzi texternalsym:$in), 32)>; +def : Pat<(add (VEhi texternalsym:$in1), (VElo texternalsym:$in2)), + (LEASLrzi (ANDrm0 (LEAzzi texternalsym:$in2), 32), + (texternalsym:$in1))>; + +// Address store of mcsym +def : Pat<(store (i64 mcsym:$src), ADDRri:$dst), + (STSri ADDRri:$dst, (LEASLrzi (ANDrm0 (LEAzzi mcsym:$src), 32), + (mcsym:$src)))>; + +// Calls +def : Pat<(call tglobaladdr:$dst), + (CALL tglobaladdr:$dst)>; +def : Pat<(call texternalsym:$dst), + (CALL texternalsym:$dst)>; +def : Pat<(call i64:$dst), + (CALLr i64:$dst)>; + +// Branches +def : Pat<(br bb:$addr), (BCRLa bb:$addr)>; + +// brcc +def : Pat<(brcc CCSIOp:$cond, i32:$l, i32:$r, bb:$addr), + (BCRWrr (icond2cc $cond), $l, $r, bb:$addr)>; +def : Pat<(brcc CCUIOp:$cond, i32:$l, i32:$r, bb:$addr), + (BCRWir (icond2cc $cond), 0, (CMPUWrr $r, $l), bb:$addr)>; +def : Pat<(brcc CCSIOp:$cond, i64:$l, i64:$r, bb:$addr), + (BCRLrr (icond2cc $cond), $l, $r, bb:$addr)>; +def : Pat<(brcc CCUIOp:$cond, 
i64:$l, i64:$r, bb:$addr), + (BCRLir (icond2cc $cond), 0, (CMPrr $r, $l), bb:$addr)>; +def : Pat<(brcc cond:$cond, f32:$l, f32:$r, bb:$addr), + (BCRSrr (fcond2cc $cond), $l, $r, bb:$addr)>; +def : Pat<(brcc cond:$cond, f64:$l, f64:$r, bb:$addr), + (BCRDrr (fcond2cc $cond), $l, $r, bb:$addr)>; +def : Pat<(brcc cond:$cond, f128:$l, f128:$r, bb:$addr), + (BCRDrr (fcond2cc $cond), 0, (FCQrr $r, $l), bb:$addr)>; //===----------------------------------------------------------------------===// // Pattern Matchings @@ -324,6 +1863,31 @@ // Pseudo Instructions //===----------------------------------------------------------------------===// +// GETGOT for PIC +let Defs = [SX15 /* %got */, SX16 /* %plt */], hasSideEffects = 0 in { + def GETGOT : Pseudo<(outs getGOT:$getpcseq), (ins), "$getpcseq", [] >; +} + +// GETFUNPLT for PIC +let hasSideEffects = 0 in +def GETFUNPLT : Pseudo<(outs I64:$dst), (ins i64imm:$addr), + "$dst, $addr", + [(set iPTR:$dst, (GetFunPLT tglobaladdr:$addr))] >; + +def : Pat<(GetFunPLT tglobaladdr:$dst), + (GETFUNPLT tglobaladdr:$dst)>; +def : Pat<(GetFunPLT texternalsym:$dst), + (GETFUNPLT texternalsym:$dst)>; + +// GETTLSADDR for TLS +let Defs = [SX0, SX10, SX12], hasSideEffects = 0 in +def GETTLSADDR : Pseudo<(outs), (ins i64imm:$addr), + "# GETTLSADDR $addr", + [(GetTLSAddr tglobaltlsaddr:$addr)] >; + +def : Pat<(GetTLSAddr tglobaltlsaddr:$dst), + (GETTLSADDR tglobaltlsaddr:$dst)>; + let Defs = [SX11], Uses = [SX11], hasSideEffects = 0 in { def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt, i64imm:$amt2), "# ADJCALLSTACKDOWN $amt, $amt2", @@ -341,3 +1905,382 @@ def EXTEND_STACK_GUARD : Pseudo<(outs), (ins), "# EXTEND STACK GUARD", []>; + +// Dynamic stack allocation yields a __llvm_grow_stack for VE targets. +// These calls are needed to probe the stack when allocating more over +// %s8 (%sl - stack limit). + +let Uses = [SX11], hasSideEffects = 1 in +def GETSTACKTOP : Pseudo<(outs I64:$dst), (ins), + "# GET STACK TOP", + [(set iPTR:$dst, (GetStackTop))]>; + +// SETCC pattern matches +// +// CMP %tmp, lhs, rhs ; compare lhs and rhs +// or %res, 0, (0)1 ; initialize by 0 +// CMOV %res, (63)0, %tmp ; set 1 if %tmp is true + +def : Pat<(i32 (setcc i64:$LHS, i64:$RHS, CCSIOp:$cond)), + (EXTRACT_SUBREG + (CMOVLrm0 (icond2cc $cond), + (CPXrr i64:$LHS, i64:$RHS), + 63, + (ORim1 0, 0)), sub_i32)>; + +def : Pat<(i32 (setcc i64:$LHS, i64:$RHS, CCUIOp:$cond)), + (EXTRACT_SUBREG + (CMOVLrm0 (icond2cc $cond), + (CMPrr i64:$LHS, i64:$RHS), + 63, + (ORim1 0, 0)), sub_i32)>; + +def : Pat<(i32 (setcc i32:$LHS, i32:$RHS, CCSIOp:$cond)), + (EXTRACT_SUBREG + (CMOVWrm0 (icond2cc $cond), + (CPSrr i32:$LHS, i32:$RHS), + 63, + (ORim1 0, 0)), sub_i32)>; + +def : Pat<(i32 (setcc i32:$LHS, i32:$RHS, CCUIOp:$cond)), + (EXTRACT_SUBREG + (CMOVWrm0 (icond2cc $cond), + (CMPUWrr i32:$LHS, i32:$RHS), + 63, + (ORim1 0, 0)), sub_i32)>; + +def : Pat<(i32 (setcc f128:$LHS, f128:$RHS, cond:$cond)), + (EXTRACT_SUBREG + (CMOVDrm0 (fcond2cc $cond), + (FCQrr f128:$LHS, f128:$RHS), + 63, + (ORim1 0, 0)), sub_i32)>; + +def : Pat<(i32 (setcc f64:$LHS, f64:$RHS, cond:$cond)), + (EXTRACT_SUBREG + (CMOVDrm0 (fcond2cc $cond), + (FCPrr f64:$LHS, f64:$RHS), + 63, + (ORim1 0, 0)), sub_i32)>; + +def : Pat<(i32 (setcc f32:$LHS, f32:$RHS, cond:$cond)), + (EXTRACT_SUBREG + (CMOVSrm0 (fcond2cc $cond), + (FCPSrr f32:$LHS, f32:$RHS), + 63, + (ORim1 0, 0)), sub_i32)>; + +// Special SELECTCC pattern matches +// Use min/max for better performance. 
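+// As a rough, hand-written illustration (not taken from the test suite),
+// IR of the following shape is what SelectionDAG typically folds into a
+// selectcc node with SETOGT whose true/false values reuse the compared
+// operands, so it should be matched by the FCMA (maximum) pattern below:
+//
+//   define double @max_like(double %a, double %b) {
+//     %cmp = fcmp ogt double %a, %b
+//     %sel = select i1 %cmp, double %a, double %b
+//     ret double %sel
+//   }
+//
+// The emitted form is: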
+// +// MAX/MIN %res, %lhs, %rhs + +def : Pat<(f64 (selectcc f64:$LHS, f64:$RHS, f64:$LHS, f64:$RHS, SETOGT)), + (FCMArr $LHS, $RHS)>; +def : Pat<(f32 (selectcc f32:$LHS, f32:$RHS, f32:$LHS, f32:$RHS, SETOGT)), + (FCMASrr $LHS, $RHS)>; +def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETGT)), + (CMXarr $LHS, $RHS)>; +def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETGT)), + (CMSarr $LHS, $RHS)>; +def : Pat<(f64 (selectcc f64:$LHS, f64:$RHS, f64:$LHS, f64:$RHS, SETOGE)), + (FCMArr $LHS, $RHS)>; +def : Pat<(f32 (selectcc f32:$LHS, f32:$RHS, f32:$LHS, f32:$RHS, SETOGE)), + (FCMASrr $LHS, $RHS)>; +def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETGE)), + (CMXarr $LHS, $RHS)>; +def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETGE)), + (CMSarr $LHS, $RHS)>; + +def : Pat<(f64 (selectcc f64:$LHS, f64:$RHS, f64:$LHS, f64:$RHS, SETOLT)), + (FCMIrr $LHS, $RHS)>; +def : Pat<(f32 (selectcc f32:$LHS, f32:$RHS, f32:$LHS, f32:$RHS, SETOLT)), + (FCMISrr $LHS, $RHS)>; +def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETLT)), + (CMXirr $LHS, $RHS)>; +def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETLT)), + (CMSirr $LHS, $RHS)>; +def : Pat<(f64 (selectcc f64:$LHS, f64:$RHS, f64:$LHS, f64:$RHS, SETOLE)), + (FCMIrr $LHS, $RHS)>; +def : Pat<(f32 (selectcc f32:$LHS, f32:$RHS, f32:$LHS, f32:$RHS, SETOLE)), + (FCMISrr $LHS, $RHS)>; +def : Pat<(i64 (selectcc i64:$LHS, i64:$RHS, i64:$LHS, i64:$RHS, SETLE)), + (CMXirr $LHS, $RHS)>; +def : Pat<(i32 (selectcc i32:$LHS, i32:$RHS, i32:$LHS, i32:$RHS, SETLE)), + (CMSirr $LHS, $RHS)>; + +// Generic SELECTCC pattern matches +// +// CMP %tmp, %l, %r ; compare %l and %r +// or %res, %f, (0)1 ; initialize by %f +// CMOV %res, %t, %tmp ; set %t if %tmp is true + +// selectcc for i64 result +def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCSIOp:$cond)), + (CMOVWrr (icond2cc $cond), (CPSrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc i32:$l, i32:$r, i64:$t, i64:$f, CCUIOp:$cond)), + (CMOVWrr (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCSIOp:$cond)), + (CMOVLrr (icond2cc $cond), (CPXrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc i64:$l, i64:$r, i64:$t, i64:$f, CCUIOp:$cond)), + (CMOVLrr (icond2cc $cond), (CMPrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc f32:$l, f32:$r, i64:$t, i64:$f, cond:$cond)), + (CMOVSrr (fcond2cc $cond), (FCPSrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc f64:$l, f64:$r, i64:$t, i64:$f, cond:$cond)), + (CMOVDrr (fcond2cc $cond), (FCPrr $l, $r), $t, $f)>; +def : Pat<(i64 (selectcc f128:$l, f128:$r, i64:$t, i64:$f, cond:$cond)), + (CMOVDrr (fcond2cc $cond), (FCQrr $l, $r), $t, $f)>; + +// selectcc for i32 result +def : Pat<(i32 (selectcc i32:$l, i32:$r, i32:$t, i32:$f, CCSIOp:$cond)), + (EXTRACT_SUBREG + (CMOVWrr (icond2cc $cond), + (CPSrr $l, $r), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; +def : Pat<(i32 (selectcc i32:$l, i32:$r, i32:$t, i32:$f, CCUIOp:$cond)), + (EXTRACT_SUBREG + (CMOVWrr (icond2cc $cond), + (CMPUWrr $l, $r), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; +def : Pat<(i32 (selectcc i64:$l, i64:$r, i32:$t, i32:$f, CCSIOp:$cond)), + (EXTRACT_SUBREG + (CMOVLrr (icond2cc $cond), + (CPXrr $l, $r), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; +def : 
Pat<(i32 (selectcc i64:$l, i64:$r, i32:$t, i32:$f, CCUIOp:$cond)), + (EXTRACT_SUBREG + (CMOVLrr (icond2cc $cond), + (CMPrr $l, $r), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; +def : Pat<(i32 (selectcc f32:$l, f32:$r, i32:$t, i32:$f, cond:$cond)), + (EXTRACT_SUBREG + (CMOVSrr (fcond2cc $cond), + (FCPSrr $l, $r), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; +def : Pat<(i32 (selectcc f64:$l, f64:$r, i32:$t, i32:$f, cond:$cond)), + (EXTRACT_SUBREG + (CMOVDrr (fcond2cc $cond), + (FCPrr $l, $r), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; +def : Pat<(i32 (selectcc f128:$l, f128:$r, i32:$t, i32:$f, cond:$cond)), + (EXTRACT_SUBREG + (CMOVDrr (fcond2cc $cond), + (FCQrr $l, $r), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; + +// selectcc for f128 result +def : Pat<(f128 (selectcc i32:$l, i32:$r, f128:$t, f128:$f, CCSIOp:$cond)), + (INSERT_SUBREG + (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + (CMOVWrr (icond2cc $cond), (CPSrr $l, $r), + (EXTRACT_SUBREG $t, sub_odd), + (EXTRACT_SUBREG $f, sub_odd)), sub_odd), + (CMOVWrr (icond2cc $cond), (CPSrr $l, $r), + (EXTRACT_SUBREG $t, sub_even), + (EXTRACT_SUBREG $f, sub_even)), sub_even)>; +def : Pat<(f128 (selectcc i32:$l, i32:$r, f128:$t, f128:$f, CCUIOp:$cond)), + (INSERT_SUBREG + (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + (CMOVWrr (icond2cc $cond), (CMPUWrr $l, $r), + (EXTRACT_SUBREG $t, sub_odd), + (EXTRACT_SUBREG $f, sub_odd)), sub_odd), + (CMOVWrr (icond2cc $cond), (CMPUWrr $l, $r), + (EXTRACT_SUBREG $t, sub_even), + (EXTRACT_SUBREG $f, sub_even)), sub_even)>; +def : Pat<(f128 (selectcc i64:$l, i64:$r, f128:$t, f128:$f, CCSIOp:$cond)), + (INSERT_SUBREG + (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + (CMOVLrr (icond2cc $cond), (CPXrr $l, $r), + (EXTRACT_SUBREG $t, sub_odd), + (EXTRACT_SUBREG $f, sub_odd)), sub_odd), + (CMOVLrr (icond2cc $cond), (CPXrr $l, $r), + (EXTRACT_SUBREG $t, sub_even), + (EXTRACT_SUBREG $f, sub_even)), sub_even)>; +def : Pat<(f128 (selectcc i64:$l, i64:$r, f128:$t, f128:$f, CCUIOp:$cond)), + (INSERT_SUBREG + (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + (CMOVLrr (icond2cc $cond), (CMPrr $l, $r), + (EXTRACT_SUBREG $t, sub_odd), + (EXTRACT_SUBREG $f, sub_odd)), sub_odd), + (CMOVLrr (icond2cc $cond), (CMPrr $l, $r), + (EXTRACT_SUBREG $t, sub_even), + (EXTRACT_SUBREG $f, sub_even)), sub_even)>; +def : Pat<(f128 (selectcc f32:$l, f32:$r, f128:$t, f128:$f, cond:$cond)), + (INSERT_SUBREG + (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + (CMOVSrr (fcond2cc $cond), (FCPSrr $l, $r), + (EXTRACT_SUBREG $t, sub_odd), + (EXTRACT_SUBREG $f, sub_odd)), sub_odd), + (CMOVSrr (fcond2cc $cond), (FCPSrr $l, $r), + (EXTRACT_SUBREG $t, sub_even), + (EXTRACT_SUBREG $f, sub_even)), sub_even)>; +def : Pat<(f128 (selectcc f64:$l, f64:$r, f128:$t, f128:$f, cond:$cond)), + (INSERT_SUBREG + (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + (CMOVDrr (fcond2cc $cond), (FCPrr $l, $r), + (EXTRACT_SUBREG $t, sub_odd), + (EXTRACT_SUBREG $f, sub_odd)), sub_odd), + (CMOVDrr (fcond2cc $cond), (FCPrr $l, $r), + (EXTRACT_SUBREG $t, sub_even), + (EXTRACT_SUBREG $f, sub_even)), sub_even)>; +def : Pat<(f128 (selectcc f128:$l, f128:$r, f128:$t, f128:$f, cond:$cond)), + (INSERT_SUBREG + (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + (CMOVDrr (fcond2cc $cond), (FCQrr $l, $r), + (EXTRACT_SUBREG $t, sub_odd), + 
(EXTRACT_SUBREG $f, sub_odd)), sub_odd), + (CMOVDrr (fcond2cc $cond), (FCQrr $l, $r), + (EXTRACT_SUBREG $t, sub_even), + (EXTRACT_SUBREG $f, sub_even)), sub_even)>; + +// selectcc for f64 result +def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCSIOp:$cond)), + (CMOVWrr (icond2cc $cond), (CPSrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc i32:$l, i32:$r, f64:$t, f64:$f, CCUIOp:$cond)), + (CMOVWrr (icond2cc $cond), (CMPUWrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCSIOp:$cond)), + (CMOVLrr (icond2cc $cond), (CPXrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc i64:$l, i64:$r, f64:$t, f64:$f, CCUIOp:$cond)), + (CMOVLrr (icond2cc $cond), (CMPrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc f32:$l, f32:$r, f64:$t, f64:$f, cond:$cond)), + (CMOVSrr (fcond2cc $cond), (FCPSrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc f64:$l, f64:$r, f64:$t, f64:$f, cond:$cond)), + (CMOVDrr (fcond2cc $cond), (FCPrr $l, $r), $t, $f)>; +def : Pat<(f64 (selectcc f128:$l, f128:$r, f64:$t, f64:$f, cond:$cond)), + (CMOVDrr (fcond2cc $cond), (FCQrr $l, $r), $t, $f)>; + +// selectcc for f32 result +def : Pat<(f32 (selectcc i32:$l, i32:$r, f32:$t, f32:$f, CCSIOp:$cond)), + (EXTRACT_SUBREG + (CMOVWrr (icond2cc $cond), + (CPSrr $l, $r), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; +def : Pat<(f32 (selectcc i32:$l, i32:$r, f32:$t, f32:$f, CCUIOp:$cond)), + (EXTRACT_SUBREG + (CMOVWrr (icond2cc $cond), + (CMPUWrr $l, $r), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; +def : Pat<(f32 (selectcc i64:$l, i64:$r, f32:$t, f32:$f, CCSIOp:$cond)), + (EXTRACT_SUBREG + (CMOVLrr (icond2cc $cond), + (CPXrr $l, $r), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; +def : Pat<(f32 (selectcc i64:$l, i64:$r, f32:$t, f32:$f, CCUIOp:$cond)), + (EXTRACT_SUBREG + (CMOVLrr (icond2cc $cond), + (CMPrr $l, $r), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; +def : Pat<(f32 (selectcc f32:$l, f32:$r, f32:$t, f32:$f, cond:$cond)), + (EXTRACT_SUBREG + (CMOVSrr (fcond2cc $cond), + (FCPSrr $l, $r), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; +def : Pat<(f32 (selectcc f64:$l, f64:$r, f32:$t, f32:$f, cond:$cond)), + (EXTRACT_SUBREG + (CMOVDrr (fcond2cc $cond), + (FCPrr $l, $r), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; +def : Pat<(f32 (selectcc f128:$l, f128:$r, f32:$t, f32:$f, cond:$cond)), + (EXTRACT_SUBREG + (CMOVDrr (fcond2cc $cond), + (FCQrr $l, $r), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; + +// Generic SELECT pattern matches +// Use cmov.w for all cases since %pred holds i32. 
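+// As a hand-written illustration (not taken from the test suite), a select
+// whose i1 condition has been promoted to an i32 truth value, e.g.
+//
+//   define i64 @sel(i1 zeroext %pred, i64 %t, i64 %f) {
+//     %res = select i1 %pred, i64 %t, i64 %f
+//     ret i64 %res
+//   }
+//
+// is expected to be matched by the CMOVWrr patterns below, emitting: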
+// +// CMOV.w.ne %res, %tval, %tmp ; set tval if %tmp is true + +def : Pat<(i64 (select i32:$pred, i64:$t, i64:$f)), + (CMOVWrr CC_INE, $pred, $t, $f)>; + +def : Pat<(i32 (select i32:$pred, i32:$t, i32:$f)), + (EXTRACT_SUBREG + (CMOVWrr CC_INE, $pred, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)), + sub_i32)>; + +def : Pat<(f128 (select i32:$pred, f128:$t, f128:$f)), + (INSERT_SUBREG + (INSERT_SUBREG (f128 (IMPLICIT_DEF)), + (CMOVWrr CC_INE, $pred, + (EXTRACT_SUBREG $t, sub_odd), + (EXTRACT_SUBREG $f, sub_odd)), sub_odd), + (CMOVWrr CC_INE, $pred, + (EXTRACT_SUBREG $t, sub_even), + (EXTRACT_SUBREG $f, sub_even)), sub_even)>; + +def : Pat<(f64 (select i32:$pred, f64:$t, f64:$f)), + (CMOVWrr CC_INE, $pred, $t, $f)>; + +def : Pat<(f32 (select i32:$pred, f32:$t, f32:$f)), + (EXTRACT_SUBREG + (CMOVWrr CC_INE, $pred, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_f32), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_f32)), + sub_f32)>; + +// bitconvert +def : Pat<(f64 (bitconvert i64:$src)), (COPY_TO_REGCLASS $src, I64)>; +def : Pat<(i64 (bitconvert f64:$src)), (COPY_TO_REGCLASS $src, I64)>; + +def : Pat<(i32 (bitconvert f32:$op)), + (EXTRACT_SUBREG (SRAXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + $op, sub_f32), 32), sub_i32)>; +def : Pat<(f32 (bitconvert i32:$op)), + (EXTRACT_SUBREG (SLLri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + $op, sub_i32), 32), sub_f32)>; + +// Bits operations pattern matchings. +def : Pat<(i32 (ctpop i32:$src)), + (EXTRACT_SUBREG (PCNTr (ANDrm0 (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), $src, sub_i32), 32)), sub_i32)>; +def : Pat<(i32 (bitreverse i32:$src)), + (EXTRACT_SUBREG (SRLri (BRVr (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), $src, sub_i32)), 32), sub_i32)>; +def : Pat<(i32 (ctlz i32:$src)), + (EXTRACT_SUBREG (LDZr (SLLri (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), $src, sub_i32), 32)), sub_i32)>; +def : Pat<(i64 (bswap i64:$src)), + (BSWPri $src, 0)>; +def : Pat<(i32 (bswap i32:$src)), + (EXTRACT_SUBREG (BSWPri (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), $src, sub_i32), 1), sub_i32)>; + +// Several special pattern matches to optimize code + +def : Pat<(i32 (and i32:$lhs, 0xff)), + (AND32rm0 $lhs, 56)>; +def : Pat<(i32 (and i32:$lhs, 0xffff)), + (AND32rm0 $lhs, 48)>; +def : Pat<(i32 (and i32:$lhs, 0xffffffff)), + (AND32rm0 $lhs, 32)>; diff --git a/llvm/lib/Target/VE/VEMCInstLower.cpp b/llvm/lib/Target/VE/VEMCInstLower.cpp --- a/llvm/lib/Target/VE/VEMCInstLower.cpp +++ b/llvm/lib/Target/VE/VEMCInstLower.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/VEMCExpr.h" #include "VE.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" @@ -28,31 +29,63 @@ const MachineOperand &MO, const MCSymbol *Symbol, AsmPrinter &AP) { + VEMCExpr::VariantKind Kind = (VEMCExpr::VariantKind)MO.getTargetFlags(); + const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::create(Symbol, AP.OutContext); - return MCOperand::createExpr(MCSym); + const VEMCExpr *expr = VEMCExpr::create(Kind, MCSym, AP.OutContext); + return MCOperand::createExpr(expr); } static MCOperand LowerOperand(const MachineInstr *MI, const MachineOperand &MO, AsmPrinter &AP) { switch (MO.getType()) { default: - report_fatal_error("unsupported operand type"); + report_fatal_error("unknown operand type"); - case MachineOperand::MO_Register: - if (MO.isImplicit()) - break; - return MCOperand::createReg(MO.getReg()); + case MachineOperand::MO_CFIIndex: + report_fatal_error("unsupported 
MO_CFIIndex operand type"); + case MachineOperand::MO_CImmediate: + report_fatal_error("unsupported MO_CImmediate operand type"); + case MachineOperand::MO_FPImmediate: + report_fatal_error("unsupported MO_FPImmediate operand type"); + case MachineOperand::MO_FrameIndex: + report_fatal_error("unsupported MO_FrameIndex operand type"); + case MachineOperand::MO_IntrinsicID: + report_fatal_error("unsupported MO_IntrinsicID operand type"); + case MachineOperand::MO_Metadata: + report_fatal_error("unsupported MO_Metadata operand type"); + case MachineOperand::MO_Predicate: + report_fatal_error("unsupported MO_Predicate operand type"); + case MachineOperand::MO_RegisterLiveOut: + report_fatal_error("unsupported MO_RegistrLiveOut operand type"); + case MachineOperand::MO_TargetIndex: + report_fatal_error("unsupported MO_TargetIndex operand type"); + case MachineOperand::MO_BlockAddress: + return LowerSymbolOperand( + MI, MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), AP); + case MachineOperand::MO_ConstantPoolIndex: + return LowerSymbolOperand(MI, MO, AP.GetCPISymbol(MO.getIndex()), AP); + case MachineOperand::MO_ExternalSymbol: + return LowerSymbolOperand( + MI, MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP); + case MachineOperand::MO_GlobalAddress: + return LowerSymbolOperand(MI, MO, AP.getSymbol(MO.getGlobal()), AP); case MachineOperand::MO_Immediate: return MCOperand::createImm(MO.getImm()); - + case MachineOperand::MO_JumpTableIndex: + return LowerSymbolOperand(MI, MO, AP.GetJTISymbol(MO.getIndex()), AP); + case MachineOperand::MO_MCSymbol: + return LowerSymbolOperand(MI, MO, MO.getMCSymbol(), AP); case MachineOperand::MO_MachineBasicBlock: return LowerSymbolOperand(MI, MO, MO.getMBB()->getSymbol(), AP); - + case MachineOperand::MO_Register: + if (MO.isImplicit()) + return MCOperand(); + return MCOperand::createReg(MO.getReg()); case MachineOperand::MO_RegisterMask: - break; + return MCOperand(); } - return MCOperand(); } void llvm::LowerVEMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, diff --git a/llvm/lib/Target/VE/VEMachineFunctionInfo.h b/llvm/lib/Target/VE/VEMachineFunctionInfo.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VEMachineFunctionInfo.h @@ -0,0 +1,63 @@ +//===- VEMachineFunctionInfo.h - VE Machine Function Info -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares VE specific per-machine-function information. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_VE_VEMACHINEFUNCTIONINFO_H +#define LLVM_LIB_TARGET_VE_VEMACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +class VEMachineFunctionInfo : public MachineFunctionInfo { + virtual void anchor(); + +private: + unsigned GlobalBaseReg; + + /// VectorLengthReg - Holds the virtual register for VL register. + unsigned VectorLengthReg; + + /// VarArgsFrameOffset - Frame offset to start of varargs area. + int VarArgsFrameOffset; + + /// SRetReturnReg - Holds the virtual register into which the sret + /// argument is passed. + unsigned SRetReturnReg; + + /// IsLeafProc - True if the function is a leaf procedure. 
+ bool IsLeafProc; + +public: + VEMachineFunctionInfo() + : GlobalBaseReg(0), VectorLengthReg(0), VarArgsFrameOffset(0), + SRetReturnReg(0), IsLeafProc(false) {} + explicit VEMachineFunctionInfo(MachineFunction &MF) + : GlobalBaseReg(0), VectorLengthReg(0), VarArgsFrameOffset(0), + SRetReturnReg(0), IsLeafProc(false) {} + + unsigned getGlobalBaseReg() const { return GlobalBaseReg; } + void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } + + unsigned getVectorLengthReg() const { return VectorLengthReg; } + void setVectorLengthReg(unsigned Reg) { VectorLengthReg = Reg; } + + int getVarArgsFrameOffset() const { return VarArgsFrameOffset; } + void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; } + + unsigned getSRetReturnReg() const { return SRetReturnReg; } + void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } + + void setLeafProc(bool rhs) { IsLeafProc = rhs; } + bool isLeafProc() const { return IsLeafProc; } +}; +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp b/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp @@ -0,0 +1,13 @@ +//===-- VEMachineFunctionInfo.cpp - VE Machine Function Info --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "VEMachineFunctionInfo.h" + +using namespace llvm; + +void VEMachineFunctionInfo::anchor() {} diff --git a/llvm/lib/Target/VE/VERegisterInfo.h b/llvm/lib/Target/VE/VERegisterInfo.h --- a/llvm/lib/Target/VE/VERegisterInfo.h +++ b/llvm/lib/Target/VE/VERegisterInfo.h @@ -35,6 +35,9 @@ const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, unsigned Kind) const override; + bool requiresRegisterScavenging(const MachineFunction &MF) const override; + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; + void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS = nullptr) const override; diff --git a/llvm/lib/Target/VE/VERegisterInfo.cpp b/llvm/lib/Target/VE/VERegisterInfo.cpp --- a/llvm/lib/Target/VE/VERegisterInfo.cpp +++ b/llvm/lib/Target/VE/VERegisterInfo.cpp @@ -12,6 +12,7 @@ #include "VERegisterInfo.h" #include "VE.h" +#include "VEMachineFunctionInfo.h" #include "VESubtarget.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" @@ -32,14 +33,37 @@ // VE uses %s10 == %lp to keep return address VERegisterInfo::VERegisterInfo() : VEGenRegisterInfo(VE::SX10) {} +bool VERegisterInfo::requiresRegisterScavenging( + const MachineFunction &MF) const { + return true; +} + +bool VERegisterInfo::requiresFrameIndexScavenging( + const MachineFunction &MF) const { + return true; +} + const MCPhysReg * VERegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - return CSR_SaveList; + const Function &F = MF->getFunction(); + CallingConv::ID CC = F.getCallingConv(); + + switch (CC) { + default: + return CSR_SaveList; + } } const uint32_t *VERegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { - return CSR_RegMask; + switch (CC) { + case CallingConv::VE_VEC_EXPF: + return CSR_vec_expf_RegMask; + case CallingConv::VE_LLVM_GROW_STACK: + return CSR_llvm_grow_stack_RegMask; + default: + return CSR_RegMask; 
+ } } const uint32_t *VERegisterInfo::getNoPreservedMask() const { @@ -48,26 +72,68 @@ BitVector VERegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - Reserved.set(VE::SX8); // stack limit - Reserved.set(VE::SX9); // frame pointer - Reserved.set(VE::SX10); // link register (return address) - Reserved.set(VE::SX11); // stack pointer - Reserved.set(VE::SX12); // outer register - Reserved.set(VE::SX13); // id register for dynamic linker - - Reserved.set(VE::SX14); // thread pointer - Reserved.set(VE::SX15); // global offset table register - Reserved.set(VE::SX16); // procedure linkage table register - Reserved.set(VE::SX17); // linkage-area register - - // sx18-sx33 are callee-saved registers - // sx34-sx63 are temporary registers + const Register ReservedRegs[] = { + VE::SX8, // Stack limit + VE::SX9, // Frame pointer + VE::SX10, // Link register (return address) + VE::SX11, // Stack pointer + + // FIXME: maybe not need to be reserved + VE::SX12, // Outer register + VE::SX13, // Id register for dynamic linker + + VE::SX14, // Thread pointer + VE::SX15, // Global offset table register + VE::SX16, // Procedure linkage table register + VE::SX17, // Linkage-area register + // sx18-sx33 are callee-saved registers + // sx34-sx63 are temporary registers + + VE::UCC, // User clock counter + VE::PSW, // Program status word + VE::SAR, // Store adress + VE::PMMR, // Performance monitor mode + + // Performance monitor configuration + VE::PMCR0, + VE::PMCR1, + VE::PMCR2, + VE::PMCR3, + + // Performance monitor counter + VE::PMC0, + VE::PMC1, + VE::PMC2, + VE::PMC3, + VE::PMC4, + VE::PMC5, + VE::PMC6, + VE::PMC7, + VE::PMC8, + VE::PMC9, + VE::PMC10, + VE::PMC11, + VE::PMC12, + VE::PMC13, + VE::PMC14, + }; + + for (auto R : ReservedRegs) + for (MCRegAliasIterator ItAlias(R, this, true); + ItAlias.isValid(); + ++ItAlias) + Reserved.set(*ItAlias); return Reserved; } -bool VERegisterInfo::isConstantPhysReg(unsigned PhysReg) const { return false; } +bool VERegisterInfo::isConstantPhysReg(unsigned PhysReg) const { + switch (PhysReg) { + default: + return false; + } +} const TargetRegisterClass * VERegisterInfo::getPointerRegClass(const MachineFunction &MF, @@ -94,6 +160,7 @@ DebugLoc dl = MI.getDebugLoc(); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); MachineFunction &MF = *MI.getParent()->getParent(); + const VESubtarget &Subtarget = MF.getSubtarget(); const VEFrameLowering *TFI = getFrameLowering(MF); unsigned FrameReg; @@ -102,6 +169,36 @@ Offset += MI.getOperand(FIOperandNum + 1).getImm(); + if (MI.getOpcode() == VE::STQri) { + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); + unsigned SrcReg = MI.getOperand(2).getReg(); + unsigned SrcHiReg = getSubReg(SrcReg, VE::sub_even); + unsigned SrcLoReg = getSubReg(SrcReg, VE::sub_odd); + // VE stores HiReg to 8(addr) and LoReg to 0(addr) + MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(VE::STSri)) + .addReg(FrameReg) + .addImm(0) + .addReg(SrcLoReg); + replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg); + MI.setDesc(TII.get(VE::STSri)); + MI.getOperand(2).setReg(SrcHiReg); + Offset += 8; + } else if (MI.getOpcode() == VE::LDQri) { + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned DestHiReg = getSubReg(DestReg, VE::sub_even); + unsigned DestLoReg = getSubReg(DestReg, VE::sub_odd); + // VE loads HiReg from 8(addr) and LoReg from 0(addr) + MachineInstr *StMI = + BuildMI(*MI.getParent(), II, dl, TII.get(VE::LDSri), 
DestLoReg) + .addReg(FrameReg) + .addImm(0); + replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg); + MI.setDesc(TII.get(VE::LDSri)); + MI.getOperand(0).setReg(DestHiReg); + Offset += 8; + } + replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FrameReg); } diff --git a/llvm/lib/Target/VE/VERegisterInfo.td b/llvm/lib/Target/VE/VERegisterInfo.td --- a/llvm/lib/Target/VE/VERegisterInfo.td +++ b/llvm/lib/Target/VE/VERegisterInfo.td @@ -16,6 +16,23 @@ let Namespace = "VE"; } +class VEMiscReg Enc, string n>: Register { + let HWEncoding{15-5} = 0; + let HWEncoding{4-0} = Enc; + let Namespace = "VE"; +} + +let Namespace = "VE" in { + def sub_i8 : SubRegIndex<8, 56>; // Low 8 bit (56..63) + def sub_i16 : SubRegIndex<16, 48>; // Low 16 bit (48..63) + def sub_i32 : SubRegIndex<32, 32>; // Low 32 bit (32..63) + def sub_f32 : SubRegIndex<32>; // High 32 bit (0..31) + def sub_even : SubRegIndex<64>; // High 64 bit (0..63) + def sub_odd : SubRegIndex<64, 64>; // Low 64 bit (64..127) + def sub_vm_even : SubRegIndex<256>; // High 256 bit (0..255) + def sub_vm_odd : SubRegIndex<256, 256>; // Low 256 bit (256..511) +} + // Registers are identified with 7-bit ID numbers. // R - 64-bit integer or floating-point registers class R Enc, string n, list subregs = [], @@ -24,14 +41,93 @@ let Aliases = aliases; } +// Rq - Slots in the register file for 128-bit floating-point values. +class Rq Enc, string n, list subregs> : VEReg { + let SubRegs = subregs; + let SubRegIndices = [sub_even, sub_odd]; + let CoveredBySubRegs = 1; +} + +// Miscellaneous Registers +def UCC : VEMiscReg<0, "UCC">; // User clock counter +def PSW : VEMiscReg<1, "PSW">; // Program status word +def SAR : VEMiscReg<2, "SAR">; // Store address register +def PMMR : VEMiscReg<7, "PMMR">; // Performance monitor mode register + +// Performance monitor configuration registers +foreach I = 0-3 in + def PMCR#I : VEMiscReg; + +// Performance monitor counter +foreach I = 0-14 in + def PMC#I : VEMiscReg; + +// Generic integer registers - 8 bits wide +foreach I = 0-63 in + def SB#I : R, DwarfRegNum<[I]>; + +// Generic integer registers - 16 bits wide +let SubRegIndices = [sub_i8] in +foreach I = 0-63 in + def SH#I : R("SB"#I)]>, DwarfRegNum<[I]>; + +// Generic integer registers - 32 bits wide +let SubRegIndices = [sub_i16] in +foreach I = 0-63 in + def SW#I : R("SH"#I)]>, DwarfRegNum<[I]>; + +// Generic floating point registers - 32 bits wide +// NOTE: Mark SF#I as alias of SW#I temporary to avoid register allocation +// problem. +foreach I = 0-63 in + def SF#I : R("SW"#I)]>, DwarfRegNum<[I]>; + // Generic integer registers - 64 bits wide +let SubRegIndices = [sub_i32, sub_f32], CoveredBySubRegs = 1 in foreach I = 0-63 in - def SX#I : R, + def SX#I : R("SW"#I), !cast("SF"#I)]>, DwarfRegNum<[I]>; +// Aliases of the S* registers used to hold 128-bit for values (long doubles). +// Following foreach represents something like: +// def Q0 : Rq<0, "S0", [S0, S1]>; +// def Q1 : Rq<2, "S2", [S2, S3]>; +// ... +foreach I = 0-31 in + def Q#I : Rq("SX"#!shl(I,1)), + !cast("SX"#!add(!shl(I,1),1))]>; + // Register classes. // // The register order is defined in terms of the preferred // allocation order. 
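+// Registers that are neither reserved nor callee-saved (%s0-%s7 and
+// %s34-%s63) are listed first and the reserved/callee-saved range
+// (%s8-%s33) last, so the allocator presumably falls back to
+// callee-saved registers only when the cheaper ones are exhausted.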
-def I64 : RegisterClass<"VE", [i64], 64, - (sequence "SX%u", 0, 63)>; +def I8 : RegisterClass<"VE", [i8], 8, + (add (sequence "SB%u", 0, 7), + (sequence "SB%u", 34, 63), + (sequence "SB%u", 8, 33))>; +def I16 : RegisterClass<"VE", [i16], 16, + (add (sequence "SH%u", 0, 7), + (sequence "SH%u", 34, 63), + (sequence "SH%u", 8, 33))>; +def I32 : RegisterClass<"VE", [i32], 32, + (add (sequence "SW%u", 0, 7), + (sequence "SW%u", 34, 63), + (sequence "SW%u", 8, 33))>; +def I64 : RegisterClass<"VE", [i64, f64], 64, + (add (sequence "SX%u", 0, 7), + (sequence "SX%u", 34, 63), + (sequence "SX%u", 8, 33))>; +def F32 : RegisterClass<"VE", [f32], 32, + (add (sequence "SF%u", 0, 7), + (sequence "SF%u", 34, 63), + (sequence "SF%u", 8, 33))>; +def F128 : RegisterClass<"VE", [f128], 128, + (add (sequence "Q%u", 0, 3), + (sequence "Q%u", 17, 31), + (sequence "Q%u", 4, 16))>; + +def Misc : RegisterClass<"VE", [i64], 64, + (add UCC, PSW, SAR, PMMR, + (sequence "PMCR%u", 0, 3), + (sequence "PMC%u", 0, 14))>; diff --git a/llvm/lib/Target/VE/VESchedule.td b/llvm/lib/Target/VE/VESchedule.td new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/VE/VESchedule.td @@ -0,0 +1,27 @@ +//===-- VESchedule.td - Describe the VE Itineries ----------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +/* +def ALU_FU : FuncUnit; +def LDST_FU : FuncUnit; + +def VE_ALU : InstrItinClass; +def VE_LD : InstrItinClass; +def VE_ST : InstrItinClass; + +def VEItinerary : ProcessorItineraries<[ALU_FU, LDST_FU],[],[ + // InstrItinData]>, + // InstrItinData]>, + // InstrItinData]>, + // InstrItinData]>, + InstrItinData]> +]>; +*/ diff --git a/llvm/lib/Target/VE/VETargetMachine.cpp b/llvm/lib/Target/VE/VETargetMachine.cpp --- a/llvm/lib/Target/VE/VETargetMachine.cpp +++ b/llvm/lib/Target/VE/VETargetMachine.cpp @@ -11,6 +11,7 @@ #include "VETargetMachine.h" #include "VE.h" +// #include "VETargetObjectFile.h" #include "VETargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" @@ -22,7 +23,7 @@ #define DEBUG_TYPE "ve" -extern "C" void LLVMInitializeVETarget() { +extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVETarget() { // Register the target. 
RegisterTargetMachine X(getTheVETarget()); } @@ -94,7 +95,9 @@ return getTM(); } + void addIRPasses() override; bool addInstSelector() override; + void addPreEmitPass() override; }; } // namespace @@ -102,7 +105,14 @@ return new VEPassConfig(*this, PM); } +void VEPassConfig::addIRPasses() { + addPass(createAtomicExpandPass()); + TargetPassConfig::addIRPasses(); +} + bool VEPassConfig::addInstSelector() { addPass(createVEISelDag(getVETargetMachine())); return false; } + +void VEPassConfig::addPreEmitPass() {} diff --git a/llvm/lib/Target/VE/VETargetTransformInfo.h b/llvm/lib/Target/VE/VETargetTransformInfo.h --- a/llvm/lib/Target/VE/VETargetTransformInfo.h +++ b/llvm/lib/Target/VE/VETargetTransformInfo.h @@ -40,9 +40,25 @@ unsigned getNumberOfRegisters(unsigned ClassID) const { return 64; } - unsigned getRegisterBitWidth(bool Vector) const { return 64; } + unsigned getRegisterBitWidth(bool Vector) const { + if (Vector) { + return 256 * 64; + } + return 64; + } unsigned getMinVectorRegisterBitWidth() const { return 64; } + + bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) { return false; } + bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) { + return false; + } + bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) { + return false; + }; + bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) { + return false; + } }; } // namespace llvm diff --git a/llvm/test/CodeGen/VE/add.ll b/llvm/test/CodeGen/VE/add.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/add.ll @@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s + +define i32 @sample_add(i32, i32) { +; CHECK-LABEL: sample_add: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = add nsw i32 %1, %0 + ret i32 %3 +} diff --git a/llvm/test/CodeGen/VE/addition.ll b/llvm/test/CodeGen/VE/addition.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/addition.ll @@ -0,0 +1,295 @@ +; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s + +define signext i8 @func1(i8 signext, i8 signext) { +; CHECK-LABEL: func1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s1, %s0 +; CHECK-NEXT: sla.w.sx %s0, %s0, 24 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = add i8 %1, %0 + ret i8 %3 +} + +define signext i16 @func2(i16 signext, i16 signext) { +; CHECK-LABEL: func2: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s1, %s0 +; CHECK-NEXT: sla.w.sx %s0, %s0, 16 +; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = add i16 %1, %0 + ret i16 %3 +} + +define i32 @func3(i32, i32) { +; CHECK-LABEL: func3: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = add nsw i32 %1, %0 + ret i32 %3 +} + +define i64 @func4(i64, i64) { +; CHECK-LABEL: func4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.l %s0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = add nsw i64 %1, %0 + ret i64 %3 +} + +define i128 @func5(i128, i128) { +; CHECK-LABEL: func5: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.l %s1, %s3, %s1 +; CHECK-NEXT: adds.l %s0, %s2, %s0 +; CHECK-NEXT: cmpu.l %s2, %s0, %s2 +; CHECK-NEXT: or %s3, 0, (0)1 +; CHECK-NEXT: cmov.l.lt %s3, (63)0, %s2 +; CHECK-NEXT: adds.w.zx %s2, %s3, (0)1 +; CHECK-NEXT: adds.l %s1, %s1, %s2 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = add nsw i128 %1, %0 + ret i128 %3 +} + +define zeroext i8 @func6(i8 zeroext, i8 zeroext) { +; CHECK-LABEL: func6: +; CHECK: .LBB{{[0-9]+}}_2: +; 
CHECK-NEXT: adds.w.sx %s0, %s1, %s0 +; CHECK-NEXT: and %s0, %s0, (56)0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = add i8 %1, %0 + ret i8 %3 +} + +define zeroext i16 @func7(i16 zeroext, i16 zeroext) { +; CHECK-LABEL: func7: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s1, %s0 +; CHECK-NEXT: and %s0, %s0, (48)0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = add i16 %1, %0 + ret i16 %3 +} + +define i32 @func8(i32, i32) { +; CHECK-LABEL: func8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = add i32 %1, %0 + ret i32 %3 +} + +define i64 @func9(i64, i64) { +; CHECK-LABEL: func9: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.l %s0, %s1, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = add i64 %1, %0 + ret i64 %3 +} + +define i128 @func10(i128, i128) { +; CHECK-LABEL: func10: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.l %s1, %s3, %s1 +; CHECK-NEXT: adds.l %s0, %s2, %s0 +; CHECK-NEXT: cmpu.l %s2, %s0, %s2 +; CHECK-NEXT: or %s3, 0, (0)1 +; CHECK-NEXT: cmov.l.lt %s3, (63)0, %s2 +; CHECK-NEXT: adds.w.zx %s2, %s3, (0)1 +; CHECK-NEXT: adds.l %s1, %s1, %s2 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = add i128 %1, %0 + ret i128 %3 +} + +define float @func11(float, float) { +; CHECK-LABEL: func11: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fadd.s %s0, %s0, %s1 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = fadd float %0, %1 + ret float %3 +} + +define double @func12(double, double) { +; CHECK-LABEL: func12: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fadd.d %s0, %s0, %s1 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = fadd double %0, %1 + ret double %3 +} + +define signext i8 @func13(i8 signext) { +; CHECK-LABEL: func13: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 5(%s0) +; CHECK-NEXT: sla.w.sx %s0, %s0, 24 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = add i8 %0, 5 + ret i8 %2 +} + +define signext i16 @func14(i16 signext) { +; CHECK-LABEL: func14: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 5(%s0) +; CHECK-NEXT: sla.w.sx %s0, %s0, 16 +; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = add i16 %0, 5 + ret i16 %2 +} + +define i32 @func15(i32) { +; CHECK-LABEL: func15: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 5(%s0) +; CHECK-NEXT: or %s11, 0, %s9 + %2 = add nsw i32 %0, 5 + ret i32 %2 +} + +define i64 @func16(i64) { +; CHECK-LABEL: func16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 5(%s0) +; CHECK-NEXT: or %s11, 0, %s9 + %2 = add nsw i64 %0, 5 + ret i64 %2 +} + +define i128 @func17(i128) { +; CHECK-LABEL: func17: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 5(%s0) +; CHECK-NEXT: cmpu.l %s0, %s2, %s0 +; CHECK-NEXT: or %s3, 0, (0)1 +; CHECK-NEXT: cmov.l.lt %s3, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s3, (0)1 +; CHECK-NEXT: adds.l %s1, %s1, %s0 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = add nsw i128 %0, 5 + ret i128 %2 +} + +define zeroext i8 @func18(i8 zeroext) { +; CHECK-LABEL: func18: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 5(%s0) +; CHECK-NEXT: and %s0, %s0, (56)0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = add i8 %0, 5 + ret i8 %2 +} + +define zeroext i16 @func19(i16 zeroext) { +; CHECK-LABEL: func19: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 5(%s0) +; CHECK-NEXT: and %s0, %s0, (48)0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = add i16 %0, 5 + ret i16 %2 +} + +define i32 @func20(i32) { +; CHECK-LABEL: func20: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 5(%s0) +; CHECK-NEXT: or %s11, 0, %s9 + %2 = add i32 %0, 5 + ret i32 %2 +} + +define i64 
@func21(i64) { +; CHECK-LABEL: func21: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 5(%s0) +; CHECK-NEXT: or %s11, 0, %s9 + %2 = add i64 %0, 5 + ret i64 %2 +} + +define i128 @func22(i128) { +; CHECK-LABEL: func22: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, 5(%s0) +; CHECK-NEXT: cmpu.l %s0, %s2, %s0 +; CHECK-NEXT: or %s3, 0, (0)1 +; CHECK-NEXT: cmov.l.lt %s3, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s3, (0)1 +; CHECK-NEXT: adds.l %s1, %s1, %s0 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = add i128 %0, 5 + ret i128 %2 +} + +define float @func23(float) { +; CHECK-LABEL: func23: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea.sl %s1, 1084227584 +; CHECK-NEXT: or %s1, 0, %s1 +; CHECK-NEXT: fadd.s %s0, %s0, %s1 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = fadd float %0, 5.000000e+00 + ret float %2 +} + +define double @func24(double) { +; CHECK-LABEL: func24: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea.sl %s1, 1075052544 +; CHECK-NEXT: fadd.d %s0, %s0, %s1 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = fadd double %0, 5.000000e+00 + ret double %2 +} + +define i32 @func25(i32) { +; CHECK-LABEL: func25: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, -2147483648 +; CHECK-NEXT: xor %s0, %s0, %s1 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = xor i32 %0, -2147483648 + ret i32 %2 +} + +define i64 @func26(i64) { +; CHECK-LABEL: func26: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s1, -2147483648 +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: adds.l %s0, %s0, %s1 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = add nsw i64 %0, 2147483648 + ret i64 %2 +} + +define i128 @func27(i128) { +; CHECK-LABEL: func27: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, -2147483648 +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: adds.l %s2, %s0, %s2 +; CHECK-NEXT: cmpu.l %s0, %s2, %s0 +; CHECK-NEXT: or %s3, 0, (0)1 +; CHECK-NEXT: cmov.l.lt %s3, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s3, (0)1 +; CHECK-NEXT: adds.l %s1, %s1, %s0 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = add nsw i128 %0, 2147483648 + ret i128 %2 +} + diff --git a/llvm/test/CodeGen/VE/alloca.ll b/llvm/test/CodeGen/VE/alloca.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/alloca.ll @@ -0,0 +1,47 @@ +; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s + +@buf = external global i8*, align 8 + +; Function Attrs: nounwind +define void @test(i32) { +; CHECK-LABEL: test: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s2, %s0, (0)1 +; CHECK-NEXT: lea %s0, 15(%s2) +; CHECK-NEXT: and %s0, -16, %s0 +; CHECK-NEXT: adds.l %s11, -64, %s11 +; CHECK-NEXT: lea %s1, __llvm_grow_stack@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, __llvm_grow_stack@hi(%s1) +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: lea %s13, 64 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, 0(%s11, %s13) +; CHECK-NEXT: lea %s1, 176(%s11) +; CHECK-NEXT: lea %s0, buf@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, buf@hi(%s0) +; CHECK-NEXT: ld %s0, (,%s0) +; CHECK-NEXT: adds.l %s11, -64, %s11 +; CHECK-NEXT: lea %s3, memcpy@lo +; CHECK-NEXT: and %s3, %s3, (32)0 +; CHECK-NEXT: lea.sl %s12, memcpy@hi(%s3) +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: lea %s13, 64 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, 0(%s11, %s13) +; CHECK-NEXT: or %s11, 0, %s9 + %2 = sext i32 %0 to i64 + %3 = alloca i8, i64 %2, align 8 + %4 = load i8*, i8** @buf, align 8, !tbaa !2 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %4, i8* nonnull align 8 %3, i64 %2, i1 false) + 
ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) + +!2 = !{!3, !3, i64 0} +!3 = !{!"any pointer", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/CodeGen/VE/atomic.ll b/llvm/test/CodeGen/VE/atomic.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/atomic.ll @@ -0,0 +1,1869 @@ +; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s + +@c = common global i8 0, align 32 +@s = common global i16 0, align 32 +@i = common global i32 0, align 32 +@l = common global i64 0, align 32 +@it= common global i128 0, align 32 +@ui = common global i32 0, align 32 + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_1() { +; CHECK-LABEL: test_atomic_store_1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: st1b %s1, (,%s0) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 12, i8* @c release, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_1seq() { +; CHECK-LABEL: test_atomic_store_1seq: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: st1b %s1, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 12, i8* @c seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_2() { +; CHECK-LABEL: test_atomic_store_2: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: st2b %s1, (,%s0) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i16 12, i16* @s release, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_2seq() { +; CHECK-LABEL: test_atomic_store_2seq: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: st2b %s1, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i16 12, i16* @s seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_4() { +; CHECK-LABEL: test_atomic_store_4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, i@hi(%s0) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: stl %s1, (,%s0) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i32 12, i32* @i release, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_4cst() { +; CHECK-LABEL: test_atomic_store_4cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, i@hi(%s0) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: stl %s1, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i32 12, i32* @i seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_8() { +; CHECK-LABEL: test_atomic_store_8: +; CHECK: 
.LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, l@hi(%s0) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: st %s1, (,%s0) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i64 12, i64* @l release, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_8cst() { +; CHECK-LABEL: test_atomic_store_8cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, l@hi(%s0) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: st %s1, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i64 12, i64* @l seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_16() { +; CHECK-LABEL: test_atomic_store_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, __atomic_store_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_store_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 3, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i128 12, i128* @it release, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_16cst() { +; CHECK-LABEL: test_atomic_store_16cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, __atomic_store_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_store_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i128 12, i128* @it seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_load_1() { +; CHECK-LABEL: test_atomic_load_1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: ld1b.zx %s0, (,%s0) +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: sla.w.sx %s0, %s0, 24 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i8, i8* @c acquire, align 32 + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_load_1cst() { +; CHECK-LABEL: test_atomic_load_1cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: ld1b.zx %s0, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 24 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i8, i8* @c seq_cst, align 32 + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_load_2() { +; CHECK-LABEL: test_atomic_load_2: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: ld2b.zx %s0, (,%s0) +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: sla.w.sx %s0, %s0, 16 +; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i16, i16* @s acquire, align 32 + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_load_2cst() { +; 
CHECK-LABEL: test_atomic_load_2cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: ld2b.zx %s0, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 16 +; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i16, i16* @s seq_cst, align 32 + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_load_4() { +; CHECK-LABEL: test_atomic_load_4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, i@hi(%s0) +; CHECK-NEXT: ldl.zx %s0, (,%s0) +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i32, i32* @i acquire, align 32 + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_load_4cst() { +; CHECK-LABEL: test_atomic_load_4cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, i@hi(%s0) +; CHECK-NEXT: ldl.zx %s0, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i32, i32* @i seq_cst, align 32 + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_load_8() { +; CHECK-LABEL: test_atomic_load_8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, l@hi(%s0) +; CHECK-NEXT: ld %s0, (,%s0) +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i64, i64* @l acquire, align 32 + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_load_8cst() { +; CHECK-LABEL: test_atomic_load_8cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, l@hi(%s0) +; CHECK-NEXT: ld %s0, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i64, i64* @l seq_cst, align 32 + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_load_16() { +; CHECK-LABEL: test_atomic_load_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, __atomic_load_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_load_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 2, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i128, i128* @it acquire, align 32 + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_load_16cst() { +; CHECK-LABEL: test_atomic_load_16cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, __atomic_load_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_load_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i128, i128* @it seq_cst, align 32 + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_exchange_1() { +; CHECK-LABEL: test_atomic_exchange_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: and %s0, -4, %s0 +; CHECK-NEXT: ldl.sx %s1, (,%s0) +; CHECK-NEXT: lea %s2, -256 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # 
=>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s3, 0, %s1 +; CHECK-NEXT: and %s1, %s1, %s2 +; CHECK-NEXT: or %s1, 10, %s1 +; CHECK-NEXT: cas.w %s1, (%s0), %s3 +; CHECK-NEXT: brne.w %s1, %s3, .LBB20_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s1, 24 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i8* @c, i8 10 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_exchange_2() { +; CHECK-LABEL: test_atomic_exchange_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: and %s0, -4, %s0 +; CHECK-NEXT: ldl.sx %s1, (,%s0) +; CHECK-NEXT: lea %s2, -65536 +; CHECK-NEXT: lea %s3, 28672 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s4, 0, %s1 +; CHECK-NEXT: and %s1, %s1, %s2 +; CHECK-NEXT: or %s1, %s1, %s3 +; CHECK-NEXT: cas.w %s1, (%s0), %s4 +; CHECK-NEXT: brne.w %s1, %s4, .LBB21_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s1, 16 +; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i16* @s, i16 28672 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_exchange_4() { +; CHECK-LABEL: test_atomic_exchange_4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, i@hi(%s0) +; CHECK-NEXT: lea %s0, 1886417008 +; CHECK-NEXT: ts1am.w %s0, (%s1), 15 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i32* @i, i32 1886417008 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_exchange_8() { +; CHECK-LABEL: test_atomic_exchange_8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, l@hi(%s0) +; CHECK-NEXT: lea %s0, 1886417008 +; CHECK-NEXT: lea.sl %s0, 1886417008(%s0) +; CHECK-NEXT: ts1am.l %s0, (%s1), 127 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i64* @l, i64 8102099357864587376 acquire + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_exchange_16() { +; CHECK-LABEL: test_atomic_exchange_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, __atomic_exchange_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_exchange_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: lea %s1, 1886417008 +; CHECK-NEXT: lea.sl %s1, 1886417008(%s1) +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 2, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i128* @it, i128 8102099357864587376 acquire + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_compare_exchange_1(i8, i8) { +; CHECK-LABEL: test_atomic_compare_exchange_1: +; CHECK: .LBB{{[0-9]+}}_5: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s2, c@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, c@hi(%s2) +; CHECK-NEXT: and %s2, -4, %s2 +; CHECK-NEXT: ldl.sx %s5, (,%s2) +; CHECK-NEXT: and %s1, %s1, (56)0 +; CHECK-NEXT: and %s3, %s0, (56)0 +; CHECK-NEXT: lea %s4, -256 +; CHECK-NEXT: and %s34, %s5, %s4 +; CHECK-NEXT: or %s0, 0, (0)1 
+; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s5, %s34, %s1 +; CHECK-NEXT: or %s6, %s34, %s3 +; CHECK-NEXT: cas.w %s5, (%s2), %s6 +; CHECK-NEXT: breq.w %s5, %s6, .LBB25_3 +; CHECK-NEXT: # %bb.2: # %partword.cmpxchg.failure +; CHECK-NEXT: # in Loop: Header=BB25_1 Depth=1 +; CHECK-NEXT: or %s7, 0, %s34 +; CHECK-NEXT: and %s34, %s5, %s4 +; CHECK-NEXT: brne.w %s7, %s34, .LBB25_1 +; CHECK: .LBB{{[0-9]+}}_3: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmps.w.sx %s1, %s5, %s6 +; CHECK-NEXT: cmov.w.eq %s0, (63)0, %s1 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i8* @c, i8 %0, i8 %1 seq_cst seq_cst + %3 = extractvalue { i8, i1 } %2, 1 + %frombool = zext i1 %3 to i8 + ret i8 %frombool +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_compare_exchange_2(i16, i16) { +; CHECK-LABEL: test_atomic_compare_exchange_2: +; CHECK: .LBB{{[0-9]+}}_5: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s2, s@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, s@hi(%s2) +; CHECK-NEXT: and %s2, -4, %s2 +; CHECK-NEXT: or %s3, 2, %s2 +; CHECK-NEXT: ld2b.zx %s4, (,%s3) +; CHECK-NEXT: and %s1, %s1, (48)0 +; CHECK-NEXT: and %s3, %s0, (48)0 +; CHECK-NEXT: sla.w.sx %s34, %s4, 16 +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: lea %s6, -65536 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s4, %s34, %s1 +; CHECK-NEXT: or %s5, %s34, %s3 +; CHECK-NEXT: cas.w %s4, (%s2), %s5 +; CHECK-NEXT: breq.w %s4, %s5, .LBB26_3 +; CHECK-NEXT: # %bb.2: # %partword.cmpxchg.failure +; CHECK-NEXT: # in Loop: Header=BB26_1 Depth=1 +; CHECK-NEXT: or %s7, 0, %s34 +; CHECK-NEXT: and %s34, %s4, %s6 +; CHECK-NEXT: brne.w %s7, %s34, .LBB26_1 +; CHECK: .LBB{{[0-9]+}}_3: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmps.w.sx %s1, %s4, %s5 +; CHECK-NEXT: cmov.w.eq %s0, (63)0, %s1 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +; FIXME: following ld2b.zx should be ldl.sx... 
+entry: + %2 = cmpxchg i16* @s, i16 %0, i16 %1 seq_cst seq_cst + %3 = extractvalue { i16, i1 } %2, 1 + %conv = zext i1 %3 to i16 + ret i16 %conv +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_compare_exchange_4(i32, i32) { +; CHECK-LABEL: test_atomic_compare_exchange_4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s2, i@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, i@hi(%s2) +; CHECK-NEXT: cas.w %s1, (%s2), %s0 +; CHECK-NEXT: cmps.w.sx %s1, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: cmov.w.eq %s0, (63)0, %s1 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i32* @i, i32 %0, i32 %1 seq_cst seq_cst + %3 = extractvalue { i32, i1 } %2, 1 + %conv = zext i1 %3 to i32 + ret i32 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* @l, i64 %0, i64 %1 seq_cst seq_cst + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_compare_exchange_16(i128, i128) { +; CHECK-LABEL: test_atomic_compare_exchange_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: st %s1, -8(,%s9) +; CHECK-NEXT: st %s0, -16(,%s9) +; CHECK-NEXT: lea %s0, __atomic_compare_exchange_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_compare_exchange_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: lea %s1, -16(,%s9) +; CHECK-NEXT: or %s4, 5, (0)1 +; CHECK-NEXT: or %s5, 0, %s4 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i128* @it, i128 %0, i128 %1 seq_cst seq_cst + %3 = extractvalue { i128, i1 } %2, 1 + %conv = zext i1 %3 to i128 + ret i128 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_relaxed(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_relaxed: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* @l, i64 %0, i64 %1 monotonic monotonic + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_consume(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_consume: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: 
adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* @l, i64 %0, i64 %1 acquire acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_acquire(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_acquire: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* @l, i64 %0, i64 %1 acquire acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_release(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_release: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* @l, i64 %0, i64 %1 release monotonic + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_acq_rel(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_acq_rel: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* @l, i64 %0, i64 %1 acq_rel acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 seq_cst seq_cst + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak_relaxed(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak_relaxed: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 monotonic monotonic + %3 = 
extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak_consume(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak_consume: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 acquire acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak_acquire(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak_acquire: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 acquire acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak_release(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak_release: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 release monotonic + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak_acq_rel(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak_acq_rel: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 acq_rel acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind readnone +define void @test_atomic_fence_relaxed() { +; CHECK-LABEL: test_atomic_fence_relaxed: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s11, 0, %s9 +entry: + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_fence_consume() { +; CHECK-LABEL: test_atomic_fence_consume: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + fence acquire + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_fence_acquire() { +; CHECK-LABEL: test_atomic_fence_acquire: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: 
+ fence acquire + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_fence_release() { +; CHECK-LABEL: test_atomic_fence_release: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + fence release + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_fence_acq_rel() { +; CHECK-LABEL: test_atomic_fence_acq_rel: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + fence acq_rel + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_fence_seq_cst() { +; CHECK-LABEL: test_atomic_fence_seq_cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + fence seq_cst + ret void +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_add_1() { +; CHECK-LABEL: test_atomic_fetch_add_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: and %s0, -4, %s0 +; CHECK-NEXT: ldl.sx %s2, (,%s0) +; CHECK-NEXT: lea %s1, -256 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s3, 0, %s2 +; CHECK-NEXT: lea %s2, 1(%s2) +; CHECK-NEXT: and %s2, %s2, (56)0 +; CHECK-NEXT: and %s4, %s3, %s1 +; CHECK-NEXT: or %s2, %s4, %s2 +; CHECK-NEXT: cas.w %s2, (%s0), %s3 +; CHECK-NEXT: brne.w %s2, %s3, .LBB47_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s2, 24 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw add i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_add_2() { +; CHECK-LABEL: test_atomic_fetch_add_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: and %s0, -4, %s0 +; CHECK-NEXT: ldl.sx %s2, (,%s0) +; CHECK-NEXT: lea %s1, -65536 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s3, 0, %s2 +; CHECK-NEXT: lea %s2, 1(%s2) +; CHECK-NEXT: and %s2, %s2, (48)0 +; CHECK-NEXT: and %s4, %s3, %s1 +; CHECK-NEXT: or %s2, %s4, %s2 +; CHECK-NEXT: cas.w %s2, (%s0), %s3 +; CHECK-NEXT: brne.w %s2, %s3, .LBB48_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s2, 16 +; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw add i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_add_4() { +; CHECK-LABEL: test_atomic_fetch_add_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, i@hi(%s0) +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: lea %s0, 1(%s0) +; CHECK-NEXT: cas.w %s0, (%s1), %s2 +; CHECK-NEXT: brne.w %s0, %s2, .LBB49_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw add i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_add_8() { +; CHECK-LABEL: test_atomic_fetch_add_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and 
%s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, l@hi(%s0) +; CHECK-NEXT: ld %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: lea %s0, 1(%s0) +; CHECK-NEXT: cas.l %s0, (%s1), %s2 +; CHECK-NEXT: brne.l %s0, %s2, .LBB50_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw add i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_add_16() { +; CHECK-LABEL: test_atomic_fetch_add_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, __atomic_fetch_add_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_add_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw add i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_sub_1() { +; CHECK-LABEL: test_atomic_fetch_sub_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: and %s0, -4, %s0 +; CHECK-NEXT: ldl.sx %s2, (,%s0) +; CHECK-NEXT: lea %s1, -256 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s3, 0, %s2 +; CHECK-NEXT: lea %s2, -1(%s2) +; CHECK-NEXT: and %s2, %s2, (56)0 +; CHECK-NEXT: and %s4, %s3, %s1 +; CHECK-NEXT: or %s2, %s4, %s2 +; CHECK-NEXT: cas.w %s2, (%s0), %s3 +; CHECK-NEXT: brne.w %s2, %s3, .LBB52_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s2, 24 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw sub i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_sub_2() { +; CHECK-LABEL: test_atomic_fetch_sub_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: and %s0, -4, %s0 +; CHECK-NEXT: ldl.sx %s2, (,%s0) +; CHECK-NEXT: lea %s1, -65536 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s3, 0, %s2 +; CHECK-NEXT: lea %s2, -1(%s2) +; CHECK-NEXT: and %s2, %s2, (48)0 +; CHECK-NEXT: and %s4, %s3, %s1 +; CHECK-NEXT: or %s2, %s4, %s2 +; CHECK-NEXT: cas.w %s2, (%s0), %s3 +; CHECK-NEXT: brne.w %s2, %s3, .LBB53_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s2, 16 +; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw sub i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_sub_4() { +; CHECK-LABEL: test_atomic_fetch_sub_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, i@hi(%s0) +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: lea %s0, -1(%s0) +; CHECK-NEXT: cas.w %s0, (%s1), %s2 +; CHECK-NEXT: brne.w %s0, %s2, .LBB54_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 
+; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw sub i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_sub_8() { +; CHECK-LABEL: test_atomic_fetch_sub_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, l@hi(%s0) +; CHECK-NEXT: ld %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: lea %s0, -1(%s0) +; CHECK-NEXT: cas.l %s0, (%s1), %s2 +; CHECK-NEXT: brne.l %s0, %s2, .LBB55_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw sub i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_sub_16() { +; CHECK-LABEL: test_atomic_fetch_sub_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, __atomic_fetch_sub_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_sub_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw sub i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_and_1() { +; CHECK-LABEL: test_atomic_fetch_and_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK-NEXT: lea %s2, -255 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s3, 0, %s0 +; CHECK-NEXT: and %s0, %s0, %s2 +; CHECK-NEXT: cas.w %s0, (%s1), %s3 +; CHECK-NEXT: brne.w %s0, %s3, .LBB57_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 24 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw and i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_and_2() { +; CHECK-LABEL: test_atomic_fetch_and_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK-NEXT: lea %s2, -65535 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s3, 0, %s0 +; CHECK-NEXT: and %s0, %s0, %s2 +; CHECK-NEXT: cas.w %s0, (%s1), %s3 +; CHECK-NEXT: brne.w %s0, %s3, .LBB58_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 16 +; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw and i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_and_4() { +; CHECK-LABEL: test_atomic_fetch_and_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, i@hi(%s0) +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: and %s0, 1, %s0 +; CHECK-NEXT: cas.w 
%s0, (%s1), %s2 +; CHECK-NEXT: brne.w %s0, %s2, .LBB59_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw and i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_and_8() { +; CHECK-LABEL: test_atomic_fetch_and_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, l@hi(%s0) +; CHECK-NEXT: ld %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: and %s0, 1, %s0 +; CHECK-NEXT: cas.l %s0, (%s1), %s2 +; CHECK-NEXT: brne.l %s0, %s2, .LBB60_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw and i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_and_16() { +; CHECK-LABEL: test_atomic_fetch_and_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, __atomic_fetch_and_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_and_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw and i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_or_1() { +; CHECK-LABEL: test_atomic_fetch_or_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: or %s0, 1, %s0 +; CHECK-NEXT: cas.w %s0, (%s1), %s2 +; CHECK-NEXT: brne.w %s0, %s2, .LBB62_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 24 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw or i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_or_2() { +; CHECK-LABEL: test_atomic_fetch_or_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: or %s0, 1, %s0 +; CHECK-NEXT: cas.w %s0, (%s1), %s2 +; CHECK-NEXT: brne.w %s0, %s2, .LBB63_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 16 +; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw or i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_or_4() { +; CHECK-LABEL: test_atomic_fetch_or_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, i@hi(%s0) +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; 
CHECK-NEXT: or %s0, 1, %s0 +; CHECK-NEXT: cas.w %s0, (%s1), %s2 +; CHECK-NEXT: brne.w %s0, %s2, .LBB64_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw or i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_or_8() { +; CHECK-LABEL: test_atomic_fetch_or_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, l@hi(%s0) +; CHECK-NEXT: ld %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: or %s0, 1, %s0 +; CHECK-NEXT: cas.l %s0, (%s1), %s2 +; CHECK-NEXT: brne.l %s0, %s2, .LBB65_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw or i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_or_16() { +; CHECK-LABEL: test_atomic_fetch_or_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, __atomic_fetch_or_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_or_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw or i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_xor_1() { +; CHECK-LABEL: test_atomic_fetch_xor_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: xor %s0, 1, %s0 +; CHECK-NEXT: cas.w %s0, (%s1), %s2 +; CHECK-NEXT: brne.w %s0, %s2, .LBB67_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 24 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xor i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_xor_2() { +; CHECK-LABEL: test_atomic_fetch_xor_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: xor %s0, 1, %s0 +; CHECK-NEXT: cas.w %s0, (%s1), %s2 +; CHECK-NEXT: brne.w %s0, %s2, .LBB68_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 16 +; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xor i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_xor_4() { +; CHECK-LABEL: test_atomic_fetch_xor_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, i@hi(%s0) +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop 
Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: xor %s0, 1, %s0 +; CHECK-NEXT: cas.w %s0, (%s1), %s2 +; CHECK-NEXT: brne.w %s0, %s2, .LBB69_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xor i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_xor_8() { +; CHECK-LABEL: test_atomic_fetch_xor_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, l@hi(%s0) +; CHECK-NEXT: ld %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: xor %s0, 1, %s0 +; CHECK-NEXT: cas.l %s0, (%s1), %s2 +; CHECK-NEXT: brne.l %s0, %s2, .LBB70_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xor i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_xor_16() { +; CHECK-LABEL: test_atomic_fetch_xor_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, __atomic_fetch_xor_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_xor_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xor i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_nand_1() { +; CHECK-LABEL: test_atomic_fetch_nand_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: and %s0, -4, %s0 +; CHECK-NEXT: ldl.sx %s3, (,%s0) +; CHECK-NEXT: lea %s1, 254 +; CHECK-NEXT: lea %s2, -256 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s4, 0, %s3 +; CHECK-NEXT: xor %s3, -1, %s3 +; CHECK-NEXT: or %s3, %s3, %s1 +; CHECK-NEXT: and %s3, %s3, (56)0 +; CHECK-NEXT: and %s5, %s4, %s2 +; CHECK-NEXT: or %s3, %s5, %s3 +; CHECK-NEXT: cas.w %s3, (%s0), %s4 +; CHECK-NEXT: brne.w %s3, %s4, .LBB72_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s3, 24 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw nand i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_nand_2() { +; CHECK-LABEL: test_atomic_fetch_nand_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: and %s0, -4, %s0 +; CHECK-NEXT: ldl.sx %s3, (,%s0) +; CHECK-NEXT: lea %s1, 65534 +; CHECK-NEXT: lea %s2, -65536 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s4, 0, %s3 +; CHECK-NEXT: xor %s3, -1, %s3 +; CHECK-NEXT: or %s3, %s3, %s1 +; CHECK-NEXT: and %s3, %s3, (48)0 +; CHECK-NEXT: and %s5, %s4, %s2 +; CHECK-NEXT: or %s3, %s5, %s3 +; CHECK-NEXT: cas.w %s3, (%s0), %s4 +; CHECK-NEXT: brne.w %s3, %s4, .LBB73_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s3, 16 +; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: or %s11, 
0, %s9 +entry: + %0 = atomicrmw nand i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_nand_4() { +; CHECK-LABEL: test_atomic_fetch_nand_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, i@hi(%s0) +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: xor %s0, -1, %s0 +; CHECK-NEXT: or %s0, -2, %s0 +; CHECK-NEXT: cas.w %s0, (%s1), %s2 +; CHECK-NEXT: brne.w %s0, %s2, .LBB74_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw nand i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_nand_8() { +; CHECK-LABEL: test_atomic_fetch_nand_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, l@hi(%s0) +; CHECK-NEXT: ld %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: xor %s0, -1, %s0 +; CHECK-NEXT: or %s0, -2, %s0 +; CHECK-NEXT: cas.l %s0, (%s1), %s2 +; CHECK-NEXT: brne.l %s0, %s2, .LBB75_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw nand i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_nand_16() { +; CHECK-LABEL: test_atomic_fetch_nand_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, __atomic_fetch_nand_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_nand_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw nand i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_max_4() { +; CHECK-LABEL: test_atomic_fetch_max_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, i@hi(%s0) +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK-NEXT: or %s2, 1, (0)1 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s3, 0, %s0 +; CHECK-NEXT: maxs.w.zx %s0, %s0, %s2 +; CHECK-NEXT: cas.w %s0, (%s1), %s3 +; CHECK-NEXT: brne.w %s0, %s3, .LBB77_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw max i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_min_4() { +; CHECK-LABEL: test_atomic_fetch_min_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, i@hi(%s0) +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK-NEXT: or %s2, 1, (0)1 +; CHECK-NEXT: or %s3, 2, (0)1 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s4, 0, %s0 +; CHECK-NEXT: cmps.w.sx %s5, %s0, %s3 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: cmov.w.lt %s0, %s4, %s5 +; CHECK-NEXT: cas.w %s0, (%s1), %s4 +; CHECK-NEXT: brne.w %s0, %s4, .LBB78_1 +; 
CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw min i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_umax_4() { +; CHECK-LABEL: test_atomic_fetch_umax_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, ui@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, ui@hi(%s0) +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK-NEXT: or %s2, 1, (0)1 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s3, 0, %s0 +; CHECK-NEXT: cmpu.w %s4, %s0, %s2 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: cmov.w.gt %s0, %s3, %s4 +; CHECK-NEXT: cas.w %s0, (%s1), %s3 +; CHECK-NEXT: brne.w %s0, %s3, .LBB79_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw umax i32* @ui, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_umin_4() { +; CHECK-LABEL: test_atomic_fetch_umin_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, ui@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, ui@hi(%s0) +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK-NEXT: or %s2, 1, (0)1 +; CHECK-NEXT: or %s3, 2, (0)1 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s4, 0, %s0 +; CHECK-NEXT: cmpu.w %s5, %s0, %s3 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: cmov.w.lt %s0, %s4, %s5 +; CHECK-NEXT: cas.w %s0, (%s1), %s4 +; CHECK-NEXT: brne.w %s0, %s4, .LBB80_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw umin i32* @ui, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_1() { +; CHECK-LABEL: test_atomic_clear_1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: st1b %s1, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 0, i8* @c seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_2() { +; CHECK-LABEL: test_atomic_clear_2: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: st1b %s1, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 0, i8* bitcast (i16* @s to i8*) seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_4() { +; CHECK-LABEL: test_atomic_clear_4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, i@hi(%s0) +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: st1b %s1, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 0, i8* bitcast (i32* @i to i8*) seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_8() { +; CHECK-LABEL: test_atomic_clear_8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: 
lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, l@hi(%s0) +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: st1b %s1, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 0, i8* bitcast (i64* @l to i8*) seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_16() { +; CHECK-LABEL: test_atomic_clear_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: st1b %s1, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 0, i8* bitcast (i128* @it to i8*) seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8stk(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8stk: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cas.l %s1, 192(%s11), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = alloca i64, align 32 + %3 = cmpxchg i64* %2, i64 %0, i64 %1 seq_cst seq_cst + %4 = extractvalue { i64, i1 } %3, 1 + %conv = zext i1 %4 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_8stk() { +; CHECK-LABEL: test_atomic_clear_8stk: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: st1b %s0, 192(,%s11) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = alloca i64, align 32 + %1 = bitcast i64* %0 to i8* + store atomic i8 0, i8* %1 seq_cst, align 32 + ret void +} diff --git a/llvm/test/CodeGen/VE/atomic_unaligned.ll b/llvm/test/CodeGen/VE/atomic_unaligned.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/atomic_unaligned.ll @@ -0,0 +1,2180 @@ +; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s + +%struct.sci = type <{ i8, i32 }> +%struct.scl = type <{ i8, i64 }> +%struct.sil = type <{ i32, i64 }> + +@c = common global i8 0, align 1 +@s = common global i16 0, align 1 +@i = common global i32 0, align 1 +@l = common global i64 0, align 1 +@it= common global i128 0, align 1 +@ui = common global i32 0, align 1 +@sci1 = common global %struct.sci <{ i8 0, i32 0 }>, align 1 +@scl1 = common global %struct.scl <{ i8 0, i64 0 }>, align 1 +@sil1 = common global %struct.sil <{ i32 0, i64 0 }>, align 1 + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_1() { +; CHECK-LABEL: test_atomic_store_1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: st1b %s1, (,%s0) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 12, i8* @c release, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_1seq() { +; CHECK-LABEL: test_atomic_store_1seq: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: st1b %s1, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 12, i8* @c seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_2() { 
+; CHECK-LABEL: test_atomic_store_2: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: st2b %s1, (,%s0) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i16 12, i16* @s release, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_2seq() { +; CHECK-LABEL: test_atomic_store_2seq: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: st2b %s1, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i16 12, i16* @s seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_4() { +; CHECK-LABEL: test_atomic_store_4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, i@hi(%s0) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: stl %s1, (,%s0) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i32 12, i32* @i release, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_4cst() { +; CHECK-LABEL: test_atomic_store_4cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, i@hi(%s0) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: stl %s1, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i32 12, i32* @i seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_8() { +; CHECK-LABEL: test_atomic_store_8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, l@hi(%s0) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: st %s1, (,%s0) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i64 12, i64* @l release, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_8cst() { +; CHECK-LABEL: test_atomic_store_8cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, l@hi(%s0) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: st %s1, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i64 12, i64* @l seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_16() { +; CHECK-LABEL: test_atomic_store_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, __atomic_store_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_store_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 3, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i128 12, i128* @it release, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_store_16cst() { +; CHECK-LABEL: test_atomic_store_16cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, __atomic_store_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_store_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: 
and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 12, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i128 12, i128* @it seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_load_1() { +; CHECK-LABEL: test_atomic_load_1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: ld1b.zx %s0, (,%s0) +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: sla.w.sx %s0, %s0, 24 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i8, i8* @c acquire, align 32 + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_load_1cst() { +; CHECK-LABEL: test_atomic_load_1cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: ld1b.zx %s0, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 24 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i8, i8* @c seq_cst, align 32 + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_load_2() { +; CHECK-LABEL: test_atomic_load_2: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: ld2b.zx %s0, (,%s0) +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: sla.w.sx %s0, %s0, 16 +; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i16, i16* @s acquire, align 32 + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_load_2cst() { +; CHECK-LABEL: test_atomic_load_2cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: ld2b.zx %s0, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 16 +; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i16, i16* @s seq_cst, align 32 + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_load_4() { +; CHECK-LABEL: test_atomic_load_4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, i@hi(%s0) +; CHECK-NEXT: ldl.zx %s0, (,%s0) +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i32, i32* @i acquire, align 32 + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_load_4cst() { +; CHECK-LABEL: test_atomic_load_4cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, i@hi(%s0) +; CHECK-NEXT: ldl.zx %s0, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i32, i32* @i seq_cst, align 32 + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_load_8() { +; CHECK-LABEL: test_atomic_load_8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, l@hi(%s0) +; CHECK-NEXT: ld %s0, (,%s0) +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i64, i64* @l acquire, align 32 + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_load_8cst() { +; CHECK-LABEL: 
test_atomic_load_8cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, l@hi(%s0) +; CHECK-NEXT: ld %s0, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i64, i64* @l seq_cst, align 32 + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_load_16() { +; CHECK-LABEL: test_atomic_load_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, __atomic_load_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_load_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 2, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i128, i128* @it acquire, align 32 + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_load_16cst() { +; CHECK-LABEL: test_atomic_load_16cst: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, __atomic_load_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_load_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = load atomic i128, i128* @it seq_cst, align 32 + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_exchange_1() { +; CHECK-LABEL: test_atomic_exchange_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: ldl.sx %s4, (,%s1) +; CHECK-NEXT: lea %s2, 255 +; CHECK-NEXT: or %s3, 10, (0)1 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s5, 0, %s4 +; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1 +; CHECK-NEXT: and %s4, 3, %s4 +; CHECK-NEXT: sla.w.sx %s4, %s4, 3 +; CHECK-NEXT: sla.w.sx %s6, %s2, %s4 +; CHECK-NEXT: xor %s6, -1, %s6 +; CHECK-NEXT: and %s6, %s5, %s6 +; CHECK-NEXT: sla.w.sx %s4, %s3, %s4 +; CHECK-NEXT: or %s4, %s6, %s4 +; CHECK-NEXT: cas.w %s4, (%s1), %s5 +; CHECK-NEXT: brne.w %s4, %s5, .LBB20_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s1, %s4, (32)0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, 3, %s0 +; CHECK-NEXT: sla.w.sx %s0, %s0, 3 +; CHECK-NEXT: srl %s0, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 24 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i8* @c, i8 10 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_exchange_2() { +; CHECK-LABEL: test_atomic_exchange_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: ldl.sx %s4, (,%s1) +; CHECK-NEXT: lea %s2, 65535 +; CHECK-NEXT: lea %s3, 28672 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s5, 0, %s4 +; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1 +; CHECK-NEXT: and %s4, 3, %s4 +; CHECK-NEXT: sla.w.sx %s4, %s4, 3 +; CHECK-NEXT: sla.w.sx %s6, %s2, %s4 +; CHECK-NEXT: xor %s6, -1, %s6 +; CHECK-NEXT: and %s6, %s5, %s6 +; CHECK-NEXT: sla.w.sx %s4, %s3, %s4 +; CHECK-NEXT: or %s4, %s6, %s4 +; CHECK-NEXT: cas.w %s4, (%s1), %s5 +; CHECK-NEXT: 
brne.w %s4, %s5, .LBB21_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s1, %s4, (32)0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, 3, %s0 +; CHECK-NEXT: sla.w.sx %s0, %s0, 3 +; CHECK-NEXT: srl %s0, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 16 +; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i16* @s, i16 28672 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_exchange_4() { +; CHECK-LABEL: test_atomic_exchange_4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, i@hi(%s0) +; CHECK-NEXT: lea %s0, 1886417008 +; CHECK-NEXT: ts1am.w %s0, (%s1), 15 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i32* @i, i32 1886417008 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_exchange_4_align1() { +; CHECK-LABEL: test_atomic_exchange_4_align1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, sci1@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, sci1@hi(%s0) +; CHECK-NEXT: lea %s0, 1886417008 +; CHECK-NEXT: ts1am.w %s0, 1(%s1), 15 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +; FIXME: Bus Error occurred due to unaligned ts1am instruction +entry: + %0 = atomicrmw xchg i32* getelementptr inbounds (%struct.sci, %struct.sci* @sci1, i32 0, i32 1), i32 1886417008 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_exchange_8() { +; CHECK-LABEL: test_atomic_exchange_8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, l@hi(%s0) +; CHECK-NEXT: lea %s0, 1886417008 +; CHECK-NEXT: lea.sl %s0, 1886417008(%s0) +; CHECK-NEXT: ts1am.l %s0, (%s1), 127 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i64* @l, i64 8102099357864587376 acquire + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_exchange_8_align1() { +; CHECK-LABEL: test_atomic_exchange_8_align1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, scl1@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, scl1@hi(%s0) +; CHECK-NEXT: lea %s0, 1886417008 +; CHECK-NEXT: lea.sl %s0, 1886417008(%s0) +; CHECK-NEXT: ts1am.l %s0, 1(%s1), 127 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +; FIXME: Bus Error occurred due to unaligned ts1am instruction +entry: + %0 = atomicrmw xchg i64* getelementptr inbounds (%struct.scl, %struct.scl* @scl1, i32 0, i32 1), i64 8102099357864587376 acquire + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_exchange_8_align4() { +; CHECK-LABEL: test_atomic_exchange_8_align4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, sil1@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, sil1@hi(%s0) +; CHECK-NEXT: lea %s0, 1886417008 +; CHECK-NEXT: lea.sl %s0, 1886417008(%s0) +; CHECK-NEXT: ts1am.l %s0, 4(%s1), 127 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +; FIXME: Bus Error occurred due to unaligned ts1am instruction +entry: + %0 = atomicrmw xchg i64* getelementptr inbounds (%struct.sil, %struct.sil* @sil1, i32 0, i32 1), i64 8102099357864587376 acquire + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_exchange_16() { +; CHECK-LABEL: test_atomic_exchange_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea 
%s0, __atomic_exchange_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_exchange_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: lea %s1, 1886417008 +; CHECK-NEXT: lea.sl %s1, 1886417008(%s1) +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 2, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xchg i128* @it, i128 8102099357864587376 acquire + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_compare_exchange_1(i8, i8) { +; CHECK-LABEL: test_atomic_compare_exchange_1: +; CHECK: .LBB{{[0-9]+}}_5: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: and %s2, %s1, (56)0 +; CHECK-NEXT: lea %s1, c@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s1, c@hi(%s1) +; CHECK-NEXT: adds.w.sx %s3, %s1, (0)1 +; CHECK-NEXT: and %s3, 3, %s3 +; CHECK-NEXT: sla.w.sx %s6, %s3, 3 +; CHECK-NEXT: sla.w.sx %s2, %s2, %s6 +; CHECK-NEXT: and %s0, %s0, (56)0 +; CHECK-NEXT: sla.w.sx %s3, %s0, %s6 +; CHECK-NEXT: and %s4, -4, %s1 +; CHECK-NEXT: ldl.sx %s0, (,%s4) +; CHECK-NEXT: lea %s5, 255 +; CHECK-NEXT: sla.w.sx %s6, %s5, %s6 +; CHECK-NEXT: xor %s6, -1, %s6 +; CHECK-NEXT: and %s35, %s0, %s6 +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s6, %s35, %s2 +; CHECK-NEXT: or %s7, %s35, %s3 +; CHECK-NEXT: cas.w %s6, (%s4), %s7 +; CHECK-NEXT: breq.w %s6, %s7, .LBB28_3 +; CHECK-NEXT: # %bb.2: # %partword.cmpxchg.failure +; CHECK-NEXT: # in Loop: Header=BB28_1 Depth=1 +; CHECK-NEXT: or %s34, 0, %s35 +; CHECK-NEXT: adds.w.sx %s35, %s1, (0)1 +; CHECK-NEXT: and %s35, 3, %s35 +; CHECK-NEXT: sla.w.sx %s35, %s35, 3 +; CHECK-NEXT: sla.w.sx %s35, %s5, %s35 +; CHECK-NEXT: xor %s35, -1, %s35 +; CHECK-NEXT: and %s35, %s6, %s35 +; CHECK-NEXT: brne.w %s34, %s35, .LBB28_1 +; CHECK: .LBB{{[0-9]+}}_3: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmps.w.sx %s1, %s6, %s7 +; CHECK-NEXT: cmov.w.eq %s0, (63)0, %s1 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i8* @c, i8 %0, i8 %1 seq_cst seq_cst + %3 = extractvalue { i8, i1 } %2, 1 + %frombool = zext i1 %3 to i8 + ret i8 %frombool +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_compare_exchange_2(i16, i16) { +; CHECK-LABEL: test_atomic_compare_exchange_2: +; CHECK: .LBB{{[0-9]+}}_5: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: and %s2, %s1, (48)0 +; CHECK-NEXT: lea %s1, s@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s1, s@hi(%s1) +; CHECK-NEXT: adds.w.sx %s3, %s1, (0)1 +; CHECK-NEXT: and %s3, 3, %s3 +; CHECK-NEXT: sla.w.sx %s6, %s3, 3 +; CHECK-NEXT: sla.w.sx %s2, %s2, %s6 +; CHECK-NEXT: and %s0, %s0, (48)0 +; CHECK-NEXT: sla.w.sx %s3, %s0, %s6 +; CHECK-NEXT: and %s4, -4, %s1 +; CHECK-NEXT: ldl.sx %s0, (,%s4) +; CHECK-NEXT: lea %s5, 65535 +; CHECK-NEXT: sla.w.sx %s6, %s5, %s6 +; CHECK-NEXT: xor %s6, -1, %s6 +; CHECK-NEXT: and %s35, %s0, %s6 +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s6, %s35, %s2 +; CHECK-NEXT: or %s7, %s35, %s3 +; CHECK-NEXT: cas.w %s6, (%s4), %s7 +; CHECK-NEXT: breq.w %s6, %s7, .LBB29_3 +; CHECK-NEXT: # %bb.2: # %partword.cmpxchg.failure +; CHECK-NEXT: # in Loop: Header=BB29_1 Depth=1 +; CHECK-NEXT: or %s34, 0, %s35 +; CHECK-NEXT: adds.w.sx %s35, %s1, (0)1 +; CHECK-NEXT: and %s35, 3, %s35 +; CHECK-NEXT: sla.w.sx %s35, %s35, 3 +; 
CHECK-NEXT: sla.w.sx %s35, %s5, %s35 +; CHECK-NEXT: xor %s35, -1, %s35 +; CHECK-NEXT: and %s35, %s6, %s35 +; CHECK-NEXT: brne.w %s34, %s35, .LBB29_1 +; CHECK: .LBB{{[0-9]+}}_3: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmps.w.sx %s1, %s6, %s7 +; CHECK-NEXT: cmov.w.eq %s0, (63)0, %s1 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i16* @s, i16 %0, i16 %1 seq_cst seq_cst + %3 = extractvalue { i16, i1 } %2, 1 + %conv = zext i1 %3 to i16 + ret i16 %conv +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_compare_exchange_4(i32, i32) { +; CHECK-LABEL: test_atomic_compare_exchange_4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s2, i@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, i@hi(%s2) +; CHECK-NEXT: cas.w %s1, (%s2), %s0 +; CHECK-NEXT: cmps.w.sx %s1, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: cmov.w.eq %s0, (63)0, %s1 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i32* @i, i32 %0, i32 %1 seq_cst seq_cst + %3 = extractvalue { i32, i1 } %2, 1 + %conv = zext i1 %3 to i32 + ret i32 %conv +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_compare_exchange_4_align1(i32, i32) { +; CHECK-LABEL: test_atomic_compare_exchange_4_align1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s2, sci1@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, sci1@hi(%s2) +; CHECK-NEXT: cas.w %s1, 1(%s2), %s0 +; CHECK-NEXT: cmps.w.sx %s1, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: cmov.w.eq %s0, (63)0, %s1 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +; FIXME: Bus Error occurred due to unaligned cas instruction +entry: + %2 = cmpxchg i32* getelementptr inbounds (%struct.sci, %struct.sci* @sci1, i32 0, i32 1), i32 %0, i32 %1 seq_cst seq_cst + %3 = extractvalue { i32, i1 } %2, 1 + %conv = zext i1 %3 to i32 + ret i32 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: %2 = cmpxchg i64* @l, i64 %0, i64 %1 seq_cst seq_cst + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_align1(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_align1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s2, scl1@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, scl1@hi(%s2) +; CHECK-NEXT: cas.l %s1, 1(%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +; FIXME: Bus Error occurred due to unaligned cas instruction +entry: + %2 = cmpxchg i64* getelementptr inbounds (%struct.scl, %struct.scl* @scl1, i32 0, i32 1), i64 %0, i64 %1 seq_cst seq_cst + %3 = extractvalue { i64, i1 } %2, 1 + %conv 
= zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_align4(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_align4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s2, sil1@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, sil1@hi(%s2) +; CHECK-NEXT: cas.l %s1, 4(%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +; FIXME: Bus Error occurred due to unaligned cas instruction +entry: + %2 = cmpxchg i64* getelementptr inbounds (%struct.sil, %struct.sil* @sil1, i32 0, i32 1), i64 %0, i64 %1 seq_cst seq_cst + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_compare_exchange_16(i128, i128) { +; CHECK-LABEL: test_atomic_compare_exchange_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: st %s1, -8(,%s9) +; CHECK-NEXT: st %s0, -16(,%s9) +; CHECK-NEXT: lea %s0, __atomic_compare_exchange_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_compare_exchange_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: lea %s1, -16(,%s9) +; CHECK-NEXT: or %s4, 5, (0)1 +; CHECK-NEXT: or %s5, 0, %s4 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i128* @it, i128 %0, i128 %1 seq_cst seq_cst + %3 = extractvalue { i128, i1 } %2, 1 + %conv = zext i1 %3 to i128 + ret i128 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_relaxed(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_relaxed: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* @l, i64 %0, i64 %1 monotonic monotonic + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_consume(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_consume: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* @l, i64 %0, i64 %1 acquire acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_acquire(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_acquire: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +;
CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* @l, i64 %0, i64 %1 acquire acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_release(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_release: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* @l, i64 %0, i64 %1 release monotonic + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_acq_rel(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_acq_rel: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg i64* @l, i64 %0, i64 %1 acq_rel acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 seq_cst seq_cst + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak_relaxed(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak_relaxed: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 monotonic monotonic + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak_consume(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak_consume: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, 
i64 %1 acquire acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak_acquire(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak_acquire: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 acquire acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak_release(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak_release: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 release monotonic + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8_weak_acq_rel(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8_weak_acq_rel: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: lea %s2, l@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, l@hi(%s2) +; CHECK-NEXT: cas.l %s1, (%s2), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = cmpxchg weak i64* @l, i64 %0, i64 %1 acq_rel acquire + %3 = extractvalue { i64, i1 } %2, 1 + %conv = zext i1 %3 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind readnone +define void @test_atomic_fence_relaxed() { +; CHECK-LABEL: test_atomic_fence_relaxed: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s11, 0, %s9 +entry: + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_fence_consume() { +; CHECK-LABEL: test_atomic_fence_consume: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + fence acquire + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_fence_acquire() { +; CHECK-LABEL: test_atomic_fence_acquire: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 2 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + fence acquire + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_fence_release() { +; CHECK-LABEL: test_atomic_fence_release: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + fence release + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_fence_acq_rel() { +; CHECK-LABEL: test_atomic_fence_acq_rel: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + fence acq_rel + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_fence_seq_cst() { +; CHECK-LABEL: test_atomic_fence_seq_cst: +; 
CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + fence seq_cst + ret void +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_add_1() { +; CHECK-LABEL: test_atomic_fetch_add_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: ldl.sx %s4, (,%s1) +; CHECK-NEXT: or %s2, 1, (0)1 +; CHECK-NEXT: lea %s3, 255 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s5, 0, %s4 +; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1 +; CHECK-NEXT: and %s4, 3, %s4 +; CHECK-NEXT: sla.w.sx %s4, %s4, 3 +; CHECK-NEXT: sla.w.sx %s6, %s2, %s4 +; CHECK-NEXT: adds.w.sx %s6, %s5, %s6 +; CHECK-NEXT: sla.w.sx %s4, %s3, %s4 +; CHECK-NEXT: and %s6, %s6, %s4 +; CHECK-NEXT: xor %s4, -1, %s4 +; CHECK-NEXT: and %s4, %s5, %s4 +; CHECK-NEXT: or %s4, %s4, %s6 +; CHECK-NEXT: cas.w %s4, (%s1), %s5 +; CHECK-NEXT: brne.w %s4, %s5, .LBB53_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s1, %s4, (32)0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, 3, %s0 +; CHECK-NEXT: sla.w.sx %s0, %s0, 3 +; CHECK-NEXT: srl %s0, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 24 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw add i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_add_2() { +; CHECK-LABEL: test_atomic_fetch_add_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: ldl.sx %s4, (,%s1) +; CHECK-NEXT: or %s2, 1, (0)1 +; CHECK-NEXT: lea %s3, 65535 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s5, 0, %s4 +; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1 +; CHECK-NEXT: and %s4, 3, %s4 +; CHECK-NEXT: sla.w.sx %s4, %s4, 3 +; CHECK-NEXT: sla.w.sx %s6, %s2, %s4 +; CHECK-NEXT: adds.w.sx %s6, %s5, %s6 +; CHECK-NEXT: sla.w.sx %s4, %s3, %s4 +; CHECK-NEXT: and %s6, %s6, %s4 +; CHECK-NEXT: xor %s4, -1, %s4 +; CHECK-NEXT: and %s4, %s5, %s4 +; CHECK-NEXT: or %s4, %s4, %s6 +; CHECK-NEXT: cas.w %s4, (%s1), %s5 +; CHECK-NEXT: brne.w %s4, %s5, .LBB54_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s1, %s4, (32)0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, 3, %s0 +; CHECK-NEXT: sla.w.sx %s0, %s0, 3 +; CHECK-NEXT: srl %s0, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 16 +; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw add i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_add_4() { +; CHECK-LABEL: test_atomic_fetch_add_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, i@hi(%s0) +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: lea %s0, 1(%s0) +; CHECK-NEXT: cas.w %s0, (%s1), %s2 +; CHECK-NEXT: brne.w %s0, %s2, .LBB55_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw add i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: 
norecurse nounwind +define i64 @test_atomic_fetch_add_8() { +; CHECK-LABEL: test_atomic_fetch_add_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, l@hi(%s0) +; CHECK-NEXT: ld %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: lea %s0, 1(%s0) +; CHECK-NEXT: cas.l %s0, (%s1), %s2 +; CHECK-NEXT: brne.l %s0, %s2, .LBB56_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw add i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_add_16() { +; CHECK-LABEL: test_atomic_fetch_add_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, __atomic_fetch_add_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_add_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw add i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_sub_1() { +; CHECK-LABEL: test_atomic_fetch_sub_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: ldl.sx %s4, (,%s1) +; CHECK-NEXT: or %s2, 1, (0)1 +; CHECK-NEXT: lea %s3, 255 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s5, 0, %s4 +; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1 +; CHECK-NEXT: and %s4, 3, %s4 +; CHECK-NEXT: sla.w.sx %s4, %s4, 3 +; CHECK-NEXT: sla.w.sx %s6, %s2, %s4 +; CHECK-NEXT: subs.w.sx %s6, %s5, %s6 +; CHECK-NEXT: sla.w.sx %s4, %s3, %s4 +; CHECK-NEXT: and %s6, %s6, %s4 +; CHECK-NEXT: xor %s4, -1, %s4 +; CHECK-NEXT: and %s4, %s5, %s4 +; CHECK-NEXT: or %s4, %s4, %s6 +; CHECK-NEXT: cas.w %s4, (%s1), %s5 +; CHECK-NEXT: brne.w %s4, %s5, .LBB58_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s1, %s4, (32)0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, 3, %s0 +; CHECK-NEXT: sla.w.sx %s0, %s0, 3 +; CHECK-NEXT: srl %s0, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 24 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw sub i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_sub_2() { +; CHECK-LABEL: test_atomic_fetch_sub_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: ldl.sx %s4, (,%s1) +; CHECK-NEXT: or %s2, 1, (0)1 +; CHECK-NEXT: lea %s3, 65535 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s5, 0, %s4 +; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1 +; CHECK-NEXT: and %s4, 3, %s4 +; CHECK-NEXT: sla.w.sx %s4, %s4, 3 +; CHECK-NEXT: sla.w.sx %s6, %s2, %s4 +; CHECK-NEXT: subs.w.sx %s6, %s5, %s6 +; CHECK-NEXT: sla.w.sx %s4, %s3, %s4 +; CHECK-NEXT: and %s6, %s6, %s4 +; CHECK-NEXT: xor %s4, -1, %s4 +; CHECK-NEXT: and %s4, %s5, %s4 +; CHECK-NEXT: or %s4, %s4, %s6 +; CHECK-NEXT: cas.w %s4, (%s1), 
%s5 +; CHECK-NEXT: brne.w %s4, %s5, .LBB59_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s1, %s4, (32)0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, 3, %s0 +; CHECK-NEXT: sla.w.sx %s0, %s0, 3 +; CHECK-NEXT: srl %s0, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 16 +; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw sub i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_sub_4() { +; CHECK-LABEL: test_atomic_fetch_sub_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, i@hi(%s0) +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: lea %s0, -1(%s0) +; CHECK-NEXT: cas.w %s0, (%s1), %s2 +; CHECK-NEXT: brne.w %s0, %s2, .LBB60_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw sub i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_sub_8() { +; CHECK-LABEL: test_atomic_fetch_sub_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, l@hi(%s0) +; CHECK-NEXT: ld %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: lea %s0, -1(%s0) +; CHECK-NEXT: cas.l %s0, (%s1), %s2 +; CHECK-NEXT: brne.l %s0, %s2, .LBB61_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw sub i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_sub_16() { +; CHECK-LABEL: test_atomic_fetch_sub_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, __atomic_fetch_sub_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_sub_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw sub i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_and_1() { +; CHECK-LABEL: test_atomic_fetch_and_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: ldl.sx %s4, (,%s1) +; CHECK-NEXT: or %s2, 1, (0)1 +; CHECK-NEXT: lea %s3, 255 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s5, 0, %s4 +; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1 +; CHECK-NEXT: and %s4, 3, %s4 +; CHECK-NEXT: sla.w.sx %s4, %s4, 3 +; CHECK-NEXT: sla.w.sx %s6, %s2, %s4 +; CHECK-NEXT: sla.w.sx %s4, %s3, %s4 +; CHECK-NEXT: xor %s4, -1, %s4 +; CHECK-NEXT: or %s4, %s4, %s6 +; CHECK-NEXT: and %s4, %s5, %s4 +; CHECK-NEXT: cas.w %s4, (%s1), %s5 +; CHECK-NEXT: brne.w %s4, %s5, .LBB63_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s1, %s4, (32)0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, 3, %s0 +; CHECK-NEXT: sla.w.sx %s0, %s0, 3 +; CHECK-NEXT: srl %s0, %s1, %s0 
+; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 24 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw and i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_and_2() { +; CHECK-LABEL: test_atomic_fetch_and_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: ldl.sx %s4, (,%s1) +; CHECK-NEXT: or %s2, 1, (0)1 +; CHECK-NEXT: lea %s3, 65535 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s5, 0, %s4 +; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1 +; CHECK-NEXT: and %s4, 3, %s4 +; CHECK-NEXT: sla.w.sx %s4, %s4, 3 +; CHECK-NEXT: sla.w.sx %s6, %s2, %s4 +; CHECK-NEXT: sla.w.sx %s4, %s3, %s4 +; CHECK-NEXT: xor %s4, -1, %s4 +; CHECK-NEXT: or %s4, %s4, %s6 +; CHECK-NEXT: and %s4, %s5, %s4 +; CHECK-NEXT: cas.w %s4, (%s1), %s5 +; CHECK-NEXT: brne.w %s4, %s5, .LBB64_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s1, %s4, (32)0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, 3, %s0 +; CHECK-NEXT: sla.w.sx %s0, %s0, 3 +; CHECK-NEXT: srl %s0, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 16 +; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw and i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_and_4() { +; CHECK-LABEL: test_atomic_fetch_and_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, i@hi(%s0) +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: and %s0, 1, %s0 +; CHECK-NEXT: cas.w %s0, (%s1), %s2 +; CHECK-NEXT: brne.w %s0, %s2, .LBB65_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw and i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_and_8() { +; CHECK-LABEL: test_atomic_fetch_and_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, l@hi(%s0) +; CHECK-NEXT: ld %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: and %s0, 1, %s0 +; CHECK-NEXT: cas.l %s0, (%s1), %s2 +; CHECK-NEXT: brne.l %s0, %s2, .LBB66_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw and i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_and_16() { +; CHECK-LABEL: test_atomic_fetch_and_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, __atomic_fetch_and_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_and_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw and i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 
@test_atomic_fetch_or_1() { +; CHECK-LABEL: test_atomic_fetch_or_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: ldl.sx %s3, (,%s1) +; CHECK-NEXT: or %s2, 1, (0)1 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s4, 0, %s3 +; CHECK-NEXT: adds.w.sx %s3, %s0, (0)1 +; CHECK-NEXT: and %s3, 3, %s3 +; CHECK-NEXT: sla.w.sx %s3, %s3, 3 +; CHECK-NEXT: sla.w.sx %s3, %s2, %s3 +; CHECK-NEXT: or %s3, %s4, %s3 +; CHECK-NEXT: cas.w %s3, (%s1), %s4 +; CHECK-NEXT: brne.w %s3, %s4, .LBB68_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s1, %s3, (32)0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, 3, %s0 +; CHECK-NEXT: sla.w.sx %s0, %s0, 3 +; CHECK-NEXT: srl %s0, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 24 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw or i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_or_2() { +; CHECK-LABEL: test_atomic_fetch_or_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: ldl.sx %s3, (,%s1) +; CHECK-NEXT: or %s2, 1, (0)1 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s4, 0, %s3 +; CHECK-NEXT: adds.w.sx %s3, %s0, (0)1 +; CHECK-NEXT: and %s3, 3, %s3 +; CHECK-NEXT: sla.w.sx %s3, %s3, 3 +; CHECK-NEXT: sla.w.sx %s3, %s2, %s3 +; CHECK-NEXT: or %s3, %s4, %s3 +; CHECK-NEXT: cas.w %s3, (%s1), %s4 +; CHECK-NEXT: brne.w %s3, %s4, .LBB69_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s1, %s3, (32)0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, 3, %s0 +; CHECK-NEXT: sla.w.sx %s0, %s0, 3 +; CHECK-NEXT: srl %s0, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 16 +; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw or i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_or_4() { +; CHECK-LABEL: test_atomic_fetch_or_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, i@hi(%s0) +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: or %s0, 1, %s0 +; CHECK-NEXT: cas.w %s0, (%s1), %s2 +; CHECK-NEXT: brne.w %s0, %s2, .LBB70_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw or i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_or_8() { +; CHECK-LABEL: test_atomic_fetch_or_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, l@hi(%s0) +; CHECK-NEXT: ld %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: or %s0, 1, %s0 +; CHECK-NEXT: cas.l %s0, (%s1), %s2 +; CHECK-NEXT: brne.l %s0, %s2, .LBB71_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 
+entry: + %0 = atomicrmw or i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_or_16() { +; CHECK-LABEL: test_atomic_fetch_or_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, __atomic_fetch_or_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_or_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw or i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_xor_1() { +; CHECK-LABEL: test_atomic_fetch_xor_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: ldl.sx %s3, (,%s1) +; CHECK-NEXT: or %s2, 1, (0)1 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s4, 0, %s3 +; CHECK-NEXT: adds.w.sx %s3, %s0, (0)1 +; CHECK-NEXT: and %s3, 3, %s3 +; CHECK-NEXT: sla.w.sx %s3, %s3, 3 +; CHECK-NEXT: sla.w.sx %s3, %s2, %s3 +; CHECK-NEXT: xor %s3, %s4, %s3 +; CHECK-NEXT: cas.w %s3, (%s1), %s4 +; CHECK-NEXT: brne.w %s3, %s4, .LBB73_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s1, %s3, (32)0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, 3, %s0 +; CHECK-NEXT: sla.w.sx %s0, %s0, 3 +; CHECK-NEXT: srl %s0, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 24 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xor i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_xor_2() { +; CHECK-LABEL: test_atomic_fetch_xor_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: ldl.sx %s3, (,%s1) +; CHECK-NEXT: or %s2, 1, (0)1 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s4, 0, %s3 +; CHECK-NEXT: adds.w.sx %s3, %s0, (0)1 +; CHECK-NEXT: and %s3, 3, %s3 +; CHECK-NEXT: sla.w.sx %s3, %s3, 3 +; CHECK-NEXT: sla.w.sx %s3, %s2, %s3 +; CHECK-NEXT: xor %s3, %s4, %s3 +; CHECK-NEXT: cas.w %s3, (%s1), %s4 +; CHECK-NEXT: brne.w %s3, %s4, .LBB74_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s1, %s3, (32)0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, 3, %s0 +; CHECK-NEXT: sla.w.sx %s0, %s0, 3 +; CHECK-NEXT: srl %s0, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 16 +; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xor i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_xor_4() { +; CHECK-LABEL: test_atomic_fetch_xor_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, i@hi(%s0) +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: xor %s0, 1, %s0 +; CHECK-NEXT: cas.w %s0, (%s1), %s2 +; CHECK-NEXT: brne.w %s0, %s2, .LBB75_1 +; CHECK-NEXT: # %bb.2: # 
%atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xor i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_xor_8() { +; CHECK-LABEL: test_atomic_fetch_xor_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, l@hi(%s0) +; CHECK-NEXT: ld %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: xor %s0, 1, %s0 +; CHECK-NEXT: cas.l %s0, (%s1), %s2 +; CHECK-NEXT: brne.l %s0, %s2, .LBB76_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xor i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_xor_16() { +; CHECK-LABEL: test_atomic_fetch_xor_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, __atomic_fetch_xor_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_xor_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw xor i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define signext i8 @test_atomic_fetch_nand_1() { +; CHECK-LABEL: test_atomic_fetch_nand_1: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: ldl.sx %s4, (,%s1) +; CHECK-NEXT: or %s2, 1, (0)1 +; CHECK-NEXT: lea %s3, 255 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s5, 0, %s4 +; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1 +; CHECK-NEXT: and %s4, 3, %s4 +; CHECK-NEXT: sla.w.sx %s4, %s4, 3 +; CHECK-NEXT: sla.w.sx %s6, %s2, %s4 +; CHECK-NEXT: and %s6, %s5, %s6 +; CHECK-NEXT: xor %s6, -1, %s6 +; CHECK-NEXT: sla.w.sx %s4, %s3, %s4 +; CHECK-NEXT: and %s6, %s6, %s4 +; CHECK-NEXT: xor %s4, -1, %s4 +; CHECK-NEXT: and %s4, %s5, %s4 +; CHECK-NEXT: or %s4, %s4, %s6 +; CHECK-NEXT: cas.w %s4, (%s1), %s5 +; CHECK-NEXT: brne.w %s4, %s5, .LBB78_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s1, %s4, (32)0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, 3, %s0 +; CHECK-NEXT: sla.w.sx %s0, %s0, 3 +; CHECK-NEXT: srl %s0, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 24 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw nand i8* @c, i8 1 seq_cst + ret i8 %0 +} + +; Function Attrs: norecurse nounwind +define signext i16 @test_atomic_fetch_nand_2() { +; CHECK-LABEL: test_atomic_fetch_nand_2: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: and %s1, -4, %s0 +; CHECK-NEXT: ldl.sx %s4, (,%s1) +; CHECK-NEXT: or %s2, 1, (0)1 +; CHECK-NEXT: lea %s3, 65535 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s5, 0, %s4 +; CHECK-NEXT: adds.w.sx %s4, %s0, (0)1 +; CHECK-NEXT: and %s4, 3, %s4 +; CHECK-NEXT: sla.w.sx %s4, %s4, 3 +; CHECK-NEXT: sla.w.sx %s6, %s2, %s4 +; CHECK-NEXT: and %s6, %s5, %s6 +; CHECK-NEXT: xor 
%s6, -1, %s6 +; CHECK-NEXT: sla.w.sx %s4, %s3, %s4 +; CHECK-NEXT: and %s6, %s6, %s4 +; CHECK-NEXT: xor %s4, -1, %s4 +; CHECK-NEXT: and %s4, %s5, %s4 +; CHECK-NEXT: or %s4, %s4, %s6 +; CHECK-NEXT: cas.w %s4, (%s1), %s5 +; CHECK-NEXT: brne.w %s4, %s5, .LBB79_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: and %s1, %s4, (32)0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: and %s0, 3, %s0 +; CHECK-NEXT: sla.w.sx %s0, %s0, 3 +; CHECK-NEXT: srl %s0, %s1, %s0 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: sla.w.sx %s0, %s0, 16 +; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw nand i16* @s, i16 1 seq_cst + ret i16 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_nand_4() { +; CHECK-LABEL: test_atomic_fetch_nand_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, i@hi(%s0) +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: xor %s0, -1, %s0 +; CHECK-NEXT: or %s0, -2, %s0 +; CHECK-NEXT: cas.w %s0, (%s1), %s2 +; CHECK-NEXT: brne.w %s0, %s2, .LBB80_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw nand i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_fetch_nand_8() { +; CHECK-LABEL: test_atomic_fetch_nand_8: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, l@hi(%s0) +; CHECK-NEXT: ld %s0, (,%s1) +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: xor %s0, -1, %s0 +; CHECK-NEXT: or %s0, -2, %s0 +; CHECK-NEXT: cas.l %s0, (%s1), %s2 +; CHECK-NEXT: brne.l %s0, %s2, .LBB81_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw nand i64* @l, i64 1 seq_cst + ret i64 %0 +} + +; Function Attrs: norecurse nounwind +define i128 @test_atomic_fetch_nand_16() { +; CHECK-LABEL: test_atomic_fetch_nand_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, __atomic_fetch_nand_16@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __atomic_fetch_nand_16@hi(%s0) +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 1, (0)1 +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: or %s3, 5, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw nand i128* @it, i128 1 seq_cst + ret i128 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_max_4() { +; CHECK-LABEL: test_atomic_fetch_max_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, i@hi(%s0) +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK-NEXT: or %s2, 1, (0)1 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s3, 0, %s0 +; CHECK-NEXT: maxs.w.zx %s0, %s0, %s2 +; CHECK-NEXT: cas.w %s0, (%s1), %s3 +; CHECK-NEXT: brne.w %s0, %s3, .LBB83_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw max i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i32 
@test_atomic_fetch_min_4() { +; CHECK-LABEL: test_atomic_fetch_min_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, i@hi(%s0) +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK-NEXT: or %s2, 1, (0)1 +; CHECK-NEXT: or %s3, 2, (0)1 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s4, 0, %s0 +; CHECK-NEXT: cmps.w.sx %s5, %s0, %s3 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: cmov.w.lt %s0, %s4, %s5 +; CHECK-NEXT: cas.w %s0, (%s1), %s4 +; CHECK-NEXT: brne.w %s0, %s4, .LBB84_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw min i32* @i, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_umax_4() { +; CHECK-LABEL: test_atomic_fetch_umax_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, ui@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, ui@hi(%s0) +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK-NEXT: or %s2, 1, (0)1 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s3, 0, %s0 +; CHECK-NEXT: cmpu.w %s4, %s0, %s2 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: cmov.w.gt %s0, %s3, %s4 +; CHECK-NEXT: cas.w %s0, (%s1), %s3 +; CHECK-NEXT: brne.w %s0, %s3, .LBB85_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw umax i32* @ui, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define i32 @test_atomic_fetch_umin_4() { +; CHECK-LABEL: test_atomic_fetch_umin_4: +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, ui@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, ui@hi(%s0) +; CHECK-NEXT: ldl.sx %s0, (,%s1) +; CHECK-NEXT: or %s2, 1, (0)1 +; CHECK-NEXT: or %s3, 2, (0)1 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: or %s4, 0, %s0 +; CHECK-NEXT: cmpu.w %s5, %s0, %s3 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: cmov.w.lt %s0, %s4, %s5 +; CHECK-NEXT: cas.w %s0, (%s1), %s4 +; CHECK-NEXT: brne.w %s0, %s4, .LBB86_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = atomicrmw umin i32* @ui, i32 1 seq_cst + ret i32 %0 +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_1() { +; CHECK-LABEL: test_atomic_clear_1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, c@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, c@hi(%s0) +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: st1b %s1, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 0, i8* @c seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_2() { +; CHECK-LABEL: test_atomic_clear_2: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, s@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, s@hi(%s0) +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: st1b %s1, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 0, i8* bitcast (i16* @s to i8*) seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind 
+define void @test_atomic_clear_4() { +; CHECK-LABEL: test_atomic_clear_4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, i@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, i@hi(%s0) +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: st1b %s1, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 0, i8* bitcast (i32* @i to i8*) seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_8() { +; CHECK-LABEL: test_atomic_clear_8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, l@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, l@hi(%s0) +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: st1b %s1, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 0, i8* bitcast (i64* @l to i8*) seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_16() { +; CHECK-LABEL: test_atomic_clear_16: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: lea %s0, it@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, it@hi(%s0) +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: st1b %s1, (,%s0) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + store atomic i8 0, i8* bitcast (i128* @it to i8*) seq_cst, align 32 + ret void +} + +; Function Attrs: norecurse nounwind +define i64 @test_atomic_compare_exchange_8stk(i64, i64) { +; CHECK-LABEL: test_atomic_compare_exchange_8stk: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cas.l %s1, 192(%s11), %s0 +; CHECK-NEXT: cmps.l %s0, %s1, %s0 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: cmov.l.eq %s1, (63)0, %s0 +; CHECK-NEXT: adds.w.zx %s0, %s1, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %2 = alloca i64, align 32 + %3 = cmpxchg i64* %2, i64 %0, i64 %1 seq_cst seq_cst + %4 = extractvalue { i64, i1 } %3, 1 + %conv = zext i1 %4 to i64 + ret i64 %conv +} + +; Function Attrs: norecurse nounwind +define void @test_atomic_clear_8stk() { +; CHECK-LABEL: test_atomic_clear_8stk: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: st1b %s0, 192(,%s11) +; CHECK-NEXT: fencem 3 +; CHECK-NEXT: or %s11, 0, %s9 +entry: + %0 = alloca i64, align 32 + %1 = bitcast i64* %0 to i8* + store atomic i8 0, i8* %1 seq_cst, align 32 + ret void +} diff --git a/llvm/test/CodeGen/VE/bitcast.ll b/llvm/test/CodeGen/VE/bitcast.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/bitcast.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s + +; Function Attrs: noinline nounwind optnone +define dso_local i64 @bitcastd2l(double) { +; CHECK-LABEL: bitcastd2l: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s11, 0, %s9 + %2 = bitcast double %0 to i64 + ret i64 %2 +} + +; Function Attrs: noinline nounwind optnone +define dso_local double @bitcastl2d(i64) { +; CHECK-LABEL: bitcastl2d: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s11, 0, %s9 + %2 = bitcast i64 %0 to double + ret double %2 +} diff --git a/llvm/test/CodeGen/VE/bitreverse.ll b/llvm/test/CodeGen/VE/bitreverse.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/bitreverse.ll @@ -0,0 +1,104 @@ +; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s + +define i64 @func1(i64) { +; CHECK-LABEL: func1: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: brv %s0, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call i64 @llvm.bitreverse.i64(i64 %0) + ret 
i64 %2 +} + +declare i64 @llvm.bitreverse.i64(i64) + +define i32 @func2(i32) { +; CHECK-LABEL: func2: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: brv %s0, %s0 +; CHECK-NEXT: srl %s0, %s0, 32 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call i32 @llvm.bitreverse.i32(i32 %0) + ret i32 %2 +} + +declare i32 @llvm.bitreverse.i32(i32) + +define signext i16 @func3(i16 signext) { +; CHECK-LABEL: func3: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: brv %s0, %s0 +; CHECK-NEXT: srl %s0, %s0, 32 +; CHECK-NEXT: sra.w.sx %s0, %s0, 16 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call i16 @llvm.bitreverse.i16(i16 %0) + ret i16 %2 +} + +declare i16 @llvm.bitreverse.i16(i16) + +define signext i8 @func4(i8 signext) { +; CHECK-LABEL: func4: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: brv %s0, %s0 +; CHECK-NEXT: srl %s0, %s0, 32 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call i8 @llvm.bitreverse.i8(i8 %0) + ret i8 %2 +} + +declare i8 @llvm.bitreverse.i8(i8) + +define i64 @func5(i64) { +; CHECK-LABEL: func5: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: brv %s0, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call i64 @llvm.bitreverse.i64(i64 %0) + ret i64 %2 +} + +define i32 @func6(i32) { +; CHECK-LABEL: func6: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: brv %s0, %s0 +; CHECK-NEXT: srl %s0, %s0, 32 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call i32 @llvm.bitreverse.i32(i32 %0) + ret i32 %2 +} + +define zeroext i16 @func7(i16 zeroext) { +; CHECK-LABEL: func7: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: brv %s0, %s0 +; CHECK-NEXT: srl %s0, %s0, 32 +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: srl %s0, %s0, 16 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call i16 @llvm.bitreverse.i16(i16 %0) + ret i16 %2 +} + +define zeroext i8 @func8(i8 zeroext) { +; CHECK-LABEL: func8: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: # kill: def $sw0 killed $sw0 def $sx0 +; CHECK-NEXT: brv %s0, %s0 +; CHECK-NEXT: srl %s0, %s0, 32 +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: srl %s0, %s0, 24 +; CHECK-NEXT: # kill: def $sw0 killed $sw0 killed $sx0 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = tail call i8 @llvm.bitreverse.i8(i8 %0) + ret i8 %2 +} + diff --git a/llvm/test/CodeGen/VE/branch1.ll b/llvm/test/CodeGen/VE/branch1.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/branch1.ll @@ -0,0 +1,376 @@ +; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s + +define signext i8 @func1(i8 signext, i8 signext) { +; CHECK-LABEL: func1: +; CHECK: .LBB{{[0-9]+}}_5: +; CHECK-NEXT: brle.w %s0, %s1, .LBB0_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: lea %s0, ret@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, ret@hi(%s0) +; CHECK-NEXT: or %s0, 2, (0)1 +; CHECK-NEXT: bsic %lr, (,%s12) +; CHECK-NEXT: br.l .LBB0_3 +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK: .LBB{{[0-9]+}}_3: +; CHECK-NEXT: sla.w.sx %s0, %s0, 24 +; CHECK-NEXT: sra.w.sx %s0, %s0, 24 +; CHECK-NEXT: or %s11, 0, %s9 + %3 = icmp sgt i8 %0, %1 + br i1 %3, label %4, label %7 + +;