diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -82,6 +82,7 @@
 class MachineBasicBlock;
 class MachineFunction;
 class MachineInstr;
+class MachineInstrBuilder;
 class MachineJumpTableInfo;
 class MachineLoop;
 class MachineRegisterInfo;
@@ -4091,6 +4092,9 @@
                                     const AsmOperandInfo &OpInfo,
                                     SelectionDAG &DAG) const;
 
+  virtual bool lowerAsmOperandForConstraint(Value *Op, MachineInstrBuilder MIB,
+                                            std::string &Constraint) const;
+
   //===--------------------------------------------------------------------===//
   // Div utility functions
   //
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1566,36 +1566,564 @@
   return false;
 }
 
+namespace {
+
+/// GISelAsmOperandInfo - This contains information for each constraint that we
+/// are lowering.
+class GISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
+public:
+  /// Regs - If this is a register or register class operand, this
+  /// contains the set of assigned registers corresponding to the operand.
+  SmallVector<Register, 4> Regs;
+
+  /// RegType - Keeps track of the physical register's type.
+  /// FIXME: Do we really need to remember the type? The bitwidth of the
+  /// register should be enough, which we can always recompute.
+  MVT RegType;
+
+  explicit GISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
+      : TargetLowering::AsmOperandInfo(info) {}
+};
+
+using GISelAsmOperandInfoVector = SmallVector<GISelAsmOperandInfo, 16>;
+
+class ExtraFlags {
+  unsigned Flags = 0;
+
+public:
+  explicit ExtraFlags(const CallInst &CI) {
+    const InlineAsm *IA = cast<InlineAsm>(CI.getCalledValue());
+    if (IA->hasSideEffects())
+      Flags |= InlineAsm::Extra_HasSideEffects;
+    if (IA->isAlignStack())
+      Flags |= InlineAsm::Extra_IsAlignStack;
+    if (CI.isConvergent())
+      Flags |= InlineAsm::Extra_IsConvergent;
+    Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
+  }
+
+  void update(const TargetLowering::AsmOperandInfo &OpInfo) {
+    // Ideally, we would only check against memory constraints. However, the
+    // meaning of an Other constraint can be target-specific and we can't
+    // easily reason about it. Therefore, be conservative and set
+    // MayLoad/MayStore for Other constraints as well.
+    if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+        OpInfo.ConstraintType == TargetLowering::C_Other) {
+      if (OpInfo.Type == InlineAsm::isInput)
+        Flags |= InlineAsm::Extra_MayLoad;
+      else if (OpInfo.Type == InlineAsm::isOutput)
+        Flags |= InlineAsm::Extra_MayStore;
+      else if (OpInfo.Type == InlineAsm::isClobber)
+        Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
+    }
+  }
+
+  unsigned get() const { return Flags; }
+};
+} // namespace
+
+/// FIXME: Most of the static functions below are copied from the SelectionDAG
+/// code. They are private helper functions and aren't available here. Find a
+/// better way to reuse the code.
+
+/// GetRegistersForValue - Assign virtual/physical registers for the specified
+/// register operand.
+static void GetRegistersForValue(MachineFunction &MF, + MachineIRBuilder &MIRBuilder, + MachineInstrBuilder MIB, + GISelAsmOperandInfo &OpInfo, + GISelAsmOperandInfo &RefOpInfo) { + + const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + + // No work to do for memory operations. + if (OpInfo.ConstraintType == TargetLowering::C_Memory) + return; + + // If this is a constraint for a single physreg, or a constraint for a + // register class, find it. + unsigned AssignedReg; + const TargetRegisterClass *RC; + std::tie(AssignedReg, RC) = TLI.getRegForInlineAsmConstraint( + &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT); + // RC is unset only on failure. Return immediately. + if (!RC) + return; + + // No need to allocate a matching input constraint since the constraint it's + // matching to has already been allocated. + if (OpInfo.isMatchingInputConstraint()) + return; + + // Remember the type of the register for later + OpInfo.RegType = *TRI.legalclasstypes_begin(*RC); + + // Initialize NumRegs. + unsigned NumRegs = 1; + if (OpInfo.ConstraintVT != MVT::Other) + NumRegs = + TLI.getNumRegisters(MF.getFunction().getContext(), OpInfo.ConstraintVT); + + // If this is a constraint for a specific physical register assign it now. + + // If this associated to a specific register, initialize iterator to correct + // place. If virtual, make sure we have enough registers + + // Initialize iterator if necessary + TargetRegisterClass::iterator I = RC->begin(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + + // Do not check for single registers. + if (AssignedReg) { + for (; *I != AssignedReg; ++I) + assert(I != RC->end() && "AssignedReg should be member of RC"); + } + + for (; NumRegs; --NumRegs, ++I) { + assert(I != RC->end() && "Ran out of registers to allocate!"); + Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RC); + OpInfo.Regs.push_back(R); + } +} + +/// Return an integer indicating how general CT is. +static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { + switch (CT) { + case TargetLowering::C_Immediate: + case TargetLowering::C_Other: + case TargetLowering::C_Unknown: + return 0; + case TargetLowering::C_Register: + return 1; + case TargetLowering::C_RegisterClass: + return 2; + case TargetLowering::C_Memory: + return 3; + } + llvm_unreachable("Invalid constraint type"); +} + +static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, + const TargetLowering *TLI) { + assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options"); + unsigned BestIdx = 0; + TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown; + int BestGenerality = -1; + + // Loop over the options, keeping track of the most general one. + for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) { + TargetLowering::ConstraintType CType = + TLI->getConstraintType(OpInfo.Codes[i]); + + // Indirect 'other' or 'immediate' constraints are not allowed. + if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory || + CType == TargetLowering::C_Register || + CType == TargetLowering::C_RegisterClass)) + continue; + + // If this is an 'other' or 'immediate' constraint, see if the operand is + // valid for it. For example, on X86 we might have an 'rI' constraint. If + // the operand is an integer in the range [0..31] we want to use I (saving a + // load of a register), otherwise we must use 'r'. 
+ if ((CType == TargetLowering::C_Other || + CType == TargetLowering::C_Immediate)) { + assert(OpInfo.Codes[i].size() == 1 && + "Unhandled multi-letter 'other' constraint"); + // FIXME: prefer immediate constraints if the target allows it + } + + // Things with matching constraints can only be registers, per gcc + // documentation. This mainly affects "g" constraints. + if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput()) + continue; + + // This constraint letter is more general than the previous one, use it. + int Generality = getConstraintGenerality(CType); + if (Generality > BestGenerality) { + BestType = CType; + BestIdx = i; + BestGenerality = Generality; + } + } + + OpInfo.ConstraintCode = OpInfo.Codes[BestIdx]; + OpInfo.ConstraintType = BestType; +} + +static void ComputeConstraintToUse(const TargetLowering *TLI, + TargetLowering::AsmOperandInfo &OpInfo) { + assert(!OpInfo.Codes.empty() && "Must have at least one constraint"); + + // Single-letter constraints ('r') are very common. + if (OpInfo.Codes.size() == 1) { + OpInfo.ConstraintCode = OpInfo.Codes[0]; + OpInfo.ConstraintType = TLI->getConstraintType(OpInfo.ConstraintCode); + } else { + ChooseConstraint(OpInfo, TLI); + } + + // 'X' matches anything. + if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) { + // Labels and constants are handled elsewhere ('X' is the only thing + // that matches labels). For Functions, the type here is the type of + // the result, which is not what we want to look at; leave them alone. + Value *v = OpInfo.CallOperandVal; + if (isa(v) || isa(v) || isa(v)) { + return; + } + + // Otherwise, try to resolve it to something we know about by looking at + // the actual operand type. + if (const char *Repl = TLI->LowerXConstraint(OpInfo.ConstraintVT)) { + OpInfo.ConstraintCode = Repl; + OpInfo.ConstraintType = TLI->getConstraintType(OpInfo.ConstraintCode); + } + } +} + bool IRTranslator::translateInlineAsm(const CallInst &CI, MachineIRBuilder &MIRBuilder) { const InlineAsm &IA = cast(*CI.getCalledValue()); - StringRef ConstraintStr = IA.getConstraintString(); + ImmutableCallSite CS(&CI); - bool HasOnlyMemoryClobber = false; - if (!ConstraintStr.empty()) { - // Until we have full inline assembly support, we just try to handle the - // very simple case of just "~{memory}" to avoid falling back so often. - if (ConstraintStr != "~{memory}") - return false; - HasOnlyMemoryClobber = true; - } + /// ConstraintOperands - Information about all of the constraints. + GISelAsmOperandInfoVector ConstraintOperands; + + const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering(); + TargetLowering::AsmOperandInfoVector TargetConstraints = + TLI.ParseConstraints(*DL, MF->getSubtarget().getRegisterInfo(), CS); + + ExtraFlags ExtraInfo(CI); + + unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. + unsigned ResNo = 0; // ResNo - The result number of the next output. + for (auto &T : TargetConstraints) { + ConstraintOperands.push_back(GISelAsmOperandInfo(T)); + GISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); + + // Compute the value type for each operand. 
+ if (OpInfo.Type == InlineAsm::isInput || + (OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) { + OpInfo.CallOperandVal = const_cast(CS.getArgument(ArgNo++)); + + if (const auto *BB = dyn_cast(OpInfo.CallOperandVal)) { + LLVM_DEBUG(dbgs() << "Basic block input operands not supported yet"); + return false; + } - unsigned ExtraInfo = 0; - if (IA.hasSideEffects()) - ExtraInfo |= InlineAsm::Extra_HasSideEffects; - if (IA.getDialect() == InlineAsm::AD_Intel) - ExtraInfo |= InlineAsm::Extra_AsmDialect; + llvm::Type *OpTy = OpInfo.CallOperandVal->getType(); - // HACK: special casing for ~memory. - if (HasOnlyMemoryClobber) - ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); + // If this is an indirect operand, the operand is a pointer to the + // accessed type. + if (OpInfo.isIndirect) { + PointerType *PtrTy = dyn_cast(OpTy); + if (!PtrTy) + report_fatal_error("Indirect operand for inline asm not a pointer!"); + OpTy = PtrTy->getElementType(); + } - auto Inst = MIRBuilder.buildInstr(TargetOpcode::INLINEASM) + // FIXME: Support aggregate input operands + if (!OpTy->isSingleValueType()) { + LLVM_DEBUG(dbgs() << "Aggregate input operands are not supported yet"); + return false; + } + + OpInfo.ConstraintVT = TLI.getValueType(*DL, OpTy, true).getSimpleVT(); + + } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) { + assert(!CI.getType()->isVoidTy() && "Bad inline asm!"); + if (StructType *STy = dyn_cast(CS.getType())) { + OpInfo.ConstraintVT = + TLI.getSimpleValueType(*DL, STy->getElementType(ResNo)); + } else { + assert(ResNo == 0 && "Asm only has one result!"); + OpInfo.ConstraintVT = TLI.getSimpleValueType(*DL, CI.getType()); + } + ++ResNo; + } else { + OpInfo.ConstraintVT = MVT::Other; + } + + // Compute the constraint code and ConstraintType to use. + ComputeConstraintToUse(&TLI, OpInfo); + + // The selected constraint type might expose new sideeffects + ExtraInfo.update(OpInfo); + } + + // At this point, all operand types are decided. + // Create the MachineInstr, but don't insert it yet since input + // operands still need to insert instructions before this one + auto Inst = MIRBuilder.buildInstrNoInsert(TargetOpcode::INLINEASM) .addExternalSymbol(IA.getAsmString().c_str()) - .addImm(ExtraInfo); + .addImm(ExtraInfo.get()); + + // Collects the output operands for later processing + GISelAsmOperandInfoVector OutputOperands; + + for (auto &OpInfo : ConstraintOperands) { + GISelAsmOperandInfo &RefOpInfo = + OpInfo.isMatchingInputConstraint() + ? ConstraintOperands[OpInfo.getMatchedOperand()] + : OpInfo; + + // Assign registers for register operands + GetRegistersForValue(*MF, MIRBuilder, Inst, OpInfo, RefOpInfo); + + switch (OpInfo.Type) { + case InlineAsm::isOutput: + if (OpInfo.ConstraintType == TargetLowering::C_Memory) { + unsigned ConstraintID = + TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); + assert(ConstraintID != InlineAsm::Constraint_Unknown && + "Failed to convert memory constraint code to constraint id."); + + // Add information to the INLINEASM node to know about this output. 
+ unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); + OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); + Inst.addImm(OpFlags); + ArrayRef SourceRegs = + getOrCreateVRegs(*OpInfo.CallOperandVal); + assert( + SourceRegs.size() == 1 && + "Expected the memory output to fit into a single virtual register"); + Inst.addReg(SourceRegs[0]); + } else { + // Otherwise, this outputs to a register (directly for C_Register / + // C_RegisterClass. Find a register that we can use. + assert(OpInfo.ConstraintType == TargetLowering::C_Register || + OpInfo.ConstraintType == TargetLowering::C_RegisterClass); + + if (OpInfo.Regs.empty()) { + LLVM_DEBUG(dbgs() + << "Couldn't allocate output register for constraint"); + return false; + } + + // Add information to the INLINEASM node to know that this register is + // set. + unsigned Flag = InlineAsm::getFlagWord( + OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber + : InlineAsm::Kind_RegDef, + OpInfo.Regs.size()); + if (!OpInfo.Regs.empty() && + Register::isVirtualRegister(OpInfo.Regs.front())) { + // Put the register class of the virtual registers in the flag word. + // That way, later passes can recompute register class constraints for + // inline assembly as well as normal instructions. Don't do this for + // tied operands that can use the regclass information from the def. + const TargetRegisterClass *RC = MRI->getRegClass(OpInfo.Regs.front()); + Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); + } + + Inst.addImm(Flag); + + for (unsigned j = 0, e = OpInfo.Regs.size(); j < e; j++) { + Register Reg = OpInfo.Regs[j]; + Inst.addReg(Reg, + RegState::Define | + getImplRegState(Register::isPhysicalRegister(Reg))); + } + + // Remember this output operand for later processing + OutputOperands.push_back(OpInfo); + } + + break; + + case InlineAsm::isInput: { + if (OpInfo.isMatchingInputConstraint()) { + unsigned MatchingIndex = OpInfo.getMatchedOperand(); + auto MatchedOpInfo = ConstraintOperands[MatchingIndex]; + assert(MatchedOpInfo.Type == InlineAsm::isOutput); + if (MatchedOpInfo.ConstraintType == TargetLowering::C_Register || + MatchedOpInfo.ConstraintType == TargetLowering::C_RegisterClass) { + + if (OpInfo.isIndirect) { + LLVM_DEBUG(dbgs() << "Don't know how to handle tied " + "indirect register inputs yet"); + return false; + } + + ArrayRef SourceRegs = + getOrCreateVRegs(*OpInfo.CallOperandVal); + unsigned NumRegs = SourceRegs.size(); + assert(NumRegs == MatchedOpInfo.Regs.size()); + + if (const TargetRegisterClass *RC = + MRI->getRegClass(MatchedOpInfo.Regs.front())) { + unsigned Flag = + InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, NumRegs); + Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIndex); + Inst.addImm(Flag); + + for (unsigned j = 0; j < NumRegs; j++) { + Register DstReg = MRI->createVirtualRegister(RC); + Register SrcReg = SourceRegs[j]; + MIRBuilder.buildCopy(DstReg, SrcReg); + Inst.addReg(DstReg); + } + } else { + LLVM_DEBUG( + dbgs() + << "Couldn't retrieve register class of tied register operand"); + return false; + ; + } + break; + } + + assert(MatchedOpInfo.ConstraintType == TargetLowering::C_Memory); + LLVM_DEBUG(dbgs() << "Tied memory operands not supported yet"); + return false; + } + + if (OpInfo.ConstraintType == TargetLowering::C_Other && + OpInfo.isIndirect) { + LLVM_DEBUG(dbgs() << "Indirect input operands with unknown constraint " + "not supported yet"); + return false; + } + + if (OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == 
TargetLowering::C_Other) { + bool Success = TLI.lowerAsmOperandForConstraint( + OpInfo.CallOperandVal, Inst, OpInfo.ConstraintCode); + + if (!Success) { + LLVM_DEBUG(dbgs() << "Can't handle target constraint " + << OpInfo.ConstraintCode << " yet"); + return false; + } + break; + } + + if (OpInfo.ConstraintType == TargetLowering::C_Memory) { + assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); + + unsigned ConstraintID = + TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); + unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); + OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); + Inst.addImm(OpFlags); + ArrayRef SourceRegs = + getOrCreateVRegs(*OpInfo.CallOperandVal); + assert( + SourceRegs.size() == 1 && + "Expected the memory input to fit into a single virtual register"); + Inst.addReg(SourceRegs[0]); + break; + } + + assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || + OpInfo.ConstraintType == TargetLowering::C_Register) && + "Unknown constraint type!"); + + if (OpInfo.isIndirect) { + LLVM_DEBUG(dbgs() << "Can't handle indirect register inputs yet " + "for constraint '" + << OpInfo.ConstraintCode << "'"); + return false; + } + + // Copy the input into the appropriate registers. + if (OpInfo.Regs.empty()) { + LLVM_DEBUG( + dbgs() + << "Couldn't allocate input register for register constraint"); + return false; + } + + unsigned NumRegs = OpInfo.Regs.size(); + ArrayRef SourceRegs = getOrCreateVRegs(*OpInfo.CallOperandVal); + assert(NumRegs == SourceRegs.size() && + "Expected the number of input registers to match the number of " + "source registers"); + + unsigned Flag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, NumRegs); + Inst.addImm(Flag); + for (unsigned i = 0; i < NumRegs; i++) { + MIRBuilder.buildCopy(OpInfo.Regs[i], SourceRegs[0]); + Inst.addReg(OpInfo.Regs[i]); + } + + break; + } + case InlineAsm::isClobber: { + + unsigned NumRegs = OpInfo.Regs.size(); + if (NumRegs > 0) { + unsigned Flag = + InlineAsm::getFlagWord(InlineAsm::Kind_Clobber, NumRegs); + Inst.addImm(Flag); + + for (Register Reg : OpInfo.Regs) { + Inst.addReg(Reg, + RegState::Define | RegState::EarlyClobber | + getImplRegState(Register::isPhysicalRegister(Reg))); + } + } + break; + } + } + } + if (const MDNode *SrcLoc = CI.getMetadata("srcloc")) Inst.addMetadata(SrcLoc); + // All inputs are handled, insert the instruction now + MIRBuilder.insertInstr(Inst); + + // Finally, copy the output operands into the output registers + ArrayRef ResRegs = getOrCreateVRegs(CI); + if (ResRegs.size() != OutputOperands.size()) { + LLVM_DEBUG(dbgs() << "Expected the number of output registers to match the " + "number of destination registers"); + return false; + } + for (unsigned int i = 0, e = ResRegs.size(); i < e; i++) { + GISelAsmOperandInfo &OpInfo = OutputOperands[i]; + + if (OpInfo.Regs.empty()) + continue; + + switch (OpInfo.ConstraintType) { + case TargetLowering::C_Register: + case TargetLowering::C_RegisterClass: { + if (OpInfo.Regs.size() > 1) { + LLVM_DEBUG(dbgs() << "Output operands with multiple defining " + "registers are not supported yet"); + return false; + } + + Register SrcReg = OpInfo.Regs[0]; + if (MRI->getType(ResRegs[i]).getSizeInBits() < + OpInfo.RegType.getSizeInBits()) { + // First copy the non-typed virtual register into a generic virtual + // register + Register Tmp1Reg = + MRI->createGenericVirtualRegister(LLT(OpInfo.RegType)); + MIRBuilder.buildCopy(Tmp1Reg, SrcReg); + // Need to truncate the result of the register + 
MIRBuilder.buildTrunc(ResRegs[i], Tmp1Reg);
+      } else {
+        MIRBuilder.buildCopy(ResRegs[i], SrcReg);
+      }
+      break;
+    }
+    case TargetLowering::C_Immediate:
+    case TargetLowering::C_Other:
+      return false;
+    case TargetLowering::C_Memory:
+      break; // Already handled.
+    case TargetLowering::C_Unknown:
+      LLVM_DEBUG(dbgs() << "Unexpected unknown constraint");
+      return false;
+    }
+  }
+
   return true;
 }
 
diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
--- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -690,7 +690,8 @@
 
       // Ignore target-specific post-isel instructions: they should use proper
       // regclasses.
-      if (isTargetSpecificOpcode(MI.getOpcode()) && !MI.isPreISelOpcode())
+      if ((isTargetSpecificOpcode(MI.getOpcode()) && !MI.isPreISelOpcode()) ||
+          MI.isInlineAsm())
         continue;
 
       if (!assignInstr(MI)) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -15,6 +15,7 @@
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
@@ -4305,6 +4306,36 @@
   return R;
 }
 
+bool TargetLowering::lowerAsmOperandForConstraint(
+    Value *Op, MachineInstrBuilder MIB, std::string &Constraint) const {
+
+  if (Constraint.length() > 1)
+    return false;
+
+  char ConstraintLetter = Constraint[0];
+  switch (ConstraintLetter) {
+  default:
+    return false;
+  case 'i': // Simple Integer or Relocatable Constant
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+      assert(CI->getBitWidth() <= 64 &&
+             "expected immediate to fit into 64-bits");
+      unsigned Flag = InlineAsm::getFlagWord(InlineAsm::Kind_Imm, 1);
+      bool IsBool = CI->getBitWidth() == 1;
+      BooleanContent BCont = getBooleanContents(MVT::i64);
+      ISD::NodeType ExtOpc =
+          IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
+      int64_t ExtVal =
+          ExtOpc == ISD::ZERO_EXTEND ? CI->getZExtValue() : CI->getSExtValue();
+
+      MIB.addImm(Flag);
+      MIB.addImm(ExtVal);
+      return true;
+    }
+    return false;
+  }
+}
+
 //===----------------------------------------------------------------------===//
 //  Constraint Selection.
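[Editor's note] To illustrate how the new hook above is intended to be used, here is a minimal sketch of a target-specific override. Everything named in it (MyTargetLowering, the 'J' constraint and its 12-bit range) is hypothetical and not part of this patch; the only contract it relies on is the one the default implementation establishes: on success, append the Kind_Imm flag word followed by the immediate to the INLINEASM being built and return true, otherwise return false so translateInlineAsm() bails out.

// Sketch only: "MyTargetLowering" and the 'J' constraint are made up.
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

namespace {
// Hypothetical target lowering class, only to give the override a home.
class MyTargetLowering : public TargetLowering {
public:
  using TargetLowering::TargetLowering;
  bool lowerAsmOperandForConstraint(Value *Op, MachineInstrBuilder MIB,
                                    std::string &Constraint) const override;
};
} // namespace

bool MyTargetLowering::lowerAsmOperandForConstraint(
    Value *Op, MachineInstrBuilder MIB, std::string &Constraint) const {
  // Assume 'J' is a target immediate constraint accepting a 12-bit unsigned
  // value (invented for this sketch).
  if (Constraint.size() == 1 && Constraint[0] == 'J') {
    if (auto *CI = dyn_cast<ConstantInt>(Op)) {
      if (CI->getBitWidth() <= 64 && isUInt<12>(CI->getZExtValue())) {
        // Same operand layout as the generic 'i' case above: the Kind_Imm
        // flag word first, then the immediate itself.
        MIB.addImm(InlineAsm::getFlagWord(InlineAsm::Kind_Imm, 1));
        MIB.addImm(CI->getZExtValue());
        return true;
      }
    }
    // Not an immediate we can encode: returning false makes the IRTranslator
    // give up on this call and fall back to SelectionDAG.
    return false;
  }
  // Everything else (e.g. plain 'i') goes to the default implementation.
  return TargetLowering::lowerAsmOperandForConstraint(Op, MIB, Constraint);
}

A real target would additionally need getRegForInlineAsmConstraint() and getInlineAsmMemConstraint() to cover the constraints it wants to support, since translateInlineAsm() routes register and memory constraints through those hooks.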
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp --- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -819,6 +819,53 @@ return CheckCopy(); } +static bool selectInlineAsm(MachineInstr &I, const TargetInstrInfo &TII, + MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) { + assert(I.isInlineAsm() && "Expected an inline asm instruction"); + + // Iterate over the operands of the inline asm + // If there is a memory operand, we have to custom select it + + unsigned i = InlineAsm::MIOp_FirstOperand, e = I.getNumOperands(); + while (i != e) { + unsigned Flags = I.getOperand(i).getImm(); + unsigned NumOperands = InlineAsm::getNumOperandRegisters(Flags); + + if (!InlineAsm::isMemKind(Flags)) { + // Non-mem operands can be skipped + i += NumOperands + 1; + continue; + } + + assert(NumOperands == 1 && "Memory operands with multiple values?"); + unsigned TiedToOperand; + if (InlineAsm::isUseOperandTiedToDef(Flags, TiedToOperand)) { + LLVM_DEBUG(dbgs() << "Tied memory operands not supported yet"); + return false; + } + + unsigned ConstraintID = InlineAsm::getMemoryConstraintID(Flags); + switch (ConstraintID) { + default: + LLVM_DEBUG(dbgs() << "Unknown memory operand"); + return false; + case InlineAsm::Constraint_m: + case InlineAsm::Constraint_Q: + const TargetRegisterClass *TRC = TRI.getPointerRegClass(*I.getMF()); + Register DstReg = MRI.createVirtualRegister(TRC); + MachineOperand &Src = I.getOperand(i + 1); + Register SrcReg = Src.getReg(); + MachineIRBuilder MIB(I); + MIB.buildCopy({DstReg}, {SrcReg}); + Src.setReg(DstReg); + i += 2; + } + } + return true; +} + static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) { if (!DstTy.isScalar() || !SrcTy.isScalar()) return GenericOpc; @@ -1787,6 +1834,10 @@ if (I.isCopy()) return selectCopy(I, TII, MRI, TRI, RBI); + if (I.isInlineAsm()) { + return selectInlineAsm(I, TII, MRI, TRI, RBI); + } + return true; } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll @@ -0,0 +1,322 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -mtriple=aarch64-darwin-ios13 -O0 -global-isel -stop-after=irtranslator -o - %s | FileCheck %s + +define i32 @t1() nounwind ssp { + ; CHECK-LABEL: name: t1 + ; CHECK: bb.1.entry: + ; CHECK: INLINEASM &"mov ${0:w}, 7", 0, 589834, def %0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %0 + ; CHECK: $w0 = COPY [[COPY]](s32) + ; CHECK: RET_ReallyLR implicit $w0 +entry: + %0 = tail call i32 asm "mov ${0:w}, 7", "=r"() nounwind + ret i32 %0 +} + +define i64 @t2() nounwind ssp { + ; CHECK-LABEL: name: t2 + ; CHECK: bb.1.entry: + ; CHECK: INLINEASM &"mov $0, 7", 0, 1310730, def %0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %0 + ; CHECK: $x0 = COPY [[COPY]](s64) + ; CHECK: RET_ReallyLR implicit $x0 +entry: + %0 = tail call i64 asm "mov $0, 7", "=r"() nounwind + ret i64 %0 +} + +define i64 @t3() nounwind ssp { + ; CHECK-LABEL: name: t3 + ; CHECK: bb.1.entry: + ; CHECK: INLINEASM &"mov ${0:w}, 7", 0, 1310730, def %0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %0 + ; CHECK: $x0 = COPY [[COPY]](s64) + ; CHECK: RET_ReallyLR implicit $x0 +entry: + %0 = tail call i64 asm "mov ${0:w}, 7", "=r"() nounwind + ret i64 %0 +} 
+ +; rdar://9281206 + +define void @t4(i64 %op) nounwind { + ; CHECK-LABEL: name: t4 + ; CHECK: bb.1.entry: + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]](s64) + ; CHECK: [[COPY2:%[0-9]+]]:gpr64common = COPY [[DEF]](s64) + ; CHECK: INLINEASM &"mov x0, $1; svc #0;", 1, 1310730, def %1, 9, [[COPY1]], 9, [[COPY2]], 12, implicit-def early-clobber $x0 + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY %1 + ; CHECK: RET_ReallyLR +entry: + %0 = tail call i64 asm sideeffect "mov x0, $1; svc #0;", "=r,r,r,~{x0}"(i64 %op, i64 undef) nounwind + ret void +} + +; rdar://9394290 + +define float @t5(float %x) nounwind { + ; CHECK-LABEL: name: t5 + ; CHECK: bb.1.entry: + ; CHECK: liveins: $s0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]](s32) + ; CHECK: INLINEASM &"fadd ${0:s}, ${0:s}, ${0:s}", 0, 393226, def %1, 2147483657, [[COPY1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY %1 + ; CHECK: $s0 = COPY [[COPY2]](s32) + ; CHECK: RET_ReallyLR implicit $s0 +entry: + %0 = tail call float asm "fadd ${0:s}, ${0:s}, ${0:s}", "=w,0"(float %x) nounwind + ret float %0 +} + +; rdar://9553599 + +define zeroext i8 @t6(i8* %src) nounwind { + ; CHECK-LABEL: name: t6 + ; CHECK: bb.1.entry: + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]](p0) + ; CHECK: INLINEASM &"ldtrb ${0:w}, [$1]", 0, 589834, def %1, 9, [[COPY1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY %1 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) + ; CHECK: $w0 = COPY [[ZEXT]](s32) + ; CHECK: RET_ReallyLR implicit $w0 +entry: + %0 = tail call i8 asm "ldtrb ${0:w}, [$1]", "=r,r"(i8* %src) nounwind + ret i8 %0 +} + +define void @t7(i8* %f, i32 %g) nounwind { + ; CHECK-LABEL: name: t7 + ; CHECK: bb.1.entry: + ; CHECK: liveins: $w1, $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.f.addr + ; CHECK: G_STORE [[COPY]](p0), [[FRAME_INDEX]](p0) :: (store 8 into %ir.f.addr) + ; CHECK: [[COPY2:%[0-9]+]]:gpr32common = COPY [[COPY1]](s32) + ; CHECK: INLINEASM &"str ${1:w}, $0", 16, 458766, [[FRAME_INDEX]](p0), 9, [[COPY2]] + ; CHECK: RET_ReallyLR +entry: + %f.addr = alloca i8*, align 8 + store i8* %f, i8** %f.addr, align 8 + call void asm "str ${1:w}, $0", "=*Q,r"(i8** %f.addr, i32 %g) nounwind + ret void +} + +; rdar://10258229 +; ARM64TargetLowering::getRegForInlineAsmConstraint() should recognize 'v' +; registers. 
+define void @t8() nounwind ssp { + ; CHECK-LABEL: name: t8 + ; CHECK: bb.1.entry: + ; CHECK: INLINEASM &nop, 1, 12, implicit-def early-clobber $q8 + ; CHECK: RET_ReallyLR +entry: + tail call void asm sideeffect "nop", "~{v8}"() nounwind + ret void +} + +define void @t9() nounwind { + ; CHECK-LABEL: name: t9 + ; CHECK: bb.1.entry: + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.data + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 16 from %ir.data) + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY [[LOAD]](<2 x s64>) + ; CHECK: INLINEASM &"mov.2d v4, $0\0A", 1, 9, [[COPY]], 12, implicit-def early-clobber $q4 + ; CHECK: RET_ReallyLR +entry: + %data = alloca <2 x double>, align 16 + %0 = load <2 x double>, <2 x double>* %data, align 16 + call void asm sideeffect "mov.2d v4, $0\0A", "w,~{v4}"(<2 x double> %0) nounwind + ret void +} + +define void @t10() nounwind { + ; CHECK-LABEL: name: t10 + ; CHECK: bb.1.entry: + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.data + ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.a + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX1]](p0) + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8 from %ir.data) + ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]](p0) + ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY [[LOAD]](<2 x s32>) + ; CHECK: INLINEASM &"ldr ${1:z}, [$0]\0A", 1, 9, [[COPY1]], 9, [[COPY2]] + ; CHECK: [[COPY3:%[0-9]+]]:gpr64common = COPY [[COPY]](p0) + ; CHECK: [[COPY4:%[0-9]+]]:fpr64 = COPY [[LOAD]](<2 x s32>) + ; CHECK: INLINEASM &"ldr ${1:q}, [$0]\0A", 1, 9, [[COPY3]], 9, [[COPY4]] + ; CHECK: [[COPY5:%[0-9]+]]:gpr64common = COPY [[COPY]](p0) + ; CHECK: [[COPY6:%[0-9]+]]:fpr64 = COPY [[LOAD]](<2 x s32>) + ; CHECK: INLINEASM &"ldr ${1:d}, [$0]\0A", 1, 9, [[COPY5]], 9, [[COPY6]] + ; CHECK: [[COPY7:%[0-9]+]]:gpr64common = COPY [[COPY]](p0) + ; CHECK: [[COPY8:%[0-9]+]]:fpr64 = COPY [[LOAD]](<2 x s32>) + ; CHECK: INLINEASM &"ldr ${1:s}, [$0]\0A", 1, 9, [[COPY7]], 9, [[COPY8]] + ; CHECK: [[COPY9:%[0-9]+]]:gpr64common = COPY [[COPY]](p0) + ; CHECK: [[COPY10:%[0-9]+]]:fpr64 = COPY [[LOAD]](<2 x s32>) + ; CHECK: INLINEASM &"ldr ${1:h}, [$0]\0A", 1, 9, [[COPY9]], 9, [[COPY10]] + ; CHECK: [[COPY11:%[0-9]+]]:gpr64common = COPY [[COPY]](p0) + ; CHECK: [[COPY12:%[0-9]+]]:fpr64 = COPY [[LOAD]](<2 x s32>) + ; CHECK: INLINEASM &"ldr ${1:b}, [$0]\0A", 1, 9, [[COPY11]], 9, [[COPY12]] + ; CHECK: RET_ReallyLR +entry: + %data = alloca <2 x float>, align 8 + %a = alloca [2 x float], align 4 + %arraydecay = getelementptr inbounds [2 x float], [2 x float]* %a, i32 0, i32 0 + %0 = load <2 x float>, <2 x float>* %data, align 8 + call void asm sideeffect "ldr ${1:z}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind + call void asm sideeffect "ldr ${1:q}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind + call void asm sideeffect "ldr ${1:d}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind + call void asm sideeffect "ldr ${1:s}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind + call void asm sideeffect "ldr ${1:h}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind + call void asm sideeffect "ldr ${1:b}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind + ret void +} + +define void @t11() nounwind { + ; CHECK-LABEL: name: t11 + ; CHECK: bb.1.entry: + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.a + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[FRAME_INDEX]](p0) :: (dereferenceable load 4 from %ir.a) + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY [[LOAD]](s32) + ; CHECK: INLINEASM &"mov ${1:x}, ${0:x}\0A", 9, 9, [[COPY]], 13, 0 + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 4 from %ir.a) + ; CHECK: [[COPY1:%[0-9]+]]:gpr32common = COPY [[LOAD1]](s32) + ; CHECK: INLINEASM &"mov ${1:w}, ${0:w}\0A", 9, 9, [[COPY1]], 13, 0 + ; CHECK: RET_ReallyLR +entry: + %a = alloca i32, align 4 + %0 = load i32, i32* %a, align 4 + call void asm sideeffect "mov ${1:x}, ${0:x}\0A", "r,i"(i32 %0, i32 0) nounwind + %1 = load i32, i32* %a, align 4 + call void asm sideeffect "mov ${1:w}, ${0:w}\0A", "r,i"(i32 %1, i32 0) nounwind + ret void +} + +define void @t12() nounwind { + ; CHECK-LABEL: name: t12 + ; CHECK: bb.1.entry: + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.data + ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 16 from %ir.data) + ; CHECK: [[COPY:%[0-9]+]]:fpr128_lo = COPY [[LOAD]](<4 x s32>) + ; CHECK: INLINEASM &"mov.2d v4, $0\0A", 1, 9, [[COPY]], 12, implicit-def early-clobber $q4 + ; CHECK: RET_ReallyLR +entry: + %data = alloca <4 x float>, align 16 + %0 = load <4 x float>, <4 x float>* %data, align 16 + call void asm sideeffect "mov.2d v4, $0\0A", "x,~{v4}"(<4 x float> %0) nounwind + ret void +} + +define void @t15() nounwind { + ; CHECK-LABEL: name: t15 + ; CHECK: bb.1.entry: + ; CHECK: INLINEASM &"fmov $0, d8", 1, 1310730, def %0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %0 + ; CHECK: RET_ReallyLR +entry: + %0 = tail call double asm sideeffect "fmov $0, d8", "=r"() nounwind + ret void +} + +define <2 x float> @test_vreg_64bit(<2 x float> %in) nounwind { + ; CHECK-LABEL: name: test_vreg_64bit + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[COPY]](<2 x s32>) + ; CHECK: INLINEASM &"fadd ${0}.2s, ${1}.2s, ${1}.2s", 1, 10, implicit-def $d14, 9, [[COPY1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $d14 + ; CHECK: $d0 = COPY [[COPY2]](<2 x s32>) + ; CHECK: RET_ReallyLR implicit $d0 + %1 = tail call <2 x float> asm sideeffect "fadd ${0}.2s, ${1}.2s, ${1}.2s", "={v14},w"(<2 x float> %in) nounwind + ret <2 x float> %1 +} + +define <4 x float> @test_vreg_128bit(<4 x float> %in) nounwind { + ; CHECK-LABEL: name: test_vreg_128bit + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY [[COPY]](<4 x s32>) + ; CHECK: INLINEASM &"fadd ${0}.4s, ${1}.4s, ${1}.4s", 1, 10, implicit-def $q14, 9, [[COPY1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q14 + ; CHECK: $q0 = COPY [[COPY2]](<4 x s32>) + ; CHECK: RET_ReallyLR implicit $q0 + %1 = tail call <4 x float> asm sideeffect "fadd ${0}.4s, ${1}.4s, ${1}.4s", "={v14},w"(<4 x float> %in) nounwind + ret <4 x float> %1 +} + +; PR33134 +define void @test_zero_address() { + ; CHECK-LABEL: name: test_zero_address + ; CHECK: bb.1.entry: + ; CHECK: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 + ; CHECK: INLINEASM &"ldr $0, $1 \0A", 9, 589834, def %0, 458766, [[C]](p0) + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %0 + ; CHECK: RET_ReallyLR +entry: + tail call i32 asm sideeffect "ldr $0, $1 \0A", "=r,*Q"(i32* null) + ret void +} + +@g1 = external global i32 + +define i32 @f1(i32 %x) nounwind { + ; CHECK-LABEL: name: f1 + ; CHECK: bb.1.entry: + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; 
CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @g1 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.l1 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32common = COPY [[COPY]](s32) + ; CHECK: INLINEASM &"str $1, $0", 16, 196622, [[FRAME_INDEX]](p0), 9, [[COPY1]] + ; CHECK: INLINEASM &"ldr $0, $1", 8, 589834, def %3, 196622, [[FRAME_INDEX]](p0) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY %3 + ; CHECK: G_STORE [[COPY2]](s32), [[GV]](p0) :: (store 4 into @g1) + ; CHECK: $w0 = COPY [[COPY2]](s32) + ; CHECK: RET_ReallyLR implicit $w0 +entry: + %l1 = alloca i32, align 4 + call void asm "str $1, $0", "=*m,r"(i32* %l1, i32 %x) nounwind + %0 = call i32 asm "ldr $0, $1", "=r,*m"(i32* %l1) nounwind + store i32 %0, i32* @g1, align 4 + ret i32 %0 +} + +; Check support for returning several float values +define float @test_multiple_values() { + ; CHECK-LABEL: name: test_multiple_values + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0, 589834, def %0, 589834, def %1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 + ; CHECK: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] + ; CHECK: $s0 = COPY [[FADD]](s32) + ; CHECK: RET_ReallyLR implicit $s0 + %1 = call { float, float } asm "mov $0, #0; mov $1, #0", "=r,=r"() + %asmresult = extractvalue { float, float } %1, 0 + %asmresult1 = extractvalue { float, float } %1, 1 + %add = fadd float %asmresult, %asmresult1 + ret float %add +} + +; Check that the input operands of type s8 are copied into the constrained 32-bit registers +define i32 @input_operand(i8 %a) { + ; CHECK-LABEL: name: input_operand + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:gpr32common = COPY [[TRUNC]](s8) + ; CHECK: INLINEASM &"add $0, $1, $1", 1, 589834, def %2, 9, [[COPY1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY %2 + ; CHECK: $w0 = COPY [[COPY2]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %1 = call i32 asm sideeffect "add $0, $1, $1", "=r,r"(i8 %a) nounwind + ret i32 %1 +}
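[Editor's note] The numeric immediates in the CHECK lines above (589834, 1310730, 458766, ...) are the INLINEASM operand-group flag words produced by getFlagWord(), getFlagWordForRegClass() and getFlagWordForMem() in the translator code earlier in this patch: the operand kind sits in the low three bits, the number of following operands in bits 3..15, and either a register-class ID (stored +1), a memory-constraint ID, or a matched-operand index in the upper bits. The helper below is a hypothetical standalone sketch, not part of the patch; it shows how such an instruction can be walked with the existing InlineAsm accessors, much like AArch64InstructionSelector::selectInlineAsm() does above.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace llvm;

// Hypothetical debugging helper: print what each flag word of an INLINEASM
// MachineInstr encodes.
static void printInlineAsmFlagWords(const MachineInstr &MI) {
  assert(MI.isInlineAsm() && "expected an INLINEASM instruction");
  unsigned I = InlineAsm::MIOp_FirstOperand, E = MI.getNumOperands();
  while (I < E) {
    const MachineOperand &MO = MI.getOperand(I);
    // Trailing operands (e.g. the !srcloc metadata) are not part of a
    // flag-word group; stop once we run out of immediates.
    if (!MO.isImm())
      break;
    unsigned Flags = static_cast<unsigned>(MO.getImm());
    unsigned NumOps = InlineAsm::getNumOperandRegisters(Flags);
    errs() << "kind " << InlineAsm::getKind(Flags) << ", " << NumOps
           << " operand(s)";
    unsigned RC = 0;
    if (InlineAsm::hasRegClassConstraint(Flags, RC))
      errs() << ", regclass " << RC;
    if (InlineAsm::isMemKind(Flags))
      errs() << ", mem constraint " << InlineAsm::getMemoryConstraintID(Flags);
    unsigned DefIdx = 0;
    if (InlineAsm::isUseOperandTiedToDef(Flags, DefIdx))
      errs() << ", tied to group " << DefIdx;
    errs() << "\n";
    I += NumOps + 1; // skip the flag word and the operands it describes
  }
}

For example, the 458766 in @t7 is 0x7000E: kind 6 (memory) in the low bits, one operand in the group, and the memory-constraint ID in the upper half, which is how the 'Q' constraint from the IR ends up encoded.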