Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -5044,7 +5044,7 @@
   the operand.  (The behavior for relocatable symbol expressions is a
   target-specific behavior for this typically target-independent modifier)
 - ``H``: Print a memory reference with additional offset +8.
-- ``P``: Print a memory reference or operand for use as the argument of a call
+- ``P``: Print a branch target operand for use as the argument of a call
   instruction. (E.g. omit ``(rip)``, even though it's PC-relative.)

 XCore:
Index: llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -20,6 +20,7 @@
 #include "llvm/CodeGen/PBQPRAConstraint.h"
 #include "llvm/CodeGen/ScheduleDAGMutation.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/IR/GlobalValue.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/CodeGen.h"
 #include <memory>
@@ -28,6 +29,7 @@
 namespace llvm {

 class CallLowering;
+class GlobalValue;
 class InlineAsmLowering;
 class InstrItineraryData;
 struct InstrStage;
@@ -312,6 +314,11 @@
                                            unsigned PhysReg) const {
     return false;
   }
+
+  virtual unsigned char
+  classifyGlobalFunctionReference(const GlobalValue *GV) const {
+    return 0;
+  }
 };

 } // end namespace llvm
Index: llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -1137,11 +1137,135 @@
     TLI->AdjustInstrPostInstrSelection(*MIB, Node);
 }

+// Collect the indexes of the call operands in the inline asm into CallOpNos.
+// The ${NUM:P} modifier means "print a branch target operand for use as the
+// argument of a call instruction", so use it together with the flag operands
+// of the inline asm to compute the corresponding operand indexes in the
+// inline asm node.
+static bool getInlineAsmCallOperands(const char *AsmStr, SDNode *Node,
+                                     SmallSet<unsigned, 4> &CallOpNos) {
+  const char *CurPtr = AsmStr;
+  unsigned NumOperands = Node->getNumOperands();
+  while (*CurPtr) {
+    if (*CurPtr++ != '$') // Operands start with '$'.
+      continue;
+
+    if (*CurPtr == '$') { // Skip "$$".
+      ++CurPtr;
+      continue;
+    }
+
+    bool HasCurlyBraces = false;
+    if (*CurPtr == '{') { // ${variable}
+      ++CurPtr; // Consume '{' character.
+      HasCurlyBraces = true;
+    }
+
+    if (!HasCurlyBraces)
+      continue;
+
+    // If we have ${:xxx}, this is not a real operand reference, skip it.
+    if (*CurPtr == ':') {
+      ++CurPtr;
+      const char *StrStart = CurPtr;
+      const char *StrEnd = strchr(StrStart, '}');
+      if (!StrEnd)
+        report_fatal_error("Unterminated ${:xxx} operand in inline asm"
+                           " string: '" +
+                           Twine(AsmStr) + "'");
+
+      CurPtr = StrEnd + 1;
+      continue;
+    }
+
+    // Get the operand number.
+    const char *IDStart = CurPtr;
+    const char *IDEnd = IDStart;
+    while (isDigit(*IDEnd))
+      ++IDEnd;
+
+    unsigned Val;
+    if (StringRef(IDStart, IDEnd - IDStart).getAsInteger(10, Val))
+      report_fatal_error("Bad $ operand number in inline asm string: '" +
+                         Twine(AsmStr) + "'");
+    CurPtr = IDEnd;
+
+    if (Val >= NumOperands - 1)
+      report_fatal_error("Invalid $ operand number in inline asm string: '" +
+                         Twine(AsmStr) + "'");
+
+    if (*CurPtr != ':') { // ${NUM} with no modifier; skip the closing '}'.
+      ++CurPtr;
+      continue;
+    }
+
+    // Check for a modifier character, e.g. ${NUM:MODIFIER}.
+    ++CurPtr; // Consume ':' character.
+
+    if (*CurPtr++ != 'P') // Not a call operand.
+      continue;
+
+    if (*CurPtr != '}')
+      report_fatal_error("Bad ${} expression in inline asm string: '" +
+                         Twine(AsmStr) + "'");
+    ++CurPtr; // Consume '}' character.
+
+    // We have the value number; now scan the flag operands to find the
+    // SDNode operand number that corresponds to Val.
+    unsigned OpNo = InlineAsm::Op_FirstOperand;
+    unsigned Flags =
+        cast<ConstantSDNode>(Node->getOperand(OpNo))->getZExtValue();
+    for (; Val; --Val) {
+      if (OpNo >= Node->getNumOperands()) {
+        report_fatal_error("invalid operand in inline asm: '" + Twine(AsmStr) +
+                           "'");
+      }
+      OpNo += InlineAsm::getNumOperandRegisters(Flags) + 1;
+      Flags = cast<ConstantSDNode>(Node->getOperand(OpNo))->getZExtValue();
+    }
+
+    if (InlineAsm::getKind(Flags) != InlineAsm::Kind_Mem)
+      continue;
+
+    // Collect the memory operand index of the call in the inline asm.
+    CallOpNos.insert(OpNo);
+  }
+
+  return !CallOpNos.empty();
+}
+
+static GlobalAddressSDNode *getGAFromLoad(SDNode *Node, unsigned OpNo) {
+  SDValue Op = Node->getOperand(OpNo);
+
+  // First make sure Op is a load.
+  const MachineSDNode *MN = dyn_cast<MachineSDNode>(Op.getNode());
+  if (!MN)
+    return nullptr;
+  if (MN->memoperands_empty())
+    return nullptr;
+  MachineSDNode::mmo_iterator I = MN->memoperands_begin();
+  if (!(*I)->isLoad())
+    return nullptr;
+
+  // Make sure the memory index register is noreg.
+  // TODO: Refine this; checking these index registers may be unnecessary for
+  // a call in inline asm and a GOT load of a global address.
+  SDNode *NodeIndex = Node->getOperand(OpNo + 2).getNode();
+  SDNode *OpIndex = Op.getOperand(2).getNode();
+  if (cast<RegisterSDNode>(OpIndex)->getReg() ||
+      cast<RegisterSDNode>(NodeIndex)->getReg())
+    return nullptr;
+
+  // Then make sure the displacement is a global address.
+  SDNode *GA = Op.getOperand(3).getNode();
+  GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(GA);
+  return TGA;
+}
+
 /// EmitSpecialNode - Generate machine code for a target-independent node and
 /// needed dependencies.
-void InstrEmitter::
-EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
-                DenseMap<SDValue, Register> &VRBaseMap) {
+void InstrEmitter::EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
+                                   DenseMap<SDValue, Register> &VRBaseMap) {
   switch (Node->getOpcode()) {
   default:
 #ifndef NDEBUG
@@ -1221,8 +1345,8 @@
   case ISD::INLINEASM:
   case ISD::INLINEASM_BR: {
     unsigned NumOps = Node->getNumOperands();
-    if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
-      --NumOps;  // Ignore the glue operand.
+    if (Node->getOperand(NumOps - 1).getValueType() == MVT::Glue)
+      --NumOps; // Ignore the glue operand.

     // Create the inline asm machine instruction.
     unsigned TgtOpc = Node->getOpcode() == ISD::INLINEASM_BR
@@ -1239,8 +1363,8 @@
     // Add the HasSideEffect, isAlignStack, AsmDialect, MayLoad and MayStore
     // bits.
     int64_t ExtraInfo =
-      cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->
-                          getZExtValue();
+        cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))
+            ->getZExtValue();
     MIB.addImm(ExtraInfo);

     // Remember to operand index of the group flags.
@@ -1249,15 +1373,44 @@
     // Remember registers that are part of early-clobber defs.
     SmallVector<unsigned, 4> ECRegs;

+    // Find out whether any call operand in the inline asm needs to be
+    // replaced.
+    // In the Linux PIC model, the global address (GA) of a global value (GV)
+    // is obtained by loading it from its GOT slot. Different instructions use
+    // a global address in different ways.
+    //
+    // For example, on X86:
+    // 1. To assign a value to the GV we can use "MOV GV, ...", which
+    //    corresponds to "MOV (global address), ...".
+    // 2. To take the address of the GV we can use "LEA GV, ...", which
+    //    corresponds to "LEA (global address), ...".
+    // 3. But if we call the label (a GV can be a label) with "CALL GV", that
+    //    is not the same as "CALL (global address)".
+    //
+    // So the global address of a global value has to be handled case by case.
+    // This already happens for normal IR/MIR, where each instruction uses a
+    // global address for a single purpose. Inline asm is different: it is
+    // represented by a single IR/MIR instruction but may contain many
+    // instructions that use the same or different global addresses for
+    // multiple purposes, and LLVM does not distinguish the individual
+    // instructions inside the inline asm IR/MIR.
+    //
+    // TODO: Other targets may have the same problem and need to be fixed too;
+    // this is an architectural defect of LLVM inline asm.
+    bool Replace = false;
+    SmallSet<unsigned, 4> CallOpNos;
+    const TargetMachine &TM = MF->getTarget();
+    if (TM.getTargetTriple().isX86() && TM.isPositionIndependent() &&
+        TM.getTargetTriple().isOSLinux())
+      Replace = getInlineAsmCallOperands(AsmStr, Node, CallOpNos);
+
     // Add all of the operand registers to the instruction.
     for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
       unsigned Flags =
-        cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
       const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

       GroupIdx.push_back(MIB->getNumOperands());
       MIB.addImm(Flags);
-      ++i;  // Skip the ID value.
+      ++i; // Skip the ID value.

       switch (InlineAsm::getKind(Flags)) {
       default: llvm_unreachable("Bad flags!");
@@ -1282,27 +1435,83 @@
           ECRegs.push_back(Reg);
         }
         break;
-      case InlineAsm::Kind_RegUse:  // Use of register.
-      case InlineAsm::Kind_Imm:  // Immediate.
-      case InlineAsm::Kind_Mem:  // Addressing mode.
+      case InlineAsm::Kind_RegUse: { // Use of register.
         // The addressing mode has been selected, just add all of the
         // operands to the machine instruction.
-        for (unsigned j = 0; j != NumVals; ++j, ++i)
+        for (unsigned J = 0; J != NumVals; ++J, ++i)
+          AddOperand(MIB, Node->getOperand(i), 0, nullptr, VRBaseMap,
+                     /*IsDebug=*/false, IsClone, IsCloned);
+
+        unsigned DefGroup = 0;
+        if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) {
+          unsigned DefIdx = GroupIdx[DefGroup] + 1;
+          unsigned UseIdx = GroupIdx.back() + 1;
+          for (unsigned J = 0; J != NumVals; ++J)
+            MIB->tieOperands(DefIdx + J, UseIdx + J);
+        }
+        break;
+      }
+      case InlineAsm::Kind_Imm: // Immediate.
+        for (unsigned J = 0; J != NumVals; ++J, ++i)
           AddOperand(MIB, Node->getOperand(i), 0, nullptr, VRBaseMap,
                      /*IsDebug=*/false, IsClone, IsCloned);
+        break;
+      case InlineAsm::Kind_Mem: { // Addressing mode.
+        bool IsCallOp = CallOpNos.contains(i - 1);
+
+        if (Replace && IsCallOp) {
+          GlobalAddressSDNode *TGA = nullptr;
+          if (TM.getCodeModel() == CodeModel::Large) {
+            // Handle "call func_label" in the large code model.
+            // In the large code model the function label can only be obtained
+            // from a load, so directly call *(load address).
+            // TODO: Refine this: we assume the index register of the inline
+            // "call func_label" is always noreg, and the check for a load may
+            // be unnecessary, since in the large code model func_label must
+            // be obtained from a load.
+            SDNode *LoadCallAddr = Node->getOperand(i).getNode();
+            const MachineSDNode *MN = dyn_cast<MachineSDNode>(LoadCallAddr);
+            if (MN && !MN->memoperands_empty() &&
+                (*MN->memoperands_begin())->isLoad()) {
+              for (unsigned J = 0; J != NumVals; ++J, ++i) {
+                AddOperand(MIB, LoadCallAddr->getOperand(J), 0, nullptr,
+                           VRBaseMap,
+                           /*IsDebug=*/false, IsClone, IsCloned);
+              }
+              break;
+            }
+          } else {
+            TGA = getGAFromLoad(Node, i);
+          }

-        // Manually set isTied bits.
-        if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) {
-          unsigned DefGroup = 0;
-          if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) {
-            unsigned DefIdx = GroupIdx[DefGroup] + 1;
-            unsigned UseIdx = GroupIdx.back() + 1;
-            for (unsigned j = 0; j != NumVals; ++j)
-              MIB->tieOperands(DefIdx + j, UseIdx + j);
+          // In the PIC model (non-large code model) we want to print
+          // "call func_label" as "call func_label@plt".
+          if (TGA) {
+            MachineInstr *MI = MIB.getInstr();
+            unsigned OldFlagsID = MI->getNumOperands() - 1;
+            unsigned ConstraintID = InlineAsm::getMemoryConstraintID(Flags);
+            unsigned NewFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+            NewFlags = InlineAsm::getFlagWordForMem(NewFlags, ConstraintID);
+            MI->RemoveOperand(OldFlagsID);
+            MIB.addImm(NewFlags);
+
+            const GlobalValue *GV = TGA->getGlobal();
+            unsigned char OpFlags =
+                MF->getSubtarget().classifyGlobalFunctionReference(GV);
+            MIB.addGlobalAddress(TGA->getGlobal(), TGA->getOffset(), OpFlags);
+            i += NumVals;
+            break;
           }
         }
+
+        for (unsigned J = 0; J != NumVals; ++J, ++i) {
+          AddOperand(MIB, Node->getOperand(i), 0, nullptr, VRBaseMap,
+                     /*IsDebug=*/false, IsClone, IsCloned);
+        }
         break;
       }
+      }
     }

     // GCC inline assembly allows input operands to also be early-clobber
Index: llvm/lib/Target/X86/X86AsmPrinter.cpp
===================================================================
--- llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -605,9 +605,13 @@
         PrintMemReference(MI, OpNo, O, "H");
       }
       return false;
-    case 'P': // Don't print @PLT, but do print as memory.
+    case 'P': // Call operand modifier, e.g. omit RIP, add @PLT.
       if (MI->getInlineAsmDialect() == InlineAsm::AD_Intel) {
-        PrintIntelMemReference(MI, OpNo, O, "no-rip");
+        if (Subtarget->isPositionIndependent() &&
+            MI->getOperand(OpNo).isGlobal())
+          PrintSymbolOperand(MI->getOperand(OpNo), O);
+        else
+          PrintIntelMemReference(MI, OpNo, O, "no-rip");
       } else {
         PrintMemReference(MI, OpNo, O, "no-rip");
       }
Index: llvm/test/CodeGen/X86/inline-asm-call.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/inline-asm-call.ll
@@ -0,0 +1,138 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic | FileCheck %s --check-prefix=CHECK-X64
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -relocation-model=pic | FileCheck %s --check-prefix=CHECK-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic -code-model=large | FileCheck %s --check-prefix=CHECK-X64-LARGE
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -relocation-model=pic -code-model=large | FileCheck %s --check-prefix=CHECK-X86-LARGE
+
+; The tested IR came from the following test.c.
+; The "call sincos" in the inline asm should generate correct code under the
+; PIC model, not "call *(load sincos@GOT)", which performs one extra load and
+; is bad for relocation.
+
+; test.c:
+; extern void sincos();
+; int Arr[10] = {1,};
+; void foo() {
+;   asm {
+;     lea ecx, Arr
+;     lea edx, sincos
+;     call sincos
+;     mov eax, 0
+;     ret }
+; }
+
+; clang -fasm-blocks -S -fpic -emit-llvm test.c
+
+@Arr = global <{ i32, [9 x i32] }> <{ i32 1, [9 x i32] zeroinitializer }>, align 16
+
+define void @foo() {
+; CHECK-X64-LABEL: foo:
+; CHECK-X64:       # %bb.0: # %entry
+; CHECK-X64-NEXT:    movq sincos@GOTPCREL(%rip), %rsi
+; CHECK-X64-NEXT:    movq Arr@GOTPCREL(%rip), %rdi
+; CHECK-X64-NEXT:    #APP
+; CHECK-X64-EMPTY:
+; CHECK-X64-NEXT:    leal (%rdi), %ecx
+; CHECK-X64-NEXT:    leal (%rsi), %edx
+; CHECK-X64-NEXT:    callq sincos@PLT
+; CHECK-X64-NEXT:    movl $0, %eax
+; CHECK-X64-NEXT:    retq
+; CHECK-X64-EMPTY:
+; CHECK-X64-NEXT:    #NO_APP
+; CHECK-X64-NEXT:    retq
+;
+; CHECK-X86-LABEL: foo:
+; CHECK-X86:       # %bb.0: # %entry
+; CHECK-X86-NEXT:    pushl %edi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:    pushl %esi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-X86-NEXT:    .cfi_offset %esi, -12
+; CHECK-X86-NEXT:    .cfi_offset %edi, -8
+; CHECK-X86-NEXT:    calll .L0$pb
+; CHECK-X86-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK-X86-NEXT:  .L0$pb:
+; CHECK-X86-NEXT:    popl %eax
+; CHECK-X86-NEXT:    .cfi_adjust_cfa_offset -4
+; CHECK-X86-NEXT:  .Ltmp0:
+; CHECK-X86-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %eax
+; CHECK-X86-NEXT:    movl sincos@GOT(%eax), %esi
+; CHECK-X86-NEXT:    movl Arr@GOT(%eax), %edi
+; CHECK-X86-NEXT:    #APP
+; CHECK-X86-EMPTY:
+; CHECK-X86-NEXT:    leal (%edi), %ecx
+; CHECK-X86-NEXT:    leal (%esi), %edx
+; CHECK-X86-NEXT:    calll sincos@PLT
+; CHECK-X86-NEXT:    movl $0, %eax
+; CHECK-X86-NEXT:    retl
+; CHECK-X86-EMPTY:
+; CHECK-X86-NEXT:    #NO_APP
+; CHECK-X86-NEXT:    popl %esi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-X86-NEXT:    popl %edi
+; CHECK-X86-NEXT:    .cfi_def_cfa_offset 4
+; CHECK-X86-NEXT:    retl
+;
+; CHECK-X64-LARGE-LABEL: foo:
+; CHECK-X64-LARGE:       # %bb.0: # %entry
+; CHECK-X64-LARGE-NEXT:  .L0$pb:
+; CHECK-X64-LARGE-NEXT:    leaq .L0$pb(%rip), %rax
+; CHECK-X64-LARGE-NEXT:    movabsq $_GLOBAL_OFFSET_TABLE_-.L0$pb, %rsi
+; CHECK-X64-LARGE-NEXT:    addq %rax, %rsi
+; CHECK-X64-LARGE-NEXT:    movabsq $sincos@GOT, %r9
+; CHECK-X64-LARGE-NEXT:    movq (%rsi,%r9), %r8
+; CHECK-X64-LARGE-NEXT:    movabsq $Arr@GOT, %rax
+; CHECK-X64-LARGE-NEXT:    movq (%rsi,%rax), %rdi
+; CHECK-X64-LARGE-NEXT:    #APP
+; CHECK-X64-LARGE-EMPTY:
+; CHECK-X64-LARGE-NEXT:    leal (%rdi), %ecx
+; CHECK-X64-LARGE-NEXT:    leal (%r8), %edx
+; CHECK-X64-LARGE-NEXT:    callq *(%rsi,%r9)
+; CHECK-X64-LARGE-NEXT:    movl $0, %eax
+; CHECK-X64-LARGE-NEXT:    retq
+; CHECK-X64-LARGE-EMPTY:
+; CHECK-X64-LARGE-NEXT:    #NO_APP
+; CHECK-X64-LARGE-NEXT:    retq
+;
+; CHECK-X86-LARGE-LABEL: foo:
+; CHECK-X86-LARGE:       # %bb.0: # %entry
+; CHECK-X86-LARGE-NEXT:    pushl %ebx
+; CHECK-X86-LARGE-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-X86-LARGE-NEXT:    pushl %edi
+; CHECK-X86-LARGE-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-X86-LARGE-NEXT:    pushl %esi
+; CHECK-X86-LARGE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-X86-LARGE-NEXT:    .cfi_offset %esi, -16
+; CHECK-X86-LARGE-NEXT:    .cfi_offset %edi, -12
+; CHECK-X86-LARGE-NEXT:    .cfi_offset %ebx, -8
+; CHECK-X86-LARGE-NEXT:    calll .L0$pb
+; CHECK-X86-LARGE-NEXT:    .cfi_adjust_cfa_offset 4
+; CHECK-X86-LARGE-NEXT:  .L0$pb:
+; CHECK-X86-LARGE-NEXT:    popl %esi
+; CHECK-X86-LARGE-NEXT:    .cfi_adjust_cfa_offset -4
+; CHECK-X86-LARGE-NEXT:  .Ltmp0:
+; CHECK-X86-LARGE-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %esi
+; CHECK-X86-LARGE-NEXT:    movl sincos@GOT(%esi), %edi
+; CHECK-X86-LARGE-NEXT:    movl Arr@GOT(%esi), %ebx
+; CHECK-X86-LARGE-NEXT:    #APP
+; CHECK-X86-LARGE-EMPTY:
+; CHECK-X86-LARGE-NEXT:    leal (%ebx), %ecx
+; CHECK-X86-LARGE-NEXT:    leal (%edi), %edx
+; CHECK-X86-LARGE-NEXT:    lcalll *sincos@GOT(%esi)
+; CHECK-X86-LARGE-NEXT:    movl $0, %eax
+; CHECK-X86-LARGE-NEXT:    retl
+; CHECK-X86-LARGE-EMPTY:
+; CHECK-X86-LARGE-NEXT:    #NO_APP
+; CHECK-X86-LARGE-NEXT:    popl %esi
+; CHECK-X86-LARGE-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-X86-LARGE-NEXT:    popl %edi
+; CHECK-X86-LARGE-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-X86-LARGE-NEXT:    popl %ebx
+; CHECK-X86-LARGE-NEXT:    .cfi_def_cfa_offset 4
+; CHECK-X86-LARGE-NEXT:    retl
+entry:
+  call void asm sideeffect inteldialect "lea ecx, $0\0A\09lea edx, qword ptr $1\0A\09call qword ptr ${2:P}\0A\09mov eax, $$0\0A\09ret", "*m,*m,*m,~{eax},~{ecx},~{edx},~{dirflag},~{fpsr},~{flags}"([10 x i32]* bitcast (<{ i32, [9 x i32] }>* @Arr to [10 x i32]*), void (...)* @sincos, void (...)* @sincos)
+  ret void
+}
+
+declare void @sincos(...)
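
A minimal single-operand sketch of the same pattern, reduced from the foo test above. It is not part of the patch or the test file: the function name @bar is hypothetical, the @sincos declaration above is reused, and only the x86-64 Intel-dialect form is shown. With -relocation-model=pic the expectation after this change is a direct "callq sincos@PLT" rather than an indirect call through an extra GOT load:

define void @bar() {
entry:
  ; The ${0:P} call operand should print as a bare PLT-relative symbol
  ; instead of a GOT memory reference.
  call void asm sideeffect inteldialect "call qword ptr ${0:P}", "*m,~{dirflag},~{fpsr},~{flags}"(void (...)* @sincos)
  ret void
}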