Index: clang/test/CodeGen/ms-inline-asm-functions.c =================================================================== --- clang/test/CodeGen/ms-inline-asm-functions.c +++ clang/test/CodeGen/ms-inline-asm-functions.c @@ -22,7 +22,7 @@ __asm call r; // CHECK: calll *({{.*}}) __asm call kimport; - // CHECK: calll *({{.*}}) + // CHECK: calll __imp__kimport // Broken case: Call through a global function pointer. __asm call kptr; Index: llvm/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetLowering.h +++ llvm/include/llvm/CodeGen/TargetLowering.h @@ -3494,6 +3494,12 @@ /// legal. It is frequently not legal in PIC relocation models. virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + virtual bool + isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs, + unsigned OpNo) const { + return false; + } + bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const; Index: llvm/include/llvm/CodeGen/TargetSubtargetInfo.h =================================================================== --- llvm/include/llvm/CodeGen/TargetSubtargetInfo.h +++ llvm/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -18,6 +18,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/PBQPRAConstraint.h" #include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/CodeGen.h" #include <memory> @@ -29,6 +30,7 @@ class MachineFunction; class ScheduleDAGMutation; class CallLowering; +class GlobalValue; class InlineAsmLowering; class InstrItineraryData; struct InstrStage; @@ -308,6 +310,11 @@ unsigned PhysReg) const { return false; } + + virtual unsigned char + classifyGlobalFunctionReference(const GlobalValue *GV) const { + return 0; + } }; } // end namespace llvm Index: llvm/include/llvm/IR/InlineAsm.h =================================================================== ---
llvm/include/llvm/IR/InlineAsm.h +++ llvm/include/llvm/IR/InlineAsm.h @@ -15,6 +15,7 @@ #ifndef LLVM_IR_INLINEASM_H #define LLVM_IR_INLINEASM_H +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Value.h" #include "llvm/Support/ErrorHandling.h" @@ -83,6 +84,7 @@ const std::string &getAsmString() const { return AsmString; } const std::string &getConstraintString() const { return Constraints; } + void collectAsmStrs(SmallVectorImpl<StringRef> &AsmStrs) const; /// This static method can be used by the parser to check to see if the /// specified constraint string is legal for the type. @@ -241,6 +243,7 @@ Kind_Clobber = 4, // Clobbered register, "~r". Kind_Imm = 5, // Immediate. Kind_Mem = 6, // Memory operand, "m", or an address, "p". + Kind_Func = 7, // Address operand of function call, "pc" // Memory constraint codes. // These could be tablegenerated but there's little need to do that since @@ -287,13 +290,14 @@ static unsigned getFlagWord(unsigned Kind, unsigned NumOps) { assert(((NumOps << 3) & ~0xffff) == 0 && "Too many inline asm operands!"); - assert(Kind >= Kind_RegUse && Kind <= Kind_Mem && "Invalid Kind"); + assert(Kind >= Kind_RegUse && Kind <= Kind_Func && "Invalid Kind"); return Kind | (NumOps << 3); } static bool isRegDefKind(unsigned Flag){ return getKind(Flag) == Kind_RegDef;} static bool isImmKind(unsigned Flag) { return getKind(Flag) == Kind_Imm; } static bool isMemKind(unsigned Flag) { return getKind(Flag) == Kind_Mem; } + static bool isFuncKind(unsigned Flag) { return getKind(Flag) == Kind_Func; } static bool isRegDefEarlyClobberKind(unsigned Flag) { return getKind(Flag) == Kind_RegDefEarlyClobber; } @@ -329,7 +333,8 @@ /// Augment an existing flag word returned by getFlagWord with the constraint /// code for a memory constraint.
static unsigned getFlagWordForMem(unsigned InputFlag, unsigned Constraint) { - assert(isMemKind(InputFlag) && "InputFlag is not a memory constraint!"); + assert((isMemKind(InputFlag) || isFuncKind(InputFlag)) && + "InputFlag is not a memory constraint!"); assert(Constraint <= 0x7fff && "Too large a memory constraint ID"); assert(Constraint <= Constraints_Max && "Unknown constraint ID"); assert((InputFlag & ~0xffff) == 0 && "High bits already contain data"); @@ -346,7 +351,7 @@ } static unsigned getMemoryConstraintID(unsigned Flag) { - assert(isMemKind(Flag)); + assert(isMemKind(Flag) || isFuncKind(Flag)); return (Flag >> Constraints_ShiftAmount) & 0x7fff; } @@ -416,6 +421,7 @@ case InlineAsm::Kind_Imm: return "imm"; case InlineAsm::Kind_Mem: + case InlineAsm::Kind_Func: return "mem"; default: llvm_unreachable("Unknown operand kind"); Index: llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -1297,7 +1297,7 @@ break; case InlineAsm::Kind_RegUse: // Use of register. case InlineAsm::Kind_Imm: // Immediate. - case InlineAsm::Kind_Mem: // Addressing mode. + case InlineAsm::Kind_Mem: // Non-function addressing mode. // The addressing mode has been selected, just add all of the // operands to the machine instruction. for (unsigned j = 0; j != NumVals; ++j, ++i) @@ -1315,6 +1315,21 @@ } } break; + case InlineAsm::Kind_Func: // Function addressing mode. + for (unsigned j = 0; j != NumVals; ++j, ++i) { + SDValue Op = Node->getOperand(i); + AddOperand(MIB, Op, 0, nullptr, VRBaseMap, + /*IsDebug=*/false, IsClone, IsCloned); + + // Adjust Target Flags when AdjustFlag set in inline asm. 
+ if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { + unsigned NewFlags = + MF->getSubtarget().classifyGlobalFunctionReference( + TGA->getGlobal()); + unsigned LastIdx = MIB.getInstr()->getNumOperands() - 1; + MIB.getInstr()->getOperand(LastIdx).setTargetFlags(NewFlags); + } + } } } Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8756,8 +8756,15 @@ Chain = lowerStartEH(Chain, EHPadBB, BeginLabel); } + int ArgNo = -1; + SmallVector<StringRef> AsmStrs; + IA->collectAsmStrs(AsmStrs); + // Second pass over the constraints: compute which constraint option to use. for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) { + if (OpInfo.hasArg() || OpInfo.Type == InlineAsm::isOutput) + ArgNo++; + // If this is an output operand with a matching input operand, look up the // matching input. If their types mismatch, e.g. one is an integer, the // other is floating point, or their sizes are different, flag it as an @@ -8775,6 +8782,59 @@ OpInfo.ConstraintType == TargetLowering::C_Address) continue; + // TODO: Refine me. This is a temporary/quick fix; it will generate more + // loads/stores. + // The current inline asm architecture is not friendly for identifying the + // kinds of instructions at the IR/DAG/MIR level. + // + // In inline asm we can't (or it is hard to) distinguish which instruction + // a global address is used for. (In normal instructions we can distinguish + // them by checking the instruction type or opcode.) + // So in the following case, though there is "call func", we don't know + // there is a function call in such an (inline asm) IR/MIR. + // + // extern float func(float x); float GV; + // float test(float x) { + // GV=x+1; + // __asm { movss xmm0, GV; call func; movss GV, xmm0 } + // return GV; + // } + // + // The global address will be seen as a normal global value's address, like GV + // in movss.
This causes problems in 64 bit pic mode, because in 64 bit pic + // mode when we use a global value we first get an independent address and + // then load the content from it. But "call func" just uses the address of + // "func", though "func" is also a global value. + // + // So it generated the following wrong asm: + // + // movq func@GOTPCREL(%rip), %rcx + // callq *(%rcx) // There is 1 more dereference + // + // Correct code should be: + // callq *%rcx // Replace "(%rcx)" --> "%rcx" + // + // (Normally, except for inline asm, this differentiated lowering for a + // global value happens in ISel by checking the instruction/dag's type.) + // + // So here we temporarily fix it by removing the isIndirect flag to "reduce" + // the number of dereferences. + bool IsFunc = false; + if (OpInfo.isIndirect && OpInfo.CallOperand.getNode() && + OpInfo.CallOperand.getOpcode() == ISD::GlobalAddress) { + if (auto *GA = dyn_cast<GlobalAddressSDNode>(OpInfo.CallOperand)) { + const GlobalValue *GV = GA->getGlobal(); + IsFunc = GV && dyn_cast<Function>(GV); + } + } + + // Some targets may treat the function as a direct address, not indirect mem. + if (IsFunc && TLI.isInlineAsmTargetBranch(AsmStrs, ArgNo)) { + OpInfo.isIndirect = false; + if (TM.getCodeModel() != CodeModel::Large) + OpInfo.ConstraintType = TargetLowering::C_Address; + } + // If this is a memory input, and if the operand is not indirect, do what we // need to provide an address for the memory input.
if (OpInfo.ConstraintType == TargetLowering::C_Memory && @@ -8984,8 +9044,7 @@ break; } - if (OpInfo.ConstraintType == TargetLowering::C_Memory || - OpInfo.ConstraintType == TargetLowering::C_Address) { + if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert((OpInfo.isIndirect || OpInfo.ConstraintType != TargetLowering::C_Memory) && "Operand must be indirect to be a mem!"); @@ -9008,6 +9067,39 @@ break; } + if (OpInfo.ConstraintType == TargetLowering::C_Address) { + assert(InOperandVal.getValueType() == + TLI.getPointerTy(DAG.getDataLayout()) && + "Address operands expect pointer values"); + + unsigned ConstraintID = + TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); + assert(ConstraintID != InlineAsm::Constraint_Unknown && + "Failed to convert memory constraint code to constraint id."); + + unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); + + SDValue AsmOp = InOperandVal; + if (auto *GA = dyn_cast<GlobalAddressSDNode>(InOperandVal)) { + const GlobalValue *GV = GA->getGlobal(); + if (GV && dyn_cast<Function>(GV)) { + ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Func, 1); + AsmOp = DAG.getTargetGlobalAddress(GV, getCurSDLoc(), + InOperandVal.getValueType(), + GA->getOffset()); + } + } + + // Add information to the INLINEASM node to know about this input.
+ ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID); + + AsmNodeOperands.push_back( + DAG.getTargetConstant(ResOpType, getCurSDLoc(), MVT::i32)); + + AsmNodeOperands.push_back(AsmOp); + break; + } + assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || OpInfo.ConstraintType == TargetLowering::C_Register) && "Unknown constraint type!"); Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1957,7 +1957,7 @@ while (i != e) { unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue(); - if (!InlineAsm::isMemKind(Flags)) { + if (!InlineAsm::isMemKind(Flags) && !InlineAsm::isFuncKind(Flags)) { // Just skip over this operand, copying the operands verbatim. Ops.insert(Ops.end(), InOps.begin()+i, InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1); @@ -1986,7 +1986,9 @@ // Add this to the output node. unsigned NewFlags = - InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size()); + InlineAsm::isMemKind(Flags) + ? InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size()) + : InlineAsm::getFlagWord(InlineAsm::Kind_Func, SelOps.size()); NewFlags = InlineAsm::getFlagWordForMem(NewFlags, ConstraintID); Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32)); llvm::append_range(Ops, SelOps); Index: llvm/lib/IR/InlineAsm.cpp =================================================================== --- llvm/lib/IR/InlineAsm.cpp +++ llvm/lib/IR/InlineAsm.cpp @@ -59,6 +59,12 @@ return FTy; } +void InlineAsm::collectAsmStrs(SmallVectorImpl<StringRef> &AsmStrs) const { + StringRef AsmStr(AsmString); + AsmStrs.clear(); + AsmStr.split(AsmStrs, "\n\t", -1, false); +} + /// Parse - Analyze the specified string (e.g. "==&{eax}") and fill in the /// fields in this structure. If the constraint string is not understood, /// return true, otherwise return false.
Index: llvm/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.h +++ llvm/lib/Target/X86/X86ISelLowering.h @@ -1465,6 +1465,9 @@ unsigned getMaxSupportedInterleaveFactor() const override { return 4; } + bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs, + unsigned OpNo) const override; + /// Lower interleaved load(s) into target specific /// instructions/intrinsics. bool lowerInterleavedLoad(LoadInst *LI, Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -32276,6 +32276,38 @@ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); } +static StringRef getInstrStrFromOpNo(const SmallVectorImpl<StringRef> &AsmStrs, + unsigned OpNo) { + const APInt Operand(32, OpNo); + std::string OpNoStr = llvm::toString(Operand, 10, false); + std::string Str(" $"); + + std::string OpNoStr1(Str + OpNoStr); // e.g. " $1" (OpNo=1) + std::string OpNoStr2(Str + "{" + OpNoStr + ":"); // With modifier, e.g. ${1:P} + + for (auto &AsmStr : AsmStrs) { + // Match the OpNo string. We should match exactly to avoid matching a + // sub-string, e.g. "$12" contains "$1" + if (AsmStr.contains(OpNoStr1 + ",") || AsmStr.endswith(OpNoStr1) || + AsmStr.contains(OpNoStr2)) + return AsmStr; + } + + return StringRef(); +} + +bool X86TargetLowering::isInlineAsmTargetBranch( + const SmallVectorImpl<StringRef> &AsmStrs, unsigned OpNo) const { + StringRef InstrStr = getInstrStrFromOpNo(AsmStrs, OpNo); + + // Do not use "startswith" here, because a label may come before the "call". + // For example: ".L__MSASMLABEL_.${:uid}__l:call dword ptr ${0:P}" + if (InstrStr.contains("call")) + return true; + + return false; +} + /// Provide custom lowering hooks for some operations.
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { Index: llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/inline-asm-function-call-pic.ll @@ -0,0 +1,74 @@ +; RUN: llc -O2 --relocation-model=pic -mtriple=i386-unknown-linux-gnu < %s 2>&1 | FileCheck %s + +; List the source code: +; 1 // clang -m32 -fasm-blocks -S t.c -O2 -fpic -emit-llvm +; 2 int GV = 17; +; 3 +; 4 extern unsigned int extern_func(); +; 5 static unsigned int static_func() __attribute__((noinline)); +; 6 static unsigned int static_func() { +; 7 return GV++; +; 8 } +; 9 +;10 void func() { +;11 static_func(); +;12 __asm { +;13 call static_func +;14 call extern_func +;15 shr eax, 0 +;16 shr ebx, 0 +;17 shr ecx, 0 +;18 shr edx, 0 +;19 shr edi, 0 +;20 shr esi, 0 +;21 shr ebp, 0 +;22 shr esp, 0 +;23 } +;24 } + +@GV = local_unnamed_addr global i32 17, align 4 + +; Function Attrs: nounwind uwtable +define void @func() local_unnamed_addr #0 { +; CHECK-LABEL: func: +; CHECK: calll .L0$pb +; CHECK-NEXT: .L0$pb: +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ebx +; CHECK-NEXT: calll static_func +; CHECK-NEXT: #APP +; CHECK-EMPTY: +; CHECK-NEXT: calll static_func +; CHECK-NEXT: calll extern_func@PLT +; CHECK-NEXT: shrl $0, %eax +; CHECK-NEXT: shrl $0, %ebx +; CHECK-NEXT: shrl $0, %ecx +; CHECK-NEXT: shrl $0, %edx +; CHECK-NEXT: shrl $0, %edi +; CHECK-NEXT: shrl $0, %esi +; CHECK-NEXT: shrl $0, %ebp +; CHECK-NEXT: shrl $0, %esp +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +entry: + %call = tail call i32 @static_func() + tail call void asm sideeffect inteldialect "call dword ptr ${0:P}\0A\09call dword ptr ${1:P}\0A\09shr eax, $$0\0A\09shr ebx, $$0\0A\09shr ecx, $$0\0A\09shr edx, $$0\0A\09shr edi, $$0\0A\09shr esi, $$0\0A\09shr ebp, $$0\0A\09shr esp, $$0", 
"*m,*m,~{eax},~{ebp},~{ebx},~{ecx},~{edi},~{edx},~{flags},~{esi},~{esp},~{dirflag},~{fpsr},~{flags}"(ptr nonnull elementtype(i32 (...)) @static_func, ptr nonnull elementtype(i32 (...)) @extern_func) #3 + ret void +} + +declare i32 @extern_func(...) #1 + +; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn uwtable +define internal i32 @static_func() #2 { +entry: + %0 = load i32, ptr @GV, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, ptr @GV, align 4 + ret i32 %0 +} + +attributes #0 = { nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #2 = { mustprogress nofree noinline norecurse nosync nounwind willreturn uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #3 = { nounwind } Index: llvm/test/CodeGen/X86/inline-asm-p-constraint.ll =================================================================== --- llvm/test/CodeGen/X86/inline-asm-p-constraint.ll +++ llvm/test/CodeGen/X86/inline-asm-p-constraint.ll @@ -1,10 +1,16 @@ ; RUN: llc -mtriple=x86_64-unknown-unknown -no-integrated-as < %s 2>&1 | FileCheck %s -define ptr @foo(ptr %ptr) { +define ptr @foo(ptr %Ptr) { ; CHECK-LABEL: foo: - %1 = tail call ptr asm "lea $1, $0", "=r,p,~{dirflag},~{fpsr},~{flags}"(ptr %ptr) -; CHECK: #APP -; CHECK-NEXT: lea (%rdi), %rax +; asm {mov rax, Pointer; lea rax, Pointer} +; LEA: Computes the effective address of the second operand and stores it in the first operand + %Ptr.addr = alloca 
ptr, align 8 + store ptr %Ptr, ptr %Ptr.addr, align 8 +; CHECK: movq %rdi, -8(%rsp) + %1 = tail call ptr asm "mov $1, $0\0A\09lea $2, $0", "=r,p,*m,~{dirflag},~{fpsr},~{flags}"(ptr %Ptr, ptr elementtype(ptr) %Ptr.addr) +; CHECK-NEXT: #APP +; CHECK-NEXT: mov (%rdi), %rax +; CHECK-NEXT: lea -8(%rsp), %rax ; CHECK-NEXT: #NO_APP ret ptr %1 ; CHECK-NEXT: retq