Index: llvm/trunk/include/llvm/CodeGen/AsmPrinter.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/AsmPrinter.h +++ llvm/trunk/include/llvm/CodeGen/AsmPrinter.h @@ -238,6 +238,7 @@ TAIL_CALL = 2, LOG_ARGS_ENTER = 3, CUSTOM_EVENT = 4, + TYPED_EVENT = 5, }; // The table will contain these structs that point to the sled, the function @@ -644,8 +645,7 @@ GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &C); /// Emit GlobalAlias or GlobalIFunc. - void emitGlobalIndirectSymbol(Module &M, - const GlobalIndirectSymbol& GIS); + void emitGlobalIndirectSymbol(Module &M, const GlobalIndirectSymbol &GIS); void setupCodePaddingContext(const MachineBasicBlock &MBB, MCCodePaddingContext &Context) const; }; Index: llvm/trunk/include/llvm/CodeGen/FastISel.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/FastISel.h +++ llvm/trunk/include/llvm/CodeGen/FastISel.h @@ -535,6 +535,7 @@ bool selectExtractValue(const User *I); bool selectInsertValue(const User *I); bool selectXRayCustomEvent(const CallInst *II); + bool selectXRayTypedEvent(const CallInst *II); private: /// \brief Handle PHI nodes in successor blocks. Index: llvm/trunk/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/TargetLowering.h +++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h @@ -2547,6 +2547,11 @@ /// details. MachineBasicBlock *emitXRayCustomEvent(MachineInstr &MI, MachineBasicBlock *MBB) const; + + /// Replace/modify the XRay typed event operands with target-dependent + /// details. + MachineBasicBlock *emitXRayTypedEvent(MachineInstr &MI, + MachineBasicBlock *MBB) const; }; /// This class defines information used to lower LLVM code to legal SelectionDAG Index: llvm/trunk/include/llvm/IR/Intrinsics.td =================================================================== --- llvm/trunk/include/llvm/IR/Intrinsics.td +++ llvm/trunk/include/llvm/IR/Intrinsics.td @@ -894,6 +894,10 @@ // Takes a pointer to a string and the length of the string. def int_xray_customevent : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [NoCapture<0>, ReadOnly<0>, IntrWriteMem]>; +// Typed event logging for x-ray. +// Takes a numeric type tag, a pointer to a string and the length of the string. +def int_xray_typedevent : Intrinsic<[], [llvm_i16_ty, llvm_ptr_ty, llvm_i32_ty], + [NoCapture<1>, ReadOnly<1>, IntrWriteMem]>; //===----------------------------------------------------------------------===// //===------ Memory intrinsics with element-wise atomicity guarantees ------===// Index: llvm/trunk/include/llvm/Support/TargetOpcodes.def =================================================================== --- llvm/trunk/include/llvm/Support/TargetOpcodes.def +++ llvm/trunk/include/llvm/Support/TargetOpcodes.def @@ -183,10 +183,14 @@ /// PATCHABLE_RET which specifically only works for return instructions. HANDLE_TARGET_OPCODE(PATCHABLE_TAIL_CALL) -/// Wraps a logging call and its arguments with nop sleds. At runtime, this can be -/// patched to insert instrumentation instructions. +/// Wraps a logging call and its arguments with nop sleds. At runtime, this can +/// be patched to insert instrumentation instructions. HANDLE_TARGET_OPCODE(PATCHABLE_EVENT_CALL) +/// Wraps a typed logging call and its argument with nop sleds. At runtime, this +/// can be patched to insert instrumentation instructions. 
+HANDLE_TARGET_OPCODE(PATCHABLE_TYPED_EVENT_CALL)
+
 HANDLE_TARGET_OPCODE(ICALL_BRANCH_FUNNEL)

 /// The following generic opcodes are not supposed to appear after ISel.
Index: llvm/trunk/include/llvm/Target/Target.td
===================================================================
--- llvm/trunk/include/llvm/Target/Target.td
+++ llvm/trunk/include/llvm/Target/Target.td
@@ -1133,6 +1133,16 @@
   let mayStore = 1;
   let hasSideEffects = 1;
 }
+def PATCHABLE_TYPED_EVENT_CALL : StandardPseudoInstruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins i16imm:$type, ptr_rc:$event, i32imm:$size);
+  let AsmString = "# XRay Typed Event Log.";
+  let usesCustomInserter = 1;
+  let isCall = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
+  let hasSideEffects = 1;
+}
 def FENTRY_CALL : StandardPseudoInstruction {
   let OutOperandList = (outs unknown:$dst);
   let InOperandList = (ins variable_ops);
Index: llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1039,6 +1039,26 @@
   return true;
 }

+bool FastISel::selectXRayTypedEvent(const CallInst *I) {
+  const auto &Triple = TM.getTargetTriple();
+  if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+    return true; // don't do anything to this instruction.
+  SmallVector<MachineOperand, 8> Ops;
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
+                                          /*IsDef=*/false));
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
+                                          /*IsDef=*/false));
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)),
+                                          /*IsDef=*/false));
+  MachineInstrBuilder MIB =
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL));
+  for (auto &MO : Ops)
+    MIB.add(MO);
+
+  // Insert the Patchable Typed Event Call instruction that gets lowered properly.
+  return true;
+}
 /// Returns an AttributeList representing the attributes applied to the return
 /// value of the given call.
@@ -1433,6 +1453,8 @@

   case Intrinsic::xray_customevent:
     return selectXRayCustomEvent(II);
+  case Intrinsic::xray_typedevent:
+    return selectXRayTypedEvent(II);
   }

   return fastLowerIntrinsicCall(II);
Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6067,6 +6067,41 @@
     setValue(&I, patchableNode);
     return nullptr;
   }
+  case Intrinsic::xray_typedevent: {
+    // Here we want to make sure that the intrinsic behaves as if it has a
+    // specific calling convention, and only for x86_64.
+    // FIXME: Support other platforms later.
+    const auto &Triple = DAG.getTarget().getTargetTriple();
+    if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+      return nullptr;
+
+    SDLoc DL = getCurSDLoc();
+    SmallVector<SDValue, 8> Ops;
+
+    // We want to say that we always want the arguments in registers.
+    // It's unclear to me how manipulating the selection DAG here forces callers
+    // to provide arguments in registers instead of on the stack.
+ SDValue LogTypeId = getValue(I.getArgOperand(0)); + SDValue LogEntryVal = getValue(I.getArgOperand(1)); + SDValue StrSizeVal = getValue(I.getArgOperand(2)); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Chain = getRoot(); + Ops.push_back(LogTypeId); + Ops.push_back(LogEntryVal); + Ops.push_back(StrSizeVal); + Ops.push_back(Chain); + + // We need to enforce the calling convention for the callsite, so that + // argument ordering is enforced correctly, and that register allocation can + // see that some registers may be assumed clobbered and have to preserve + // them across calls to the intrinsic. + MachineSDNode *MN = DAG.getMachineNode( + TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, DL, NodeTys, Ops); + SDValue patchableNode = SDValue(MN, 0); + DAG.setRoot(patchableNode); + setValue(&I, patchableNode); + return nullptr; + } case Intrinsic::experimental_deoptimize: LowerDeoptimizeCall(&I); return nullptr; Index: llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp +++ llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp @@ -1001,6 +1001,21 @@ return MBB; } +MachineBasicBlock * +TargetLoweringBase::emitXRayTypedEvent(MachineInstr &MI, + MachineBasicBlock *MBB) const { + assert(MI.getOpcode() == TargetOpcode::PATCHABLE_TYPED_EVENT_CALL && + "Called emitXRayTypedEvent on the wrong MI!"); + auto &MF = *MI.getMF(); + auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc()); + for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx) + MIB.add(MI.getOperand(OpIdx)); + + MBB->insert(MachineBasicBlock::iterator(MI), MIB); + MI.eraseFromParent(); + return MBB; +} + /// findRepresentativeClass - Return the largest legal super-reg register class /// of the register class for the specified type and its associated "cost". 
// This function is in TargetLowering because it uses RegClassForVT which would Index: llvm/trunk/lib/Target/X86/X86AsmPrinter.h =================================================================== --- llvm/trunk/lib/Target/X86/X86AsmPrinter.h +++ llvm/trunk/lib/Target/X86/X86AsmPrinter.h @@ -95,6 +95,8 @@ void LowerPATCHABLE_RET(const MachineInstr &MI, X86MCInstLower &MCIL); void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLower &MCIL); void LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, X86MCInstLower &MCIL); + void LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI, + X86MCInstLower &MCIL); void LowerFENTRY_CALL(const MachineInstr &MI, X86MCInstLower &MCIL); Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -28219,6 +28219,9 @@ case TargetOpcode::PATCHABLE_EVENT_CALL: return emitXRayCustomEvent(MI, BB); + case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL: + return emitXRayTypedEvent(MI, BB); + case X86::LCMPXCHG8B: { const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); // In addition to 4 E[ABCD] registers implied by encoding, CMPXCHG8B Index: llvm/trunk/lib/Target/X86/X86MCInstLower.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86MCInstLower.cpp +++ llvm/trunk/lib/Target/X86/X86MCInstLower.cpp @@ -56,6 +56,7 @@ const TargetMachine &TM; const MCAsmInfo &MAI; X86AsmPrinter &AsmPrinter; + public: X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter); @@ -115,13 +116,12 @@ return MF.getMMI().getObjFileInfo(); } - /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol /// operand to an MCSymbol. -MCSymbol *X86MCInstLower:: -GetSymbolFromOperand(const MachineOperand &MO) const { +MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const { const DataLayout &DL = MF.getDataLayout(); - assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference"); + assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && + "Isn't a symbol reference"); MCSymbol *Sym = nullptr; SmallString<128> Name; @@ -158,17 +158,17 @@ // If the target flags on the operand changes the name of the symbol, do that // before we return the symbol. switch (MO.getTargetFlags()) { - default: break; + default: + break; case X86II::MO_DARWIN_NONLAZY: case X86II::MO_DARWIN_NONLAZY_PIC_BASE: { MachineModuleInfoImpl::StubValueTy &StubSym = - getMachOMMI().getGVStubEntry(Sym); + getMachOMMI().getGVStubEntry(Sym); if (!StubSym.getPointer()) { assert(MO.isGlobal() && "Extern symbol not handled yet"); - StubSym = - MachineModuleInfoImpl:: - StubValueTy(AsmPrinter.getSymbol(MO.getGlobal()), - !MO.getGlobal()->hasInternalLinkage()); + StubSym = MachineModuleInfoImpl::StubValueTy( + AsmPrinter.getSymbol(MO.getGlobal()), + !MO.getGlobal()->hasInternalLinkage()); } break; } @@ -185,44 +185,74 @@ MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; switch (MO.getTargetFlags()) { - default: llvm_unreachable("Unknown target flag on GV operand"); - case X86II::MO_NO_FLAG: // No flag. + default: + llvm_unreachable("Unknown target flag on GV operand"); + case X86II::MO_NO_FLAG: // No flag. // These affect the name of the symbol, not any suffix. 
case X86II::MO_DARWIN_NONLAZY: case X86II::MO_DLLIMPORT: break; - case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break; + case X86II::MO_TLVP: + RefKind = MCSymbolRefExpr::VK_TLVP; + break; case X86II::MO_TLVP_PIC_BASE: Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); // Subtract the pic base. - Expr = MCBinaryExpr::createSub(Expr, - MCSymbolRefExpr::create(MF.getPICBaseSymbol(), - Ctx), - Ctx); - break; - case X86II::MO_SECREL: RefKind = MCSymbolRefExpr::VK_SECREL; break; - case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break; - case X86II::MO_TLSLD: RefKind = MCSymbolRefExpr::VK_TLSLD; break; - case X86II::MO_TLSLDM: RefKind = MCSymbolRefExpr::VK_TLSLDM; break; - case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break; - case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break; - case X86II::MO_TPOFF: RefKind = MCSymbolRefExpr::VK_TPOFF; break; - case X86II::MO_DTPOFF: RefKind = MCSymbolRefExpr::VK_DTPOFF; break; - case X86II::MO_NTPOFF: RefKind = MCSymbolRefExpr::VK_NTPOFF; break; - case X86II::MO_GOTNTPOFF: RefKind = MCSymbolRefExpr::VK_GOTNTPOFF; break; - case X86II::MO_GOTPCREL: RefKind = MCSymbolRefExpr::VK_GOTPCREL; break; - case X86II::MO_GOT: RefKind = MCSymbolRefExpr::VK_GOT; break; - case X86II::MO_GOTOFF: RefKind = MCSymbolRefExpr::VK_GOTOFF; break; - case X86II::MO_PLT: RefKind = MCSymbolRefExpr::VK_PLT; break; - case X86II::MO_ABS8: RefKind = MCSymbolRefExpr::VK_X86_ABS8; break; + Expr = MCBinaryExpr::createSub( + Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx); + break; + case X86II::MO_SECREL: + RefKind = MCSymbolRefExpr::VK_SECREL; + break; + case X86II::MO_TLSGD: + RefKind = MCSymbolRefExpr::VK_TLSGD; + break; + case X86II::MO_TLSLD: + RefKind = MCSymbolRefExpr::VK_TLSLD; + break; + case X86II::MO_TLSLDM: + RefKind = MCSymbolRefExpr::VK_TLSLDM; + break; + case X86II::MO_GOTTPOFF: + RefKind = MCSymbolRefExpr::VK_GOTTPOFF; + break; + case X86II::MO_INDNTPOFF: + RefKind = MCSymbolRefExpr::VK_INDNTPOFF; + break; + case X86II::MO_TPOFF: + RefKind = MCSymbolRefExpr::VK_TPOFF; + break; + case X86II::MO_DTPOFF: + RefKind = MCSymbolRefExpr::VK_DTPOFF; + break; + case X86II::MO_NTPOFF: + RefKind = MCSymbolRefExpr::VK_NTPOFF; + break; + case X86II::MO_GOTNTPOFF: + RefKind = MCSymbolRefExpr::VK_GOTNTPOFF; + break; + case X86II::MO_GOTPCREL: + RefKind = MCSymbolRefExpr::VK_GOTPCREL; + break; + case X86II::MO_GOT: + RefKind = MCSymbolRefExpr::VK_GOT; + break; + case X86II::MO_GOTOFF: + RefKind = MCSymbolRefExpr::VK_GOTOFF; + break; + case X86II::MO_PLT: + RefKind = MCSymbolRefExpr::VK_PLT; + break; + case X86II::MO_ABS8: + RefKind = MCSymbolRefExpr::VK_X86_ABS8; + break; case X86II::MO_PIC_BASE_OFFSET: case X86II::MO_DARWIN_NONLAZY_PIC_BASE: Expr = MCSymbolRefExpr::create(Sym, Ctx); // Subtract the pic base. 
- Expr = MCBinaryExpr::createSub(Expr, - MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), - Ctx); + Expr = MCBinaryExpr::createSub( + Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx); if (MO.isJTI()) { assert(MAI.doesSetDirectiveSuppressReloc()); // If .set directive is supported, use it to reduce the number of @@ -240,13 +270,11 @@ Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx); if (!MO.isJTI() && !MO.isMBB() && MO.getOffset()) - Expr = MCBinaryExpr::createAdd(Expr, - MCConstantExpr::create(MO.getOffset(), Ctx), - Ctx); + Expr = MCBinaryExpr::createAdd( + Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); return MCOperand::createExpr(Expr); } - /// \brief Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with /// a short fixed-register form. static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) { @@ -255,7 +283,8 @@ (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) && ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() && Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) || - Inst.getNumOperands() == 2) && "Unexpected instruction!"); + Inst.getNumOperands() == 2) && + "Unexpected instruction!"); // Check whether the destination register can be fixed. unsigned Reg = Inst.getOperand(0).getReg(); @@ -277,7 +306,7 @@ switch (Inst.getOpcode()) { default: llvm_unreachable("Unexpected instruction!"); - case X86::MOVSX16rr8: // movsbw %al, %ax --> cbtw + case X86::MOVSX16rr8: // movsbw %al, %ax --> cbtw if (Op0 == X86::AX && Op1 == X86::AL) NewOpcode = X86::CBW; break; @@ -309,14 +338,14 @@ unsigned AddrBase = IsStore; unsigned RegOp = IsStore ? 0 : 5; unsigned AddrOp = AddrBase + 3; - assert(Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() && - Inst.getOperand(AddrBase + X86::AddrBaseReg).isReg() && - Inst.getOperand(AddrBase + X86::AddrScaleAmt).isImm() && - Inst.getOperand(AddrBase + X86::AddrIndexReg).isReg() && - Inst.getOperand(AddrBase + X86::AddrSegmentReg).isReg() && - (Inst.getOperand(AddrOp).isExpr() || - Inst.getOperand(AddrOp).isImm()) && - "Unexpected instruction!"); + assert( + Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() && + Inst.getOperand(AddrBase + X86::AddrBaseReg).isReg() && + Inst.getOperand(AddrBase + X86::AddrScaleAmt).isImm() && + Inst.getOperand(AddrBase + X86::AddrIndexReg).isReg() && + Inst.getOperand(AddrBase + X86::AddrSegmentReg).isReg() && + (Inst.getOperand(AddrOp).isExpr() || Inst.getOperand(AddrOp).isImm()) && + "Unexpected instruction!"); // Check whether the destination register can be fixed. unsigned Reg = Inst.getOperand(RegOp).getReg(); @@ -401,9 +430,9 @@ case X86::LEA16r: case X86::LEA32r: // LEA should have a segment register, but it must be empty. 
- assert(OutMI.getNumOperands() == 1+X86::AddrNumOperands && + assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands && "Unexpected # of LEA operands"); - assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 && + assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 && "LEA has segment specified!"); break; @@ -426,20 +455,47 @@ X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) { unsigned NewOpc; switch (OutMI.getOpcode()) { - default: llvm_unreachable("Invalid opcode"); - case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break; - case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break; - case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break; - case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break; - case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break; - case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break; - case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break; - case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break; - case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break; - case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break; - case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break; - case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break; - case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break; + default: + llvm_unreachable("Invalid opcode"); + case X86::VMOVZPQILo2PQIrr: + NewOpc = X86::VMOVPQI2QIrr; + break; + case X86::VMOVAPDrr: + NewOpc = X86::VMOVAPDrr_REV; + break; + case X86::VMOVAPDYrr: + NewOpc = X86::VMOVAPDYrr_REV; + break; + case X86::VMOVAPSrr: + NewOpc = X86::VMOVAPSrr_REV; + break; + case X86::VMOVAPSYrr: + NewOpc = X86::VMOVAPSYrr_REV; + break; + case X86::VMOVDQArr: + NewOpc = X86::VMOVDQArr_REV; + break; + case X86::VMOVDQAYrr: + NewOpc = X86::VMOVDQAYrr_REV; + break; + case X86::VMOVDQUrr: + NewOpc = X86::VMOVDQUrr_REV; + break; + case X86::VMOVDQUYrr: + NewOpc = X86::VMOVDQUYrr_REV; + break; + case X86::VMOVUPDrr: + NewOpc = X86::VMOVUPDrr_REV; + break; + case X86::VMOVUPDYrr: + NewOpc = X86::VMOVUPDYrr_REV; + break; + case X86::VMOVUPSrr: + NewOpc = X86::VMOVUPSrr_REV; + break; + case X86::VMOVUPSYrr: + NewOpc = X86::VMOVUPSYrr_REV; + break; } OutMI.setOpcode(NewOpc); } @@ -451,9 +507,14 @@ X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) { unsigned NewOpc; switch (OutMI.getOpcode()) { - default: llvm_unreachable("Invalid opcode"); - case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break; - case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break; + default: + llvm_unreachable("Invalid opcode"); + case X86::VMOVSDrr: + NewOpc = X86::VMOVSDrr_REV; + break; + case X86::VMOVSSrr: + NewOpc = X86::VMOVSSrr_REV; + break; } OutMI.setOpcode(NewOpc); } @@ -499,24 +560,30 @@ break; } - // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump instruction. - { unsigned Opcode; - case X86::TAILJMPr: Opcode = X86::JMP32r; goto SetTailJmpOpcode; - case X86::TAILJMPd: - case X86::TAILJMPd64: Opcode = X86::JMP_1; goto SetTailJmpOpcode; - case X86::TAILJMPd_CC: - case X86::TAILJMPd64_CC: - Opcode = X86::GetCondBranchFromCond( - static_cast(MI->getOperand(1).getImm())); - goto SetTailJmpOpcode; - - SetTailJmpOpcode: - MCOperand Saved = OutMI.getOperand(0); - OutMI = MCInst(); - OutMI.setOpcode(Opcode); - OutMI.addOperand(Saved); - break; - } + // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump + // instruction. 
+ { + unsigned Opcode; + case X86::TAILJMPr: + Opcode = X86::JMP32r; + goto SetTailJmpOpcode; + case X86::TAILJMPd: + case X86::TAILJMPd64: + Opcode = X86::JMP_1; + goto SetTailJmpOpcode; + case X86::TAILJMPd_CC: + case X86::TAILJMPd64_CC: + Opcode = X86::GetCondBranchFromCond( + static_cast(MI->getOperand(1).getImm())); + goto SetTailJmpOpcode; + + SetTailJmpOpcode: + MCOperand Saved = OutMI.getOperand(0); + OutMI = MCInst(); + OutMI.setOpcode(Opcode); + OutMI.addOperand(Saved); + break; + } case X86::DEC16r: case X86::DEC32r: @@ -526,11 +593,20 @@ if (!AsmPrinter.getSubtarget().is64Bit()) { unsigned Opcode; switch (OutMI.getOpcode()) { - default: llvm_unreachable("Invalid opcode"); - case X86::DEC16r: Opcode = X86::DEC16r_alt; break; - case X86::DEC32r: Opcode = X86::DEC32r_alt; break; - case X86::INC16r: Opcode = X86::INC16r_alt; break; - case X86::INC32r: Opcode = X86::INC32r_alt; break; + default: + llvm_unreachable("Invalid opcode"); + case X86::DEC16r: + Opcode = X86::DEC16r_alt; + break; + case X86::DEC32r: + Opcode = X86::DEC32r_alt; + break; + case X86::INC16r: + Opcode = X86::INC16r_alt; + break; + case X86::INC32r: + Opcode = X86::INC32r_alt; + break; } OutMI.setOpcode(Opcode); } @@ -539,63 +615,169 @@ // These are pseudo-ops for OR to help with the OR->ADD transformation. We do // this with an ugly goto in case the resultant OR uses EAX and needs the // short form. - case X86::ADD16rr_DB: OutMI.setOpcode(X86::OR16rr); goto ReSimplify; - case X86::ADD32rr_DB: OutMI.setOpcode(X86::OR32rr); goto ReSimplify; - case X86::ADD64rr_DB: OutMI.setOpcode(X86::OR64rr); goto ReSimplify; - case X86::ADD16ri_DB: OutMI.setOpcode(X86::OR16ri); goto ReSimplify; - case X86::ADD32ri_DB: OutMI.setOpcode(X86::OR32ri); goto ReSimplify; - case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify; - case X86::ADD16ri8_DB: OutMI.setOpcode(X86::OR16ri8); goto ReSimplify; - case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify; - case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify; + case X86::ADD16rr_DB: + OutMI.setOpcode(X86::OR16rr); + goto ReSimplify; + case X86::ADD32rr_DB: + OutMI.setOpcode(X86::OR32rr); + goto ReSimplify; + case X86::ADD64rr_DB: + OutMI.setOpcode(X86::OR64rr); + goto ReSimplify; + case X86::ADD16ri_DB: + OutMI.setOpcode(X86::OR16ri); + goto ReSimplify; + case X86::ADD32ri_DB: + OutMI.setOpcode(X86::OR32ri); + goto ReSimplify; + case X86::ADD64ri32_DB: + OutMI.setOpcode(X86::OR64ri32); + goto ReSimplify; + case X86::ADD16ri8_DB: + OutMI.setOpcode(X86::OR16ri8); + goto ReSimplify; + case X86::ADD32ri8_DB: + OutMI.setOpcode(X86::OR32ri8); + goto ReSimplify; + case X86::ADD64ri8_DB: + OutMI.setOpcode(X86::OR64ri8); + goto ReSimplify; // Atomic load and store require a separate pseudo-inst because Acquire // implies mayStore and Release implies mayLoad; fix these to regular MOV // instructions here - case X86::ACQUIRE_MOV8rm: OutMI.setOpcode(X86::MOV8rm); goto ReSimplify; - case X86::ACQUIRE_MOV16rm: OutMI.setOpcode(X86::MOV16rm); goto ReSimplify; - case X86::ACQUIRE_MOV32rm: OutMI.setOpcode(X86::MOV32rm); goto ReSimplify; - case X86::ACQUIRE_MOV64rm: OutMI.setOpcode(X86::MOV64rm); goto ReSimplify; - case X86::RELEASE_MOV8mr: OutMI.setOpcode(X86::MOV8mr); goto ReSimplify; - case X86::RELEASE_MOV16mr: OutMI.setOpcode(X86::MOV16mr); goto ReSimplify; - case X86::RELEASE_MOV32mr: OutMI.setOpcode(X86::MOV32mr); goto ReSimplify; - case X86::RELEASE_MOV64mr: OutMI.setOpcode(X86::MOV64mr); goto ReSimplify; - case X86::RELEASE_MOV8mi: 
OutMI.setOpcode(X86::MOV8mi); goto ReSimplify; - case X86::RELEASE_MOV16mi: OutMI.setOpcode(X86::MOV16mi); goto ReSimplify; - case X86::RELEASE_MOV32mi: OutMI.setOpcode(X86::MOV32mi); goto ReSimplify; - case X86::RELEASE_MOV64mi32: OutMI.setOpcode(X86::MOV64mi32); goto ReSimplify; - case X86::RELEASE_ADD8mi: OutMI.setOpcode(X86::ADD8mi); goto ReSimplify; - case X86::RELEASE_ADD8mr: OutMI.setOpcode(X86::ADD8mr); goto ReSimplify; - case X86::RELEASE_ADD32mi: OutMI.setOpcode(X86::ADD32mi); goto ReSimplify; - case X86::RELEASE_ADD32mr: OutMI.setOpcode(X86::ADD32mr); goto ReSimplify; - case X86::RELEASE_ADD64mi32: OutMI.setOpcode(X86::ADD64mi32); goto ReSimplify; - case X86::RELEASE_ADD64mr: OutMI.setOpcode(X86::ADD64mr); goto ReSimplify; - case X86::RELEASE_AND8mi: OutMI.setOpcode(X86::AND8mi); goto ReSimplify; - case X86::RELEASE_AND8mr: OutMI.setOpcode(X86::AND8mr); goto ReSimplify; - case X86::RELEASE_AND32mi: OutMI.setOpcode(X86::AND32mi); goto ReSimplify; - case X86::RELEASE_AND32mr: OutMI.setOpcode(X86::AND32mr); goto ReSimplify; - case X86::RELEASE_AND64mi32: OutMI.setOpcode(X86::AND64mi32); goto ReSimplify; - case X86::RELEASE_AND64mr: OutMI.setOpcode(X86::AND64mr); goto ReSimplify; - case X86::RELEASE_OR8mi: OutMI.setOpcode(X86::OR8mi); goto ReSimplify; - case X86::RELEASE_OR8mr: OutMI.setOpcode(X86::OR8mr); goto ReSimplify; - case X86::RELEASE_OR32mi: OutMI.setOpcode(X86::OR32mi); goto ReSimplify; - case X86::RELEASE_OR32mr: OutMI.setOpcode(X86::OR32mr); goto ReSimplify; - case X86::RELEASE_OR64mi32: OutMI.setOpcode(X86::OR64mi32); goto ReSimplify; - case X86::RELEASE_OR64mr: OutMI.setOpcode(X86::OR64mr); goto ReSimplify; - case X86::RELEASE_XOR8mi: OutMI.setOpcode(X86::XOR8mi); goto ReSimplify; - case X86::RELEASE_XOR8mr: OutMI.setOpcode(X86::XOR8mr); goto ReSimplify; - case X86::RELEASE_XOR32mi: OutMI.setOpcode(X86::XOR32mi); goto ReSimplify; - case X86::RELEASE_XOR32mr: OutMI.setOpcode(X86::XOR32mr); goto ReSimplify; - case X86::RELEASE_XOR64mi32: OutMI.setOpcode(X86::XOR64mi32); goto ReSimplify; - case X86::RELEASE_XOR64mr: OutMI.setOpcode(X86::XOR64mr); goto ReSimplify; - case X86::RELEASE_INC8m: OutMI.setOpcode(X86::INC8m); goto ReSimplify; - case X86::RELEASE_INC16m: OutMI.setOpcode(X86::INC16m); goto ReSimplify; - case X86::RELEASE_INC32m: OutMI.setOpcode(X86::INC32m); goto ReSimplify; - case X86::RELEASE_INC64m: OutMI.setOpcode(X86::INC64m); goto ReSimplify; - case X86::RELEASE_DEC8m: OutMI.setOpcode(X86::DEC8m); goto ReSimplify; - case X86::RELEASE_DEC16m: OutMI.setOpcode(X86::DEC16m); goto ReSimplify; - case X86::RELEASE_DEC32m: OutMI.setOpcode(X86::DEC32m); goto ReSimplify; - case X86::RELEASE_DEC64m: OutMI.setOpcode(X86::DEC64m); goto ReSimplify; + case X86::ACQUIRE_MOV8rm: + OutMI.setOpcode(X86::MOV8rm); + goto ReSimplify; + case X86::ACQUIRE_MOV16rm: + OutMI.setOpcode(X86::MOV16rm); + goto ReSimplify; + case X86::ACQUIRE_MOV32rm: + OutMI.setOpcode(X86::MOV32rm); + goto ReSimplify; + case X86::ACQUIRE_MOV64rm: + OutMI.setOpcode(X86::MOV64rm); + goto ReSimplify; + case X86::RELEASE_MOV8mr: + OutMI.setOpcode(X86::MOV8mr); + goto ReSimplify; + case X86::RELEASE_MOV16mr: + OutMI.setOpcode(X86::MOV16mr); + goto ReSimplify; + case X86::RELEASE_MOV32mr: + OutMI.setOpcode(X86::MOV32mr); + goto ReSimplify; + case X86::RELEASE_MOV64mr: + OutMI.setOpcode(X86::MOV64mr); + goto ReSimplify; + case X86::RELEASE_MOV8mi: + OutMI.setOpcode(X86::MOV8mi); + goto ReSimplify; + case X86::RELEASE_MOV16mi: + OutMI.setOpcode(X86::MOV16mi); + goto ReSimplify; + case X86::RELEASE_MOV32mi: + 
OutMI.setOpcode(X86::MOV32mi); + goto ReSimplify; + case X86::RELEASE_MOV64mi32: + OutMI.setOpcode(X86::MOV64mi32); + goto ReSimplify; + case X86::RELEASE_ADD8mi: + OutMI.setOpcode(X86::ADD8mi); + goto ReSimplify; + case X86::RELEASE_ADD8mr: + OutMI.setOpcode(X86::ADD8mr); + goto ReSimplify; + case X86::RELEASE_ADD32mi: + OutMI.setOpcode(X86::ADD32mi); + goto ReSimplify; + case X86::RELEASE_ADD32mr: + OutMI.setOpcode(X86::ADD32mr); + goto ReSimplify; + case X86::RELEASE_ADD64mi32: + OutMI.setOpcode(X86::ADD64mi32); + goto ReSimplify; + case X86::RELEASE_ADD64mr: + OutMI.setOpcode(X86::ADD64mr); + goto ReSimplify; + case X86::RELEASE_AND8mi: + OutMI.setOpcode(X86::AND8mi); + goto ReSimplify; + case X86::RELEASE_AND8mr: + OutMI.setOpcode(X86::AND8mr); + goto ReSimplify; + case X86::RELEASE_AND32mi: + OutMI.setOpcode(X86::AND32mi); + goto ReSimplify; + case X86::RELEASE_AND32mr: + OutMI.setOpcode(X86::AND32mr); + goto ReSimplify; + case X86::RELEASE_AND64mi32: + OutMI.setOpcode(X86::AND64mi32); + goto ReSimplify; + case X86::RELEASE_AND64mr: + OutMI.setOpcode(X86::AND64mr); + goto ReSimplify; + case X86::RELEASE_OR8mi: + OutMI.setOpcode(X86::OR8mi); + goto ReSimplify; + case X86::RELEASE_OR8mr: + OutMI.setOpcode(X86::OR8mr); + goto ReSimplify; + case X86::RELEASE_OR32mi: + OutMI.setOpcode(X86::OR32mi); + goto ReSimplify; + case X86::RELEASE_OR32mr: + OutMI.setOpcode(X86::OR32mr); + goto ReSimplify; + case X86::RELEASE_OR64mi32: + OutMI.setOpcode(X86::OR64mi32); + goto ReSimplify; + case X86::RELEASE_OR64mr: + OutMI.setOpcode(X86::OR64mr); + goto ReSimplify; + case X86::RELEASE_XOR8mi: + OutMI.setOpcode(X86::XOR8mi); + goto ReSimplify; + case X86::RELEASE_XOR8mr: + OutMI.setOpcode(X86::XOR8mr); + goto ReSimplify; + case X86::RELEASE_XOR32mi: + OutMI.setOpcode(X86::XOR32mi); + goto ReSimplify; + case X86::RELEASE_XOR32mr: + OutMI.setOpcode(X86::XOR32mr); + goto ReSimplify; + case X86::RELEASE_XOR64mi32: + OutMI.setOpcode(X86::XOR64mi32); + goto ReSimplify; + case X86::RELEASE_XOR64mr: + OutMI.setOpcode(X86::XOR64mr); + goto ReSimplify; + case X86::RELEASE_INC8m: + OutMI.setOpcode(X86::INC8m); + goto ReSimplify; + case X86::RELEASE_INC16m: + OutMI.setOpcode(X86::INC16m); + goto ReSimplify; + case X86::RELEASE_INC32m: + OutMI.setOpcode(X86::INC32m); + goto ReSimplify; + case X86::RELEASE_INC64m: + OutMI.setOpcode(X86::INC64m); + goto ReSimplify; + case X86::RELEASE_DEC8m: + OutMI.setOpcode(X86::DEC8m); + goto ReSimplify; + case X86::RELEASE_DEC16m: + OutMI.setOpcode(X86::DEC16m); + goto ReSimplify; + case X86::RELEASE_DEC32m: + OutMI.setOpcode(X86::DEC32m); + goto ReSimplify; + case X86::RELEASE_DEC64m: + OutMI.setOpcode(X86::DEC64m); + goto ReSimplify; // We don't currently select the correct instruction form for instructions // which have a short %eax, etc. form. 
Handle this by custom lowering, for @@ -614,68 +796,181 @@ case X86::MOV32rm: { unsigned NewOpc; switch (OutMI.getOpcode()) { - default: llvm_unreachable("Invalid opcode"); + default: + llvm_unreachable("Invalid opcode"); case X86::MOV8mr_NOREX: - case X86::MOV8mr: NewOpc = X86::MOV8o32a; break; + case X86::MOV8mr: + NewOpc = X86::MOV8o32a; + break; case X86::MOV8rm_NOREX: - case X86::MOV8rm: NewOpc = X86::MOV8ao32; break; - case X86::MOV16mr: NewOpc = X86::MOV16o32a; break; - case X86::MOV16rm: NewOpc = X86::MOV16ao32; break; - case X86::MOV32mr: NewOpc = X86::MOV32o32a; break; - case X86::MOV32rm: NewOpc = X86::MOV32ao32; break; + case X86::MOV8rm: + NewOpc = X86::MOV8ao32; + break; + case X86::MOV16mr: + NewOpc = X86::MOV16o32a; + break; + case X86::MOV16rm: + NewOpc = X86::MOV16ao32; + break; + case X86::MOV32mr: + NewOpc = X86::MOV32o32a; + break; + case X86::MOV32rm: + NewOpc = X86::MOV32ao32; + break; } SimplifyShortMoveForm(AsmPrinter, OutMI, NewOpc); break; } - case X86::ADC8ri: case X86::ADC16ri: case X86::ADC32ri: case X86::ADC64ri32: - case X86::ADD8ri: case X86::ADD16ri: case X86::ADD32ri: case X86::ADD64ri32: - case X86::AND8ri: case X86::AND16ri: case X86::AND32ri: case X86::AND64ri32: - case X86::CMP8ri: case X86::CMP16ri: case X86::CMP32ri: case X86::CMP64ri32: - case X86::OR8ri: case X86::OR16ri: case X86::OR32ri: case X86::OR64ri32: - case X86::SBB8ri: case X86::SBB16ri: case X86::SBB32ri: case X86::SBB64ri32: - case X86::SUB8ri: case X86::SUB16ri: case X86::SUB32ri: case X86::SUB64ri32: - case X86::TEST8ri:case X86::TEST16ri:case X86::TEST32ri:case X86::TEST64ri32: - case X86::XOR8ri: case X86::XOR16ri: case X86::XOR32ri: case X86::XOR64ri32: { + case X86::ADC8ri: + case X86::ADC16ri: + case X86::ADC32ri: + case X86::ADC64ri32: + case X86::ADD8ri: + case X86::ADD16ri: + case X86::ADD32ri: + case X86::ADD64ri32: + case X86::AND8ri: + case X86::AND16ri: + case X86::AND32ri: + case X86::AND64ri32: + case X86::CMP8ri: + case X86::CMP16ri: + case X86::CMP32ri: + case X86::CMP64ri32: + case X86::OR8ri: + case X86::OR16ri: + case X86::OR32ri: + case X86::OR64ri32: + case X86::SBB8ri: + case X86::SBB16ri: + case X86::SBB32ri: + case X86::SBB64ri32: + case X86::SUB8ri: + case X86::SUB16ri: + case X86::SUB32ri: + case X86::SUB64ri32: + case X86::TEST8ri: + case X86::TEST16ri: + case X86::TEST32ri: + case X86::TEST64ri32: + case X86::XOR8ri: + case X86::XOR16ri: + case X86::XOR32ri: + case X86::XOR64ri32: { unsigned NewOpc; switch (OutMI.getOpcode()) { - default: llvm_unreachable("Invalid opcode"); - case X86::ADC8ri: NewOpc = X86::ADC8i8; break; - case X86::ADC16ri: NewOpc = X86::ADC16i16; break; - case X86::ADC32ri: NewOpc = X86::ADC32i32; break; - case X86::ADC64ri32: NewOpc = X86::ADC64i32; break; - case X86::ADD8ri: NewOpc = X86::ADD8i8; break; - case X86::ADD16ri: NewOpc = X86::ADD16i16; break; - case X86::ADD32ri: NewOpc = X86::ADD32i32; break; - case X86::ADD64ri32: NewOpc = X86::ADD64i32; break; - case X86::AND8ri: NewOpc = X86::AND8i8; break; - case X86::AND16ri: NewOpc = X86::AND16i16; break; - case X86::AND32ri: NewOpc = X86::AND32i32; break; - case X86::AND64ri32: NewOpc = X86::AND64i32; break; - case X86::CMP8ri: NewOpc = X86::CMP8i8; break; - case X86::CMP16ri: NewOpc = X86::CMP16i16; break; - case X86::CMP32ri: NewOpc = X86::CMP32i32; break; - case X86::CMP64ri32: NewOpc = X86::CMP64i32; break; - case X86::OR8ri: NewOpc = X86::OR8i8; break; - case X86::OR16ri: NewOpc = X86::OR16i16; break; - case X86::OR32ri: NewOpc = X86::OR32i32; break; - case X86::OR64ri32: 
NewOpc = X86::OR64i32; break; - case X86::SBB8ri: NewOpc = X86::SBB8i8; break; - case X86::SBB16ri: NewOpc = X86::SBB16i16; break; - case X86::SBB32ri: NewOpc = X86::SBB32i32; break; - case X86::SBB64ri32: NewOpc = X86::SBB64i32; break; - case X86::SUB8ri: NewOpc = X86::SUB8i8; break; - case X86::SUB16ri: NewOpc = X86::SUB16i16; break; - case X86::SUB32ri: NewOpc = X86::SUB32i32; break; - case X86::SUB64ri32: NewOpc = X86::SUB64i32; break; - case X86::TEST8ri: NewOpc = X86::TEST8i8; break; - case X86::TEST16ri: NewOpc = X86::TEST16i16; break; - case X86::TEST32ri: NewOpc = X86::TEST32i32; break; - case X86::TEST64ri32: NewOpc = X86::TEST64i32; break; - case X86::XOR8ri: NewOpc = X86::XOR8i8; break; - case X86::XOR16ri: NewOpc = X86::XOR16i16; break; - case X86::XOR32ri: NewOpc = X86::XOR32i32; break; - case X86::XOR64ri32: NewOpc = X86::XOR64i32; break; + default: + llvm_unreachable("Invalid opcode"); + case X86::ADC8ri: + NewOpc = X86::ADC8i8; + break; + case X86::ADC16ri: + NewOpc = X86::ADC16i16; + break; + case X86::ADC32ri: + NewOpc = X86::ADC32i32; + break; + case X86::ADC64ri32: + NewOpc = X86::ADC64i32; + break; + case X86::ADD8ri: + NewOpc = X86::ADD8i8; + break; + case X86::ADD16ri: + NewOpc = X86::ADD16i16; + break; + case X86::ADD32ri: + NewOpc = X86::ADD32i32; + break; + case X86::ADD64ri32: + NewOpc = X86::ADD64i32; + break; + case X86::AND8ri: + NewOpc = X86::AND8i8; + break; + case X86::AND16ri: + NewOpc = X86::AND16i16; + break; + case X86::AND32ri: + NewOpc = X86::AND32i32; + break; + case X86::AND64ri32: + NewOpc = X86::AND64i32; + break; + case X86::CMP8ri: + NewOpc = X86::CMP8i8; + break; + case X86::CMP16ri: + NewOpc = X86::CMP16i16; + break; + case X86::CMP32ri: + NewOpc = X86::CMP32i32; + break; + case X86::CMP64ri32: + NewOpc = X86::CMP64i32; + break; + case X86::OR8ri: + NewOpc = X86::OR8i8; + break; + case X86::OR16ri: + NewOpc = X86::OR16i16; + break; + case X86::OR32ri: + NewOpc = X86::OR32i32; + break; + case X86::OR64ri32: + NewOpc = X86::OR64i32; + break; + case X86::SBB8ri: + NewOpc = X86::SBB8i8; + break; + case X86::SBB16ri: + NewOpc = X86::SBB16i16; + break; + case X86::SBB32ri: + NewOpc = X86::SBB32i32; + break; + case X86::SBB64ri32: + NewOpc = X86::SBB64i32; + break; + case X86::SUB8ri: + NewOpc = X86::SUB8i8; + break; + case X86::SUB16ri: + NewOpc = X86::SUB16i16; + break; + case X86::SUB32ri: + NewOpc = X86::SUB32i32; + break; + case X86::SUB64ri32: + NewOpc = X86::SUB64i32; + break; + case X86::TEST8ri: + NewOpc = X86::TEST8i8; + break; + case X86::TEST16ri: + NewOpc = X86::TEST16i16; + break; + case X86::TEST32ri: + NewOpc = X86::TEST32i32; + break; + case X86::TEST64ri32: + NewOpc = X86::TEST64i32; + break; + case X86::XOR8ri: + NewOpc = X86::XOR8i8; + break; + case X86::XOR16ri: + NewOpc = X86::XOR16i16; + break; + case X86::XOR32ri: + NewOpc = X86::XOR32i32; + break; + case X86::XOR64ri32: + NewOpc = X86::XOR64i32; + break; } SimplifyShortImmForm(OutMI, NewOpc); break; @@ -705,18 +1000,18 @@ MCSymbolRefExpr::VariantKind SRVK; switch (MI.getOpcode()) { - case X86::TLS_addr32: - case X86::TLS_addr64: - SRVK = MCSymbolRefExpr::VK_TLSGD; - break; - case X86::TLS_base_addr32: - SRVK = MCSymbolRefExpr::VK_TLSLDM; - break; - case X86::TLS_base_addr64: - SRVK = MCSymbolRefExpr::VK_TLSLD; - break; - default: - llvm_unreachable("unexpected opcode"); + case X86::TLS_addr32: + case X86::TLS_addr64: + SRVK = MCSymbolRefExpr::VK_TLSGD; + break; + case X86::TLS_base_addr32: + SRVK = MCSymbolRefExpr::VK_TLSLDM; + break; + case X86::TLS_base_addr64: + SRVK = 
MCSymbolRefExpr::VK_TLSLD; + break; + default: + llvm_unreachable("unexpected opcode"); } MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)); @@ -759,13 +1054,11 @@ StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr"; MCSymbol *tlsGetAddr = context.getOrCreateSymbol(name); const MCSymbolRefExpr *tlsRef = - MCSymbolRefExpr::create(tlsGetAddr, - MCSymbolRefExpr::VK_PLT, - context); - - EmitAndCountInstruction(MCInstBuilder(is64Bits ? X86::CALL64pcrel32 - : X86::CALLpcrel32) - .addExpr(tlsRef)); + MCSymbolRefExpr::create(tlsGetAddr, MCSymbolRefExpr::VK_PLT, context); + + EmitAndCountInstruction( + MCInstBuilder(is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32) + .addExpr(tlsRef)); } /// \brief Emit the largest nop instruction smaller than or equal to \p NumBytes @@ -782,22 +1075,62 @@ BaseReg = X86::RAX; ScaleVal = 1; switch (NumBytes) { - case 0: llvm_unreachable("Zero nops?"); break; - case 1: NopSize = 1; Opc = X86::NOOP; break; - case 2: NopSize = 2; Opc = X86::XCHG16ar; break; - case 3: NopSize = 3; Opc = X86::NOOPL; break; - case 4: NopSize = 4; Opc = X86::NOOPL; Displacement = 8; break; - case 5: NopSize = 5; Opc = X86::NOOPL; Displacement = 8; - IndexReg = X86::RAX; break; - case 6: NopSize = 6; Opc = X86::NOOPW; Displacement = 8; - IndexReg = X86::RAX; break; - case 7: NopSize = 7; Opc = X86::NOOPL; Displacement = 512; break; - case 8: NopSize = 8; Opc = X86::NOOPL; Displacement = 512; - IndexReg = X86::RAX; break; - case 9: NopSize = 9; Opc = X86::NOOPW; Displacement = 512; - IndexReg = X86::RAX; break; - default: NopSize = 10; Opc = X86::NOOPW; Displacement = 512; - IndexReg = X86::RAX; SegmentReg = X86::CS; break; + case 0: + llvm_unreachable("Zero nops?"); + break; + case 1: + NopSize = 1; + Opc = X86::NOOP; + break; + case 2: + NopSize = 2; + Opc = X86::XCHG16ar; + break; + case 3: + NopSize = 3; + Opc = X86::NOOPL; + break; + case 4: + NopSize = 4; + Opc = X86::NOOPL; + Displacement = 8; + break; + case 5: + NopSize = 5; + Opc = X86::NOOPL; + Displacement = 8; + IndexReg = X86::RAX; + break; + case 6: + NopSize = 6; + Opc = X86::NOOPW; + Displacement = 8; + IndexReg = X86::RAX; + break; + case 7: + NopSize = 7; + Opc = X86::NOOPL; + Displacement = 512; + break; + case 8: + NopSize = 8; + Opc = X86::NOOPL; + Displacement = 512; + IndexReg = X86::RAX; + break; + case 9: + NopSize = 9; + Opc = X86::NOOPW; + Displacement = 512; + IndexReg = X86::RAX; + break; + default: + NopSize = 10; + Opc = X86::NOOPW; + Displacement = 512; + IndexReg = X86::RAX; + SegmentReg = X86::CS; + break; } unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U); @@ -971,7 +1304,7 @@ unsigned NopSize = EmitNop(*OutStreamer, MinSize, Subtarget->is64Bit(), getSubtargetInfo()); assert(NopSize == MinSize && "Could not implement MinSize!"); - (void) NopSize; + (void)NopSize; } } @@ -1016,9 +1349,8 @@ break; case MachineOperand::MO_ExternalSymbol: case MachineOperand::MO_GlobalAddress: - CalleeMCOp = - MCIL.LowerSymbolOperand(CalleeMO, - MCIL.GetSymbolFromOperand(CalleeMO)); + CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO, + MCIL.GetSymbolFromOperand(CalleeMO)); break; } @@ -1084,8 +1416,10 @@ // The default C calling convention will place two arguments into %rcx and // %rdx -- so we only work with those. - unsigned UsedRegs[] = {X86::RDI, X86::RSI}; + unsigned DestRegs[] = {X86::RDI, X86::RSI}; bool UsedMask[] = {false, false}; + // Filled out in loop. + unsigned SrcRegs[] = {0, 0}; // Then we put the operands in the %rdi and %rsi registers. 
We spill the // values in the register before we clobber them, and mark them as used in @@ -1095,18 +1429,22 @@ for (unsigned I = 0; I < MI.getNumOperands(); ++I) if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) { assert(Op->isReg() && "Only support arguments in registers"); - if (Op->getReg() != UsedRegs[I]) { + SrcRegs[I] = Op->getReg(); + if (SrcRegs[I] != DestRegs[I]) { UsedMask[I] = true; EmitAndCountInstruction( - MCInstBuilder(X86::PUSH64r).addReg(UsedRegs[I])); - EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr) - .addReg(UsedRegs[I]) - .addReg(Op->getReg())); + MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I])); } else { EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo()); } } + // Now that the register values are stashed, mov arguments into place. + for (unsigned I = 0; I < MI.getNumOperands(); ++I) + if (SrcRegs[I] != DestRegs[I]) + EmitAndCountInstruction( + MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I])); + // We emit a hard dependency on the __xray_CustomEvent symbol, which is the // name of the trampoline to be implemented by the XRay runtime. auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent"); @@ -1121,7 +1459,7 @@ // Restore caller-saved and used registers. for (unsigned I = sizeof UsedMask; I-- > 0;) if (UsedMask[I]) - EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(UsedRegs[I])); + EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I])); else EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo()); @@ -1133,6 +1471,102 @@ recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 1); } +void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI, + X86MCInstLower &MCIL) { + assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64"); + + // We want to emit the following pattern, which follows the x86 calling + // convention to prepare for the trampoline call to be patched in. + // + // .p2align 1, ... + // .Lxray_event_sled_N: + // jmp +N // jump across the instrumentation sled + // ... // set up arguments in register + // callq __xray_TypedEvent@plt // force dependency to symbol + // ... + // + // + // After patching, it would look something like: + // + // nopw (2-byte nop) + // ... + // callq __xrayTypedEvent // already lowered + // ... + // + // --- + // First we emit the label and the jump. + auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true); + OutStreamer->AddComment("# XRay Typed Event Log"); + OutStreamer->EmitCodeAlignment(2); + OutStreamer->EmitLabel(CurSled); + + // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as + // an operand (computed as an offset from the jmp instruction). + // FIXME: Find another less hacky way do force the relative jump. + OutStreamer->EmitBinaryData("\xeb\x14"); + + // An x86-64 convention may place three arguments into %rcx, %rdx, and R8, + // so we'll work with those. Or we may be called via SystemV, in which case + // we don't have to do any translation. + unsigned DestRegs[] = {X86::RDI, X86::RSI, X86::RDX}; + bool UsedMask[] = {false, false, false}; + + // Will fill out src regs in the loop. + unsigned SrcRegs[] = {0, 0, 0}; + + // Then we put the operands in the SystemV registers. We spill the values in + // the registers before we clobber them, and mark them as used in UsedMask. + // In case the arguments are already in the correct register, we emit nops + // appropriately sized to keep the sled the same size in every situation. 
+  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
+    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
+      // TODO: Is register-only support adequate?
+      assert(Op->isReg() && "Only supports arguments in registers");
+      SrcRegs[I] = Op->getReg();
+      if (SrcRegs[I] != DestRegs[I]) {
+        UsedMask[I] = true;
+        EmitAndCountInstruction(
+            MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
+      } else {
+        EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo());
+      }
+    }
+
+  // In the above loop we only stash all of the destination registers or emit
+  // nops if the arguments are already in the right place. The actual moves are
+  // postponed until after all the registers are stashed so nothing is
+  // clobbered. We've already added nops to account for the size of mov and
+  // push if the register is in the right place, so we only have to worry about
+  // emitting movs.
+  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
+    if (UsedMask[I])
+      EmitAndCountInstruction(
+          MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
+
+  // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
+  // name of the trampoline to be implemented by the XRay runtime.
+  auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
+  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
+  if (isPositionIndependent())
+    TOp.setTargetFlags(X86II::MO_PLT);
+
+  // Emit the call instruction.
+  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
+                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
+
+  // Restore caller-saved and used registers.
+  for (unsigned I = sizeof UsedMask; I-- > 0;)
+    if (UsedMask[I])
+      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
+    else
+      EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo());
+
+  OutStreamer->AddComment("xray typed event end.");
+
+  // Record the sled version.
+  recordSled(CurSled, MI, SledKind::TYPED_EVENT, 0);
+}
+
 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                   X86MCInstLower &MCIL) {
   // We want to emit the following pattern:
@@ -1190,7 +1624,8 @@
   recordSled(CurSled, MI, SledKind::FUNCTION_EXIT);
 }

-void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLower &MCIL) {
+void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
+                                             X86MCInstLower &MCIL) {
   // Like PATCHABLE_RET, we have the actual instruction in the operands to this
   // instruction so we lower that particular instruction and its operands.
   // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
@@ -1244,8 +1679,7 @@
   ArrayRef<MachineConstantPoolEntry> Constants =
       MI.getParent()->getParent()->getConstantPool()->getConstants();
-  const MachineConstantPoolEntry &ConstantEntry =
-      Constants[Op.getIndex()];
+  const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()];
   // Bail if this is a machine constant pool entry, we won't be able to dig out
   // anything useful.
@@ -1258,10 +1692,8 @@
   return C;
 }
-static std::string getShuffleComment(const MachineInstr *MI,
-                                     unsigned SrcOp1Idx,
-                                     unsigned SrcOp2Idx,
-                                     ArrayRef<int> Mask) {
+static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
+                                     unsigned SrcOp2Idx, ArrayRef<int> Mask) {
   std::string Comment;
   // Compute the name for a register.
This is really goofy because we have @@ -1449,7 +1881,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86MCInstLower MCInstLowering(*MF, *this); - const X86RegisterInfo *RI = MF->getSubtarget().getRegisterInfo(); + const X86RegisterInfo *RI = + MF->getSubtarget().getRegisterInfo(); // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that // are compressed from EVEX encoding to VEX encoding. @@ -1467,7 +1900,6 @@ OutStreamer->emitRawComment("MEMBARRIER"); return; - case X86::EH_RETURN: case X86::EH_RETURN64: { // Lower these as normal, but add some comments. @@ -1519,13 +1951,14 @@ MCSymbol *PICBase = MF->getPICBaseSymbol(); // FIXME: We would like an efficient form for this, so we don't have to do a // lot of extra uniquing. - EmitAndCountInstruction(MCInstBuilder(X86::CALLpcrel32) - .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); + EmitAndCountInstruction( + MCInstBuilder(X86::CALLpcrel32) + .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); - const X86FrameLowering* FrameLowering = + const X86FrameLowering *FrameLowering = MF->getSubtarget().getFrameLowering(); bool hasFP = FrameLowering->hasFP(*MF); - + // TODO: This is needed only if we require precise CFA. bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() && !OutStreamer->getDwarfFrameInfos().back().End; @@ -1540,8 +1973,8 @@ OutStreamer->EmitLabel(PICBase); // popl $reg - EmitAndCountInstruction(MCInstBuilder(X86::POP32r) - .addReg(MI->getOperand(0).getReg())); + EmitAndCountInstruction( + MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg())); if (HasActiveDwarfFrame && !hasFP) { OutStreamer->EmitCFIAdjustCfaOffset(stackGrowth); @@ -1569,16 +2002,16 @@ const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext); const MCExpr *PICBase = - MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext); + MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext); DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext); - DotExpr = MCBinaryExpr::createAdd(MCSymbolRefExpr::create(OpSym,OutContext), - DotExpr, OutContext); + DotExpr = MCBinaryExpr::createAdd( + MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext); EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri) - .addReg(MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()) - .addExpr(DotExpr)); + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addExpr(DotExpr)); return; } case TargetOpcode::STATEPOINT: @@ -1607,10 +2040,13 @@ case TargetOpcode::PATCHABLE_TAIL_CALL: return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering); - + case TargetOpcode::PATCHABLE_EVENT_CALL: return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering); + case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL: + return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering); + case X86::MORESTACK_RET: EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget))); return; @@ -1618,9 +2054,8 @@ case X86::MORESTACK_RET_RESTORE_R10: // Return, then restore R10. 
EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget))); - EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr) - .addReg(X86::R10) - .addReg(X86::RAX)); + EmitAndCountInstruction( + MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX)); return; case X86::SEH_PushReg: @@ -1670,22 +2105,29 @@ break; unsigned SrcIdx, MaskIdx; switch (MI->getOpcode()) { - default: llvm_unreachable("Invalid opcode"); + default: + llvm_unreachable("Invalid opcode"); case X86::PSHUFBrm: case X86::VPSHUFBrm: case X86::VPSHUFBYrm: case X86::VPSHUFBZ128rm: case X86::VPSHUFBZ256rm: case X86::VPSHUFBZrm: - SrcIdx = 1; MaskIdx = 5; break; + SrcIdx = 1; + MaskIdx = 5; + break; case X86::VPSHUFBZ128rmkz: case X86::VPSHUFBZ256rmkz: case X86::VPSHUFBZrmkz: - SrcIdx = 2; MaskIdx = 6; break; + SrcIdx = 2; + MaskIdx = 6; + break; case X86::VPSHUFBZ128rmk: case X86::VPSHUFBZ256rmk: case X86::VPSHUFBZrmk: - SrcIdx = 3; MaskIdx = 7; break; + SrcIdx = 3; + MaskIdx = 7; + break; } assert(MI->getNumOperands() >= 6 && @@ -1729,35 +2171,54 @@ unsigned SrcIdx, MaskIdx; unsigned ElSize; switch (MI->getOpcode()) { - default: llvm_unreachable("Invalid opcode"); + default: + llvm_unreachable("Invalid opcode"); case X86::VPERMILPSrm: case X86::VPERMILPSYrm: case X86::VPERMILPSZ128rm: case X86::VPERMILPSZ256rm: case X86::VPERMILPSZrm: - SrcIdx = 1; MaskIdx = 5; ElSize = 32; break; + SrcIdx = 1; + MaskIdx = 5; + ElSize = 32; + break; case X86::VPERMILPSZ128rmkz: case X86::VPERMILPSZ256rmkz: case X86::VPERMILPSZrmkz: - SrcIdx = 2; MaskIdx = 6; ElSize = 32; break; + SrcIdx = 2; + MaskIdx = 6; + ElSize = 32; + break; case X86::VPERMILPSZ128rmk: case X86::VPERMILPSZ256rmk: case X86::VPERMILPSZrmk: - SrcIdx = 3; MaskIdx = 7; ElSize = 32; break; + SrcIdx = 3; + MaskIdx = 7; + ElSize = 32; + break; case X86::VPERMILPDrm: case X86::VPERMILPDYrm: case X86::VPERMILPDZ128rm: case X86::VPERMILPDZ256rm: case X86::VPERMILPDZrm: - SrcIdx = 1; MaskIdx = 5; ElSize = 64; break; + SrcIdx = 1; + MaskIdx = 5; + ElSize = 64; + break; case X86::VPERMILPDZ128rmkz: case X86::VPERMILPDZ256rmkz: case X86::VPERMILPDZrmkz: - SrcIdx = 2; MaskIdx = 6; ElSize = 64; break; + SrcIdx = 2; + MaskIdx = 6; + ElSize = 64; + break; case X86::VPERMILPDZ128rmk: case X86::VPERMILPDZ256rmk: case X86::VPERMILPDZrmk: - SrcIdx = 3; MaskIdx = 7; ElSize = 64; break; + SrcIdx = 3; + MaskIdx = 7; + ElSize = 64; + break; } assert(MI->getNumOperands() >= 6 && @@ -1789,9 +2250,16 @@ unsigned ElSize; switch (MI->getOpcode()) { - default: llvm_unreachable("Invalid opcode"); - case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break; - case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break; + default: + llvm_unreachable("Invalid opcode"); + case X86::VPERMIL2PSrm: + case X86::VPERMIL2PSYrm: + ElSize = 32; + break; + case X86::VPERMIL2PDrm: + case X86::VPERMIL2PDYrm: + ElSize = 64; + break; } const MachineOperand &MaskOp = MI->getOperand(6); @@ -1840,37 +2308,37 @@ break; } -#define MOV_CASE(Prefix, Suffix) \ - case X86::Prefix##MOVAPD##Suffix##rm: \ - case X86::Prefix##MOVAPS##Suffix##rm: \ - case X86::Prefix##MOVUPD##Suffix##rm: \ - case X86::Prefix##MOVUPS##Suffix##rm: \ - case X86::Prefix##MOVDQA##Suffix##rm: \ +#define MOV_CASE(Prefix, Suffix) \ + case X86::Prefix##MOVAPD##Suffix##rm: \ + case X86::Prefix##MOVAPS##Suffix##rm: \ + case X86::Prefix##MOVUPD##Suffix##rm: \ + case X86::Prefix##MOVUPS##Suffix##rm: \ + case X86::Prefix##MOVDQA##Suffix##rm: \ case X86::Prefix##MOVDQU##Suffix##rm: -#define MOV_AVX512_CASE(Suffix) \ - case 
X86::VMOVDQA64##Suffix##rm: \
-  case X86::VMOVDQA32##Suffix##rm: \
-  case X86::VMOVDQU64##Suffix##rm: \
-  case X86::VMOVDQU32##Suffix##rm: \
-  case X86::VMOVDQU16##Suffix##rm: \
-  case X86::VMOVDQU8##Suffix##rm: \
-  case X86::VMOVAPS##Suffix##rm: \
-  case X86::VMOVAPD##Suffix##rm: \
-  case X86::VMOVUPS##Suffix##rm: \
+#define MOV_AVX512_CASE(Suffix)                        \
+  case X86::VMOVDQA64##Suffix##rm:                     \
+  case X86::VMOVDQA32##Suffix##rm:                     \
+  case X86::VMOVDQU64##Suffix##rm:                     \
+  case X86::VMOVDQU32##Suffix##rm:                     \
+  case X86::VMOVDQU16##Suffix##rm:                     \
+  case X86::VMOVDQU8##Suffix##rm:                      \
+  case X86::VMOVAPS##Suffix##rm:                       \
+  case X86::VMOVAPD##Suffix##rm:                       \
+  case X86::VMOVUPS##Suffix##rm:                       \
   case X86::VMOVUPD##Suffix##rm:

-#define CASE_ALL_MOV_RM() \
-  MOV_CASE(, ) /* SSE */ \
-  MOV_CASE(V, ) /* AVX-128 */ \
-  MOV_CASE(V, Y) /* AVX-256 */ \
-  MOV_AVX512_CASE(Z) \
-  MOV_AVX512_CASE(Z256) \
+#define CASE_ALL_MOV_RM()                              \
+  MOV_CASE(, ) /* SSE */                               \
+  MOV_CASE(V, ) /* AVX-128 */                          \
+  MOV_CASE(V, Y) /* AVX-256 */                         \
+  MOV_AVX512_CASE(Z)                                   \
+  MOV_AVX512_CASE(Z256)                                \
   MOV_AVX512_CASE(Z128)

-    // For loads from a constant pool to a vector register, print the constant
-    // loaded.
-    CASE_ALL_MOV_RM()
+  // For loads from a constant pool to a vector register, print the constant
+  // loaded.
+  CASE_ALL_MOV_RM()
   case X86::VBROADCASTF128:
   case X86::VBROADCASTI128:
   case X86::VBROADCASTF32X4Z256rm:
@@ -1893,20 +2361,48 @@
     int NumLanes = 1;
     // Override NumLanes for the broadcast instructions.
     switch (MI->getOpcode()) {
-    case X86::VBROADCASTF128: NumLanes = 2; break;
-    case X86::VBROADCASTI128: NumLanes = 2; break;
-    case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break;
-    case X86::VBROADCASTF32X4rm: NumLanes = 4; break;
-    case X86::VBROADCASTF32X8rm: NumLanes = 2; break;
-    case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break;
-    case X86::VBROADCASTF64X2rm: NumLanes = 4; break;
-    case X86::VBROADCASTF64X4rm: NumLanes = 2; break;
-    case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break;
-    case X86::VBROADCASTI32X4rm: NumLanes = 4; break;
-    case X86::VBROADCASTI32X8rm: NumLanes = 2; break;
-    case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break;
-    case X86::VBROADCASTI64X2rm: NumLanes = 4; break;
-    case X86::VBROADCASTI64X4rm: NumLanes = 2; break;
+    case X86::VBROADCASTF128:
+      NumLanes = 2;
+      break;
+    case X86::VBROADCASTI128:
+      NumLanes = 2;
+      break;
+    case X86::VBROADCASTF32X4Z256rm:
+      NumLanes = 2;
+      break;
+    case X86::VBROADCASTF32X4rm:
+      NumLanes = 4;
+      break;
+    case X86::VBROADCASTF32X8rm:
+      NumLanes = 2;
+      break;
+    case X86::VBROADCASTF64X2Z128rm:
+      NumLanes = 2;
+      break;
+    case X86::VBROADCASTF64X2rm:
+      NumLanes = 4;
+      break;
+    case X86::VBROADCASTF64X4rm:
+      NumLanes = 2;
+      break;
+    case X86::VBROADCASTI32X4Z256rm:
+      NumLanes = 2;
+      break;
+    case X86::VBROADCASTI32X4rm:
+      NumLanes = 4;
+      break;
+    case X86::VBROADCASTI32X8rm:
+      NumLanes = 2;
+      break;
+    case X86::VBROADCASTI64X2Z128rm:
+      NumLanes = 2;
+      break;
+    case X86::VBROADCASTI64X2rm:
+      NumLanes = 4;
+      break;
+    case X86::VBROADCASTI64X4rm:
+      NumLanes = 2;
+      break;
     }

     std::string Comment;
@@ -1916,7 +2412,8 @@
   if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
     CS << "[";
     for (int l = 0; l != NumLanes; ++l) {
-      for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements; ++i) {
+      for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements;
+           ++i) {
         if (i != 0 || l != 0)
           CS << ",";
         if (CDS->getElementType()->isIntegerTy())
@@ -1934,7 +2431,8 @@
   } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
     CS << "<";
     for (int l = 0; l != NumLanes; ++l) {
-      for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) {
+      for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands;
+           ++i) {
         if (i != 0 || l != 0)
           CS << ",";
         printConstant(CV->getOperand(i), CS);
@@ -1980,35 +2478,92 @@
   if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
     int NumElts;
     switch (MI->getOpcode()) {
-    default: llvm_unreachable("Invalid opcode");
-    case X86::VBROADCASTSSrm: NumElts = 4; break;
-    case X86::VBROADCASTSSYrm: NumElts = 8; break;
-    case X86::VBROADCASTSSZ128m: NumElts = 4; break;
-    case X86::VBROADCASTSSZ256m: NumElts = 8; break;
-    case X86::VBROADCASTSSZm: NumElts = 16; break;
-    case X86::VBROADCASTSDYrm: NumElts = 4; break;
-    case X86::VBROADCASTSDZ256m: NumElts = 4; break;
-    case X86::VBROADCASTSDZm: NumElts = 8; break;
-    case X86::VPBROADCASTBrm: NumElts = 16; break;
-    case X86::VPBROADCASTBYrm: NumElts = 32; break;
-    case X86::VPBROADCASTBZ128m: NumElts = 16; break;
-    case X86::VPBROADCASTBZ256m: NumElts = 32; break;
-    case X86::VPBROADCASTBZm: NumElts = 64; break;
-    case X86::VPBROADCASTDrm: NumElts = 4; break;
-    case X86::VPBROADCASTDYrm: NumElts = 8; break;
-    case X86::VPBROADCASTDZ128m: NumElts = 4; break;
-    case X86::VPBROADCASTDZ256m: NumElts = 8; break;
-    case X86::VPBROADCASTDZm: NumElts = 16; break;
-    case X86::VPBROADCASTQrm: NumElts = 2; break;
-    case X86::VPBROADCASTQYrm: NumElts = 4; break;
-    case X86::VPBROADCASTQZ128m: NumElts = 2; break;
-    case X86::VPBROADCASTQZ256m: NumElts = 4; break;
-    case X86::VPBROADCASTQZm: NumElts = 8; break;
-    case X86::VPBROADCASTWrm: NumElts = 8; break;
-    case X86::VPBROADCASTWYrm: NumElts = 16; break;
-    case X86::VPBROADCASTWZ128m: NumElts = 8; break;
-    case X86::VPBROADCASTWZ256m: NumElts = 16; break;
-    case X86::VPBROADCASTWZm: NumElts = 32; break;
+    default:
+      llvm_unreachable("Invalid opcode");
+    case X86::VBROADCASTSSrm:
+      NumElts = 4;
+      break;
+    case X86::VBROADCASTSSYrm:
+      NumElts = 8;
+      break;
+    case X86::VBROADCASTSSZ128m:
+      NumElts = 4;
+      break;
+    case X86::VBROADCASTSSZ256m:
+      NumElts = 8;
+      break;
+    case X86::VBROADCASTSSZm:
+      NumElts = 16;
+      break;
+    case X86::VBROADCASTSDYrm:
+      NumElts = 4;
+      break;
+    case X86::VBROADCASTSDZ256m:
+      NumElts = 4;
+      break;
+    case X86::VBROADCASTSDZm:
+      NumElts = 8;
+      break;
+    case X86::VPBROADCASTBrm:
+      NumElts = 16;
+      break;
+    case X86::VPBROADCASTBYrm:
+      NumElts = 32;
+      break;
+    case X86::VPBROADCASTBZ128m:
+      NumElts = 16;
+      break;
+    case X86::VPBROADCASTBZ256m:
+      NumElts = 32;
+      break;
+    case X86::VPBROADCASTBZm:
+      NumElts = 64;
+      break;
+    case X86::VPBROADCASTDrm:
+      NumElts = 4;
+      break;
+    case X86::VPBROADCASTDYrm:
+      NumElts = 8;
+      break;
+    case X86::VPBROADCASTDZ128m:
+      NumElts = 4;
+      break;
+    case X86::VPBROADCASTDZ256m:
+      NumElts = 8;
+      break;
+    case X86::VPBROADCASTDZm:
+      NumElts = 16;
+      break;
+    case X86::VPBROADCASTQrm:
+      NumElts = 2;
+      break;
+    case X86::VPBROADCASTQYrm:
+      NumElts = 4;
+      break;
+    case X86::VPBROADCASTQZ128m:
+      NumElts = 2;
+      break;
+    case X86::VPBROADCASTQZ256m:
+      NumElts = 4;
+      break;
+    case X86::VPBROADCASTQZm:
+      NumElts = 8;
+      break;
+    case X86::VPBROADCASTWrm:
+      NumElts = 8;
+      break;
+    case X86::VPBROADCASTWYrm:
+      NumElts = 16;
+      break;
+    case X86::VPBROADCASTWZ128m:
+      NumElts = 8;
+      break;
+    case X86::VPBROADCASTWZ256m:
+      NumElts = 16;
+      break;
+    case X86::VPBROADCASTWZm:
+      NumElts = 32;
+      break;
     }

     std::string Comment;
Index: llvm/trunk/test/CodeGen/X86/xray-custom-log.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/xray-custom-log.ll
+++ llvm/trunk/test/CodeGen/X86/xray-custom-log.ll
@@ -11,8 +11,8 @@
   ; CHECK-LABEL: Lxray_event_sled_0:
   ; CHECK: .byte 0xeb, 0x0f
   ; CHECK-NEXT: pushq %rdi
-  ; CHECK-NEXT: movq {{.*}}, %rdi
   ; CHECK-NEXT: pushq %rsi
+  ; CHECK-NEXT: movq {{.*}}, %rdi
   ; CHECK-NEXT: movq {{.*}}, %rsi
   ; CHECK-NEXT: callq __xray_CustomEvent
   ; CHECK-NEXT: popq %rsi
@@ -21,8 +21,8 @@
   ; PIC-LABEL: Lxray_event_sled_0:
   ; PIC: .byte 0xeb, 0x0f
   ; PIC-NEXT: pushq %rdi
-  ; PIC-NEXT: movq {{.*}}, %rdi
   ; PIC-NEXT: pushq %rsi
+  ; PIC-NEXT: movq {{.*}}, %rdi
   ; PIC-NEXT: movq {{.*}}, %rsi
   ; PIC-NEXT: callq __xray_CustomEvent@PLT
   ; PIC-NEXT: popq %rsi
Index: llvm/trunk/test/CodeGen/X86/xray-typed-event-log.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/xray-typed-event-log.ll
+++ llvm/trunk/test/CodeGen/X86/xray-typed-event-log.ll
@@ -0,0 +1,45 @@
+; RUN: llc -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu \
+; RUN:     -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC
+
+define i32 @fn() nounwind noinline uwtable "function-instrument"="xray-always" {
+  %eventptr = alloca i8
+  %eventsize = alloca i32
+  %eventtype = alloca i16
+  store i16 6, i16* %eventtype
+  %type = load i16, i16* %eventtype
+  store i32 3, i32* %eventsize
+  %val = load i32, i32* %eventsize
+  call void @llvm.xray.typedevent(i16 %type, i8* %eventptr, i32 %val)
+  ; CHECK-LABEL: Lxray_typed_event_sled_0:
+  ; CHECK: .byte 0xeb, 0x14
+  ; CHECK-NEXT: pushq %rdi
+  ; CHECK-NEXT: pushq %rsi
+  ; CHECK-NEXT: pushq %rdx
+  ; CHECK-NEXT: movq {{.*}}, %rdi
+  ; CHECK-NEXT: movq {{.*}}, %rsi
+  ; CHECK-NEXT: movq {{.*}}, %rdx
+  ; CHECK-NEXT: callq __xray_TypedEvent
+  ; CHECK-NEXT: popq %rdx
+  ; CHECK-NEXT: popq %rsi
+  ; CHECK-NEXT: popq %rdi
+
+  ; PIC-LABEL: Lxray_typed_event_sled_0:
+  ; PIC: .byte 0xeb, 0x14
+  ; PIC-NEXT: pushq %rdi
+  ; PIC-NEXT: pushq %rsi
+  ; PIC-NEXT: pushq %rdx
+  ; PIC-NEXT: movq {{.*}}, %rdi
+  ; PIC-NEXT: movq {{.*}}, %rsi
+  ; PIC-NEXT: movq {{.*}}, %rdx
+  ; PIC-NEXT: callq __xray_TypedEvent@PLT
+  ; PIC-NEXT: popq %rdx
+  ; PIC-NEXT: popq %rsi
+  ; PIC-NEXT: popq %rdi
+  ret i32 0
+}
+; CHECK-LABEL: xray_instr_map
+; CHECK-LABEL: Lxray_sleds_start0:
+; CHECK: .quad {{.*}}xray_typed_event_sled_0
+
+declare void @llvm.xray.typedevent(i16, i8*, i32)
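
As a usage reference, not part of the patch itself: a minimal IR sketch that logs a fixed payload through the new intrinsic, assuming the same x86_64 Linux target as the test above. The @payload global, the @log_example function name, and the type tag 42 are illustrative only.

; Illustrative only: the emitted sled begins with a short jmp that skips the
; wrapped call; once an XRay runtime patches the sled, __xray_TypedEvent
; receives the type tag, the payload pointer, and the payload size.
@payload = private constant [10 x i8] c"event-data"

define void @log_example() "function-instrument"="xray-always" {
  %ptr = getelementptr inbounds [10 x i8], [10 x i8]* @payload, i32 0, i32 0
  call void @llvm.xray.typedevent(i16 42, i8* %ptr, i32 10)
  ret void
}

declare void @llvm.xray.typedevent(i16, i8*, i32)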