Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -14306,6 +14306,55 @@
 This intrinsic actually does nothing, but optimizers must assume that it
 has externally observable side effects.
 
+
+'``llvm.nospeculateload``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.nospeculateload`` on any
+integer type that can legally be loaded on the target, and on any pointer
+type. However, not all targets support this intrinsic at the moment.
+
+::
+
+      declare T @llvm.nospeculateload.T(T* %ptr, i8* %lower_bound, i8* %upper_bound,
+                                        T %failval, i8* %cmpptr)
+      declare T @llvm.nospeculateload.nolower.T(T* %ptr, i8* %upper_bound,
+                                                T %failval, i8* %cmpptr)
+      declare T @llvm.nospeculateload.noupper.T(T* %ptr, i8* %lower_bound,
+                                                T %failval, i8* %cmpptr)
+
+Overview:
+"""""""""
+
+The '``llvm.nospeculateload``' intrinsic performs a bounds-checked load: it
+returns the value loaded from ``%ptr`` when ``%cmpptr`` lies within the given
+bounds and ``%failval`` otherwise, and it guarantees that a speculatively
+loaded value is not forwarded to further speculative execution when the
+bounds check fails.
+
+Arguments:
+""""""""""
+
+The first argument is the pointer to load from. The second and third
+arguments are pointers used as the lower and the upper bound, respectively.
+The fourth argument is the value to return when the bounds check fails, and
+has the overloaded type. The fifth argument is the pointer that is checked
+against the bounds.
+
+In the ``llvm.nospeculateload.nolower`` and ``llvm.nospeculateload.noupper``
+variants, the lower and the upper bound argument is omitted, respectively.
+
+Semantics:
+""""""""""
+
+If %cmpptr lies within the range (%lower_bound <= %cmpptr < %upper_bound),
+the value at address %ptr is returned. Otherwise, the value %failval is
+returned.
+
+Furthermore, the intrinsic ensures that if %ptr is dereferenced speculatively
+at execution time (that is, without checking the boundary conditions), the
+result will not be used for further speculation unless the boundary
+conditions are satisfied. Speculation may continue, however, using %failval
+as the speculative result. This makes the intrinsic suitable for guarding
+loads whose address is derived from an untrusted index.
+
+
 Stack Map Intrinsics
 --------------------
Index: include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- include/llvm/CodeGen/ISDOpcodes.h
+++ include/llvm/CodeGen/ISDOpcodes.h
@@ -830,6 +830,10 @@
   /// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
   VECREDUCE_FMAX, VECREDUCE_FMIN,
 
+  NOSPECULATELOAD,
+  NOSPECULATELOAD_NOLOWER,
+  NOSPECULATELOAD_NOUPPER,
+
   /// BUILTIN_OP_END - This must be the last enum value in this list.
   /// The target-specific pre-isel opcode values start here.
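  // Illustrative IR-to-ISD mapping for the three opcodes above (a sketch;
  // the value names are made up). A bounds-guarded table lookup such as
  //
  //   %v = call i32 @llvm.nospeculateload.i32(i32* %elt, i8* %lo, i8* %hi,
  //                                           i32 0, i8* %eltp)
  //
  // is selected to NOSPECULATELOAD, and the .nolower/.noupper intrinsic
  // variants are selected to NOSPECULATELOAD_NOLOWER/NOSPECULATELOAD_NOUPPER.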
BUILTIN_OP_END Index: include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- include/llvm/CodeGen/SelectionDAGNodes.h +++ include/llvm/CodeGen/SelectionDAGNodes.h @@ -1261,6 +1261,9 @@ return N->getOpcode() == ISD::LOAD || N->getOpcode() == ISD::STORE || N->getOpcode() == ISD::PREFETCH || + N->getOpcode() == ISD::NOSPECULATELOAD || + N->getOpcode() == ISD::NOSPECULATELOAD_NOLOWER || + N->getOpcode() == ISD::NOSPECULATELOAD_NOUPPER || N->getOpcode() == ISD::ATOMIC_CMP_SWAP || N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS || N->getOpcode() == ISD::ATOMIC_SWAP || @@ -1348,6 +1351,9 @@ // early a node with a target opcode can be of this class return N->isMemIntrinsic() || N->getOpcode() == ISD::PREFETCH || + N->getOpcode() == ISD::NOSPECULATELOAD || + N->getOpcode() == ISD::NOSPECULATELOAD_NOLOWER || + N->getOpcode() == ISD::NOSPECULATELOAD_NOUPPER || N->isTargetMemoryOpcode(); } }; Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -963,6 +963,23 @@ def int_ssa_copy : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem, Returned<0>]>; + +//===------------------ Intrinsics to avoid speculation ------------------===// +def int_nospeculateload + : Intrinsic<[llvm_any_ty], + [LLVMPointerTo<0>, llvm_ptr_ty, llvm_ptr_ty, LLVMMatchType<0>, llvm_ptr_ty], + []>; +def int_nospeculateload_nolower + : Intrinsic<[llvm_any_ty], + [LLVMPointerTo<0>, llvm_ptr_ty, LLVMMatchType<0>, llvm_ptr_ty], + []>; +def int_nospeculateload_noupper + : Intrinsic<[llvm_any_ty], + [LLVMPointerTo<0>, llvm_ptr_ty, LLVMMatchType<0>, llvm_ptr_ty], + []>; + + + //===----------------------------------------------------------------------===// // Target-specific intrinsics //===----------------------------------------------------------------------===// Index: include/llvm/Target/TargetSelectionDAG.td =================================================================== --- include/llvm/Target/TargetSelectionDAG.td +++ include/llvm/Target/TargetSelectionDAG.td @@ -280,6 +280,14 @@ SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT>, SDTCisPtrTy<4>, SDTCisPtrTy<5> ]>; +def SDTNoSpeculateLoad: SDTypeProfile<1, 5, [ + SDTCisPtrTy<1>, SDTCisPtrTy<2>, SDTCisPtrTy<3>, SDTCisSameAs<4, 0>, SDTCisPtrTy<5> +]>; + +def SDTNoSpeculateLoadOneCheck: SDTypeProfile<1, 4, [ + SDTCisPtrTy<1>, SDTCisPtrTy<2>, SDTCisSameAs<3, 0>, SDTCisPtrTy<4> +]>; + class SDCallSeqStart constraints> : SDTypeProfile<0, 2, constraints>; class SDCallSeqEnd constraints> : @@ -562,6 +570,13 @@ def assertsext : SDNode<"ISD::AssertSext", SDT_assertext>; def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>; +def nospeculateload : SDNode<"ISD::NOSPECULATELOAD", SDTNoSpeculateLoad, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def nospeculateload_nolower : SDNode<"ISD::NOSPECULATELOAD_NOLOWER", SDTNoSpeculateLoadOneCheck, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def nospeculateload_noupper : SDNode<"ISD::NOSPECULATELOAD_NOUPPER", SDTNoSpeculateLoadOneCheck, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + //===----------------------------------------------------------------------===// // Selection DAG Condition Codes Index: lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -160,6 +160,13 @@ case 
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
    Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo);
    break;
+  case ISD::NOSPECULATELOAD:
+    Res = PromoteIntRes_NOSPECULATELOAD(cast<MemSDNode>(N));
+    break;
+  case ISD::NOSPECULATELOAD_NOLOWER:
+  case ISD::NOSPECULATELOAD_NOUPPER:
+    Res = PromoteIntRes_NOSPECULATELOAD_OneCheck(cast<MemSDNode>(N));
+    break;
   }
 
   // If the result is null then the sub-method took care of registering it.
@@ -644,6 +651,52 @@
                      LHS.getValueType(), LHS, RHS);
 }
 
+SDValue DAGTypeLegalizer::PromoteIntRes_NOSPECULATELOAD(MemSDNode *N) {
+  SDValue FailVal = GetPromotedInteger(N->getOperand(4));
+  SDVTList NodeTys = DAG.getVTList(FailVal.getValueType(), MVT::Other);
+
+  SDValue Ops[] = {
+      N->getOperand(0), // Chain
+      N->getOperand(1), // Ptr
+      N->getOperand(2), // LowerBound
+      N->getOperand(3), // UpperBound
+      FailVal,
+      N->getOperand(5), // CmpPtr
+  };
+
+  SDValue Result = DAG.getMemIntrinsicNode(
+      ISD::NOSPECULATELOAD, SDLoc(N), NodeTys, Ops,
+      SDValue(N, 0).getValueType(), N->getMemOperand());
+
+  // Modified the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+
+  return Result;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_NOSPECULATELOAD_OneCheck(MemSDNode *N) {
+  SDValue FailVal = GetPromotedInteger(N->getOperand(3));
+  SDVTList NodeTys = DAG.getVTList(FailVal.getValueType(), MVT::Other);
+
+  SDValue Ops[] = {
+      N->getOperand(0), // Chain
+      N->getOperand(1), // Ptr
+      N->getOperand(2), // Bound
+      FailVal,
+      N->getOperand(4), // CmpPtr
+  };
+  SDValue Result = DAG.getMemIntrinsicNode(
+      N->getOpcode(), SDLoc(N), NodeTys, Ops,
+      SDValue(N, 0).getValueType(), N->getMemOperand());
+
+  // Modified the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+
+  return Result;
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) {
   // Sign extend the input.
   SDValue LHS = SExtPromotedInteger(N->getOperand(0));
Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -280,6 +280,8 @@
   SDValue PromoteIntRes_UNDEF(SDNode *N);
   SDValue PromoteIntRes_VAARG(SDNode *N);
   SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
+  SDValue PromoteIntRes_NOSPECULATELOAD(MemSDNode *N);
+  SDValue PromoteIntRes_NOSPECULATELOAD_OneCheck(MemSDNode *N);
 
   // Integer Operand Promotion.
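  // Worked example for the two NOSPECULATELOAD result-promotion hooks
  // declared above (a sketch; it does not belong to the operand-promotion
  // entries that follow): promoting the result of an i8 nospeculateload on a
  // target whose smallest legal integer type is i32 rebuilds
  //   i8 (NOSPECULATELOAD ch, ptr, lo, hi, i8 fail, cmp)
  // as
  //   i32 (NOSPECULATELOAD ch, ptr, lo, hi, promoted fail, cmp)
  // while the attached MachineMemOperand keeps the original i8 memory type.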
bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo); @@ -367,6 +369,9 @@ void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_NOSPECULATELOAD (MemSDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_NOSPECULATELOAD_OneCheck(MemSDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandShiftByConstant(SDNode *N, const APInt &Amt, SDValue &Lo, SDValue &Hi); bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5801,6 +5801,9 @@ assert((Opcode == ISD::INTRINSIC_VOID || Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::PREFETCH || + Opcode == ISD::NOSPECULATELOAD || + Opcode == ISD::NOSPECULATELOAD_NOUPPER || + Opcode == ISD::NOSPECULATELOAD_NOLOWER || Opcode == ISD::LIFETIME_START || Opcode == ISD::LIFETIME_END || ((int)Opcode <= std::numeric_limits::max() && Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5835,6 +5835,63 @@ Flags)); return nullptr; } + case Intrinsic::nospeculateload: { + SDValue Chain = getRoot(); + EVT LoadVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + SDVTList NodeTys = DAG.getVTList(LoadVT, MVT::Other); + + SDValue Ops[] { + Chain, + getValue(I.getArgOperand(0)), // Ptr + getValue(I.getArgOperand(1)), // LowerBound + getValue(I.getArgOperand(2)), // UpperBound + getValue(I.getArgOperand(3)), // Failval + getValue(I.getArgOperand(4)), // CmpPtr + }; + + SDValue Result = DAG.getMemIntrinsicNode( + ISD::NOSPECULATELOAD, sdl, NodeTys, Ops, LoadVT, + MachinePointerInfo(I.getArgOperand(0)), 0, + MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile); + + assert(Result.getNode()->getNumValues() == 2); + SDValue OutChain = Result.getValue(1); + DAG.setRoot(OutChain); + SDValue LoadedVal = Result.getValue(0); + setValue(&I, LoadedVal); + + return nullptr; + } + case Intrinsic::nospeculateload_nolower: + case Intrinsic::nospeculateload_noupper: { + SDValue Chain = getRoot(); + EVT LoadVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + SDVTList NodeTys = DAG.getVTList(LoadVT, MVT::Other); + + SDValue Ops[] = { + Chain, + getValue(I.getArgOperand(0)), // Ptr + getValue(I.getArgOperand(1)), // Bound + getValue(I.getArgOperand(2)), // FailVal + getValue(I.getArgOperand(3)) // CmpPtr + }; + + unsigned Opcode = Intrinsic == Intrinsic::nospeculateload_nolower + ? 
ISD::NOSPECULATELOAD_NOLOWER + : ISD::NOSPECULATELOAD_NOUPPER; + SDValue Result = DAG.getMemIntrinsicNode( + Opcode, sdl, NodeTys, Ops, LoadVT, + MachinePointerInfo(I.getArgOperand(0)), 0, + MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile); + + assert(Result.getNode()->getNumValues() == 2); + SDValue OutChain = Result.getValue(1); + DAG.setRoot(OutChain); + SDValue LoadedVal = Result.getValue(0); + setValue(&I, LoadedVal); + + return nullptr; + } case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: { bool IsStart = (Intrinsic == Intrinsic::lifetime_start); Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -77,6 +77,9 @@ #ifndef NDEBUG case ISD::DELETED_NODE: return "<>"; #endif + case ISD::NOSPECULATELOAD: return "NoSpeculateLoad"; + case ISD::NOSPECULATELOAD_NOLOWER: return "NoSpeculateLoadNoLower"; + case ISD::NOSPECULATELOAD_NOUPPER: return "NoSpeculateLoadNoUpper"; case ISD::PREFETCH: return "Prefetch"; case ISD::ATOMIC_FENCE: return "AtomicFence"; case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; Index: lib/Target/AArch64/AArch64AsmPrinter.cpp =================================================================== --- lib/Target/AArch64/AArch64AsmPrinter.cpp +++ lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -85,6 +85,9 @@ void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); + void LowerNOSPECULATELOAD(const MachineInstr *MI, unsigned LoadOpc, + bool NoLower, bool NoUpper, bool RegPair, + bool XRegs); void EmitSled(const MachineInstr &MI, SledKind Kind); @@ -199,6 +202,83 @@ recordSled(CurSled, MI, Kind); } +void AArch64AsmPrinter::LowerNOSPECULATELOAD(const MachineInstr *MI, + unsigned LoadOpc, bool NoLower, + bool NoUpper, bool RegPair, + bool XRegs) { + unsigned Op = 0; + unsigned Dst = MI->getOperand(Op++).getReg(); + unsigned DstHi = RegPair ? MI->getOperand(Op++).getReg() : 0; + unsigned Ptr = MI->getOperand(Op++).getReg(); + unsigned LowerBound = NoLower ? 0 : MI->getOperand(Op++).getReg(); + unsigned UpperBound = NoUpper ? 0 : MI->getOperand(Op++).getReg(); + unsigned FailVal = MI->getOperand(Op++).getReg(); + unsigned FailValHi = RegPair ? MI->getOperand(Op++).getReg() : 0; + unsigned CmpPtr = MI->getOperand(Op++).getReg(); + MCSymbol *FailLabel = OutContext.createTempSymbol(); + + unsigned FirstBoundReg = NoLower ? 
UpperBound : LowerBound; + unsigned BranchCond = AArch64CC::HS, CSelCond = AArch64CC::LO; + if (NoUpper) + std::swap(BranchCond, CSelCond); + if (RegPair && !STI->isLittleEndian()) { + std::swap(Dst, DstHi); + std::swap(FailVal, FailValHi); + } + + // CMP cmpptr, lower + EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::SUBSXrs) + .addReg(AArch64::XZR) + .addReg(CmpPtr) + .addReg(FirstBoundReg) + .addImm(AArch64_AM::getShiftValue(0))); + + // CCMP cmpptr, upper, 2, hs + if (!NoLower && !NoUpper) + EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::CCMPXr) + .addReg(CmpPtr) + .addReg(UpperBound) + .addImm(2) + .addImm(AArch64CC::HS)); + + // B.HS FAIL + EmitToStreamer(*OutStreamer, + MCInstBuilder(AArch64::Bcc) + .addImm(BranchCond) + .addExpr(MCSymbolRefExpr::create(FailLabel, OutContext))); + + // LDR{P||H|B} dst, [ptr] + MCInstBuilder LDRBuilder(LoadOpc); + LDRBuilder.addReg(Dst); + if (RegPair) LDRBuilder.addReg(DstHi); + LDRBuilder.addReg(Ptr).addImm(0); + EmitToStreamer(*OutStreamer, LDRBuilder); // Offset + + // Label + OutStreamer->EmitLabel(FailLabel); + + // CSEL dst, dst, failval, LO + unsigned CselOpcode = XRegs ? AArch64::CSELXr : AArch64::CSELWr; + EmitToStreamer( + *OutStreamer, + MCInstBuilder(CselOpcode) + .addReg(Dst) + .addReg(Dst) + .addReg(FailVal) + .addImm(CSelCond)); + if (RegPair) + EmitToStreamer( + *OutStreamer, + MCInstBuilder(CselOpcode) + .addReg(DstHi) + .addReg(DstHi) + .addReg(FailValHi) + .addImm(CSelCond)); + + // HINT #0x14 // CSDB + EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0x14)); +} + void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) { const Triple &TT = TM.getTargetTriple(); if (TT.isOSBinFormatMachO()) { @@ -680,6 +760,39 @@ return; } + case AArch64::NoSpeculateLoadXPair_both: + return LowerNOSPECULATELOAD(MI, AArch64::LDPXi, false, false, true, true); + case AArch64::NoSpeculateLoadX_both: + return LowerNOSPECULATELOAD(MI, AArch64::LDRXui, false, false, false, true); + case AArch64::NoSpeculateLoadW_both: + return LowerNOSPECULATELOAD(MI, AArch64::LDRWui, false, false, false, false); + case AArch64::NoSpeculateLoadH_both: + return LowerNOSPECULATELOAD(MI, AArch64::LDRHHui, false, false, false, false); + case AArch64::NoSpeculateLoadB_both: + return LowerNOSPECULATELOAD(MI, AArch64::LDRBBui, false, false, false, false); + + case AArch64::NoSpeculateLoadXPair_nolower: + return LowerNOSPECULATELOAD(MI, AArch64::LDPXi, true, false, true, true); + case AArch64::NoSpeculateLoadX_nolower: + return LowerNOSPECULATELOAD(MI, AArch64::LDRXui, true, false, false, true); + case AArch64::NoSpeculateLoadW_nolower: + return LowerNOSPECULATELOAD(MI, AArch64::LDRWui, true, false, false, false); + case AArch64::NoSpeculateLoadH_nolower: + return LowerNOSPECULATELOAD(MI, AArch64::LDRHHui, true, false, false, false); + case AArch64::NoSpeculateLoadB_nolower: + return LowerNOSPECULATELOAD(MI, AArch64::LDRBBui, true, false, false, false); + + case AArch64::NoSpeculateLoadXPair_noupper: + return LowerNOSPECULATELOAD(MI, AArch64::LDPXi, false, true, true, true); + case AArch64::NoSpeculateLoadX_noupper: + return LowerNOSPECULATELOAD(MI, AArch64::LDRXui, false, true, false, true); + case AArch64::NoSpeculateLoadW_noupper: + return LowerNOSPECULATELOAD(MI, AArch64::LDRWui, false, true, false, false); + case AArch64::NoSpeculateLoadH_noupper: + return LowerNOSPECULATELOAD(MI, AArch64::LDRHHui, false, true, false, false); + case AArch64::NoSpeculateLoadB_noupper: + return LowerNOSPECULATELOAD(MI, AArch64::LDRBBui, false, true, false, false); + case 
AArch64::FMOVH0:
  case AArch64::FMOVS0:
  case AArch64::FMOVD0:
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -463,6 +463,10 @@
 
   setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
 
+  setOperationAction(ISD::NOSPECULATELOAD, MVT::i128, Custom);
+  setOperationAction(ISD::NOSPECULATELOAD_NOLOWER, MVT::i128, Custom);
+  setOperationAction(ISD::NOSPECULATELOAD_NOUPPER, MVT::i128, Custom);
+
   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
 
   // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
@@ -10663,6 +10667,67 @@
   Results.push_back(SDValue(CmpSwap, 3));
 }
 
+static void ReplaceNOSPECULATELOADResults(SDNode *N,
+                                          SmallVectorImpl<SDValue> &Results,
+                                          SelectionDAG &DAG) {
+  assert(N->getValueType(0) == MVT::i128 &&
+         "NOSPECULATELOAD on types less than 128 should be legal");
+  auto FailVal = splitInt128(N->getOperand(4), DAG);
+  SDValue Ops[] = {
+      N->getOperand(1), // Ptr
+      N->getOperand(2), // LowerBound
+      N->getOperand(3), // UpperBound
+      FailVal.first,
+      FailVal.second,
+      N->getOperand(5), // CmpPtr
+      N->getOperand(0), // Chain
+  };
+  SDVTList NodeTys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
+  MachineSDNode *NewNode = DAG.getMachineNode(
+      AArch64::NoSpeculateLoadXPair_both, SDLoc(N), NodeTys, Ops);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
+  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+  NewNode->setMemRefs(MemOp, MemOp + 1);
+
+  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N),
+                                MVT::i128, SDValue(NewNode, 0),
+                                SDValue(NewNode, 1)));
+  Results.push_back(SDValue(NewNode, 2)); // Chain
+}
+
+static void ReplaceNOSPECULATELOADOneCheckResults(SDNode *N,
+                                                  SmallVectorImpl<SDValue> &Results,
+                                                  SelectionDAG &DAG) {
+  assert(N->getValueType(0) == MVT::i128 &&
+         "NOSPECULATELOAD on types less than 128 should be legal");
+  auto FailVal = splitInt128(N->getOperand(3), DAG);
+  SDValue Ops[] = {
+      N->getOperand(1), // Ptr
+      N->getOperand(2), // Bound
+      FailVal.first,
+      FailVal.second,
+      N->getOperand(4), // CmpPtr
+      N->getOperand(0), // Chain
+  };
+  SDVTList NodeTys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
+  unsigned Opcode = N->getOpcode() == ISD::NOSPECULATELOAD_NOLOWER
+                        ? 
AArch64::NoSpeculateLoadXPair_nolower + : AArch64::NoSpeculateLoadXPair_noupper; + MachineSDNode *NewNode = DAG.getMachineNode(Opcode, SDLoc(N), NodeTys, Ops); + + MachineFunction &MF = DAG.getMachineFunction(); + MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1); + MemOp[0] = cast(N)->getMemOperand(); + NewNode->setMemRefs(MemOp, MemOp + 1); + + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), + MVT::i128, SDValue(NewNode, 0), + SDValue(NewNode, 1))); + Results.push_back(SDValue(NewNode, 2)); // Chain +} + void AArch64TargetLowering::ReplaceNodeResults( SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { switch (N->getOpcode()) { @@ -10705,6 +10770,13 @@ case ISD::ATOMIC_CMP_SWAP: ReplaceCMP_SWAP_128Results(N, Results, DAG); return; + case ISD::NOSPECULATELOAD: + ReplaceNOSPECULATELOADResults(N, Results, DAG); + return; + case ISD::NOSPECULATELOAD_NOLOWER: + case ISD::NOSPECULATELOAD_NOUPPER: + ReplaceNOSPECULATELOADOneCheckResults(N, Results, DAG); + return; } } Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3010,6 +3010,18 @@ case AArch64::ST1Twov1d: case AArch64::ST1Threev1d: case AArch64::ST1Fourv1d: + case AArch64::NoSpeculateLoadX_both: + case AArch64::NoSpeculateLoadW_both: + case AArch64::NoSpeculateLoadH_both: + case AArch64::NoSpeculateLoadB_both: + case AArch64::NoSpeculateLoadX_nolower: + case AArch64::NoSpeculateLoadW_nolower: + case AArch64::NoSpeculateLoadH_nolower: + case AArch64::NoSpeculateLoadB_nolower: + case AArch64::NoSpeculateLoadX_noupper: + case AArch64::NoSpeculateLoadW_noupper: + case AArch64::NoSpeculateLoadH_noupper: + case AArch64::NoSpeculateLoadB_noupper: return AArch64FrameOffsetCannotUpdate; case AArch64::PRFMui: Scale = 8; Index: lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.td +++ lib/Target/AArch64/AArch64InstrInfo.td @@ -1379,6 +1379,100 @@ (TLSDESC_CALLSEQ texternalsym:$sym)>; //===----------------------------------------------------------------------===// +// Speculation barrier intrinsics +//===----------------------------------------------------------------------===// +multiclass NoSpeculateLoad { + def _both_frag : PatFrag<(ops node:$ptr, node:$lower_bound, + node:$upper_bound, node:$failval, + node:$cmpptr), + (nospeculateload node:$ptr, node:$lower_bound, + node:$upper_bound, node:$failval, + node:$cmpptr), TypeCheck>; + def _nolower_frag : PatFrag<(ops node:$ptr, + node:$upper_bound, node:$failval, + node:$cmpptr), + (nospeculateload_nolower node:$ptr, + node:$upper_bound, node:$failval, + node:$cmpptr), TypeCheck>; + def _noupper_frag : PatFrag<(ops node:$ptr, node:$lower_bound, + node:$failval, node:$cmpptr), + (nospeculateload_noupper node:$ptr, node:$lower_bound, + node:$failval, + node:$cmpptr), TypeCheck>; + + let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [NZCV], mayLoad = 1, + Constraints = "@earlyclobber $dst" in { + let Size = 24 in + def _both + : Pseudo<(outs ValueClass:$dst), + (ins GPR64sp:$ptr, GPR64:$lower_bound, + GPR64:$upper_bound, ValueClass:$failval, GPR64:$cmpptr), + [(set ValueClass:$dst, + (!cast(NAME # "_both_frag") GPR64sp:$ptr, + GPR64:$lower_bound, GPR64:$upper_bound, + ValueClass:$failval, GPR64:$cmpptr))]>, + Sched<[]>; + let Size = 20 in + def _nolower + : Pseudo<(outs ValueClass:$dst), + (ins GPR64sp:$ptr, 
GPR64:$upper_bound, + ValueClass:$failval, GPR64:$cmpptr), + [(set ValueClass:$dst, + (!cast(NAME # "_nolower_frag") GPR64sp:$ptr, + GPR64:$upper_bound, ValueClass:$failval, + GPR64:$cmpptr))]>, + Sched<[]>; + let Size = 20 in + def _noupper + : Pseudo<(outs ValueClass:$dst), + (ins GPR64sp:$ptr, GPR64:$lower_bound, + ValueClass:$failval, GPR64:$cmpptr), + [(set ValueClass:$dst, + (!cast(NAME # "_noupper_frag") GPR64sp:$ptr, + GPR64:$lower_bound, ValueClass:$failval, + GPR64:$cmpptr))]>, + Sched<[]>; + } +} + +defm NoSpeculateLoadX: NoSpeculateLoad(N)->getMemoryVT() == MVT::i64;}]>; +defm NoSpeculateLoadW: NoSpeculateLoad(N)->getMemoryVT() == MVT::i32;}]>; +defm NoSpeculateLoadH: NoSpeculateLoad(N)->getMemoryVT() == MVT::i16;}]>; +defm NoSpeculateLoadB: NoSpeculateLoad(N)->getMemoryVT() == MVT::i8;}]>; + +let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [NZCV], + mayLoad = 1, Constraints = "@earlyclobber $dstlo,@earlyclobber $dsthi" in { + let Size = 28 in + def NoSpeculateLoadXPair_both + : Pseudo<(outs GPR64:$dstlo, GPR64:$dsthi), + (ins GPR64sp:$ptr, GPR64:$lower_bound, + GPR64:$upper_bound, GPR64:$failvallo, + GPR64:$failvalhi, GPR64:$cmpptr), + []>, + Sched<[]>; + let Size = 24 in + def NoSpeculateLoadXPair_nolower + : Pseudo<(outs GPR64:$dstlo, GPR64:$dsthi), + (ins GPR64sp:$ptr, + GPR64:$upper_bound, GPR64:$failvallo, + GPR64:$failvalhi, GPR64:$cmpptr), + []>, + Sched<[]>; + let Size = 24 in + def NoSpeculateLoadXPair_noupper + : Pseudo<(outs GPR64:$dstlo, GPR64:$dsthi), + (ins GPR64sp:$ptr, GPR64:$lower_bound, + GPR64:$failvallo, + GPR64:$failvalhi, GPR64:$cmpptr), + []>, + Sched<[]>; +} + +//===----------------------------------------------------------------------===// // Conditional branch (immediate) instruction. //===----------------------------------------------------------------------===// def Bcc : BranchCond; Index: lib/Target/ARM/ARMAsmPrinter.h =================================================================== --- lib/Target/ARM/ARMAsmPrinter.h +++ lib/Target/ARM/ARMAsmPrinter.h @@ -89,6 +89,10 @@ void EmitJumpTableAddrs(const MachineInstr *MI); void EmitJumpTableInsts(const MachineInstr *MI); void EmitJumpTableTBInst(const MachineInstr *MI, unsigned OffsetWidth); + void EmitNOSPECULATELOAD(const MachineInstr *MI, bool NoLower, bool NoUpper, + int Width); + void EmitThumbNOSPECULATELOAD(const MachineInstr *MI, bool NoLower, + bool NoUpper, int Width); void EmitInstruction(const MachineInstr *MI) override; bool runOnMachineFunction(MachineFunction &F) override; Index: lib/Target/ARM/ARMAsmPrinter.cpp =================================================================== --- lib/Target/ARM/ARMAsmPrinter.cpp +++ lib/Target/ARM/ARMAsmPrinter.cpp @@ -1191,6 +1191,240 @@ } } +void ARMAsmPrinter::EmitNOSPECULATELOAD(const MachineInstr *MI, bool NoLower, + bool NoUpper, int Width) { + MCTargetStreamer &TS = *OutStreamer->getTargetStreamer(); + ARMTargetStreamer &ATS = static_cast(TS); + const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); + + unsigned Op = 0; + unsigned Dst = MI->getOperand(Op++).getReg(); + unsigned Ptr = MI->getOperand(Op++).getReg(); + unsigned LowerBound = NoLower ? 0 : MI->getOperand(Op++).getReg(); + unsigned UpperBound = NoUpper ? 0 : MI->getOperand(Op++).getReg(); + unsigned FailVal = MI->getOperand(Op++).getReg(); + unsigned FailValHi = Width == 64 ? MI->getOperand(Op++).getReg() : 0; + unsigned CmpPtr = MI->getOperand(Op++).getReg(); + + unsigned FirstBoundCheck = NoLower ? 
UpperBound : LowerBound; + unsigned LoadCond, FailCond; + if (NoLower) { + LoadCond = ARMCC::LO; + FailCond = ARMCC::HS; + } else if (NoUpper) { + LoadCond = ARMCC::HS; + FailCond = ARMCC::LO; + } else { + LoadCond = ARMCC::HI; + FailCond = ARMCC::LS; + } + unsigned DstHi = 0; + if (Width == 64) { + DstHi = TRI->getSubReg(Dst, ARM::gsub_1); + Dst = TRI->getSubReg(Dst, ARM::gsub_0); + if (!Subtarget->isLittle()) + std::swap(FailVal, FailValHi); + } + + // CMP cmpptr, lower + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::CMPrr) + .addReg(CmpPtr) + .addReg(FirstBoundCheck) + .addImm(ARMCC::AL) // Predicate + .addReg(0)); // CPSR in + + // CMPHS upper, cmpptr + if (!NoLower && !NoUpper) { + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::CMPrr) + .addReg(UpperBound) + .addReg(CmpPtr) + .addImm(ARMCC::HS) // Predicate + .addReg(ARM::CPSR)); // CPSR in + } + + // LDxHI val, [ptr] + switch (Width) { + default: + llvm_unreachable("unexpected load width"); + case 8: + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRBi12) + .addReg(Dst) + .addReg(Ptr) + .addImm(0) // Offset + .addImm(LoadCond) // Predicate + .addReg(ARM::CPSR)); // CPSR in + break; + case 16: + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRH) + .addReg(Dst) + .addReg(Ptr) + .addReg(0) // Offset register + .addImm(0) // Offset immediate + .addImm(LoadCond) // Predicate + .addReg(ARM::CPSR)); // CPSR in + break; + case 32: + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12) + .addReg(Dst) + .addReg(Ptr) + .addImm(0) // Offset + .addImm(LoadCond) // Predicate + .addReg(ARM::CPSR)); // CPSR in + break; + case 64: { + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRD) + .addReg(Dst) + .addReg(DstHi) + .addReg(Ptr) + .addReg(0) // Offset reg + .addImm(0) // Offset imm + .addImm(LoadCond) // Predicate + .addReg(ARM::CPSR)); // CPSR in + break; + } + } + + // MOVLS val, failval + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::MOVr) + .addReg(Dst) + .addReg(FailVal) + .addImm(FailCond) // Predicate + .addReg(ARM::CPSR) // CPSR in + .addReg(0)); // CPSR out + + if (Width == 64) + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::MOVr) + .addReg(DstHi) + .addReg(FailValHi) + .addImm(FailCond) // Predicate + .addReg(ARM::CPSR) // CPSR in + .addReg(0)); // CPSR out + + // CSDB + uint32_t Csdb = 0xe320f014UL; + OutStreamer->AddComment("csdb"); + ATS.emitInst(Csdb); +} + +void ARMAsmPrinter::EmitThumbNOSPECULATELOAD(const MachineInstr *MI, + bool NoLower, bool NoUpper, + int Width) { + MCTargetStreamer &TS = *OutStreamer->getTargetStreamer(); + ARMTargetStreamer &ATS = static_cast(TS); + + unsigned Op = 0; + unsigned Dst = MI->getOperand(Op++).getReg(); + unsigned DstHi = Width == 64 ? MI->getOperand(Op++).getReg() : 0; + unsigned Ptr = MI->getOperand(Op++).getReg(); + unsigned LowerBound = NoLower ? 0 : MI->getOperand(Op++).getReg(); + unsigned UpperBound = NoUpper ? 0 : MI->getOperand(Op++).getReg(); + unsigned FailVal = MI->getOperand(Op++).getReg(); + unsigned FailValHi = Width == 64 ? MI->getOperand(Op++).getReg() : 0; + unsigned CmpPtr = MI->getOperand(Op++).getReg(); + + unsigned FirstBoundCheck = NoLower ? 
UpperBound : LowerBound;
+  unsigned LoadCond, FailCond;
+  if (NoLower) {
+    LoadCond = ARMCC::LO;
+    FailCond = ARMCC::HS;
+  } else if (NoUpper) {
+    LoadCond = ARMCC::HS;
+    FailCond = ARMCC::LO;
+  } else {
+    LoadCond = ARMCC::HI;
+    FailCond = ARMCC::LS;
+  }
+  if (Width == 64 && !Subtarget->isLittle()) {
+    std::swap(Dst, DstHi);
+    std::swap(FailVal, FailValHi);
+  }
+
+  // CMP cmpptr, lower
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tCMPr)
+                                   .addReg(CmpPtr)
+                                   .addReg(FirstBoundCheck)
+                                   .addImm(ARMCC::AL) // Predicate
+                                   .addReg(0));       // CPSR in
+
+  if (!NoLower && !NoUpper) {
+    // IT HS
+    EmitToStreamer(
+        *OutStreamer,
+        MCInstBuilder(ARM::t2IT).addImm(ARMCC::HS).addImm(8)); // Mask: T
+
+    // CMPHS upper, cmpptr
+    EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tCMPr)
+                                     .addReg(UpperBound)
+                                     .addReg(CmpPtr)
+                                     .addImm(ARMCC::HS)   // Predicate
+                                     .addReg(ARM::CPSR)); // CPSR in
+  }
+
+  // IT HI
+  EmitToStreamer(
+      *OutStreamer,
+      MCInstBuilder(ARM::t2IT).addImm(LoadCond).addImm(8)); // Mask: T
+
+  // LDxHI dst, [ptr]
+  unsigned LdrOpc;
+  switch (Width) {
+  default:
+    llvm_unreachable("unexpected load width");
+  case 8:
+    LdrOpc = ARM::tLDRBi;
+    break;
+  case 16:
+    LdrOpc = ARM::tLDRHi;
+    break;
+  case 32:
+    LdrOpc = ARM::tLDRi;
+    break;
+  case 64:
+    LdrOpc = ARM::t2LDRDi8;
+    break;
+  }
+  MCInstBuilder LdrBuilder(LdrOpc);
+  LdrBuilder.addReg(Dst);
+  if (Width == 64) LdrBuilder.addReg(DstHi);
+  LdrBuilder.addReg(Ptr)
+      .addImm(0)          // Offset
+      .addImm(LoadCond)   // Predicate
+      .addReg(ARM::CPSR); // CPSR in
+  EmitToStreamer(*OutStreamer, LdrBuilder);
+
+  // TODO: extend this IT block to also cover the 64-bit case, if
+  // multi-instruction IT blocks are not deprecated.
+  // IT LS
+  EmitToStreamer(
+      *OutStreamer,
+      MCInstBuilder(ARM::t2IT).addImm(FailCond).addImm(8)); // Mask: T
+
+  // MOVLS dst, failval
+  EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr)
+                                   .addReg(Dst)
+                                   .addReg(FailVal)
+                                   .addImm(FailCond)    // Predicate
+                                   .addReg(ARM::CPSR)); // CPSR in
+
+  if (Width == 64) {
+    // IT LS
+    EmitToStreamer(
+        *OutStreamer,
+        MCInstBuilder(ARM::t2IT).addImm(FailCond).addImm(8)); // Mask: T
+
+    // MOVLS dsthi, failvalhi
+    EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr)
+                                     .addReg(DstHi)
+                                     .addReg(FailValHi)
+                                     .addImm(FailCond)    // Predicate
+                                     .addReg(ARM::CPSR)); // CPSR in
+  }
+
+  // CSDB
+  uint32_t Csdb = 0xf3af8014UL;
+  OutStreamer->AddComment("csdb");
+  ATS.emitInst(Csdb, 'w');
+}
+
 // Simple pseudo-instructions have their lowering (with expansion to real
 // instructions) auto-generated.
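// For reference, the expansions emitted above for a 32-bit nospeculateload
// with both bounds are, schematically (register names are illustrative):
//
//   ARM:                        Thumb-2:
//     cmp    cmpptr, lower        cmp    cmpptr, lower
//     cmphs  upper, cmpptr        it     hs
//     ldrhi  dst, [ptr]           cmphs  upper, cmpptr
//     movls  dst, failval         it     hi
//     csdb                        ldrhi  dst, [ptr]
//                                 it     ls
//                                 movls  dst, failval
//                                 csdb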
#include "ARMGenMCPseudoLowering.inc" @@ -2029,6 +2263,80 @@ case ARM::PATCHABLE_TAIL_CALL: LowerPATCHABLE_TAIL_CALL(*MI); return; + + case ARM::NOSPECULATELOAD8_both: + EmitNOSPECULATELOAD(MI, false, false, 8); + return; + case ARM::NOSPECULATELOAD16_both: + EmitNOSPECULATELOAD(MI, false, false, 16); + return; + case ARM::NOSPECULATELOAD32_both: + EmitNOSPECULATELOAD(MI, false, false, 32); + return; + case ARM::NOSPECULATELOAD64_both: + EmitNOSPECULATELOAD(MI, false, false, 64); + return; + case ARM::NOSPECULATELOAD8_nolower: + EmitNOSPECULATELOAD(MI, true, false, 8); + return; + case ARM::NOSPECULATELOAD16_nolower: + EmitNOSPECULATELOAD(MI, true, false, 16); + return; + case ARM::NOSPECULATELOAD32_nolower: + EmitNOSPECULATELOAD(MI, true, false, 32); + return; + case ARM::NOSPECULATELOAD64_nolower: + EmitNOSPECULATELOAD(MI, true, false, 64); + return; + case ARM::NOSPECULATELOAD8_noupper: + EmitNOSPECULATELOAD(MI, false, true, 8); + return; + case ARM::NOSPECULATELOAD16_noupper: + EmitNOSPECULATELOAD(MI, false, true, 16); + return; + case ARM::NOSPECULATELOAD32_noupper: + EmitNOSPECULATELOAD(MI, false, true, 32); + return; + case ARM::NOSPECULATELOAD64_noupper: + EmitNOSPECULATELOAD(MI, false, true, 64); + return; + + case ARM::tNOSPECULATELOAD8_both: + EmitThumbNOSPECULATELOAD(MI, false, false, 8); + return; + case ARM::tNOSPECULATELOAD16_both: + EmitThumbNOSPECULATELOAD(MI, false, false, 16); + return; + case ARM::tNOSPECULATELOAD32_both: + EmitThumbNOSPECULATELOAD(MI, false, false, 32); + return; + case ARM::tNOSPECULATELOAD64_both: + EmitThumbNOSPECULATELOAD(MI, false, false, 64); + return; + case ARM::tNOSPECULATELOAD8_nolower: + EmitThumbNOSPECULATELOAD(MI, true, false, 8); + return; + case ARM::tNOSPECULATELOAD16_nolower: + EmitThumbNOSPECULATELOAD(MI, true, false, 16); + return; + case ARM::tNOSPECULATELOAD32_nolower: + EmitThumbNOSPECULATELOAD(MI, true, false, 32); + return; + case ARM::tNOSPECULATELOAD64_nolower: + EmitThumbNOSPECULATELOAD(MI, true, false, 64); + return; + case ARM::tNOSPECULATELOAD8_noupper: + EmitThumbNOSPECULATELOAD(MI, false, true, 8); + return; + case ARM::tNOSPECULATELOAD16_noupper: + EmitThumbNOSPECULATELOAD(MI, false, true, 16); + return; + case ARM::tNOSPECULATELOAD32_noupper: + EmitThumbNOSPECULATELOAD(MI, false, true, 32); + return; + case ARM::tNOSPECULATELOAD64_noupper: + EmitThumbNOSPECULATELOAD(MI, false, true, 64); + return; } MCInst TmpInst; Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -1004,6 +1004,10 @@ setOperationAction(ISD::PREFETCH, MVT::Other, Custom); + setOperationAction(ISD::NOSPECULATELOAD, MVT::i64, Custom); + setOperationAction(ISD::NOSPECULATELOAD_NOLOWER, MVT::i64, Custom); + setOperationAction(ISD::NOSPECULATELOAD_NOUPPER, MVT::i64, Custom); + // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. 
if (!Subtarget->hasV6Ops()) {
     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
@@ -7968,6 +7972,114 @@
   Results.push_back(LongMul.getValue(1));
 }
 
+static void ReplaceNOSPECULATELOADResults(SDNode *N,
+                                          SmallVectorImpl<SDValue> &Results,
+                                          SelectionDAG &DAG,
+                                          const ARMSubtarget *Subtarget) {
+  assert(N->getValueType(0) == MVT::i64 &&
+         "NOSPECULATELOAD on types less than 64 should be legal");
+  SDLoc dl(N);
+  SDValue FailValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                                  N->getOperand(4),
+                                  DAG.getConstant(0, dl, MVT::i32));
+  SDValue FailValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                                  N->getOperand(4),
+                                  DAG.getConstant(1, dl, MVT::i32));
+
+  SDValue Ops[] = {
+      N->getOperand(1), // Ptr
+      N->getOperand(2), // LowerBound
+      N->getOperand(3), // UpperBound
+      FailValLo,
+      FailValHi,
+      N->getOperand(5), // CmpPtr
+      N->getOperand(0), // Chain
+  };
+  SDVTList NodeTys = Subtarget->isThumb()
+                         ? DAG.getVTList(MVT::i32, MVT::i32, MVT::Other)
+                         : DAG.getVTList(MVT::Untyped, MVT::Other);
+  unsigned Opcode = Subtarget->isThumb() ? ARM::tNOSPECULATELOAD64_both
+                                         : ARM::NOSPECULATELOAD64_both;
+  MachineSDNode *NewNode = DAG.getMachineNode(
+      Opcode, dl, NodeTys, Ops);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
+  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+  NewNode->setMemRefs(MemOp, MemOp + 1);
+
+  if (Subtarget->isThumb()) {
+    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl,
+                                  MVT::i64, SDValue(NewNode, 0),
+                                  SDValue(NewNode, 1)));
+    Results.push_back(SDValue(NewNode, 2)); // Chain
+  } else {
+    bool isBigEndian = DAG.getDataLayout().isBigEndian();
+    Results.push_back(
+        DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
+                                   SDLoc(N), MVT::i32, SDValue(NewNode, 0)));
+    Results.push_back(
+        DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
+                                   SDLoc(N), MVT::i32, SDValue(NewNode, 0)));
+    Results.push_back(SDValue(NewNode, 1)); // Chain
+  }
+}
+
+static void ReplaceNOSPECULATELOADOneCheckResults(
+    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
+    const ARMSubtarget *Subtarget) {
+  assert(N->getValueType(0) == MVT::i64 &&
+         "NOSPECULATELOAD on types less than 64 should be legal");
+  SDLoc dl(N);
+  SDValue FailValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                                  N->getOperand(3),
+                                  DAG.getConstant(0, dl, MVT::i32));
+  SDValue FailValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                                  N->getOperand(3),
+                                  DAG.getConstant(1, dl, MVT::i32));
+
+  SDValue Ops[] = {
+      N->getOperand(1), // Ptr
+      N->getOperand(2), // Bound
+      FailValLo,
+      FailValHi,
+      N->getOperand(4), // CmpPtr
+      N->getOperand(0), // Chain
+  };
+  SDVTList NodeTys = Subtarget->isThumb()
+                         ? DAG.getVTList(MVT::i32, MVT::i32, MVT::Other)
+                         : DAG.getVTList(MVT::Untyped, MVT::Other);
+  unsigned Opcode;
+  if (N->getOpcode() == ISD::NOSPECULATELOAD_NOLOWER)
+    Opcode = Subtarget->isThumb() ? ARM::tNOSPECULATELOAD64_nolower
+                                  : ARM::NOSPECULATELOAD64_nolower;
+  else
+    Opcode = Subtarget->isThumb() ? 
ARM::tNOSPECULATELOAD64_noupper + : ARM::NOSPECULATELOAD64_noupper; + MachineSDNode *NewNode = DAG.getMachineNode(Opcode, dl, NodeTys, Ops); + + MachineFunction &MF = DAG.getMachineFunction(); + MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1); + MemOp[0] = cast(N)->getMemOperand(); + NewNode->setMemRefs(MemOp, MemOp + 1); + + if (Subtarget->isThumb()) { + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, + MVT::i64, SDValue(NewNode, 0), + SDValue(NewNode, 1))); + Results.push_back(SDValue(NewNode, 2)); // Chain + } else { + bool isBigEndian = DAG.getDataLayout().isBigEndian(); + Results.push_back( + DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0, + SDLoc(N), MVT::i32, SDValue(NewNode, 0))); + Results.push_back( + DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1, + SDLoc(N), MVT::i32, SDValue(NewNode, 0))); + Results.push_back(SDValue(NewNode, 1)); // Chain + } +} + /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. void ARMTargetLowering::ReplaceNodeResults(SDNode *N, @@ -8011,6 +8123,13 @@ return; case ISD::INTRINSIC_WO_CHAIN: return ReplaceLongIntrinsic(N, Results, DAG); + case ISD::NOSPECULATELOAD: + ReplaceNOSPECULATELOADResults(N, Results, DAG, Subtarget); + return; + case ISD::NOSPECULATELOAD_NOLOWER: + case ISD::NOSPECULATELOAD_NOUPPER: + ReplaceNOSPECULATELOADOneCheckResults(N, Results, DAG, Subtarget); + return; } if (Res.getNode()) Results.push_back(Res); Index: lib/Target/ARM/ARMInstrInfo.td =================================================================== --- lib/Target/ARM/ARMInstrInfo.td +++ lib/Target/ARM/ARMInstrInfo.td @@ -5552,6 +5552,95 @@ Requires<[IsARM, IsReadTPHard]>; //===----------------------------------------------------------------------===// +// Speculation barrier intrinsics +// +multiclass NoSpeculateLoad { + def _both_frag : PatFrag<(ops node:$ptr, node:$lower_bound, + node:$upper_bound, node:$failval, + node:$cmpptr), + (nospeculateload node:$ptr, node:$lower_bound, + node:$upper_bound, node:$failval, + node:$cmpptr), TypeCheck>; + def _nolower_frag : PatFrag<(ops node:$ptr, + node:$upper_bound, node:$failval, + node:$cmpptr), + (nospeculateload_nolower node:$ptr, + node:$upper_bound, node:$failval, + node:$cmpptr), TypeCheck>; + def _noupper_frag : PatFrag<(ops node:$ptr, node:$lower_bound, + node:$failval, node:$cmpptr), + (nospeculateload_noupper node:$ptr, node:$lower_bound, + node:$failval, + node:$cmpptr), TypeCheck>; + + let Defs = [CPSR], hasSideEffects = 1, isCodeGenOnly = 1, mayLoad = 1, + Constraints = "@earlyclobber $dst" in { + def _both : ARMPseudoInst<(outs GPRnopc:$dst), + (ins GPRnopc:$ptr, GPRnopc:$lower_bound, + GPRnopc:$upper_bound, GPRnopc:$failval, + GPRnopc:$cmpptr), + 20, IIC_iCMPr, + [(set GPRnopc:$dst, + (!cast(NAME # "_both_frag") GPRnopc:$ptr, + GPRnopc:$lower_bound, GPRnopc:$upper_bound, + GPRnopc:$failval, GPRnopc:$cmpptr))]>, + Sched<[]>; + def _nolower : + ARMPseudoInst<(outs GPRnopc:$dst), + (ins GPRnopc:$ptr, GPRnopc:$upper_bound, + GPRnopc:$failval, GPRnopc:$cmpptr), + 16, IIC_iCMPr, + [(set GPRnopc:$dst, + (!cast(NAME # "_nolower_frag") + GPRnopc:$ptr, GPRnopc:$upper_bound, + GPRnopc:$failval, GPRnopc:$cmpptr))]>, + Sched<[]>; + def _noupper : + ARMPseudoInst<(outs GPRnopc:$dst), + (ins GPRnopc:$ptr, GPRnopc:$lower_bound, + GPRnopc:$failval, GPRnopc:$cmpptr), + 16, IIC_iCMPr, + [(set GPRnopc:$dst, + (!cast(NAME # "_noupper_frag") + GPRnopc:$ptr, GPRnopc:$lower_bound, + GPRnopc:$failval, 
GPRnopc:$cmpptr))]>, + Sched<[]>; + } +} + +defm NOSPECULATELOAD8 : NoSpeculateLoad< + [{return cast(N)->getMemoryVT() == MVT::i8;}]>; +defm NOSPECULATELOAD16 : NoSpeculateLoad< + [{return cast(N)->getMemoryVT() == MVT::i16;}]>; +defm NOSPECULATELOAD32 : NoSpeculateLoad< + [{return cast(N)->getMemoryVT() == MVT::i32;}]>; + +let Defs = [CPSR], hasSideEffects = 1, isCodeGenOnly = 1, mayLoad = 1, + Constraints = "@earlyclobber $dst" in { + def NOSPECULATELOAD64_both : ARMPseudoInst<(outs GPRPair:$dst), + (ins GPRnopc:$ptr, GPRnopc:$lower_bound, + GPRnopc:$upper_bound, GPRnopc:$failvallo, + GPRnopc:$failvalhi, GPRnopc:$cmpptr), + 24, IIC_iCMPr, + []>, + Sched<[]>; + def NOSPECULATELOAD64_nolower : ARMPseudoInst<(outs GPRPair:$dst), + (ins GPRnopc:$ptr, GPRnopc:$upper_bound, + GPRnopc:$failvallo, GPRnopc:$failvalhi, + GPRnopc:$cmpptr), + 20, IIC_iCMPr, + []>, + Sched<[]>; + def NOSPECULATELOAD64_noupper : ARMPseudoInst<(outs GPRPair:$dst), + (ins GPRnopc:$ptr, GPRnopc:$lower_bound, + GPRnopc:$failvallo, GPRnopc:$failvalhi, + GPRnopc:$cmpptr), + 20, IIC_iCMPr, + []>, + Sched<[]>; +} + +//===----------------------------------------------------------------------===// // SJLJ Exception handling intrinsics // eh_sjlj_setjmp() is an instruction sequence to store the return // address and save #0 in R0 for the non-longjmp case. Index: lib/Target/ARM/ARMInstrThumb2.td =================================================================== --- lib/Target/ARM/ARMInstrThumb2.td +++ lib/Target/ARM/ARMInstrThumb2.td @@ -3882,6 +3882,97 @@ NoItinerary, []>, Requires<[IsThumb2]>; } + +//===----------------------------------------------------------------------===// +// Speculation barrier intrinsics +// +multiclass tNoSpeculateLoad { + def _both_frag : PatFrag<(ops node:$ptr, node:$lower_bound, + node:$upper_bound, node:$failval, + node:$cmpptr), + (nospeculateload node:$ptr, node:$lower_bound, + node:$upper_bound, node:$failval, + node:$cmpptr), TypeCheck>; + def _nolower_frag : PatFrag<(ops node:$ptr, + node:$upper_bound, node:$failval, + node:$cmpptr), + (nospeculateload_nolower node:$ptr, + node:$upper_bound, node:$failval, + node:$cmpptr), TypeCheck>; + def _noupper_frag : PatFrag<(ops node:$ptr, node:$lower_bound, + node:$failval, node:$cmpptr), + (nospeculateload_noupper node:$ptr, node:$lower_bound, + node:$failval, + node:$cmpptr), TypeCheck>; + + let Defs = [CPSR], hasSideEffects = 1, isCodeGenOnly = 1, mayLoad = 1, + Constraints = "@earlyclobber $dst" in { + def _both : tPseudoInst<(outs tGPR:$dst), + (ins tGPR:$ptr, tGPR:$lower_bound, + tGPR:$upper_bound, rGPR:$failval, + tGPR:$cmpptr), + 18, IIC_iCMPr, + [(set tGPR:$dst, + (!cast(NAME # "_both_frag") tGPR:$ptr, tGPR:$lower_bound, + tGPR:$upper_bound, rGPR:$failval, + tGPR:$cmpptr))]>, + Sched<[]>; + def _nolower : + tPseudoInst<(outs tGPR:$dst), + (ins tGPR:$ptr, tGPR:$upper_bound, + rGPR:$failval, tGPR:$cmpptr), + 14, IIC_iCMPr, + [(set tGPR:$dst, + (!cast(NAME # "_nolower_frag") tGPR:$ptr, + tGPR:$upper_bound, rGPR:$failval, + tGPR:$cmpptr))]>, + Sched<[]>; + def _noupper : + tPseudoInst<(outs tGPR:$dst), + (ins tGPR:$ptr, tGPR:$lower_bound, + rGPR:$failval, tGPR:$cmpptr), + 14, IIC_iCMPr, + [(set tGPR:$dst, + (!cast(NAME # "_noupper_frag") tGPR:$ptr, tGPR:$lower_bound, + rGPR:$failval, + tGPR:$cmpptr))]>, + Sched<[]>; + } +} + +defm tNOSPECULATELOAD8 : tNoSpeculateLoad< + [{return cast(N)->getMemoryVT() == MVT::i8;}]>; +defm tNOSPECULATELOAD16 : tNoSpeculateLoad< + [{return cast(N)->getMemoryVT() == MVT::i16;}]>; +defm tNOSPECULATELOAD32 : 
tNoSpeculateLoad< + [{return cast(N)->getMemoryVT() == MVT::i32;}]>; + +let Defs = [CPSR], hasSideEffects = 1, isCodeGenOnly = 1, mayLoad = 1, + Constraints = "@earlyclobber $dstlo,@earlyclobber $dsthi" in { + def tNOSPECULATELOAD64_both : ARMPseudoInst<(outs rGPR:$dstlo, rGPR:$dsthi), + (ins tGPR:$ptr, tGPR:$lower_bound, + tGPR:$upper_bound, rGPR:$failvallo, rGPR:$failvalhi, + tGPR:$cmpptr), + 24, IIC_iCMPr, + []>, + Sched<[]>; + def tNOSPECULATELOAD64_nolower : ARMPseudoInst<(outs rGPR:$dstlo, rGPR:$dsthi), + (ins tGPR:$ptr, + tGPR:$upper_bound, rGPR:$failvallo, rGPR:$failvalhi, + tGPR:$cmpptr), + 20, IIC_iCMPr, + []>, + Sched<[]>; + def tNOSPECULATELOAD64_noupper : ARMPseudoInst<(outs rGPR:$dstlo, rGPR:$dsthi), + (ins tGPR:$ptr, + tGPR:$lower_bound, rGPR:$failvallo, rGPR:$failvalhi, + tGPR:$cmpptr), + 20, IIC_iCMPr, + []>, + Sched<[]>; +} + + //===----------------------------------------------------------------------===// // Coprocessor load/store -- for disassembly only // Index: test/CodeGen/AArch64/no-speculate.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/no-speculate.ll @@ -0,0 +1,255 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-eabi | FileCheck %s --check-prefixes=CHECK +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-eabi | FileCheck %s --check-prefixes=CHECK +; RUN: llc -fast-isel -verify-machineinstrs < %s -mtriple=aarch64-eabi | FileCheck %s --check-prefixes=CHECK +; RUN: llc -global-isel -global-isel-abort=0 -verify-machineinstrs < %s -mtriple=aarch64-eabi | FileCheck %s --check-prefixes=CHECK + +declare i8 @llvm.nospeculateload.i8(i8*, i8*, i8*, i8, i8*) +declare i8 @llvm.nospeculateload.nolower.i8(i8*, i8*, i8, i8*) +declare i8 @llvm.nospeculateload.noupper.i8(i8*, i8*, i8, i8*) +declare i16 @llvm.nospeculateload.i16(i16*, i8*, i8*, i16, i8*) +declare i16 @llvm.nospeculateload.nolower.i16(i16*, i8*, i16, i8*) +declare i16 @llvm.nospeculateload.noupper.i16(i16*, i8*, i16, i8*) +declare i32 @llvm.nospeculateload.i32(i32*, i8*, i8*, i32, i8*) +declare i32 @llvm.nospeculateload.nolower.i32(i32*, i8*, i32, i8*) +declare i32 @llvm.nospeculateload.noupper.i32(i32*, i8*, i32, i8*) +declare i64 @llvm.nospeculateload.i64(i64*, i8*, i8*, i64, i8*) +declare i64 @llvm.nospeculateload.nolower.i64(i64*, i8*, i64, i8*) +declare i64 @llvm.nospeculateload.noupper.i64(i64*, i8*, i64, i8*) +declare i128 @llvm.nospeculateload.i128(i128*, i8*, i8*, i128, i8*) +declare i128 @llvm.nospeculateload.nolower.i128(i128*, i8*, i128, i8*) +declare i128 @llvm.nospeculateload.noupper.i128(i128*, i8*, i128, i8*) + +define i8 @f_i8(i8* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i8 %failval) { +entry: + %0 = tail call i8 @llvm.nospeculateload.i8(i8* %ptr, i8* %lowerbound, i8* %upperbound, i8 %failval, i8* %cmpptr) + ret i8 %0 + ; CHECK-LABEL: f_i8: + ; CHECK: cmp x3, x1 + ; CHECK-NEXT: ccmp x3, x2, #2, hs + ; CHECK-NEXT: b.hs [[FAILLABEL:.[0-9a-zA-Z]+]] + ; CHECK-NEXT: ldrb [[DST:w[0-9]+]], [x0] + ; CHECK-NEXT: [[FAILLABEL]]: + ; CHECK-NEXT: csel [[DST]], [[DST]], w4, lo + ; CHECK-NEXT: hint #20 + ; CHECK-NEXT: mov w0, [[DST]] + ; CHECK-NEXT: ret +} + +define i8 @f_i8_nolower(i8* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i8 %failval) { +entry: + %0 = tail call i8 @llvm.nospeculateload.nolower.i8(i8* %ptr, i8* %upperbound, i8 %failval, i8* %cmpptr) + ret i8 %0 + ; CHECK-LABEL: f_i8_nolower: + ; CHECK: cmp x3, x2 + ; CHECK-NEXT: b.hs [[FAILLABEL:.[0-9a-zA-Z]+]] + ; CHECK-NEXT: ldrb [[DST:w[0-9]+]], [x0] 
+ ; CHECK-NEXT: [[FAILLABEL]]: + ; CHECK-NEXT: csel [[DST]], [[DST]], w4, lo + ; CHECK-NEXT: hint #20 + ; CHECK-NEXT: mov w0, [[DST]] + ; CHECK-NEXT: ret +} + +define i8 @f_i8_noupper(i8* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i8 %failval) { +entry: + %0 = tail call i8 @llvm.nospeculateload.noupper.i8(i8* %ptr, i8* %lowerbound, i8 %failval, i8* %cmpptr) + ret i8 %0 + ; CHECK-LABEL: f_i8_noupper: + ; CHECK: cmp x3, x1 + ; CHECK-NEXT: b.lo [[FAILLABEL:.[0-9a-zA-Z]+]] + ; CHECK-NEXT: ldrb [[DST:w[0-9]+]], [x0] + ; CHECK-NEXT: [[FAILLABEL]]: + ; CHECK-NEXT: csel [[DST]], [[DST]], w4, hs + ; CHECK-NEXT: hint #20 + ; CHECK-NEXT: mov w0, [[DST]] + ; CHECK-NEXT: ret +} + +define i16 @f_i16(i16* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i16 %failval) { +entry: + %0 = tail call i16 @llvm.nospeculateload.i16(i16* %ptr, i8* %lowerbound, i8* %upperbound, i16 %failval, i8* %cmpptr) + ret i16 %0 + ; CHECK-LABEL: f_i16: + ; CHECK: cmp x3, x1 + ; CHECK-NEXT: ccmp x3, x2, #2, hs + ; CHECK-NEXT: b.hs [[FAILLABEL:.[0-9a-zA-Z]+]] + ; CHECK-NEXT: ldrh [[DST:w[0-9]+]], [x0] + ; CHECK-NEXT: [[FAILLABEL]]: + ; CHECK-NEXT: csel [[DST]], [[DST]], w4, lo + ; CHECK-NEXT: hint #20 + ; CHECK-NEXT: mov w0, [[DST]] + ; CHECK-NEXT: ret +} + +define i16 @f_i16_nolower(i16* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i16 %failval) { +entry: + %0 = tail call i16 @llvm.nospeculateload.nolower.i16(i16* %ptr, i8* %upperbound, i16 %failval, i8* %cmpptr) + ret i16 %0 + ; CHECK-LABEL: f_i16_nolower: + ; CHECK: cmp x3, x2 + ; CHECK-NEXT: b.hs [[FAILLABEL:.[0-9a-zA-Z]+]] + ; CHECK-NEXT: ldrh [[DST:w[0-9]+]], [x0] + ; CHECK-NEXT: [[FAILLABEL]]: + ; CHECK-NEXT: csel [[DST]], [[DST]], w4, lo + ; CHECK-NEXT: hint #20 + ; CHECK-NEXT: mov w0, [[DST]] + ; CHECK-NEXT: ret +} + +define i16 @f_i16_noupper(i16* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i16 %failval) { +entry: + %0 = tail call i16 @llvm.nospeculateload.noupper.i16(i16* %ptr, i8* %lowerbound, i16 %failval, i8* %cmpptr) + ret i16 %0 + ; CHECK-LABEL: f_i16_noupper: + ; CHECK: cmp x3, x1 + ; CHECK-NEXT: b.lo [[FAILLABEL:.[0-9a-zA-Z]+]] + ; CHECK-NEXT: ldrh [[DST:w[0-9]+]], [x0] + ; CHECK-NEXT: [[FAILLABEL]]: + ; CHECK-NEXT: csel [[DST]], [[DST]], w4, hs + ; CHECK-NEXT: hint #20 + ; CHECK-NEXT: mov w0, [[DST]] + ; CHECK-NEXT: ret +} + +define i32 @f_i32(i32* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i32 %failval) { +entry: + %0 = tail call i32 @llvm.nospeculateload.i32(i32* %ptr, i8* %lowerbound, i8* %upperbound, i32 %failval, i8* %cmpptr) + ret i32 %0 + ; CHECK-LABEL: f_i32: + ; CHECK: cmp x3, x1 + ; CHECK-NEXT: ccmp x3, x2, #2, hs + ; CHECK-NEXT: b.hs [[FAILLABEL:.[0-9a-zA-Z]+]] + ; CHECK-NEXT: ldr [[DST:w[0-9]+]], [x0] + ; CHECK-NEXT: [[FAILLABEL]]: + ; CHECK-NEXT: csel [[DST]], [[DST]], w4, lo + ; CHECK-NEXT: hint #20 + ; CHECK-NEXT: mov w0, [[DST]] + ; CHECK-NEXT: ret +} + +define i32 @f_i32_nolower(i32* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i32 %failval) { +entry: + %0 = tail call i32 @llvm.nospeculateload.nolower.i32(i32* %ptr, i8* %upperbound, i32 %failval, i8* %cmpptr) + ret i32 %0 + ; CHECK-LABEL: f_i32_nolower: + ; CHECK: cmp x3, x2 + ; CHECK-NEXT: b.hs [[FAILLABEL:.[0-9a-zA-Z]+]] + ; CHECK-NEXT: ldr [[DST:w[0-9]+]], [x0] + ; CHECK-NEXT: [[FAILLABEL]]: + ; CHECK-NEXT: csel [[DST]], [[DST]], w4, lo + ; CHECK-NEXT: hint #20 + ; CHECK-NEXT: mov w0, [[DST]] + ; CHECK-NEXT: ret +} + +define i32 @f_i32_noupper(i32* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i32 %failval) { +entry: + %0 = tail call 
i32 @llvm.nospeculateload.noupper.i32(i32* %ptr, i8* %lowerbound, i32 %failval, i8* %cmpptr) + ret i32 %0 + ; CHECK-LABEL: f_i32_noupper: + ; CHECK: cmp x3, x1 + ; CHECK-NEXT: b.lo [[FAILLABEL:.[0-9a-zA-Z]+]] + ; CHECK-NEXT: ldr [[DST:w[0-9]+]], [x0] + ; CHECK-NEXT: [[FAILLABEL]]: + ; CHECK-NEXT: csel [[DST]], [[DST]], w4, hs + ; CHECK-NEXT: hint #20 + ; CHECK-NEXT: mov w0, [[DST]] + ; CHECK-NEXT: ret +} + +define i64 @f_i64(i64* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i64 %failval) { +entry: + %0 = tail call i64 @llvm.nospeculateload.i64(i64* %ptr, i8* %lowerbound, i8* %upperbound, i64 %failval, i8* %cmpptr) + ret i64 %0 + ; CHECK-LABEL: f_i64: + ; CHECK: cmp x3, x1 + ; CHECK-NEXT: ccmp x3, x2, #2, hs + ; CHECK-NEXT: b.hs [[FAILLABEL:.[0-9a-zA-Z]+]] + ; CHECK-NEXT: ldr [[DST:x[0-9]+]], [x0] + ; CHECK-NEXT: [[FAILLABEL]]: + ; CHECK-NEXT: csel [[DST]], [[DST]], x4, lo + ; CHECK-NEXT: hint #20 + ; CHECK-NEXT: mov x0, [[DST]] + ; CHECK-NEXT: ret +} + +define i64 @f_i64_nolower(i64* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i64 %failval) { +entry: + %0 = tail call i64 @llvm.nospeculateload.nolower.i64(i64* %ptr, i8* %upperbound, i64 %failval, i8* %cmpptr) + ret i64 %0 + ; CHECK-LABEL: f_i64_nolower: + ; CHECK: cmp x3, x2 + ; CHECK-NEXT: b.hs [[FAILLABEL:.[0-9a-zA-Z]+]] + ; CHECK-NEXT: ldr [[DST:x[0-9]+]], [x0] + ; CHECK-NEXT: [[FAILLABEL]]: + ; CHECK-NEXT: csel [[DST]], [[DST]], x4, lo + ; CHECK-NEXT: hint #20 + ; CHECK-NEXT: mov x0, [[DST]] + ; CHECK-NEXT: ret +} + +define i64 @f_i64_noupper(i64* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i64 %failval) { +entry: + %0 = tail call i64 @llvm.nospeculateload.noupper.i64(i64* %ptr, i8* %lowerbound, i64 %failval, i8* %cmpptr) + ret i64 %0 + ; CHECK-LABEL: f_i64_noupper: + ; CHECK: cmp x3, x1 + ; CHECK-NEXT: b.lo [[FAILLABEL:.[0-9a-zA-Z]+]] + ; CHECK-NEXT: ldr [[DST:x[0-9]+]], [x0] + ; CHECK-NEXT: [[FAILLABEL]]: + ; CHECK-NEXT: csel [[DST]], [[DST]], x4, hs + ; CHECK-NEXT: hint #20 + ; CHECK-NEXT: mov x0, [[DST]] + ; CHECK-NEXT: ret +} + +define i128 @f_i128(i128* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i128 %failval) { +entry: + %0 = tail call i128 @llvm.nospeculateload.i128(i128* %ptr, i8* %lowerbound, i8* %upperbound, i128 %failval, i8* %cmpptr) + ret i128 %0 + ; CHECK-LABEL: f_i128: + ; CHECK: cmp x3, x1 + ; CHECK-NEXT: ccmp x3, x2, #2, hs + ; CHECK-NEXT: b.hs [[FAILLABEL:.[0-9a-zA-Z]+]] + ; CHECK-NEXT: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0] + ; CHECK-NEXT: [[FAILLABEL]]: + ; CHECK-NEXT: csel [[DST1]], [[DST1]], x4, lo + ; CHECK-NEXT: csel [[DST2]], [[DST2]], x5, lo + ; CHECK-NEXT: hint #20 + ; CHECK-NEXT: mov x0, [[DST1]] + ; CHECK-NEXT: mov x1, [[DST2]] + ; CHECK-NEXT: ret +} + +define i128 @f_i128_nolower(i128* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i128 %failval) { +entry: + %0 = tail call i128 @llvm.nospeculateload.nolower.i128(i128* %ptr, i8* %upperbound, i128 %failval, i8* %cmpptr) + ret i128 %0 + ; CHECK-LABEL: f_i128_nolower: + ; CHECK: cmp x3, x2 + ; CHECK-NEXT: b.hs [[FAILLABEL:.[0-9a-zA-Z]+]] + ; CHECK-NEXT: ldp [[DST1:x[0-9]+]], x1, [x0] + ; CHECK-NEXT: [[FAILLABEL]]: + ; CHECK-NEXT: csel [[DST1]], [[DST1]], x4, lo + ; CHECK-NEXT: csel x1, x1, x5, lo + ; CHECK-NEXT: hint #20 + ; CHECK-NEXT: mov x0, [[DST1]] + ; CHECK-NEXT: ret +} + +define i128 @f_i128_noupper(i128* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i128 %failval) { +entry: + %0 = tail call i128 @llvm.nospeculateload.noupper.i128(i128* %ptr, i8* %lowerbound, i128 %failval, i8* %cmpptr) + ret i128 
%0 + ; CHECK-LABEL: f_i128_noupper: + ; CHECK: cmp x3, x1 + ; CHECK-NEXT: b.lo [[FAILLABEL:.[0-9a-zA-Z]+]] + ; CHECK-NEXT: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0] + ; CHECK-NEXT: [[FAILLABEL]]: + ; CHECK-NEXT: csel [[DST1]], [[DST1]], x4, hs + ; CHECK-NEXT: csel [[DST2]], [[DST2]], x5, hs + ; CHECK-NEXT: hint #20 + ; CHECK-NEXT: mov x0, [[DST1]] + ; CHECK-NEXT: mov x1, [[DST2]] + ; CHECK-NEXT: ret +} Index: test/CodeGen/ARM/no-speculate.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/no-speculate.ll @@ -0,0 +1,243 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=armv7a-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-ARM,CHECK-LE +; RUN: llc -verify-machineinstrs < %s -mtriple=thumbv7a-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-THUMB,CHECK-LE +; RUN: llc -verify-machineinstrs < %s -mtriple=thumbv8a-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-THUMB,CHECK-LE +; RUN: llc -verify-machineinstrs < %s -mtriple=armv7a_be-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-ARM,CHECK-BE +; RUN: llc -verify-machineinstrs < %s -mtriple=thumbv7a_be-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-THUMB,CHECK-BE +; RUN: llc -verify-machineinstrs < %s -mtriple=thumbv8a_be-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-THUMB,CHECK-BE +; RUN: llc -fast-isel -verify-machineinstrs < %s -mtriple=armv7a-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-ARM +; RUN: llc -global-isel -global-isel-abort=0 -verify-machineinstrs < %s -mtriple=armv7a-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-ARM + + +declare i8 @llvm.nospeculateload.i8(i8*, i8*, i8*, i8, i8*) +declare i8 @llvm.nospeculateload.nolower.i8(i8*, i8*, i8, i8*) +declare i8 @llvm.nospeculateload.noupper.i8(i8*, i8*, i8, i8*) +declare i16 @llvm.nospeculateload.i16(i16*, i8*, i8*, i16, i8*) +declare i16 @llvm.nospeculateload.nolower.i16(i16*, i8*, i16, i8*) +declare i16 @llvm.nospeculateload.noupper.i16(i16*, i8*, i16, i8*) +declare i32 @llvm.nospeculateload.i32(i32*, i8*, i8*, i32, i8*) +declare i32 @llvm.nospeculateload.nolower.i32(i32*, i8*, i32, i8*) +declare i32 @llvm.nospeculateload.noupper.i32(i32*, i8*, i32, i8*) +declare i64 @llvm.nospeculateload.i64(i64*, i8*, i8*, i64, i8*) +declare i64 @llvm.nospeculateload.nolower.i64(i64*, i8*, i64, i8*) +declare i64 @llvm.nospeculateload.noupper.i64(i64*, i8*, i64, i8*) + +define i8 @f_i8(i8* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i8 %failval) { +entry: + %0 = tail call i8 @llvm.nospeculateload.i8(i8* %ptr, i8* %lowerbound, i8* %upperbound, i8 %failval, i8* %cmpptr) + ret i8 %0 + ; CHECK-LABEL: f_i8: + ; %failval is passed on the stack + ; CHECK: ldr{{()|b}}{{()|.w}} [[FAILVAL:r[0-9]+|lr]], [sp{{()|, #[0-9]+}}] + ; CHECK-NEXT: cmp r3, r1 + ; CHECK-THUMB-NEXT: it hs + ; CHECK-NEXT: cmphs r2, r3 + ; CHECK-THUMB-NEXT: it hi + ; CHECK-NEXT: ldrbhi [[DST:r[0-9]+]], [r0] + ; CHECK-THUMB-NEXT: it ls + ; CHECK-NEXT: movls [[DST]], [[FAILVAL]] + ; check for csdb encoding: + ; CHECK-ARM-NEXT: .inst 0xe320f014 + ; CHECK-THUMB-NEXT: .inst.w 0xf3af8014 +} + +define i8 @f_i8_nolower(i8* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i8 %failval) { +entry: + %0 = tail call i8 @llvm.nospeculateload.nolower.i8(i8* %ptr, i8* %upperbound, i8 %failval, i8* %cmpptr) + ret i8 %0 + ; CHECK-LABEL: f_i8_nolower: + ; %failval is passed on the stack + ; CHECK: ldr{{()|b}}{{()|.w}} [[FAILVAL:r[0-9]+|lr]], [sp{{()|, #[0-9]+}}] + ; CHECK-NEXT: cmp r3, r2 + ; CHECK-THUMB-NEXT: it lo + ; CHECK-NEXT: ldrblo [[DST:r[0-9]+]], [r0] + ; 
+define i8 @f_i8(i8* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i8 %failval) {
+entry:
+  %0 = tail call i8 @llvm.nospeculateload.i8(i8* %ptr, i8* %lowerbound, i8* %upperbound, i8 %failval, i8* %cmpptr)
+  ret i8 %0
+  ; CHECK-LABEL: f_i8:
+  ; %failval is passed on the stack
+  ; CHECK: ldr{{()|b}}{{()|.w}} [[FAILVAL:r[0-9]+|lr]], [sp{{()|, #[0-9]+}}]
+  ; CHECK-NEXT: cmp r3, r1
+  ; CHECK-THUMB-NEXT: it hs
+  ; CHECK-NEXT: cmphs r2, r3
+  ; CHECK-THUMB-NEXT: it hi
+  ; CHECK-NEXT: ldrbhi [[DST:r[0-9]+]], [r0]
+  ; CHECK-THUMB-NEXT: it ls
+  ; CHECK-NEXT: movls [[DST]], [[FAILVAL]]
+  ; check for csdb encoding:
+  ; CHECK-ARM-NEXT: .inst 0xe320f014
+  ; CHECK-THUMB-NEXT: .inst.w 0xf3af8014
+}
+
+define i8 @f_i8_nolower(i8* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i8 %failval) {
+entry:
+  %0 = tail call i8 @llvm.nospeculateload.nolower.i8(i8* %ptr, i8* %upperbound, i8 %failval, i8* %cmpptr)
+  ret i8 %0
+  ; CHECK-LABEL: f_i8_nolower:
+  ; %failval is passed on the stack
+  ; CHECK: ldr{{()|b}}{{()|.w}} [[FAILVAL:r[0-9]+|lr]], [sp{{()|, #[0-9]+}}]
+  ; CHECK-NEXT: cmp r3, r2
+  ; CHECK-THUMB-NEXT: it lo
+  ; CHECK-NEXT: ldrblo [[DST:r[0-9]+]], [r0]
+  ; CHECK-THUMB-NEXT: it hs
+  ; CHECK-NEXT: movhs [[DST]], [[FAILVAL]]
+  ; check for csdb encoding:
+  ; CHECK-ARM-NEXT: .inst 0xe320f014
+  ; CHECK-THUMB-NEXT: .inst.w 0xf3af8014
+}
+
+define i8 @f_i8_noupper(i8* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i8 %failval) {
+entry:
+  %0 = tail call i8 @llvm.nospeculateload.noupper.i8(i8* %ptr, i8* %lowerbound, i8 %failval, i8* %cmpptr)
+  ret i8 %0
+  ; CHECK-LABEL: f_i8_noupper:
+  ; %failval is passed on the stack
+  ; CHECK: ldr{{()|b}}{{()|.w}} [[FAILVAL:r[0-9]+|lr]], [sp{{()|, #[0-9]+}}]
+  ; CHECK-NEXT: cmp r3, r1
+  ; CHECK-THUMB-NEXT: it hs
+  ; CHECK-NEXT: ldrbhs [[DST:r[0-9]+]], [r0]
+  ; CHECK-THUMB-NEXT: it lo
+  ; CHECK-NEXT: movlo [[DST]], [[FAILVAL]]
+  ; check for csdb encoding:
+  ; CHECK-ARM-NEXT: .inst 0xe320f014
+  ; CHECK-THUMB-NEXT: .inst.w 0xf3af8014
+}
+
+define i16 @f_i16(i16* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i16 %failval) {
+entry:
+  %0 = tail call i16 @llvm.nospeculateload.i16(i16* %ptr, i8* %lowerbound, i8* %upperbound, i16 %failval, i8* %cmpptr)
+  ret i16 %0
+  ; CHECK-LABEL: f_i16:
+  ; %failval is passed on the stack
+  ; CHECK: ldr{{()|h}}{{()|.w}} [[FAILVAL:r[0-9]+|lr]], [sp{{()|, #[0-9]+}}]
+  ; CHECK-NEXT: cmp r3, r1
+  ; CHECK-THUMB-NEXT: it hs
+  ; CHECK-NEXT: cmphs r2, r3
+  ; CHECK-THUMB-NEXT: it hi
+  ; CHECK-NEXT: ldrhhi [[DST:r[0-9]+]], [r0]
+  ; CHECK-THUMB-NEXT: it ls
+  ; CHECK-NEXT: movls [[DST]], [[FAILVAL]]
+  ; check for csdb encoding:
+  ; CHECK-ARM-NEXT: .inst 0xe320f014
+  ; CHECK-THUMB-NEXT: .inst.w 0xf3af8014
+}
+
+define i16 @f_i16_nolower(i16* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i16 %failval) {
+entry:
+  %0 = tail call i16 @llvm.nospeculateload.nolower.i16(i16* %ptr, i8* %upperbound, i16 %failval, i8* %cmpptr)
+  ret i16 %0
+  ; CHECK-LABEL: f_i16_nolower:
+  ; %failval is passed on the stack
+  ; CHECK: ldr{{()|h}}{{()|.w}} [[FAILVAL:r[0-9]+|lr]], [sp{{()|, #[0-9]+}}]
+  ; CHECK-NEXT: cmp r3, r2
+  ; CHECK-THUMB-NEXT: it lo
+  ; CHECK-NEXT: ldrhlo [[DST:r[0-9]+]], [r0]
+  ; CHECK-THUMB-NEXT: it hs
+  ; CHECK-NEXT: movhs [[DST]], [[FAILVAL]]
+  ; check for csdb encoding:
+  ; CHECK-ARM-NEXT: .inst 0xe320f014
+  ; CHECK-THUMB-NEXT: .inst.w 0xf3af8014
+}
+
+define i16 @f_i16_noupper(i16* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i16 %failval) {
+entry:
+  %0 = tail call i16 @llvm.nospeculateload.noupper.i16(i16* %ptr, i8* %lowerbound, i16 %failval, i8* %cmpptr)
+  ret i16 %0
+  ; CHECK-LABEL: f_i16_noupper:
+  ; %failval is passed on the stack
+  ; CHECK: ldr{{()|h}}{{()|.w}} [[FAILVAL:r[0-9]+|lr]], [sp{{()|, #[0-9]+}}]
+  ; CHECK-NEXT: cmp r3, r1
+  ; CHECK-THUMB-NEXT: it hs
+  ; CHECK-NEXT: ldrhhs [[DST:r[0-9]+]], [r0]
+  ; CHECK-THUMB-NEXT: it lo
+  ; CHECK-NEXT: movlo [[DST]], [[FAILVAL]]
+  ; check for csdb encoding:
+  ; CHECK-ARM-NEXT: .inst 0xe320f014
+  ; CHECK-THUMB-NEXT: .inst.w 0xf3af8014
+}
+
+define i32 @f_i32(i32* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i32 %failval) {
+entry:
+  %0 = tail call i32 @llvm.nospeculateload.i32(i32* %ptr, i8* %lowerbound, i8* %upperbound, i32 %failval, i8* %cmpptr)
+  ret i32 %0
+  ; CHECK-LABEL: f_i32:
+  ; %failval is passed on the stack
+  ; CHECK: ldr{{()|.w}} [[FAILVAL:r[0-9]+|lr]], [sp{{()|, #[0-9]+}}]
+  ; CHECK-NEXT: cmp r3, r1
+  ; CHECK-THUMB-NEXT: it hs
+  ; CHECK-NEXT: cmphs r2, r3
+  ; CHECK-THUMB-NEXT: it hi
+  ; CHECK-NEXT: ldrhi [[DST:r[0-9]+]], [r0]
+  ; CHECK-THUMB-NEXT: it ls
+  ; CHECK-NEXT: movls [[DST]], [[FAILVAL]]
+  ; check for csdb encoding:
+  ; CHECK-ARM-NEXT: .inst 0xe320f014
+  ; CHECK-THUMB-NEXT: .inst.w 0xf3af8014
+}
+
+define i32 @f_i32_nolower(i32* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i32 %failval) {
+entry:
+  %0 = tail call i32 @llvm.nospeculateload.nolower.i32(i32* %ptr, i8* %upperbound, i32 %failval, i8* %cmpptr)
+  ret i32 %0
+  ; CHECK-LABEL: f_i32_nolower:
+  ; %failval is passed on the stack
+  ; CHECK: ldr{{()|.w}} [[FAILVAL:r[0-9]+|lr]], [sp{{()|, #[0-9]+}}]
+  ; CHECK-NEXT: cmp r3, r2
+  ; CHECK-THUMB-NEXT: it lo
+  ; CHECK-NEXT: ldrlo [[DST:r[0-9]+]], [r0]
+  ; CHECK-THUMB-NEXT: it hs
+  ; CHECK-NEXT: movhs [[DST]], [[FAILVAL]]
+  ; check for csdb encoding:
+  ; CHECK-ARM-NEXT: .inst 0xe320f014
+  ; CHECK-THUMB-NEXT: .inst.w 0xf3af8014
+}
+
+define i32 @f_i32_noupper(i32* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i32 %failval) {
+entry:
+  %0 = tail call i32 @llvm.nospeculateload.noupper.i32(i32* %ptr, i8* %lowerbound, i32 %failval, i8* %cmpptr)
+  ret i32 %0
+  ; CHECK-LABEL: f_i32_noupper:
+  ; %failval is passed on the stack
+  ; CHECK: ldr{{()|.w}} [[FAILVAL:r[0-9]+|lr]], [sp{{()|, #[0-9]+}}]
+  ; CHECK-NEXT: cmp r3, r1
+  ; CHECK-THUMB-NEXT: it hs
+  ; CHECK-NEXT: ldrhs [[DST:r[0-9]+]], [r0]
+  ; CHECK-THUMB-NEXT: it lo
+  ; CHECK-NEXT: movlo [[DST]], [[FAILVAL]]
+  ; check for csdb encoding:
+  ; CHECK-ARM-NEXT: .inst 0xe320f014
+  ; CHECK-THUMB-NEXT: .inst.w 0xf3af8014
+}
+
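+; In the i64 tests below the result lives in a register pair and %failval
+; arrives in two 4-byte stack slots, so the conditional load becomes an ldrd
+; and each half of the pair gets its own conditional mov from the
+; corresponding %failval register; a single CSDB still covers both halves.
+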
+define i64 @f_i64(i64* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i64 %failval) {
+entry:
+  %0 = tail call i64 @llvm.nospeculateload.i64(i64* %ptr, i8* %lowerbound, i8* %upperbound, i64 %failval, i8* %cmpptr)
+  ret i64 %0
+  ; CHECK-LABEL: f_i64:
+  ; %failval is passed on the stack, in two 4-byte slots. Different variants
+  ; (e.g. Arm vs Thumb) load these either as two ldr instructions or as one
+  ; ldrd, which is too complex to check explicitly here.
+  ; CHECK: cmp r3, r1
+  ; CHECK-THUMB-NEXT: it hs
+  ; CHECK-NEXT: cmphs r2, r3
+  ; CHECK-THUMB-NEXT: it hi
+  ; CHECK-NEXT: ldrdhi [[DST1:r[0-9]+|lr]], [[DST2:r[0-9]+|lr]], [r0]
+  ; CHECK-THUMB-NEXT: it ls
+  ; CHECK-NEXT: movls [[DST1]], [[FAILVAL1:r[0-9]+|lr]]
+  ; CHECK-THUMB-NEXT: it ls
+  ; CHECK-NEXT: movls [[DST2]], [[FAILVAL2:r[0-9]+|lr]]
+  ; check for csdb encoding:
+  ; CHECK-ARM-NEXT: .inst 0xe320f014
+  ; CHECK-THUMB-NEXT: .inst.w 0xf3af8014
+}
+
+define i64 @f_i64_nolower(i64* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i64 %failval) {
+entry:
+  %0 = tail call i64 @llvm.nospeculateload.nolower.i64(i64* %ptr, i8* %upperbound, i64 %failval, i8* %cmpptr)
+  ret i64 %0
+  ; CHECK-LABEL: f_i64_nolower:
+  ; %failval is passed on the stack, in two 4-byte slots. Different variants
+  ; (e.g. Arm vs Thumb) load these either as two ldr instructions or as one
+  ; ldrd, which is too complex to check explicitly here.
+  ; CHECK: cmp r3, r2
+  ; CHECK-THUMB-NEXT: it lo
+  ; CHECK-NEXT: ldrdlo [[DST1:r[0-9]+|lr]], [[DST2:r[0-9]+|lr]], [r0]
+  ; CHECK-THUMB-NEXT: it hs
+  ; CHECK-NEXT: movhs [[DST1]], [[FAILVAL1:r[0-9]+|lr]]
+  ; CHECK-THUMB-NEXT: it hs
+  ; CHECK-NEXT: movhs [[DST2]], [[FAILVAL2:r[0-9]+|lr]]
+  ; check for csdb encoding:
+  ; CHECK-ARM-NEXT: .inst 0xe320f014
+  ; CHECK-THUMB-NEXT: .inst.w 0xf3af8014
+}
+
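+; The noupper variant checks only the lower bound, so the conditions invert
+; relative to nolower: the ldrd is predicated on hs (%cmpptr >= %lowerbound)
+; and the %failval movs on lo.
+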
+define i64 @f_i64_noupper(i64* %ptr, i8* %lowerbound, i8* %upperbound, i8* %cmpptr, i64 %failval) {
+entry:
+  %0 = tail call i64 @llvm.nospeculateload.noupper.i64(i64* %ptr, i8* %lowerbound, i64 %failval, i8* %cmpptr)
+  ret i64 %0
+  ; CHECK-LABEL: f_i64_noupper:
+  ; %failval is passed on the stack, in two 4-byte slots. Different variants
+  ; (e.g. Arm vs Thumb) load these either as two ldr instructions or as one
+  ; ldrd, which is too complex to check explicitly here.
+  ; CHECK: cmp r3, r1
+  ; CHECK-THUMB-NEXT: it hs
+  ; CHECK-NEXT: ldrdhs [[DST1:r[0-9]+|lr]], [[DST2:r[0-9]+|lr]], [r0]
+  ; CHECK-THUMB-NEXT: it lo
+  ; CHECK-NEXT: movlo [[DST1]], [[FAILVAL1:r[0-9]+|lr]]
+  ; CHECK-THUMB-NEXT: it lo
+  ; CHECK-NEXT: movlo [[DST2]], [[FAILVAL2:r[0-9]+|lr]]
+  ; check for csdb encoding:
+  ; CHECK-ARM-NEXT: .inst 0xe320f014
+  ; CHECK-THUMB-NEXT: .inst.w 0xf3af8014
+}