diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -32,6 +32,8 @@
   GETSTACKTOP, // retrieve address of stack top (first address of
                // locals and temporaries)
 
+  MEMBARRIER, // Compiler barrier only; generate a no-op.
+
   CALL,            // A call instruction.
   RET_FLAG,        // Return with a flag operand.
   GLOBAL_BASE_REG, // Global base reg for PIC.
@@ -77,6 +79,7 @@
   /// Custom Lower {
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 
+  SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -817,6 +817,17 @@
 
   /// } Floating-point math functions
 
+  /// Atomic instructions {
+
+  setMaxAtomicSizeInBitsSupported(64);
+  setMinCmpXchgSizeInBits(32);
+  setSupportsUnalignedAtomics(false);
+
+  // Use custom lowering for ATOMIC_FENCE.
+  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+  /// } Atomic instructions
+
   setStackPointerRegisterToSaveRestore(VE::SX11);
 
   // We have target-specific dag combine patterns for the following nodes:
@@ -843,6 +854,7 @@
     TARGET_NODE_CASE(GETFUNPLT)
     TARGET_NODE_CASE(GETSTACKTOP)
     TARGET_NODE_CASE(GETTLSADDR)
+    TARGET_NODE_CASE(MEMBARRIER)
     TARGET_NODE_CASE(CALL)
     TARGET_NODE_CASE(RET_FLAG)
     TARGET_NODE_CASE(GLOBAL_BASE_REG)
@@ -945,6 +957,55 @@
 
 /// Custom Lower {
 
+/// Lower ISD::ATOMIC_FENCE. Operand 0 is the chain, operand 1 the atomic
+/// ordering and operand 2 the synchronization scope. A system-scope acquire,
+/// release, acq_rel or seq_cst fence becomes a FENCEM machine node; every
+/// other fence becomes the no-op compiler barrier VEISD::MEMBARRIER.
+SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
+      cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
+  SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
+      cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
+
+  // VE uses release consistency, so a fence instruction is needed if it is a
+  // cross-thread fence.
+  if (FenceSSID == SyncScope::System) {
+    switch (FenceOrdering) {
+    case AtomicOrdering::NotAtomic:
+    case AtomicOrdering::Unordered:
+    case AtomicOrdering::Monotonic:
+      // No need to generate fencem instruction here.
+      break;
+    case AtomicOrdering::Acquire:
+      // Generate "fencem 2" as acquire fence.
+      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
+                                        DAG.getTargetConstant(2, DL, MVT::i32),
+                                        Op.getOperand(0)),
+                     0);
+    case AtomicOrdering::Release:
+      // Generate "fencem 1" as release fence.
+      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
+                                        DAG.getTargetConstant(1, DL, MVT::i32),
+                                        Op.getOperand(0)),
+                     0);
+    case AtomicOrdering::AcquireRelease:
+    case AtomicOrdering::SequentiallyConsistent:
+      // Generate "fencem 3" as acq_rel and seq_cst fence.
+      // FIXME: "fencem 3" doesn't wait for PCIe device accesses,
+      //        so seq_cst may require more instructions for them.
+      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
+                                        DAG.getTargetConstant(3, DL, MVT::i32),
+                                        Op.getOperand(0)),
+                     0);
+    }
+  }
+
+  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
+  return DAG.getNode(VEISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
+}
+
 SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
   return makeAddress(Op, DAG);
@@ -1263,6 +1324,8 @@
   switch (Op.getOpcode()) {
   default:
     llvm_unreachable("Should not custom lower this!");
+  case ISD::ATOMIC_FENCE:
+    return lowerATOMIC_FENCE(Op, DAG);
   case ISD::BlockAddress:
     return lowerBlockAddress(Op, DAG);
   case ISD::ConstantPool:
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -442,6 +442,9 @@
 def GetStackTop : SDNode<"VEISD::GETSTACKTOP", SDTNone,
                         [SDNPHasChain, SDNPSideEffect]>;
 
+// MEMBARRIER
+def MemBarrier : SDNode<"VEISD::MEMBARRIER", SDTNone,
+                        [SDNPHasChain, SDNPSideEffect]>;
 
 //===----------------------------------------------------------------------===//
 // VE Flag Conditions
@@ -1782,6 +1785,14 @@
                         "# GET STACK TOP",
                         [(set iPTR:$dst, (GetStackTop))]>;
 
+// MEMBARRIER
+let hasSideEffects = 1 in
+def MEMBARRIER : Pseudo<(outs), (ins), "# MEMBARRIER", [(MemBarrier)] >;
+
+//===----------------------------------------------------------------------===//
+// Other patterns
+//===----------------------------------------------------------------------===//
+
 // SETCC pattern matches
 //
 //   CMP  %tmp, lhs, rhs     ; compare lhs and rhs
diff --git a/llvm/test/CodeGen/VE/atomic_fence.ll b/llvm/test/CodeGen/VE/atomic_fence.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/VE/atomic_fence.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -mtriple=ve | FileCheck %s
+
+;;; Test atomic fence for all memory orders
+
+; Function Attrs: norecurse nounwind readnone
+define void @_Z20atomic_fence_relaxedv() {
+; CHECK-LABEL: _Z20atomic_fence_relaxedv:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:  or %s11, 0, %s9
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind
+define void @_Z20atomic_fence_consumev() {
+; CHECK-LABEL: _Z20atomic_fence_consumev:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:  fencem 2
+; CHECK-NEXT:  or %s11, 0, %s9
+  fence acquire
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind
+define void @_Z20atomic_fence_acquirev() {
+; CHECK-LABEL: _Z20atomic_fence_acquirev:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:  fencem 2
+; CHECK-NEXT:  or %s11, 0, %s9
+  fence acquire
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind
+define void @_Z20atomic_fence_releasev() {
+; CHECK-LABEL: _Z20atomic_fence_releasev:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:  fencem 1
+; CHECK-NEXT:  or %s11, 0, %s9
+  fence release
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind
+define void @_Z20atomic_fence_acq_relv() {
+; CHECK-LABEL: _Z20atomic_fence_acq_relv:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:  fencem 3
+; CHECK-NEXT:  or %s11, 0, %s9
+  fence acq_rel
+  ret void
+}
+
+; Function Attrs: nofree norecurse nounwind
+define void @_Z20atomic_fence_seq_cstv() {
+; CHECK-LABEL: _Z20atomic_fence_seq_cstv:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:  fencem 3
+; CHECK-NEXT:  or %s11, 0, %s9
+  fence seq_cst
+  ret void
+}