diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -4182,6 +4182,36 @@
     }
     break;
   }
+
+  case AArch64ISD::LDP: {
+    AArch64LoadPairSDNode *LD = cast<AArch64LoadPairSDNode>(Node);
+    const SDValue &Offset =
+        LD->getOffset()->isUndef()
+            ? CurDAG->getTargetConstant(0, SDLoc(Node), MVT::i64)
+            : LD->getOffset();
+    MachineSDNode *Result = CurDAG->getMachineNode(
+        AArch64::LDPXi, SDLoc(Node), MVT::i64, MVT::i64, MVT::Other,
+        LD->getBasePtr(), Offset, LD->getChain());
+    MachineMemOperand *MemOp = LD->getMemOperand();
+    CurDAG->setNodeMemRefs(Result, {MemOp});
+    ReplaceNode(LD, Result);
+    return;
+  }
+  case AArch64ISD::STP: {
+    AArch64StorePairSDNode *ST = cast<AArch64StorePairSDNode>(Node);
+    const SDValue &Offset =
+        ST->getOffset()->isUndef()
+            ? CurDAG->getTargetConstant(0, SDLoc(Node), MVT::i64)
+            : ST->getOffset();
+    MachineSDNode *Result =
+        CurDAG->getMachineNode(AArch64::STPXi, SDLoc(Node), MVT::Other,
+                               {ST->getLoValue(), ST->getHiValue(),
+                                ST->getBasePtr(), Offset, ST->getChain()});
+    MachineMemOperand *MemOp = ST->getMemOperand();
+    CurDAG->setNodeMemRefs(Result, {MemOp});
+    ReplaceNode(ST, Result);
+    return;
+  }
   }
 
   // Select the default instruction
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -224,8 +224,10 @@
   STG,
   STZG,
   ST2G,
-  STZ2G
+  STZ2G,
+  LDP,
+  STP
 };
 
 } // end namespace AArch64ISD
@@ -767,6 +769,36 @@
                          const TargetLibraryInfo *libInfo);
 } // end namespace AArch64
 
+class AArch64LoadPairSDNode : public MemSDNode {
+public:
+  AArch64LoadPairSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
+                        EVT MemVT, MachineMemOperand *MMO)
+      : MemSDNode(AArch64ISD::LDP, Order, dl, VTs, MemVT, MMO) {}
+
+  const SDValue &getBasePtr() const { return getOperand(1); }
+  const SDValue &getOffset() const { return getOperand(2); }
+
+  static bool classof(const SDNode *N) {
+    return N->getOpcode() == AArch64ISD::LDP;
+  }
+};
+
+class AArch64StorePairSDNode : public MemSDNode {
+public:
+  AArch64StorePairSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
+                         EVT MemVT, MachineMemOperand *MMO)
+      : MemSDNode(AArch64ISD::STP, Order, dl, VTs, MemVT, MMO) {}
+
+  const SDValue &getLoValue() const { return getOperand(1); }
+  const SDValue &getHiValue() const { return getOperand(2); }
+  const SDValue &getBasePtr() const { return getOperand(3); }
+  const SDValue &getOffset() const { return getOperand(4); }
+
+  static bool classof(const SDNode *N) {
+    return N->getOpcode() == AArch64ISD::STP;
+  }
+};
+
 } // end namespace llvm
 
 #endif
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -517,6 +517,10 @@
   setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
   setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
 
+  // 128-bit loads and stores can be done without expanding
+  setOperationAction(ISD::LOAD, MVT::i128, Custom);
+  setOperationAction(ISD::STORE, MVT::i128, Custom);
+
   // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
   // This requires the Performance Monitors extension.
   if (Subtarget->hasPerfMon())
@@ -574,6 +578,7 @@
     setIndexedLoadAction(im, MVT::i16, Legal);
     setIndexedLoadAction(im, MVT::i32, Legal);
     setIndexedLoadAction(im, MVT::i64, Legal);
+    setIndexedLoadAction(im, MVT::i128, Legal);
     setIndexedLoadAction(im, MVT::f64, Legal);
     setIndexedLoadAction(im, MVT::f32, Legal);
     setIndexedLoadAction(im, MVT::f16, Legal);
@@ -581,6 +586,7 @@
     setIndexedStoreAction(im, MVT::i16, Legal);
     setIndexedStoreAction(im, MVT::i32, Legal);
     setIndexedStoreAction(im, MVT::i64, Legal);
+    setIndexedStoreAction(im, MVT::i128, Legal);
     setIndexedStoreAction(im, MVT::f64, Legal);
     setIndexedStoreAction(im, MVT::f32, Legal);
     setIndexedStoreAction(im, MVT::f16, Legal);
@@ -1333,6 +1339,8 @@
   case AArch64ISD::SUNPKLO:           return "AArch64ISD::SUNPKLO";
   case AArch64ISD::UUNPKHI:           return "AArch64ISD::UUNPKHI";
   case AArch64ISD::UUNPKLO:           return "AArch64ISD::UUNPKLO";
+  case AArch64ISD::LDP:               return "AArch64ISD::LDP";
+  case AArch64ISD::STP:               return "AArch64ISD::STP";
   }
   return nullptr;
 }
@@ -2946,7 +2954,7 @@
 
 // Custom lowering for any store, vector or scalar and/or default or with
 // a truncate operations. Currently only custom lower truncate operation
-// from vector v4i16 to v4i8.
+// from vector v4i16 to v4i8 or volatile stores of i128.
 SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
                                           SelectionDAG &DAG) const {
   SDLoc Dl(Op);
@@ -2958,18 +2966,33 @@
   EVT VT = Value.getValueType();
   EVT MemVT = StoreNode->getMemoryVT();
 
-  assert (VT.isVector() && "Can only custom lower vector store types");
-
-  unsigned AS = StoreNode->getAddressSpace();
-  unsigned Align = StoreNode->getAlignment();
-  if (Align < MemVT.getStoreSize() &&
-      !allowsMisalignedMemoryAccesses(
-          MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) {
-    return scalarizeVectorStore(StoreNode, DAG);
-  }
-
-  if (StoreNode->isTruncatingStore()) {
-    return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
+  if (VT.isVector()) {
+    unsigned AS = StoreNode->getAddressSpace();
+    unsigned Align = StoreNode->getAlignment();
+    if (Align < MemVT.getStoreSize() &&
+        !allowsMisalignedMemoryAccesses(MemVT, AS, Align,
+                                        StoreNode->getMemOperand()->getFlags(),
+                                        nullptr)) {
+      return scalarizeVectorStore(StoreNode, DAG);
+    }
+
+    if (StoreNode->isTruncatingStore()) {
+      return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
+    }
+  } else if (MemVT == MVT::i128 && StoreNode->isVolatile() &&
+             !StoreNode->isTruncatingStore()) {
+    SDValue Lo =
+        DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
+                    DAG.getConstant(0, Dl, MVT::i64));
+    SDValue Hi =
+        DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
+                    DAG.getConstant(1, Dl, MVT::i64));
+    SDValue Result = DAG.getTargetMemSDNode<AArch64StorePairSDNode>(
+        DAG.getVTList(MVT::Other),
+        {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr(),
+         StoreNode->getOffset()},
+        Dl, MemVT, StoreNode->getMemOperand());
+    return Result.getValue(0);
   }
 
   return SDValue();
@@ -12115,6 +12138,29 @@
   case ISD::ATOMIC_CMP_SWAP:
     ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
     return;
+  case ISD::LOAD: {
+    assert(SDValue(N, 0).getValueType() == MVT::i128 &&
+           "unexpected load's value type");
+    LoadSDNode *LoadNode = cast<LoadSDNode>(N);
+    if (LoadNode->getExtensionType() != ISD::NON_EXTLOAD) {
+      // Loads with extensions are not lowered to LDPXi.
+      return;
+    }
+    if (!LoadNode->isVolatile()) {
+      // Non-volatile loads are optimized later in AArch64's load/store
+      // optimizer.
+      return;
+    }
+    SDValue Result = DAG.getTargetMemSDNode<AArch64LoadPairSDNode>(
+        DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
+        {LoadNode->getChain(), LoadNode->getBasePtr(), LoadNode->getOffset()},
+        SDLoc(N), LoadNode->getMemoryVT(), LoadNode->getMemOperand());
+
+    SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
+                               Result.getValue(0), Result.getValue(1));
+    Results.append({Pair, Result.getValue(2) /* Chain */});
+    return;
+  }
   }
 }
diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll b/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
--- a/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
@@ -87,10 +87,8 @@
 define {i128, i1} @test_cmpxchg_128_unsplit(i128* %addr) {
 ; CHECK-LABEL: test_cmpxchg_128_unsplit:
 ; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
-; CHECK: ldr [[DESIRED_HI:x[0-9]+]], [x[[VAR128]], #8]
-; CHECK: ldr [[DESIRED_LO:x[0-9]+]], [x[[VAR128]]]
-; CHECK: ldr [[NEW_HI:x[0-9]+]], [x[[VAR128]], #8]
-; CHECK: ldr [[NEW_LO:x[0-9]+]], [x[[VAR128]]]
+; CHECK: ldp [[DESIRED_LO:x[0-9]+]], [[DESIRED_HI:x[0-9]+]], [x[[VAR128]]]
+; CHECK: ldp [[NEW_LO:x[0-9]+]], [[NEW_HI:x[0-9]+]], [x[[VAR128]]]
 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [x0]
 ; CHECK: cmp [[OLD_LO]], [[DESIRED_LO]]
diff --git a/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll b/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
@@ -0,0 +1,23 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64 %s -o - | FileCheck %s
+
+@var = common dso_local global i128 0, align 16
+
+; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}]
+define i128 @load() {
+  %v = load volatile i128, i128* @var, align 16
+  ret i128 %v
+}
+
+; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}]
+define void @store(i128 %arg) {
+  store volatile i128 %arg, i128* @var, align 16
+  ret void
+}
+
+; CHECK: ldp [[LO:x[0-9]+]], [[HI:x[0-9]+]], {{\[}}[[ADDR:x[0-9]+]]{{\]}}
+; CHECK: stp [[LO]], [[HI]], {{\[}}[[ADDR]]{{\]}}
+define void @load_store() {
+  %v = load volatile i128, i128* @var, align 16
+  store volatile i128 %v, i128* @var, align 16
+  ret void
+}