diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -510,6 +510,10 @@
   setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
   setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
 
+  // 128-bit loads and stores can be done without expanding
+  setOperationAction(ISD::LOAD, MVT::i128, Custom);
+  setOperationAction(ISD::STORE, MVT::i128, Custom);
+
   // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
   // This requires the Performance Monitors extension.
   if (Subtarget->hasPerfMon())
@@ -561,6 +565,7 @@
     setIndexedLoadAction(im, MVT::i16, Legal);
     setIndexedLoadAction(im, MVT::i32, Legal);
     setIndexedLoadAction(im, MVT::i64, Legal);
+    setIndexedLoadAction(im, MVT::i128, Legal);
     setIndexedLoadAction(im, MVT::f64, Legal);
     setIndexedLoadAction(im, MVT::f32, Legal);
     setIndexedLoadAction(im, MVT::f16, Legal);
@@ -568,6 +573,7 @@
     setIndexedStoreAction(im, MVT::i16, Legal);
     setIndexedStoreAction(im, MVT::i32, Legal);
     setIndexedStoreAction(im, MVT::i64, Legal);
+    setIndexedStoreAction(im, MVT::i128, Legal);
     setIndexedStoreAction(im, MVT::f64, Legal);
     setIndexedStoreAction(im, MVT::f32, Legal);
     setIndexedStoreAction(im, MVT::f16, Legal);
@@ -2927,7 +2933,7 @@
 
 // Custom lowering for any store, vector or scalar and/or default or with
 // a truncate operations. Currently only custom lower truncate operation
-// from vector v4i16 to v4i8.
+// from vector v4i16 to v4i8 or volatile stores of i128.
 SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
                                           SelectionDAG &DAG) const {
   SDLoc Dl(Op);
@@ -2939,18 +2945,36 @@
   EVT VT = Value.getValueType();
   EVT MemVT = StoreNode->getMemoryVT();
 
-  assert (VT.isVector() && "Can only custom lower vector store types");
-
-  unsigned AS = StoreNode->getAddressSpace();
-  unsigned Align = StoreNode->getAlignment();
-  if (Align < MemVT.getStoreSize() &&
-      !allowsMisalignedMemoryAccesses(
-          MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) {
-    return scalarizeVectorStore(StoreNode, DAG);
-  }
-
-  if (StoreNode->isTruncatingStore()) {
-    return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
+  if (VT.isVector()) {
+    unsigned AS = StoreNode->getAddressSpace();
+    unsigned Align = StoreNode->getAlignment();
+    if (Align < MemVT.getStoreSize() &&
+        !allowsMisalignedMemoryAccesses(MemVT, AS, Align,
+                                        StoreNode->getMemOperand()->getFlags(),
+                                        nullptr)) {
+      return scalarizeVectorStore(StoreNode, DAG);
+    }
+
+    if (StoreNode->isTruncatingStore()) {
+      return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
+    }
+  } else if (MemVT == MVT::i128 && StoreNode->isVolatile() &&
+             !StoreNode->isTruncatingStore()) {
+    const SDValue &Offset = StoreNode->isIndexed()
+                                ? StoreNode->getOffset()
+                                : DAG.getTargetConstant(0, Dl, MVT::i64);
+    SDValue Lo =
+        DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
+                    DAG.getConstant(0, Dl, MVT::i64));
+    SDValue Hi =
+        DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
+                    DAG.getConstant(1, Dl, MVT::i64));
+    MachineSDNode *Result = DAG.getMachineNode(
+        AArch64::STPXi, Dl, MVT::Other,
+        {Lo, Hi, StoreNode->getBasePtr(), Offset, StoreNode->getChain()});
+    MachineMemOperand *MemOp = cast<StoreSDNode>(Op)->getMemOperand();
+    DAG.setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
+    return SDValue(Result, 0);
   }
 
   return SDValue();
@@ -12093,6 +12117,33 @@
   case ISD::ATOMIC_CMP_SWAP:
     ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
     return;
+  case ISD::LOAD: {
+    assert(SDValue(N, 0).getValueType() == MVT::i128 &&
+           "unexpected load's value type");
+    LoadSDNode *LoadNode = cast<LoadSDNode>(N);
+    if (LoadNode->getExtensionType() != ISD::NON_EXTLOAD) {
+      // Loads with extensions are not lowered to LDPXi.
+      return;
+    }
+    if (!LoadNode->isVolatile()) {
+      // Non-volatile loads are optimized later in AArch64's load/store
+      // optimizer.
+      return;
+    }
+    const SDValue &Offset = LoadNode->isIndexed()
+                                ? LoadNode->getOffset()
+                                : DAG.getTargetConstant(0, SDLoc(N), MVT::i64);
+    MachineSDNode *Result = DAG.getMachineNode(
+        AArch64::LDPXi, SDLoc(N), MVT::i64, MVT::i64, MVT::Other,
+        LoadNode->getBasePtr(), Offset, LoadNode->getChain());
+    MachineMemOperand *MemOp = cast<LoadSDNode>(N)->getMemOperand();
+    DAG.setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
+
+    SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
+                               SDValue(Result, 0), SDValue(Result, 1));
+    Results.append({Pair, SDValue(Result, 2) /* Chain */});
+    return;
+  }
   }
 }
 
diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll b/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
--- a/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-O0.ll
@@ -87,10 +87,8 @@
 define {i128, i1} @test_cmpxchg_128_unsplit(i128* %addr) {
 ; CHECK-LABEL: test_cmpxchg_128_unsplit:
 ; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
-; CHECK: ldr [[DESIRED_HI:x[0-9]+]], [x[[VAR128]], #8]
-; CHECK: ldr [[DESIRED_LO:x[0-9]+]], [x[[VAR128]]]
-; CHECK: ldr [[NEW_HI:x[0-9]+]], [x[[VAR128]], #8]
-; CHECK: ldr [[NEW_LO:x[0-9]+]], [x[[VAR128]]]
+; CHECK: ldp [[DESIRED_LO:x[0-9]+]], [[DESIRED_HI:x[0-9]+]], [x[[VAR128]]]
+; CHECK: ldp [[NEW_LO:x[0-9]+]], [[NEW_HI:x[0-9]+]], [x[[VAR128]]]
 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [x0]
 ; CHECK: cmp [[OLD_LO]], [[DESIRED_LO]]
diff --git a/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll b/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
@@ -0,0 +1,23 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64 %s -o - | FileCheck %s
+
+@var = common dso_local global i128 0, align 16
+
+; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}]
+define i128 @load() {
+  %v = load volatile i128, i128* @var, align 16
+  ret i128 %v
+}
+
+; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [{{x[0-9]+}}]
+define void @store(i128 %arg) {
+  store volatile i128 %arg, i128* @var, align 16
+  ret void
+}
+
+; CHECK: ldp [[LO:x[0-9]+]], [[HI:x[0-9]+]], {{\[}}[[ADDR:x[0-9]+]]{{\]}}
+; CHECK: stp [[LO]], [[HI]], {{\[}}[[ADDR]]{{\]}}
define void @load_store() {
+  %v = load volatile i128, i128* @var, align 16
+  store volatile i128 %v, i128* @var, align 16
+  ret void
+}
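
For reference, a hypothetical C-level sketch (not part of the patch) of the source pattern this lowering targets; it mirrors the i128_volatile_load_store.ll test above. Assuming a clang built with this change and invoked as something like `clang --target=aarch64-linux-gnu -O1 -S i128_volatile.c`, each volatile __int128 access is expected to be selected as a single ldp/stp pair rather than two ldr/str instructions.

/* Hypothetical illustration only: source-level equivalent of the
   i128_volatile_load_store.ll test added by this patch. */

volatile __int128 var;            /* mirrors @var in the .ll test */

__int128 load(void) {             /* expected codegen: ldp x0, x1, [x...] */
  return var;
}

void store(__int128 arg) {        /* expected codegen: stp x0, x1, [x...] */
  var = arg;
}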