diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -3465,6 +3465,45 @@
     break;
   }
+  case ARMISD::LDRD: {
+    ARMLoadDualSDNode *LD = cast<ARMLoadDualSDNode>(N);
+    const SDValue &Offset = LD->getOffset()->isUndef()
+                                ? CurDAG->getTargetConstant(0, dl, MVT::i32)
+                                : LD->getOffset();
+    unsigned OpCode = Subtarget->isThumb2() ? ARM::t2LDRDi8 : ARM::LDRD;
+    SmallVector<SDValue, 6> Ops = {LD->getBasePtr()};
+    if (!Subtarget->isThumb2()) {
+      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+    }
+    Ops.append({Offset,
+                CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32),
+                CurDAG->getRegister(0, MVT::i32), LD->getChain()});
+    MachineSDNode *Result = CurDAG->getMachineNode(
+        OpCode, dl, {MVT::i32, MVT::i32, MVT::Other}, Ops);
+    transferMemOperands(LD, Result);
+    ReplaceNode(LD, Result);
+    return;
+  }
+  case ARMISD::STRD: {
+    ARMStoreDualSDNode *ST = cast<ARMStoreDualSDNode>(N);
+    const SDValue &Offset = ST->getOffset()->isUndef()
+                                ? CurDAG->getTargetConstant(0, dl, MVT::i32)
+                                : ST->getOffset();
+    unsigned OpCode = Subtarget->isThumb2() ? ARM::t2STRDi8 : ARM::STRD;
+    SmallVector<SDValue, 8> Ops = {ST->getLoValue(), ST->getHiValue(),
+                                   ST->getBasePtr()};
+    if (!Subtarget->isThumb2()) {
+      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+    }
+    Ops.append({Offset,
+                CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32),
+                CurDAG->getRegister(0, MVT::i32), ST->getChain()});
+    MachineSDNode *Result = CurDAG->getMachineNode(OpCode, dl, MVT::Other, Ops);
+    transferMemOperands(ST, Result);
+    ReplaceNode(ST, Result);
+    return;
+  }
+
   case ARMISD::VZIP: {
     unsigned Opc = 0;
     EVT VT = N->getValueType(0);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -278,7 +278,11 @@
       VST4_UPD,
       VST2LN_UPD,
       VST3LN_UPD,
-      VST4LN_UPD
+      VST4LN_UPD,
+
+      // Load/Store of dual registers
+      LDRD,
+      STRD
     };
 
   } // end namespace ARMISD
@@ -731,6 +735,8 @@
     SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
     void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
                   SelectionDAG &DAG) const;
+    void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                   SelectionDAG &DAG) const;
 
     Register getRegisterByName(const char* RegName, EVT VT,
                                const MachineFunction &MF) const override;
@@ -854,6 +860,36 @@
 
   } // end namespace ARM
 
+  class ARMLoadDualSDNode : public MemSDNode {
+  public:
+    ARMLoadDualSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
+                      EVT MemVT, MachineMemOperand *MMO)
+        : MemSDNode(ARMISD::LDRD, Order, dl, VTs, MemVT, MMO) {}
+
+    const SDValue &getBasePtr() const { return getOperand(1); }
+    const SDValue &getOffset() const { return getOperand(2); }
+
+    static bool classof(const SDNode *N) {
+      return N->getOpcode() == ARMISD::LDRD;
+    }
+  };
+
+  class ARMStoreDualSDNode : public MemSDNode {
+  public:
+    ARMStoreDualSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
+                       EVT MemVT, MachineMemOperand *MMO)
+        : MemSDNode(ARMISD::STRD, Order, dl, VTs, MemVT, MMO) {}
+
+    const SDValue &getLoValue() const { return getOperand(1); }
+    const SDValue &getHiValue() const { return getOperand(2); }
+    const SDValue &getBasePtr() const { return getOperand(3); }
+    const SDValue &getOffset() const { return getOperand(4); }
+
+    static bool classof(const SDNode *N) {
+      return N->getOpcode() == ARMISD::STRD;
+    }
+  };
+
 } // end namespace llvm
 
 #endif // LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1050,6 +1050,8 @@
   setOperationAction(ISD::SRA, MVT::i64, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
+  setOperationAction(ISD::LOAD, MVT::i64, Custom);
+  setOperationAction(ISD::STORE, MVT::i64, Custom);
 
   // MVE lowers 64 bit shifts to lsll and lsrl
   // assuming that ISD::SRL and SRA of i64 are already marked custom
@@ -1573,6 +1575,11 @@
   case ARMISD::PRELOAD:     return "ARMISD::PRELOAD";
+  case ARMISD::LDRD:
+    return "ARMISD::LDRD";
+  case ARMISD::STRD:
+    return "ARMISD::STRD";
+
   case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
   case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
@@ -8957,6 +8964,25 @@
   return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
 }
 
+void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                                  SelectionDAG &DAG) const {
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  EVT MemVT = LD->getMemoryVT();
+
+  if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
+      !Subtarget->isThumb1Only() && LD->isVolatile()) {
+    SDLoc dl(N);
+
+    SDValue Result = DAG.getTargetMemSDNode<ARMLoadDualSDNode>(
+        DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
+        {LD->getChain(), LD->getBasePtr(), LD->getOffset()}, dl, MemVT,
+        LD->getMemOperand());
+    SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
+                               Result.getValue(0), Result.getValue(1));
+    Results.append({Pair, Result.getValue(2)});
+  }
+}
+
 static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
   StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
   EVT MemVT = ST->getMemoryVT();
@@ -8986,6 +9012,35 @@
                       ST->getMemOperand());
 }
 
+static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
+                          const ARMSubtarget *Subtarget) {
+  StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+  EVT MemVT = ST->getMemoryVT();
+
+  if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
+      !Subtarget->isThumb1Only() && !ST->isTruncatingStore() &&
+      ST->isVolatile()) {
+    SDNode *N = Op.getNode();
+    SDLoc dl(N);
+
+    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
+                             DAG.getTargetConstant(0, dl, MVT::i32));
+    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
+                             DAG.getTargetConstant(1, dl, MVT::i32));
+
+    return DAG.getTargetMemSDNode<ARMStoreDualSDNode>(
+        DAG.getVTList(MVT::Other),
+        {ST->getChain(), Lo, Hi, ST->getBasePtr(), ST->getOffset()}, dl, MemVT,
+        ST->getMemOperand());
+  } else if ((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
+              MemVT == MVT::v16i1) &&
+             !ST->isTruncatingStore() && ST->isUnindexed()) {
+    return LowerPredicateStore(Op, DAG);
+  }
+
+  return SDValue();
+}
+
 static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
   MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
   MVT VT = Op.getSimpleValueType();
@@ -9170,7 +9225,7 @@
   case ISD::LOAD:
     return LowerPredicateLoad(Op, DAG);
   case ISD::STORE:
-    return LowerPredicateStore(Op, DAG);
+    return LowerSTORE(Op, DAG, Subtarget);
   case ISD::MLOAD:
     return LowerMLOAD(Op, DAG);
   case ISD::ATOMIC_LOAD:
@@ -9270,7 +9325,9 @@
   case ISD::ABS:
     lowerABS(N, Results, DAG);
     return ;
-
+  case ISD::LOAD:
+    LowerLOAD(N, Results, DAG);
+    break;
   }
   if (Res.getNode())
     Results.push_back(Res);
diff --git a/llvm/test/CodeGen/ARM/i64_volatile_load_store.ll b/llvm/test/CodeGen/ARM/i64_volatile_load_store.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/i64_volatile_load_store.ll
@@ -0,0 +1,30 @@
+; RUN: llc -mtriple=armv5e-arm-none-eabi %s -o - | FileCheck %s --check-prefix=CHECK-ARMV5TE
+; RUN: llc -mtriple=thumbv6t2-arm-none-eabi %s -o - | FileCheck %s --check-prefix=CHECK-T2
+; RUN: llc -mtriple=armv4t-arm-none-eabi %s -o - | FileCheck %s --check-prefix=CHECK-ARMV4T
+
+@x = common dso_local global i64 0, align 8
+@y = common dso_local global i64 0, align 8
+
+define void @test() {
+entry:
+; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV5TE: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV5TE: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-ARMV5TE: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-T2: movw [[ADDR0:r[0-9]+]], :lower16:x
+; CHECK-T2: movw [[ADDR1:r[0-9]+]], :lower16:y
+; CHECK-T2: movt [[ADDR0]], :upper16:x
+; CHECK-T2: movt [[ADDR1]], :upper16:y
+; CHECK-T2: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-T2: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV4T: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV4T: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-ARMV4T: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #4]
+; CHECK-ARMV4T: str [[R0]], {{\[}}[[ADDR1]], #4]
+; CHECK-ARMV4T: str [[R1]], {{\[}}[[ADDR1]]]
+  %0 = load volatile i64, i64* @x, align 8
+  store volatile i64 %0, i64* @y, align 8
+  ret void
+}
+