diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -731,6 +731,8 @@
     SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
     void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results,
                   SelectionDAG &DAG) const;
+    void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                   SelectionDAG &DAG) const;
 
     Register getRegisterByName(const char* RegName, EVT VT,
                                const MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1050,6 +1050,8 @@
   setOperationAction(ISD::SRA, MVT::i64, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
+  setOperationAction(ISD::LOAD, MVT::i64, Custom);
+  setOperationAction(ISD::STORE, MVT::i64, Custom);
 
   // MVE lowers 64 bit shifts to lsll and lsrl
   // assuming that ISD::SRL and SRA of i64 are already marked custom
@@ -8957,6 +8959,36 @@
   return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
 }
 
+void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                                  SelectionDAG &DAG) const {
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  EVT MemVT = LD->getMemoryVT();
+
+  if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
+      !Subtarget->isThumb1Only() &&
+      LD->getExtensionType() == ISD::NON_EXTLOAD && LD->isVolatile()) {
+    SDLoc dl(N);
+    const SDValue &Offset = LD->isIndexed()
+                                ? LD->getOffset()
+                                : DAG.getTargetConstant(0, dl, MVT::i32);
+    unsigned OpCode = Subtarget->isThumb2() ? ARM::t2LDRDi8 : ARM::LDRD;
+    SmallVector<SDValue> Ops = {LD->getBasePtr()};
+    if (!Subtarget->isThumb2()) {
+      Ops.push_back(DAG.getRegister(0, MVT::i32));
+    }
+    Ops.append({Offset,
+                DAG.getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32),
+                DAG.getRegister(0, MVT::i32), LD->getChain()});
+    MachineSDNode *Result =
+        DAG.getMachineNode(OpCode, dl, {MVT::i32, MVT::i32, MVT::Other}, Ops);
+    MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+    DAG.setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
+    SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
+                               SDValue(Result, 0), SDValue(Result, 1));
+    Results.append({Pair, SDValue(Result, 2) /* Chain */});
+  }
+}
+
 static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
   StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
   EVT MemVT = ST->getMemoryVT();
@@ -8986,6 +9018,46 @@
                       ST->getMemOperand());
 }
 
+static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
+                          const ARMSubtarget *Subtarget) {
+  StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+  EVT MemVT = ST->getMemoryVT();
+
+  if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
+      !Subtarget->isThumb1Only() && !ST->isTruncatingStore() &&
+      ST->isVolatile()) {
+    SDNode *N = Op.getNode();
+    SDLoc dl(N);
+
+    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
+                             DAG.getTargetConstant(0, dl, MVT::i32));
+    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
+                             DAG.getTargetConstant(1, dl, MVT::i32));
+    const SDValue &Offset = ST->isIndexed()
+                                ? ST->getOffset()
+                                : DAG.getTargetConstant(0, dl, MVT::i32);
+    unsigned OpCode = Subtarget->isThumb2() ? ARM::t2STRDi8 : ARM::STRD;
+    SmallVector<SDValue> Ops = {Lo, Hi, ST->getBasePtr()};
+    if (!Subtarget->isThumb2()) {
+      Ops.push_back(DAG.getRegister(0, MVT::i32));
+    }
+    Ops.append({Offset,
+                DAG.getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32),
+                DAG.getRegister(0, MVT::i32), ST->getChain()});
+    MachineSDNode *Result = DAG.getMachineNode(OpCode, dl, MVT::Other, Ops);
+    MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+    DAG.setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
+
+    return SDValue(Result, 0);
+  } else if ((MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
+              MemVT == MVT::v16i1) &&
+             !ST->isTruncatingStore() && ST->isUnindexed()) {
+    return LowerPredicateStore(Op, DAG);
+  }
+
+  return SDValue();
+}
+
 static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
   MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
   MVT VT = Op.getSimpleValueType();
@@ -9170,7 +9242,7 @@
   case ISD::LOAD:
     return LowerPredicateLoad(Op, DAG);
   case ISD::STORE:
-    return LowerPredicateStore(Op, DAG);
+    return LowerSTORE(Op, DAG, Subtarget);
   case ISD::MLOAD:
     return LowerMLOAD(Op, DAG);
  case ISD::ATOMIC_LOAD:
@@ -9270,7 +9342,9 @@
   case ISD::ABS:
     lowerABS(N, Results, DAG);
     return ;
-
+  case ISD::LOAD:
+    LowerLOAD(N, Results, DAG);
+    break;
   }
   if (Res.getNode())
     Results.push_back(Res);
diff --git a/llvm/test/CodeGen/ARM/i64_volatile_load_store.ll b/llvm/test/CodeGen/ARM/i64_volatile_load_store.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/i64_volatile_load_store.ll
@@ -0,0 +1,30 @@
+; RUN: llc -mtriple=armv5e-arm-none-eabi %s -o - | FileCheck %s --check-prefix=CHECK-ARMV5TE
+; RUN: llc -mtriple=thumbv6t2-arm-none-eabi %s -o - | FileCheck %s --check-prefix=CHECK-T2
+; RUN: llc -mtriple=armv4t-arm-none-eabi %s -o - | FileCheck %s --check-prefix=CHECK-ARMV4T
+
+@x = common dso_local global i64 0, align 8
+@y = common dso_local global i64 0, align 8
+
+define void @test() {
+entry:
+; CHECK-ARMV5TE: ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV5TE: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV5TE: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-ARMV5TE: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-T2: movw [[ADDR0:r[0-9]+]], :lower16:x
+; CHECK-T2: movw [[ADDR1:r[0-9]+]], :lower16:y
+; CHECK-T2: movt [[ADDR0]], :upper16:x
+; CHECK-T2: movt [[ADDR1]], :upper16:y
+; CHECK-T2: ldrd [[R0:r[0-9]+]], [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-T2: strd [[R0]], [[R1]], {{\[}}[[ADDR1]]]
+; CHECK-ARMV4T: ldr [[ADDR0:r[0-9]+]]
+; CHECK-ARMV4T: ldr [[ADDR1:r[0-9]+]]
+; CHECK-ARMV4T: ldr [[R1:r[0-9]+]], {{\[}}[[ADDR0]]]
+; CHECK-ARMV4T: ldr [[R0:r[0-9]+]], {{\[}}[[ADDR0]], #4]
+; CHECK-ARMV4T: str [[R0]], {{\[}}[[ADDR1]], #4]
+; CHECK-ARMV4T: str [[R1]], {{\[}}[[ADDR1]]]
+  %0 = load volatile i64, i64* @x, align 8
+  store volatile i64 %0, i64* @y, align 8
+  ret void
+}
+