diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -562,6 +562,9 @@
   SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
 
+  SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
+  SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
+
   bool isEligibleForTailCallOptimization(
       CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
       const SmallVector<CCValAssign, 16> &ArgLocs) const;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -523,6 +523,9 @@
       for (unsigned VPOpc : IntegerVPOps)
         setOperationAction(VPOpc, VT, Custom);
 
+      setOperationAction(ISD::LOAD, VT, Custom);
+      setOperationAction(ISD::STORE, VT, Custom);
+
       setOperationAction(ISD::MLOAD, VT, Custom);
       setOperationAction(ISD::MSTORE, VT, Custom);
       setOperationAction(ISD::MGATHER, VT, Custom);
@@ -584,6 +587,9 @@
       setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
       setOperationAction(ISD::FCOPYSIGN, VT, Legal);
 
+      setOperationAction(ISD::LOAD, VT, Custom);
+      setOperationAction(ISD::STORE, VT, Custom);
+
       setOperationAction(ISD::MLOAD, VT, Custom);
       setOperationAction(ISD::MSTORE, VT, Custom);
       setOperationAction(ISD::MGATHER, VT, Custom);
@@ -1891,6 +1897,66 @@
   return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
 }
 
+// While RVV has alignment restrictions, we should always be able to load as a
+// legal equivalently-sized byte-typed vector instead. This method is
+// responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
+// the load is already correctly aligned, it returns SDValue().
+SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  auto *Load = dyn_cast<LoadSDNode>(Op);
+  assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
+
+  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+                                     Load->getMemoryVT(),
+                                     *Load->getMemOperand()))
+    return SDValue();
+
+  SDLoc DL(Op);
+  MVT VT = Op.getSimpleValueType();
+  unsigned EltSizeBits = VT.getScalarSizeInBits();
+  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
+         "Unexpected unaligned RVV load type");
+  MVT NewVT =
+      MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
+  assert(NewVT.isValid() &&
+         "Expecting equally-sized RVV vector types to be legal");
+  SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
+                          Load->getPointerInfo(), Load->getOriginalAlign(),
+                          Load->getMemOperand()->getFlags());
+  return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
+}
+
+// While RVV has alignment restrictions, we should always be able to store as a
+// legal equivalently-sized byte-typed vector instead. This method is
+// responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
+// returns SDValue() if the store is already correctly aligned.
+SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
+                                                     SelectionDAG &DAG) const {
+  auto *Store = dyn_cast<StoreSDNode>(Op);
+  assert(Store && Store->getValue().getValueType().isVector() &&
+         "Expected vector store");
+
+  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+                                     Store->getMemoryVT(),
+                                     *Store->getMemOperand()))
+    return SDValue();
+
+  SDLoc DL(Op);
+  SDValue StoredVal = Store->getValue();
+  MVT VT = StoredVal.getSimpleValueType();
+  unsigned EltSizeBits = VT.getScalarSizeInBits();
+  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
+         "Unexpected unaligned RVV store type");
+  MVT NewVT =
+      MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
+  assert(NewVT.isValid() &&
+         "Expecting equally-sized RVV vector types to be legal");
+  StoredVal = DAG.getBitcast(NewVT, StoredVal);
+  return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
+                      Store->getPointerInfo(), Store->getOriginalAlign(),
+                      Store->getMemOperand()->getFlags());
+}
+
 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                             SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
@@ -2310,9 +2376,17 @@
     return Vec;
   }
   case ISD::LOAD:
-    return lowerFixedLengthVectorLoadToRVV(Op, DAG);
+    if (auto V = expandUnalignedRVVLoad(Op, DAG))
+      return V;
+    if (Op.getValueType().isFixedLengthVector())
+      return lowerFixedLengthVectorLoadToRVV(Op, DAG);
+    return Op;
   case ISD::STORE:
-    return lowerFixedLengthVectorStoreToRVV(Op, DAG);
+    if (auto V = expandUnalignedRVVStore(Op, DAG))
+      return V;
+    if (Op.getOperand(1).getValueType().isFixedLengthVector())
+      return lowerFixedLengthVectorStoreToRVV(Op, DAG);
+    return Op;
   case ISD::MLOAD:
     return lowerMLOAD(Op, DAG);
   case ISD::MSTORE:
@@ -4031,13 +4105,10 @@
   SDLoc DL(Op);
   auto *Load = cast<LoadSDNode>(Op);
 
-  if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
-                                      Load->getMemoryVT(),
-                                      *Load->getMemOperand())) {
-    SDValue Result, Chain;
-    std::tie(Result, Chain) = expandUnalignedLoad(Load, DAG);
-    return DAG.getMergeValues({Result, Chain}, DL);
-  }
+  assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+                                        Load->getMemoryVT(),
+                                        *Load->getMemOperand()) &&
+         "Expecting a correctly-aligned load");
 
   MVT VT = Op.getSimpleValueType();
   MVT ContainerVT = getContainerForFixedLengthVector(VT);
@@ -4060,10 +4131,10 @@
   SDLoc DL(Op);
   auto *Store = cast<StoreSDNode>(Op);
 
-  if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
-                                      Store->getMemoryVT(),
-                                      *Store->getMemOperand()))
-    return expandUnalignedStore(Store, DAG);
+  assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+                                        Store->getMemoryVT(),
+                                        *Store->getMemOperand()) &&
+         "Expecting a correctly-aligned store");
 
   SDValue StoreVal = Store->getValue();
   MVT VT = StoreVal.getSimpleValueType();
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -7,112 +7,14 @@
 define <4 x i32> @load_v4i32_align1(<4 x i32>* %ptr) {
 ; RV32-LABEL: load_v4i32_align1:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lbu a1, 13(a0)
-; RV32-NEXT:    lbu a2, 12(a0)
-; RV32-NEXT:    lbu a3, 15(a0)
-; RV32-NEXT:    lbu a4, 14(a0)
-; RV32-NEXT:    slli a1, a1, 8
-; RV32-NEXT:    or a1, a1, a2
-; RV32-NEXT:    slli a2, a3, 8
-; RV32-NEXT:    or a2, a2, a4
-; RV32-NEXT:    slli a2, a2, 16
-; RV32-NEXT:    or a1, a2, a1
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    lbu a1, 9(a0)
-; RV32-NEXT:    lbu a2, 8(a0)
-; RV32-NEXT:    lbu a3, 11(a0)
-; RV32-NEXT:    lbu a4, 10(a0)
-; RV32-NEXT:    slli a1, a1, 8
-; RV32-NEXT:    or a1, a1, a2
-; RV32-NEXT:    slli a2, a3, 8
-; RV32-NEXT:    or a2, a2, a4
-; RV32-NEXT:    slli a2, a2, 16
-; RV32-NEXT:    or a1, a2, a1
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lbu a1, 5(a0)
-; RV32-NEXT:    lbu a2, 4(a0)
-; RV32-NEXT:    lbu a3, 7(a0)
-; RV32-NEXT:    lbu a4, 6(a0)
-; RV32-NEXT:    slli a1, a1, 8
-; RV32-NEXT:    or a1, a1, a2
-; RV32-NEXT:    slli a2, a3, 8
-; RV32-NEXT:    or a2, a2, a4
-; RV32-NEXT:    slli a2, a2, 16
-; RV32-NEXT:    or a1, a2, a1
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    lbu a1, 1(a0)
-; RV32-NEXT:    lbu a2, 0(a0)
-; RV32-NEXT:    lbu a3, 3(a0)
-; RV32-NEXT:    lbu a0, 2(a0)
-; RV32-NEXT:    slli a1, a1, 8
-; RV32-NEXT:    or a1, a1, a2
-; RV32-NEXT:    slli a2, a3, 8
-; RV32-NEXT:    or a0, a2, a0
-; RV32-NEXT:    slli a0, a0, 16
-; RV32-NEXT:    or a0, a0, a1
-; RV32-NEXT:    sw a0, 16(sp)
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; RV32-NEXT:    vle8.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: load_v4i32_align1:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -32
-; RV64-NEXT:    .cfi_def_cfa_offset 32
-; RV64-NEXT:    lbu a1, 9(a0)
-; RV64-NEXT:    lbu a2, 8(a0)
-; RV64-NEXT:    lbu a3, 11(a0)
-; RV64-NEXT:    lbu a4, 10(a0)
-; RV64-NEXT:    slli a1, a1, 8
-; RV64-NEXT:    or a1, a1, a2
-; RV64-NEXT:    slli a2, a3, 8
-; RV64-NEXT:    or a2, a2, a4
-; RV64-NEXT:    slli a2, a2, 16
-; RV64-NEXT:    or a1, a2, a1
-; RV64-NEXT:    lbu a2, 13(a0)
-; RV64-NEXT:    lbu a3, 12(a0)
-; RV64-NEXT:    lbu a4, 15(a0)
-; RV64-NEXT:    lbu a5, 14(a0)
-; RV64-NEXT:    slli a2, a2, 8
-; RV64-NEXT:    or a2, a2, a3
-; RV64-NEXT:    slli a3, a4, 8
-; RV64-NEXT:    or a3, a3, a5
-; RV64-NEXT:    slli a3, a3, 16
-; RV64-NEXT:    or a2, a3, a2
-; RV64-NEXT:    slli a2, a2, 32
-; RV64-NEXT:    or a1, a2, a1
-; RV64-NEXT:    sd a1, 24(sp)
-; RV64-NEXT:    lbu a1, 1(a0)
-; RV64-NEXT:    lbu a2, 0(a0)
-; RV64-NEXT:    lbu a3, 3(a0)
-; RV64-NEXT:    lbu a4, 2(a0)
-; RV64-NEXT:    slli a1, a1, 8
-; RV64-NEXT:    or a1, a1, a2
-; RV64-NEXT:    slli a2, a3, 8
-; RV64-NEXT:    or a2, a2, a4
-; RV64-NEXT:    slli a2, a2, 16
-; RV64-NEXT:    or a1, a2, a1
-; RV64-NEXT:    lbu a2, 5(a0)
-; RV64-NEXT:    lbu a3, 4(a0)
-; RV64-NEXT:    lbu a4, 7(a0)
-; RV64-NEXT:    lbu a0, 6(a0)
-; RV64-NEXT:    slli a2, a2, 8
-; RV64-NEXT:    or a2, a2, a3
-; RV64-NEXT:    slli a3, a4, 8
-; RV64-NEXT:    or a0, a3, a0
-; RV64-NEXT:    slli a0, a0, 16
-; RV64-NEXT:    or a0, a0, a2
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    or a0, a0, a1
-; RV64-NEXT:    sd a0, 16(sp)
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV64-NEXT:    addi a0, sp, 16
-; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    addi sp, sp, 32
+; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; RV64-NEXT:    vle8.v v8, (a0)
 ; RV64-NEXT:    ret
   %z = load <4 x i32>, <4 x i32>* %ptr, align 1
   ret <4 x i32> %z
@@ -121,64 +23,14 @@
 define <4 x i32> @load_v4i32_align2(<4 x i32>* %ptr) {
 ; RV32-LABEL: load_v4i32_align2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lhu a1, 14(a0)
-; RV32-NEXT:    lhu a2, 12(a0)
-; RV32-NEXT:    slli a1, a1, 16
-; RV32-NEXT:    or a1, a1, a2
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    lhu a1, 10(a0)
-; RV32-NEXT:    lhu a2, 8(a0)
-; RV32-NEXT:    slli a1, a1, 16
-; RV32-NEXT:    or a1, a1, a2
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lhu a1, 6(a0)
-; RV32-NEXT:    lhu a2, 4(a0)
-; RV32-NEXT:    slli a1, a1, 16
-; RV32-NEXT:    or a1, a1, a2
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    lhu a1, 2(a0)
-; RV32-NEXT:    lhu a0, 0(a0)
-; RV32-NEXT:    slli a1, a1, 16
-; RV32-NEXT:    or a0, a1, a0
-; RV32-NEXT:    sw a0, 16(sp)
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; RV32-NEXT:    vle8.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: load_v4i32_align2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -32
-; RV64-NEXT:    .cfi_def_cfa_offset 32
-; RV64-NEXT:    lhu a1, 10(a0)
-; RV64-NEXT:    lhu a2, 8(a0)
-; RV64-NEXT:    lhu a3, 14(a0)
-; RV64-NEXT:    lhu a4, 12(a0)
-; RV64-NEXT:    slli a1, a1, 16
-; RV64-NEXT:    or a1, a1, a2
-; RV64-NEXT:    slli a2, a3, 16
-; RV64-NEXT:    or a2, a2, a4
-; RV64-NEXT:    slli a2, a2, 32
-; RV64-NEXT:    or a1, a2, a1
-; RV64-NEXT:    sd a1, 24(sp)
-; RV64-NEXT:    lhu a1, 2(a0)
-; RV64-NEXT:    lhu a2, 0(a0)
-; RV64-NEXT:    lhu a3, 6(a0)
-; RV64-NEXT:    lhu a0, 4(a0)
-; RV64-NEXT:    slli a1, a1, 16
-; RV64-NEXT:    or a1, a1, a2
-; RV64-NEXT:    slli a2, a3, 16
-; RV64-NEXT:    or a0, a2, a0
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    or a0, a0, a1
-; RV64-NEXT:    sd a0, 16(sp)
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV64-NEXT:    addi a0, sp, 16
-; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    addi sp, sp, 32
+; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; RV64-NEXT:    vle8.v v8, (a0)
 ; RV64-NEXT:    ret
   %z = load <4 x i32>, <4 x i32>* %ptr, align 2
   ret <4 x i32> %z
@@ -187,86 +39,14 @@
 define void @store_v4i32_align1(<4 x i32> %x, <4 x i32>* %ptr) {
 ; RV32-LABEL: store_v4i32_align1:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vse32.v v8, (a1)
-; RV32-NEXT:    lw a1, 28(sp)
-; RV32-NEXT:    sb a1, 12(a0)
-; RV32-NEXT:    lw a2, 24(sp)
-; RV32-NEXT:    sb a2, 8(a0)
-; RV32-NEXT:    lw a3, 20(sp)
-; RV32-NEXT:    sb a3, 4(a0)
-; RV32-NEXT:    lw a4, 16(sp)
-; RV32-NEXT:    sb a4, 0(a0)
-; RV32-NEXT:    srli a5, a1, 24
-; RV32-NEXT:    sb a5, 15(a0)
-; RV32-NEXT:    srli a5, a1, 16
-; RV32-NEXT:    sb a5, 14(a0)
-; RV32-NEXT:    srli a1, a1, 8
-; RV32-NEXT:    sb a1, 13(a0)
-; RV32-NEXT:    srli a1, a2, 24
-; RV32-NEXT:    sb a1, 11(a0)
-; RV32-NEXT:    srli a1, a2, 16
-; RV32-NEXT:    sb a1, 10(a0)
-; RV32-NEXT:    srli a1, a2, 8
-; RV32-NEXT:    sb a1, 9(a0)
-; RV32-NEXT:    srli a1, a3, 24
-; RV32-NEXT:    sb a1, 7(a0)
-; RV32-NEXT:    srli a1, a3, 16
-; RV32-NEXT:    sb a1, 6(a0)
-; RV32-NEXT:    srli a1, a3, 8
-; RV32-NEXT:    sb a1, 5(a0)
-; RV32-NEXT:    srli a1, a4, 24
-; RV32-NEXT:    sb a1, 3(a0)
-; RV32-NEXT:    srli a1, a4, 16
-; RV32-NEXT:    sb a1, 2(a0)
-; RV32-NEXT:    srli a1, a4, 8
-; RV32-NEXT:    sb a1, 1(a0)
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; RV32-NEXT:    vse8.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: store_v4i32_align1:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -32
-; RV64-NEXT:    .cfi_def_cfa_offset 32
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV64-NEXT:    addi a1, sp, 16
-; RV64-NEXT:    vse32.v v8, (a1)
-; RV64-NEXT:    ld a1, 24(sp)
-; RV64-NEXT:    sb a1, 8(a0)
-; RV64-NEXT:    ld a2, 16(sp)
-; RV64-NEXT:    sb a2, 0(a0)
-; RV64-NEXT:    srli a3, a1, 56
-; RV64-NEXT:    sb a3, 15(a0)
-; RV64-NEXT:    srli a3, a1, 48
-; RV64-NEXT:    sb a3, 14(a0)
-; RV64-NEXT:    srli a3, a1, 40
-; RV64-NEXT:    sb a3, 13(a0)
-; RV64-NEXT:    srli a3, a1, 32
-; RV64-NEXT:    sb a3, 12(a0)
-; RV64-NEXT:    srli a3, a1, 24
-; RV64-NEXT:    sb a3, 11(a0)
-; RV64-NEXT:    srli a3, a1, 16
-; RV64-NEXT:    sb a3, 10(a0)
-; RV64-NEXT:    srli a1, a1, 8
-; RV64-NEXT:    sb a1, 9(a0)
-; RV64-NEXT:    srli a1, a2, 40
-; RV64-NEXT:    sb a1, 5(a0)
-; RV64-NEXT:    srli a1, a2, 32
-; RV64-NEXT:    sb a1, 4(a0)
-; RV64-NEXT:    srli a1, a2, 56
-; RV64-NEXT:    sb a1, 7(a0)
-; RV64-NEXT:    srli a1, a2, 48
-; RV64-NEXT:    sb a1, 6(a0)
-; RV64-NEXT:    srli a1, a2, 24
-; RV64-NEXT:    sb a1, 3(a0)
-; RV64-NEXT:    srli a1, a2, 16
-; RV64-NEXT:    sb a1, 2(a0)
-; RV64-NEXT:    srli a1, a2, 8
-; RV64-NEXT:    sb a1, 1(a0)
-; RV64-NEXT:    addi sp, sp, 32
+; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; RV64-NEXT:    vse8.v v8, (a0)
 ; RV64-NEXT:    ret
   store <4 x i32> %x, <4 x i32>* %ptr, align 1
   ret void
@@ -275,54 +55,14 @@
 define void @store_v4i32_align2(<4 x i32> %x, <4 x i32>* %ptr) {
 ; RV32-LABEL: store_v4i32_align2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vse32.v v8, (a1)
-; RV32-NEXT:    lw a1, 28(sp)
-; RV32-NEXT:    sh a1, 12(a0)
-; RV32-NEXT:    lw a2, 24(sp)
-; RV32-NEXT:    sh a2, 8(a0)
-; RV32-NEXT:    lw a3, 20(sp)
-; RV32-NEXT:    sh a3, 4(a0)
-; RV32-NEXT:    lw a4, 16(sp)
-; RV32-NEXT:    sh a4, 0(a0)
-; RV32-NEXT:    srli a1, a1, 16
-; RV32-NEXT:    sh a1, 14(a0)
-; RV32-NEXT:    srli a1, a2, 16
-; RV32-NEXT:    sh a1, 10(a0)
-; RV32-NEXT:    srli a1, a3, 16
-; RV32-NEXT:    sh a1, 6(a0)
-; RV32-NEXT:    srli a1, a4, 16
-; RV32-NEXT:    sh a1, 2(a0)
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; RV32-NEXT:    vse8.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: store_v4i32_align2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -32
-; RV64-NEXT:    .cfi_def_cfa_offset 32
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; RV64-NEXT:    addi a1, sp, 16
-; RV64-NEXT:    vse32.v v8, (a1)
-; RV64-NEXT:    ld a1, 24(sp)
-; RV64-NEXT:    sh a1, 8(a0)
-; RV64-NEXT:    ld a2, 16(sp)
-; RV64-NEXT:    sh a2, 0(a0)
-; RV64-NEXT:    srli a3, a1, 48
-; RV64-NEXT:    sh a3, 14(a0)
-; RV64-NEXT:    srli a3, a1, 32
-; RV64-NEXT:    sh a3, 12(a0)
-; RV64-NEXT:    srli a1, a1, 16
-; RV64-NEXT:    sh a1, 10(a0)
-; RV64-NEXT:    srli a1, a2, 48
-; RV64-NEXT:    sh a1, 6(a0)
-; RV64-NEXT:    srli a1, a2, 32
-; RV64-NEXT:    sh a1, 4(a0)
-; RV64-NEXT:    srli a1, a2, 16
-; RV64-NEXT:    sh a1, 2(a0)
-; RV64-NEXT:    addi sp, sp, 32
+; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; RV64-NEXT:    vse8.v v8, (a0)
 ; RV64-NEXT:    ret
   store <4 x i32> %x, <4 x i32>* %ptr, align 2
   ret void
diff --git a/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll b/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll
@@ -0,0 +1,192 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv32 -mattr=+d,+experimental-zfh,+experimental-v < %s \
+; RUN:     -verify-machineinstrs | FileCheck %s
+; RUN: llc -mtriple riscv64 -mattr=+d,+experimental-zfh,+experimental-v < %s \
+; RUN:     -verify-machineinstrs | FileCheck %s
+
+define <vscale x 1 x i32> @unaligned_load_nxv1i32_a1(<vscale x 1 x i32>* %ptr) {
+; CHECK-LABEL: unaligned_load_nxv1i32_a1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    ret
+  %v = load <vscale x 1 x i32>, <vscale x 1 x i32>* %ptr, align 1
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @unaligned_load_nxv1i32_a2(<vscale x 1 x i32>* %ptr) {
+; CHECK-LABEL: unaligned_load_nxv1i32_a2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    ret
+  %v = load <vscale x 1 x i32>, <vscale x 1 x i32>* %ptr, align 2
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @aligned_load_nxv1i32_a4(<vscale x 1 x i32>* %ptr) {
+; CHECK-LABEL: aligned_load_nxv1i32_a4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    ret
+  %v = load <vscale x 1 x i32>, <vscale x 1 x i32>* %ptr, align 4
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i64> @unaligned_load_nxv1i64_a1(<vscale x 1 x i64>* %ptr) {
+; CHECK-LABEL: unaligned_load_nxv1i64_a1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl1r.v v8, (a0)
+; CHECK-NEXT:    ret
+  %v = load <vscale x 1 x i64>, <vscale x 1 x i64>* %ptr, align 1
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @unaligned_load_nxv1i64_a4(<vscale x 1 x i64>* %ptr) {
+; CHECK-LABEL: unaligned_load_nxv1i64_a4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl1r.v v8, (a0)
+; CHECK-NEXT:    ret
+  %v = load <vscale x 1 x i64>, <vscale x 1 x i64>* %ptr, align 4
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @aligned_load_nxv1i64_a8(<vscale x 1 x i64>* %ptr) {
+; CHECK-LABEL: aligned_load_nxv1i64_a8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl1re64.v v8, (a0)
+; CHECK-NEXT:    ret
+  %v = load <vscale x 1 x i64>, <vscale x 1 x i64>* %ptr, align 8
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 2 x i64> @unaligned_load_nxv2i64_a1(<vscale x 2 x i64>* %ptr) {
+; CHECK-LABEL: unaligned_load_nxv2i64_a1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl2r.v v8, (a0)
+; CHECK-NEXT:    ret
+  %v = load <vscale x 2 x i64>, <vscale x 2 x i64>* %ptr, align 1
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @unaligned_load_nxv2i64_a4(<vscale x 2 x i64>* %ptr) {
+; CHECK-LABEL: unaligned_load_nxv2i64_a4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl2r.v v8, (a0)
+; CHECK-NEXT:    ret
+  %v = load <vscale x 2 x i64>, <vscale x 2 x i64>* %ptr, align 4
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @aligned_load_nxv2i64_a8(<vscale x 2 x i64>* %ptr) {
+; CHECK-LABEL: aligned_load_nxv2i64_a8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl2re64.v v8, (a0)
+; CHECK-NEXT:    ret
+  %v = load <vscale x 2 x i64>, <vscale x 2 x i64>* %ptr, align 8
+  ret <vscale x 2 x i64> %v
+}
+
+; Masks should always be aligned
+define <vscale x 1 x i1> @unaligned_load_nxv1i1_a1(<vscale x 1 x i1>* %ptr) {
+; CHECK-LABEL: unaligned_load_nxv1i1_a1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vle1.v v0, (a0)
+; CHECK-NEXT:    ret
+  %v = load <vscale x 1 x i1>, <vscale x 1 x i1>* %ptr, align 1
+  ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 4 x float> @unaligned_load_nxv4f32_a1(<vscale x 4 x float>* %ptr) {
+; CHECK-LABEL: unaligned_load_nxv4f32_a1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl2r.v v8, (a0)
+; CHECK-NEXT:    ret
+  %v = load <vscale x 4 x float>, <vscale x 4 x float>* %ptr, align 1
+  ret <vscale x 4 x float> %v
+}
+
+define <vscale x 4 x float> @unaligned_load_nxv4f32_a2(<vscale x 4 x float>* %ptr) {
+; CHECK-LABEL: unaligned_load_nxv4f32_a2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl2r.v v8, (a0)
+; CHECK-NEXT:    ret
+  %v = load <vscale x 4 x float>, <vscale x 4 x float>* %ptr, align 2
+  ret <vscale x 4 x float> %v
+}
+
+define <vscale x 4 x float> @aligned_load_nxv4f32_a4(<vscale x 4 x float>* %ptr) {
+; CHECK-LABEL: aligned_load_nxv4f32_a4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl2re32.v v8, (a0)
+; CHECK-NEXT:    ret
+  %v = load <vscale x 4 x float>, <vscale x 4 x float>* %ptr, align 4
+  ret <vscale x 4 x float> %v
+}
+
+define <vscale x 8 x half> @unaligned_load_nxv8f16_a1(<vscale x 8 x half>* %ptr) {
+; CHECK-LABEL: unaligned_load_nxv8f16_a1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl2r.v v8, (a0)
+; CHECK-NEXT:    ret
+  %v = load <vscale x 8 x half>, <vscale x 8 x half>* %ptr, align 1
+  ret <vscale x 8 x half> %v
+}
+
+define <vscale x 8 x half> @aligned_load_nxv8f16_a2(<vscale x 8 x half>* %ptr) {
+; CHECK-LABEL: aligned_load_nxv8f16_a2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl2re16.v v8, (a0)
+; CHECK-NEXT:    ret
+  %v = load <vscale x 8 x half>, <vscale x 8 x half>* %ptr, align 2
+  ret <vscale x 8 x half> %v
+}
+
+define void @unaligned_store_nxv4i32_a1(<vscale x 4 x i32> %x, <vscale x 4 x i32>* %ptr) {
+; CHECK-LABEL: unaligned_store_nxv4i32_a1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vs2r.v v8, (a0)
+; CHECK-NEXT:    ret
+  store <vscale x 4 x i32> %x, <vscale x 4 x i32>* %ptr, align 1
+  ret void
+}
+
+define void @unaligned_store_nxv4i32_a2(<vscale x 4 x i32> %x, <vscale x 4 x i32>* %ptr) {
+; CHECK-LABEL: unaligned_store_nxv4i32_a2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vs2r.v v8, (a0)
+; CHECK-NEXT:    ret
+  store <vscale x 4 x i32> %x, <vscale x 4 x i32>* %ptr, align 2
+  ret void
+}
+
+define void @aligned_store_nxv4i32_a4(<vscale x 4 x i32> %x, <vscale x 4 x i32>* %ptr) {
+; CHECK-LABEL: aligned_store_nxv4i32_a4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vs2r.v v8, (a0)
+; CHECK-NEXT:    ret
+  store <vscale x 4 x i32> %x, <vscale x 4 x i32>* %ptr, align 4
+  ret void
+}
+
+define void @unaligned_store_nxv1i16_a1(<vscale x 1 x i16> %x, <vscale x 1 x i16>* %ptr) {
+; CHECK-LABEL: unaligned_store_nxv1i16_a1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    ret
+  store <vscale x 1 x i16> %x, <vscale x 1 x i16>* %ptr, align 1
+  ret void
+}
+
+define void @aligned_store_nxv1i16_a2(<vscale x 1 x i16> %x, <vscale x 1 x i16>* %ptr) {
+; CHECK-LABEL: aligned_store_nxv1i16_a2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
+  store <vscale x 1 x i16> %x, <vscale x 1 x i16>* %ptr, align 2
+  ret void
+}
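
Note (illustration only, not part of the patch): the byte-typed vector chosen by expandUnalignedRVVLoad/Store has the same total size as the original type, so the conversion back is just a bitcast. Assuming a hypothetical unaligned <vscale x 2 x i32> access, EltSizeBits is 32, so NewVT becomes <vscale x 8 x i8> (element count scaled by 32 / 8). In IR terms the new lowering behaves roughly as if the load had been written as:

;   %v = load <vscale x 2 x i32>, <vscale x 2 x i32>* %p, align 1
; is handled as if it were:
;   %q = bitcast <vscale x 2 x i32>* %p to <vscale x 8 x i8>*
;   %b = load <vscale x 8 x i8>, <vscale x 8 x i8>* %q, align 1
;   %v = bitcast <vscale x 8 x i8> %b to <vscale x 2 x i32>

Since byte-element accesses only require byte alignment, this selects to a plain vle8.v or whole-register vl<N>r.v (as in the tests above) instead of expanding element by element through the stack.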