diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1266,7 +1266,7 @@
   /// Return true if the specified store with truncation has solution on this
   /// target.
-  bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const {
+  virtual bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const {
     return isTypeLegal(ValVT) &&
            (getTruncStoreAction(ValVT, MemVT) == Legal ||
             getTruncStoreAction(ValVT, MemVT) == Custom);
   }
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18064,10 +18064,11 @@
   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
   // truncating store. We can do this even if this is already a truncstore.
-  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
-      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
-      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
-                            ST->getMemoryVT())) {
+  if ((Value.getOpcode() == ISD::FP_ROUND ||
+       Value.getOpcode() == ISD::TRUNCATE) &&
+      Value.getNode()->hasOneUse() && ST->isUnindexed() &&
+      TLI.isTruncStoreLegalOrCustom(Value.getOperand(0).getValueType(),
+                                    ST->getMemoryVT())) {
     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
   }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1233,6 +1233,13 @@
     }
   }

+  // SVE supports truncating stores of 64 and 128-bit vectors
+  setTruncStoreAction(MVT::v2i64, MVT::v2i8, Custom);
+  setTruncStoreAction(MVT::v2i64, MVT::v2i16, Custom);
+  setTruncStoreAction(MVT::v2i64, MVT::v2i32, Custom);
+  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
+  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
+
   for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                   MVT::nxv4f32, MVT::nxv2f64}) {
     setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
@@ -1475,6 +1482,16 @@
     setCondCodeAction(ISD::SETUNE, VT, Expand);
   }

+  // Mark integer truncating stores as having custom lowering
+  if (VT.isInteger()) {
+    MVT InnerVT = VT.changeVectorElementType(MVT::i8);
+    while (InnerVT != VT) {
+      setTruncStoreAction(VT, InnerVT, Custom);
+      InnerVT = InnerVT.changeVectorElementType(
+          MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
+    }
+  }
+
   // Lower fixed length vector operations to scalable equivalents.
   setOperationAction(ISD::ABS, VT, Custom);
   setOperationAction(ISD::ADD, VT, Custom);
@@ -4425,7 +4442,7 @@
   EVT MemVT = StoreNode->getMemoryVT();

   if (VT.isVector()) {
-    if (useSVEForFixedLengthVectorVT(VT))
+    if (useSVEForFixedLengthVectorVT(VT, true))
       return LowerFixedLengthVectorStoreToSVE(Op, DAG);

     unsigned AS = StoreNode->getAddressSpace();
@@ -4437,7 +4454,8 @@
       return scalarizeVectorStore(StoreNode, DAG);
     }

-    if (StoreNode->isTruncatingStore()) {
+    if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
+        MemVT == MVT::v4i8) {
       return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
     }
     // 256 bit non-temporal stores can be lowered to STNP.
@@ -14998,6 +15016,30 @@
   return false;
 }

+static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N) {
+  auto OpCode = N->getOpcode();
+  assert((OpCode == ISD::STORE || OpCode == ISD::MSTORE) &&
+         "Expected STORE dag node in input!");
+
+  if (auto Store = dyn_cast<StoreSDNode>(N)) {
+    if (!Store->isTruncatingStore())
+      return SDValue();
+    SDValue Ext = Store->getValue();
+    auto ExtOpCode = Ext.getOpcode();
+    if (ExtOpCode != ISD::ZERO_EXTEND && ExtOpCode != ISD::SIGN_EXTEND &&
+        ExtOpCode != ISD::ANY_EXTEND)
+      return SDValue();
+    SDValue Orig = Ext->getOperand(0);
+    if (Store->getMemoryVT() != Orig->getValueType(0))
+      return SDValue();
+    return DAG.getStore(Store->getChain(), SDLoc(Store), Orig,
+                        Store->getBasePtr(), Store->getPointerInfo(),
+                        Store->getAlign());
+  }
+
+  return SDValue();
+}
+
 static SDValue performSTORECombine(SDNode *N,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    SelectionDAG &DAG,
@@ -15009,6 +15051,9 @@
       performTBISimplification(N->getOperand(2), DCI, DAG))
     return SDValue(N, 0);

+  if (SDValue Store = foldTruncStoreOfExt(DAG, N))
+    return Store;
+
   return SDValue();
 }

diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
@@ -54,6 +54,13 @@
       MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
       bool *IsFast = nullptr) const override;

+  virtual bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const override {
+    // Allowing Custom-lowered types here would let the DAG combine fold
+    // additional truncates into stores, which is not wanted because those
+    // truncating stores are not actually supported; only report Legal ones.
+    return TargetLowering::isTruncStoreLegal(ValVT, MemVT);
+  }
+
 private:
   unsigned Gen;
   /// Each OpenCL kernel has nine implicit parameters that are stored in the
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc-stores.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc-stores.ll
@@ -0,0 +1,218 @@
+; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -D#VBYTES=16 -check-prefix=NO_SVE
+; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK,VBITS_EQ_256
+; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK
+; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_GE_1024,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -D#VBYTES=256 -check-prefixes=CHECK,VBITS_GE_2048,VBITS_GE_1024,VBITS_GE_512
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Don't use SVE when its registers are no bigger than NEON.
+; NO_SVE-NOT: ptrue
+
+define void @store_trunc_v2i64i8(<2 x i64>* %ap, <2 x i8>* %dest) #0 {
+; CHECK-LABEL: store_trunc_v2i64i8
+; CHECK: ldr q[[Q0:[0-9]+]], [x0]
+; CHECK: ptrue p[[P0:[0-9]+]].d, vl2
+; CHECK-NEXT: st1b { z[[Q0]].d }, p[[P0]], [x{{[0-9]+}}]
+; CHECK-NEXT: ret
+  %a = load <2 x i64>, <2 x i64>* %ap
+  %val = trunc <2 x i64> %a to <2 x i8>
+  store <2 x i8> %val, <2 x i8>* %dest
+  ret void
+}
+
+define void @store_trunc_v4i64i8(<4 x i64>* %ap, <4 x i8>* %dest) #0 {
+; CHECK-LABEL: store_trunc_v4i64i8
+; CHECK: ptrue p[[P0:[0-9]+]].d, vl4
+; CHECK-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p0/z, [x0]
+; CHECK-NEXT: st1b { [[Z0]].d }, p[[P0]], [x{{[0-9]+}}]
+; CHECK-NEXT: ret
+  %a = load <4 x i64>, <4 x i64>* %ap
+  %val = trunc <4 x i64> %a to <4 x i8>
+  store <4 x i8> %val, <4 x i8>* %dest
+  ret void
+}
+
+define void @store_trunc_v8i64i8(<8 x i64>* %ap, <8 x i8>* %dest) #0 {
+; CHECK-LABEL: store_trunc_v8i64i8:
+; VBITS_GE_512: ptrue p[[P0:[0-9]+]].d, vl8
+; VBITS_GE_512-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p0/z, [x0]
+; VBITS_GE_512-NEXT: st1b { [[Z0]].d }, p[[P0]], [x{{[0-9]+}}]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
+; VBITS_EQ_256-DAG: ld1d { [[Z0:z[0-9]+]].d }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1d { [[Z1:z[0-9]+]].d }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ptrue [[PG]].s, vl4
+; VBITS_EQ_256-DAG: uzp1 [[Z0]].s, [[Z0]].s, [[Z0]].s
+; VBITS_EQ_256-DAG: uzp1 [[Z1]].s, [[Z1]].s, [[Z1]].s
+; VBITS_EQ_256-DAG: splice [[Z1]].s, [[PG]], [[Z1]].s, [[Z0]].s
+; VBITS_EQ_256-DAG: ptrue [[PG]].s, vl8
+; VBITS_EQ_256-DAG: st1b { [[Z1]].s }, [[PG]], [x1]
+; VBITS_EQ_256-DAG: ret
+  %a = load <8 x i64>, <8 x i64>* %ap
+  %val = trunc <8 x i64> %a to <8 x i8>
+  store <8 x i8> %val, <8 x i8>* %dest
+  ret void
+}
+
+define void @store_trunc_v16i64i8(<16 x i64>* %ap, <16 x i8>* %dest) #0 {
+; CHECK-LABEL: store_trunc_v16i64i8:
+; VBITS_GE_1024: ptrue p[[P0:[0-9]+]].d, vl16
+; VBITS_GE_1024-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p0/z, [x0]
+; VBITS_GE_1024-NEXT: st1b { [[Z0]].d }, p[[P0]], [x{{[0-9]+}}]
+; VBITS_GE_1024-NEXT: ret
+  %a = load <16 x i64>, <16 x i64>* %ap
+  %val = trunc <16 x i64> %a to <16 x i8>
+  store <16 x i8> %val, <16 x i8>* %dest
+  ret void
+}
+
+define void @store_trunc_v32i64i8(<32 x i64>* %ap, <32 x i8>* %dest) #0 {
+; CHECK-LABEL: store_trunc_v32i64i8:
+; VBITS_GE_2048: ptrue p[[P0:[0-9]+]].d, vl32
+; VBITS_GE_2048-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p0/z, [x0]
+; VBITS_GE_2048-NEXT: st1b { [[Z0]].d }, p[[P0]], [x{{[0-9]+}}]
+; VBITS_GE_2048-NEXT: ret
+  %a = load <32 x i64>, <32 x i64>* %ap
+  %val = trunc <32 x i64> %a to <32 x i8>
+  store <32 x i8> %val, <32 x i8>* %dest
+  ret void
+}
+
+define void @store_trunc_v8i64i16(<8 x i64>* %ap, <8 x i16>* %dest) #0 {
+; CHECK-LABEL: store_trunc_v8i64i16:
+; VBITS_GE_512: ptrue p[[P0:[0-9]+]].d, vl8
+; VBITS_GE_512-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p0/z, [x0]
+; VBITS_GE_512-NEXT: st1h { [[Z0]].d }, p[[P0]], [x{{[0-9]+}}]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation.
+; Currently does not use the truncating store
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
+; VBITS_EQ_256-DAG: ld1d { [[Z0:z[0-9]+]].d }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1d { [[Z1:z[0-9]+]].d }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: uzp1 [[Z0]].s, [[Z0]].s, [[Z0]].s
+; VBITS_EQ_256-DAG: uzp1 [[Z1]].s, [[Z1]].s, [[Z1]].s
+; VBITS_EQ_256-DAG: uzp1 [[Z1]].h, [[Z1]].h, [[Z1]].h
+; VBITS_EQ_256-DAG: uzp1 [[Z0]].h, [[Z0]].h, [[Z0]].h
+; VBITS_EQ_256-DAG: mov v[[V0:[0-9]+]].d[1], v{{[0-9]+}}.d[0]
+; VBITS_EQ_256-DAG: str q[[V0]], [x1]
+; VBITS_EQ_256-DAG: ret
+  %a = load <8 x i64>, <8 x i64>* %ap
+  %val = trunc <8 x i64> %a to <8 x i16>
+  store <8 x i16> %val, <8 x i16>* %dest
+  ret void
+}
+
+define void @store_trunc_v8i64i32(<8 x i64>* %ap, <8 x i32>* %dest) #0 {
+; CHECK-LABEL: store_trunc_v8i64i32:
+; VBITS_GE_512: ptrue p[[P0:[0-9]+]].d, vl8
+; VBITS_GE_512-NEXT: ld1d { [[Z0:z[0-9]+]].d }, p0/z, [x0]
+; VBITS_GE_512-NEXT: st1w { [[Z0]].d }, p[[P0]], [x{{[0-9]+}}]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
+; VBITS_EQ_256-DAG: ld1d { [[Z0:z[0-9]+]].d }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1d { [[Z1:z[0-9]+]].d }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ptrue [[PG]].s, vl4
+; VBITS_EQ_256-DAG: uzp1 [[Z0]].s, [[Z0]].s, [[Z0]].s
+; VBITS_EQ_256-DAG: uzp1 [[Z1]].s, [[Z1]].s, [[Z1]].s
+; VBITS_EQ_256-DAG: splice [[Z1]].s, [[PG]], [[Z1]].s, [[Z0]].s
+; VBITS_EQ_256-DAG: ptrue [[PG]].s, vl8
+; VBITS_EQ_256-DAG: st1w { [[Z1]].s }, [[PG]], [x1]
+; VBITS_EQ_256-DAG: ret
+  %a = load <8 x i64>, <8 x i64>* %ap
+  %val = trunc <8 x i64> %a to <8 x i32>
+  store <8 x i32> %val, <8 x i32>* %dest
+  ret void
+}
+
+define void @store_trunc_v16i32i8(<16 x i32>* %ap, <16 x i8>* %dest) #0 {
+; CHECK-LABEL: store_trunc_v16i32i8:
+; VBITS_GE_512: ptrue p[[P0:[0-9]+]].s, vl16
+; VBITS_GE_512-NEXT: ld1w { [[Z0:z[0-9]+]].s }, p0/z, [x0]
+; VBITS_GE_512-NEXT: st1b { [[Z0]].s }, p[[P0]], [x{{[0-9]+}}]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation.
+; Currently does not use the truncating store
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
+; VBITS_EQ_256-DAG: ld1w { [[Z0:z[0-9]+]].s }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1w { [[Z1:z[0-9]+]].s }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: uzp1 [[Z0]].h, [[Z0]].h, [[Z0]].h
+; VBITS_EQ_256-DAG: uzp1 [[Z1]].h, [[Z1]].h, [[Z1]].h
+; VBITS_EQ_256-DAG: uzp1 [[Z1]].b, [[Z1]].b, [[Z1]].b
+; VBITS_EQ_256-DAG: uzp1 [[Z0]].b, [[Z0]].b, [[Z0]].b
+; VBITS_EQ_256-DAG: mov v[[V0:[0-9]+]].d[1], v{{[0-9]+}}.d[0]
+; VBITS_EQ_256-DAG: str q[[V0]], [x1]
+; VBITS_EQ_256-DAG: ret
+  %a = load <16 x i32>, <16 x i32>* %ap
+  %val = trunc <16 x i32> %a to <16 x i8>
+  store <16 x i8> %val, <16 x i8>* %dest
+  ret void
+}
+
+define void @store_trunc_v16i32i16(<16 x i32>* %ap, <16 x i16>* %dest) #0 {
+; CHECK-LABEL: store_trunc_v16i32i16:
+; VBITS_GE_512: ptrue p[[P0:[0-9]+]].s, vl16
+; VBITS_GE_512-NEXT: ld1w { [[Z0:z[0-9]+]].s }, p0/z, [x0]
+; VBITS_GE_512-NEXT: st1h { [[Z0]].s }, p[[P0]], [x{{[0-9]+}}]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
+; VBITS_EQ_256-DAG: ld1w { [[Z0:z[0-9]+]].s }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1w { [[Z1:z[0-9]+]].s }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ptrue [[PG]].h, vl8
+; VBITS_EQ_256-DAG: uzp1 [[Z0]].h, [[Z0]].h, [[Z0]].h
+; VBITS_EQ_256-DAG: uzp1 [[Z1]].h, [[Z1]].h, [[Z1]].h
+; VBITS_EQ_256-DAG: splice [[Z1]].h, [[PG]], [[Z1]].h, [[Z0]].h
+; VBITS_EQ_256-DAG: ptrue [[PG]].h, vl16
+; VBITS_EQ_256-DAG: st1h { [[Z1]].h }, [[PG]], [x1]
+; VBITS_EQ_256-DAG: ret
+  %a = load <16 x i32>, <16 x i32>* %ap
+  %val = trunc <16 x i32> %a to <16 x i16>
+  store <16 x i16> %val, <16 x i16>* %dest
+  ret void
+}
+
+define void @store_trunc_v32i16i8(<32 x i16>* %ap, <32 x i8>* %dest) #0 {
+; CHECK-LABEL: store_trunc_v32i16i8:
+; VBITS_GE_512: ptrue p[[P0:[0-9]+]].h, vl32
+; VBITS_GE_512-NEXT: ld1h { [[Z0:z[0-9]+]].h }, p0/z, [x0]
+; VBITS_GE_512-NEXT: st1b { [[Z0]].h }, p[[P0]], [x{{[0-9]+}}]
+; VBITS_GE_512-NEXT: ret
+
+; Ensure sensible type legalisation
+; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
+; VBITS_EQ_256-DAG: ld1h { [[Z0:z[0-9]+]].h }, [[PG]]/z, [x8]
+; VBITS_EQ_256-DAG: ld1h { [[Z1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; VBITS_EQ_256-DAG: ptrue [[PG]].b, vl16
+; VBITS_EQ_256-DAG: uzp1 [[Z0]].b, [[Z0]].b, [[Z0]].b
+; VBITS_EQ_256-DAG: uzp1 [[Z1]].b, [[Z1]].b, [[Z1]].b
+; VBITS_EQ_256-DAG: splice [[Z1]].b, [[PG]], [[Z1]].b, [[Z0]].b
+; VBITS_EQ_256-DAG: ptrue [[PG]].b, vl32
+; VBITS_EQ_256-DAG: st1b { [[Z1]].b }, [[PG]], [x1]
+; VBITS_EQ_256-DAG: ret
+  %a = load <32 x i16>, <32 x i16>* %ap
+  %val = trunc <32 x i16> %a to <32 x i8>
+  store <32 x i8> %val, <32 x i8>* %dest
+  ret void
+}
+
+
+attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/CodeGen/Mips/cconv/byval.ll b/llvm/test/CodeGen/Mips/cconv/byval.ll
--- a/llvm/test/CodeGen/Mips/cconv/byval.ll
+++ b/llvm/test/CodeGen/Mips/cconv/byval.ll
@@ -255,10 +255,9 @@
 ; N32-NEXT: .cfi_offset 31, -8
 ; N32-NEXT: .cfi_offset 16, -16
 ; N32-NEXT: move $5, $4
-; N32-NEXT: sll $1, $5, 0
-; N32-NEXT: lui $2, 1
-; N32-NEXT: addu $2, $sp, $2
-; N32-NEXT: sw $1, -4($2)
+; N32-NEXT: lui $1, 1
+; N32-NEXT: addu $1, $sp, $1
+; N32-NEXT: sw $4, -4($1)
 ; N32-NEXT: addiu $16, $sp, 8
 ; N32-NEXT: ori $6, $zero, 65520
 ; N32-NEXT: jal memcpy
@@ -389,10 +388,8 @@
 ; N32-NEXT: .cfi_def_cfa_offset 16
 ; N32-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
 ; N32-NEXT: .cfi_offset 31, -8
-; N32-NEXT: sll $1, $5, 0
-; N32-NEXT: sw $1, 0($sp)
-; N32-NEXT: sll $1, $4, 0
-; N32-NEXT: sw $1, 4($sp)
+; N32-NEXT: sw $5, 0($sp)
+; N32-NEXT: sw $4, 4($sp)
 ; N32-NEXT: jal memcpy
 ; N32-NEXT: ori $6, $zero, 65520
 ; N32-NEXT: addiu $2, $zero, 4
diff --git a/llvm/test/CodeGen/Mips/cconv/vector.ll b/llvm/test/CodeGen/Mips/cconv/vector.ll
--- a/llvm/test/CodeGen/Mips/cconv/vector.ll
+++ b/llvm/test/CodeGen/Mips/cconv/vector.ll
@@ -554,10 +554,8 @@
 ; MIPS64R5: # %bb.0:
 ; MIPS64R5-NEXT: daddiu $sp, $sp, -16
 ; MIPS64R5-NEXT: .cfi_def_cfa_offset 16
-; MIPS64R5-NEXT: sll $1, $5, 0
-; MIPS64R5-NEXT: sw $1, 8($sp)
-; MIPS64R5-NEXT: sll $1, $4, 0
-; MIPS64R5-NEXT: sw $1, 12($sp)
+; MIPS64R5-NEXT: sw $5, 8($sp)
+; MIPS64R5-NEXT: sw $4, 12($sp)
 ; MIPS64R5-NEXT: lbu $1, 9($sp)
 ; MIPS64R5-NEXT: lbu $2, 8($sp)
 ; MIPS64R5-NEXT: insert.w $w0[0], $2
@@ -1263,10 +1261,8 @@
 ; MIPS64R5: # %bb.0:
 ; MIPS64R5-NEXT: daddiu $sp, $sp, -16
 ; MIPS64R5-NEXT: .cfi_def_cfa_offset 16
-; MIPS64R5-NEXT: sll $1, $5, 0
-; MIPS64R5-NEXT: sw $1, 8($sp)
-; MIPS64R5-NEXT: sll $1, $4, 0
-; MIPS64R5-NEXT: sw $1, 12($sp)
+; MIPS64R5-NEXT: sw $5, 8($sp)
+; MIPS64R5-NEXT: sw $4, 12($sp)
 ; MIPS64R5-NEXT: lh $1, 10($sp)
 ; MIPS64R5-NEXT: lh $2, 8($sp)
 ; MIPS64R5-NEXT: insert.d $w0[0], $2
@@ -5950,10 +5946,9 @@
 ; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(mixed_32)))
 ; MIPS64EB-NEXT: daddu $1, $1, $25
 ; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(mixed_32)))
+; MIPS64EB-NEXT: sw $6, 12($sp)
 ; MIPS64EB-NEXT: lui $2, 17200
 ; MIPS64EB-NEXT: sw $2, 8($sp)
-; MIPS64EB-NEXT: sll $2, $6, 0
-; MIPS64EB-NEXT: sw $2, 12($sp)
 ; MIPS64EB-NEXT: ld $1, %got_page(.LCPI41_0)($1)
 ; MIPS64EB-NEXT: ldc1 $f0, %got_ofst(.LCPI41_0)($1)
 ; MIPS64EB-NEXT: ldc1 $f1, 8($sp)
@@ -5963,24 +5958,24 @@
 ; MIPS64EB-NEXT: sll $1, $1, 0
 ; MIPS64EB-NEXT: mtc1 $1, $f1
 ; MIPS64EB-NEXT: add.s $f1, $f0, $f1
-; MIPS64EB-NEXT: dsrl $1, $5, 32
-; MIPS64EB-NEXT: mfc1 $2, $f1
-; MIPS64EB-NEXT: sll $3, $4, 0
-; MIPS64EB-NEXT: sll $1, $1, 0
-; MIPS64EB-NEXT: mtc1 $1, $f1
-; MIPS64EB-NEXT: add.s $f1, $f0, $f1
 ; MIPS64EB-NEXT: mfc1 $1, $f1
-; MIPS64EB-NEXT: mtc1 $3, $f1
-; MIPS64EB-NEXT: sll $3, $5, 0
-; MIPS64EB-NEXT: mtc1 $3, $f2
-; MIPS64EB-NEXT: dsll $2, $2, 32
+; MIPS64EB-NEXT: dsrl $2, $5, 32
+; MIPS64EB-NEXT: sll $2, $2, 0
+; MIPS64EB-NEXT: mtc1 $2, $f1
 ; MIPS64EB-NEXT: add.s $f1, $f0, $f1
 ; MIPS64EB-NEXT: mfc1 $3, $f1
-; MIPS64EB-NEXT: dsll $3, $3, 32
-; MIPS64EB-NEXT: dsrl $3, $3, 32
-; MIPS64EB-NEXT: or $2, $3, $2
 ; MIPS64EB-NEXT: dsll $1, $1, 32
-; MIPS64EB-NEXT: add.s $f0, $f0, $f2
+; MIPS64EB-NEXT: sll $2, $4, 0
+; MIPS64EB-NEXT: mtc1 $2, $f1
+; MIPS64EB-NEXT: add.s $f1, $f0, $f1
+; MIPS64EB-NEXT: mfc1 $2, $f1
+; MIPS64EB-NEXT: dsll $2, $2, 32
+; MIPS64EB-NEXT: dsrl $2, $2, 32
+; MIPS64EB-NEXT: or $2, $2, $1
+; MIPS64EB-NEXT: dsll $1, $3, 32
+; MIPS64EB-NEXT: sll $3, $5, 0
+; MIPS64EB-NEXT: mtc1 $3, $f1
+; MIPS64EB-NEXT: add.s $f0, $f0, $f1
 ; MIPS64EB-NEXT: mfc1 $3, $f0
 ; MIPS64EB-NEXT: dsll $3, $3, 32
 ; MIPS64EB-NEXT: dsrl $3, $3, 32
@@ -6022,10 +6017,9 @@
 ; MIPS64R5EB-NEXT: lui $1, %hi(%neg(%gp_rel(mixed_32)))
 ; MIPS64R5EB-NEXT: daddu $1, $1, $25
 ; MIPS64R5EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(mixed_32)))
+; MIPS64R5EB-NEXT: sw $6, 12($sp)
 ; MIPS64R5EB-NEXT: lui $2, 17200
 ; MIPS64R5EB-NEXT: sw $2, 8($sp)
-; MIPS64R5EB-NEXT: sll $2, $6, 0
-; MIPS64R5EB-NEXT: sw $2, 12($sp)
 ; MIPS64R5EB-NEXT: ld $1, %got_page(.LCPI41_0)($1)
 ; MIPS64R5EB-NEXT: ldc1 $f0, %got_ofst(.LCPI41_0)($1)
 ; MIPS64R5EB-NEXT: ldc1 $f1, 8($sp)
@@ -6081,8 +6075,7 @@
 ; MIPS64EL-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(mixed_32)))
 ; MIPS64EL-NEXT: lui $2, 17200
 ; MIPS64EL-NEXT: sw $2, 12($sp)
-; MIPS64EL-NEXT: sll $2, $6, 0
-; MIPS64EL-NEXT: sw $2, 8($sp)
+; MIPS64EL-NEXT: sw $6, 8($sp)
 ; MIPS64EL-NEXT: ld $1, %got_page(.LCPI41_0)($1)
 ; MIPS64EL-NEXT: ldc1 $f0, %got_ofst(.LCPI41_0)($1)
 ; MIPS64EL-NEXT: ldc1 $f1, 8($sp)
@@ -6092,24 +6085,24 @@
 ; MIPS64EL-NEXT: sll $1, $1, 0
 ; MIPS64EL-NEXT: mtc1 $1, $f1
 ; MIPS64EL-NEXT: add.s $f1, $f0, $f1
-; MIPS64EL-NEXT: dsrl $1, $5, 32
-; MIPS64EL-NEXT: mfc1 $2, $f1
-; MIPS64EL-NEXT: sll $3, $4, 0
-; MIPS64EL-NEXT: sll $1, $1, 0
-; MIPS64EL-NEXT: mtc1 $1, $f1
-; MIPS64EL-NEXT: add.s $f1, $f0, $f1
 ; MIPS64EL-NEXT: mfc1 $1, $f1
-; MIPS64EL-NEXT: mtc1 $3, $f1
-; MIPS64EL-NEXT: sll $3, $5, 0
-; MIPS64EL-NEXT: mtc1 $3, $f2
-; MIPS64EL-NEXT: dsll $2, $2, 32
+; MIPS64EL-NEXT: dsrl $2, $5, 32
+; MIPS64EL-NEXT: sll $2, $2, 0
+; MIPS64EL-NEXT: mtc1 $2, $f1
 ; MIPS64EL-NEXT: add.s $f1, $f0, $f1
 ; MIPS64EL-NEXT: mfc1 $3, $f1
-; MIPS64EL-NEXT: dsll $3, $3, 32
-; MIPS64EL-NEXT: dsrl $3, $3, 32
-; MIPS64EL-NEXT: or $2, $3, $2
 ; MIPS64EL-NEXT: dsll $1, $1, 32
-; MIPS64EL-NEXT: add.s $f0, $f0, $f2
+; MIPS64EL-NEXT: sll $2, $4, 0
+; MIPS64EL-NEXT: mtc1 $2, $f1
+; MIPS64EL-NEXT: add.s $f1, $f0, $f1
+; MIPS64EL-NEXT: mfc1 $2, $f1
+; MIPS64EL-NEXT: dsll $2, $2, 32
+; MIPS64EL-NEXT: dsrl $2, $2, 32
+; MIPS64EL-NEXT: or $2, $2, $1
+; MIPS64EL-NEXT: dsll $1, $3, 32
+; MIPS64EL-NEXT: sll $3, $5, 0
+; MIPS64EL-NEXT: mtc1 $3, $f1
+; MIPS64EL-NEXT: add.s $f0, $f0, $f1
 ; MIPS64EL-NEXT: mfc1 $3, $f0
 ; MIPS64EL-NEXT: dsll $3, $3, 32
 ; MIPS64EL-NEXT: dsrl $3, $3, 32
@@ -6153,8 +6146,7 @@
 ; MIPS64R5EL-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(mixed_32)))
 ; MIPS64R5EL-NEXT: lui $2, 17200
 ; MIPS64R5EL-NEXT: sw $2, 12($sp)
-; MIPS64R5EL-NEXT: sll $2, $6, 0
-; MIPS64R5EL-NEXT: sw $2, 8($sp)
+; MIPS64R5EL-NEXT: sw $6, 8($sp)
 ; MIPS64R5EL-NEXT: ld $1, %got_page(.LCPI41_0)($1)
 ; MIPS64R5EL-NEXT: ldc1 $f0, %got_ofst(.LCPI41_0)($1)
 ; MIPS64R5EL-NEXT: ldc1 $f1, 8($sp)
@@ -6229,33 +6221,29 @@
 ; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(cast)))
 ; MIPS64EB-NEXT: daddu $1, $1, $25
 ; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(cast)))
-; MIPS64EB-NEXT: sll $2, $4, 0
-; MIPS64EB-NEXT: lui $3, 17200
-; MIPS64EB-NEXT: sw $3, 0($sp)
-; MIPS64EB-NEXT: sw $2, 4($sp)
-; MIPS64EB-NEXT: sll $2, $5, 0
-; MIPS64EB-NEXT: sw $3, 8($sp)
-; MIPS64EB-NEXT: sw $2, 12($sp)
+; MIPS64EB-NEXT: sw $4, 4($sp)
+; MIPS64EB-NEXT: lui $2, 17200
+; MIPS64EB-NEXT: sw $2, 0($sp)
+; MIPS64EB-NEXT: sw $5, 12($sp)
+; MIPS64EB-NEXT: sw $2, 8($sp)
 ; MIPS64EB-NEXT: ld $1, %got_page(.LCPI42_0)($1)
 ; MIPS64EB-NEXT: ldc1 $f0, %got_ofst(.LCPI42_0)($1)
 ; MIPS64EB-NEXT: ldc1 $f1, 0($sp)
-; MIPS64EB-NEXT: sub.d $f1, $f1, $f0
-; MIPS64EB-NEXT: cvt.s.d $f1, $f1
 ; MIPS64EB-NEXT: ldc1 $f2, 8($sp)
 ; MIPS64EB-NEXT: sub.d $f2, $f2, $f0
+; MIPS64EB-NEXT: sub.d $f1, $f1, $f0
+; MIPS64EB-NEXT: cvt.s.d $f1, $f1
 ; MIPS64EB-NEXT: mfc1 $1, $f1
-; MIPS64EB-NEXT: dsrl $2, $4, 32
-; MIPS64EB-NEXT: sll $2, $2, 0
-; MIPS64EB-NEXT: sw $3, 16($sp)
-; MIPS64EB-NEXT: sw $2, 20($sp)
-; MIPS64EB-NEXT: sw $3, 24($sp)
+; MIPS64EB-NEXT: dsrl $3, $4, 32
+; MIPS64EB-NEXT: sw $2, 16($sp)
+; MIPS64EB-NEXT: sw $3, 20($sp)
 ; MIPS64EB-NEXT: dsll $1, $1, 32
 ; MIPS64EB-NEXT: cvt.s.d $f1, $f2
-; MIPS64EB-NEXT: dsrl $2, $5, 32
-; MIPS64EB-NEXT: sll $2, $2, 0
-; MIPS64EB-NEXT: sw $2, 28($sp)
-; MIPS64EB-NEXT: mfc1 $2, $f1
-; MIPS64EB-NEXT: dsll $3, $2, 32
+; MIPS64EB-NEXT: mfc1 $3, $f1
+; MIPS64EB-NEXT: dsrl $4, $5, 32
+; MIPS64EB-NEXT: sw $2, 24($sp)
+; MIPS64EB-NEXT: sw $4, 28($sp)
+; MIPS64EB-NEXT: dsll $3, $3, 32
 ; MIPS64EB-NEXT: dsrl $1, $1, 32
 ; MIPS64EB-NEXT: ldc1 $f1, 16($sp)
+; MIPS64EB-NEXT: sub.d $f1, $f1, $f0
@@ -6343,33 +6331,29 @@
 ; MIPS64EL-NEXT: lui $1, %hi(%neg(%gp_rel(cast)))
 ; MIPS64EL-NEXT: daddu $1, $1, $25
 ; MIPS64EL-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(cast)))
-; MIPS64EL-NEXT: sll $2, $4, 0
-; MIPS64EL-NEXT: lui $3, 17200
-; MIPS64EL-NEXT: sw $3, 4($sp)
-; MIPS64EL-NEXT: sw $2, 0($sp)
-; MIPS64EL-NEXT: sll $2, $5, 0
-; MIPS64EL-NEXT: sw $3, 12($sp)
-; MIPS64EL-NEXT: sw $2, 8($sp)
+; MIPS64EL-NEXT: lui $2, 17200
+; MIPS64EL-NEXT: sw $2, 4($sp)
+; MIPS64EL-NEXT: sw $4, 0($sp)
+; MIPS64EL-NEXT: sw $2, 12($sp)
+; MIPS64EL-NEXT: sw $5, 8($sp)
 ; MIPS64EL-NEXT: ld $1, %got_page(.LCPI42_0)($1)
 ; MIPS64EL-NEXT: ldc1 $f0, %got_ofst(.LCPI42_0)($1)
 ; MIPS64EL-NEXT: ldc1 $f1, 0($sp)
-; MIPS64EL-NEXT: sub.d $f1, $f1, $f0
-; MIPS64EL-NEXT: cvt.s.d $f1, $f1
 ; MIPS64EL-NEXT: ldc1 $f2, 8($sp)
 ; MIPS64EL-NEXT: sub.d $f2, $f2, $f0
+; MIPS64EL-NEXT: sub.d $f1, $f1, $f0
+; MIPS64EL-NEXT: cvt.s.d $f1, $f1
 ; MIPS64EL-NEXT: mfc1 $1, $f1
-; MIPS64EL-NEXT: dsrl $2, $4, 32
-; MIPS64EL-NEXT: sll $2, $2, 0
-; MIPS64EL-NEXT: sw $3, 20($sp)
-; MIPS64EL-NEXT: sw $2, 16($sp)
-; MIPS64EL-NEXT: sw $3, 28($sp)
+; MIPS64EL-NEXT: dsrl $3, $4, 32
+; MIPS64EL-NEXT: sw $2, 20($sp)
+; MIPS64EL-NEXT: sw $3, 16($sp)
 ; MIPS64EL-NEXT: dsll $1, $1, 32
 ; MIPS64EL-NEXT: cvt.s.d $f1, $f2
-; MIPS64EL-NEXT: dsrl $2, $5, 32
-; MIPS64EL-NEXT: sll $2, $2, 0
-; MIPS64EL-NEXT: sw $2, 24($sp)
-; MIPS64EL-NEXT: mfc1 $2, $f1
-; MIPS64EL-NEXT: dsll $3, $2, 32
+; MIPS64EL-NEXT: mfc1 $3, $f1
+; MIPS64EL-NEXT: dsrl $4, $5, 32
+; MIPS64EL-NEXT: sw $2, 28($sp)
+; MIPS64EL-NEXT: sw $4, 24($sp)
+; MIPS64EL-NEXT: dsll $3, $3, 32
 ; MIPS64EL-NEXT: dsrl $1, $1, 32
 ; MIPS64EL-NEXT: ldc1 $f1, 16($sp)
 ; MIPS64EL-NEXT: sub.d $f1, $f1, $f0
diff --git a/llvm/test/CodeGen/Mips/llvm-ir/store.ll b/llvm/test/CodeGen/Mips/llvm-ir/store.ll
--- a/llvm/test/CodeGen/Mips/llvm-ir/store.ll
+++ b/llvm/test/CodeGen/Mips/llvm-ir/store.ll
@@ -285,65 +285,57 @@
 ;
 ; MIPS4-LABEL: f3:
 ; MIPS4: # %bb.0:
-; MIPS4-NEXT: sll $1, $4, 0 #
-; MIPS4-NEXT: #
-; MIPS4-NEXT: # >
-; MIPS4-NEXT: lui $2, %highest(c) #
 ; MIPS4-NEXT: # >
-; MIPS4-NEXT: daddiu $2, $2, %higher(c) #
 ; MIPS4-NEXT: #
 ; MIPS4-NEXT: # >
-; MIPS4-NEXT: dsll $2, $2, 16 #
 ; MIPS4-NEXT: #
 ; MIPS4-NEXT: # >
-; MIPS4-NEXT: daddiu $2, $2, %hi(c) #
 ; MIPS4-NEXT: #
 ; MIPS4-NEXT: # >
-; MIPS4-NEXT: dsll $2, $2, 16 #
 ; MIPS4-NEXT: #
 ; MIPS4-NEXT: # >
 ; MIPS4-NEXT: jr $ra # >
-; MIPS4-NEXT: sw $1, %lo(c)($2) #
 ; MIPS4-NEXT: #
 ; MIPS4-NEXT: # >
 ;
 ; MIPS64R6-LABEL: f3:
 ; MIPS64R6: # %bb.0:
-; MIPS64R6-NEXT: sll $1, $4, 0 #
-; MIPS64R6-NEXT: #
-; MIPS64R6-NEXT: # >
-; MIPS64R6-NEXT: lui $2, %highest(c) #
 ; MIPS64R6-NEXT: # >
-; MIPS64R6-NEXT: daddiu $2, $2, %higher(c) #
 ; MIPS64R6-NEXT: #
 ; MIPS64R6-NEXT: # >
-; MIPS64R6-NEXT: dsll $2, $2, 16 #
 ; MIPS64R6-NEXT: #
 ; MIPS64R6-NEXT: # >
-; MIPS64R6-NEXT: daddiu $2, $2, %hi(c) #
 ; MIPS64R6-NEXT: #
 ; MIPS64R6-NEXT: # >
-; MIPS64R6-NEXT: dsll $2, $2, 16 #
 ; MIPS64R6-NEXT: #
 ; MIPS64R6-NEXT: # >
 ; MIPS64R6-NEXT: jr $ra #
 ; MIPS64R6-NEXT: # >
-; MIPS64R6-NEXT: sw $1, %lo(c)($2) #
 ; MIPS64R6-NEXT: #
 ; MIPS64R6-NEXT: # >
diff --git a/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll b/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll
--- a/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll
+++ b/llvm/test/CodeGen/Mips/msa/f16-llvm-ir.ll
@@ -405,8 +405,7 @@
 ; MIPS64-N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(uitofp)))
 ; MIPS64-N32-NEXT: lui $2, 17200
 ; MIPS64-N32-NEXT: sw $2, 12($sp)
-; MIPS64-N32-NEXT: sll $2, $4, 0
-; MIPS64-N32-NEXT: sw $2, 8($sp)
+; MIPS64-N32-NEXT: sw $4, 8($sp)
 ; MIPS64-N32-NEXT: lw $2, %got_page(.LCPI5_0)($1)
 ; MIPS64-N32-NEXT: ldc1 $f0, %got_ofst(.LCPI5_0)($2)
 ; MIPS64-N32-NEXT: ldc1 $f1, 8($sp)
@@ -430,8 +429,7 @@
 ; MIPS64-N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(uitofp)))
 ; MIPS64-N64-NEXT: lui $2, 17200
 ; MIPS64-N64-NEXT: sw $2, 12($sp)
-; MIPS64-N64-NEXT: sll $2, $4, 0
-; MIPS64-N64-NEXT: sw $2, 8($sp)
+; MIPS64-N64-NEXT: sw $4, 8($sp)
 ; MIPS64-N64-NEXT: ld $2, %got_page(.LCPI5_0)($1)
 ; MIPS64-N64-NEXT: ldc1 $f0, %got_ofst(.LCPI5_0)($2)
 ; MIPS64-N64-NEXT: ldc1 $f1, 8($sp)