diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1280,6 +1280,40 @@ return isTruncStoreLegalOrCustom(ValVT, MemVT); } + /// Return how this store with truncation should be treated: either it is + /// legal, needs to be promoted to a larger size, needs to be expanded to some + /// other code sequence, or the target has a custom expander for it. + LegalizeAction getTruncMStoreAction(EVT ValVT, EVT MemVT) const { + if (ValVT.isExtended() || MemVT.isExtended()) + return Expand; + unsigned ValI = (unsigned)ValVT.getSimpleVT().SimpleTy; + unsigned MemI = (unsigned)MemVT.getSimpleVT().SimpleTy; + assert(ValI < MVT::VALUETYPE_SIZE && MemI < MVT::VALUETYPE_SIZE && + "Table isn't big enough!"); + return TruncMStoreActions[ValI][MemI]; + } + + /// Return true if the specified store with truncation is legal on this + /// target. + bool isTruncMStoreLegal(EVT ValVT, EVT MemVT) const { + return isTypeLegal(ValVT) && getTruncMStoreAction(ValVT, MemVT) == Legal; + } + + /// Return true if the specified store with truncation has solution on this + /// target. + bool isTruncMStoreLegalOrCustom(EVT ValVT, EVT MemVT) const { + return isTypeLegal(ValVT) && (getTruncMStoreAction(ValVT, MemVT) == Legal || + getTruncMStoreAction(ValVT, MemVT) == Custom); + } + + virtual bool canCombineTruncMStore(EVT ValVT, EVT MemVT, + bool LegalOnly) const { + if (LegalOnly) + return isTruncMStoreLegal(ValVT, MemVT); + + return isTruncMStoreLegalOrCustom(ValVT, MemVT); + } + /// Return how the indexed load should be treated: either it is legal, needs /// to be promoted to a larger size, needs to be expanded to some other code /// sequence, or the target has a custom expander for it. 
@@ -2219,6 +2253,13 @@ TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action; } + /// Indicate that the specified masked truncating store does not work with the + /// specified type and indicate what to do about it. + void setTruncMStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action) { + assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!"); + TruncMStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action; + } + /// Indicate that the specified indexed load does or does not work with the /// specified type and indicate what to do abort it. /// @@ -3008,6 +3049,11 @@ /// truncating store of a specific value type and truncating type is legal. LegalizeAction TruncStoreActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE]; + /// For each value type pair keep a LegalizeAction that indicates whether a + /// masked truncating store of a specific value type and truncating type is + /// legal. + LegalizeAction TruncMStoreActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE]; + /// For each indexed mode and each value type, keep a quad of LegalizeAction /// that indicates how instruction selection should deal with the load / /// store / maskedload / maskedstore. diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9830,6 +9830,8 @@ MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); SDValue Mask = MST->getMask(); SDValue Chain = MST->getChain(); + SDValue Value = MST->getValue(); + SDValue Ptr = MST->getBasePtr(); SDLoc DL(N); // Zap masked stores with a zero mask.
@@ -9844,6 +9846,40 @@ return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(), MST->getBasePtr(), MST->getMemOperand()); + if (MST->isTruncatingStore() && MST->isUnindexed() && + Value.getValueType().isInteger() && + (!isa<ConstantSDNode>(Value) || + !cast<ConstantSDNode>(Value)->isOpaque())) { + APInt TruncDemandedBits = + APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), + MST->getMemoryVT().getScalarSizeInBits()); + + // See if we can simplify the operation with + // SimplifyDemandedBits, which only works if the value has a single use. + if (SimplifyDemandedBits(Value, TruncDemandedBits)) { + // Re-visit the store if anything changed and the store hasn't been merged + // with another node (N is deleted) SimplifyDemandedBits will add Value's + // node back to the worklist if necessary, but we also need to re-visit + // the Store node itself. + if (N->getOpcode() != ISD::DELETED_NODE) + AddToWorklist(N); + return SDValue(N, 0); + } + } + + // If this is an FP_ROUND or TRUNC followed by a store, fold this into a + // truncating store. We can do this even if this is already a truncstore. + if ((Value.getOpcode() == ISD::FP_ROUND || + Value.getOpcode() == ISD::TRUNCATE) && + Value.getNode()->hasOneUse() && MST->isUnindexed() && + TLI.canCombineTruncMStore(Value.getOperand(0).getValueType(), + MST->getMemoryVT(), LegalOperations)) { + return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, + MST->getOffset(), MST->getMask(), + MST->getMemoryVT(), MST->getMemOperand(), + MST->getAddressingMode(), /*IsTruncating=*/true); + } + // Try transforming N to an indexed store.
if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) return SDValue(N, 0); diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -733,6 +733,7 @@ memset(OpActions, 0, sizeof(OpActions)); memset(LoadExtActions, 0, sizeof(LoadExtActions)); memset(TruncStoreActions, 0, sizeof(TruncStoreActions)); + memset(TruncMStoreActions, 0, sizeof(TruncMStoreActions)); memset(IndexedModeActions, 0, sizeof(IndexedModeActions)); memset(CondCodeActions, 0, sizeof(CondCodeActions)); std::fill(std::begin(RegClassForVT), std::end(RegClassForVT), nullptr); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1122,6 +1122,7 @@ for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); + setTruncMStoreAction(VT, InnerVT, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); @@ -1259,6 +1260,7 @@ // Avoid marking truncating FP stores as legal to prevent the // DAGCombiner from creating unsupported truncating stores. setTruncStoreAction(VT, InnerVT, Expand); + setTruncMStoreAction(VT, InnerVT, Expand); // SVE does not have floating-point extending loads. setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1607,6 +1607,7 @@ } // Turn FP truncstore into trunc + store. 
setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncMStoreAction(MVT::f64, MVT::f32, Expand); // Turn FP extload into load/fpextend. for (MVT VT : MVT::fp_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); @@ -1665,6 +1666,7 @@ setLoadExtAction(ISD::ZEXTLOAD, TargetVT, VT, Expand); setLoadExtAction(ISD::SEXTLOAD, TargetVT, VT, Expand); setTruncStoreAction(VT, TargetVT, Expand); + setTruncMStoreAction(VT, TargetVT, Expand); } // Normalize all inputs to SELECT to be vectors of i32. diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -190,6 +190,12 @@ setTruncStoreAction(MVT::i32, MVT::i16, Expand); setTruncStoreAction(MVT::i32, MVT::i8 , Expand); setTruncStoreAction(MVT::i16, MVT::i8, Expand); + setTruncMStoreAction(MVT::i64, MVT::i32, Expand); + setTruncMStoreAction(MVT::i64, MVT::i16, Expand); + setTruncMStoreAction(MVT::i64, MVT::i8, Expand); + setTruncMStoreAction(MVT::i32, MVT::i16, Expand); + setTruncMStoreAction(MVT::i32, MVT::i8, Expand); + setTruncMStoreAction(MVT::i16, MVT::i8, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -863,6 +869,7 @@ setOperationAction(ISD::SELECT_CC, VT, Expand); for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { setTruncStoreAction(InnerVT, VT, Expand); + setTruncMStoreAction(InnerVT, VT, Expand); setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand); setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);