diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -635,6 +635,13 @@ /// modes that operate across loop iterations. bool shouldFavorBackedgeIndex(const Loop *L) const; + /// Return true if the target supports variable-length load. + bool isLegalVariableLengthLoad(Type *DataType, Align Alignment, + Type *LengthType) const; + /// Return true if the target supports variable-length store. + bool isLegalVariableLengthStore(Type *DataType, Align Alignment, + Type *LengthType) const; + /// Return true if the target supports masked store. bool isLegalMaskedStore(Type *DataType, Align Alignment) const; /// Return true if the target supports masked load. @@ -1408,6 +1415,10 @@ TargetLibraryInfo *LibInfo) = 0; virtual bool shouldFavorPostInc() const = 0; virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0; + virtual bool isLegalVariableLengthLoad(Type *DataType, Align Alignment, + Type *LengthType) = 0; + virtual bool isLegalVariableLengthStore(Type *DataType, Align Alignment, + Type *LengthType) = 0; virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0; virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0; virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0; @@ -1732,6 +1743,14 @@ bool shouldFavorBackedgeIndex(const Loop *L) const override { return Impl.shouldFavorBackedgeIndex(L); } + bool isLegalVariableLengthLoad(Type *DataType, Align Alignment, + Type *LengthType) override { + return Impl.isLegalVariableLengthLoad(DataType, Alignment, LengthType); + } + bool isLegalVariableLengthStore(Type *DataType, Align Alignment, + Type *LengthType) override { + return Impl.isLegalVariableLengthStore(DataType, Alignment, LengthType); + } bool isLegalMaskedStore(Type *DataType, Align Alignment) override { return Impl.isLegalMaskedStore(DataType, Alignment); } 
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -206,6 +206,15 @@ bool shouldFavorBackedgeIndex(const Loop *L) const { return false; } + bool isLegalVariableLengthLoad(Type *DataType, Align Alignment, + Type *LengthType) { + return false; + } + bool isLegalVariableLengthStore(Type *DataType, Align Alignment, + Type *LengthType) { + return false; + } + bool isLegalMaskedStore(Type *DataType, Align Alignment) { return false; } bool isLegalMaskedLoad(Type *DataType, Align Alignment) { return false; } diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1045,6 +1045,17 @@ ATOMIC_LOAD_FADD, ATOMIC_LOAD_FSUB, + // Variable-length load and store - consecutive vector load and store + // operations with + // additional length operand that prevents memory accesses beyond the first + // consecutive + // "length" lanes. + // + // Val, OutChain = VARLEN_LOAD(BasePtr, Length, PassThru) + // OutChain = VARLEN_STORE(Value, BasePtr, Length) + VARLEN_LOAD, + VARLEN_STORE, + // Masked load and store - consecutive vector load and store operations // with additional mask operand that prevents memory accesses to the // masked-off lanes. diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -72,6 +72,10 @@ /// matching during instruction selection. FunctionPass *createCodeGenPreparePass(); + /// createScalarizeVariableLengthMemIntrinPass - Replace variable-length load + /// and store intrinsics with scalar code when target doesn't support them. 
+ FunctionPass *createScalarizeVariableLengthMemIntrinPass(); + /// createScalarizeMaskedMemIntrinPass - Replace masked load, store, gather /// and scatter intrinsics with scalar code when target doesn't support them. FunctionPass *createScalarizeMaskedMemIntrinPass(); diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1306,6 +1306,23 @@ SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM); + SDValue getVariableLengthLoad(EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Base, SDValue Offset, SDValue Length, + SDValue Src0, EVT MemVT, MachineMemOperand *MMO, + ISD::MemIndexedMode AM, ISD::LoadExtType, + bool IsExpanding = false); + SDValue getIndexedVariableLengthLoad(SDValue OrigLoad, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM); + SDValue getVariableLengthStore(SDValue Chain, const SDLoc &dl, SDValue Val, + SDValue Base, SDValue Offset, SDValue Length, + EVT MemVT, MachineMemOperand *MMO, + ISD::MemIndexedMode AM, + bool IsTruncating = false); + SDValue getIndexedVariableLengthStore(SDValue OrigStore, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM); + SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -542,6 +542,7 @@ class LSBaseSDNodeBitfields { friend class LSBaseSDNode; + friend class VariableLengthLoadStoreSDNode; friend class MaskedLoadStoreSDNode; friend class MaskedGatherScatterSDNode; @@ -550,6 +551,7 @@ // This storage is shared between disparate class 
hierarchies to hold an // enumeration specific to the class hierarchy in use. // LSBaseSDNode => enum ISD::MemIndexedMode + // VariableLengthLoadStoreBaseSDNode => enum ISD::MemIndexedMode // MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode // MaskedGatherScatterSDNode => enum ISD::MemIndexType uint16_t AddressingMode : 3; @@ -558,6 +560,7 @@ class LoadSDNodeBitfields { friend class LoadSDNode; + friend class VariableLengthLoadSDNode; friend class MaskedLoadSDNode; uint16_t : NumLSBaseSDNodeBits; @@ -568,6 +571,7 @@ class StoreSDNodeBitfields { friend class StoreSDNode; + friend class VariableLengthStoreSDNode; friend class MaskedStoreSDNode; uint16_t : NumLSBaseSDNodeBits; @@ -2285,6 +2289,106 @@ } }; +/// This base class is used to represent VARLEN_LOAD and VARLEN_STORE nodes +class VariableLengthLoadStoreSDNode : public MemSDNode { +public: + friend class SelectionDAG; + + VariableLengthLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, + const DebugLoc &dl, SDVTList VTs, + ISD::MemIndexedMode AM, EVT MemVT, + MachineMemOperand *MMO) + : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { + LSBaseSDNodeBits.AddressingMode = AM; + assert(getAddressingMode() == AM && "Value truncated"); + } + + // VariableLengthLoadSDNode (Chain, ptr, offset, length, passthru) + // VariableLengthStoreSDNode (Chain, data, ptr, offset, length) + // Length is an unsigned int. + const SDValue &getBasePtr() const { + return getOperand(getOpcode() == ISD::VARLEN_LOAD ? 1 : 2); + } + const SDValue &getOffset() const { + return getOperand(getOpcode() == ISD::VARLEN_LOAD ? 2 : 3); + } + const SDValue &getLength() const { + return getOperand(getOpcode() == ISD::VARLEN_LOAD ? 3 : 4); + } + + /// Return the addressing mode for this load or store: + /// unindexed, pre-inc, pre-dec, post-inc, or post-dec. + ISD::MemIndexedMode getAddressingMode() const { + return static_cast(LSBaseSDNodeBits.AddressingMode); + } + + /// Return true if this is a pre/post inc/dec load/store. 
+ bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; } + + /// Return true if this is NOT a pre/post inc/dec load/store. + bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::VARLEN_LOAD || + N->getOpcode() == ISD::VARLEN_STORE; + } +}; + +/// This class is used to represent a VARLEN_LOAD node +class VariableLengthLoadSDNode : public VariableLengthLoadStoreSDNode { +public: + friend class SelectionDAG; + + VariableLengthLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, + ISD::MemIndexedMode AM, ISD::LoadExtType ETy, + bool IsExpanding, EVT MemVT, MachineMemOperand *MMO) + : VariableLengthLoadStoreSDNode(ISD::VARLEN_LOAD, Order, dl, VTs, AM, + MemVT, MMO) { + LoadSDNodeBits.ExtTy = ETy; + LoadSDNodeBits.IsExpanding = IsExpanding; + } + + ISD::LoadExtType getExtensionType() const { + return static_cast(LoadSDNodeBits.ExtTy); + } + + const SDValue &getBasePtr() const { return getOperand(1); } + const SDValue &getOffset() const { return getOperand(2); } + const SDValue &getLength() const { return getOperand(3); } + const SDValue &getPassThru() const { return getOperand(4); } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::VARLEN_LOAD; + } + + bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; } +}; + +/// This class is used to represent a VARLEN_STORE node +class VariableLengthStoreSDNode : public VariableLengthLoadStoreSDNode { +public: + friend class SelectionDAG; + + VariableLengthStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, + ISD::MemIndexedMode AM, bool IsTruncating, + EVT MemVT, MachineMemOperand *MMO) + : VariableLengthLoadStoreSDNode(ISD::VARLEN_STORE, Order, dl, VTs, AM, + MemVT, MMO) { + StoreSDNodeBits.IsTruncating = IsTruncating; + } + + const SDValue &getValue() const { return getOperand(1); } + const SDValue &getBasePtr() const { return getOperand(2); } + const SDValue 
&getOffset() const { return getOperand(3); } + const SDValue &getLength() const { return getOperand(4); } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::VARLEN_STORE; + } + + bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; } +}; + /// This base class is used to represent MLOAD and MSTORE nodes class MaskedLoadStoreSDNode : public MemSDNode { public: diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -733,6 +733,15 @@ /// If the pointer isn't i8* it will be converted. CallInst *CreateInvariantStart(Value *Ptr, ConstantInt *Size = nullptr); + /// Create a call to Variable-Length Load intrinsic + CallInst *CreateVariableLengthLoad(Value *Ptr, Align Alignment, Value *Length, + Value *PassThru = nullptr, + const Twine &Name = ""); + + /// Create a call to Variable-Length Store intrinsic + CallInst *CreateVariableLengthStore(Value *Val, Value *Ptr, Align Alignment, + Value *Length); + /// Create a call to Masked Load intrinsic LLVM_ATTRIBUTE_DEPRECATED( CallInst *CreateMaskedLoad(Value *Ptr, unsigned Alignment, Value *Mask, diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1347,6 +1347,29 @@ [llvm_anyint_ty, LLVMMatchType<1>], [IntrNoMem, IntrNoSync, IntrWillReturn]>; +//===-------------------- Variable-Length Intrinsics --------------------===// +def int_variable_length_load + : Intrinsic<[llvm_anyvector_ty], + [ LLVMAnyPointerType>, + llvm_i32_ty, + llvm_anyint_ty, + LLVMMatchType<0>, + ], + [ IntrArgMemOnly, IntrWillReturn, + ImmArg>, + IntrReadMem, ]>; + +def int_variable_length_store + : Intrinsic<[], + [ llvm_anyvector_ty, + LLVMAnyPointerType>, + llvm_i32_ty, + llvm_anyint_ty ], + [ IntrArgMemOnly, + ImmArg>, + IntrWriteMem, ]>; + + //===-------------------------- Masked Intrinsics 
-------------------------===// // def int_masked_store : Intrinsic<[], [llvm_anyvector_ty, diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -383,6 +383,7 @@ void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&); void initializeModuleSanitizerCoverageLegacyPassPass(PassRegistry &); void initializeScalarEvolutionWrapperPassPass(PassRegistry&); +void initializeScalarizeVariableLengthMemIntrinPass(PassRegistry &); void initializeScalarizeMaskedMemIntrinPass(PassRegistry&); void initializeScalarizerLegacyPassPass(PassRegistry&); void initializeScavengerTestPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -224,6 +224,7 @@ (void) llvm::createMustBeExecutedContextPrinter(); (void) llvm::createFloat2IntPass(); (void) llvm::createEliminateAvailableExternallyPass(); + (void)llvm::createScalarizeVariableLengthMemIntrinPass(); (void) llvm::createScalarizeMaskedMemIntrinPass(); (void) llvm::createWarnMissedTransformationsPass(); (void) llvm::createHardwareLoopsPass(); diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -224,6 +224,13 @@ SDTCisSameAs<0, 2>, SDTCisPtrTy<0>, SDTCisPtrTy<3> ]>; +def SDTVariableLengthLoad: SDTypeProfile<1, 4, [ // partial load + SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisPtrTy<2>, SDTCisInt<3>, SDTCisSameAs<0, 4>, +]>; +def SDTVariableLengthStore: SDTypeProfile<0, 4, [ // partial store + SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisPtrTy<2>, SDTCisInt<3> +]>; + def SDTMaskedStore: SDTypeProfile<0, 4, [ // masked store SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisPtrTy<2>, SDTCisVec<3>, SDTCisSameNumEltsAs<0, 3> ]>; @@ -622,6 +629,11 @@ def 
atomic_store : SDNode<"ISD::ATOMIC_STORE", SDTAtomicStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def varlen_ld : SDNode<"ISD::VARLEN_LOAD", SDTVariableLengthLoad, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def varlen_st : SDNode<"ISD::VARLEN_STORE", SDTVariableLengthStore, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + def masked_st : SDNode<"ISD::MSTORE", SDTMaskedStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def masked_ld : SDNode<"ISD::MLOAD", SDTMaskedLoad, diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -399,6 +399,18 @@ return TTIImpl->shouldFavorBackedgeIndex(L); } +bool TargetTransformInfo::isLegalVariableLengthLoad(Type *DataType, + Align Alignment, + Type *LengthType) const { + return TTIImpl->isLegalVariableLengthLoad(DataType, Alignment, LengthType); +} + +bool TargetTransformInfo::isLegalVariableLengthStore(Type *DataType, + Align Alignment, + Type *LengthType) const { + return TTIImpl->isLegalVariableLengthStore(DataType, Alignment, LengthType); +} + bool TargetTransformInfo::isLegalMaskedStore(Type *DataType, Align Alignment) const { return TTIImpl->isLegalMaskedStore(DataType, Alignment); diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -144,6 +144,7 @@ ResetMachineFunctionPass.cpp SafeStack.cpp SafeStackLayout.cpp + ScalarizeVariableLengthMemIntrin.cpp ScalarizeMaskedMemIntrin.cpp ScheduleDAG.cpp ScheduleDAGInstrs.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -98,6 +98,7 @@ initializeRegisterCoalescerPass(Registry); initializeRenameIndependentSubregsPass(Registry); initializeSafeStackLegacyPassPass(Registry); + 
initializeScalarizeVariableLengthMemIntrinPass(Registry); initializeScalarizeMaskedMemIntrinPass(Registry); initializeShrinkWrapPass(Registry); initializeSjLjEHPreparePass(Registry); diff --git a/llvm/lib/CodeGen/ScalarizeVariableLengthMemIntrin.cpp b/llvm/lib/CodeGen/ScalarizeVariableLengthMemIntrin.cpp new file mode 100755 --- /dev/null +++ b/llvm/lib/CodeGen/ScalarizeVariableLengthMemIntrin.cpp @@ -0,0 +1,306 @@ +//===- ScalarizeVariableLengthMemIntrin.cpp - Scalarize unsupported +//variable-length mem --===// +// intrinsics +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass replaces variable-length memory intrinsics - when unsupported by +// the target +// - with a chain of basic blocks, that deal with power-of-two subvectors +// one-by-one if the appropriate bits are set in the length. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "scalarize-varlen-mem-intrin" + +namespace { + +class ScalarizeVariableLengthMemIntrin : public FunctionPass { + const TargetTransformInfo *TTI = nullptr; + const DataLayout *DL = nullptr; + +public: + static char ID; // Pass identification, replacement for typeid + + explicit ScalarizeVariableLengthMemIntrin() : FunctionPass(ID) { + initializeScalarizeVariableLengthMemIntrinPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + StringRef getPassName() const override { + return "Scalarize VariableLength Memory Intrinsics"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + } + +private: + bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT); + bool optimizeCallInst(CallInst *CI, bool &ModifiedDT); +}; + +} // end anonymous namespace + +char ScalarizeVariableLengthMemIntrin::ID = 0; + +INITIALIZE_PASS(ScalarizeVariableLengthMemIntrin, DEBUG_TYPE, + "Scalarize unsupported variable-length memory intrinsics", + false, false) + +FunctionPass *llvm::createScalarizeVariableLengthMemIntrinPass() { + return new ScalarizeVariableLengthMemIntrin(); +} + +static void scalarizeVariableLengthLoad(CallInst *CI, bool 
&ModifiedDT) { + Value *Ptr = CI->getArgOperand(0); + const Align Alignment = + cast(CI->getArgOperand(1))->getAlignValue(); + Value *Length = CI->getArgOperand(2); + Value *Pass = CI->getArgOperand(3); + + VectorType *VecType = cast(CI->getType()); + unsigned VectorWidth = VecType->getNumElements(); + Type *EltTy = VecType->getElementType(); + Type *LengthType = Length->getType(); + unsigned PtrAS = Ptr->getType()->getPointerAddressSpace(); + + IRBuilder<> Builder(CI); + Value *BasePtr = Builder.CreateBitCast(Ptr, EltTy->getPointerTo(PtrAS)); + Value *VResult = Pass; + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + if (isa(Length)) { + Value *VResult = Builder.CreateAlignedLoad(VecType, Ptr, Alignment); + CI->replaceAllUsesWith(VResult); + CI->eraseFromParent(); + return; + } + + unsigned LastBranchBit = + findLastSet(VectorWidth); // note that bits are 0-indexed + unsigned BranchMask = maskTrailingOnes(LastBranchBit + 1); + for (unsigned BranchBit = LastBranchBit; BranchBit--;) { + unsigned BranchOffsetMask = + maskTrailingOnes(BranchBit + 1) ^ BranchMask; + unsigned BranchWidth = 1 << BranchBit; + Value *BranchWidthValue = ConstantInt::get(LengthType, BranchWidth); + Value *BranchOffsetMaskValue = + ConstantInt::get(LengthType, BranchOffsetMask); + + BasicBlock *BB = CI->getParent(); + BasicBlock *ContBB = BB->splitBasicBlock(CI, "cont"); + BasicBlock *BranchBB = ContBB->splitBasicBlock(CI, "cond.load"); + + Instruction *OldTerm = BB->getTerminator(); + Value *BranchPredicate = Builder.CreateICmpUGE(Length, BranchWidthValue); + BranchInst::Create(BranchBB, ContBB, BranchPredicate, OldTerm); + OldTerm->eraseFromParent(); + + // auto *BranchTy = VectorType::get(EltTy, BranchWidth); + // auto *BranchPtrTy = BranchTy->getPointerTo(PtrAS); + const Align BranchAlignment = + commonAlignment(Alignment, EltTy->getPrimitiveSizeInBits() / 8); + + Builder.SetInsertPoint(BranchBB->getTerminator()); + Value *BranchOffsetValue = Builder.CreateAnd(Length, 
BranchOffsetMaskValue); + // Value *BranchPtr = Builder.CreatePointerCast( + // Builder.CreateInBoundsGEP(BasePtr, BranchOffsetValue), + // BranchPtrTy); + Value *BranchPtr = Builder.CreateInBoundsGEP(BasePtr, BranchOffsetValue); + + // todo: investigate possibilities for vectorizing variable-offset + // insertelements + Value *BranchRes = VResult; + for (unsigned i = 0; i < BranchWidth; ++i) { + auto *vi = ConstantInt::get(LengthType, i); + Value *BranchLoad = Builder.CreateAlignedLoad( + BasePtr->getType(), Builder.CreateInBoundsGEP(BranchPtr, vi), + BranchAlignment); + BranchRes = Builder.CreateInsertElement( + BranchRes, BranchLoad, Builder.CreateAdd(BranchOffsetValue, vi)); + } + + Builder.SetInsertPoint(CI); + PHINode *Phi = Builder.CreatePHI(VecType, 2); + Phi->addIncoming(BranchRes, BranchBB); + Phi->addIncoming(VResult, BB); + VResult = Phi; + } + CI->replaceAllUsesWith(VResult); + CI->eraseFromParent(); + ModifiedDT = true; + return; +} + +static void scalarizeVariableLengthStore(CallInst *CI, bool &ModifiedDT) { + Value *Src = CI->getArgOperand(0); + Value *Ptr = CI->getArgOperand(1); + const Align Alignment = + cast(CI->getArgOperand(2))->getAlignValue(); + Value *Length = CI->getArgOperand(3); + + VectorType *VecType = cast(Src->getType()); + unsigned VectorWidth = VecType->getNumElements(); + Type *EltTy = VecType->getElementType(); + Type *LengthType = Length->getType(); + unsigned PtrAS = Ptr->getType()->getPointerAddressSpace(); + + IRBuilder<> Builder(CI); + Value *BasePtr = Builder.CreateBitCast(Ptr, EltTy->getPointerTo(PtrAS)); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + if (isa(Length)) { + Builder.CreateAlignedStore(Src, Ptr, Alignment); + CI->eraseFromParent(); + return; + } + + unsigned LastBranchBit = + findLastSet(VectorWidth); // note that bits are 0-indexed + unsigned BranchMask = maskTrailingOnes(LastBranchBit + 1); + for (unsigned BranchBit = LastBranchBit; BranchBit--;) { + unsigned BranchOffsetMask = + 
maskTrailingOnes(BranchBit + 1) ^ BranchMask; + unsigned BranchWidth = 1 << BranchBit; + Value *BranchWidthValue = ConstantInt::get(LengthType, BranchWidth); + Value *BranchOffsetMaskValue = + ConstantInt::get(LengthType, BranchOffsetMask); + + BasicBlock *BB = CI->getParent(); + BasicBlock *ContBB = BB->splitBasicBlock(CI, "cont"); + BasicBlock *BranchBB = ContBB->splitBasicBlock(CI, "cond.store"); + + Instruction *OldTerm = BB->getTerminator(); + Value *BranchPredicate = Builder.CreateICmpUGE(Length, BranchWidthValue); + BranchInst::Create(BranchBB, ContBB, BranchPredicate, OldTerm); + OldTerm->eraseFromParent(); + + // auto *BranchTy = VectorType::get(EltTy, BranchWidth); + // auto *BranchPtrTy = BranchTy->getPointerTo(PtrAS); + const Align BranchAlignment = + commonAlignment(Alignment, EltTy->getPrimitiveSizeInBits() / 8); + + Builder.SetInsertPoint(BranchBB->getTerminator()); + Value *BranchOffsetValue = Builder.CreateAnd(Length, BranchOffsetMaskValue); + // Value *BranchPtr = Builder.CreatePointerCast( + // Builder.CreateInBoundsGEP(BasePtr, BranchOffsetValue), + // BranchPtrTy); + Value *BranchPtr = Builder.CreateInBoundsGEP(BasePtr, BranchOffsetValue); + + // todo: investigate possibilities for vectorizing variable-offset + // extractelements + for (unsigned i = 0; i < BranchWidth; ++i) { + auto *vi = ConstantInt::get(LengthType, i); + Value *BranchLoad = Builder.CreateExtractElement( + Src, Builder.CreateAdd(BranchOffsetValue, vi)); + Builder.CreateAlignedStore( + BranchLoad, + Builder.CreateInBoundsGEP(BasePtr->getType(), BranchPtr, vi), + BranchAlignment); + } + } + CI->eraseFromParent(); + ModifiedDT = true; + return; +} + +bool ScalarizeVariableLengthMemIntrin::runOnFunction(Function &F) { + bool EverMadeChange = false; + + TTI = &getAnalysis().getTTI(F); + DL = &F.getParent()->getDataLayout(); + + bool MadeChange = true; + while (MadeChange) { + MadeChange = false; + for (Function::iterator I = F.begin(); I != F.end();) { + BasicBlock *BB = &*I++; + 
bool ModifiedDTOnIteration = false; + MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration); + + // Restart BB iteration if the dominator tree of the Function was changed + if (ModifiedDTOnIteration) + break; + } + + EverMadeChange |= MadeChange; + } + + return EverMadeChange; +} + +bool ScalarizeVariableLengthMemIntrin::optimizeBlock(BasicBlock &BB, + bool &ModifiedDT) { + bool MadeChange = false; + + BasicBlock::iterator CurInstIterator = BB.begin(); + while (CurInstIterator != BB.end()) { + if (CallInst *CI = dyn_cast(&*CurInstIterator++)) + MadeChange |= optimizeCallInst(CI, ModifiedDT); + if (ModifiedDT) + return true; + } + + return MadeChange; +} + +bool ScalarizeVariableLengthMemIntrin::optimizeCallInst(CallInst *CI, + bool &ModifiedDT) { + IntrinsicInst *II = dyn_cast(CI); + if (II) { + switch (II->getIntrinsicID()) { + default: + break; + case Intrinsic::variable_length_load: + // Scalarize unsupported vector variable-length load + if (TTI->isLegalVariableLengthLoad( + CI->getType(), + cast(CI->getArgOperand(1))->getAlignValue(), + CI->getArgOperand(2)->getType())) + return false; + scalarizeVariableLengthLoad(CI, ModifiedDT); + return true; + case Intrinsic::variable_length_store: + if (TTI->isLegalVariableLengthStore( + CI->getArgOperand(0)->getType(), + cast(CI->getArgOperand(2))->getAlignValue(), + CI->getArgOperand(3)->getType())) + return false; + scalarizeVariableLengthStore(CI, ModifiedDT); + return true; + } + } + + return false; +} diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -504,6 +504,8 @@ SDValue visitVECTOR_SHUFFLE(SDNode *N); SDValue visitSCALAR_TO_VECTOR(SDNode *N); SDValue visitINSERT_SUBVECTOR(SDNode *N); + SDValue visitVARLEN_STORE(SDNode *N); + SDValue visitVARLEN_LOAD(SDNode *N); SDValue visitMLOAD(SDNode *N); SDValue visitMSTORE(SDNode *N); SDValue 
visitMGATHER(SDNode *N); @@ -1685,6 +1687,10 @@ case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); + case ISD::VARLEN_LOAD: + return visitVARLEN_LOAD(N); + case ISD::VARLEN_STORE: + return visitVARLEN_STORE(N); case ISD::MGATHER: return visitMGATHER(N); case ISD::MLOAD: return visitMLOAD(N); case ISD::MSCATTER: return visitMSCATTER(N); @@ -9223,6 +9229,20 @@ return SDValue(); } +SDValue DAGCombiner::visitVARLEN_LOAD(SDNode *N) { + // Try transforming N to an indexed load. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + return SDValue(); +} + +SDValue DAGCombiner::visitVARLEN_STORE(SDNode *N) { + // Try transforming N to an indexed store. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + return SDValue(); +} + SDValue DAGCombiner::visitMLOAD(SDNode *N) { MaskedLoadSDNode *MLD = cast(N); SDValue Mask = MLD->getMask(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1137,6 +1137,11 @@ Node->getValueType(0), Scale); break; } + case ISD::VARLEN_STORE: + Action = TLI.getOperationAction( + Node->getOpcode(), + cast(Node)->getValue().getValueType()); + break; case ISD::MSCATTER: Action = TLI.getOperationAction(Node->getOpcode(), cast(Node)->getValue().getValueType()); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -68,6 +68,9 @@ case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break; case ISD::LOAD: Res = PromoteIntRes_LOAD(cast(N)); break; + case 
ISD::VARLEN_LOAD: + Res = PromoteIntRes_VARLEN_LOAD(cast(N)); + break; case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast(N)); break; case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast(N)); @@ -654,6 +657,22 @@ return Res; } +SDValue +DAGTypeLegalizer::PromoteIntRes_VARLEN_LOAD(VariableLengthLoadSDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue ExtPassThru = GetPromotedInteger(N->getPassThru()); + + SDLoc dl(N); + SDValue Res = DAG.getVariableLengthLoad( + NVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), N->getLength(), + ExtPassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(), + ISD::EXTLOAD); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue ExtPassThru = GetPromotedInteger(N->getPassThru()); @@ -1431,6 +1450,12 @@ case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break; case ISD::STORE: Res = PromoteIntOp_STORE(cast(N), OpNo); break; + case ISD::VARLEN_LOAD: + Res = PromoteIntOp_VARLEN_LOAD(cast(N), OpNo); + break; + case ISD::VARLEN_STORE: + Res = PromoteIntOp_VARLEN_STORE(cast(N), OpNo); + break; case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast(N), OpNo); break; case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast(N), @@ -1748,6 +1773,41 @@ N->getMemoryVT(), N->getMemOperand()); } +SDValue DAGTypeLegalizer::PromoteIntOp_VARLEN_LOAD(VariableLengthLoadSDNode *N, + unsigned OpNo) { + assert(OpNo == 3 && "Only know how to promote the length!"); + SDValue Length = GetPromotedInteger(N->getOperand(3)); + SmallVector NewOps(N->op_begin(), N->op_end()); + NewOps[OpNo] = Length; + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + +SDValue +DAGTypeLegalizer::PromoteIntOp_VARLEN_STORE(VariableLengthStoreSDNode 
*N, + unsigned OpNo) { + SDValue DataOp = N->getValue(); + SDValue Length = N->getLength(); + SDLoc dl(N); + + bool TruncateStore = false; + if (OpNo == 4) { + Length = GetPromotedInteger(Length); + // Update in place. + SmallVector NewOps(N->op_begin(), N->op_end()); + NewOps[4] = Length; + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); + } else { // Data operand + assert(OpNo == 1 && "Unexpected operand for promotion"); + DataOp = GetPromotedInteger(DataOp); + TruncateStore = true; + } + + return DAG.getVariableLengthStore(N->getChain(), dl, DataOp, N->getBasePtr(), + N->getOffset(), Length, N->getMemoryVT(), + N->getMemOperand(), N->getAddressingMode(), + TruncateStore); +} + SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo) { diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -319,6 +319,7 @@ SDValue PromoteIntRes_FREEZE(SDNode *N); SDValue PromoteIntRes_INT_EXTEND(SDNode *N); SDValue PromoteIntRes_LOAD(LoadSDNode *N); + SDValue PromoteIntRes_VARLEN_LOAD(VariableLengthLoadSDNode *N); SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N); SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N); SDValue PromoteIntRes_Overflow(SDNode *N); @@ -377,6 +378,9 @@ SDValue PromoteIntOp_UINT_TO_FP(SDNode *N); SDValue PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N); SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N); + SDValue PromoteIntOp_VARLEN_LOAD(VariableLengthLoadSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_VARLEN_STORE(VariableLengthStoreSDNode *N, + unsigned OpNo); SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo); SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); @@ -867,6 +871,7 @@ SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); 
SDValue WidenVecRes_LOAD(SDNode* N); + SDValue WidenVecRes_VARLEN_LOAD(VariableLengthLoadSDNode *N); SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N); SDValue WidenVecRes_ScalarOp(SDNode* N); @@ -899,6 +904,7 @@ SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); + SDValue WidenVecOp_VARLEN_STORE(SDNode *N, unsigned OpNo); SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo); SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2785,6 +2785,9 @@ case ISD::VECTOR_SHUFFLE: Res = WidenVecRes_VECTOR_SHUFFLE(cast(N)); break; + case ISD::VARLEN_LOAD: + Res = WidenVecRes_VARLEN_LOAD(cast(N)); + break; case ISD::MLOAD: Res = WidenVecRes_MLOAD(cast(N)); break; @@ -3791,6 +3794,22 @@ return Result; } +SDValue DAGTypeLegalizer::WidenVecRes_VARLEN_LOAD(VariableLengthLoadSDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue PassThru = GetWidenedVector(N->getPassThru()); + ISD::LoadExtType ExtType = N->getExtensionType(); + SDLoc dl(N); + + SDValue Res = DAG.getVariableLengthLoad( + WidenVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), + N->getLength(), PassThru, N->getMemoryVT(), N->getMemOperand(), + N->getAddressingMode(), ExtType, N->isExpandingLoad()); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0)); @@ -4266,6 +4285,9 @@ case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; + case ISD::VARLEN_STORE: + Res = WidenVecOp_VARLEN_STORE(N, OpNo); + break; case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break; case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break; case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break; @@ -4578,6 +4600,20 @@ return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain); } +SDValue DAGTypeLegalizer::WidenVecOp_VARLEN_STORE(SDNode *N, unsigned OpNo) { + assert((OpNo == 1) && "Can widen only data operand of varlen_store"); + auto *ST = cast(N); + SDValue StVal = ST->getValue(); + SDLoc dl(N); + + StVal = GetWidenedVector(StVal); + // Forward the original truncating flag instead of resetting it to false. + return DAG.getVariableLengthStore(ST->getChain(), dl, StVal, ST->getBasePtr(), + ST->getOffset(), ST->getLength(), + ST->getMemoryVT(), ST->getMemOperand(), + ST->getAddressingMode(), ST->isTruncatingStore()); +} + SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { assert((OpNo == 1 || OpNo == 3) && "Can widen only data or mask operand of mstore"); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7206,6 +7206,98 @@ return V; } +SDValue SelectionDAG::getVariableLengthLoad( + EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, + SDValue Length, SDValue PassThru, EVT MemVT, MachineMemOperand *MMO, + ISD::MemIndexedMode AM, ISD::LoadExtType ExtTy, bool IsExpanding) { + bool Indexed = (AM != ISD::UNINDEXED); + assert((Indexed || Offset.isUndef()) 
&& + "Unindexed variable-length load with an offset!"); + SDVTList VTs = Indexed ? getVTList(VT, Base.getValueType(), MVT::Other) + : getVTList(VT, MVT::Other); + SDValue Ops[] = {Chain, Base, Offset, Length, PassThru}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::VARLEN_LOAD, VTs, Ops); + ID.AddInteger(MemVT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData( + dl.getIROrder(), VTs, AM, ExtTy, IsExpanding, MemVT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = newSDNode(dl.getIROrder(), + dl.getDebugLoc(), VTs, AM, + ExtTy, IsExpanding, MemVT, MMO); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getIndexedVariableLengthLoad(SDValue OrigLoad, + const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { + auto *LD = cast(OrigLoad); + assert(LD->getOffset().isUndef() && + "Variable-length load is already a indexed load!"); + return getVariableLengthLoad(OrigLoad.getValueType(), dl, LD->getChain(), + Base, Offset, LD->getLength(), LD->getPassThru(), + LD->getMemoryVT(), LD->getMemOperand(), AM, + LD->getExtensionType(), LD->isExpandingLoad()); +} + +SDValue SelectionDAG::getVariableLengthStore(SDValue Chain, const SDLoc &dl, + SDValue Val, SDValue Base, + SDValue Offset, SDValue Length, + EVT MemVT, MachineMemOperand *MMO, + ISD::MemIndexedMode AM, + bool IsTruncating) { + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + bool Indexed = (AM != ISD::UNINDEXED); + assert((Indexed || Offset.isUndef()) && + "Unindexed variable-length store with an offset!"); + SDVTList VTs = Indexed ? 
getVTList(Base.getValueType(), MVT::Other) + : getVTList(MVT::Other); + SDValue Ops[] = {Chain, Val, Base, Offset, Length}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::VARLEN_STORE, VTs, Ops); + ID.AddInteger(MemVT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData( + dl.getIROrder(), VTs, AM, IsTruncating, MemVT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = newSDNode( + dl.getIROrder(), dl.getDebugLoc(), VTs, AM, IsTruncating, MemVT, MMO); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getIndexedVariableLengthStore(SDValue OrigStore, + const SDLoc &dl, + SDValue Base, + SDValue Offset, + ISD::MemIndexedMode AM) { + auto *ST = cast(OrigStore); + assert(ST->getOffset().isUndef() && + "Variable-length store is already a indexed store!"); + return getVariableLengthStore( + ST->getChain(), dl, ST->getValue(), Base, Offset, ST->getLength(), + ST->getMemoryVT(), ST->getMemOperand(), AM, ST->isTruncatingStore()); +} + SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue PassThru, EVT MemVT, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -738,6 +738,8 @@ void visitAlloca(const AllocaInst &I); void visitLoad(const LoadInst &I); void visitStore(const StoreInst &I); + void visitVariableLengthLoad(const CallInst &I, bool IsExpanding = false); + void visitVariableLengthStore(const CallInst &I); void visitMaskedLoad(const CallInst &I, bool IsExpanding = false); void visitMaskedStore(const CallInst &I, bool 
IsCompressing = false); void visitMaskedGather(const CallInst &I); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4184,6 +4184,73 @@ DAG.setRoot(StoreNode); } +void SelectionDAGBuilder::visitVariableLengthLoad(const CallInst &I, + bool IsExpanding) { + SDLoc sdl = getCurSDLoc(); + unsigned i_op = 0; + Value *PtrOperand = I.getArgOperand(i_op++); + SDValue Ptr = getValue(PtrOperand); + MaybeAlign Alignment; + if (IsExpanding) + Alignment = None; + else + Alignment = + MaybeAlign(cast(I.getArgOperand(i_op++))->getZExtValue()); + SDValue Length = getValue(I.getArgOperand(i_op++)); + SDValue Src0 = getValue(I.getArgOperand(i_op++)); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); + EVT VT = Src0.getValueType(); + if (!Alignment) + Alignment = DAG.getEVTAlign(VT); + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + + // Do not serialize variable-length loads of constant memory with anything. + MemoryLocation ML; + if (VT.isScalableVector()) + ML = MemoryLocation(PtrOperand); + else + ML = MemoryLocation(PtrOperand, + LocationSize::precise( + DAG.getDataLayout().getTypeStoreSize(I.getType())), + AAInfo); + bool AddToChain = !AA || !AA->pointsToConstantMemory(ML); + SDValue InChain = AddToChain ? 
DAG.getRoot() : DAG.getEntryNode(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, + VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges); + SDValue Load = DAG.getVariableLengthLoad( + VT, sdl, InChain, Ptr, Offset, Length, Src0, VT, MMO, ISD::UNINDEXED, + ISD::NON_EXTLOAD, IsExpanding); + if (AddToChain) + PendingLoads.push_back(Load.getValue(1)); + setValue(&I, Load); +} + +void SelectionDAGBuilder::visitVariableLengthStore(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + SDValue Src0 = getValue(I.getArgOperand(0)); + Value *PtrOperand = I.getArgOperand(1); + SDValue Ptr = getValue(PtrOperand); + MaybeAlign Alignment(cast(I.getArgOperand(2))->getZExtValue()); + SDValue Length = getValue(I.getArgOperand(3)); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); + EVT VT = Src0.getValueType(); + if (!Alignment) + Alignment = DAG.getEVTAlign(VT); + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, + VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo); + SDValue StoreNode = DAG.getVariableLengthStore( + getMemoryRoot(), sdl, Src0, Ptr, Offset, Length, VT, MMO, ISD::UNINDEXED, + false /* IsTruncating */); + DAG.setRoot(StoreNode); + setValue(&I, StoreNode); +} + void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, bool IsCompressing) { SDLoc sdl = getCurSDLoc(); @@ -6029,6 +6096,12 @@ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other, getRoot())); return; + case Intrinsic::variable_length_load: + visitVariableLengthLoad(I); + return; + case Intrinsic::variable_length_store: + visitVariableLengthStore(I); + return; case Intrinsic::masked_gather: visitMaskedGather(I); return; diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp --- a/llvm/lib/CodeGen/TargetPassConfig.cpp 
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -699,6 +699,7 @@ // Add scalarization of target's unsupported masked memory intrinsics pass. // the unsupported intrinsic will be replaced with a chain of basic blocks, // that stores/loads element one-by-one if the appropriate mask bit is set. + addPass(createScalarizeVariableLengthMemIntrinPass()); addPass(createScalarizeMaskedMemIntrinPass()); // Expand reduction intrinsics into shuffle sequences if the target wants to. diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -460,6 +460,54 @@ return createCallHelper(FnAssume, Ops, this); } +/// Create a call to a Variable-Length Load intrinsic. +/// \p Ptr - base pointer for the load +/// \p Alignment - alignment of the source location +/// \p Length - integer which indicates the extent of vector lanes that +/// should be accessed in memory +/// \p PassThru - value for the lanes that are not loaded; undef if null +/// \p Name - name of the result variable +CallInst *IRBuilderBase::CreateVariableLengthLoad(Value *Ptr, Align Alignment, + Value *Length, + Value *PassThru, + const Twine &Name) { + auto *PtrTy = cast(Ptr->getType()); + Type *DataTy = PtrTy->getElementType(); + auto *LengthTy = cast(Length->getType()); + assert(DataTy->isVectorTy() && "Ptr should point to a vector"); + assert(LengthTy->isIntegerTy() && "Length should be an integer"); + if (!PassThru) + PassThru = UndefValue::get(DataTy); + Type *OverloadedTypes[] = {DataTy, PtrTy, LengthTy}; + Value *Ops[] = {Ptr, getInt32(Alignment.value()), Length, PassThru}; + Module *M = BB->getParent()->getParent(); + Function *IntrinDecl = Intrinsic::getDeclaration( + M, Intrinsic::variable_length_load, OverloadedTypes); + return createCallHelper(IntrinDecl, Ops, this, Name); +} + +/// Create a call to a Variable-Length Store intrinsic. 
+/// \p Val - data to be stored, +/// \p Ptr - base pointer for the store +/// \p Alignment - alignment of the destination location +/// \p Length - integer which indicates the extent of vector lanes that +/// should be accessed in memory +CallInst *IRBuilderBase::CreateVariableLengthStore(Value *Val, Value *Ptr, + Align Alignment, + Value *Length) { + auto *PtrTy = cast(Ptr->getType()); + Type *DataTy = PtrTy->getElementType(); + auto *LengthTy = cast(Length->getType()); + assert(DataTy->isVectorTy() && "Ptr should point to a vector"); + assert(LengthTy->isIntegerTy() && "Length should be an integer"); + Type *OverloadedTypes[] = {DataTy, PtrTy, LengthTy}; + Value *Ops[] = {Val, Ptr, getInt32(Alignment.value()), Length}; + Module *M = BB->getParent()->getParent(); + Function *IntrinDecl = Intrinsic::getDeclaration( + M, Intrinsic::variable_length_store, OverloadedTypes); + return createCallHelper(IntrinDecl, Ops, this); +} + /// Create a call to a Masked Load intrinsic. /// \p Ptr - base pointer for the load /// \p Alignment - alignment of the source location diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -316,6 +316,7 @@ initializeConstantHoistingLegacyPassPass(*Registry); initializeScalarOpts(*Registry); initializeVectorization(*Registry); + initializeScalarizeVariableLengthMemIntrinPass(*Registry); initializeScalarizeMaskedMemIntrinPass(*Registry); initializeExpandReductionsPass(*Registry); initializeHardwareLoopsPass(*Registry); diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -496,16 +496,27 @@ "si-", "gcn-", "amdgpu-", "aarch64-", "amdgcn-"}; std::vector PassNameContain = {"ehprepare"}; std::vector PassNameExact = { - "safe-stack", "cost-model", - "codegenprepare", "interleaved-load-combine", - "unreachableblockelim", "scalarize-masked-mem-intrin", - "verify-safepoint-ir", "divergence", - "infer-address-spaces", 
"atomic-expand", - "hardware-loops", "type-promotion", - "mve-tail-predication", "interleaved-access", - "global-merge", "pre-isel-intrinsic-lowering", - "expand-reductions", "indirectbr-expand", - "generic-to-nvvm", "expandmemcmp"}; + "safe-stack", + "cost-model", + "codegenprepare", + "interleaved-load-combine", + "unreachableblockelim", + "scalarize-variable-length-mem-intrin", + "scalarize-masked-mem-intrin", + "verify-safepoint-ir", + "divergence", + "infer-address-spaces", + "atomic-expand", + "hardware-loops", + "type-promotion", + "mve-tail-predication", + "interleaved-access", + "global-merge", + "pre-isel-intrinsic-lowering", + "expand-reductions", + "indirectbr-expand", + "generic-to-nvvm", + "expandmemcmp"}; for (const auto &P : PassNamePrefix) if (Pass.startswith(P)) return true; @@ -561,6 +572,7 @@ // For codegen passes, only passes that do IR to IR transformation are // supported. initializeExpandMemCmpPassPass(Registry); + initializeScalarizeVariableLengthMemIntrinPass(Registry); initializeScalarizeMaskedMemIntrinPass(Registry); initializeCodeGenPreparePass(Registry); initializeAtomicExpandPass(Registry);