Index: llvm/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -819,7 +819,7 @@ bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace = 0, Align Alignment = Align(1), - bool *Fast = nullptr) const; + unsigned *Fast = nullptr) const; /// Return hardware support for population count. PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; @@ -1616,7 +1616,7 @@ unsigned BitWidth, unsigned AddressSpace, Align Alignment, - bool *Fast) = 0; + unsigned *Fast) = 0; virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0; virtual bool haveFastSqrt(Type *Ty) = 0; virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0; @@ -2081,7 +2081,7 @@ } bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, - bool *Fast) override { + unsigned *Fast) override { return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace, Alignment, Fast); } Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -352,7 +352,7 @@ bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, - bool *Fast) const { + unsigned *Fast) const { return false; } Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -251,7 +251,7 @@ /// @{ bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, - bool *Fast) const { + unsigned *Fast) const { EVT E = EVT::getIntegerVT(Context, BitWidth); return getTLI()->allowsMisalignedMemoryAccesses( E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast); Index: llvm/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetLowering.h +++ llvm/include/llvm/CodeGen/TargetLowering.h @@ -580,7 +580,7 @@ getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT()) return false; - bool Fast = false; + unsigned Fast = 0; return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT, MMO, &Fast) && Fast; } @@ -1679,15 +1679,16 @@ /// /// This function returns true if the target allows unaligned memory accesses /// of the specified type in the given address space. If true, it also returns - /// whether the unaligned memory access is "fast" in the last argument by - /// reference. This is used, for example, in situations where an array - /// copy/move/set is converted to a sequence of store operations. Its use - /// helps to ensure that such replacements don't generate code that causes an - /// alignment error (trap) on the target machine. + /// a relative speed of the unaligned memory access in the last argument by + /// reference. The higher the speed number, the faster the operation compared + /// to a number returned by another such call. This is used, for example, in + /// situations where an array copy/move/set is converted to a sequence of + /// store operations.
Its use helps to ensure that such replacements don't + /// generate code that causes an alignment error (trap) on the target machine. virtual bool allowsMisalignedMemoryAccesses( EVT, unsigned AddrSpace = 0, Align Alignment = Align(1), MachineMemOperand::Flags Flags = MachineMemOperand::MONone, - bool * /*Fast*/ = nullptr) const { + unsigned * /*Fast*/ = nullptr) const { return false; } @@ -1695,51 +1696,51 @@ virtual bool allowsMisalignedMemoryAccesses( LLT, unsigned AddrSpace = 0, Align Alignment = Align(1), MachineMemOperand::Flags Flags = MachineMemOperand::MONone, - bool * /*Fast*/ = nullptr) const { + unsigned * /*Fast*/ = nullptr) const { return false; } /// This function returns true if the memory access is aligned or if the /// target allows this specific unaligned memory access. If the access is - /// allowed, the optional final parameter returns if the access is also fast - /// (as defined by the target). + /// allowed, the optional final parameter returns a relative speed of the + /// access (as defined by the target). bool allowsMemoryAccessForAlignment( LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1), MachineMemOperand::Flags Flags = MachineMemOperand::MONone, - bool *Fast = nullptr) const; + unsigned *Fast = nullptr) const; /// Return true if the memory access of this type is aligned or if the target /// allows this specific unaligned access for the given MachineMemOperand. - /// If the access is allowed, the optional final parameter returns if the - /// access is also fast (as defined by the target). + /// If the access is allowed, the optional final parameter returns a relative + /// speed of the access (as defined by the target). bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, const MachineMemOperand &MMO, - bool *Fast = nullptr) const; + unsigned *Fast = nullptr) const; /// Return true if the target supports a memory access of this type for the /// given address space and alignment. If the access is allowed, the optional - /// final parameter returns if the access is also fast (as defined by the - /// target). + /// final parameter returns the relative speed of the access (as defined by + /// the target). virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1), MachineMemOperand::Flags Flags = MachineMemOperand::MONone, - bool *Fast = nullptr) const; + unsigned *Fast = nullptr) const; /// Return true if the target supports a memory access of this type for the /// given MachineMemOperand. If the access is allowed, the optional - /// final parameter returns if the access is also fast (as defined by the + /// final parameter returns the relative access speed (as defined by the /// target). bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, const MachineMemOperand &MMO, - bool *Fast = nullptr) const; + unsigned *Fast = nullptr) const; /// LLT handling variant. bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, LLT Ty, const MachineMemOperand &MMO, - bool *Fast = nullptr) const; + unsigned *Fast = nullptr) const; /// Returns the target specific optimal type for load and store operations as /// a result of memset, memcpy, and memmove lowering. 
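(Illustrative note, not part of the patch.) With this change the out-parameter reports a relative speed rather than a plain flag: zero still means "not fast", any nonzero value means "fast", and the magnitude is only meaningful when compared with the value returned by another query. Below is a minimal caller-side sketch under those assumptions; it presumes the usual LLVM headers and the llvm namespace, and every name in it (preferWideAccess, the VT and alignment arguments) is hypothetical rather than taken from this patch.

// Hypothetical caller-side sketch: compare the target-reported relative speeds
// of two candidate access types. Existing "&& Fast" checks keep working because
// any nonzero speed converts to true.
static bool preferWideAccess(const TargetLowering &TLI, LLVMContext &Ctx,
                             const DataLayout &DL, EVT WideVT, EVT NarrowVT,
                             unsigned AddrSpace, Align Alignment) {
  unsigned WideSpeed = 0, NarrowSpeed = 0;
  bool WideOK = TLI.allowsMemoryAccess(Ctx, DL, WideVT, AddrSpace, Alignment,
                                       MachineMemOperand::MONone, &WideSpeed);
  bool NarrowOK = TLI.allowsMemoryAccess(Ctx, DL, NarrowVT, AddrSpace, Alignment,
                                         MachineMemOperand::MONone, &NarrowSpeed);
  // Prefer the wide access only if it is allowed and the target does not report
  // the narrow access as strictly faster.
  return WideOK && (!NarrowOK || WideSpeed >= NarrowSpeed);
}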
Index: llvm/lib/Analysis/TargetTransformInfo.cpp =================================================================== --- llvm/lib/Analysis/TargetTransformInfo.cpp +++ llvm/lib/Analysis/TargetTransformInfo.cpp @@ -532,11 +532,12 @@ return TTIImpl->isFPVectorizationPotentiallyUnsafe(); } -bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context, - unsigned BitWidth, - unsigned AddressSpace, - Align Alignment, - bool *Fast) const { +bool +TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context, + unsigned BitWidth, + unsigned AddressSpace, + Align Alignment, + unsigned *Fast) const { return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace, Alignment, Fast); } Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -3504,7 +3504,7 @@ // Load must be allowed and fast on the target. LLVMContext &C = MF.getFunction().getContext(); auto &DL = MF.getDataLayout(); - bool Fast = false; + unsigned Fast = 0; if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) || !Fast) return false; @@ -3709,7 +3709,7 @@ const auto &DL = LastStore.getMF()->getDataLayout(); auto &C = LastStore.getMF()->getFunction().getContext(); // Check that a store of the wide type is both allowed and fast on the target - bool Fast = false; + unsigned Fast = 0; bool Allowed = getTargetLowering().allowsMemoryAccess( C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast); if (!Allowed || !Fast) Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -7428,7 +7428,7 @@ // If the new LLT cannot cover all of the remaining bits, then consider // issuing a (or a pair of) unaligned and overlapping load / store. - bool Fast; + unsigned Fast; // Need to get a VT equivalent for allowMisalignedMemoryAccesses(). MVT VT = getMVTForLLT(Ty); if (NumMemOps && Op.allowOverlap() && NewTySize < Size && Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7911,7 +7911,7 @@ // Check that a store of the wide type is both allowed and fast on the target const DataLayout &Layout = DAG.getDataLayout(); - bool Fast = false; + unsigned Fast = 0; bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT, *FirstStore->getMemOperand(), &Fast); if (!Allowed || !Fast) @@ -8141,7 +8141,7 @@ return SDValue(); // Check that a load of the wide type is both allowed and fast on the target - bool Fast = false; + unsigned Fast = 0; bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, *FirstLoad->getMemOperand(), &Fast); @@ -9610,7 +9610,7 @@ uint64_t PtrOff = IsFSHL ? 
(((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8); Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff); - bool Fast = false; + unsigned Fast = 0; if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, RHS->getAddressSpace(), NewAlign, RHS->getMemOperand()->getFlags(), &Fast) && @@ -13365,7 +13365,7 @@ LD1->getAddressSpace() != LD2->getAddressSpace()) return SDValue(); - bool LD1Fast = false; + unsigned LD1Fast = 0; EVT LD1VT = LD1->getValueType(0); unsigned LD1Bytes = LD1VT.getStoreSize(); if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && @@ -17014,7 +17014,7 @@ // Check if it will be merged with the load. // 1. Check the alignment / fast memory access constraint. - bool IsFast = false; + unsigned IsFast = 0; if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT, Origin->getAddressSpace(), getAlign(), Origin->getMemOperand()->getFlags(), &IsFast) || @@ -17504,7 +17504,7 @@ if (DAG.getDataLayout().isBigEndian()) PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; - bool IsFast = false; + unsigned IsFast = 0; Align NewAlign = commonAlignment(LD->getAlign(), PtrOff); if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT, LD->getAddressSpace(), NewAlign, @@ -17563,7 +17563,7 @@ if (VTSize.isScalable()) return SDValue(); - bool FastLD = false, FastST = false; + unsigned FastLD = 0, FastST = 0; EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize()); if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || !TLI.isOperationLegal(ISD::STORE, IntVT) || @@ -18172,7 +18172,7 @@ // Find a legal type for the constant store. unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); - bool IsFast = false; + unsigned IsFast = 0; // Break early when size is too large to be legal. if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) @@ -18282,7 +18282,7 @@ // Find a legal type for the vector store. unsigned Elts = (i + 1) * NumMemElts; EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); - bool IsFast = false; + unsigned IsFast = 0; // Break early when size is too large to be legal. if (Ty.getSizeInBits() > MaximumLegalStoreInBits) @@ -18435,8 +18435,8 @@ if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) break; - bool IsFastSt = false; - bool IsFastLd = false; + unsigned IsFastSt = 0; + unsigned IsFastLd = 0; // Don't try vector types if we need a rotate. We may still fail the // legality checks for the integer type, but we can't handle the rotate // case with vectors. @@ -19431,7 +19431,7 @@ Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8); } - bool IsFast = false; + unsigned IsFast = 0; if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT, OriginalLoad->getAddressSpace(), Alignment, OriginalLoad->getMemOperand()->getFlags(), Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -259,7 +259,7 @@ // If the new VT cannot cover all of the remaining bits, then consider // issuing a (or a pair of) unaligned and overlapping load / store. - bool Fast; + unsigned Fast; if (NumMemOps && Op.allowOverlap() && NewVTSize < Size && allowsMisalignedMemoryAccesses( VT, DstAS, Op.isFixedDstAlign() ? 
Op.getDstAlign() : Align(1), Index: llvm/lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- llvm/lib/CodeGen/TargetLoweringBase.cpp +++ llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1670,7 +1670,7 @@ bool TargetLoweringBase::allowsMemoryAccessForAlignment( LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, - Align Alignment, MachineMemOperand::Flags Flags, bool *Fast) const { + Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const { // Check if the specified alignment is sufficient based on the data layout. // TODO: While using the data layout works in practice, a better solution // would be to implement this check directly (make this a virtual function). @@ -1680,7 +1680,7 @@ if (VT.isZeroSized() || Alignment >= DL.getABITypeAlign(Ty)) { // Assume that an access that meets the ABI-specified alignment is fast. if (Fast != nullptr) - *Fast = true; + *Fast = 1; return true; } @@ -1690,7 +1690,7 @@ bool TargetLoweringBase::allowsMemoryAccessForAlignment( LLVMContext &Context, const DataLayout &DL, EVT VT, - const MachineMemOperand &MMO, bool *Fast) const { + const MachineMemOperand &MMO, unsigned *Fast) const { return allowsMemoryAccessForAlignment(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(), MMO.getFlags(), Fast); } @@ -1699,7 +1699,7 @@ const DataLayout &DL, EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, - bool *Fast) const { + unsigned *Fast) const { return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace, Alignment, Flags, Fast); } @@ -1707,7 +1707,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, const MachineMemOperand &MMO, - bool *Fast) const { + unsigned *Fast) const { return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(), MMO.getFlags(), Fast); } @@ -1715,7 +1715,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, LLT Ty, const MachineMemOperand &MMO, - bool *Fast) const { + unsigned *Fast) const { EVT VT = getApproximateEVTForLLT(Ty, DL, Context); return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(), MMO.getFlags(), Fast); Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -526,12 +526,12 @@ bool allowsMisalignedMemoryAccesses( EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1), MachineMemOperand::Flags Flags = MachineMemOperand::MONone, - bool *Fast = nullptr) const override; + unsigned *Fast = nullptr) const override; /// LLT variant. bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, - bool *Fast = nullptr) const override; + unsigned *Fast = nullptr) const override; /// Provide custom lowering hooks for some operations. 
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1898,7 +1898,7 @@ bool AArch64TargetLowering::allowsMisalignedMemoryAccesses( EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, - bool *Fast) const { + unsigned *Fast) const { if (Subtarget->requiresStrictAlign()) return false; @@ -1923,7 +1923,7 @@ // Same as above but handling LLTs instead. bool AArch64TargetLowering::allowsMisalignedMemoryAccesses( LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, - bool *Fast) const { + unsigned *Fast) const { if (Subtarget->requiresStrictAlign()) return false; @@ -13080,7 +13080,7 @@ auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) { if (Op.isAligned(AlignCheck)) return true; - bool Fast; + unsigned Fast; return allowsMisalignedMemoryAccesses(VT, 0, Align(1), MachineMemOperand::MONone, &Fast) && Fast; @@ -13110,7 +13110,7 @@ auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) { if (Op.isAligned(AlignCheck)) return true; - bool Fast; + unsigned Fast; return allowsMisalignedMemoryAccesses(VT, 0, Align(1), MachineMemOperand::MONone, &Fast) && Fast; Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -809,7 +809,7 @@ if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32)) return false; - bool Fast = false; + unsigned Fast = 0; return allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), CastTy, MMO, &Fast) && Fast; @@ -2988,7 +2988,7 @@ unsigned Size = VT.getStoreSize(); Align Alignment = LN->getAlign(); if (Alignment < Size && isTypeLegal(VT)) { - bool IsFast; + unsigned IsFast; unsigned AS = LN->getAddressSpace(); // Expand unaligned loads earlier than legalization. Due to visitation order @@ -3041,7 +3041,7 @@ SelectionDAG &DAG = DCI.DAG; Align Alignment = SN->getAlign(); if (Alignment < Size && isTypeLegal(VT)) { - bool IsFast; + unsigned IsFast; unsigned AS = SN->getAddressSpace(); // Expand unaligned stores earlier than legalization. Due to visitation Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -406,7 +406,7 @@ // Do not widen if it would introduce a slow unaligned load. 
const SITargetLowering *TLI = ST.getTargetLowering(); - bool Fast = false; + unsigned Fast = 0; return TLI->allowsMisalignedMemoryAccessesImpl( RoundedSize, AddrSpace, Align(AlignInBits / 8), MachineMemOperand::MOLoad, &Fast) && Index: llvm/lib/Target/AMDGPU/R600ISelLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/R600ISelLowering.h +++ llvm/lib/Target/AMDGPU/R600ISelLowering.h @@ -52,7 +52,7 @@ bool allowsMisalignedMemoryAccesses( EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags = MachineMemOperand::MONone, - bool *IsFast = nullptr) const override; + unsigned *IsFast = nullptr) const override; virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOperations) const override { Index: llvm/lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -1572,9 +1572,9 @@ bool R600TargetLowering::allowsMisalignedMemoryAccesses( EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, - bool *IsFast) const { + unsigned *IsFast) const { if (IsFast) - *IsFast = false; + *IsFast = 0; if (!VT.isSimple() || VT == MVT::Other) return false; @@ -1584,7 +1584,7 @@ // TODO: This is a rough estimate. if (IsFast) - *IsFast = true; + *IsFast = 1; return VT.bitsGT(MVT::i32) && Alignment >= Align(4); } Index: llvm/lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.h +++ llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -285,14 +285,14 @@ bool allowsMisalignedMemoryAccessesImpl( unsigned Size, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags = MachineMemOperand::MONone, - bool *IsFast = nullptr) const; + unsigned *IsFast = nullptr) const; bool allowsMisalignedMemoryAccesses( LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags = MachineMemOperand::MONone, - bool *IsFast = nullptr) const override { + unsigned *IsFast = nullptr) const override { if (IsFast) - *IsFast = false; + *IsFast = 0; return allowsMisalignedMemoryAccessesImpl(Ty.getSizeInBits(), AddrSpace, Alignment, Flags, IsFast); } @@ -300,7 +300,7 @@ bool allowsMisalignedMemoryAccesses( EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags = MachineMemOperand::MONone, - bool *IsFast = nullptr) const override; + unsigned *IsFast = nullptr) const override; EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override; Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1529,9 +1529,9 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl( unsigned Size, unsigned AddrSpace, Align Alignment, - MachineMemOperand::Flags Flags, bool *IsFast) const { + MachineMemOperand::Flags Flags, unsigned *IsFast) const { if (IsFast) - *IsFast = false; + *IsFast = 0; if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS || AddrSpace == AMDGPUAS::REGION_ADDRESS) { @@ -1570,7 +1570,7 @@ // ds_write2_b32 depending on the alignment. In either case with either // alignment there is no faster way of doing this. if (IsFast) - *IsFast = true; + *IsFast = 1; return true; } @@ -1610,7 +1610,7 @@ // will be more of them, so overall we will pay less penalty issuing a // single instruction. 
if (IsFast) - *IsFast = Alignment >= RequiredAlignment || Alignment < Align(4); + *IsFast = Alignment >= RequiredAlignment || Alignment < Align(4); return true; } @@ -1673,14 +1673,14 @@ // byte-address are ignored, thus forcing Dword alignment. // This applies to private, global, and constant memory. if (IsFast) - *IsFast = true; + *IsFast = 1; return Size >= 32 && Alignment >= Align(4); } bool SITargetLowering::allowsMisalignedMemoryAccesses( EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, - bool *IsFast) const { + unsigned *IsFast) const { bool Allow = allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace, Alignment, Flags, IsFast); @@ -1693,7 +1693,7 @@ // which would be equally misaligned. // This is only used by the common passes, selection always calls the // allowsMisalignedMemoryAccessesImpl version. - *IsFast = true; + *IsFast = 1; } return Allow; @@ -8770,7 +8770,7 @@ llvm_unreachable("unsupported private_element_size"); } } else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) { - bool Fast = false; + unsigned Fast = 0; auto Flags = Load->getMemOperand()->getFlags(); if (allowsMisalignedMemoryAccessesImpl(MemVT.getSizeInBits(), AS, Load->getAlign(), Flags, &Fast) && @@ -9269,7 +9269,7 @@ llvm_unreachable("unsupported private_element_size"); } } else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) { - bool Fast = false; + unsigned Fast = 0; auto Flags = Store->getMemOperand()->getFlags(); if (allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AS, Store->getAlign(), Flags, &Fast) && Index: llvm/lib/Target/ARM/ARMISelLowering.h =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.h +++ llvm/lib/Target/ARM/ARMISelLowering.h @@ -445,7 +445,7 @@ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, - bool *Fast) const override; + unsigned *Fast) const override; EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override; Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -18738,7 +18738,7 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags, - bool *Fast) const { + unsigned *Fast) const { // Depends what it gets converted into if the type is weird. if (!VT.isSimple()) return false; @@ -18762,7 +18762,7 @@ // A big-endian target may also explicitly support unaligned accesses if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) { if (Fast) - *Fast = true; + *Fast = 1; return true; } } @@ -18774,7 +18774,7 @@ if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 || Ty == MVT::v2i1)) { if (Fast) - *Fast = true; + *Fast = 1; return true; } @@ -18800,7 +18800,7 @@ Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 || Ty == MVT::v2f64) { if (Fast) - *Fast = true; + *Fast = 1; return true; } @@ -18813,7 +18813,7 @@ // See if we can use NEON instructions for this...
if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() && !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) { - bool Fast; + unsigned Fast; if (Op.size() >= 16 && (Op.isAligned(Align(16)) || (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, Align(1), Index: llvm/lib/Target/Hexagon/HexagonISelLowering.h =================================================================== --- llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -308,12 +308,12 @@ bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, - bool *Fast) const override; + unsigned *Fast) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, - bool *Fast) const override; + unsigned *Fast) const override; /// Returns relocation base for the given PIC jumptable. SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) @@ -415,10 +415,10 @@ SDValue opCastElem(SDValue Vec, MVT ElemTy, SelectionDAG &DAG) const; bool allowsHvxMemoryAccess(MVT VecTy, MachineMemOperand::Flags Flags, - bool *Fast) const; + unsigned *Fast) const; bool allowsHvxMisalignedMemoryAccesses(MVT VecTy, MachineMemOperand::Flags Flags, - bool *Fast) const; + unsigned *Fast) const; bool isHvxSingleTy(MVT Ty) const; bool isHvxPairTy(MVT Ty) const; Index: llvm/lib/Target/Hexagon/HexagonISelLowering.cpp =================================================================== --- llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -3555,7 +3555,7 @@ bool HexagonTargetLowering::allowsMemoryAccess( LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, - Align Alignment, MachineMemOperand::Flags Flags, bool *Fast) const { + Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const { MVT SVT = VT.getSimpleVT(); if (Subtarget.isHVXVectorType(SVT, true)) return allowsHvxMemoryAccess(SVT, Flags, Fast); @@ -3565,12 +3565,12 @@ bool HexagonTargetLowering::allowsMisalignedMemoryAccesses( EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, - bool *Fast) const { + unsigned *Fast) const { MVT SVT = VT.getSimpleVT(); if (Subtarget.isHVXVectorType(SVT, true)) return allowsHvxMisalignedMemoryAccesses(SVT, Flags, Fast); if (Fast) - *Fast = false; + *Fast = 0; return false; } Index: llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp =================================================================== --- llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -514,7 +514,7 @@ } bool HexagonTargetLowering::allowsHvxMemoryAccess( - MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const { + MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const { // Bool vectors are excluded by default, but make it explicit to // emphasize that bool vectors cannot be loaded or stored. // Also, disallow double vector stores (to prevent unnecessary @@ -524,17 +524,17 @@ if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false)) return false; if (Fast) - *Fast = true; + *Fast = 1; return true; } bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses( - MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const { + MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const { if (!Subtarget.isHVXVectorType(VecTy)) return false; // XXX Should this be false? vmemu are a bit slower than vmem. 
if (Fast) - *Fast = true; + *Fast = 1; return true; } Index: llvm/lib/Target/Mips/Mips16ISelLowering.h =================================================================== --- llvm/lib/Target/Mips/Mips16ISelLowering.h +++ llvm/lib/Target/Mips/Mips16ISelLowering.h @@ -24,7 +24,7 @@ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, - bool *Fast) const override; + unsigned *Fast) const override; MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, Index: llvm/lib/Target/Mips/Mips16ISelLowering.cpp =================================================================== --- llvm/lib/Target/Mips/Mips16ISelLowering.cpp +++ llvm/lib/Target/Mips/Mips16ISelLowering.cpp @@ -156,7 +156,7 @@ } bool Mips16TargetLowering::allowsMisalignedMemoryAccesses( - EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const { + EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const { return false; } Index: llvm/lib/Target/Mips/MipsSEISelLowering.h =================================================================== --- llvm/lib/Target/Mips/MipsSEISelLowering.h +++ llvm/lib/Target/Mips/MipsSEISelLowering.h @@ -43,7 +43,7 @@ bool allowsMisalignedMemoryAccesses( EVT VT, unsigned AS = 0, Align Alignment = Align(1), MachineMemOperand::Flags Flags = MachineMemOperand::MONone, - bool *Fast = nullptr) const override; + unsigned *Fast = nullptr) const override; SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; Index: llvm/lib/Target/Mips/MipsSEISelLowering.cpp =================================================================== --- llvm/lib/Target/Mips/MipsSEISelLowering.cpp +++ llvm/lib/Target/Mips/MipsSEISelLowering.cpp @@ -415,7 +415,7 @@ } bool MipsSETargetLowering::allowsMisalignedMemoryAccesses( - EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const { + EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const { MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; if (Subtarget.systemSupportsUnalignedAccess()) { @@ -424,7 +424,7 @@ // a hybrid of the two but it's expected that most implementations will // handle the majority of cases in hardware. if (Fast) - *Fast = true; + *Fast = 1; return true; } @@ -432,7 +432,7 @@ case MVT::i64: case MVT::i32: if (Fast) - *Fast = true; + *Fast = 1; return true; default: return false; Index: llvm/lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.h +++ llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1067,7 +1067,7 @@ bool allowsMisalignedMemoryAccesses( EVT VT, unsigned AddrSpace, Align Alignment = Align(1), MachineMemOperand::Flags Flags = MachineMemOperand::MONone, - bool *Fast = nullptr) const override; + unsigned *Fast = nullptr) const override; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster /// than a pair of fmul and fadd instructions. 
fmuladd intrinsics will be Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -16635,7 +16635,7 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align, MachineMemOperand::Flags, - bool *Fast) const { + unsigned *Fast) const { if (DisablePPCUnaligned) return false; @@ -16666,7 +16666,7 @@ return false; if (Fast) - *Fast = true; + *Fast = 1; return true; } Index: llvm/lib/Target/RISCV/RISCVISelLowering.h =================================================================== --- llvm/lib/Target/RISCV/RISCVISelLowering.h +++ llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -528,7 +528,7 @@ bool allowsMisalignedMemoryAccesses( EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1), MachineMemOperand::Flags Flags = MachineMemOperand::MONone, - bool *Fast = nullptr) const override; + unsigned *Fast = nullptr) const override; bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -11805,14 +11805,14 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses( EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, - bool *Fast) const { + unsigned *Fast) const { if (!VT.isVector()) return false; EVT ElemVT = VT.getVectorElementType(); if (Alignment >= ElemVT.getStoreSize()) { if (Fast) - *Fast = true; + *Fast = 1; return true; } Index: llvm/lib/Target/SystemZ/SystemZISelLowering.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -456,7 +456,7 @@ Instruction *I = nullptr) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, - bool *Fast) const override; + unsigned *Fast) const override; bool findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, Index: llvm/lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -862,12 +862,12 @@ } bool SystemZTargetLowering::allowsMisalignedMemoryAccesses( - EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const { + EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const { // Unaligned accesses should never be slower than the expanded version. // We check specifically for aligned accesses in the few cases where // they are required. if (Fast) - *Fast = true; + *Fast = 1; return true; } Index: llvm/lib/Target/VE/VEISelLowering.h =================================================================== --- llvm/lib/Target/VE/VEISelLowering.h +++ llvm/lib/Target/VE/VEISelLowering.h @@ -215,7 +215,7 @@ /// specified type.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align A, MachineMemOperand::Flags Flags, - bool *Fast) const override; + unsigned *Fast) const override; /// Inline Assembly { Index: llvm/lib/Target/VE/VEISelLowering.cpp =================================================================== --- llvm/lib/Target/VE/VEISelLowering.cpp +++ llvm/lib/Target/VE/VEISelLowering.cpp @@ -870,10 +870,10 @@ unsigned AddrSpace, Align A, MachineMemOperand::Flags, - bool *Fast) const { + unsigned *Fast) const { if (Fast) { // It's fast anytime on VE - *Fast = true; + *Fast = 1; } return true; } Index: llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h =================================================================== --- llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -72,7 +72,7 @@ Instruction *I = nullptr) const override; bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, - bool *Fast) const override; + unsigned *Fast) const override; bool isIntDivCheap(EVT VT, AttributeList Attr) const override; bool isVectorLoadExtDesirable(SDValue ExtVal) const override; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; Index: llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp =================================================================== --- llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -780,7 +780,7 @@ bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses( EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/, - MachineMemOperand::Flags /*Flags*/, bool *Fast) const { + MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const { // WebAssembly supports unaligned accesses, though it should be declared // with the p2align attribute on loads and stores which do so, and there // may be a performance impact. We tell LLVM they're "fast" because @@ -788,7 +788,7 @@ // of constants, etc.), WebAssembly implementations will either want the // unaligned access or they'll split anyway. if (Fast) - *Fast = true; + *Fast = 1; return true; } Index: llvm/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.h +++ llvm/lib/Target/X86/X86ISelLowering.h @@ -983,7 +983,7 @@ /// specified type. Returns whether it is "fast" in the last argument. bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, - bool *Fast) const override; + unsigned *Fast) const override; /// Provide custom lowering hooks for some operations. /// Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2577,12 +2577,12 @@ bool X86TargetLowering::allowsMisalignedMemoryAccesses( EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags, - bool *Fast) const { + unsigned *Fast) const { if (Fast) { switch (VT.getSizeInBits()) { default: // 8-byte and under are always assumed to be fast. - *Fast = true; + *Fast = 1; break; case 128: *Fast = !Subtarget.isUnalignedMem16Slow(); @@ -48430,7 +48430,7 @@ // into two 16-byte operations. Also split non-temporal aligned loads on // pre-AVX2 targets as 32-byte loads will lower to regular temporal loads. 
ISD::LoadExtType Ext = Ld->getExtensionType(); - bool Fast; + unsigned Fast; if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() && Ext == ISD::NON_EXTLOAD && ((Ld->isNonTemporal() && !Subtarget.hasInt256() && @@ -48888,7 +48888,7 @@ // If we are saving a 32-byte vector and 32-byte stores are slow, such as on // Sandy Bridge, perform two 16-byte stores. - bool Fast; + unsigned Fast; if (VT.is256BitVector() && StVT == VT && TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, *St->getMemOperand(), &Fast) && @@ -53529,7 +53529,7 @@ // Fold subvector loads into one. // If needed, look through bitcasts to get to the load. if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(Op0))) { - bool Fast; + unsigned Fast; const X86TargetLowering *TLI = Subtarget.getTargetLowering(); if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, *FirstLd->getMemOperand(), &Fast) && Index: llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -1320,7 +1320,7 @@ if (Alignment.value() % SzInBytes == 0) return false; - bool Fast = false; + unsigned Fast = 0; bool Allows = TTI.allowsMisalignedMemoryAccesses(F.getParent()->getContext(), SzInBytes * 8, AddressSpace, Alignment, &Fast);
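(Illustrative note, not part of the patch.) On the target side, the unsigned out-parameter lets a hook grade accesses rather than merely flag them. The sketch below is hypothetical: MyTargetLowering and the StrictAlignOnly predicate are invented for illustration and do not correspond to any in-tree target, but the pattern is what the new signature enables and what later target changes can build on.

// Hypothetical target hook: report a higher relative speed for accesses the
// hardware handles natively and a lower, still nonzero, speed for ones the
// memory subsystem has to split. Callers may compare these values across
// separate queries; any nonzero value still reads as "fast" in boolean context.
bool MyTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    unsigned *Fast) const {
  if (StrictAlignOnly) // invented subtarget predicate
    return false;
  if (Fast)
    *Fast = Alignment >= Align(4) ? 2 : 1;
  return true;
}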