Index: llvm/include/llvm/AsmParser/LLParser.h =================================================================== --- llvm/include/llvm/AsmParser/LLParser.h +++ llvm/include/llvm/AsmParser/LLParser.h @@ -172,9 +172,8 @@ /// getGlobalVal - Get a value with the specified name or ID, creating a /// forward reference record if needed. This can return null if the value /// exists but does not have the right type. - GlobalValue *getGlobalVal(const std::string &N, Type *Ty, LocTy Loc, - bool IsCall); - GlobalValue *getGlobalVal(unsigned ID, Type *Ty, LocTy Loc, bool IsCall); + GlobalValue *getGlobalVal(const std::string &N, Type *Ty, LocTy Loc); + GlobalValue *getGlobalVal(unsigned ID, Type *Ty, LocTy Loc); /// Get a Comdat with the specified name, creating a forward reference /// record if needed. @@ -423,8 +422,8 @@ /// GetVal - Get a value with the specified name or ID, creating a /// forward reference record if needed. This can return null if the value /// exists but does not have the right type. - Value *getVal(const std::string &Name, Type *Ty, LocTy Loc, bool IsCall); - Value *getVal(unsigned ID, Type *Ty, LocTy Loc, bool IsCall); + Value *getVal(const std::string &Name, Type *Ty, LocTy Loc); + Value *getVal(unsigned ID, Type *Ty, LocTy Loc); /// setInstName - After an instruction is parsed and inserted into its /// basic block, this installs its name. @@ -446,10 +445,10 @@ }; bool convertValIDToValue(Type *Ty, ValID &ID, Value *&V, - PerFunctionState *PFS, bool IsCall); + PerFunctionState *PFS); Value *checkValidVariableType(LocTy Loc, const Twine &Name, Type *Ty, - Value *Val, bool IsCall); + Value *Val); bool parseConstantValue(Type *Ty, Constant *&C); bool parseValue(Type *Ty, Value *&V, PerFunctionState *PFS); Index: llvm/lib/AsmParser/LLLexer.cpp =================================================================== --- llvm/lib/AsmParser/LLLexer.cpp +++ llvm/lib/AsmParser/LLLexer.cpp @@ -849,7 +849,15 @@ TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context)); TYPEKEYWORD("x86_amx", Type::getX86_AMXTy(Context)); TYPEKEYWORD("token", Type::getTokenTy(Context)); - TYPEKEYWORD("ptr", PointerType::getUnqual(Context)); + + if (Keyword == "ptr") { + if (Context.supportsTypedPointers()) { + Warning("ptr type is only supported in -opaque-pointers mode"); + return lltok::Error; + } + TyVal = PointerType::getUnqual(Context); + return lltok::Type; + } #undef TYPEKEYWORD Index: llvm/lib/AsmParser/LLParser.cpp =================================================================== --- llvm/lib/AsmParser/LLParser.cpp +++ llvm/lib/AsmParser/LLParser.cpp @@ -1404,14 +1404,10 @@ } Value *LLParser::checkValidVariableType(LocTy Loc, const Twine &Name, Type *Ty, - Value *Val, bool IsCall) { + Value *Val) { Type *ValTy = Val->getType(); if (ValTy == Ty) return Val; - // For calls, we also allow opaque pointers. - if (IsCall && ValTy == PointerType::get(Ty->getContext(), - Ty->getPointerAddressSpace())) - return Val; if (Ty->isLabelTy()) error(Loc, "'" + Name + "' is not a basic block"); else @@ -1425,7 +1421,7 @@ /// forward reference record if needed. This can return null if the value /// exists but does not have the right type. GlobalValue *LLParser::getGlobalVal(const std::string &Name, Type *Ty, - LocTy Loc, bool IsCall) { + LocTy Loc) { PointerType *PTy = dyn_cast(Ty); if (!PTy) { error(Loc, "global variable reference must have pointer type"); @@ -1447,7 +1443,7 @@ // If we have the value in the symbol table or fwd-ref table, return it. if (Val) return cast_or_null( - checkValidVariableType(Loc, "@" + Name, Ty, Val, IsCall)); + checkValidVariableType(Loc, "@" + Name, Ty, Val)); // Otherwise, create a new forward reference for this value and remember it. GlobalValue *FwdVal = createGlobalFwdRef(M, PTy); @@ -1455,8 +1451,7 @@ return FwdVal; } -GlobalValue *LLParser::getGlobalVal(unsigned ID, Type *Ty, LocTy Loc, - bool IsCall) { +GlobalValue *LLParser::getGlobalVal(unsigned ID, Type *Ty, LocTy Loc) { PointerType *PTy = dyn_cast(Ty); if (!PTy) { error(Loc, "global variable reference must have pointer type"); @@ -1476,7 +1471,7 @@ // If we have the value in the symbol table or fwd-ref table, return it. if (Val) return cast_or_null( - checkValidVariableType(Loc, "@" + Twine(ID), Ty, Val, IsCall)); + checkValidVariableType(Loc, "@" + Twine(ID), Ty, Val)); // Otherwise, create a new forward reference for this value and remember it. GlobalValue *FwdVal = createGlobalFwdRef(M, PTy); @@ -2218,7 +2213,7 @@ Result = Lex.getTyVal(); Lex.Lex(); - // Handle (explicit) opaque pointer types (not --force-opaque-pointers). + // Handle "ptr" opaque pointer type. // // Type ::= ptr ('addrspace' '(' uint32 ')')? if (Result->isOpaquePointerTy()) { @@ -2794,7 +2789,7 @@ /// forward reference record if needed. This can return null if the value /// exists but does not have the right type. Value *LLParser::PerFunctionState::getVal(const std::string &Name, Type *Ty, - LocTy Loc, bool IsCall) { + LocTy Loc) { // Look this name up in the normal function symbol table. Value *Val = F.getValueSymbolTable()->lookup(Name); @@ -2808,7 +2803,7 @@ // If we have the value in the symbol table or fwd-ref table, return it. if (Val) - return P.checkValidVariableType(Loc, "%" + Name, Ty, Val, IsCall); + return P.checkValidVariableType(Loc, "%" + Name, Ty, Val); // Don't make placeholders with invalid type. if (!Ty->isFirstClassType()) { @@ -2828,8 +2823,7 @@ return FwdVal; } -Value *LLParser::PerFunctionState::getVal(unsigned ID, Type *Ty, LocTy Loc, - bool IsCall) { +Value *LLParser::PerFunctionState::getVal(unsigned ID, Type *Ty, LocTy Loc) { // Look this name up in the normal function symbol table. Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : nullptr; @@ -2843,7 +2837,7 @@ // If we have the value in the symbol table or fwd-ref table, return it. if (Val) - return P.checkValidVariableType(Loc, "%" + Twine(ID), Ty, Val, IsCall); + return P.checkValidVariableType(Loc, "%" + Twine(ID), Ty, Val); if (!Ty->isFirstClassType()) { P.error(Loc, "invalid use of a non-first-class type"); @@ -2930,12 +2924,12 @@ BasicBlock *LLParser::PerFunctionState::getBB(const std::string &Name, LocTy Loc) { return dyn_cast_or_null( - getVal(Name, Type::getLabelTy(F.getContext()), Loc, /*IsCall=*/false)); + getVal(Name, Type::getLabelTy(F.getContext()), Loc)); } BasicBlock *LLParser::PerFunctionState::getBB(unsigned ID, LocTy Loc) { return dyn_cast_or_null( - getVal(ID, Type::getLabelTy(F.getContext()), Loc, /*IsCall=*/false)); + getVal(ID, Type::getLabelTy(F.getContext()), Loc)); } /// defineBB - Define the specified basic block, which is either named or @@ -3648,7 +3642,7 @@ ValID ID; Value *V = nullptr; bool Parsed = parseValID(ID, /*PFS=*/nullptr, Ty) || - convertValIDToValue(Ty, ID, V, nullptr, /*IsCall=*/false); + convertValIDToValue(Ty, ID, V, nullptr); if (V && !(C = dyn_cast(V))) return error(ID.Loc, "global values must be constants"); return Parsed; @@ -5238,7 +5232,7 @@ //===----------------------------------------------------------------------===// bool LLParser::convertValIDToValue(Type *Ty, ValID &ID, Value *&V, - PerFunctionState *PFS, bool IsCall) { + PerFunctionState *PFS) { if (Ty->isFunctionTy()) return error(ID.Loc, "functions are not values, refer to them as pointers"); @@ -5246,12 +5240,12 @@ case ValID::t_LocalID: if (!PFS) return error(ID.Loc, "invalid use of function-local name"); - V = PFS->getVal(ID.UIntVal, Ty, ID.Loc, IsCall); + V = PFS->getVal(ID.UIntVal, Ty, ID.Loc); return V == nullptr; case ValID::t_LocalName: if (!PFS) return error(ID.Loc, "invalid use of function-local name"); - V = PFS->getVal(ID.StrVal, Ty, ID.Loc, IsCall); + V = PFS->getVal(ID.StrVal, Ty, ID.Loc); return V == nullptr; case ValID::t_InlineAsm: { if (!ID.FTy || !InlineAsm::Verify(ID.FTy, ID.StrVal2)) @@ -5262,10 +5256,10 @@ return false; } case ValID::t_GlobalName: - V = getGlobalVal(ID.StrVal, Ty, ID.Loc, IsCall); + V = getGlobalVal(ID.StrVal, Ty, ID.Loc); return V == nullptr; case ValID::t_GlobalID: - V = getGlobalVal(ID.UIntVal, Ty, ID.Loc, IsCall); + V = getGlobalVal(ID.UIntVal, Ty, ID.Loc); return V == nullptr; case ValID::t_APSInt: if (!Ty->isIntegerTy()) @@ -5389,7 +5383,7 @@ case ValID::t_ConstantStruct: case ValID::t_PackedConstantStruct: { Value *V; - if (convertValIDToValue(Ty, ID, V, /*PFS=*/nullptr, /*IsCall=*/false)) + if (convertValIDToValue(Ty, ID, V, /*PFS=*/nullptr)) return true; assert(isa(V) && "Expected a constant value"); C = cast(V); @@ -5407,7 +5401,7 @@ V = nullptr; ValID ID; return parseValID(ID, PFS, Ty) || - convertValIDToValue(Ty, ID, V, PFS, /*IsCall=*/false); + convertValIDToValue(Ty, ID, V, PFS); } bool LLParser::parseTypeAndValue(Value *&V, PerFunctionState *PFS) { @@ -5702,7 +5696,7 @@ Value *ResolvedVal = BlockAddress::get(&F, BB); ResolvedVal = P.checkValidVariableType(BBID.Loc, BBID.StrVal, GV->getType(), - ResolvedVal, false); + ResolvedVal); if (!ResolvedVal) return true; GV->replaceAllUsesWith(ResolvedVal); @@ -6271,7 +6265,7 @@ // Look up the callee. Value *Callee; if (convertValIDToValue(PointerType::get(Ty, InvokeAddrSpace), CalleeID, - Callee, &PFS, /*IsCall=*/true)) + Callee, &PFS)) return true; // Set up the Attribute for the function. @@ -6596,8 +6590,7 @@ // Look up the callee. Value *Callee; - if (convertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS, - /*IsCall=*/true)) + if (convertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS)) return true; // Set up the Attribute for the function. @@ -7003,7 +6996,7 @@ // Look up the callee. Value *Callee; if (convertValIDToValue(PointerType::get(Ty, CallAddrSpace), CalleeID, Callee, - &PFS, /*IsCall=*/true)) + &PFS)) return true; // Set up the Attribute for the function. Index: llvm/lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1792,6 +1792,9 @@ case bitc::TYPE_CODE_OPAQUE_POINTER: { // OPAQUE_POINTER: [addrspace] if (Record.size() != 1) return error("Invalid record"); + if (Context.supportsTypedPointers()) + return error( + "Opaque pointers are only supported in -opaque-pointers mode"); unsigned AddressSpace = Record[0]; ResultTy = PointerType::get(Context, AddressSpace); break; Index: llvm/lib/IR/Function.cpp =================================================================== --- llvm/lib/IR/Function.cpp +++ llvm/lib/IR/Function.cpp @@ -1446,11 +1446,6 @@ if (!PT->isOpaque()) return matchIntrinsicType(PT->getElementType(), Infos, ArgTys, DeferredChecks, IsDeferredCheck); - // If typed pointers are supported, do not allow using opaque pointer in - // place of fixed pointer type. This would make the intrinsic signature - // non-unique. - if (Ty->getContext().supportsTypedPointers()) - return true; // Consume IIT descriptors relating to the pointer element type. while (Infos.front().Kind == IITDescriptor::Pointer) Infos = Infos.slice(1); @@ -1568,11 +1563,8 @@ if (!ThisArgType || !ReferenceType) return true; - if (!ThisArgType->isOpaque()) - return ThisArgType->getElementType() != ReferenceType->getElementType(); - // If typed pointers are supported, do not allow opaque pointer to ensure - // uniqueness. - return Ty->getContext().supportsTypedPointers(); + return !ThisArgType->isOpaqueOrPointeeTypeMatches( + ReferenceType->getElementType()); } case IITDescriptor::VecOfAnyPtrsToElt: { unsigned RefArgNumber = D.getRefArgNumber(); Index: llvm/lib/IR/LLVMContext.cpp =================================================================== --- llvm/lib/IR/LLVMContext.cpp +++ llvm/lib/IR/LLVMContext.cpp @@ -349,5 +349,5 @@ } bool LLVMContext::supportsTypedPointers() const { - return !pImpl->ForceOpaquePointers; + return !pImpl->OpaquePointers; } Index: llvm/lib/IR/LLVMContextImpl.h =================================================================== --- llvm/lib/IR/LLVMContextImpl.h +++ llvm/lib/IR/LLVMContextImpl.h @@ -1461,7 +1461,7 @@ DenseMap, VectorType*> VectorTypes; // TODO: clean up the following after we no longer support non-opaque pointer // types. - bool ForceOpaquePointers; + bool OpaquePointers; DenseMap PointerTypes; // Pointers in AddrSpace = 0 DenseMap, PointerType*> ASPointerTypes; Index: llvm/lib/IR/LLVMContextImpl.cpp =================================================================== --- llvm/lib/IR/LLVMContextImpl.cpp +++ llvm/lib/IR/LLVMContextImpl.cpp @@ -23,9 +23,8 @@ using namespace llvm; static cl::opt - ForceOpaquePointersCL("force-opaque-pointers", - cl::desc("Force all pointers to be opaque pointers"), - cl::init(false)); + OpaquePointersCL("opaque-pointers", cl::desc("Use opaque pointers"), + cl::init(false)); LLVMContextImpl::LLVMContextImpl(LLVMContext &C) : DiagHandler(std::make_unique()), @@ -37,7 +36,7 @@ PPC_FP128Ty(C, Type::PPC_FP128TyID), X86_MMXTy(C, Type::X86_MMXTyID), X86_AMXTy(C, Type::X86_AMXTyID), Int1Ty(C, 1), Int8Ty(C, 8), Int16Ty(C, 16), Int32Ty(C, 32), Int64Ty(C, 64), Int128Ty(C, 128), - ForceOpaquePointers(ForceOpaquePointersCL) {} + OpaquePointers(OpaquePointersCL) {} LLVMContextImpl::~LLVMContextImpl() { // NOTE: We need to delete the contents of OwnedModules, but Module's dtor Index: llvm/lib/IR/Type.cpp =================================================================== --- llvm/lib/IR/Type.cpp +++ llvm/lib/IR/Type.cpp @@ -696,8 +696,8 @@ LLVMContextImpl *CImpl = EltTy->getContext().pImpl; - // Create opaque pointer for pointer to opaque pointer. - if (CImpl->ForceOpaquePointers || EltTy->isOpaquePointerTy()) + // Automatically convert typed pointers to opaque pointers. + if (CImpl->OpaquePointers) return get(EltTy->getContext(), AddressSpace); // Since AddressSpace #0 is the common case, we special case it. @@ -711,6 +711,8 @@ PointerType *PointerType::get(LLVMContext &C, unsigned AddressSpace) { LLVMContextImpl *CImpl = C.pImpl; + assert(CImpl->OpaquePointers && + "Can only create opaque pointers in opaque pointer mode"); // Since AddressSpace #0 is the common case, we special case it. PointerType *&Entry = Index: llvm/test/Assembler/invalid-opaque-ptr-addrspace.ll =================================================================== --- llvm/test/Assembler/invalid-opaque-ptr-addrspace.ll +++ llvm/test/Assembler/invalid-opaque-ptr-addrspace.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s +; RUN: not llvm-as < %s -opaque-pointers -disable-output 2>&1 | FileCheck %s ; CHECK: ptr* is invalid - use ptr instead define void @f(ptr addrspace(3) %a) { Index: llvm/test/Assembler/invalid-opaque-ptr-double-addrspace.ll =================================================================== --- llvm/test/Assembler/invalid-opaque-ptr-double-addrspace.ll +++ llvm/test/Assembler/invalid-opaque-ptr-double-addrspace.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s +; RUN: not llvm-as < %s -opaque-pointers -disable-output 2>&1 | FileCheck %s ; CHECK: expected top-level entity @g1 = external global ptr addrspace(3) addrspace(4) Index: llvm/test/Assembler/invalid-opaque-ptr.ll =================================================================== --- llvm/test/Assembler/invalid-opaque-ptr.ll +++ llvm/test/Assembler/invalid-opaque-ptr.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s +; RUN: not llvm-as < %s -opaque-pointers -disable-output 2>&1 | FileCheck %s ; CHECK: ptr* is invalid - use ptr instead define void @f(ptr %a) { Index: llvm/test/Assembler/opaque-ptr-cmpxchg.ll =================================================================== --- llvm/test/Assembler/opaque-ptr-cmpxchg.ll +++ llvm/test/Assembler/opaque-ptr-cmpxchg.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s +; RUN: not llvm-as -opaque-pointers < %s 2>&1 | FileCheck %s ; CHECK: compare value and new value type do not match define void @cmpxchg(ptr %p, i32 %a, i64 %b) { Index: llvm/test/Assembler/opaque-ptr.ll =================================================================== --- llvm/test/Assembler/opaque-ptr.ll +++ llvm/test/Assembler/opaque-ptr.ll @@ -1,12 +1,12 @@ -; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s -; RUN: verify-uselistorder %s +; RUN: llvm-as -opaque-pointers < %s | llvm-dis -opaque-pointers | llvm-as -opaque-pointers | llvm-dis -opaque-pointers | FileCheck %s +; RUN: verify-uselistorder -opaque-pointers %s ; CHECK: @global = external global ptr @global = external global ptr -; CHECK: @fptr1 = external global ptr ()* -; CHECK: @fptr2 = external global ptr () addrspace(1)* -; CHECK: @fptr3 = external global ptr () addrspace(1)* addrspace(2)* +; CHECK: @fptr1 = external global ptr +; CHECK: @fptr2 = external global ptr addrspace(1) +; CHECK: @fptr3 = external global ptr addrspace(2) @fptr1 = external global ptr ()* @fptr2 = external global ptr () addrspace(1)* @fptr3 = external global ptr () addrspace(1)* addrspace(2)* @@ -125,7 +125,7 @@ ret void } -; CHECK: define void @invoke(ptr %p) personality void ()* @personality { +; CHECK: define void @invoke(ptr %p) personality ptr @personality { ; CHECK: invoke void %p() ; CHECK: to label %continue unwind label %cleanup declare void @personality() Index: llvm/test/Assembler/ptr-outside-opaque-pointers-mode.ll =================================================================== --- /dev/null +++ llvm/test/Assembler/ptr-outside-opaque-pointers-mode.ll @@ -0,0 +1,7 @@ +; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s + +; CHECK: warning: ptr type is only supported in -opaque-pointers mode +; CHECK: error: expected type +define void @f(ptr %a) { + ret void +} Index: llvm/test/Assembler/remangle-intrinsic-opaque-ptr.ll =================================================================== --- llvm/test/Assembler/remangle-intrinsic-opaque-ptr.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s --check-prefix=TYPED -; RUN: llvm-as --force-opaque-pointers < %s | llvm-dis --force-opaque-pointers | FileCheck %s --check-prefix=OPAQUE - -; An opaque pointer type should not be accepted for an intrinsic that -; specifies a fixed pointer type, outside of --force-opaque-pointers mode. - -define void @test() { -; TYPED: Intrinsic has incorrect return type! -; OPAQUE: call ptr @llvm.stacksave() - call ptr @llvm.stacksave() - -; TYPED: Intrinsic has incorrect argument type! -; OPAQUE: call <2 x i64> @llvm.masked.expandload.v2i64(ptr null, <2 x i1> zeroinitializer, <2 x i64> zeroinitializer) - call <2 x i64> @llvm.masked.expandload.v2i64(ptr null, <2 x i1> zeroinitializer, <2 x i64> zeroinitializer) - - ret void -} - -declare ptr @llvm.stacksave() -declare <2 x i64> @llvm.masked.expandload.v2i64(ptr, <2 x i1>, <2 x i64>) Index: llvm/test/CodeGen/AArch64/sve-vscale.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-vscale.ll +++ llvm/test/CodeGen/AArch64/sve-vscale.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s +; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 -opaque-pointers < %s | FileCheck %s ; RUN: opt -mtriple=aarch64 -codegenprepare -S < %s | llc -mtriple=aarch64 -mattr=+sve -asm-verbose=0 | FileCheck %s ; @@ -48,13 +49,6 @@ ret i32 mul nsw (i32 ptrtoint (* getelementptr (, * null, i64 1) to i32), i32 16) } -; CHECK-LABEL: rdvl_const_opaque_ptr: -; CHECK: rdvl x0, #1 -; CHECK-NEXT: ret -define i32 @rdvl_const_opaque_ptr() nounwind { - ret i32 mul nsw (i32 ptrtoint (ptr getelementptr (, ptr null, i64 1) to i32), i32 16) -} - define i32 @vscale_1() nounwind { ; CHECK-LABEL: vscale_1: ; CHECK: rdvl [[TMP:x[0-9]+]], #1 Index: llvm/test/CodeGen/BPF/CORE/intrinsic-array.ll =================================================================== --- llvm/test/CodeGen/BPF/CORE/intrinsic-array.ll +++ llvm/test/CodeGen/BPF/CORE/intrinsic-array.ll @@ -1,8 +1,8 @@ ; RUN: opt -O2 %s | llvm-dis > %t1 ; RUN: llc -filetype=asm -o - %t1 | FileCheck %s ; RUN: llc -mattr=+alu32 -filetype=asm -o - %t1 | FileCheck %s -; RUN: llc -filetype=asm -force-opaque-pointers -o - %t1 | FileCheck %s -; RUN: llc -mattr=+alu32 -filetype=asm -force-opaque-pointers -o - %t1 | FileCheck %s +; RUN: llc -filetype=asm -opaque-pointers -o - %t1 | FileCheck %s +; RUN: llc -mattr=+alu32 -filetype=asm -opaque-pointers -o - %t1 | FileCheck %s ; ; Source code: ; #define _(x) (__builtin_preserve_access_index(x)) Index: llvm/test/CodeGen/Hexagon/atomic-opaque-basic.ll =================================================================== --- llvm/test/CodeGen/Hexagon/atomic-opaque-basic.ll +++ llvm/test/CodeGen/Hexagon/atomic-opaque-basic.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=hexagon -force-opaque-pointers < %s | FileCheck %s +; RUN: llc -march=hexagon -opaque-pointers < %s | FileCheck %s %s.0 = type { i8 } @g0 = internal global i8 0, align 1 Index: llvm/test/CodeGen/Thumb2/mve-gather-ind32-scaled.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-gather-ind32-scaled.ll +++ llvm/test/CodeGen/Thumb2/mve-gather-ind32-scaled.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -opaque-pointers %s -o - | FileCheck %s define arm_aapcs_vfpcc <4 x i32> @zext_scaled_i16_i32(i16* %base, <4 x i32>* %offptr) { ; CHECK-LABEL: zext_scaled_i16_i32: @@ -15,20 +16,6 @@ ret <4 x i32> %gather.zext } -define arm_aapcs_vfpcc <4 x i32> @zext_scaled_i16_i32_opaque(ptr %base, ptr %offptr) { -; CHECK-LABEL: zext_scaled_i16_i32_opaque: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q1, [r1] -; CHECK-NEXT: vldrh.u32 q0, [r0, q1, uxtw #1] -; CHECK-NEXT: bx lr -entry: - %offs = load <4 x i32>, ptr %offptr, align 4 - %ptrs = getelementptr inbounds i16, ptr %base, <4 x i32> %offs - %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> , <4 x i16> undef) - %gather.zext = zext <4 x i16> %gather to <4 x i32> - ret <4 x i32> %gather.zext -} - define arm_aapcs_vfpcc <4 x i32> @sext_scaled_i16_i32(i16* %base, <4 x i32>* %offptr) { ; CHECK-LABEL: sext_scaled_i16_i32: ; CHECK: @ %bb.0: @ %entry @@ -72,20 +59,6 @@ ret <4 x float> %gather } -define arm_aapcs_vfpcc <4 x float> @scaled_f32_i32_opaque(ptr %base, ptr %offptr) { -; CHECK-LABEL: scaled_f32_i32_opaque: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q1, [r1] -; CHECK-NEXT: vldrw.u32 q0, [r0, q1, uxtw #2] -; CHECK-NEXT: bx lr -entry: - %offs = load <4 x i32>, ptr %offptr, align 4 - %i32_ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs - %ptrs = bitcast <4 x ptr> %i32_ptrs to <4 x ptr> - %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x float> undef) - ret <4 x float> %gather -} - define arm_aapcs_vfpcc <4 x i32> @unsigned_scaled_b_i32_i16(i32* %base, <4 x i16>* %offptr) { ; CHECK-LABEL: unsigned_scaled_b_i32_i16: ; CHECK: @ %bb.0: @ %entry @@ -114,34 +87,6 @@ ret <4 x i32> %gather } -define arm_aapcs_vfpcc <4 x i32> @unsigned_scaled_b_i32_i16_opaque(ptr %base, ptr %offptr) { -; CHECK-LABEL: unsigned_scaled_b_i32_i16_opaque: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q1, [r1] -; CHECK-NEXT: vldrw.u32 q0, [r0, q1, uxtw #2] -; CHECK-NEXT: bx lr -entry: - %offs = load <4 x i16>, ptr %offptr, align 2 - %offs.zext = zext <4 x i16> %offs to <4 x i32> - %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.zext - %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> undef) - ret <4 x i32> %gather -} - -define arm_aapcs_vfpcc <4 x i32> @signed_scaled_i32_i16_opaque(ptr %base, ptr %offptr) { -; CHECK-LABEL: signed_scaled_i32_i16_opaque: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.s32 q1, [r1] -; CHECK-NEXT: vldrw.u32 q0, [r0, q1, uxtw #2] -; CHECK-NEXT: bx lr -entry: - %offs = load <4 x i16>, ptr %offptr, align 2 - %offs.sext = sext <4 x i16> %offs to <4 x i32> - %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.sext - %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> undef) - ret <4 x i32> %gather -} - define arm_aapcs_vfpcc <4 x float> @a_unsigned_scaled_f32_i16(i32* %base, <4 x i16>* %offptr) { ; CHECK-LABEL: a_unsigned_scaled_f32_i16: ; CHECK: @ %bb.0: @ %entry @@ -371,13 +316,13 @@ define arm_aapcs_vfpcc <4 x i32> @scaled_i32_i32_2gep2(i32* %base) { ; CHECK-LABEL: scaled_i32_i32_2gep2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adr r1, .LCPI25_0 +; CHECK-NEXT: adr r1, .LCPI21_0 ; CHECK-NEXT: vldrw.u32 q1, [r1] ; CHECK-NEXT: vldrw.u32 q0, [r0, q1, uxtw #2] ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI25_0: +; CHECK-NEXT: .LCPI21_0: ; CHECK-NEXT: .long 5 @ 0x5 ; CHECK-NEXT: .long 8 @ 0x8 ; CHECK-NEXT: .long 11 @ 0xb @@ -389,50 +334,8 @@ ret <4 x i32> %gather } -define arm_aapcs_vfpcc <4 x i32> @scaled_i32_i32_2gep_opaque(ptr %base, ptr %offptr) { -; CHECK-LABEL: scaled_i32_i32_2gep_opaque: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q1, [r1] -; CHECK-NEXT: vmov.i32 q0, #0x14 -; CHECK-NEXT: vshl.i32 q1, q1, #2 -; CHECK-NEXT: vadd.i32 q1, q1, r0 -; CHECK-NEXT: vadd.i32 q1, q1, q0 -; CHECK-NEXT: vldrw.u32 q0, [q1] -; CHECK-NEXT: bx lr -entry: - %offs = load <4 x i32>, ptr %offptr, align 4 - %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs - %ptrs2 = getelementptr inbounds i32, <4 x ptr> %ptrs, i32 5 - %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs2, i32 4, <4 x i1> , <4 x i32> undef) - ret <4 x i32> %gather -} - -define arm_aapcs_vfpcc <4 x i32> @scaled_i32_i32_2gep2_opaque(ptr %base) { -; CHECK-LABEL: scaled_i32_i32_2gep2_opaque: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adr r1, .LCPI27_0 -; CHECK-NEXT: vldrw.u32 q1, [r1] -; CHECK-NEXT: vldrw.u32 q0, [r0, q1, uxtw #2] -; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI27_0: -; CHECK-NEXT: .long 5 @ 0x5 -; CHECK-NEXT: .long 8 @ 0x8 -; CHECK-NEXT: .long 11 @ 0xb -; CHECK-NEXT: .long 14 @ 0xe -entry: - %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> - %ptrs2 = getelementptr inbounds i32, <4 x ptr> %ptrs, i32 5 - %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs2, i32 4, <4 x i1> , <4 x i32> undef) - ret <4 x i32> %gather -} - declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32, <4 x i1>, <4 x i8>) declare <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*>, i32, <4 x i1>, <4 x i16>) -declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>) declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) -declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>) declare <4 x half> @llvm.masked.gather.v4f16.v4p0f16(<4 x half*>, i32, <4 x i1>, <4 x half>) declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>) -declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>) Index: llvm/test/CodeGen/Thumb2/mve-gather-ind32-unscaled.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-gather-ind32-unscaled.ll +++ llvm/test/CodeGen/Thumb2/mve-gather-ind32-unscaled.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -opaque-pointers %s -o - | FileCheck %s define arm_aapcs_vfpcc <4 x i32> @zext_unscaled_i8_i32(i8* %base, <4 x i32>* %offptr) { ; CHECK-LABEL: zext_unscaled_i8_i32: @@ -29,20 +30,6 @@ ret <4 x i32> %gather.sext } -define arm_aapcs_vfpcc <4 x i32> @sext_unscaled_i8_i32_opaque(ptr %base, ptr %offptr) { -; CHECK-LABEL: sext_unscaled_i8_i32_opaque: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q1, [r1] -; CHECK-NEXT: vldrb.s32 q0, [r0, q1] -; CHECK-NEXT: bx lr -entry: - %offs = load <4 x i32>, ptr %offptr, align 4 - %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs - %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i8> undef) - %gather.sext = sext <4 x i8> %gather to <4 x i32> - ret <4 x i32> %gather.sext -} - define arm_aapcs_vfpcc <4 x i32> @zext_unscaled_i16_i32(i8* %base, <4 x i32>* %offptr) { ; CHECK-LABEL: zext_unscaled_i16_i32: ; CHECK: @ %bb.0: @ %entry @@ -469,21 +456,6 @@ ret <4 x i32> %gather.sext } -define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i8_i8_opaque(ptr %base, ptr %offptr) { -; CHECK-LABEL: sext_unsigned_unscaled_i8_i8_opaque: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q1, [r1] -; CHECK-NEXT: vldrb.s32 q0, [r0, q1] -; CHECK-NEXT: bx lr -entry: - %offs = load <4 x i8>, ptr %offptr, align 1 - %offs.zext = zext <4 x i8> %offs to <4 x i32> - %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext - %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i8> undef) - %gather.sext = sext <4 x i8> %gather to <4 x i32> - ret <4 x i32> %gather.sext -} - ; VLDRW.u32 Qd, [P, 4] define arm_aapcs_vfpcc <4 x i32> @qi4(<4 x i32*> %p) { ; CHECK-LABEL: qi4: @@ -523,5 +495,3 @@ declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) declare <4 x half> @llvm.masked.gather.v4f16.v4p0f16(<4 x half*>, i32, <4 x i1>, <4 x half>) declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>) - -declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i8>) Index: llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll +++ llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst -opaque-pointers %s -o - | FileCheck %s ; i32 @@ -29,18 +30,6 @@ ret <4 x i32> %gather } -define arm_aapcs_vfpcc <4 x i32> @ptr_v4i32_opaque(<4 x ptr>* %offptr) { -; CHECK-LABEL: ptr_v4i32_opaque: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vldrw.u32 q0, [q1] -; CHECK-NEXT: bx lr -entry: - %offs = load <4 x ptr>, <4 x ptr>* %offptr, align 4 - %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %offs, i32 4, <4 x i1> , <4 x i32> undef) - ret <4 x i32> %gather -} - define arm_aapcs_vfpcc <8 x i32> @ptr_v8i32(<8 x i32*>* %offptr) { ; CHECK-LABEL: ptr_v8i32: ; CHECK: @ %bb.0: @ %entry @@ -146,18 +135,6 @@ ret <4 x float> %gather } -define arm_aapcs_vfpcc <4 x float> @ptr_v4f32_opaque(<4 x ptr>* %offptr) { -; CHECK-LABEL: ptr_v4f32_opaque: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: vldrw.u32 q0, [q1] -; CHECK-NEXT: bx lr -entry: - %offs = load <4 x ptr>, <4 x ptr>* %offptr, align 4 - %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %offs, i32 4, <4 x i1> , <4 x float> undef) - ret <4 x float> %gather -} - define arm_aapcs_vfpcc <8 x float> @ptr_v8f32(<8 x float*>* %offptr) { ; CHECK-LABEL: ptr_v8f32: ; CHECK: @ %bb.0: @ %entry @@ -283,34 +260,6 @@ ret <4 x i32> %ext } -define arm_aapcs_vfpcc <4 x i32> @ptr_v4i16_sext_opaque(<4 x ptr>* %offptr) { -; CHECK-LABEL: ptr_v4i16_sext_opaque: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: vldrh.s32 q0, [r1, q1] -; CHECK-NEXT: bx lr -entry: - %offs = load <4 x ptr>, <4 x ptr>* %offptr, align 4 - %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %offs, i32 2, <4 x i1> , <4 x i16> undef) - %ext = sext <4 x i16> %gather to <4 x i32> - ret <4 x i32> %ext -} - -define arm_aapcs_vfpcc <4 x i32> @ptr_v4i16_zext_opaque(<4 x ptr>* %offptr) { -; CHECK-LABEL: ptr_v4i16_zext_opaque: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: vldrh.u32 q0, [r1, q1] -; CHECK-NEXT: bx lr -entry: - %offs = load <4 x ptr>, <4 x ptr>* %offptr, align 4 - %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %offs, i32 2, <4 x i1> , <4 x i16> undef) - %ext = zext <4 x i16> %gather to <4 x i32> - ret <4 x i32> %ext -} - define arm_aapcs_vfpcc <4 x i16> @ptr_v4i16(<4 x i16*>* %offptr) { ; CHECK-LABEL: ptr_v4i16: ; CHECK: @ %bb.0: @ %entry @@ -742,17 +691,17 @@ ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} -; CHECK-NEXT: .LBB30_1: @ %vector.body.preheader +; CHECK-NEXT: .LBB26_1: @ %vector.body.preheader ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r2, lsr #2 -; CHECK-NEXT: .LBB30_2: @ %vector.body +; CHECK-NEXT: .LBB26_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vptt.i32 ne, q0, zr ; CHECK-NEXT: vldrwt.u32 q1, [q0] ; CHECK-NEXT: vstrwt.32 q1, [r0], #16 -; CHECK-NEXT: le lr, .LBB30_2 +; CHECK-NEXT: le lr, .LBB26_2 ; CHECK-NEXT: @ %bb.3: @ %for.end ; CHECK-NEXT: pop {r7, pc} entry: @@ -778,51 +727,6 @@ ret void } -define void @foo_ptr_p_int32_t_opaque(ptr %dest, ptr %src, i32 %n) { -; CHECK-LABEL: foo_ptr_p_int32_t_opaque: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: bic r2, r2, #15 -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: it lt -; CHECK-NEXT: poplt {r7, pc} -; CHECK-NEXT: .LBB31_1: @ %vector.body.preheader -; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w lr, r3, r2, lsr #2 -; CHECK-NEXT: .LBB31_2: @ %vector.body -; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q0, [r1], #16 -; CHECK-NEXT: vptt.i32 ne, q0, zr -; CHECK-NEXT: vldrwt.u32 q1, [q0] -; CHECK-NEXT: vstrwt.32 q1, [r0], #16 -; CHECK-NEXT: le lr, .LBB31_2 -; CHECK-NEXT: @ %bb.3: @ %for.end -; CHECK-NEXT: pop {r7, pc} -entry: - %and = and i32 %n, -16 - %cmp11 = icmp sgt i32 %and, 0 - br i1 %cmp11, label %vector.body, label %for.end - -vector.body: ; preds = %entry, %vector.body - %index = phi i32 [ %index.next, %vector.body ], [ 0, %entry ] - %0 = getelementptr inbounds ptr, ptr %src, i32 %index - %1 = bitcast ptr %0 to ptr - %wide.load = load <4 x ptr>, ptr %1, align 4 - %2 = icmp ne <4 x ptr> %wide.load, zeroinitializer - %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %wide.load, i32 4, <4 x i1> %2, <4 x i32> undef) - %3 = getelementptr inbounds i32, ptr %dest, i32 %index - %4 = bitcast ptr %3 to ptr - call void @llvm.masked.store.v4i32.p0(<4 x i32> %wide.masked.gather, ptr %4, i32 4, <4 x i1> %2) - %index.next = add i32 %index, 4 - %5 = icmp eq i32 %index.next, %and - br i1 %5, label %for.end, label %vector.body - -for.end: ; preds = %vector.body, %entry - ret void -} - define void @foo_ptr_p_float(float* %dest, float** %src, i32 %n) { ; CHECK-LABEL: foo_ptr_p_float: ; CHECK: @ %bb.0: @ %entry @@ -832,17 +736,17 @@ ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} -; CHECK-NEXT: .LBB32_1: @ %vector.body.preheader +; CHECK-NEXT: .LBB27_1: @ %vector.body.preheader ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: add.w lr, r3, r2, lsr #2 -; CHECK-NEXT: .LBB32_2: @ %vector.body +; CHECK-NEXT: .LBB27_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vptt.i32 ne, q0, zr ; CHECK-NEXT: vldrwt.u32 q1, [q0] ; CHECK-NEXT: vstrwt.32 q1, [r0], #16 -; CHECK-NEXT: le lr, .LBB32_2 +; CHECK-NEXT: le lr, .LBB27_2 ; CHECK-NEXT: @ %bb.3: @ %for.end ; CHECK-NEXT: pop {r7, pc} entry: @@ -932,8 +836,3 @@ declare <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*>, i32, <16 x i1>, <16 x i8>) declare <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*>, i32, <32 x i1>, <32 x i8>) declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>) - -declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>) -declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>) -declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>) -declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>) Index: llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll +++ llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp --arm-memtransfer-tploop=allow %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp --arm-memtransfer-tploop=allow -opaque-pointers %s -o - | FileCheck %s !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{i32 1, !"min_enum_size", i32 4} @@ -146,66 +147,22 @@ ret void; } -define arm_aapcs_vfpcc void @push_out_mul_add_gather_opaque(ptr noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { -; CHECK-LABEL: push_out_mul_add_gather_opaque: -; CHECK: @ %bb.0: @ %vector.ph -; CHECK-NEXT: adr r3, .LCPI3_0 -; CHECK-NEXT: vldrw.u32 q0, [r3] -; CHECK-NEXT: vadd.i32 q0, q0, r0 -; CHECK-NEXT: .LBB3_1: @ %vector.body -; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrw.u32 q1, [q0, #96]! -; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vstrb.8 q1, [r1], #16 -; CHECK-NEXT: bne .LBB3_1 -; CHECK-NEXT: @ %bb.2: @ %end -; CHECK-NEXT: bx lr -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.3: -; CHECK-NEXT: .LCPI3_0: -; CHECK-NEXT: .long 4294967224 @ 0xffffffb8 -; CHECK-NEXT: .long 4294967248 @ 0xffffffd0 -; CHECK-NEXT: .long 4294967272 @ 0xffffffe8 -; CHECK-NEXT: .long 0 @ 0x0 - -vector.ph: ; preds = %for.body.preheader - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %vec.ind = phi <4 x i32> [ , %vector.ph ], [ %vec.ind.next, %vector.body ] - %0 = mul <4 x i32> %vec.ind, - %1 = add <4 x i32> %0, - %2 = getelementptr inbounds i32, ptr %data, <4 x i32> %1 - %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %2, i32 4, <4 x i1> , <4 x i32> undef) - %3 = getelementptr inbounds i32, i32* %dst, i32 %index - %4 = bitcast i32* %3 to <4 x i32>* - store <4 x i32> %wide.masked.gather, <4 x i32>* %4, align 4 - %index.next = add i32 %index, 4 - %vec.ind.next = add <4 x i32> %vec.ind, - %5 = icmp eq i32 %index.next, %n.vec - br i1 %5, label %end, label %vector.body - -end: - ret void; -} - define arm_aapcs_vfpcc void @push_out_mul_scatter(i32* noalias nocapture readonly %data, ; CHECK-LABEL: push_out_mul_scatter: ; CHECK: @ %bb.0: @ %vector.ph -; CHECK-NEXT: adr r1, .LCPI4_0 +; CHECK-NEXT: adr r1, .LCPI3_0 ; CHECK-NEXT: vldrw.u32 q1, [r1] ; CHECK-NEXT: vadd.i32 q1, q1, r0 -; CHECK-NEXT: .LBB4_1: @ %vector.body +; CHECK-NEXT: .LBB3_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vstrw.32 q0, [q1, #96]! -; CHECK-NEXT: bne .LBB4_1 +; CHECK-NEXT: bne .LBB3_1 ; CHECK-NEXT: @ %bb.2: @ %end ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.3: -; CHECK-NEXT: .LCPI4_0: +; CHECK-NEXT: .LCPI3_0: ; CHECK-NEXT: .long 4294967200 @ 0xffffffa0 ; CHECK-NEXT: .long 4294967224 @ 0xffffffb8 ; CHECK-NEXT: .long 4294967248 @ 0xffffffd0 @@ -234,19 +191,19 @@ define arm_aapcs_vfpcc void @push_out_add_scatter(i32* noalias nocapture readonly %data, ; CHECK-LABEL: push_out_add_scatter: ; CHECK: @ %bb.0: @ %vector.ph -; CHECK-NEXT: adr r1, .LCPI5_0 +; CHECK-NEXT: adr r1, .LCPI4_0 ; CHECK-NEXT: vldrw.u32 q1, [r1] ; CHECK-NEXT: vadd.i32 q1, q1, r0 -; CHECK-NEXT: .LBB5_1: @ %vector.body +; CHECK-NEXT: .LBB4_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vstrw.32 q0, [q1, #32]! -; CHECK-NEXT: bne .LBB5_1 +; CHECK-NEXT: bne .LBB4_1 ; CHECK-NEXT: @ %bb.2: @ %end ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.3: -; CHECK-NEXT: .LCPI5_0: +; CHECK-NEXT: .LCPI4_0: ; CHECK-NEXT: .long 4294967288 @ 0xfffffff8 ; CHECK-NEXT: .long 0 @ 0x0 ; CHECK-NEXT: .long 8 @ 0x8 @@ -275,22 +232,22 @@ define arm_aapcs_vfpcc void @push_out_mul_gather_scatter(i32* noalias nocapture readonly %data, ; CHECK-LABEL: push_out_mul_gather_scatter: ; CHECK: @ %bb.0: @ %vector.ph -; CHECK-NEXT: adr r1, .LCPI6_0 +; CHECK-NEXT: adr r1, .LCPI5_0 ; CHECK-NEXT: vmov.i32 q0, #0x18 ; CHECK-NEXT: vldrw.u32 q1, [r1] -; CHECK-NEXT: .LBB6_1: @ %vector.body +; CHECK-NEXT: .LBB5_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q2, [r0, q1, uxtw #2] ; CHECK-NEXT: vadd.i32 q3, q1, q0 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vstrw.32 q2, [r0, q1, uxtw #2] ; CHECK-NEXT: vmov q1, q3 -; CHECK-NEXT: bne .LBB6_1 +; CHECK-NEXT: bne .LBB5_1 ; CHECK-NEXT: @ %bb.2: @ %end ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.3: -; CHECK-NEXT: .LCPI6_0: +; CHECK-NEXT: .LCPI5_0: ; CHECK-NEXT: .long 0 @ 0x0 ; CHECK-NEXT: .long 6 @ 0x6 ; CHECK-NEXT: .long 12 @ 0xc @@ -319,20 +276,20 @@ define arm_aapcs_vfpcc void @push_out_add_sub_block(i32* noalias nocapture readonly %data, i32* noalias nocapture %dst, i32 %n.vec) { ; CHECK-LABEL: push_out_add_sub_block: ; CHECK: @ %bb.0: @ %vector.ph -; CHECK-NEXT: adr r3, .LCPI7_0 +; CHECK-NEXT: adr r3, .LCPI6_0 ; CHECK-NEXT: vldrw.u32 q0, [r3] ; CHECK-NEXT: vadd.i32 q0, q0, r0 -; CHECK-NEXT: .LBB7_1: @ %vector.body +; CHECK-NEXT: .LBB6_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [q0, #32]! ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vstrb.8 q1, [r1], #16 -; CHECK-NEXT: bne .LBB7_1 +; CHECK-NEXT: bne .LBB6_1 ; CHECK-NEXT: @ %bb.2: @ %end ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.3: -; CHECK-NEXT: .LCPI7_0: +; CHECK-NEXT: .LCPI6_0: ; CHECK-NEXT: .long 4294967288 @ 0xfffffff8 ; CHECK-NEXT: .long 0 @ 0x0 ; CHECK-NEXT: .long 8 @ 0x8 @@ -370,12 +327,12 @@ ; CHECK: @ %bb.0: @ %vector.ph ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: adr.w r12, .LCPI8_0 +; CHECK-NEXT: adr.w r12, .LCPI7_0 ; CHECK-NEXT: vmov.i32 q0, #0x9 ; CHECK-NEXT: vldrw.u32 q3, [r12] ; CHECK-NEXT: vmov.i32 q1, #0xc ; CHECK-NEXT: vmov.i32 q2, #0x8 -; CHECK-NEXT: .LBB8_1: @ %vector.body +; CHECK-NEXT: .LBB7_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vadd.i32 q4, q3, q2 ; CHECK-NEXT: vmul.i32 q5, q3, q0 @@ -385,13 +342,13 @@ ; CHECK-NEXT: vmov q3, q4 ; CHECK-NEXT: vstrw.32 q5, [r3] ; CHECK-NEXT: vstrb.8 q6, [r1], #16 -; CHECK-NEXT: bne .LBB8_1 +; CHECK-NEXT: bne .LBB7_1 ; CHECK-NEXT: @ %bb.2: @ %end ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.3: -; CHECK-NEXT: .LCPI8_0: +; CHECK-NEXT: .LCPI7_0: ; CHECK-NEXT: .long 0 @ 0x0 ; CHECK-NEXT: .long 2 @ 0x2 ; CHECK-NEXT: .long 4 @ 0x4 @@ -426,13 +383,13 @@ ; CHECK: @ %bb.0: @ %vector.ph ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: adr.w r12, .LCPI9_0 +; CHECK-NEXT: adr.w r12, .LCPI8_0 ; CHECK-NEXT: vmov.i32 q0, #0x12 ; CHECK-NEXT: vldrw.u32 q4, [r12] ; CHECK-NEXT: vmov.i32 q1, #0x9 ; CHECK-NEXT: vmov.i32 q2, #0x8 ; CHECK-NEXT: vmov.i32 q3, #0xc -; CHECK-NEXT: .LBB9_1: @ %vector.body +; CHECK-NEXT: .LBB8_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vadd.i32 q5, q4, q2 ; CHECK-NEXT: vmul.i32 q6, q4, q1 @@ -443,13 +400,13 @@ ; CHECK-NEXT: vstrw.32 q4, [r3] ; CHECK-NEXT: vmov q4, q5 ; CHECK-NEXT: vstrb.8 q7, [r1], #16 -; CHECK-NEXT: bne .LBB9_1 +; CHECK-NEXT: bne .LBB8_1 ; CHECK-NEXT: @ %bb.2: @ %end ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.3: -; CHECK-NEXT: .LCPI9_0: +; CHECK-NEXT: .LCPI8_0: ; CHECK-NEXT: .long 0 @ 0x0 ; CHECK-NEXT: .long 2 @ 0x2 ; CHECK-NEXT: .long 4 @ 0x4 @@ -500,22 +457,22 @@ ; CHECK-NEXT: vshl.i32 q3, q1, #3 ; CHECK-NEXT: subs r7, #4 ; CHECK-NEXT: add.w r10, r6, r7, lsr #2 -; CHECK-NEXT: adr r7, .LCPI10_0 -; CHECK-NEXT: adr r6, .LCPI10_1 +; CHECK-NEXT: adr r7, .LCPI9_0 +; CHECK-NEXT: adr r6, .LCPI9_1 ; CHECK-NEXT: vldrw.u32 q2, [r7] ; CHECK-NEXT: vldrw.u32 q0, [r6] ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill -; CHECK-NEXT: .LBB10_1: @ %for.cond8.preheader.us.us.preheader +; CHECK-NEXT: .LBB9_1: @ %for.cond8.preheader.us.us.preheader ; CHECK-NEXT: @ =>This Loop Header: Depth=1 -; CHECK-NEXT: @ Child Loop BB10_2 Depth 2 -; CHECK-NEXT: @ Child Loop BB10_3 Depth 3 +; CHECK-NEXT: @ Child Loop BB9_2 Depth 2 +; CHECK-NEXT: @ Child Loop BB9_3 Depth 3 ; CHECK-NEXT: mul r11, r8, r9 ; CHECK-NEXT: movs r5, #0 ; CHECK-NEXT: mul r7, r8, r12 -; CHECK-NEXT: .LBB10_2: @ %vector.ph -; CHECK-NEXT: @ Parent Loop BB10_1 Depth=1 +; CHECK-NEXT: .LBB9_2: @ %vector.ph +; CHECK-NEXT: @ Parent Loop BB9_1 Depth=1 ; CHECK-NEXT: @ => This Loop Header: Depth=2 -; CHECK-NEXT: @ Child Loop BB10_3 Depth 3 +; CHECK-NEXT: @ Child Loop BB9_3 Depth 3 ; CHECK-NEXT: vdup.32 q5, r7 ; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload ; CHECK-NEXT: vshl.i32 q5, q5, #2 @@ -525,9 +482,9 @@ ; CHECK-NEXT: vmov.i32 q4, #0x0 ; CHECK-NEXT: vadd.i32 q5, q5, q0 ; CHECK-NEXT: vmlas.u32 q6, q2, r5 -; CHECK-NEXT: .LBB10_3: @ %vector.body -; CHECK-NEXT: @ Parent Loop BB10_1 Depth=1 -; CHECK-NEXT: @ Parent Loop BB10_2 Depth=2 +; CHECK-NEXT: .LBB9_3: @ %vector.body +; CHECK-NEXT: @ Parent Loop BB9_1 Depth=1 +; CHECK-NEXT: @ Parent Loop BB9_2 Depth=2 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=3 ; CHECK-NEXT: vadd.i32 q7, q6, q3 ; CHECK-NEXT: vldrw.u32 q0, [r1, q6, uxtw #2] @@ -535,20 +492,20 @@ ; CHECK-NEXT: vmul.i32 q0, q0, q6 ; CHECK-NEXT: vmov q6, q7 ; CHECK-NEXT: vadd.i32 q4, q0, q4 -; CHECK-NEXT: le lr, .LBB10_3 +; CHECK-NEXT: le lr, .LBB9_3 ; CHECK-NEXT: @ %bb.4: @ %middle.block -; CHECK-NEXT: @ in Loop: Header=BB10_2 Depth=2 +; CHECK-NEXT: @ in Loop: Header=BB9_2 Depth=2 ; CHECK-NEXT: add.w r4, r5, r11 ; CHECK-NEXT: adds r5, #1 ; CHECK-NEXT: vaddv.u32 r6, q4 ; CHECK-NEXT: cmp r5, r9 ; CHECK-NEXT: str.w r6, [r2, r4, lsl #2] -; CHECK-NEXT: bne .LBB10_2 +; CHECK-NEXT: bne .LBB9_2 ; CHECK-NEXT: @ %bb.5: @ %for.cond4.for.cond.cleanup6_crit_edge.us -; CHECK-NEXT: @ in Loop: Header=BB10_1 Depth=1 +; CHECK-NEXT: @ in Loop: Header=BB9_1 Depth=1 ; CHECK-NEXT: add.w r8, r8, #1 ; CHECK-NEXT: cmp r8, r3 -; CHECK-NEXT: bne .LBB10_1 +; CHECK-NEXT: bne .LBB9_1 ; CHECK-NEXT: @ %bb.6: @ %for.end25 ; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} @@ -556,12 +513,12 @@ ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.7: -; CHECK-NEXT: .LCPI10_0: +; CHECK-NEXT: .LCPI9_0: ; CHECK-NEXT: .long 0 @ 0x0 ; CHECK-NEXT: .long 2 @ 0x2 ; CHECK-NEXT: .long 4 @ 0x4 ; CHECK-NEXT: .long 6 @ 0x6 -; CHECK-NEXT: .LCPI10_1: +; CHECK-NEXT: .LCPI9_1: ; CHECK-NEXT: .long 4294967264 @ 0xffffffe0 ; CHECK-NEXT: .long 4294967272 @ 0xffffffe8 ; CHECK-NEXT: .long 4294967280 @ 0xfffffff0 @@ -647,13 +604,13 @@ ; CHECK-NEXT: itt ne ; CHECK-NEXT: ldrne r0, [sp, #136] ; CHECK-NEXT: cmpne r0, #0 -; CHECK-NEXT: bne .LBB11_2 -; CHECK-NEXT: .LBB11_1: @ %for.cond.cleanup +; CHECK-NEXT: bne .LBB10_2 +; CHECK-NEXT: .LBB10_1: @ %for.cond.cleanup ; CHECK-NEXT: add sp, #32 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; CHECK-NEXT: .LBB11_2: @ %for.cond1.preheader.us.preheader +; CHECK-NEXT: .LBB10_2: @ %for.cond1.preheader.us.preheader ; CHECK-NEXT: ldr.w r12, [sp, #140] ; CHECK-NEXT: movs r7, #1 ; CHECK-NEXT: mov.w r11, #0 @@ -662,7 +619,7 @@ ; CHECK-NEXT: subs r3, r2, #4 ; CHECK-NEXT: add.w r0, r7, r3, lsr #2 ; CHECK-NEXT: ldr r7, [sp, #136] -; CHECK-NEXT: adr r3, .LCPI11_0 +; CHECK-NEXT: adr r3, .LCPI10_0 ; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: lsl.w r0, r12, #1 ; CHECK-NEXT: vdup.32 q1, r7 @@ -673,15 +630,15 @@ ; CHECK-NEXT: vshl.i32 q3, q1, #2 ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: b .LBB11_5 -; CHECK-NEXT: .LBB11_3: @ %for.cond5.preheader.us73.preheader -; CHECK-NEXT: @ in Loop: Header=BB11_5 Depth=1 +; CHECK-NEXT: b .LBB10_5 +; CHECK-NEXT: .LBB10_3: @ %for.cond5.preheader.us73.preheader +; CHECK-NEXT: @ in Loop: Header=BB10_5 Depth=1 ; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload ; CHECK-NEXT: add.w r3, r0, r5, lsl #1 -; CHECK-NEXT: wlstp.8 lr, r6, .LBB11_4 -; CHECK-NEXT: b .LBB11_15 -; CHECK-NEXT: .LBB11_4: @ %for.cond1.for.cond.cleanup3_crit_edge.us -; CHECK-NEXT: @ in Loop: Header=BB11_5 Depth=1 +; CHECK-NEXT: wlstp.8 lr, r6, .LBB10_4 +; CHECK-NEXT: b .LBB10_15 +; CHECK-NEXT: .LBB10_4: @ %for.cond1.for.cond.cleanup3_crit_edge.us +; CHECK-NEXT: @ in Loop: Header=BB10_5 Depth=1 ; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: add r11, r12 ; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload @@ -691,51 +648,51 @@ ; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: adds r3, #1 ; CHECK-NEXT: cmp r3, r0 -; CHECK-NEXT: beq .LBB11_1 -; CHECK-NEXT: .LBB11_5: @ %for.cond1.preheader.us +; CHECK-NEXT: beq .LBB10_1 +; CHECK-NEXT: .LBB10_5: @ %for.cond1.preheader.us ; CHECK-NEXT: @ =>This Loop Header: Depth=1 -; CHECK-NEXT: @ Child Loop BB11_8 Depth 2 -; CHECK-NEXT: @ Child Loop BB11_11 Depth 3 -; CHECK-NEXT: @ Child Loop BB11_14 Depth 3 -; CHECK-NEXT: @ Child Loop BB11_15 Depth 2 +; CHECK-NEXT: @ Child Loop BB10_8 Depth 2 +; CHECK-NEXT: @ Child Loop BB10_11 Depth 3 +; CHECK-NEXT: @ Child Loop BB10_14 Depth 3 +; CHECK-NEXT: @ Child Loop BB10_15 Depth 2 ; CHECK-NEXT: mul r5, r3, r7 ; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: beq .LBB11_3 +; CHECK-NEXT: beq .LBB10_3 ; CHECK-NEXT: @ %bb.6: @ %for.cond5.preheader.us.us.preheader -; CHECK-NEXT: @ in Loop: Header=BB11_5 Depth=1 +; CHECK-NEXT: @ in Loop: Header=BB10_5 Depth=1 ; CHECK-NEXT: mov.w r8, #0 -; CHECK-NEXT: b .LBB11_8 -; CHECK-NEXT: .LBB11_7: @ %for.cond5.for.cond.cleanup7_crit_edge.us.us -; CHECK-NEXT: @ in Loop: Header=BB11_8 Depth=2 +; CHECK-NEXT: b .LBB10_8 +; CHECK-NEXT: .LBB10_7: @ %for.cond5.for.cond.cleanup7_crit_edge.us.us +; CHECK-NEXT: @ in Loop: Header=BB10_8 Depth=2 ; CHECK-NEXT: ldr r3, [sp, #28] @ 4-byte Reload ; CHECK-NEXT: add.w r0, r8, r5 ; CHECK-NEXT: add.w r8, r8, #1 ; CHECK-NEXT: cmp r8, r7 ; CHECK-NEXT: strh.w r10, [r3, r0, lsl #1] -; CHECK-NEXT: beq .LBB11_4 -; CHECK-NEXT: .LBB11_8: @ %for.cond5.preheader.us.us -; CHECK-NEXT: @ Parent Loop BB11_5 Depth=1 +; CHECK-NEXT: beq .LBB10_4 +; CHECK-NEXT: .LBB10_8: @ %for.cond5.preheader.us.us +; CHECK-NEXT: @ Parent Loop BB10_5 Depth=1 ; CHECK-NEXT: @ => This Loop Header: Depth=2 -; CHECK-NEXT: @ Child Loop BB11_11 Depth 3 -; CHECK-NEXT: @ Child Loop BB11_14 Depth 3 +; CHECK-NEXT: @ Child Loop BB10_11 Depth 3 +; CHECK-NEXT: @ Child Loop BB10_14 Depth 3 ; CHECK-NEXT: cmp.w r12, #3 -; CHECK-NEXT: bhi .LBB11_10 -; CHECK-NEXT: @ %bb.9: @ in Loop: Header=BB11_8 Depth=2 +; CHECK-NEXT: bhi .LBB10_10 +; CHECK-NEXT: @ %bb.9: @ in Loop: Header=BB10_8 Depth=2 ; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: mov.w r10, #0 -; CHECK-NEXT: b .LBB11_13 -; CHECK-NEXT: .LBB11_10: @ %vector.ph -; CHECK-NEXT: @ in Loop: Header=BB11_8 Depth=2 +; CHECK-NEXT: b .LBB10_13 +; CHECK-NEXT: .LBB10_10: @ %vector.ph +; CHECK-NEXT: @ in Loop: Header=BB10_8 Depth=2 ; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: vmov q5, q1 ; CHECK-NEXT: vmov.i32 q4, #0x0 ; CHECK-NEXT: vmlas.u32 q5, q2, r8 ; CHECK-NEXT: dls lr, r0 ; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: .LBB11_11: @ %vector.body -; CHECK-NEXT: @ Parent Loop BB11_5 Depth=1 -; CHECK-NEXT: @ Parent Loop BB11_8 Depth=2 +; CHECK-NEXT: .LBB10_11: @ %vector.body +; CHECK-NEXT: @ Parent Loop BB10_5 Depth=1 +; CHECK-NEXT: @ Parent Loop BB10_8 Depth=2 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=3 ; CHECK-NEXT: vadd.i32 q6, q5, q3 ; CHECK-NEXT: vldrh.s32 q7, [r1, q5, uxtw #1] @@ -743,15 +700,15 @@ ; CHECK-NEXT: vmul.i32 q5, q7, q5 ; CHECK-NEXT: vadd.i32 q4, q5, q4 ; CHECK-NEXT: vmov q5, q6 -; CHECK-NEXT: le lr, .LBB11_11 +; CHECK-NEXT: le lr, .LBB10_11 ; CHECK-NEXT: @ %bb.12: @ %middle.block -; CHECK-NEXT: @ in Loop: Header=BB11_8 Depth=2 +; CHECK-NEXT: @ in Loop: Header=BB10_8 Depth=2 ; CHECK-NEXT: vaddv.u32 r10, q4 ; CHECK-NEXT: cmp r2, r12 ; CHECK-NEXT: mov r4, r2 -; CHECK-NEXT: beq .LBB11_7 -; CHECK-NEXT: .LBB11_13: @ %for.body8.us.us.preheader -; CHECK-NEXT: @ in Loop: Header=BB11_8 Depth=2 +; CHECK-NEXT: beq .LBB10_7 +; CHECK-NEXT: .LBB10_13: @ %for.body8.us.us.preheader +; CHECK-NEXT: @ in Loop: Header=BB10_8 Depth=2 ; CHECK-NEXT: mla r3, r7, r4, r8 ; CHECK-NEXT: add.w r0, r11, r4 ; CHECK-NEXT: ldr r7, [sp, #24] @ 4-byte Reload @@ -759,24 +716,24 @@ ; CHECK-NEXT: add.w r9, r7, r0, lsl #1 ; CHECK-NEXT: ldr r7, [sp, #136] ; CHECK-NEXT: add.w r3, r1, r3, lsl #1 -; CHECK-NEXT: .LBB11_14: @ %for.body8.us.us -; CHECK-NEXT: @ Parent Loop BB11_5 Depth=1 -; CHECK-NEXT: @ Parent Loop BB11_8 Depth=2 +; CHECK-NEXT: .LBB10_14: @ %for.body8.us.us +; CHECK-NEXT: @ Parent Loop BB10_5 Depth=1 +; CHECK-NEXT: @ Parent Loop BB10_8 Depth=2 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=3 ; CHECK-NEXT: ldrsh.w r4, [r3] ; CHECK-NEXT: add r3, r6 ; CHECK-NEXT: ldrsh r0, [r9], #2 ; CHECK-NEXT: smlabb r10, r4, r0, r10 -; CHECK-NEXT: le lr, .LBB11_14 -; CHECK-NEXT: b .LBB11_7 -; CHECK-NEXT: .LBB11_15: @ Parent Loop BB11_5 Depth=1 +; CHECK-NEXT: le lr, .LBB10_14 +; CHECK-NEXT: b .LBB10_7 +; CHECK-NEXT: .LBB10_15: @ Parent Loop BB10_5 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: vstrb.8 q0, [r3], #16 -; CHECK-NEXT: letp lr, .LBB11_15 -; CHECK-NEXT: b .LBB11_4 +; CHECK-NEXT: letp lr, .LBB10_15 +; CHECK-NEXT: b .LBB10_4 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.16: -; CHECK-NEXT: .LCPI11_0: +; CHECK-NEXT: .LCPI10_0: ; CHECK-NEXT: .long 0 @ 0x0 ; CHECK-NEXT: .long 1 @ 0x1 ; CHECK-NEXT: .long 2 @ 0x2 @@ -900,7 +857,7 @@ ; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: ldrd r2, r7, [sp, #104] ; CHECK-NEXT: add.w r8, r7, #10 -; CHECK-NEXT: adr r7, .LCPI12_0 +; CHECK-NEXT: adr r7, .LCPI11_0 ; CHECK-NEXT: ldr r1, [sp, #96] ; CHECK-NEXT: vdup.32 q0, r2 ; CHECK-NEXT: vldrw.u32 q1, [r7] @@ -909,36 +866,36 @@ ; CHECK-NEXT: movs r6, #11 ; CHECK-NEXT: vshl.i32 q0, q0, #2 ; CHECK-NEXT: movs r5, #0 -; CHECK-NEXT: .LBB12_1: @ %for.body10.i +; CHECK-NEXT: .LBB11_1: @ %for.body10.i ; CHECK-NEXT: @ =>This Loop Header: Depth=1 -; CHECK-NEXT: @ Child Loop BB12_2 Depth 2 -; CHECK-NEXT: @ Child Loop BB12_3 Depth 3 -; CHECK-NEXT: @ Child Loop BB12_4 Depth 4 -; CHECK-NEXT: @ Child Loop BB12_5 Depth 5 +; CHECK-NEXT: @ Child Loop BB11_2 Depth 2 +; CHECK-NEXT: @ Child Loop BB11_3 Depth 3 +; CHECK-NEXT: @ Child Loop BB11_4 Depth 4 +; CHECK-NEXT: @ Child Loop BB11_5 Depth 5 ; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: .LBB12_2: @ %for.cond22.preheader.i -; CHECK-NEXT: @ Parent Loop BB12_1 Depth=1 +; CHECK-NEXT: .LBB11_2: @ %for.cond22.preheader.i +; CHECK-NEXT: @ Parent Loop BB11_1 Depth=1 ; CHECK-NEXT: @ => This Loop Header: Depth=2 -; CHECK-NEXT: @ Child Loop BB12_3 Depth 3 -; CHECK-NEXT: @ Child Loop BB12_4 Depth 4 -; CHECK-NEXT: @ Child Loop BB12_5 Depth 5 +; CHECK-NEXT: @ Child Loop BB11_3 Depth 3 +; CHECK-NEXT: @ Child Loop BB11_4 Depth 4 +; CHECK-NEXT: @ Child Loop BB11_5 Depth 5 ; CHECK-NEXT: movs r5, #0 -; CHECK-NEXT: .LBB12_3: @ %for.body27.i -; CHECK-NEXT: @ Parent Loop BB12_1 Depth=1 -; CHECK-NEXT: @ Parent Loop BB12_2 Depth=2 +; CHECK-NEXT: .LBB11_3: @ %for.body27.i +; CHECK-NEXT: @ Parent Loop BB11_1 Depth=1 +; CHECK-NEXT: @ Parent Loop BB11_2 Depth=2 ; CHECK-NEXT: @ => This Loop Header: Depth=3 -; CHECK-NEXT: @ Child Loop BB12_4 Depth 4 -; CHECK-NEXT: @ Child Loop BB12_5 Depth 5 +; CHECK-NEXT: @ Child Loop BB11_4 Depth 4 +; CHECK-NEXT: @ Child Loop BB11_5 Depth 5 ; CHECK-NEXT: dls lr, r9 ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: mov.w r11, #4 -; CHECK-NEXT: .LBB12_4: @ %for.body78.us.i -; CHECK-NEXT: @ Parent Loop BB12_1 Depth=1 -; CHECK-NEXT: @ Parent Loop BB12_2 Depth=2 -; CHECK-NEXT: @ Parent Loop BB12_3 Depth=3 +; CHECK-NEXT: .LBB11_4: @ %for.body78.us.i +; CHECK-NEXT: @ Parent Loop BB11_1 Depth=1 +; CHECK-NEXT: @ Parent Loop BB11_2 Depth=2 +; CHECK-NEXT: @ Parent Loop BB11_3 Depth=3 ; CHECK-NEXT: @ => This Loop Header: Depth=4 -; CHECK-NEXT: @ Child Loop BB12_5 Depth 5 +; CHECK-NEXT: @ Child Loop BB11_5 Depth 5 ; CHECK-NEXT: mul r4, r11, r6 ; CHECK-NEXT: vdup.32 q3, r5 ; CHECK-NEXT: vdup.32 q2, r7 @@ -948,11 +905,11 @@ ; CHECK-NEXT: vadd.i32 q4, q1, r4 ; CHECK-NEXT: mov r4, r8 ; CHECK-NEXT: vmla.u32 q2, q4, r2 -; CHECK-NEXT: .LBB12_5: @ %vector.body -; CHECK-NEXT: @ Parent Loop BB12_1 Depth=1 -; CHECK-NEXT: @ Parent Loop BB12_2 Depth=2 -; CHECK-NEXT: @ Parent Loop BB12_3 Depth=3 -; CHECK-NEXT: @ Parent Loop BB12_4 Depth=4 +; CHECK-NEXT: .LBB11_5: @ %vector.body +; CHECK-NEXT: @ Parent Loop BB11_1 Depth=1 +; CHECK-NEXT: @ Parent Loop BB11_2 Depth=2 +; CHECK-NEXT: @ Parent Loop BB11_3 Depth=3 +; CHECK-NEXT: @ Parent Loop BB11_4 Depth=4 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=5 ; CHECK-NEXT: vldrb.s32 q6, [r0, q2] ; CHECK-NEXT: vadd.i32 q5, q2, q0 @@ -963,34 +920,34 @@ ; CHECK-NEXT: vmov q3, q4 ; CHECK-NEXT: vmlava.u32 r12, q2, q6 ; CHECK-NEXT: vmov q2, q5 -; CHECK-NEXT: bne .LBB12_5 +; CHECK-NEXT: bne .LBB11_5 ; CHECK-NEXT: @ %bb.6: @ %middle.block -; CHECK-NEXT: @ in Loop: Header=BB12_4 Depth=4 +; CHECK-NEXT: @ in Loop: Header=BB11_4 Depth=4 ; CHECK-NEXT: add.w r11, r11, #1 -; CHECK-NEXT: le lr, .LBB12_4 +; CHECK-NEXT: le lr, .LBB11_4 ; CHECK-NEXT: @ %bb.7: @ %for.cond.cleanup77.i -; CHECK-NEXT: @ in Loop: Header=BB12_3 Depth=3 +; CHECK-NEXT: @ in Loop: Header=BB11_3 Depth=3 ; CHECK-NEXT: adds r5, #1 ; CHECK-NEXT: add.w r10, r10, #1 ; CHECK-NEXT: cmp r5, r2 -; CHECK-NEXT: bne .LBB12_3 +; CHECK-NEXT: bne .LBB11_3 ; CHECK-NEXT: @ %bb.8: @ %for.cond.cleanup26.i -; CHECK-NEXT: @ in Loop: Header=BB12_2 Depth=2 +; CHECK-NEXT: @ in Loop: Header=BB11_2 Depth=2 ; CHECK-NEXT: adds r7, #1 ; CHECK-NEXT: cmp r7, r3 -; CHECK-NEXT: bne .LBB12_2 +; CHECK-NEXT: bne .LBB11_2 ; CHECK-NEXT: @ %bb.9: @ %for.cond.cleanup20.i -; CHECK-NEXT: @ in Loop: Header=BB12_1 Depth=1 +; CHECK-NEXT: @ in Loop: Header=BB11_1 Depth=1 ; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: ldr r7, [sp, #148] ; CHECK-NEXT: adds r5, #1 ; CHECK-NEXT: cmp r5, r7 ; CHECK-NEXT: it eq ; CHECK-NEXT: moveq r5, #0 -; CHECK-NEXT: b .LBB12_1 +; CHECK-NEXT: b .LBB11_1 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.10: -; CHECK-NEXT: .LCPI12_0: +; CHECK-NEXT: .LCPI11_0: ; CHECK-NEXT: .long 0 @ 0x0 ; CHECK-NEXT: .long 1 @ 0x1 ; CHECK-NEXT: .long 2 @ 0x2 @@ -1136,7 +1093,6 @@ } declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) -declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>) declare <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*>, i32, <4 x i1>, <4 x i16>) declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32 immarg, <4 x i1>, <4 x i8>) #3 Index: llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll +++ llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst -opaque-pointers %s -o - | FileCheck %s define arm_aapcs_vfpcc void @scatter_inc_minipred_4i32(<4 x i32> %data, i32* %dst, <4 x i32> %offs) { @@ -224,104 +225,6 @@ ret void } -define arm_aapcs_vfpcc void @scatter_inc_v4i32_complex_opaque(<4 x i32> %data1, <4 x i32> %data2, <4 x i32> %data3, ptr %dst, i32 %n) { -; CHECK-LABEL: scatter_inc_v4i32_complex_opaque: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: cmp r1, #1 -; CHECK-NEXT: blt .LBB4_5 -; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader -; CHECK-NEXT: adr r4, .LCPI4_2 -; CHECK-NEXT: bic r2, r1, #3 -; CHECK-NEXT: vldrw.u32 q3, [r4] -; CHECK-NEXT: sub.w r12, r2, #4 -; CHECK-NEXT: adr.w lr, .LCPI4_1 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: vadd.i32 q3, q3, r0 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: vstrw.32 q3, [sp] @ 16-byte Spill -; CHECK-NEXT: vldrw.u32 q3, [lr] -; CHECK-NEXT: adr.w r12, .LCPI4_0 -; CHECK-NEXT: vadd.i32 q4, q3, r0 -; CHECK-NEXT: vldrw.u32 q3, [r12] -; CHECK-NEXT: vadd.i32 q3, q3, r0 -; CHECK-NEXT: .LBB4_2: @ %vector.ph -; CHECK-NEXT: @ =>This Loop Header: Depth=1 -; CHECK-NEXT: @ Child Loop BB4_3 Depth 2 -; CHECK-NEXT: dls lr, r3 -; CHECK-NEXT: vmov q6, q4 -; CHECK-NEXT: vldrw.u32 q7, [sp] @ 16-byte Reload -; CHECK-NEXT: vmov q5, q3 -; CHECK-NEXT: .LBB4_3: @ %vector.body -; CHECK-NEXT: @ Parent Loop BB4_2 Depth=1 -; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: vstrw.32 q0, [q5, #48]! -; CHECK-NEXT: vstrw.32 q1, [q6, #48]! -; CHECK-NEXT: vstrw.32 q2, [q7, #48]! -; CHECK-NEXT: le lr, .LBB4_3 -; CHECK-NEXT: @ %bb.4: @ %middle.block -; CHECK-NEXT: @ in Loop: Header=BB4_2 Depth=1 -; CHECK-NEXT: cmp r2, r1 -; CHECK-NEXT: bne .LBB4_2 -; CHECK-NEXT: .LBB4_5: @ %for.cond.cleanup -; CHECK-NEXT: add sp, #16 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: pop {r4, pc} -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.6: -; CHECK-NEXT: .LCPI4_0: -; CHECK-NEXT: .long 4294967248 @ 0xffffffd0 -; CHECK-NEXT: .long 4294967260 @ 0xffffffdc -; CHECK-NEXT: .long 4294967272 @ 0xffffffe8 -; CHECK-NEXT: .long 4294967284 @ 0xfffffff4 -; CHECK-NEXT: .LCPI4_1: -; CHECK-NEXT: .long 4294967252 @ 0xffffffd4 -; CHECK-NEXT: .long 4294967264 @ 0xffffffe0 -; CHECK-NEXT: .long 4294967276 @ 0xffffffec -; CHECK-NEXT: .long 4294967288 @ 0xfffffff8 -; CHECK-NEXT: .LCPI4_2: -; CHECK-NEXT: .long 4294967256 @ 0xffffffd8 -; CHECK-NEXT: .long 4294967268 @ 0xffffffe4 -; CHECK-NEXT: .long 4294967280 @ 0xfffffff0 -; CHECK-NEXT: .long 4294967292 @ 0xfffffffc -entry: - %cmp22 = icmp sgt i32 %n, 0 - br i1 %cmp22, label %vector.ph, label %for.cond.cleanup - -vector.ph: ; preds = %for.body.preheader - %n.vec = and i32 %n, -4 - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %vec.ind = phi <4 x i32> [ , %vector.ph ], [ %vec.ind.next, %vector.body ] - %0 = mul nuw nsw <4 x i32> %vec.ind, - %1 = getelementptr inbounds i32, ptr %dst, <4 x i32> %0 - call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data1, <4 x ptr> %1, i32 4, <4 x i1> ) - %2 = add nuw nsw <4 x i32> %0, - %3 = getelementptr inbounds i32, ptr %dst, <4 x i32> %2 - call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data2, <4 x ptr> %3, i32 4, <4 x i1> ) - %4 = add nuw nsw <4 x i32> %0, - %5 = getelementptr inbounds i32, ptr %dst, <4 x i32> %4 - call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data3, <4 x ptr> %5, i32 4, <4 x i1> ) - %index.next = add i32 %index, 4 - %vec.ind.next = add <4 x i32> %vec.ind, - %6 = icmp eq i32 %index.next, %n.vec - br i1 %6, label %middle.block, label %vector.body - -middle.block: ; preds = %vector.body - %cmp.n = icmp eq i32 %n.vec, %n - br i1 %cmp.n, label %for.cond.cleanup, label %vector.ph - -for.cond.cleanup: ; preds = %for.body, %middle.block, %entry - ret void -} - declare void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8>, <8 x i8*>, i32, <8 x i1>) declare void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16>, <8 x i16*>, i32, <8 x i1>) declare void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half>, <8 x half*>, i32, <8 x i1>) @@ -330,5 +233,4 @@ declare void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16>, <4 x i16*>, i32, <4 x i1>) declare void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half>, <4 x half*>, i32, <4 x i1>) declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>) -declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>) declare void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float>, <4 x float*>, i32, <4 x i1>) Index: llvm/test/CodeGen/Thumb2/mve-scatter-ptrs.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-scatter-ptrs.ll +++ llvm/test/CodeGen/Thumb2/mve-scatter-ptrs.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst -opaque-pointers %s -o - | FileCheck %s ; i32 @@ -242,20 +243,6 @@ ret void } -define arm_aapcs_vfpcc void @ptr_v4i16_trunc_opaque(<4 x i32> %v, ptr %offptr) { -; CHECK-LABEL: ptr_v4i16_trunc_opaque: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q1, [r0] -; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: vstrh.32 q0, [r0, q1] -; CHECK-NEXT: bx lr -entry: - %offs = load <4 x ptr>, ptr %offptr, align 4 - %ext = trunc <4 x i32> %v to <4 x i16> - call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %ext, <4 x ptr> %offs, i32 2, <4 x i1> ) - ret void -} - define arm_aapcs_vfpcc void @ptr_v4i16_dup(i32 %v, <4 x i16*> %offs) { ; CHECK-LABEL: ptr_v4i16_dup: ; CHECK: @ %bb.0: @ %entry @@ -521,14 +508,14 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr -; CHECK-NEXT: .LBB20_1: @ %vector.body +; CHECK-NEXT: .LBB19_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vptt.i32 ne, q0, zr ; CHECK-NEXT: vldrwt.u32 q1, [r0], #16 ; CHECK-NEXT: vstrwt.32 q1, [q0] -; CHECK-NEXT: bne .LBB20_1 +; CHECK-NEXT: bne .LBB19_1 ; CHECK-NEXT: @ %bb.2: @ %for.end ; CHECK-NEXT: bx lr entry: @@ -561,14 +548,14 @@ ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: it lt ; CHECK-NEXT: bxlt lr -; CHECK-NEXT: .LBB21_1: @ %vector.body +; CHECK-NEXT: .LBB20_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vptt.i32 ne, q0, zr ; CHECK-NEXT: vldrwt.u32 q1, [r0], #16 ; CHECK-NEXT: vstrwt.32 q1, [q0] -; CHECK-NEXT: bne .LBB21_1 +; CHECK-NEXT: bne .LBB20_1 ; CHECK-NEXT: @ %bb.2: @ %for.end ; CHECK-NEXT: bx lr entry: @@ -614,7 +601,6 @@ declare void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float>, <2 x float*>, i32, <2 x i1>) declare void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8>, <4 x i8*>, i32, <4 x i1>) declare void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16>, <4 x i16*>, i32, <4 x i1>) -declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>) declare void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half>, <4 x half*>, i32, <4 x i1>) declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>) declare void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float>, <4 x float*>, i32, <4 x i1>) Index: llvm/test/CodeGen/WebAssembly/add-prototypes-opaque-ptrs.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/add-prototypes-opaque-ptrs.ll +++ llvm/test/CodeGen/WebAssembly/add-prototypes-opaque-ptrs.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -wasm-add-missing-prototypes -force-opaque-pointers %s | FileCheck %s +; RUN: opt -S -wasm-add-missing-prototypes -opaque-pointers %s | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" Index: llvm/test/CodeGen/WebAssembly/function-bitcasts.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/function-bitcasts.ll +++ llvm/test/CodeGen/WebAssembly/function-bitcasts.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -asm-verbose=false -wasm-disable-explicit-locals -wasm-keep-registers -enable-emscripten-cxx-exceptions | FileCheck %s --check-prefixes=CHECK,TYPED -; RUN: llc < %s -asm-verbose=false -wasm-disable-explicit-locals -wasm-keep-registers -enable-emscripten-cxx-exceptions -force-opaque-pointers | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: llc < %s -asm-verbose=false -wasm-disable-explicit-locals -wasm-keep-registers -enable-emscripten-cxx-exceptions -opaque-pointers | FileCheck %s --check-prefixes=CHECK,OPAQUE ; Test that function pointer casts are replaced with wrappers. Index: llvm/test/CodeGen/WebAssembly/main-declaration.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/main-declaration.ll +++ llvm/test/CodeGen/WebAssembly/main-declaration.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -asm-verbose=false | FileCheck %s -; RUN: llc < %s -asm-verbose=false -force-opaque-pointers | FileCheck %s +; RUN: llc < %s -asm-verbose=false -opaque-pointers | FileCheck %s ; Test main functions with alternate signatures. Index: llvm/test/Instrumentation/SanitizerCoverage/opaque-ptr.ll =================================================================== --- llvm/test/Instrumentation/SanitizerCoverage/opaque-ptr.ll +++ llvm/test/Instrumentation/SanitizerCoverage/opaque-ptr.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals -; RUN: opt < %s -passes='module(sancov-module)' -sanitizer-coverage-level=1 -force-opaque-pointers -S | FileCheck %s +; RUN: opt < %s -passes='module(sancov-module)' -sanitizer-coverage-level=1 -opaque-pointers -S | FileCheck %s ;. ; CHECK: @[[__SANCOV_LOWEST_STACK:[a-zA-Z0-9_$"\\.-]+]] = external thread_local(initialexec) global i64 Index: llvm/test/Instrumentation/SanitizerCoverage/stack-depth-variable-declared-by-user.ll =================================================================== --- llvm/test/Instrumentation/SanitizerCoverage/stack-depth-variable-declared-by-user.ll +++ llvm/test/Instrumentation/SanitizerCoverage/stack-depth-variable-declared-by-user.ll @@ -3,7 +3,7 @@ ; RUN: not opt < %s -passes='module(sancov-module)' -sanitizer-coverage-level=1 \ ; RUN: -sanitizer-coverage-stack-depth -S 2>&1 | FileCheck %s ; RUN: not opt < %s -passes='module(sancov-module)' -sanitizer-coverage-level=1 \ -; RUN: -sanitizer-coverage-stack-depth -force-opaque-pointers -S 2>&1 | FileCheck %s +; RUN: -sanitizer-coverage-stack-depth -opaque-pointers -S 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: llvm/test/Other/force-opaque-ptrs-typed-dis.ll =================================================================== --- llvm/test/Other/force-opaque-ptrs-typed-dis.ll +++ llvm/test/Other/force-opaque-ptrs-typed-dis.ll @@ -1,13 +1,10 @@ -; RUN: llvm-as --force-opaque-pointers < %s | llvm-dis | FileCheck %s +; RUN: llvm-as --opaque-pointers < %s | not llvm-dis 2>&1 | FileCheck %s + +; CHECK: error: Opaque pointers are only supported in -opaque-pointers mode -; CHECK: @g = external global i16 @g = external global i16 define void @f(i32* %p) { -; CHECK-LABEL: @f( -; CHECK-NEXT: [[A:%.*]] = alloca i17, align 4 -; CHECK-NEXT: ret void -; %a = alloca i17 ret void } Index: llvm/test/Other/force-opaque-ptrs.ll =================================================================== --- llvm/test/Other/force-opaque-ptrs.ll +++ llvm/test/Other/force-opaque-ptrs.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature -; RUN: llvm-as --force-opaque-pointers < %s | llvm-dis --force-opaque-pointers | FileCheck %s -; RUN: llvm-as < %s | llvm-dis --force-opaque-pointers | FileCheck %s -; RUN: opt --force-opaque-pointers < %s -S | FileCheck %s -; RUN: verify-uselistorder --force-opaque-pointers < %s +; RUN: llvm-as --opaque-pointers < %s | llvm-dis --opaque-pointers | FileCheck %s +; RUN: llvm-as < %s | llvm-dis --opaque-pointers | FileCheck %s +; RUN: opt --opaque-pointers < %s -S | FileCheck %s +; RUN: verify-uselistorder --opaque-pointers < %s %ty = type i32* Index: llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll +++ llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll @@ -21,26 +21,6 @@ ret void } -define void @write4to7_opaque_ptr(ptr nocapture %p) { -; CHECK-LABEL: @write4to7_opaque_ptr( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast ptr [[ARRAYIDX0]] to i8* -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 4 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to ptr -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP2]], i8 0, i64 24, i1 false) -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1 -; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1]], align 4 -; CHECK-NEXT: ret void -; -entry: - %arrayidx0 = getelementptr inbounds i32, ptr %p, i64 1 - call void @llvm.memset.p0.i64(ptr align 4 %arrayidx0, i8 0, i64 28, i1 false) - %arrayidx1 = getelementptr inbounds i32, ptr %p, i64 1 - store i32 1, ptr %arrayidx1, align 4 - ret void -} - define void @write4to7_weird_element_type(i32* nocapture %p) { ; CHECK-LABEL: @write4to7_weird_element_type( ; CHECK-NEXT: entry: @@ -452,7 +432,6 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind declare void @llvm.memset.p0i32.i64(i32* nocapture, i8, i64, i1) nounwind -declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture, i8, i64, i1) nounwind declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind Index: llvm/test/Transforms/DeadStoreElimination/opaque-ptr.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/DeadStoreElimination/opaque-ptr.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -dse -opaque-pointers -S | FileCheck %s + +define void @write4to7_opaque_ptr(ptr nocapture %p) { +; CHECK-LABEL: @write4to7_opaque_ptr( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i1 false) +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1 +; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: ret void +; +entry: + %arrayidx0 = getelementptr inbounds i32, ptr %p, i64 1 + call void @llvm.memset.p0.i64(ptr align 4 %arrayidx0, i8 0, i64 28, i1 false) + %arrayidx1 = getelementptr inbounds i32, ptr %p, i64 1 + store i32 1, ptr %arrayidx1, align 4 + ret void +} + +declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind Index: llvm/test/Transforms/InstCombine/force-opaque-ptr.ll =================================================================== --- llvm/test/Transforms/InstCombine/force-opaque-ptr.ll +++ llvm/test/Transforms/InstCombine/force-opaque-ptr.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -instcombine -force-opaque-pointers < %s | FileCheck %s +; RUN: opt -S -instcombine -opaque-pointers < %s | FileCheck %s @g = global [16 x i16] zeroinitializer Index: llvm/test/Transforms/InstCombine/opaque-ptr.ll =================================================================== --- llvm/test/Transforms/InstCombine/opaque-ptr.ll +++ llvm/test/Transforms/InstCombine/opaque-ptr.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -instcombine < %s | FileCheck %s +; RUN: opt -S -instcombine -opaque-pointers < %s | FileCheck %s define ptr @bitcast_opaque_to_opaque(ptr %a) { ; CHECK-LABEL: @bitcast_opaque_to_opaque( @@ -11,8 +11,7 @@ define ptr @bitcast_typed_to_opaque(i8* %a) { ; CHECK-LABEL: @bitcast_typed_to_opaque( -; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[A:%.*]] to ptr -; CHECK-NEXT: ret ptr [[B]] +; CHECK-NEXT: ret ptr [[A:%.*]] ; %b = bitcast i8* %a to ptr ret ptr %b @@ -20,8 +19,7 @@ define i8* @bitcast_opaque_to_typed(ptr %a) { ; CHECK-LABEL: @bitcast_opaque_to_typed( -; CHECK-NEXT: [[B:%.*]] = bitcast ptr [[A:%.*]] to i8* -; CHECK-NEXT: ret i8* [[B]] +; CHECK-NEXT: ret ptr [[A:%.*]] ; %b = bitcast ptr %a to i8* ret i8* %b @@ -30,7 +28,7 @@ @g = global i8 0 define ptr @bitcast_typed_to_opaque_constexpr() { ; CHECK-LABEL: @bitcast_typed_to_opaque_constexpr( -; CHECK-NEXT: ret ptr bitcast (i8* @g to ptr) +; CHECK-NEXT: ret ptr @g ; ret ptr bitcast (i8* @g to ptr) } @@ -46,8 +44,7 @@ define ptr @addrspacecast_typed_to_opaque(i8 addrspace(1)* %a) { ; CHECK-LABEL: @addrspacecast_typed_to_opaque( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[A:%.*]] to ptr addrspace(1) -; CHECK-NEXT: [[B:%.*]] = addrspacecast ptr addrspace(1) [[TMP1]] to ptr +; CHECK-NEXT: [[B:%.*]] = addrspacecast ptr addrspace(1) [[A:%.*]] to ptr ; CHECK-NEXT: ret ptr [[B]] ; %b = addrspacecast i8 addrspace(1)* %a to ptr @@ -56,9 +53,8 @@ define i8* @addrspacecast_opaque_to_typed(ptr addrspace(1) %a) { ; CHECK-LABEL: @addrspacecast_opaque_to_typed( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr addrspace(1) [[A:%.*]] to i8 addrspace(1)* -; CHECK-NEXT: [[B:%.*]] = addrspacecast i8 addrspace(1)* [[TMP1]] to i8* -; CHECK-NEXT: ret i8* [[B]] +; CHECK-NEXT: [[B:%.*]] = addrspacecast ptr addrspace(1) [[A:%.*]] to ptr +; CHECK-NEXT: ret ptr [[B]] ; %b = addrspacecast ptr addrspace(1) %a to i8* ret i8* %b @@ -76,7 +72,7 @@ define ptr addrspace(1) @addrspacecast_typed_to_opaque_constexpr() { ; CHECK-LABEL: @addrspacecast_typed_to_opaque_constexpr( -; CHECK-NEXT: ret ptr addrspace(1) addrspacecast (ptr bitcast (i8* @g to ptr) to ptr addrspace(1)) +; CHECK-NEXT: ret ptr addrspace(1) addrspacecast (ptr @g to ptr addrspace(1)) ; ret ptr addrspace(1) addrspacecast (i8* @g to ptr addrspace(1)) } @@ -90,7 +86,7 @@ define ptr @gep_constexpr_2(ptr %a) { ; CHECK-LABEL: @gep_constexpr_2( -; CHECK-NEXT: ret ptr bitcast (i8* getelementptr (i8, i8* @g, i64 3) to ptr) +; CHECK-NEXT: ret ptr getelementptr (i8, ptr @g, i64 3) ; ret ptr getelementptr (i8, ptr bitcast (i8* @g to ptr), i32 3) } @@ -128,7 +124,7 @@ declare void @varargs(...) define void @varargs_cast_typed_to_opaque_same_type(i32* %a) { ; CHECK-LABEL: @varargs_cast_typed_to_opaque_same_type( -; CHECK-NEXT: call void (...) @varargs(i32* byval(i32) [[A:%.*]]) +; CHECK-NEXT: call void (...) @varargs(ptr byval(i32) [[A:%.*]]) ; CHECK-NEXT: ret void ; %b = bitcast i32* %a to ptr @@ -138,7 +134,7 @@ define void @varargs_cast_typed_to_opaque_different_type(i32* %a) { ; CHECK-LABEL: @varargs_cast_typed_to_opaque_different_type( -; CHECK-NEXT: call void (...) @varargs(i32* byval(i32) [[A:%.*]]) +; CHECK-NEXT: call void (...) @varargs(ptr byval(float) [[A:%.*]]) ; CHECK-NEXT: ret void ; %b = bitcast i32* %a to ptr @@ -148,8 +144,7 @@ define void @varargs_cast_typed_to_opaque_different_size(i32* %a) { ; CHECK-LABEL: @varargs_cast_typed_to_opaque_different_size( -; CHECK-NEXT: [[B:%.*]] = bitcast i32* [[A:%.*]] to ptr -; CHECK-NEXT: call void (...) @varargs(ptr byval(i64) [[B]]) +; CHECK-NEXT: call void (...) @varargs(ptr byval(i64) [[A:%.*]]) ; CHECK-NEXT: ret void ; %b = bitcast i32* %a to ptr Index: llvm/test/Transforms/LoadStoreVectorizer/X86/opaque-ptr.ll =================================================================== --- llvm/test/Transforms/LoadStoreVectorizer/X86/opaque-ptr.ll +++ llvm/test/Transforms/LoadStoreVectorizer/X86/opaque-ptr.ll @@ -1,14 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -S < %s | FileCheck %s +; RUN: opt -mtriple=x86_64-unknown-linux-gnu -load-store-vectorizer -opaque-pointers -S < %s | FileCheck %s define void @test(ptr %ptr) { ; CHECK-LABEL: @test( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr [[PTR:%.*]] to <2 x i32>* -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4 -; CHECK-NEXT: [[L11:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0 -; CHECK-NEXT: [[L22:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr [[PTR]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> zeroinitializer, <2 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: [[L11:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 +; CHECK-NEXT: [[L22:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; %ptr2 = getelementptr i32, ptr %ptr, i64 1 Index: llvm/test/Transforms/LoopStrengthReduce/opaque-ptr.ll =================================================================== --- llvm/test/Transforms/LoopStrengthReduce/opaque-ptr.ll +++ llvm/test/Transforms/LoopStrengthReduce/opaque-ptr.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -loop-reduce < %s | FileCheck %s +; RUN: opt -S -loop-reduce -opaque-pointers < %s | FileCheck %s target datalayout = "e-p:64:64:64-n32:64" Index: llvm/test/Transforms/LoopVectorize/opaque-ptr.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/opaque-ptr.ll +++ llvm/test/Transforms/LoopVectorize/opaque-ptr.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -loop-vectorize -force-vector-width=2 < %s | FileCheck %s +; RUN: opt -S -loop-vectorize -force-vector-width=2 -opaque-pointers < %s | FileCheck %s ; TODO: This still crashes with inbounds on the GEPs. define void @test(ptr %p1.start, ptr %p2.start, ptr %p1.end) { Index: llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll +++ llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll @@ -309,22 +309,6 @@ ret void } -define void @test_opaque_ptrs(ptr %src, i64 %src_size, ptr noalias %dst, i64 %dst_size, i8 %c) { -; CHECK-LABEL: @test_opaque_ptrs( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[DST_SIZE:%.*]], [[SRC_SIZE:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DST_SIZE]], [[SRC_SIZE]] -; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast ptr [[DST:%.*]] to i8* -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP4]], i64 [[SRC_SIZE]] -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP5]], i8 [[C:%.*]], i64 [[TMP3]], i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC:%.*]], i64 [[SRC_SIZE]], i1 false) -; CHECK-NEXT: ret void -; - call void @llvm.memset.p0.i64(ptr %dst, i8 %c, i64 %dst_size, i1 false) - call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %src_size, i1 false) - ret void -} - define void @test_weird_element_type(i16* %src, i64 %src_size, i16* noalias %dst, i64 %dst_size, i8 %c) { ; CHECK-LABEL: @test_weird_element_type( ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[DST_SIZE:%.*]], [[SRC_SIZE:%.*]] @@ -362,8 +346,6 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) declare void @llvm.memset.p0i8.i128(i8* nocapture, i8, i128, i1) declare void @llvm.memcpy.p0i8.p0i8.i128(i8* nocapture, i8* nocapture readonly, i128, i1) -declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) -declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) declare void @llvm.memset.p0i16.i64(i16* nocapture, i8, i64, i1) declare void @llvm.memcpy.p0i16.p0i16.i64(i16* nocapture, i16* nocapture readonly, i64, i1) declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture, i8, i64, i1) Index: llvm/test/Transforms/MemCpyOpt/no-libcalls.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/no-libcalls.ll +++ llvm/test/Transforms/MemCpyOpt/no-libcalls.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -memcpyopt < %s | FileCheck %s --check-prefixes=CHECK,LIBCALLS -; RUN: opt -S -memcpyopt -mtriple=amdgcn-- < %s | FileCheck %s --check-prefixes=CHECK,NO-LIBCALLS -; RUN: opt -S -memcpyopt -mtriple=amdgcn-- -enable-memcpyopt-without-libcalls < %s \ +; RUN: opt -S -memcpyopt -opaque-pointers < %s | FileCheck %s --check-prefixes=CHECK,LIBCALLS +; RUN: opt -S -memcpyopt -mtriple=amdgcn-- -opaque-pointers < %s | FileCheck %s --check-prefixes=CHECK,NO-LIBCALLS +; RUN: opt -S -memcpyopt -mtriple=amdgcn-- -enable-memcpyopt-without-libcalls -opaque-pointers < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,LIBCALLS ; REQUIRES: amdgpu-registered-target Index: llvm/test/Transforms/MemCpyOpt/opaque-ptr.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/MemCpyOpt/opaque-ptr.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -basic-aa -memcpyopt -S %s -verify-memoryssa -opaque-pointers | FileCheck %s + +define void @test_memset_memcpy(ptr %src, i64 %src_size, ptr noalias %dst, i64 %dst_size, i8 %c) { +; CHECK-LABEL: @test_memset_memcpy( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[DST_SIZE:%.*]], [[SRC_SIZE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DST_SIZE]], [[SRC_SIZE]] +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[SRC_SIZE]] +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP4]], i8 [[C:%.*]], i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST]], ptr [[SRC:%.*]], i64 [[SRC_SIZE]], i1 false) +; CHECK-NEXT: ret void +; + call void @llvm.memset.p0.i64(ptr %dst, i8 %c, i64 %dst_size, i1 false) + call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %src_size, i1 false) + ret void +} + +declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) +declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) Index: llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll =================================================================== --- llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll +++ llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -slp-vectorizer -mtriple=x86_64-apple-macosx -mcpu=haswell < %s | FileCheck %s +; RUN: opt -S -slp-vectorizer -mtriple=x86_64-apple-macosx -mcpu=haswell -opaque-pointers < %s | FileCheck %s define void @test(ptr %r, ptr %p, ptr %q) #0 { ; CHECK-LABEL: @test( @@ -11,19 +11,17 @@ ; CHECK-NEXT: [[Q1:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 1 ; CHECK-NEXT: [[Q2:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 2 ; CHECK-NEXT: [[Q3:%.*]] = getelementptr inbounds i64, ptr [[Q]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr [[P0]] to <4 x i64>* -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* [[TMP1]], align 2 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr [[Q0]] to <4 x i64>* -; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP3]], align 2 -; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <4 x i64> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 -; CHECK-NEXT: [[G0:%.*]] = getelementptr inbounds i32, ptr [[R:%.*]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1 -; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2 -; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3 -; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr [[P0]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr [[Q0]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = sub nsw <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 +; CHECK-NEXT: [[G0:%.*]] = getelementptr inbounds i32, ptr [[R:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 1 +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2 +; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3 +; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[TMP7]] ; CHECK-NEXT: ret void ; %p0 = getelementptr inbounds i64, ptr %p, i64 0 Index: llvm/test/Transforms/SimplifyCFG/speculate-store-opaque-pointer.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SimplifyCFG/speculate-store-opaque-pointer.ll @@ -0,0 +1,67 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -opaque-pointers -S < %s | FileCheck %s + +declare void @unknown_fun() + +define void @different_type(ptr %ptr, i1 %cmp) { +; CHECK-LABEL: @different_type( +; CHECK-NEXT: store i32 0, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: br i1 [[CMP:%.*]], label [[IF_THEN:%.*]], label [[RET_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i64 1, ptr [[PTR]], align 4 +; CHECK-NEXT: br label [[RET_END]] +; CHECK: ret.end: +; CHECK-NEXT: ret void +; + store i32 0, ptr %ptr + br i1 %cmp, label %if.then, label %ret.end + +if.then: + store i64 1, ptr %ptr + br label %ret.end + +ret.end: + ret void +} + +define void @readonly_call(ptr %ptr, i1 %cmp) { +; CHECK-LABEL: @readonly_call( +; CHECK-NEXT: ret.end: +; CHECK-NEXT: store i32 0, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: call void @unknown_fun() #[[ATTR0:[0-9]+]] +; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP:%.*]], i32 1, i32 0 +; CHECK-NEXT: store i32 [[SPEC_STORE_SELECT]], ptr [[PTR]], align 4 +; CHECK-NEXT: ret void +; + store i32 0, ptr %ptr + call void @unknown_fun() readonly + br i1 %cmp, label %if.then, label %ret.end + +if.then: + store i32 1, ptr %ptr + br label %ret.end + +ret.end: + ret void +} + +define void @atomic_and_simple(ptr %ptr, i1 %cmp) { +; CHECK-LABEL: @atomic_and_simple( +; CHECK-NEXT: store atomic i32 0, ptr [[PTR:%.*]] seq_cst, align 4 +; CHECK-NEXT: br i1 [[CMP:%.*]], label [[IF_THEN:%.*]], label [[RET_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i32 1, ptr [[PTR]], align 4 +; CHECK-NEXT: br label [[RET_END]] +; CHECK: ret.end: +; CHECK-NEXT: ret void +; + store atomic i32 0, ptr %ptr seq_cst, align 4 + br i1 %cmp, label %if.then, label %ret.end + +if.then: + store i32 1, ptr %ptr + br label %ret.end + +ret.end: + ret void +} Index: llvm/test/Transforms/SimplifyCFG/speculate-store.ll =================================================================== --- llvm/test/Transforms/SimplifyCFG/speculate-store.ll +++ llvm/test/Transforms/SimplifyCFG/speculate-store.ll @@ -112,68 +112,6 @@ ret void } -define void @different_type(ptr %ptr, i1 %cmp) { -; CHECK-LABEL: @different_type( -; CHECK-NEXT: store i32 0, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: br i1 [[CMP:%.*]], label [[IF_THEN:%.*]], label [[RET_END:%.*]] -; CHECK: if.then: -; CHECK-NEXT: store i64 1, ptr [[PTR]], align 4 -; CHECK-NEXT: br label [[RET_END]] -; CHECK: ret.end: -; CHECK-NEXT: ret void -; - store i32 0, ptr %ptr - br i1 %cmp, label %if.then, label %ret.end - -if.then: - store i64 1, ptr %ptr - br label %ret.end - -ret.end: - ret void -} - -define void @readonly_call(ptr %ptr, i1 %cmp) { -; CHECK-LABEL: @readonly_call( -; CHECK-NEXT: ret.end: -; CHECK-NEXT: store i32 0, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: call void @unknown_fun() #[[ATTR0:[0-9]+]] -; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP:%.*]], i32 1, i32 0 -; CHECK-NEXT: store i32 [[SPEC_STORE_SELECT]], ptr [[PTR]], align 4 -; CHECK-NEXT: ret void -; - store i32 0, ptr %ptr - call void @unknown_fun() readonly - br i1 %cmp, label %if.then, label %ret.end - -if.then: - store i32 1, ptr %ptr - br label %ret.end - -ret.end: - ret void -} - -define void @atomic_and_simple(ptr %ptr, i1 %cmp) { -; CHECK-LABEL: @atomic_and_simple( -; CHECK-NEXT: store atomic i32 0, ptr [[PTR:%.*]] seq_cst, align 4 -; CHECK-NEXT: br i1 [[CMP:%.*]], label [[IF_THEN:%.*]], label [[RET_END:%.*]] -; CHECK: if.then: -; CHECK-NEXT: store i32 1, ptr [[PTR]], align 4 -; CHECK-NEXT: br label [[RET_END]] -; CHECK: ret.end: -; CHECK-NEXT: ret void -; - store atomic i32 0, ptr %ptr seq_cst, align 4 - br i1 %cmp, label %if.then, label %ret.end - -if.then: - store i32 1, ptr %ptr - br label %ret.end - -ret.end: - ret void -} ;; Speculate a store, preceded by a local, non-escaping load define i32 @load_before_store_noescape(i64 %i, i32 %b) { Index: llvm/test/Verifier/force-opaque-ptr.ll =================================================================== --- llvm/test/Verifier/force-opaque-ptr.ll +++ llvm/test/Verifier/force-opaque-ptr.ll @@ -1,4 +1,4 @@ -; RUN: not opt -passes=verify -force-opaque-pointers -S < %s 2>&1 | FileCheck %s +; RUN: not opt -passes=verify -opaque-pointers -S < %s 2>&1 | FileCheck %s declare i32 @llvm.umax.i32(i32, i32) Index: llvm/test/Verifier/musttail-invalid.ll =================================================================== --- llvm/test/Verifier/musttail-invalid.ll +++ llvm/test/Verifier/musttail-invalid.ll @@ -1,4 +1,5 @@ ; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s +; RUN: not llvm-as %s -opaque-pointers -o /dev/null 2>&1 | FileCheck %s ; Each musttail call should fail to validate. @@ -46,13 +47,6 @@ ret void } -declare void @mismatched_byval_callee2(ptr byval(i32)) -define void @mismatched_byval2(ptr byval(i64) %a) { -; CHECK: mismatched ABI impacting function attributes - musttail call void @mismatched_byval_callee2(ptr byval(i32) %a) - ret void -} - declare void @mismatched_inreg_callee(i32 inreg) define void @mismatched_inreg(i32 %a) { ; CHECK: mismatched ABI impacting function attributes Index: llvm/test/Verifier/opaque-ptr-invalid.ll =================================================================== --- llvm/test/Verifier/opaque-ptr-invalid.ll +++ llvm/test/Verifier/opaque-ptr-invalid.ll @@ -1,7 +1,7 @@ -; RUN: not opt -verify < %s 2>&1 | FileCheck %s +; RUN: not opt -verify -opaque-pointers < %s 2>&1 | FileCheck %s ; CHECK: Attribute 'inalloca' does not support unsized types! -; CHECK-NEXT: void (ptr)* @f +; CHECK-NEXT: ptr @f define void @f(ptr inalloca(token)) { ret void } Index: llvm/test/Verifier/opaque-ptr.ll =================================================================== --- llvm/test/Verifier/opaque-ptr.ll +++ llvm/test/Verifier/opaque-ptr.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=verify -S < %s | FileCheck %s +; RUN: opt -passes=verify -opaque-pointers -S < %s | FileCheck %s define i32 @load(ptr %a) { ; CHECK-LABEL: @load(