diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -30,6 +30,7 @@ class SCEV; class SCEVUnionPredicate; class Value; +class LoadStoreSourceExpression; /// Collection of parameters shared beetween the Loop Vectorizer and the /// Loop Access Analysis. @@ -566,7 +567,7 @@ class LoopAccessInfo { public: LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, - AAResults *AA, DominatorTree *DT, LoopInfo *LI); + AAResults *AA, DominatorTree *DT, LoopInfo *LI, LoadStoreSourceExpression *LSE); /// Return true we can analyze the memory accesses in the loop and there are /// no memory dependence cycles. @@ -643,7 +644,7 @@ private: /// Analyze the loop. void analyzeLoop(AAResults *AA, LoopInfo *LI, - const TargetLibraryInfo *TLI, DominatorTree *DT); + const TargetLibraryInfo *TLI, DominatorTree *DT, LoadStoreSourceExpression *LSE); /// Check if the structure of the loop allows it to be analyzed by this /// pass. @@ -666,7 +667,7 @@ // Emits the first unsafe memory dependence in a loop. // Emits nothing if there are no unsafe dependences // or if the dependences were not recorded. 
- void emitUnsafeDependenceRemark(); + void emitUnsafeDependenceRemark(LoadStoreSourceExpression *LSE); std::unique_ptr PSE; @@ -776,11 +777,12 @@ DominatorTree &DT; LoopInfo &LI; const TargetLibraryInfo *TLI = nullptr; + LoadStoreSourceExpression &LSE; public: LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT, - LoopInfo &LI, const TargetLibraryInfo *TLI) - : SE(SE), AA(AA), DT(DT), LI(LI), TLI(TLI) {} + LoopInfo &LI, const TargetLibraryInfo *TLI, LoadStoreSourceExpression &LSE) + : SE(SE), AA(AA), DT(DT), LI(LI), TLI(TLI), LSE(LSE) {} const LoopAccessInfo &getInfo(Loop &L); diff --git a/llvm/include/llvm/Analysis/LoopAnalysisManager.h b/llvm/include/llvm/Analysis/LoopAnalysisManager.h --- a/llvm/include/llvm/Analysis/LoopAnalysisManager.h +++ b/llvm/include/llvm/Analysis/LoopAnalysisManager.h @@ -43,6 +43,7 @@ class ScalarEvolution; class TargetLibraryInfo; class TargetTransformInfo; +class LoadStoreSourceExpression; /// The adaptor from a function pass to a loop pass computes these analyses and /// makes them available to the loop passes "for free". Each loop pass is @@ -56,6 +57,7 @@ ScalarEvolution &SE; TargetLibraryInfo &TLI; TargetTransformInfo &TTI; + LoadStoreSourceExpression &LSE; BlockFrequencyInfo *BFI; BranchProbabilityInfo *BPI; MemorySSA *MSSA; diff --git a/llvm/include/llvm/Analysis/SourceExpressionAnalysis.h b/llvm/include/llvm/Analysis/SourceExpressionAnalysis.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Analysis/SourceExpressionAnalysis.h @@ -0,0 +1,110 @@ +//===- SourceExpressionAnalysis.h - Mapping LLVM Values to Source Level Expression -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LoadStoreSourceExpression class related to analyzing
+// and generating source-level expressions for LLVM values by utilising the
+// debug metadata.
+//
+// This analysis is useful for understanding memory access patterns, aiding
+// optimization decisions, and providing more informative optimization
+// reports.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_SOURCEEXPRESSIONANALYSIS_H
+#define LLVM_ANALYSIS_SOURCEEXPRESSIONANALYSIS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Passes/PassBuilder.h"
+#include
+#include
+#include
+
+namespace llvm {
+
+class LoadStoreSourceExpression {
+public:
+  // Binds the cache to F; F is stored by reference and must outlive this.
+  LoadStoreSourceExpression(const Function &F) : F(F) {}
+
+  // Print out the values currently in the cache.
+  void print(raw_ostream &OS) const;
+
+  // Return the cached source expression for Key, or a fallback message.
+  std::string getSourceExpressionForValue(Value *Key) const {
+    auto It = SourceExpressionsMap.find(Key);
+    if (It != SourceExpressionsMap.end()) {
+      return It->second;
+    }
+
+    return "Complex Expression or load and store get optimized out";
+  }
+
+  // Get the expression string corresponding to an opcode.
+  std::string getExpressionFromOpcode(unsigned Opcode);
+
+  // Process a StoreInst and cache source expressions for its operands.
+  void processStoreInst(StoreInst *I);
+
+  // Process a LoadInst instruction and update SourceExpressionsMap.
+  void processLoadInst(LoadInst *I);
+
+private:
+  // This map stores the source-level expressions for LLVM values.
+  // The expressions are represented as strings and are associated with the
+  // corresponding values. It is used to cache and retrieve source expressions
+  // during the generation process.
+  std::map SourceExpressionsMap;
+
+  // Look up the debug variable attached to StoredValue and cache its name.
+  DILocalVariable *processDbgMetadata(Value *StoredValue);
+
+  const Function &F;
+
+  // Get the source-level expression for an LLVM value.
+  std::string getSourceExpression(Value *Operand);
+
+  // Get the source-level expression for a GetElementPtr instruction.
+  std::string
+  getSourceExpressionForGetElementPtr(GetElementPtrInst *GepInstruction);
+
+  // Get the source-level expression for a BinaryOperator.
+  std::string getSourceExpressionForBinaryOperator(BinaryOperator *BinaryOp,
+                                                   Value *Operand);
+
+  // Get the source-level expression for a SExtInst.
+  std::string getSourceExpressionForSExtInst(SExtInst *SextInstruction);
+};
+
+class SourceExpressionAnalysis
+    : public AnalysisInfoMixin {
+  friend AnalysisInfoMixin;
+  static AnalysisKey Key;
+
+public:
+  using Result = LoadStoreSourceExpression;
+  Result run(Function &F, FunctionAnalysisManager &);
+};
+
+class SourceExpressionAnalysisPrinterPass
+    : public PassInfoMixin {
+  raw_ostream &OS;
+
+public:
+  explicit SourceExpressionAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+  static bool isRequired() { return true; }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -83,6 +83,7 @@
   LazyValueInfo.cpp
   Lint.cpp
   Loads.cpp
+  SourceExpressionAnalysis.cpp
   Local.cpp
   LoopAccessAnalysis.cpp
   LoopAnalysisManager.cpp
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -35,6 +35,7 @@
 #include
"llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/Analysis/SourceExpressionAnalysis.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -84,6 +85,11 @@ VectorizerParams::VectorizationInterleave)); unsigned VectorizerParams::VectorizationInterleave; +static cl::opt ReportSourceExpr( + "report-source-expr", cl::Hidden, + cl::desc("Report source expression for Load/Store pointers."), + cl::init(true)); + static cl::opt RuntimeMemoryCheckThreshold( "runtime-memory-check-threshold", cl::Hidden, cl::desc("When performing memory disambiguation checks at runtime do not " @@ -2187,7 +2193,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, const TargetLibraryInfo *TLI, - DominatorTree *DT) { + DominatorTree *DT, LoadStoreSourceExpression *LSE) { // Holds the Load and Store instructions. SmallVector Loads; SmallVector Stores; @@ -2487,10 +2493,10 @@ << (PtrRtChecking->Need ? "" : " don't") << " need runtime memory checks.\n"); else - emitUnsafeDependenceRemark(); + emitUnsafeDependenceRemark(LSE); } -void LoopAccessInfo::emitUnsafeDependenceRemark() { +void LoopAccessInfo::emitUnsafeDependenceRemark(LoadStoreSourceExpression *LSE) { auto Deps = getDepChecker().getDependences(); if (!Deps) return; @@ -2501,17 +2507,52 @@ if (Found == Deps->end()) return; MemoryDepChecker::Dependence Dep = *Found; - + LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n"); - // Emit remark for first unsafe dependence OptimizationRemarkAnalysis &R = recordAnalysis("UnsafeDep", Dep.getDestination(*this)) - << "unsafe dependent memory operations in loop. Use " + << "unsafe dependent memory operations in loop. 
Use " "#pragma loop distribute(enable) to allow loop distribution " "to attempt to isolate the offending operations into a separate " "loop"; + // Report source expression for dependence source and destination if the user + // asked for it. + + if (ReportSourceExpr) { + llvm::Instruction *SourceInst = Dep.getSource(*this); + llvm::Instruction *DestInst = Dep.getDestination(*this); + + R << " Dependence source: "; + llvm::Value *SourceValue = nullptr; + + if (llvm::StoreInst *StoreInstruction = + llvm::dyn_cast(SourceInst)) { + SourceValue = StoreInstruction->getPointerOperand(); + } else if (llvm::LoadInst *LoadInstruction = + llvm::dyn_cast(SourceInst)) { + SourceValue = LoadInstruction->getPointerOperand(); + } else { + SourceValue = Dep.getSource(*this); + } + R << LSE->getSourceExpressionForValue(SourceValue); + + R << " Dependence destination: "; + llvm::Value *DestValue = nullptr; + + if (llvm::StoreInst *StoreInstruction = + llvm::dyn_cast(DestInst)) { + DestValue = StoreInstruction->getPointerOperand(); + } else if (llvm::LoadInst *LoadInstruction = + llvm::dyn_cast(DestInst)) { + DestValue = LoadInstruction->getPointerOperand(); + } else { + DestValue = Dep.getDestination(*this); + } + R << LSE->getSourceExpressionForValue(DestValue); + } + switch (Dep.Type) { case MemoryDepChecker::Dependence::NoDep: case MemoryDepChecker::Dependence::Forward: @@ -2794,13 +2835,13 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AAResults *AA, - DominatorTree *DT, LoopInfo *LI) + DominatorTree *DT, LoopInfo *LI, LoadStoreSourceExpression *LSE) : PSE(std::make_unique(*SE, *L)), PtrRtChecking(nullptr), DepChecker(std::make_unique(*PSE, L)), TheLoop(L) { PtrRtChecking = std::make_unique(*DepChecker, SE); if (canAnalyzeLoop()) { - analyzeLoop(AA, LI, TLI, DT); + analyzeLoop(AA, LI, TLI, DT, LSE); } } @@ -2853,7 +2894,7 @@ if (I.second) I.first->second = - std::make_unique(&L, &SE, TLI, &AA, &DT, &LI); + std::make_unique(&L, &SE, 
TLI, &AA, &DT, &LI, &LSE); return *I.first->second; } @@ -2883,7 +2924,8 @@ auto &DT = FAM.getResult(F); auto &LI = FAM.getResult(F); auto &TLI = FAM.getResult(F); - return LoopAccessInfoManager(SE, AA, DT, LI, &TLI); + auto &LSE = FAM.getResult(F); + return LoopAccessInfoManager(SE, AA, DT, LI, &TLI, LSE); } AnalysisKey LoopAccessAnalysis::Key; diff --git a/llvm/lib/Analysis/SourceExpressionAnalysis.cpp b/llvm/lib/Analysis/SourceExpressionAnalysis.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Analysis/SourceExpressionAnalysis.cpp @@ -0,0 +1,402 @@ +//===- SourceExpressionAnalysis.cpp - Mapping Source Expression +//---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the mapping between LLVM Value and Source level +// expression, by utilizing the debug intrinsics. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/SourceExpressionAnalysis.h" + +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Passes/PassPlugin.h" +#include +using namespace llvm; + +#define DEBUG_TYPE "source_expr" + +// This function translates LLVM opcodes to source-level expressions using DWARF +// operation encodings. It takes an LLVM opcode as input and returns the +// corresponding symbol as a string. 
If the opcode is supported,
+// the function returns the appropriate symbol, such as "+",
+// "-", "*", "/", "<<", ">>", "&", "|", "^", or "%". If the opcode is not
+// supported, the function returns "unknown".
+std::string
+LoadStoreSourceExpression::getExpressionFromOpcode(unsigned Opcode) {
+  // Integer and floating-point variants share one source-level operator.
+  switch (Opcode) {
+  case Instruction::Add:
+  case Instruction::FAdd:
+    return "+";
+  case Instruction::Sub:
+  case Instruction::FSub:
+    return "-";
+  case Instruction::Mul:
+  case Instruction::FMul:
+    return "*";
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::FDiv:
+    return "/";
+  case Instruction::URem:
+  case Instruction::SRem:
+  case Instruction::FRem:
+    return "%";
+  case Instruction::Shl:
+    return "<<";
+  case Instruction::LShr:
+  case Instruction::AShr:
+    return ">>";
+  case Instruction::And:
+    return "&";
+  case Instruction::Or:
+    return "|";
+  case Instruction::Xor:
+    return "^";
+  default:
+    return "unknown";
+  }
+}
+
+// Return a copy of AddrStr with the first '&' character (if any) removed.
+static std::string removeAmpersand(StringRef AddrStr) {
+  std::string Result = AddrStr.str();
+
+  size_t Found = Result.find('&');
+  if (Found != std::string::npos) {
+    Result.erase(Found, 1);
+  }
+  return Result;
+}
+
+// Process the debug metadata for the given stored value. This function
+// retrieves the debug value (DbgValueInst) and debug declare
+// (DbgDeclareInst) intrinsics that refer to the stored value. The first
+// DbgDeclareInst is preferred; when there is none, the first DbgValueInst
+// is used. In either case the variable's name is cached in
+// SourceExpressionsMap for the stored value and the DILocalVariable is
+// returned. Values that are not used by metadata are skipped without any
+// lookup.
+//
+// @param StoredValue The stored value to process.
+// @return The DILocalVariable associated with the stored value, or nullptr if
+// no debug metadata is found.
+DILocalVariable *
+LoadStoreSourceExpression::processDbgMetadata(Value *StoredValue) {
+  if (StoredValue->isUsedByMetadata()) {
+    // Find the corresponding DbgValues and DbgDeclareInsts
+    SmallVector DbgValues;
+    findDbgValues(DbgValues, StoredValue);
+
+    TinyPtrVector DbgDeclareInsts =
+        FindDbgDeclareUses(StoredValue);
+
+    if (!DbgDeclareInsts.empty()) {
+      // Handle the case where DbgDeclareInst is found
+      DbgDeclareInst *DbgDeclare = DbgDeclareInsts[0];
+      DILocalVariable *LocalVar = DbgDeclare->getVariable();
+      SourceExpressionsMap[StoredValue] = LocalVar->getName().str();
+      return LocalVar;
+    } else if (!DbgValues.empty()) {
+      // Handle the case where DbgValueInst is found
+      DbgValueInst *DbgValue = DbgValues[0];
+      DILocalVariable *LocalVar = DbgValue->getVariable();
+      SourceExpressionsMap[StoredValue] = LocalVar->getName().str();
+      return LocalVar;
+    }
+  }
+
+  return nullptr;
+}
+
+// Get the source-level expression for an LLVM value.
+// @param Operand The LLVM value to generate the source-level expression for.
+std::string LoadStoreSourceExpression::getSourceExpression(Value *Operand) {
+  // Serve repeated queries for the same value from the cache.
+  if (SourceExpressionsMap.count(Operand))
+    return SourceExpressionsMap[Operand];
+
+  if (GetElementPtrInst *GepInstruction =
+          dyn_cast(Operand)) {
+    return getSourceExpressionForGetElementPtr(GepInstruction);
+  } else if (BinaryOperator *BinaryOp = dyn_cast(Operand)) {
+    return getSourceExpressionForBinaryOperator(BinaryOp, Operand);
+  } else if (SExtInst *SextInstruction = dyn_cast(Operand)) {
+    return getSourceExpressionForSExtInst(SextInstruction);
+  } else {
+    // Check if the operand has debug metadata associated with it
+    if (!isa(Operand)) {
+      DILocalVariable *LocalVar = processDbgMetadata(Operand);
+      if (LocalVar) {
+        SourceExpressionsMap[Operand] = LocalVar->getName().str();
+        return SourceExpressionsMap[Operand];
+      }
+    }
+  }
+
+  // If no specific case matches, return the name of the operand or its
+  // representation
+  return Operand->getNameOrAsOperand();
+}
+
+// Get the type tag from the given DIType
+// Returns:
+// 0: If the DIType is null or the type tag is unknown or unsupported
+// DW_TAG_base_type, DW_TAG_pointer_type, DW_TAG_const_type, etc.: The type
+// tag
+static uint16_t getTypeTag(DIType *TypeToBeProcessed) {
+  if (!TypeToBeProcessed)
+    return 0;
+
+  if (auto *BasicType = dyn_cast(TypeToBeProcessed)) {
+    return BasicType->getTag();
+  } else if (auto *DerivedType = dyn_cast(TypeToBeProcessed)) {
+    return DerivedType->getTag();
+  } else if (auto *CompositeType =
+                 dyn_cast(TypeToBeProcessed)) {
+    return CompositeType->getTag();
+  }
+
+  // Return 0 for unknown or unsupported type tags
+  return 0;
+}
+
+// Get the source-level expression for a GetElementPtr instruction.
+// @param GepInstruction The GetElementPtr instruction.
+// @return The source-level expression for the address computation.
+std::string LoadStoreSourceExpression::getSourceExpressionForGetElementPtr(
+    GetElementPtrInst *GepInstruction) {
+  // GetElementPtr instruction - construct source expression for address
+  // computation
+  Value *BasePointer = GepInstruction->getOperand(0);
+  Value *Offset = GepInstruction->getOperand(GepInstruction->getNumIndices());
+  // Only the last index operand is rendered; earlier indices are ignored.
+
+  int OffsetVal = INT_MIN;
+  if (ConstantInt *OffsetConstant = dyn_cast(Offset)) {
+    // Retrieve the value of the constant integer as an integer
+    OffsetVal = OffsetConstant->getSExtValue();
+  }
+
+  DILocalVariable *LocalVar = processDbgMetadata(BasePointer);
+  DIType *Type = LocalVar ? LocalVar->getType() : nullptr;
+
+  std::string BasePointerName = getSourceExpression(BasePointer);
+  std::string OffsetName = getSourceExpression(Offset);
+
+  SmallString<32> Expression;
+  raw_svector_ostream OS(Expression);
+
+  uint16_t Tag = getTypeTag(Type);
+  auto *SourceElementType = GepInstruction->getSourceElementType();
+
+  // Struct accesses (or a base already marked "unknown") are not rendered.
+  // BasePointerName is compared so the map is not default-populated here.
+  if (Tag == dwarf::DW_TAG_structure_type ||
+      SourceElementType->getTypeID() == Type::TypeID::StructTyID ||
+      (SourceElementType->getTypeID() == Type::TypeID::ArrayTyID &&
+       SourceElementType->getArrayElementType()->getTypeID() ==
+           Type::TypeID::StructTyID) ||
+      BasePointerName == "unknown") {
+    SourceExpressionsMap[GepInstruction] = "unknown";
+    return "unknown";
+  } else if (Tag == dwarf::DW_TAG_array_type ||
+             isa(BasePointer->getType())) {
+    if (BasePointerName.find('[') == std::string::npos) {
+      // Construct the source expression for the address computation with
+      // square brackets
+      OS << "&" << BasePointerName << "[" << OffsetName << "]";
+    } else if (BasePointerName.find('[') != std::string::npos) {
+      // If basePointerName already contains square brackets, combine it
+      // with offsetName directly
+      OS << BasePointerName << "[" << OffsetName << "]";
+    } else if (BasePointerName.find('[') != std::string::npos &&
+               OffsetVal != INT_MIN) {
+      // NOTE: unreachable - the two conditions above are exhaustive, so the
+      // constant-offset form below is never emitted.
+      OS << BasePointerName << " + " << OffsetVal;
+    }
+  }
+  SourceExpressionsMap[GepInstruction] = Expression.str().str();
+
+  // Return the constructed source expression
+  return Expression.str().str();
+}
+
+// Get the source-level expression for a binary operator instruction.
+// @param BinaryOp The binary operator instruction.
+// @param Operand The value the result is cached under (the caller passes
+// the binary operator itself).
+// @return The source-level expression for the binary operation.
+std::string LoadStoreSourceExpression::getSourceExpressionForBinaryOperator(
+    BinaryOperator *BinaryOp, Value *Operand) {
+  // Binary operator - build source expression using two operands
+  Value *Operand1 = BinaryOp->getOperand(0);
+  Value *Operand2 = BinaryOp->getOperand(1);
+
+  // Translate both operands first; each call may itself populate the cache.
+  std::string Name1 = getSourceExpression(Operand1);
+  std::string Name2 = getSourceExpression(Operand2);
+
+  SmallString<32> Expression;
+  raw_svector_ostream OS(Expression);
+
+  // Parenthesize so nested operations keep their grouping when composed.
+  OS << "(" << Name1 << " " << getExpressionFromOpcode(BinaryOp->getOpcode())
+     << " " << Name2 << ")";
+
+  SourceExpressionsMap[Operand] = Expression.str().str();
+  // Return the constructed source expression
+  return Expression.str().str();
+}
+
+// Helper function to get the type name as a string
+static std::string getTypeNameAsString(Type *TypeToBeProcessed) {
+  std::string TypeName;
+  raw_string_ostream TypeNameStream(TypeName);
+  TypeToBeProcessed->print(TypeNameStream);
+  return TypeNameStream.str();
+}
+
+// Get the source-level expression for a sign extension instruction.
+// @param SextInstruction The sign extension instruction.
+// @return The operand's expression prefixed with a cast to the target type.
+std::string LoadStoreSourceExpression::getSourceExpressionForSExtInst(
+    SExtInst *SextInstruction) {
+  // Signed Extension instruction - render as "(<type>)<operand expression>"
+
+  Value *OperandVal = SextInstruction->getOperand(0);
+  std::string OperandName = getSourceExpression(OperandVal);
+
+  // Get the target type name for the signed extension
+  std::string TargetType = getTypeNameAsString(SextInstruction->getType());
+
+  // Construct the source expression with the casting operation
+  std::string SourceExpression = "(" + TargetType + ")" + OperandName;
+
+  // Cache under the sext itself so the operand's own entry stays intact.
+  SourceExpressionsMap[SextInstruction] = SourceExpression;
+
+  // Return the source expression
+  return SourceExpression;
+}
+
+// Process the StoreInst and generate the source expression for the stored
+// value. This function takes a StoreInst pointer and processes the associated
+// metadata to retrieve the variable name. It then constructs the source
+// expressions for both the pointer operand and the value operand. If the
+// operands are instructions, it calls the appropriate function to get their
+// source expressions. Otherwise, it constructs the source expressions directly
+// for non-instruction operands. The resulting source expressions are stored in
+// the sourceExpressionsMap.
+void LoadStoreSourceExpression::processStoreInst(StoreInst *I) {
+  Value *PointerOperand = I->getPointerOperand();
+  Value *ValueOperand = I->getValueOperand();
+
+  std::string PointerExpression, ValueExpression;
+
+  PointerExpression = getSourceExpression(PointerOperand);
+  ValueExpression = getSourceExpression(ValueOperand);
+  // Store the source expressions for both operands in the SourceExpressionsMap
+  SourceExpressionsMap[PointerOperand] = PointerExpression;
+  SourceExpressionsMap[ValueOperand] = ValueExpression;
+}
+
+// Process the LoadInst: when its pointer operand passes the check below, map
+// the load to the pointer's expression with the first '&' removed.
+void LoadStoreSourceExpression::processLoadInst(LoadInst *I) {
+  SmallVector SourceExpressions;
+
+  Value *Val = I->getPointerOperand();
+
+  // Check if the pointer operand of the LoadInst is an instruction
+  if (isa(Val)) {
+    std::string Expression;
+    auto It = SourceExpressionsMap.find(Val);
+    if (It != SourceExpressionsMap.end()) {
+      Expression = It->second;
+    } else {
+      Expression = getSourceExpression(Val);
+    }
+
+    // Map the LoadInst to its source expression in the SourceExpressionsMap
+    SourceExpressionsMap[I] = removeAmpersand(Expression);
+  }
+}
+
+AnalysisKey SourceExpressionAnalysis::Key;
+
+SourceExpressionAnalysis::Result
+SourceExpressionAnalysis::run(Function &F, FunctionAnalysisManager &) {
+  LoadStoreSourceExpression PI(
+      F); // Create an instance of LoadStoreSourceExpression
+  ReversePostOrderTraversal RPOT(&F);
+  for (BasicBlock *BB : RPOT) {
+    for (Instruction &I : *BB) {
+      if (auto *loadInst = dyn_cast(&I)) {
+        // Process the LoadInst and generate the source expressions
+        PI.processLoadInst(loadInst);
+      } else if (auto *storeInst = dyn_cast(&I)) {
+        // Process the StoreInst and generate the source expressions
+        PI.processStoreInst(storeInst);
+      }
+    }
+  }
+
+  return PI; // Return the built LoadStoreSourceExpression instance
+}
+
+void LoadStoreSourceExpression::print(raw_ostream &OS) const {
+  // Emit one "<key> = <expression>" line per cached entry, all to OS.
+  for (const auto &Entry : SourceExpressionsMap) {
+    Value *Key = Entry.first;
+    const std::string &Expr = Entry.second;
+
+    if (Instruction *KeyInst = dyn_cast(Key)) {
+      KeyInst->printAsOperand(OS, /*PrintType=*/false);
+    } else {
+      OS << "";
+    }
+    OS << " = " << Expr;
+
+    OS << "\n";
+  }
+}
+
+PreservedAnalyses
+SourceExpressionAnalysisPrinterPass::run(Function &F,
+                                         FunctionAnalysisManager &AM) {
+  OS << "Load Store Expression " << F.getName() << "\n";
+
+  SourceExpressionAnalysis::Result &PI = AM.getResult(
+      F); // Retrieve the correct analysis result type
+  ReversePostOrderTraversal RPOT(&F);
+  for (BasicBlock *BB : RPOT) {
+    for (Instruction &I : *BB) {
+      if (auto *loadInst = dyn_cast(&I)) {
+        // Process the LoadInst and generate the source expressions
+        PI.processLoadInst(loadInst);
+      } else if (auto *storeInst = dyn_cast(&I)) {
+        // Process the StoreInst and generate the source expressions
+        PI.processStoreInst(storeInst);
+      }
+    }
+  }
+
+  PI.print(OS);
+  return PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -65,6 +65,7 @@
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 #include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/SourceExpressionAnalysis.h"
 #include "llvm/Analysis/StackLifetime.h"
 #include "llvm/Analysis/StackSafetyAnalysis.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -267,6 +267,7 @@
 FUNCTION_ANALYSIS("should-run-extra-vector-passes", ShouldRunExtraVectorPasses())
 FUNCTION_ANALYSIS("stack-safety-local", StackSafetyAnalysis())
 FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
+FUNCTION_ANALYSIS("source-expr", SourceExpressionAnalysis())
FUNCTION_ANALYSIS("targetir", TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis()) FUNCTION_ANALYSIS("verify", VerifierAnalysis()) @@ -391,6 +392,7 @@ FUNCTION_PASS("print", PhiValuesPrinterPass(dbgs())) FUNCTION_PASS("print", RegionInfoPrinterPass(dbgs())) FUNCTION_PASS("print", ScalarEvolutionPrinterPass(dbgs())) +FUNCTION_PASS("print", SourceExpressionAnalysisPrinterPass(dbgs())) FUNCTION_PASS("print", StackSafetyPrinterPass(dbgs())) FUNCTION_PASS("print", LoopAccessInfoPrinterPass(dbgs())) // TODO: rename to print after NPM switch diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp --- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/SourceExpressionAnalysis.h" #include "llvm/Support/TimeProfiler.h" using namespace llvm; @@ -235,6 +236,7 @@ AM.getResult(F), AM.getResult(F), AM.getResult(F), + AM.getResult(F), BFI, BPI, MSSA}; diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp --- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -70,6 +70,7 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/SourceExpressionAnalysis.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -576,8 +577,9 @@ DominatorTree *DT = &LAR.DT; const Function *F = L.getHeader()->getParent(); OptimizationRemarkEmitter ORE(F); + LoadStoreSourceExpression *LSE = &LAR.LSE; - LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr); + LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr, *LSE); if (!LoopVersioningLICM(AA, SE, 
&ORE, LAIs, LAR.LI, &L).run(DT))
     return PreservedAnalyses::all();
   return getLoopPassPreservedAnalyses();
diff --git a/llvm/test/Analysis/SourceExpressionAnalysis/loop.ll b/llvm/test/Analysis/SourceExpressionAnalysis/loop.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Analysis/SourceExpressionAnalysis/loop.ll
@@ -0,0 +1,83 @@
+; RUN: opt < %s --passes='print' -disable-output 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: Load Store Expression foo
+define dso_local void @foo(ptr nocapture noundef %arr, i64 noundef %n) local_unnamed_addr #0 !dbg !8 {
+entry:
+  call void @llvm.dbg.value(metadata ptr %arr, metadata !14, metadata !DIExpression()), !dbg !17
+  call void @llvm.dbg.value(metadata i64 %n, metadata !15, metadata !DIExpression()), !dbg !17
+  call void @llvm.dbg.value(metadata i64 0, metadata !16, metadata !DIExpression()), !dbg !17
+  br label %for.body, !dbg !18
+
+for.body: ; preds = %entry, %for.body
+; CHECK: %l1.07 = l1
+  %l1.07 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  call void @llvm.dbg.value(metadata i64 %l1.07, metadata !16, metadata !DIExpression()), !dbg !17
+  %add = sub nsw i64 %l1.07, 1, !dbg !20
+; CHECK: %arrayidx = &arr[(l1 - 1)]
+  %arrayidx = getelementptr inbounds i64, ptr %arr, i64 %add, !dbg !22
+; CHECK: %0 = arr[(l1 - 1)]
+ %0 = load i64, ptr %arrayidx, align 8, !dbg !22, !tbaa !23 +; CHECK: %add1 = (arr[(l1 - 1)] + 10) + %add1 = add nsw i64 %0, 10, !dbg !27 +; CHECK: %arrayidx2 = &arr[l1] + %arrayidx2 = getelementptr inbounds i64, ptr %arr, i64 %l1.07, !dbg !28 + store i64 %add1, ptr %arrayidx2, align 8, !dbg !29, !tbaa !23 + %inc = add nuw nsw i64 %l1.07, 1, !dbg !30 + call void @llvm.dbg.value(metadata i64 %inc, metadata !16, metadata !DIExpression()), !dbg !17 + %exitcond.not = icmp eq i64 %inc, 1024, !dbg !31 + br i1 %exitcond.not, label %for.end, label %for.body, !dbg !18, !llvm.loop !32 + +for.end: ; preds = %for.body + ret void, !dbg !36 +} + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare void @llvm.dbg.value(metadata, metadata, metadata) #1 + +attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable "approx-func-fp-math"="true" "denormal-fp-math"="preserve-sign,preserve-sign" "loopopt-pipeline"="light" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="true" } +attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6} +!llvm.ident = !{!7} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang based Intel(R) oneAPI DPC++/C++ Compiler 2024.0.0 (2024.x.0.YYYYMMDD)", isOptimized: true, flags: " --intel -O1 -S -g -emit-llvm tm2.c -fveclib=SVML -fheinous-gnu-extensions", runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "tm2.c", directory: "/iusers/sguggill/work") +!2 = !{i32 7, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 
4} +!5 = !{i32 7, !"uwtable", i32 2} +!6 = !{i32 7, !"debug-info-assignment-tracking", i1 true} +!7 = !{!"Intel(R) oneAPI DPC++/C++ Compiler 2024.0.0 (2024.x.0.YYYYMMDD)"} +!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !9, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13) +!9 = !DISubroutineType(types: !10) +!10 = !{null, !11, !12} +!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64) +!12 = !DIBasicType(name: "long", size: 64, encoding: DW_ATE_signed) +!13 = !{!14, !15, !16} +!14 = !DILocalVariable(name: "arr", arg: 1, scope: !8, file: !1, line: 1, type: !11) +!15 = !DILocalVariable(name: "n", arg: 2, scope: !8, file: !1, line: 1, type: !12) +!16 = !DILocalVariable(name: "l1", scope: !8, file: !1, line: 3, type: !12) +!17 = !DILocation(line: 0, scope: !8) +!18 = !DILocation(line: 5, column: 3, scope: !19) +!19 = distinct !DILexicalBlock(scope: !8, file: !1, line: 5, column: 3) +!20 = !DILocation(line: 6, column: 22, scope: !21) +!21 = distinct !DILexicalBlock(scope: !19, file: !1, line: 5, column: 3) +!22 = !DILocation(line: 6, column: 15, scope: !21) +!23 = !{!24, !24, i64 0} +!24 = !{!"long", !25, i64 0} +!25 = !{!"omnipotent char", !26, i64 0} +!26 = !{!"Simple C/C++ TBAA"} +!27 = !DILocation(line: 6, column: 27, scope: !21) +!28 = !DILocation(line: 6, column: 5, scope: !21) +!29 = !DILocation(line: 6, column: 13, scope: !21) +!30 = !DILocation(line: 5, column: 29, scope: !21) +!31 = !DILocation(line: 5, column: 19, scope: !21) +!32 = distinct !{!32, !18, !33} +!33 = !DILocation(line: 6, column: 29, scope: !19) +!34 = !{!"llvm.loop.mustprogress"} +!35 = !{!"llvm.loop.unroll.disable"} +!36 = !DILocation(line: 7, column: 1, scope: !8) diff --git a/llvm/test/Analysis/SourceExpressionAnalysis/mul.ll b/llvm/test/Analysis/SourceExpressionAnalysis/mul.ll new file mode 100644 --- /dev/null +++ 
b/llvm/test/Analysis/SourceExpressionAnalysis/mul.ll @@ -0,0 +1,58 @@ +; RUN: opt < %s --passes='print' -disable-output 2>&1 | FileCheck %s + + + +; CHECK-LABEL: Load Store Expression foo +define dso_local i64 @foo(ptr nocapture noundef readonly %lp, i64 noundef %n1, i64 noundef %n2) local_unnamed_addr #0 !dbg !9 { +entry: + call void @llvm.dbg.value(metadata ptr %lp, metadata !15, metadata !DIExpression()), !dbg !18 + call void @llvm.dbg.value(metadata i64 %n1, metadata !16, metadata !DIExpression()), !dbg !18 + call void @llvm.dbg.value(metadata i64 %n2, metadata !17, metadata !DIExpression()), !dbg !18 +; CHECK: %mul = (n1 << 1) + %mul = shl nsw i64 %n1, 1, !dbg !19 +; CHECK: %add = ((n1 << 1) + n2) + %add = add nsw i64 %mul, %n2, !dbg !20 +; CHECK: %arrayidx = &lp[((n1 << 1) + n2)] + %arrayidx = getelementptr inbounds i64, ptr %lp, i64 %add, !dbg !21 +; CHECK: %0 = lp[((n1 << 1) + n2)] + %0 = load i64, ptr %arrayidx, align 8, !dbg !21, !tbaa !22 + ret i64 %0, !dbg !26 +} + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare void @llvm.dbg.value(metadata, metadata, metadata) #1 + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7} +!llvm.ident = !{!8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 17.0.0 (https://github.com/phyBrackets/llvm-project-1.git 598f579cf15336fb818edb33659a839e3338e624)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "../cpp/ts.c", directory: 
"/home/shivam/llvm-project-1", checksumkind: CSK_MD5, checksum: "9e9f3a66ae451d81cff547dc10cd8006") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 8, !"PIC Level", i32 2} +!6 = !{i32 7, !"PIE Level", i32 2} +!7 = !{i32 7, !"uwtable", i32 2} +!8 = !{!"clang version 17.0.0 (https://github.com/phyBrackets/llvm-project-1.git 598f579cf15336fb818edb33659a839e3338e624)"} +!9 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !10, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !14) +!10 = !DISubroutineType(types: !11) +!11 = !{!12, !13, !12, !12} +!12 = !DIBasicType(name: "long", size: 64, encoding: DW_ATE_signed) +!13 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64) +!14 = !{!15, !16, !17} +!15 = !DILocalVariable(name: "lp", arg: 1, scope: !9, file: !1, line: 1, type: !13) +!16 = !DILocalVariable(name: "n1", arg: 2, scope: !9, file: !1, line: 1, type: !12) +!17 = !DILocalVariable(name: "n2", arg: 3, scope: !9, file: !1, line: 1, type: !12) +!18 = !DILocation(line: 0, scope: !9) +!19 = !DILocation(line: 3, column: 15, scope: !9) +!20 = !DILocation(line: 3, column: 20, scope: !9) +!21 = !DILocation(line: 3, column: 10, scope: !9) +!22 = !{!23, !23, i64 0} +!23 = !{!"long", !24, i64 0} +!24 = !{!"omnipotent char", !25, i64 0} +!25 = !{!"Simple C/C++ TBAA"} +!26 = !DILocation(line: 3, column: 3, scope: !9) diff --git a/llvm/test/Analysis/SourceExpressionAnalysis/struct.ll b/llvm/test/Analysis/SourceExpressionAnalysis/struct.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/SourceExpressionAnalysis/struct.ll @@ -0,0 +1,108 @@ +; RUN: opt < %s --passes='print' -disable-output 2>&1 | FileCheck %s + +; ModuleID = '../cpp/st.cpp' +source_filename = "../cpp/st.cpp" +target datalayout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.foo = type { i32, [5 x i32] } + +$_ZN3fooC2Ev = comdat any + +@constinit = private constant [5 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5], align 4 + +; CHECK-LABEL: Load Store Expression _Z4funcv +define dso_local void @_Z4funcv() #0 !dbg !19 { +entry: +; CHECK: %obj = obj + %obj = alloca %struct.foo, align 4 +; CHECK: %s = s + %s = alloca i32, align 4 + call void @llvm.dbg.declare(metadata ptr %obj, metadata !23, metadata !DIExpression()), !dbg !24 + call void @_ZN3fooC2Ev(ptr noundef nonnull align 4 dereferenceable(24) %obj) #4, !dbg !24 +; CHECK: %a = unknown + %a = getelementptr inbounds %struct.foo, ptr %obj, i32 0, i32 0, !dbg !25 + store i32 20, ptr %a, align 4, !dbg !26 + call void @llvm.dbg.declare(metadata ptr %s, metadata !27, metadata !DIExpression()), !dbg !28 +; CHECK: %arr = unknown + %arr = getelementptr inbounds %struct.foo, ptr %obj, i32 0, i32 1, !dbg !29 +; CHECK: %arrayidx = unknown + %arrayidx = getelementptr inbounds [5 x i32], ptr %arr, i64 0, i64 3, !dbg !30 +; CHECK: %0 = unknown + %0 = load i32, ptr %arrayidx, align 4, !dbg !30 + store i32 %0, ptr %s, align 4, !dbg !28 + ret void, !dbg !31 +} + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; Function Attrs: noinline nounwind optnone uwtable +define linkonce_odr dso_local void @_ZN3fooC2Ev(ptr noundef nonnull align 4 dereferenceable(24) %this) unnamed_addr #2 comdat align 2 !dbg !32 { +entry: + %this.addr = alloca ptr, align 8 + store ptr %this, ptr %this.addr, align 8 + call void @llvm.dbg.declare(metadata ptr %this.addr, metadata !37, metadata !DIExpression()), !dbg !39 + %this1 = load ptr, ptr %this.addr, align 8 + %arr = getelementptr inbounds %struct.foo, ptr %this1, i32 0, i32 1, !dbg !40 + %arrayinit.begin = getelementptr inbounds [5 x i32], ptr 
%arr, i64 0, i64 0, !dbg !41 + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %arr, ptr align 4 @constinit, i64 20, i1 false), !dbg !41 + ret void, !dbg !42 +} + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #3 + +attributes #0 = { mustprogress noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #2 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +attributes #4 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!11, !12, !13, !14, !15, !16, !17} +!llvm.ident = !{!18} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 17.0.0 (https://github.com/phyBrackets/llvm-project-1.git 0d3edc0be92f6c8f60d49772b74e456f285483e6)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, retainedTypes: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "../cpp/st.cpp", directory: "/home/shivam/llvm-project-1", checksumkind: CSK_MD5, checksum: "19ed7b3e714703c8f231838bfe106739") +!2 = !{!3} +!3 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "foo", file: !1, line: 1, size: 192, flags: DIFlagTypePassByValue | DIFlagNonTrivial, elements: !4, identifier: "_ZTS3foo") +!4 = !{!5, !7} +!5 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !3, file: !1, line: 2, 
baseType: !6, size: 32) +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !DIDerivedType(tag: DW_TAG_member, name: "arr", scope: !3, file: !1, line: 3, baseType: !8, size: 160, offset: 32) +!8 = !DICompositeType(tag: DW_TAG_array_type, baseType: !6, size: 160, elements: !9) +!9 = !{!10} +!10 = !DISubrange(count: 5) +!11 = !{i32 7, !"Dwarf Version", i32 5} +!12 = !{i32 2, !"Debug Info Version", i32 3} +!13 = !{i32 1, !"wchar_size", i32 4} +!14 = !{i32 8, !"PIC Level", i32 2} +!15 = !{i32 7, !"PIE Level", i32 2} +!16 = !{i32 7, !"uwtable", i32 2} +!17 = !{i32 7, !"frame-pointer", i32 2} +!18 = !{!"clang version 17.0.0 (https://github.com/phyBrackets/llvm-project-1.git 0d3edc0be92f6c8f60d49772b74e456f285483e6)"} +!19 = distinct !DISubprogram(name: "func", linkageName: "_Z4funcv", scope: !1, file: !1, line: 6, type: !20, scopeLine: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !22) +!20 = !DISubroutineType(types: !21) +!21 = !{null} +!22 = !{} +!23 = !DILocalVariable(name: "obj", scope: !19, file: !1, line: 7, type: !3) +!24 = !DILocation(line: 7, column: 9, scope: !19) +!25 = !DILocation(line: 8, column: 9, scope: !19) +!26 = !DILocation(line: 8, column: 11, scope: !19) +!27 = !DILocalVariable(name: "s", scope: !19, file: !1, line: 9, type: !6) +!28 = !DILocation(line: 9, column: 9, scope: !19) +!29 = !DILocation(line: 9, column: 17, scope: !19) +!30 = !DILocation(line: 9, column: 13, scope: !19) +!31 = !DILocation(line: 10, column: 1, scope: !19) +!32 = distinct !DISubprogram(name: "foo", linkageName: "_ZN3fooC2Ev", scope: !3, file: !1, line: 1, type: !33, scopeLine: 1, flags: DIFlagArtificial | DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, declaration: !36, retainedNodes: !22) +!33 = !DISubroutineType(types: !34) +!34 = !{null, !35} +!35 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !3, size: 64, flags: DIFlagArtificial | DIFlagObjectPointer) +!36 = !DISubprogram(name: "foo", scope: !3, 
type: !33, flags: DIFlagArtificial | DIFlagPrototyped, spFlags: 0) +!37 = !DILocalVariable(name: "this", arg: 1, scope: !32, type: !38, flags: DIFlagArtificial | DIFlagObjectPointer) +!38 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !3, size: 64) +!39 = !DILocation(line: 0, scope: !32) +!40 = !DILocation(line: 3, column: 9, scope: !32) +!41 = !DILocation(line: 3, column: 16, scope: !32) +!42 = !DILocation(line: 1, column: 8, scope: !32) diff --git a/llvm/test/Transforms/LoopVectorize/report-source-expr.ll b/llvm/test/Transforms/LoopVectorize/report-source-expr.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/report-source-expr.ll @@ -0,0 +1,444 @@ +; RUN: opt -report-source-expr=true -passes='function(loop-vectorize,require)' -disable-output -pass-remarks-analysis=loop-vectorize < %s 2>&1 | FileCheck %s + + + +; // Dependence::Backward +; // Loop does not get vectorized since it contains a backward +; // dependency between A[i] and A[i+3]. +; void test_backward_dep(int n, int *A) { +; for (int i = 1; i <= n - 3; i += 3) { +; A[i] = A[i-1]; +; A[i+1] = A[i+3]; +; } +; } + +; CHECK:remark: source.c:4:14: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop Dependence source: &A[i] Dependence destination: &A[(i + 3)] +; CHECK-NEXT: Backward loop carried data dependence. 
Memory location is the same as accessed at source.c:3:6 + +define dso_local void @test_backward_dep(i32 noundef %n, ptr nocapture noundef %A) local_unnamed_addr #0 !dbg !10 { +entry: + call void @llvm.dbg.value(metadata i32 %n, metadata !16, metadata !DIExpression()), !dbg !20 + call void @llvm.dbg.value(metadata ptr %A, metadata !17, metadata !DIExpression()), !dbg !20 + call void @llvm.dbg.value(metadata i32 1, metadata !18, metadata !DIExpression()), !dbg !21 + call void @llvm.dbg.value(metadata i32 1, metadata !18, metadata !DIExpression()), !dbg !21 + %cmp.not18 = icmp slt i32 %n, 4, !dbg !22 + br i1 %cmp.not18, label %for.cond.cleanup, label %for.body.preheader, !dbg !24 + +for.body.preheader: ; preds = %entry + %sub = add nsw i32 %n, -3 + %0 = zext i32 %sub to i64, !dbg !24 + br label %for.body, !dbg !24 + +for.cond.cleanup: ; preds = %for.body, %entry + ret void, !dbg !25 + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 1, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !18, metadata !DIExpression()), !dbg !21 + %1 = add nsw i64 %indvars.iv, -1, !dbg !26 + %arrayidx = getelementptr inbounds i32, ptr %A, i64 %1, !dbg !28 + %2 = load i32, ptr %arrayidx, align 4, !dbg !28, !tbaa !29 + %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv, !dbg !33 + store i32 %2, ptr %arrayidx3, align 4, !dbg !34, !tbaa !29 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3, !dbg !35 + %arrayidx5 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv.next, !dbg !36 + %3 = load i32, ptr %arrayidx5, align 4, !dbg !36, !tbaa !29 + %4 = add nuw nsw i64 %indvars.iv, 1, !dbg !37 + %arrayidx8 = getelementptr inbounds i32, ptr %A, i64 %4, !dbg !38 + store i32 %3, ptr %arrayidx8, align 4, !dbg !39, !tbaa !29 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !18, metadata !DIExpression()), !dbg !21 + %cmp.not = icmp ugt i64 %indvars.iv.next, %0, !dbg !22 + br 
i1 %cmp.not, label %for.cond.cleanup, label %for.body, !dbg !24, !llvm.loop !40 +} + +; // Dependence::ForwardButPreventsForwarding +; // Loop does not get vectorized despite only having a forward +; // dependency between A[i] and A[i-3]. +; // This is because the store-to-load forwarding distance (here 3) +; // needs to be a multiple of vector factor otherwise the +; // store (A[5:6] in i=5) and load (A[4:5],A[6:7] in i=7,9) are unaligned. +; void test_forwardButPreventsForwarding_dep(int n, int* A, int* B) { +; for(int i=3; i < n; ++i) { +; A[i] = 10; +; B[i] = A[i-3]; +; } +; } + +; CHECK:remark: source.c:10:11: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop Dependence source: &A[(i + -3)] Dependence destination: &A[(i + 1)] +; CHECK-NEXT: Backward loop carried data dependence. Memory location is the same as accessed at source.c:11:13 + +define dso_local void @test_forwardButPreventsForwarding_dep(i32 noundef %n, ptr nocapture noundef %A, ptr nocapture noundef writeonly %B) local_unnamed_addr #0 !dbg !43 { +entry: + call void @llvm.dbg.value(metadata i32 %n, metadata !47, metadata !DIExpression()), !dbg !52 + call void @llvm.dbg.value(metadata ptr %A, metadata !48, metadata !DIExpression()), !dbg !52 + call void @llvm.dbg.value(metadata ptr %B, metadata !49, metadata !DIExpression()), !dbg !52 + call void @llvm.dbg.value(metadata i32 3, metadata !50, metadata !DIExpression()), !dbg !53 + %cmp10 = icmp sgt i32 %n, 3, !dbg !54 + br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup, !dbg !56 + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64, !dbg !54 + %0 = add nsw i64 %wide.trip.count, -3, !dbg !56 + %xtraiter = and i64 %0, 1, !dbg !56 + %1 = icmp eq i32 %n, 4, !dbg !56 + br i1 %1, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body.preheader.new, !dbg !56 + 
+for.body.preheader.new: ; preds = %for.body.preheader + %unroll_iter = and i64 %0, -2, !dbg !56 + br label %for.body, !dbg !56 + +for.cond.cleanup.loopexit.unr-lcssa: ; preds = %for.body, %for.body.preheader + %indvars.iv.unr = phi i64 [ 3, %for.body.preheader ], [ %indvars.iv.next.1, %for.body ] + %lcmp.mod.not = icmp eq i64 %xtraiter, 0, !dbg !56 + br i1 %lcmp.mod.not, label %for.cond.cleanup, label %for.body.epil, !dbg !56 + +for.body.epil: ; preds = %for.cond.cleanup.loopexit.unr-lcssa + call void @llvm.dbg.value(metadata i64 %indvars.iv.unr, metadata !50, metadata !DIExpression()), !dbg !53 + %arrayidx.epil = getelementptr inbounds i32, ptr %A, i64 %indvars.iv.unr, !dbg !57 + store i32 10, ptr %arrayidx.epil, align 4, !dbg !59, !tbaa !29 + %2 = add nsw i64 %indvars.iv.unr, -3, !dbg !60 + %arrayidx2.epil = getelementptr inbounds i32, ptr %A, i64 %2, !dbg !61 + %3 = load i32, ptr %arrayidx2.epil, align 4, !dbg !61, !tbaa !29 + %arrayidx4.epil = getelementptr inbounds i32, ptr %B, i64 %indvars.iv.unr, !dbg !62 + store i32 %3, ptr %arrayidx4.epil, align 4, !dbg !63, !tbaa !29 + call void @llvm.dbg.value(metadata i64 %indvars.iv.unr, metadata !50, metadata !DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value)), !dbg !53 + br label %for.cond.cleanup, !dbg !64 + +for.cond.cleanup: ; preds = %for.body.epil, %for.cond.cleanup.loopexit.unr-lcssa, %entry + ret void, !dbg !64 + +for.body: ; preds = %for.body, %for.body.preheader.new + %indvars.iv = phi i64 [ 3, %for.body.preheader.new ], [ %indvars.iv.next.1, %for.body ] + %niter = phi i64 [ 0, %for.body.preheader.new ], [ %niter.next.1, %for.body ] + call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !50, metadata !DIExpression()), !dbg !53 + %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv, !dbg !57 + store i32 10, ptr %arrayidx, align 4, !dbg !59, !tbaa !29 + %4 = add nsw i64 %indvars.iv, -3, !dbg !60 + %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %4, !dbg !61 + %5 = load i32, ptr 
%arrayidx2, align 4, !dbg !61, !tbaa !29 + %arrayidx4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv, !dbg !62 + store i32 %5, ptr %arrayidx4, align 4, !dbg !63, !tbaa !29 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !65 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !50, metadata !DIExpression()), !dbg !53 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !50, metadata !DIExpression()), !dbg !53 + %arrayidx.1 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv.next, !dbg !57 + store i32 10, ptr %arrayidx.1, align 4, !dbg !59, !tbaa !29 + %6 = add nsw i64 %indvars.iv, -2, !dbg !60 + %arrayidx2.1 = getelementptr inbounds i32, ptr %A, i64 %6, !dbg !61 + %7 = load i32, ptr %arrayidx2.1, align 4, !dbg !61, !tbaa !29 + %arrayidx4.1 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv.next, !dbg !62 + store i32 %7, ptr %arrayidx4.1, align 4, !dbg !63, !tbaa !29 + %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv, 2, !dbg !65 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next.1, metadata !50, metadata !DIExpression()), !dbg !53 + %niter.next.1 = add i64 %niter, 2, !dbg !56 + %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter, !dbg !56 + br i1 %niter.ncmp.1, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body, !dbg !56, !llvm.loop !66 +} + +; // Dependence::BackwardVectorizableButPreventsForwarding +; // Loop does not get vectorized despite having a backward +; // but vectorizable dependency between A[i] and A[i-15]. +; // +; // This is because the store-to-load forwarding distance (here 15) +; // needs to be a multiple of vector factor otherwise +; // store (A[16:17] in i=16) and load (A[15:16], A[17:18] in i=30,32) are unaligned. +; void test_backwardVectorizableButPreventsForwarding(int n, int* A) { +; for(int i=15; i < n; ++i) { +; A[i] = A[i-2] + A[i-15]; +; } +; } + +; CHECK:remark: source.c:17:11: loop not vectorized: unsafe dependent memory operations in loop. 
Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop Dependence source: &A[(i + -1)] Dependence destination: &A[(i + 1)] +; CHECK: Backward loop carried data dependence. Memory location is the same as accessed at source.c:17:13 + +define dso_local void @test_backwardVectorizableButPreventsForwarding(i32 noundef %n, ptr nocapture noundef %A) local_unnamed_addr #0 !dbg !68 { +entry: + call void @llvm.dbg.value(metadata i32 %n, metadata !70, metadata !DIExpression()), !dbg !74 + call void @llvm.dbg.value(metadata ptr %A, metadata !71, metadata !DIExpression()), !dbg !74 + call void @llvm.dbg.value(metadata i32 15, metadata !72, metadata !DIExpression()), !dbg !75 + %cmp12 = icmp sgt i32 %n, 15, !dbg !76 + br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup, !dbg !78 + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64, !dbg !76 + %0 = add nsw i64 %wide.trip.count, -15, !dbg !78 + %xtraiter = and i64 %0, 1, !dbg !78 + %1 = icmp eq i32 %n, 16, !dbg !78 + br i1 %1, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body.preheader.new, !dbg !78 + +for.body.preheader.new: ; preds = %for.body.preheader + %unroll_iter = and i64 %0, -2, !dbg !78 + br label %for.body, !dbg !78 + +for.cond.cleanup.loopexit.unr-lcssa: ; preds = %for.body, %for.body.preheader + %indvars.iv.unr = phi i64 [ 15, %for.body.preheader ], [ %indvars.iv.next.1, %for.body ] + %lcmp.mod.not = icmp eq i64 %xtraiter, 0, !dbg !78 + br i1 %lcmp.mod.not, label %for.cond.cleanup, label %for.body.epil, !dbg !78 + +for.body.epil: ; preds = %for.cond.cleanup.loopexit.unr-lcssa + call void @llvm.dbg.value(metadata i64 %indvars.iv.unr, metadata !72, metadata !DIExpression()), !dbg !75 + %2 = add nsw i64 %indvars.iv.unr, -2, !dbg !79 + %arrayidx.epil = getelementptr inbounds i32, ptr %A, i64 %2, !dbg !81 + %3 = load i32, ptr %arrayidx.epil, align 4, !dbg !81, !tbaa !29 + %4 = add nsw i64 
%indvars.iv.unr, -15, !dbg !82 + %arrayidx3.epil = getelementptr inbounds i32, ptr %A, i64 %4, !dbg !83 + %5 = load i32, ptr %arrayidx3.epil, align 4, !dbg !83, !tbaa !29 + %add.epil = add nsw i32 %5, %3, !dbg !84 + %arrayidx5.epil = getelementptr inbounds i32, ptr %A, i64 %indvars.iv.unr, !dbg !85 + store i32 %add.epil, ptr %arrayidx5.epil, align 4, !dbg !86, !tbaa !29 + call void @llvm.dbg.value(metadata i64 %indvars.iv.unr, metadata !72, metadata !DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value)), !dbg !75 + br label %for.cond.cleanup, !dbg !87 + +for.cond.cleanup: ; preds = %for.body.epil, %for.cond.cleanup.loopexit.unr-lcssa, %entry + ret void, !dbg !87 + +for.body: ; preds = %for.body, %for.body.preheader.new + %indvars.iv = phi i64 [ 15, %for.body.preheader.new ], [ %indvars.iv.next.1, %for.body ] + %niter = phi i64 [ 0, %for.body.preheader.new ], [ %niter.next.1, %for.body ] + call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !72, metadata !DIExpression()), !dbg !75 + %6 = add nsw i64 %indvars.iv, -2, !dbg !79 + %arrayidx = getelementptr inbounds i32, ptr %A, i64 %6, !dbg !81 + %7 = load i32, ptr %arrayidx, align 4, !dbg !81, !tbaa !29 + %8 = add nsw i64 %indvars.iv, -15, !dbg !82 + %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %8, !dbg !83 + %9 = load i32, ptr %arrayidx3, align 4, !dbg !83, !tbaa !29 + %add = add nsw i32 %9, %7, !dbg !84 + %arrayidx5 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv, !dbg !85 + store i32 %add, ptr %arrayidx5, align 4, !dbg !86, !tbaa !29 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !88 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !72, metadata !DIExpression()), !dbg !75 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !72, metadata !DIExpression()), !dbg !75 + %10 = add nsw i64 %indvars.iv, -1, !dbg !79 + %arrayidx.1 = getelementptr inbounds i32, ptr %A, i64 %10, !dbg !81 + %11 = load i32, ptr %arrayidx.1, align 4, !dbg !81, !tbaa !29 + %12 = 
add nsw i64 %indvars.iv, -14, !dbg !82 + %arrayidx3.1 = getelementptr inbounds i32, ptr %A, i64 %12, !dbg !83 + %13 = load i32, ptr %arrayidx3.1, align 4, !dbg !83, !tbaa !29 + %add.1 = add nsw i32 %13, %11, !dbg !84 + %arrayidx5.1 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv.next, !dbg !85 + store i32 %add.1, ptr %arrayidx5.1, align 4, !dbg !86, !tbaa !29 + %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv, 2, !dbg !88 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next.1, metadata !72, metadata !DIExpression()), !dbg !75 + %niter.next.1 = add i64 %niter, 2, !dbg !78 + %niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter, !dbg !78 + br i1 %niter.ncmp.1, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body, !dbg !78, !llvm.loop !89 +} + +; // Dependence::Unknown +; // Different stride lengths +; void test_unknown_dep(int n, int* A) { +; for(int i=0; i < n; ++i) { +; A[(i+1)*4] = 10; +; A[i] = 100; +; } +; } + +; CHECK:remark: source.c:24:13: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop Dependence source: &A[((i + 1) << 2)] Dependence destination: &A[i] +; CHECK: Unknown data dependence. 
Memory location is the same as accessed at source.c:23:8 + +define dso_local void @test_unknown_dep(i32 noundef %n, ptr nocapture noundef writeonly %A) local_unnamed_addr #1 !dbg !91 { +entry: + call void @llvm.dbg.value(metadata i32 %n, metadata !93, metadata !DIExpression()), !dbg !97 + call void @llvm.dbg.value(metadata ptr %A, metadata !94, metadata !DIExpression()), !dbg !97 + call void @llvm.dbg.value(metadata i32 0, metadata !95, metadata !DIExpression()), !dbg !98 + %cmp7 = icmp sgt i32 %n, 0, !dbg !99 + br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup, !dbg !101 + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64, !dbg !99 + %xtraiter = and i64 %wide.trip.count, 3, !dbg !101 + %0 = icmp ult i32 %n, 4, !dbg !101 + br i1 %0, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body.preheader.new, !dbg !101 + +for.body.preheader.new: ; preds = %for.body.preheader + %unroll_iter = and i64 %wide.trip.count, 4294967292, !dbg !101 + br label %for.body, !dbg !101 + +for.cond.cleanup.loopexit.unr-lcssa: ; preds = %for.body, %for.body.preheader + %indvars.iv.unr = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next.3, %for.body ] + %lcmp.mod.not = icmp eq i64 %xtraiter, 0, !dbg !101 + br i1 %lcmp.mod.not, label %for.cond.cleanup, label %for.body.epil, !dbg !101 + +for.body.epil: ; preds = %for.cond.cleanup.loopexit.unr-lcssa, %for.body.epil + %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ], [ %indvars.iv.unr, %for.cond.cleanup.loopexit.unr-lcssa ] + %epil.iter = phi i64 [ %epil.iter.next, %for.body.epil ], [ 0, %for.cond.cleanup.loopexit.unr-lcssa ] + call void @llvm.dbg.value(metadata i64 %indvars.iv.epil, metadata !95, metadata !DIExpression()), !dbg !98 + %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv.epil, 1, !dbg !102 + %1 = shl nsw i64 %indvars.iv.next.epil, 2, !dbg !104 + %arrayidx.epil = getelementptr inbounds i32, ptr %A, i64 %1, !dbg !105 + store i32 10, ptr %arrayidx.epil, align 4, 
!dbg !106, !tbaa !29 + %arrayidx2.epil = getelementptr inbounds i32, ptr %A, i64 %indvars.iv.epil, !dbg !107 + store i32 100, ptr %arrayidx2.epil, align 4, !dbg !108, !tbaa !29 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next.epil, metadata !95, metadata !DIExpression()), !dbg !98 + %epil.iter.next = add i64 %epil.iter, 1, !dbg !101 + %epil.iter.cmp.not = icmp eq i64 %epil.iter.next, %xtraiter, !dbg !101 + br i1 %epil.iter.cmp.not, label %for.cond.cleanup, label %for.body.epil, !dbg !101, !llvm.loop !109 + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit.unr-lcssa, %for.body.epil, %entry + ret void, !dbg !111 + +for.body: ; preds = %for.body, %for.body.preheader.new + %indvars.iv = phi i64 [ 0, %for.body.preheader.new ], [ %indvars.iv.next.3, %for.body ] + %niter = phi i64 [ 0, %for.body.preheader.new ], [ %niter.next.3, %for.body ] + call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !95, metadata !DIExpression()), !dbg !98 + %indvars.iv.next = or i64 %indvars.iv, 1, !dbg !102 + %2 = shl nsw i64 %indvars.iv.next, 2, !dbg !104 + %arrayidx = getelementptr inbounds i32, ptr %A, i64 %2, !dbg !105 + store i32 10, ptr %arrayidx, align 4, !dbg !106, !tbaa !29 + %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv, !dbg !107 + store i32 100, ptr %arrayidx2, align 4, !dbg !108, !tbaa !29 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !95, metadata !DIExpression()), !dbg !98 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !95, metadata !DIExpression()), !dbg !98 + %indvars.iv.next.1 = or i64 %indvars.iv, 2, !dbg !102 + %3 = shl nsw i64 %indvars.iv.next.1, 2, !dbg !104 + %arrayidx.1 = getelementptr inbounds i32, ptr %A, i64 %3, !dbg !105 + store i32 10, ptr %arrayidx.1, align 4, !dbg !106, !tbaa !29 + %arrayidx2.1 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv.next, !dbg !107 + store i32 100, ptr %arrayidx2.1, align 4, !dbg !108, !tbaa !29 + call void @llvm.dbg.value(metadata i64 
%indvars.iv.next.1, metadata !95, metadata !DIExpression()), !dbg !98 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next.1, metadata !95, metadata !DIExpression()), !dbg !98 + %indvars.iv.next.2 = or i64 %indvars.iv, 3, !dbg !102 + %4 = shl nsw i64 %indvars.iv.next.2, 2, !dbg !104 + %arrayidx.2 = getelementptr inbounds i32, ptr %A, i64 %4, !dbg !105 + store i32 10, ptr %arrayidx.2, align 4, !dbg !106, !tbaa !29 + %arrayidx2.2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv.next.1, !dbg !107 + store i32 100, ptr %arrayidx2.2, align 4, !dbg !108, !tbaa !29 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next.2, metadata !95, metadata !DIExpression()), !dbg !98 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next.2, metadata !95, metadata !DIExpression()), !dbg !98 + %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 4, !dbg !102 + %5 = shl nsw i64 %indvars.iv.next.3, 2, !dbg !104 + %arrayidx.3 = getelementptr inbounds i32, ptr %A, i64 %5, !dbg !105 + store i32 10, ptr %arrayidx.3, align 4, !dbg !106, !tbaa !29 + %arrayidx2.3 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv.next.2, !dbg !107 + store i32 100, ptr %arrayidx2.3, align 4, !dbg !108, !tbaa !29 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next.3, metadata !95, metadata !DIExpression()), !dbg !98 + %niter.next.3 = add nuw nsw i64 %niter, 4, !dbg !101 + %niter.ncmp.3 = icmp eq i64 %niter.next.3, %unroll_iter, !dbg !101 + br i1 %niter.ncmp.3, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body, !dbg !101, !llvm.loop !112 +} + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare void @llvm.dbg.value(metadata, metadata, metadata) #2 + +attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = 
{ nofree norecurse nosync nounwind memory(argmem: write) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 17.0.0 (https://github.com/phyBrackets/llvm-project-1.git 3a0a540c1307821748ab1f08e457126af0fafb6d)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "source.c", directory: "/home/shivam/llvm-project-1", checksumkind: CSK_MD5, checksum: "ce6d68d4fe0715e72ef0524124388d7f") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 8, !"PIC Level", i32 2} +!6 = !{i32 7, !"PIE Level", i32 2} +!7 = !{i32 7, !"uwtable", i32 2} +!8 = !{i32 7, !"debug-info-assignment-tracking", i1 true} +!9 = !{!"clang version 17.0.0 (https://github.com/phyBrackets/llvm-project-1.git 3a0a540c1307821748ab1f08e457126af0fafb6d)"} +!10 = distinct !DISubprogram(name: "test_backward_dep", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !15) +!11 = !DISubroutineType(types: !12) +!12 = !{null, !13, !14} +!13 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!14 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !13, size: 64) +!15 = !{!16, !17, !18} +!16 = !DILocalVariable(name: "n", arg: 1, scope: !10, file: !1, line: 1, type: !13) +!17 = !DILocalVariable(name: "A", arg: 2, scope: !10, file: !1, line: 1, type: !14) +!18 = !DILocalVariable(name: "i", scope: !19, 
file: !1, line: 2, type: !13) +!19 = distinct !DILexicalBlock(scope: !10, file: !1, line: 2, column: 4) +!20 = !DILocation(line: 0, scope: !10) +!21 = !DILocation(line: 0, scope: !19) +!22 = !DILocation(line: 2, column: 22, scope: !23) +!23 = distinct !DILexicalBlock(scope: !19, file: !1, line: 2, column: 4) +!24 = !DILocation(line: 2, column: 4, scope: !19) +!25 = !DILocation(line: 6, column: 2, scope: !10) +!26 = !DILocation(line: 3, column: 16, scope: !27) +!27 = distinct !DILexicalBlock(scope: !23, file: !1, line: 2, column: 40) +!28 = !DILocation(line: 3, column: 13, scope: !27) +!29 = !{!30, !30, i64 0} +!30 = !{!"int", !31, i64 0} +!31 = !{!"omnipotent char", !32, i64 0} +!32 = !{!"Simple C/C++ TBAA"} +!33 = !DILocation(line: 3, column: 6, scope: !27) +!34 = !DILocation(line: 3, column: 11, scope: !27) +!35 = !DILocation(line: 4, column: 17, scope: !27) +!36 = !DILocation(line: 4, column: 14, scope: !27) +!37 = !DILocation(line: 4, column: 8, scope: !27) +!38 = !DILocation(line: 4, column: 5, scope: !27) +!39 = !DILocation(line: 4, column: 12, scope: !27) +!40 = distinct !{!40, !24, !41, !42} +!41 = !DILocation(line: 5, column: 4, scope: !19) +!42 = !{!"llvm.loop.mustprogress"} +!43 = distinct !DISubprogram(name: "test_forwardButPreventsForwarding_dep", scope: !1, file: !1, line: 8, type: !44, scopeLine: 8, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !46) +!44 = !DISubroutineType(types: !45) +!45 = !{null, !13, !14, !14} +!46 = !{!47, !48, !49, !50} +!47 = !DILocalVariable(name: "n", arg: 1, scope: !43, file: !1, line: 8, type: !13) +!48 = !DILocalVariable(name: "A", arg: 2, scope: !43, file: !1, line: 8, type: !14) +!49 = !DILocalVariable(name: "B", arg: 3, scope: !43, file: !1, line: 8, type: !14) +!50 = !DILocalVariable(name: "i", scope: !51, file: !1, line: 9, type: !13) +!51 = distinct !DILexicalBlock(scope: !43, file: !1, line: 9, column: 4) +!52 = !DILocation(line: 0, 
scope: !43) +!53 = !DILocation(line: 0, scope: !51) +!54 = !DILocation(line: 9, column: 19, scope: !55) +!55 = distinct !DILexicalBlock(scope: !51, file: !1, line: 9, column: 4) +!56 = !DILocation(line: 9, column: 4, scope: !51) +!57 = !DILocation(line: 10, column: 6, scope: !58) +!58 = distinct !DILexicalBlock(scope: !55, file: !1, line: 9, column: 29) +!59 = !DILocation(line: 10, column: 11, scope: !58) +!60 = !DILocation(line: 11, column: 16, scope: !58) +!61 = !DILocation(line: 11, column: 13, scope: !58) +!62 = !DILocation(line: 11, column: 6, scope: !58) +!63 = !DILocation(line: 11, column: 11, scope: !58) +!64 = !DILocation(line: 13, column: 2, scope: !43) +!65 = !DILocation(line: 9, column: 24, scope: !55) +!66 = distinct !{!66, !56, !67, !42} +!67 = !DILocation(line: 12, column: 4, scope: !51) +!68 = distinct !DISubprogram(name: "test_backwardVectorizableButPreventsForwarding", scope: !1, file: !1, line: 15, type: !11, scopeLine: 15, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !69) +!69 = !{!70, !71, !72} +!70 = !DILocalVariable(name: "n", arg: 1, scope: !68, file: !1, line: 15, type: !13) +!71 = !DILocalVariable(name: "A", arg: 2, scope: !68, file: !1, line: 15, type: !14) +!72 = !DILocalVariable(name: "i", scope: !73, file: !1, line: 16, type: !13) +!73 = distinct !DILexicalBlock(scope: !68, file: !1, line: 16, column: 4) +!74 = !DILocation(line: 0, scope: !68) +!75 = !DILocation(line: 0, scope: !73) +!76 = !DILocation(line: 16, column: 20, scope: !77) +!77 = distinct !DILexicalBlock(scope: !73, file: !1, line: 16, column: 4) +!78 = !DILocation(line: 16, column: 4, scope: !73) +!79 = !DILocation(line: 17, column: 16, scope: !80) +!80 = distinct !DILexicalBlock(scope: !77, file: !1, line: 16, column: 30) +!81 = !DILocation(line: 17, column: 13, scope: !80) +!82 = !DILocation(line: 17, column: 25, scope: !80) +!83 = !DILocation(line: 17, column: 22, scope: !80) +!84 = 
!DILocation(line: 17, column: 20, scope: !80) +!85 = !DILocation(line: 17, column: 6, scope: !80) +!86 = !DILocation(line: 17, column: 11, scope: !80) +!87 = !DILocation(line: 19, column: 2, scope: !68) +!88 = !DILocation(line: 16, column: 25, scope: !77) +!89 = distinct !{!89, !78, !90, !42} +!90 = !DILocation(line: 18, column: 4, scope: !73) +!91 = distinct !DISubprogram(name: "test_unknown_dep", scope: !1, file: !1, line: 21, type: !11, scopeLine: 21, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !92) +!92 = !{!93, !94, !95} +!93 = !DILocalVariable(name: "n", arg: 1, scope: !91, file: !1, line: 21, type: !13) +!94 = !DILocalVariable(name: "A", arg: 2, scope: !91, file: !1, line: 21, type: !14) +!95 = !DILocalVariable(name: "i", scope: !96, file: !1, line: 22, type: !13) +!96 = distinct !DILexicalBlock(scope: !91, file: !1, line: 22, column: 4) +!97 = !DILocation(line: 0, scope: !91) +!98 = !DILocation(line: 0, scope: !96) +!99 = !DILocation(line: 22, column: 19, scope: !100) +!100 = distinct !DILexicalBlock(scope: !96, file: !1, line: 22, column: 4) +!101 = !DILocation(line: 22, column: 4, scope: !96) +!102 = !DILocation(line: 23, column: 12, scope: !103) +!103 = distinct !DILexicalBlock(scope: !100, file: !1, line: 22, column: 29) +!104 = !DILocation(line: 23, column: 15, scope: !103) +!105 = !DILocation(line: 23, column: 8, scope: !103) +!106 = !DILocation(line: 23, column: 19, scope: !103) +!107 = !DILocation(line: 24, column: 8, scope: !103) +!108 = !DILocation(line: 24, column: 13, scope: !103) +!109 = distinct !{!109, !110} +!110 = !{!"llvm.loop.unroll.disable"} +!111 = !DILocation(line: 26, column: 2, scope: !91) +!112 = distinct !{!112, !101, !113, !42} +!113 = !DILocation(line: 25, column: 4, scope: !96)