Index: llvm/trunk/include/llvm-c/Transforms/Scalar.h =================================================================== --- llvm/trunk/include/llvm-c/Transforms/Scalar.h +++ llvm/trunk/include/llvm-c/Transforms/Scalar.h @@ -56,6 +56,9 @@ /** See llvm::createGVNPass function. */ void LLVMAddGVNPass(LLVMPassManagerRef PM); +/** See llvm::createGVNPass function. */ +void LLVMAddNewGVNPass(LLVMPassManagerRef PM); + /** See llvm::createIndVarSimplifyPass function. */ void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM); Index: llvm/trunk/include/llvm/InitializePasses.h =================================================================== --- llvm/trunk/include/llvm/InitializePasses.h +++ llvm/trunk/include/llvm/InitializePasses.h @@ -252,6 +252,7 @@ void initializeModuleSummaryIndexWrapperPassPass(PassRegistry &); void initializeNameAnonGlobalLegacyPassPass(PassRegistry &); void initializeNaryReassociateLegacyPassPass(PassRegistry &); +void initializeNewGVNPass(PassRegistry&); void initializeNoAAPass(PassRegistry&); void initializeObjCARCAAWrapperPassPass(PassRegistry&); void initializeObjCARCAPElimPass(PassRegistry&); Index: llvm/trunk/include/llvm/LinkAllPasses.h =================================================================== --- llvm/trunk/include/llvm/LinkAllPasses.h +++ llvm/trunk/include/llvm/LinkAllPasses.h @@ -167,6 +167,7 @@ (void) llvm::createGVNHoistPass(); (void) llvm::createMergedLoadStoreMotionPass(); (void) llvm::createGVNPass(); + (void) llvm::createNewGVNPass(); (void) llvm::createMemCpyOptPass(); (void) llvm::createLoopDeletionPass(); (void) llvm::createPostDomTree(); Index: llvm/trunk/include/llvm/Transforms/Scalar.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar.h +++ llvm/trunk/include/llvm/Transforms/Scalar.h @@ -348,6 +348,13 @@ //===----------------------------------------------------------------------===// // +// GVN - This pass performs global value numbering and redundant 
load +// elimination cotemporaneously. +// +FunctionPass *createNewGVNPass(); + +//===----------------------------------------------------------------------===// +// // MemCpyOpt - This pass performs optimizations related to eliminating memcpy // calls and/or combining multiple stores into memset's. // Index: llvm/trunk/include/llvm/Transforms/Scalar/GVNExpression.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar/GVNExpression.h +++ llvm/trunk/include/llvm/Transforms/Scalar/GVNExpression.h @@ -0,0 +1,551 @@ +//======- GVNExpression.h - GVN Expression classes -------*- C++ -*-==-------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// The header file for the GVN pass that contains expression handling +/// classes +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_GVNEXPRESSION_H +#define LLVM_TRANSFORMS_SCALAR_GVNEXPRESSION_H + +#include "llvm/ADT/Hashing.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/ArrayRecycler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include + +namespace llvm { +class MemoryAccess; + +namespace GVNExpression { + +enum ExpressionType { + ET_Base, + ET_Constant, + ET_Variable, + ET_BasicStart, + ET_Basic, + ET_Call, + ET_AggregateValue, + ET_Phi, + ET_Load, + ET_Store, + ET_BasicEnd +}; + +class Expression { +private: + ExpressionType EType; + unsigned Opcode; + +public: + Expression(const Expression &) = delete; + Expression(ExpressionType ET = ET_Base, unsigned O = ~2U) + : EType(ET), Opcode(O) {} + void operator=(const Expression &) = delete; + 
virtual ~Expression(); + + static unsigned getEmptyKey() { return ~0U; } + static unsigned getTombstoneKey() { return ~1U; } + + bool operator==(const Expression &Other) const { + if (getOpcode() != Other.getOpcode()) + return false; + if (getOpcode() == getEmptyKey() || getOpcode() == getTombstoneKey()) + return true; + // Compare the expression type for anything but load and store. + // For load and store we set the opcode to zero. + // This is needed for load coercion. + if (getExpressionType() != ET_Load && + getExpressionType() != ET_Store && + getExpressionType() != Other.getExpressionType()) + return false; + + return equals(Other); + } + + virtual bool equals(const Expression &Other) const { return true; } + + unsigned getOpcode() const { return Opcode; } + void setOpcode(unsigned opcode) { Opcode = opcode; } + ExpressionType getExpressionType() const { return EType; } + + virtual hash_code getHashValue() const { + return hash_combine(getExpressionType(), getOpcode()); + } + + // + // Debugging support + // + virtual void printInternal(raw_ostream &OS, bool PrintEType) const { + if (PrintEType) + OS << "etype = " << getExpressionType() << ","; + OS << "opcode = " << getOpcode() << ", "; + } + + void print(raw_ostream &OS) const { + OS << "{ "; + printInternal(OS, true); + OS << "}"; + } + void dump() const { print(dbgs()); } +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const Expression &E) { + E.print(OS); + return OS; +} + +class BasicExpression : public Expression { +private: + typedef ArrayRecycler RecyclerType; + typedef RecyclerType::Capacity RecyclerCapacity; + Value **Operands; + unsigned MaxOperands; + unsigned NumOperands; + Type *ValueType; + +public: + static bool classof(const Expression *EB) { + ExpressionType ET = EB->getExpressionType(); + return ET > ET_BasicStart && ET < ET_BasicEnd; + } + + BasicExpression(unsigned NumOperands) + : BasicExpression(NumOperands, ET_Basic) {} + BasicExpression(unsigned NumOperands, ExpressionType 
ET) + : Expression(ET), Operands(nullptr), MaxOperands(NumOperands), + NumOperands(0), ValueType(nullptr) {} + virtual ~BasicExpression() override; + void operator=(const BasicExpression &) = delete; + BasicExpression(const BasicExpression &) = delete; + BasicExpression() = delete; + + /// \brief Swap two operands. Used during GVN to put commutative operands in + /// order. + void swapOperands(unsigned First, unsigned Second) { + std::swap(Operands[First], Operands[Second]); + } + + Value *getOperand(unsigned N) const { + assert(Operands && "Operands not allocated"); + assert(N < NumOperands && "Operand out of range"); + return Operands[N]; + } + + void setOperand(unsigned N, Value *V) { + assert(Operands && "Operands not allocated before setting"); + assert(N < NumOperands && "Operand out of range"); + Operands[N] = V; + } + + unsigned getNumOperands() const { return NumOperands; } + + typedef Value **op_iterator; + typedef Value *const *const_ops_iterator; + op_iterator ops_begin() { return Operands; } + op_iterator ops_end() { return Operands + NumOperands; } + const_ops_iterator ops_begin() const { return Operands; } + const_ops_iterator ops_end() const { return Operands + NumOperands; } + iterator_range operands() { + return iterator_range(ops_begin(), ops_end()); + } + iterator_range operands() const { + return iterator_range(ops_begin(), ops_end()); + } + + void ops_push_back(Value *Arg) { + assert(NumOperands < MaxOperands && "Tried to add too many operands"); + assert(Operands && "Operandss not allocated before pushing"); + Operands[NumOperands++] = Arg; + } + bool ops_empty() const { return getNumOperands() == 0; } + + void allocateOperands(RecyclerType &Recycler, BumpPtrAllocator &Allocator) { + assert(!Operands && "Operands already allocated"); + Operands = Recycler.allocate(RecyclerCapacity::get(MaxOperands), Allocator); + } + void deallocateOperands(RecyclerType &Recycler) { + Recycler.deallocate(RecyclerCapacity::get(MaxOperands), Operands); + } + + 
void setType(Type *T) { ValueType = T; } + Type *getType() const { return ValueType; } + + virtual bool equals(const Expression &Other) const override { + if (getOpcode() != Other.getOpcode()) + return false; + + const auto &OE = cast(Other); + if (getType() != OE.getType()) + return false; + if (NumOperands != OE.NumOperands) + return false; + if (!std::equal(ops_begin(), ops_end(), OE.ops_begin())) + return false; + return true; + } + + virtual hash_code getHashValue() const override { + return hash_combine(getExpressionType(), getOpcode(), ValueType, + hash_combine_range(ops_begin(), ops_end())); + } + + // + // Debugging support + // + virtual void printInternal(raw_ostream &OS, bool PrintEType) const override { + if (PrintEType) + OS << "ExpressionTypeBasic, "; + + this->Expression::printInternal(OS, false); + OS << "operands = {"; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + OS << "[" << i << "] = "; + Operands[i]->printAsOperand(OS); + OS << " "; + } + OS << "} "; + } +}; + +class CallExpression final : public BasicExpression { +private: + CallInst *Call; + MemoryAccess *DefiningAccess; + +public: + static bool classof(const Expression *EB) { + return EB->getExpressionType() == ET_Call; + } + + CallExpression(unsigned NumOperands, CallInst *C, MemoryAccess *DA) + : BasicExpression(NumOperands, ET_Call), Call(C), + DefiningAccess(DA) {} + void operator=(const CallExpression &) = delete; + CallExpression(const CallExpression &) = delete; + CallExpression() = delete; + virtual ~CallExpression() override; + + virtual bool equals(const Expression &Other) const override { + if (!this->BasicExpression::equals(Other)) + return false; + const auto &OE = cast(Other); + return DefiningAccess == OE.DefiningAccess; + } + + virtual hash_code getHashValue() const override { + return hash_combine(this->BasicExpression::getHashValue(), DefiningAccess); + } + + // + // Debugging support + // + virtual void printInternal(raw_ostream &OS, bool PrintEType) const 
override { + if (PrintEType) + OS << "ExpressionTypeCall, "; + this->BasicExpression::printInternal(OS, false); + OS << " represents call at " << Call; + } +}; + +class LoadExpression final : public BasicExpression { +private: + LoadInst *Load; + MemoryAccess *DefiningAccess; + unsigned Alignment; + +public: + static bool classof(const Expression *EB) { + return EB->getExpressionType() == ET_Load; + } + + LoadExpression(unsigned NumOperands, LoadInst *L, MemoryAccess *DA) + : LoadExpression(ET_Load, NumOperands, L, DA) {} + LoadExpression(enum ExpressionType EType, unsigned NumOperands, + LoadInst *L, MemoryAccess *DA) + : BasicExpression(NumOperands, EType), Load(L), DefiningAccess(DA) { + Alignment = L ? L->getAlignment() : 0; + } + void operator=(const LoadExpression &) = delete; + LoadExpression(const LoadExpression &) = delete; + LoadExpression() = delete; + virtual ~LoadExpression() override; + + LoadInst *getLoadInst() const { return Load; } + void setLoadInst(LoadInst *L) { Load = L; } + + MemoryAccess *getDefiningAccess() const { return DefiningAccess; } + void setDefiningAccess(MemoryAccess *MA) { DefiningAccess = MA; } + unsigned getAlignment() const { return Alignment; } + void setAlignment(unsigned Align) { Alignment = Align; } + + virtual bool equals(const Expression &Other) const override; + + virtual hash_code getHashValue() const override { + return hash_combine(getOpcode(), getType(), DefiningAccess, + hash_combine_range(ops_begin(), ops_end())); + } + + // + // Debugging support + // + virtual void printInternal(raw_ostream &OS, bool PrintEType) const override { + if (PrintEType) + OS << "ExpressionTypeLoad, "; + this->BasicExpression::printInternal(OS, false); + OS << " represents Load at " << Load; + OS << " with DefiningAccess " << DefiningAccess; + } +}; + +class StoreExpression final : public BasicExpression { +private: + StoreInst *Store; + MemoryAccess *DefiningAccess; + +public: + static bool classof(const Expression *EB) { + return 
EB->getExpressionType() == ET_Store; + } + + StoreExpression(unsigned NumOperands, StoreInst *S, MemoryAccess *DA) + : BasicExpression(NumOperands, ET_Store), Store(S), + DefiningAccess(DA) {} + void operator=(const StoreExpression &) = delete; + StoreExpression(const StoreExpression &) = delete; + StoreExpression() = delete; + virtual ~StoreExpression() override; + + StoreInst *getStoreInst() const { return Store; } + MemoryAccess *getDefiningAccess() const { return DefiningAccess; } + + virtual bool equals(const Expression &Other) const override; + + virtual hash_code getHashValue() const override { + return hash_combine(getOpcode(), getType(), DefiningAccess, + hash_combine_range(ops_begin(), ops_end())); + } + + // + // Debugging support + // + virtual void printInternal(raw_ostream &OS, bool PrintEType) const override { + if (PrintEType) + OS << "ExpressionTypeStore, "; + this->BasicExpression::printInternal(OS, false); + OS << " represents Store at " << Store; + } +}; + +class AggregateValueExpression final : public BasicExpression { +private: + unsigned MaxIntOperands; + unsigned NumIntOperands; + unsigned *IntOperands; + +public: + static bool classof(const Expression *EB) { + return EB->getExpressionType() == ET_AggregateValue; + } + + AggregateValueExpression(unsigned NumOperands, + unsigned NumIntOperands) + : BasicExpression(NumOperands, ET_AggregateValue), + MaxIntOperands(NumIntOperands), NumIntOperands(0), + IntOperands(nullptr) {} + + void operator=(const AggregateValueExpression &) = delete; + AggregateValueExpression(const AggregateValueExpression &) = delete; + AggregateValueExpression() = delete; + virtual ~AggregateValueExpression() override; + + typedef unsigned *int_arg_iterator; + typedef const unsigned *const_int_arg_iterator; + + int_arg_iterator int_ops_begin() { return IntOperands; } + int_arg_iterator int_ops_end() { return IntOperands + NumIntOperands; } + const_int_arg_iterator int_ops_begin() const { return IntOperands; } + 
const_int_arg_iterator int_ops_end() const { + return IntOperands + NumIntOperands; + } + unsigned int_ops_size() const { return NumIntOperands; } + bool int_ops_empty() const { return NumIntOperands == 0; } + void int_ops_push_back(unsigned IntOperand) { + assert(NumIntOperands < MaxIntOperands && + "Tried to add too many int operands"); + assert(IntOperands && "Operands not allocated before pushing"); + IntOperands[NumIntOperands++] = IntOperand; + } + + virtual void allocateIntOperands(BumpPtrAllocator &Allocator) { + assert(!IntOperands && "Operands already allocated"); + IntOperands = Allocator.Allocate(MaxIntOperands); + } + + virtual bool equals(const Expression &Other) const override { + if (!this->BasicExpression::equals(Other)) + return false; + const AggregateValueExpression &OE = cast(Other); + if (NumIntOperands != OE.NumIntOperands) + return false; + if (!std::equal(int_ops_begin(), int_ops_end(), OE.int_ops_begin())) + return false; + return true; + } + + virtual hash_code getHashValue() const override { + return hash_combine(this->BasicExpression::getHashValue(), + hash_combine_range(int_ops_begin(), int_ops_end())); + } + + // + // Debugging support + // + virtual void printInternal(raw_ostream &OS, bool PrintEType) const override { + if (PrintEType) + OS << "ExpressionTypeAggregateValue, "; + this->BasicExpression::printInternal(OS, false); + OS << ", intoperands = {"; + for (unsigned i = 0, e = int_ops_size(); i != e; ++i) { + OS << "[" << i << "] = " << IntOperands[i] << " "; + } + OS << "}"; + } +}; + +class PHIExpression final : public BasicExpression { +private: + BasicBlock *BB; + +public: + static bool classof(const Expression *EB) { + return EB->getExpressionType() == ET_Phi; + } + + PHIExpression(unsigned NumOperands, BasicBlock *B) + : BasicExpression(NumOperands, ET_Phi), BB(B) {} + void operator=(const PHIExpression &) = delete; + PHIExpression(const PHIExpression &) = delete; + PHIExpression() = delete; + virtual ~PHIExpression() 
override; + + virtual bool equals(const Expression &Other) const override { + if (!this->BasicExpression::equals(Other)) + return false; + const PHIExpression &OE = cast(Other); + if (BB != OE.BB) + return false; + return true; + } + + virtual hash_code getHashValue() const override { + return hash_combine(this->BasicExpression::getHashValue(), BB); + } + + // + // Debugging support + // + virtual void printInternal(raw_ostream &OS, bool PrintEType) const override { + if (PrintEType) + OS << "ExpressionTypePhi, "; + this->BasicExpression::printInternal(OS, false); + OS << "bb = " << BB; + } +}; + +class VariableExpression final : public Expression { +private: + Value *VariableValue; + +public: + static bool classof(const Expression *EB) { + return EB->getExpressionType() == ET_Variable; + } + + VariableExpression(Value *V) + : Expression(ET_Variable), VariableValue(V) {} + void operator=(const VariableExpression &) = delete; + VariableExpression(const VariableExpression &) = delete; + VariableExpression() = delete; + + Value *getVariableValue() const { return VariableValue; } + void setVariableValue(Value *V) { VariableValue = V; } + virtual bool equals(const Expression &Other) const override { + const VariableExpression &OC = cast(Other); + if (VariableValue != OC.VariableValue) + return false; + return true; + } + + virtual hash_code getHashValue() const override { + return hash_combine(getExpressionType(), VariableValue->getType(), + VariableValue); + } + + // + // Debugging support + // + virtual void printInternal(raw_ostream &OS, bool PrintEType) const override { + if (PrintEType) + OS << "ExpressionTypeVariable, "; + this->Expression::printInternal(OS, false); + OS << " variable = " << *VariableValue; + } +}; + +class ConstantExpression final : public Expression { +private: + Constant *ConstantValue; + +public: + static bool classof(const Expression *EB) { + return EB->getExpressionType() == ET_Constant; + } + + ConstantExpression() + : 
Expression(ET_Constant), ConstantValue(NULL) {} + ConstantExpression(Constant *constantValue) + : Expression(ET_Constant), ConstantValue(constantValue) {} + void operator=(const ConstantExpression &) = delete; + ConstantExpression(const ConstantExpression &) = delete; + + Constant *getConstantValue() const { return ConstantValue; } + void setConstantValue(Constant *V) { ConstantValue = V; } + + virtual bool equals(const Expression &Other) const override { + const ConstantExpression &OC = cast(Other); + return ConstantValue == OC.ConstantValue; + } + + virtual hash_code getHashValue() const override { + return hash_combine(getExpressionType(), ConstantValue->getType(), + ConstantValue); + } + + // + // Debugging support + // + virtual void printInternal(raw_ostream &OS, bool PrintEType) const override { + if (PrintEType) + OS << "ExpressionTypeConstant, "; + this->Expression::printInternal(OS, false); + OS << " constant = " << *ConstantValue; + } +}; +} +} + +#endif Index: llvm/trunk/include/llvm/Transforms/Scalar/NewGVN.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar/NewGVN.h +++ llvm/trunk/include/llvm/Transforms/Scalar/NewGVN.h @@ -0,0 +1,28 @@ +//===----- NewGVN.h - Global Value Numbering Pass ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file provides the interface for LLVM's Global Value Numbering pass. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_NEWGVN_H +#define LLVM_TRANSFORMS_SCALAR_NEWGVN_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +class NewGVNPass : public PassInfoMixin { +public: + /// \brief Run the pass over the function. 
+ PreservedAnalyses run(Function &F, AnalysisManager &AM); +}; +} + +#endif // LLVM_TRANSFORMS_SCALAR_NEWGVN_H + Index: llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt +++ llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt @@ -39,6 +39,7 @@ MemCpyOptimizer.cpp MergedLoadStoreMotion.cpp NaryReassociate.cpp + NewGVN.cpp PartiallyInlineLibCalls.cpp PlaceSafepoints.cpp Reassociate.cpp Index: llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp +++ llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp @@ -0,0 +1,1853 @@ +//===---- NewGVN.cpp - Global Value Numbering Pass --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the new LLVM's Global Value Numbering pass. +/// GVN partitions values computed by a function into congruence classes. +/// Values ending up in the same congruence class are guaranteed to be the same +/// for every execution of the program. In that respect, congruency is a +/// compile-time approximation of equivalence of values at runtime. +/// The algorithm implemented here uses a sparse formulation and it's based +/// on the ideas described in the paper: +/// "A Sparse Algorithm for Predicated Global Value Numbering" from +/// Karthik Gargi. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/NewGVN.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/CFGPrinter.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/PredIteratorCache.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVNExpression.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/MemorySSA.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" +#include +#include +#include +using namespace llvm; +using namespace PatternMatch; +using namespace llvm::GVNExpression; + 
+#define DEBUG_TYPE "newgvn" + +STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted"); +STATISTIC(NumGVNBlocksDeleted, "Number of blocks deleted"); +STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified"); +STATISTIC(NumGVNPhisAllSame, "Number of PHIs whos arguments are all the same"); + +//===----------------------------------------------------------------------===// +// GVN Pass +//===----------------------------------------------------------------------===// + +// Anchor methods. +namespace llvm { +namespace GVNExpression { + Expression::~Expression() = default; + BasicExpression::~BasicExpression() = default; + CallExpression::~CallExpression() = default; + LoadExpression::~LoadExpression() = default; + StoreExpression::~StoreExpression() = default; + AggregateValueExpression::~AggregateValueExpression() = default; + PHIExpression::~PHIExpression() = default; +} +} + +// Congruence classes represent the set of expressions/instructions +// that are all the same *during some scope in the function*. +// That is, because of the way we perform equality propagation, and +// because of memory value numbering, it is not correct to assume +// you can willy-nilly replace any member with any other at any +// point in the function. +// +// For any Value in the Member set, it is valid to replace any dominated member +// with that Value. +// +// Every congruence class has a leader, and the leader is used to +// symbolize instructions in a canonical way (IE every operand of an +// instruction that is a member of the same congruence class will +// always be replaced with leader during symbolization). +// To simplify symbolization, we keep the leader as a constant if class can be +// proved to be a constant value. +// Otherwise, the leader is a randomly chosen member of the value set, it does +// not matter which one is chosen. +// Each congruence class also has a defining expression, +// though the expression may be null. 
If it exists, it can be used for forward +// propagation and reassociation of values. +// +struct CongruenceClass { + typedef SmallPtrSet MemberSet; + unsigned ID; + // Representative leader. + Value *RepLeader; + // Defining Expression. + const Expression *DefiningExpr; + // Actual members of this class. + MemberSet Members; + + // True if this class has no members left. This is mainly used for assertion + // purposes, and for skipping empty classes. + bool Dead; + + explicit CongruenceClass(unsigned ID) + : ID(ID), RepLeader(0), DefiningExpr(0), Dead(false) {} + CongruenceClass(unsigned ID, Value *Leader, const Expression *E) + : ID(ID), RepLeader(Leader), DefiningExpr(E), Dead(false) {} +}; + +namespace llvm { + template <> struct DenseMapInfo { + static const Expression *getEmptyKey() { + uintptr_t Val = static_cast(-1); + Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; + return reinterpret_cast(Val); + } + static const Expression *getTombstoneKey() { + uintptr_t Val = static_cast(~1U); + Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; + return reinterpret_cast(Val); + } + static unsigned getHashValue(const Expression *V) { + return static_cast(V->getHashValue()); + } + static bool isEqual(const Expression *LHS, const Expression *RHS) { + if (LHS == RHS) + return true; + if (LHS == getTombstoneKey() || RHS == getTombstoneKey() || + LHS == getEmptyKey() || RHS == getEmptyKey()) + return false; + return *LHS == *RHS; + } + }; +} // end namespace llvm + +class NewGVN : public FunctionPass { + DominatorTree *DT; + const DataLayout *DL; + const TargetLibraryInfo *TLI; + AssumptionCache *AC; + AliasAnalysis *AA; + MemorySSA *MSSA; + MemorySSAWalker *MSSAWalker; + BumpPtrAllocator ExpressionAllocator; + ArrayRecycler ArgRecycler; + + // Congruence class info. + CongruenceClass *InitialClass; + std::vector CongruenceClasses; + unsigned NextCongruenceNum; + + // Value Mappings. 
+ DenseMap ValueToClass; + DenseMap ValueToExpression; + + // Expression to class mapping. + typedef DenseMap ExpressionClassMap; + ExpressionClassMap ExpressionToClass; + + // Which values have changed as a result of leader changes. + SmallPtrSet ChangedValues; + + // Reachability info. + typedef BasicBlockEdge BlockEdge; + DenseSet ReachableEdges; + SmallPtrSet ReachableBlocks; + + // This is a bitvector because, on larger functions, we may have + // thousands of touched instructions at once (entire blocks, + // instructions with hundreds of uses, etc). Even with optimization + // for when we mark whole blocks as touched, when this was a + // SmallPtrSet or DenseSet, for some functions, we spent >20% of all + // the time in GVN just managing this list. The bitvector, on the + // other hand, efficiently supports test/set/clear of both + // individual and ranges, as well as "find next element" This + // enables us to use it as a worklist with essentially 0 cost. + BitVector TouchedInstructions; + + DenseMap> BlockInstRange; + DenseMap> + DominatedInstRange; + +#ifndef NDEBUG + // Debugging for how many times each block and instruction got processed. + DenseMap ProcessedCount; +#endif + + // DFS info. + DenseMap> DFSDomMap; + DenseMap InstrDFS; + std::vector DFSToInstr; + + // Deletion info. + SmallPtrSet InstructionsToErase; + +public: + static char ID; // Pass identification, replacement for typeid. + NewGVN() : FunctionPass(ID) { + initializeNewGVNPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + bool runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC, + TargetLibraryInfo *TLI, AliasAnalysis *AA, + MemorySSA *MSSA); + +private: + // This transformation requires dominator postdominator info. 
+ void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + + AU.addPreserved(); + AU.addPreserved(); + } + + // Expression handling. + const Expression *createExpression(Instruction *, const BasicBlock *); + const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *, + const BasicBlock *); + PHIExpression *createPHIExpression(Instruction *); + const VariableExpression *createVariableExpression(Value *); + const ConstantExpression *createConstantExpression(Constant *); + const Expression *createVariableOrConstant(Value *V, const BasicBlock *B); + const StoreExpression *createStoreExpression(StoreInst *, MemoryAccess *, + const BasicBlock *); + LoadExpression *createLoadExpression(Type *, Value *, LoadInst *, + MemoryAccess *, const BasicBlock *); + + const CallExpression *createCallExpression(CallInst *, MemoryAccess *, + const BasicBlock *); + const AggregateValueExpression * + createAggregateValueExpression(Instruction *, const BasicBlock *); + bool setBasicExpressionInfo(Instruction *, BasicExpression *, + const BasicBlock *); + + // Congruence class handling. + CongruenceClass *createCongruenceClass(Value *Leader, const Expression *E) { + CongruenceClass *result = + new CongruenceClass(NextCongruenceNum++, Leader, E); + CongruenceClasses.emplace_back(result); + return result; + } + + CongruenceClass *createSingletonCongruenceClass(Value *Member) { + CongruenceClass *CClass = createCongruenceClass(Member, NULL); + CClass->Members.insert(Member); + ValueToClass[Member] = CClass; + return CClass; + } + void initializeCongruenceClasses(Function &F); + + // Symbolic evaluation. 
+ const Expression *checkSimplificationResults(Expression *, Instruction *, + Value *); + const Expression *performSymbolicEvaluation(Value *, const BasicBlock *); + const Expression *performSymbolicLoadEvaluation(Instruction *, + const BasicBlock *); + const Expression *performSymbolicStoreEvaluation(Instruction *, + const BasicBlock *); + const Expression *performSymbolicCallEvaluation(Instruction *, + const BasicBlock *); + const Expression *performSymbolicPHIEvaluation(Instruction *, + const BasicBlock *); + const Expression *performSymbolicAggrValueEvaluation(Instruction *, + const BasicBlock *); + + // Congruence finding. + // Templated to allow them to work both on BB's and BB-edges. + template + Value *lookupOperandLeader(Value *, const User *, const T &) const; + void performCongruenceFinding(Value *, const Expression *); + + // Reachability handling. + void updateReachableEdge(BasicBlock *, BasicBlock *); + void processOutgoingEdges(TerminatorInst *, BasicBlock *); + bool isOnlyReachableViaThisEdge(const BasicBlockEdge &); + Value *findConditionEquivalence(Value *, BasicBlock *) const; + + // Elimination. + struct ValueDFS; + void convertDenseToDFSOrdered(CongruenceClass::MemberSet &, + std::vector &); + + bool eliminateInstructions(Function &); + void replaceInstruction(Instruction *, Value *); + void markInstructionForDeletion(Instruction *); + void deleteInstructionsInBlock(BasicBlock *); + + // New instruction creation. + void handleNewInstruction(Instruction *){}; + void markUsersTouched(Value *); + void markMemoryUsersTouched(MemoryAccess *); + + // Utilities. + void cleanupTables(); + std::pair assignDFSNumbers(BasicBlock *, unsigned); + void updateProcessedCount(Value *V); +}; + +char NewGVN::ID = 0; + +// createGVNPass - The public interface to this file. 
+FunctionPass *llvm::createNewGVNPass() { return new NewGVN(); } + +bool LoadExpression::equals(const Expression &Other) const { + if (!isa(Other) && !isa(Other)) + return false; + if (!this->BasicExpression::equals(Other)) + return false; + if (const auto *OtherL = dyn_cast(&Other)) { + if (DefiningAccess != OtherL->getDefiningAccess()) + return false; + } else if (const auto *OtherS = dyn_cast(&Other)) { + if (DefiningAccess != OtherS->getDefiningAccess()) + return false; + } + + return true; +} + +bool StoreExpression::equals(const Expression &Other) const { + if (!isa(Other) && !isa(Other)) + return false; + if (!this->BasicExpression::equals(Other)) + return false; + if (const auto *OtherL = dyn_cast(&Other)) { + if (DefiningAccess != OtherL->getDefiningAccess()) + return false; + } else if (const auto *OtherS = dyn_cast(&Other)) { + if (DefiningAccess != OtherS->getDefiningAccess()) + return false; + } + + return true; +} + +#ifndef NDEBUG +static std::string getBlockName(const BasicBlock *B) { + return DOTGraphTraits::getSimpleNodeLabel(B, NULL); +} +#endif + +INITIALIZE_PASS_BEGIN(NewGVN, "newgvn", "Global Value Numbering", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_END(NewGVN, "newgvn", "Global Value Numbering", false, false) + +PHIExpression *NewGVN::createPHIExpression(Instruction *I) { + BasicBlock *PhiBlock = I->getParent(); + PHINode *PN = cast(I); + PHIExpression *E = new (ExpressionAllocator) + PHIExpression(PN->getNumOperands(), I->getParent()); + + E->allocateOperands(ArgRecycler, ExpressionAllocator); + E->setType(I->getType()); + E->setOpcode(I->getOpcode()); + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + BasicBlock *B = 
PN->getIncomingBlock(i); + if (!ReachableBlocks.count(B)) { + DEBUG(dbgs() << "Skipping unreachable block " << getBlockName(B) + << " in PHI node " << *PN << "\n"); + continue; + } + if (I->getOperand(i) != I) { + const BasicBlockEdge BBE(B, PhiBlock); + auto Operand = lookupOperandLeader(I->getOperand(i), I, BBE); + E->ops_push_back(Operand); + } else { + E->ops_push_back(I->getOperand(i)); + } + } + return E; +} + +// Set basic expression info (Arguments, type, opcode) for Expression +// E from Instruction I in block B. +bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E, + const BasicBlock *B) { + bool AllConstant = true; + if (auto *GEP = dyn_cast(I)) + E->setType(GEP->getSourceElementType()); + else + E->setType(I->getType()); + E->setOpcode(I->getOpcode()); + E->allocateOperands(ArgRecycler, ExpressionAllocator); + + for (auto &O : I->operands()) { + auto Operand = lookupOperandLeader(O, I, B); + if (!isa(Operand)) + AllConstant = false; + E->ops_push_back(Operand); + } + return AllConstant; +} + +const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T, + Value *Arg1, Value *Arg2, + const BasicBlock *B) { + BasicExpression *E = new (ExpressionAllocator) BasicExpression(2); + + E->setType(T); + E->setOpcode(Opcode); + E->allocateOperands(ArgRecycler, ExpressionAllocator); + if (Instruction::isCommutative(Opcode)) { + // Ensure that commutative instructions that only differ by a permutation + // of their operands get the same value number by sorting the operand value + // numbers. Since all commutative instructions have two operands it is more + // efficient to sort by hand rather than using, say, std::sort. 
+ if (Arg1 > Arg2) + std::swap(Arg1, Arg2); + } + E->ops_push_back(lookupOperandLeader(Arg1, nullptr, B)); + E->ops_push_back(lookupOperandLeader(Arg2, nullptr, B)); + + Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), *DL, TLI, + DT, AC); + if (const Expression *SimplifiedE = checkSimplificationResults(E, nullptr, V)) + return SimplifiedE; + return E; +} + +// Take a Value returned by simplification of Expression E/Instruction +// I, and see if it resulted in a simpler expression. If so, return +// that expression. +// TODO: Once finished, this should not take an Instruction, we only +// use it for printing. +const Expression *NewGVN::checkSimplificationResults(Expression *E, + Instruction *I, Value *V) { + if (!V) + return nullptr; + if (auto *C = dyn_cast(V)) { + if (I) + DEBUG(dbgs() << "Simplified " << *I << " to " + << " constant " << *C << "\n"); + NumGVNOpsSimplified++; + assert(isa(E) && + "We should always have had a basic expression here"); + + cast(E)->deallocateOperands(ArgRecycler); + ExpressionAllocator.Deallocate(E); + return createConstantExpression(C); + } else if (isa(V) || isa(V)) { + if (I) + DEBUG(dbgs() << "Simplified " << *I << " to " + << " variable " << *V << "\n"); + cast(E)->deallocateOperands(ArgRecycler); + ExpressionAllocator.Deallocate(E); + return createVariableExpression(V); + } + + CongruenceClass *CC = ValueToClass.lookup(V); + if (CC && CC->DefiningExpr) { + if (I) + DEBUG(dbgs() << "Simplified " << *I << " to " + << " expression " << *V << "\n"); + NumGVNOpsSimplified++; + assert(isa(E) && + "We should always have had a basic expression here"); + cast(E)->deallocateOperands(ArgRecycler); + ExpressionAllocator.Deallocate(E); + return CC->DefiningExpr; + } + return nullptr; +} + +const Expression *NewGVN::createExpression(Instruction *I, + const BasicBlock *B) { + + BasicExpression *E = + new (ExpressionAllocator) BasicExpression(I->getNumOperands()); + + bool AllConstant = setBasicExpressionInfo(I, E, B); + + 
if (I->isCommutative()) { + // Ensure that commutative instructions that only differ by a permutation + // of their operands get the same value number by sorting the operand value + // numbers. Since all commutative instructions have two operands it is more + // efficient to sort by hand rather than using, say, std::sort. + assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!"); + if (E->getOperand(0) > E->getOperand(1)) + E->swapOperands(0, 1); + } + + // Perform simplificaiton + // TODO: Right now we only check to see if we get a constant result. + // We may get a less than constant, but still better, result for + // some operations. + // IE + // add 0, x -> x + // and x, x -> x + // We should handle this by simply rewriting the expression. + if (auto *CI = dyn_cast(I)) { + // Sort the operand value numbers so xx get the same value + // number. + CmpInst::Predicate Predicate = CI->getPredicate(); + if (E->getOperand(0) > E->getOperand(1)) { + E->swapOperands(0, 1); + Predicate = CmpInst::getSwappedPredicate(Predicate); + } + E->setOpcode((CI->getOpcode() << 8) | Predicate); + // TODO: 25% of our time is spent in SimplifyCmpInst with pointer operands + // TODO: Since we noop bitcasts, we may need to check types before + // simplifying, so that we don't end up simplifying based on a wrong + // type assumption. 
We should clean this up so we can use constants of the + // wrong type + + assert(I->getOperand(0)->getType() == I->getOperand(1)->getType() && + "Wrong types on cmp instruction"); + if ((E->getOperand(0)->getType() == I->getOperand(0)->getType() && + E->getOperand(1)->getType() == I->getOperand(1)->getType())) { + Value *V = SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), + *DL, TLI, DT, AC); + if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) + return SimplifiedE; + } + } else if (isa(I)) { + if (isa(E->getOperand(0)) || + (E->getOperand(1)->getType() == I->getOperand(1)->getType() && + E->getOperand(2)->getType() == I->getOperand(2)->getType())) { + Value *V = SimplifySelectInst(E->getOperand(0), E->getOperand(1), + E->getOperand(2), *DL, TLI, DT, AC); + if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) + return SimplifiedE; + } + } else if (I->isBinaryOp()) { + Value *V = SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), + *DL, TLI, DT, AC); + if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) + return SimplifiedE; + } else if (auto *BI = dyn_cast(I)) { + Value *V = SimplifyInstruction(BI, *DL, TLI, DT, AC); + if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) + return SimplifiedE; + } else if (isa(I)) { + Value *V = SimplifyGEPInst(E->getType(), + ArrayRef(E->ops_begin(), E->ops_end()), + *DL, TLI, DT, AC); + if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) + return SimplifiedE; + } else if (AllConstant) { + // We don't bother trying to simplify unless all of the operands + // were constant. + // TODO: There are a lot of Simplify*'s we could call here, if we + // wanted to. The original motivating case for this code was a + // zext i1 false to i8, which we don't have an interface to + // simplify (IE there is no SimplifyZExt). 
+ + SmallVector C; + for (Value *Arg : E->operands()) + C.emplace_back(cast(Arg)); + + if (Value *V = ConstantFoldInstOperands(I, C, *DL, TLI)) + if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V)) + return SimplifiedE; + } + return E; +} + +const AggregateValueExpression * +NewGVN::createAggregateValueExpression(Instruction *I, const BasicBlock *B) { + if (auto *II = dyn_cast(I)) { + AggregateValueExpression *E = new (ExpressionAllocator) + AggregateValueExpression(I->getNumOperands(), II->getNumIndices()); + setBasicExpressionInfo(I, E, B); + E->allocateIntOperands(ExpressionAllocator); + + for (auto &Index : II->indices()) + E->int_ops_push_back(Index); + return E; + + } else if (auto *EI = dyn_cast(I)) { + AggregateValueExpression *E = new (ExpressionAllocator) + AggregateValueExpression(I->getNumOperands(), EI->getNumIndices()); + setBasicExpressionInfo(EI, E, B); + E->allocateIntOperands(ExpressionAllocator); + + for (auto &Index : EI->indices()) + E->int_ops_push_back(Index); + return E; + } + llvm_unreachable("Unhandled type of aggregate value operation"); +} + +const VariableExpression * +NewGVN::createVariableExpression(Value *V) { + VariableExpression *E = new (ExpressionAllocator) VariableExpression(V); + E->setOpcode(V->getValueID()); + return E; +} + +const Expression *NewGVN::createVariableOrConstant(Value *V, + const BasicBlock *B) { + auto Leader = lookupOperandLeader(V, nullptr, B); + if (auto *C = dyn_cast(Leader)) + return createConstantExpression(C); + return createVariableExpression(Leader); +} + +const ConstantExpression * +NewGVN::createConstantExpression(Constant *C) { + ConstantExpression *E = new (ExpressionAllocator) ConstantExpression(C); + E->setOpcode(C->getValueID()); + return E; +} + +const CallExpression *NewGVN::createCallExpression(CallInst *CI, + MemoryAccess *HV, + const BasicBlock *B) { + // FIXME: Add operand bundles for calls. 
+ CallExpression *E = + new (ExpressionAllocator) CallExpression(CI->getNumOperands(), CI, HV); + setBasicExpressionInfo(CI, E, B); + return E; +} + +// See if we have a congruence class and leader for this operand, and if so, +// return it. Otherwise, return the operand itself. +template +Value *NewGVN::lookupOperandLeader(Value *V, const User *U, + const T &B) const { + CongruenceClass *CC = ValueToClass.lookup(V); + if (CC && (CC != InitialClass)) + return CC->RepLeader; + return V; +} + +LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp, + LoadInst *LI, MemoryAccess *DA, + const BasicBlock *B) { + LoadExpression *E = new (ExpressionAllocator) LoadExpression(1, LI, DA); + E->allocateOperands(ArgRecycler, ExpressionAllocator); + E->setType(LoadType); + + // Give store and loads same opcode so they value number together. + E->setOpcode(0); + auto Operand = lookupOperandLeader(PointerOp, LI, B); + E->ops_push_back(Operand); + if (LI) + E->setAlignment(LI->getAlignment()); + + // TODO: Value number heap versions. We may be able to discover + // things alias analysis can't on it's own (IE that a store and a + // load have the same value, and thus, it isn't clobbering the load). + return E; +} + +const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI, + MemoryAccess *DA, + const BasicBlock *B) { + StoreExpression *E = + new (ExpressionAllocator) StoreExpression(SI->getNumOperands(), SI, DA); + E->allocateOperands(ArgRecycler, ExpressionAllocator); + E->setType(SI->getValueOperand()->getType()); + + // Give store and loads same opcode so they value number together. + E->setOpcode(0); + E->ops_push_back(lookupOperandLeader(SI->getPointerOperand(), SI, B)); + + // TODO: Value number heap versions. We may be able to discover + // things alias analysis can't on it's own (IE that a store and a + // load have the same value, and thus, it isn't clobbering the load). 
+ return E; +} + +const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I, + const BasicBlock *B) { + StoreInst *SI = cast(I); + const Expression *E = createStoreExpression(SI, MSSA->getMemoryAccess(SI), B); + return E; +} + +const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I, + const BasicBlock *B) { + LoadInst *LI = cast(I); + + // We can eliminate in favor of non-simple loads, but we won't be able to + // eliminate them. + if (!LI->isSimple()) + return nullptr; + + Value *LoadAddressLeader = + lookupOperandLeader(LI->getPointerOperand(), I, B); + // Load of undef is undef. + if (isa(LoadAddressLeader)) + return createConstantExpression(UndefValue::get(LI->getType())); + + MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(I); + + if (!MSSA->isLiveOnEntryDef(DefiningAccess)) { + if (auto *MD = dyn_cast(DefiningAccess)) { + Instruction *DefiningInst = MD->getMemoryInst(); + // If the defining instruction is not reachable, replace with undef. + if (!ReachableBlocks.count(DefiningInst->getParent())) + return createConstantExpression(UndefValue::get(LI->getType())); + } + } + + const Expression *E = createLoadExpression( + LI->getType(), LI->getPointerOperand(), LI, DefiningAccess, B); + return E; +} + +// Evaluate read only and pure calls, and create an expression result. +const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I, + const BasicBlock *B) { + CallInst *CI = cast(I); + if (AA->doesNotAccessMemory(CI)) + return createCallExpression(CI, nullptr, B); + else if (AA->onlyReadsMemory(CI)) + return createCallExpression(CI, MSSAWalker->getClobberingMemoryAccess(CI), + B); + else + return nullptr; +} + +// Evaluate PHI nodes symbolically, and create an expression result. 
+const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I, + const BasicBlock *B) { + PHIExpression *E = cast(createPHIExpression(I)); + if (E->ops_empty()) { + DEBUG(dbgs() << "Simplified PHI node " << *I << " to undef" + << "\n"); + E->deallocateOperands(ArgRecycler); + ExpressionAllocator.Deallocate(E); + return createConstantExpression(UndefValue::get(I->getType())); + } + + Value *AllSameValue = E->getOperand(0); + + // See if all arguments are the same, ignoring undef arguments, because we can + // choose a value that is the same for them. + for (const Value *Arg : E->operands()) + if (Arg != AllSameValue && !isa(Arg)) { + AllSameValue = NULL; + break; + } + + if (AllSameValue) { + // It's possible to have phi nodes with cycles (IE dependent on + // other phis that are .... dependent on the original phi node), + // especially in weird CFG's where some arguments are unreachable, or + // uninitialized along certain paths. + // This can cause infinite loops during evaluation (even if you disable + // the recursion below, you will simply ping-pong between congruence + // classes). If a phi node symbolically evaluates to another phi node, + // just leave it alone. If they are really the same, we will still + // eliminate them in favor of each other. + if (isa(AllSameValue)) + return E; + NumGVNPhisAllSame++; + DEBUG(dbgs() << "Simplified PHI node " << *I << " to " << *AllSameValue + << "\n"); + E->deallocateOperands(ArgRecycler); + ExpressionAllocator.Deallocate(E); + if (auto *C = dyn_cast(AllSameValue)) + return createConstantExpression(C); + return createVariableExpression(AllSameValue); + } + return E; +} + +const Expression * +NewGVN::performSymbolicAggrValueEvaluation(Instruction *I, + const BasicBlock *B) { + if (auto *EI = dyn_cast(I)) { + auto *II = dyn_cast(EI->getAggregateOperand()); + if (II && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) { + unsigned Opcode = 0; + // EI might be an extract from one of our recognised intrinsics. 
If it + // is we'll synthesize a semantically equivalent expression instead on + // an extract value expression. + switch (II->getIntrinsicID()) { + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + Opcode = Instruction::Add; + break; + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + Opcode = Instruction::Sub; + break; + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + Opcode = Instruction::Mul; + break; + default: + break; + } + + if (Opcode != 0) { + // Intrinsic recognized. Grab its args to finish building the + // expression. + assert(II->getNumArgOperands() == 2 && + "Expect two args for recognised intrinsics."); + return createBinaryExpression(Opcode, EI->getType(), + II->getArgOperand(0), + II->getArgOperand(1), B); + } + } + } + + return createAggregateValueExpression(I, B); +} + +// Substitute and symbolize the value before value numbering. +const Expression *NewGVN::performSymbolicEvaluation(Value *V, + const BasicBlock *B) { + const Expression *E = NULL; + if (auto *C = dyn_cast(V)) + E = createConstantExpression(C); + else if (isa(V) || isa(V)) { + E = createVariableExpression(V); + } else { + // TODO: memory intrinsics. + // TODO: Some day, we should do the forward propagation and reassociation + // parts of the algorithm. 
+ Instruction *I = cast(V); + switch (I->getOpcode()) { + case Instruction::ExtractValue: + case Instruction::InsertValue: + E = performSymbolicAggrValueEvaluation(I, B); + break; + case Instruction::PHI: + E = performSymbolicPHIEvaluation(I, B); + break; + case Instruction::Call: + E = performSymbolicCallEvaluation(I, B); + break; + case Instruction::Store: + E = performSymbolicStoreEvaluation(I, B); + break; + case Instruction::Load: + E = performSymbolicLoadEvaluation(I, B); + break; + case Instruction::BitCast: { + E = createExpression(I, B); + } break; + + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::ICmp: + case Instruction::FCmp: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::Select: + case Instruction::ExtractElement: + case Instruction::InsertElement: + case Instruction::ShuffleVector: + case Instruction::GetElementPtr: + E = createExpression(I, B); + break; + default: + return nullptr; + } + } + if (!E) + return nullptr; + return E; +} + +// There is an edge from 'Src' to 'Dst'. Return true if every path from +// the entry block to 'Dst' passes via this edge. In particular 'Dst' +// must not be reachable via another edge from 'Src'. 
+bool NewGVN::isOnlyReachableViaThisEdge(const BasicBlockEdge &E) { + + // While in theory it is interesting to consider the case in which Dst has + // more than one predecessor, because Dst might be part of a loop which is + // only reachable from Src, in practice it is pointless since at the time + // GVN runs all such loops have preheaders, which means that Dst will have + // been changed to have only one predecessor, namely Src. + const BasicBlock *Pred = E.getEnd()->getSinglePredecessor(); + const BasicBlock *Src = E.getStart(); + assert((!Pred || Pred == Src) && "No edge between these basic blocks!"); + (void)Src; + return Pred != nullptr; +} + +void NewGVN::markUsersTouched(Value *V) { + // Now mark the users as touched. + for (auto &U : V->uses()) { + auto *User = dyn_cast(U.getUser()); + assert(User && "Use of value not within an instruction?"); + TouchedInstructions.set(InstrDFS[User]); + } +} + +void NewGVN::markMemoryUsersTouched(MemoryAccess *MA) { + for (auto U : MA->users()) { + if (auto *MUD = dyn_cast(U)) + TouchedInstructions.set(InstrDFS[MUD->getMemoryInst()]); + else + TouchedInstructions.set(InstrDFS[MA]); + } +} + +// Perform congruence finding on a given value numbering expression. +void NewGVN::performCongruenceFinding(Value *V, const Expression *E) { + + ValueToExpression[V] = E; + // This is guaranteed to return something, since it will at least find + // INITIAL. + CongruenceClass *VClass = ValueToClass[V]; + assert(VClass && "Should have found a vclass"); + // Dead classes should have been eliminated from the mapping. + assert(!VClass->Dead && "Found a dead class"); + + CongruenceClass *EClass; + // Expressions we can't symbolize are always in their own unique + // congruence class. + if (E == NULL) { + // We may have already made a unique class. + if (VClass->Members.size() != 1 || VClass->RepLeader != V) { + CongruenceClass *NewClass = createCongruenceClass(V, NULL); + // We should always be adding the member in the below code. 
+ EClass = NewClass; + DEBUG(dbgs() << "Created new congruence class for " << *V + << " due to NULL expression\n"); + } else { + EClass = VClass; + } + } else if (const auto *VE = dyn_cast(E)) { + EClass = ValueToClass[VE->getVariableValue()]; + } else { + auto lookupResult = ExpressionToClass.insert({E, nullptr}); + + // If it's not in the value table, create a new congruence class. + if (lookupResult.second) { + CongruenceClass *NewClass = createCongruenceClass(NULL, E); + auto place = lookupResult.first; + place->second = NewClass; + + // Constants and variables should always be made the leader. + if (const auto *CE = dyn_cast(E)) + NewClass->RepLeader = CE->getConstantValue(); + else if (const auto *VE = dyn_cast(E)) + NewClass->RepLeader = VE->getVariableValue(); + else if (const auto *SE = dyn_cast(E)) + NewClass->RepLeader = SE->getStoreInst()->getValueOperand(); + else + NewClass->RepLeader = V; + + EClass = NewClass; + DEBUG(dbgs() << "Created new congruence class for " << *V + << " using expression " << *E << " at " << NewClass->ID + << "\n"); + DEBUG(dbgs() << "Hash value was " << E->getHashValue() << "\n"); + } else { + EClass = lookupResult.first->second; + assert(EClass && "Somehow don't have an eclass"); + + assert(!EClass->Dead && "We accidentally looked up a dead class"); + } + } + bool WasInChanged = ChangedValues.erase(V); + if (VClass != EClass || WasInChanged) { + DEBUG(dbgs() << "Found class " << EClass->ID << " for expression " << E + << "\n"); + + if (VClass != EClass) { + DEBUG(dbgs() << "New congruence class for " << V << " is " << EClass->ID + << "\n"); + + VClass->Members.erase(V); + EClass->Members.insert(V); + ValueToClass[V] = EClass; + // See if we destroyed the class or need to swap leaders. 
+ if (VClass->Members.empty() && VClass != InitialClass) { + if (VClass->DefiningExpr) { + VClass->Dead = true; + DEBUG(dbgs() << "Erasing expression " << *E << " from table\n"); + ExpressionToClass.erase(VClass->DefiningExpr); + } + } else if (VClass->RepLeader == V) { + // FIXME: When the leader changes, the value numbering of + // everything may change, so we need to reprocess. + VClass->RepLeader = *(VClass->Members.begin()); + for (auto M : VClass->Members) { + if (auto *I = dyn_cast(M)) + TouchedInstructions.set(InstrDFS[I]); + ChangedValues.insert(M); + } + } + } + markUsersTouched(V); + if (Instruction *I = dyn_cast(V)) + if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) + markMemoryUsersTouched(MA); + } +} + +// Process the fact that Edge (from, to) is reachable, including marking +// any newly reachable blocks and instructions for processing. +void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) { + // Check if the Edge was reachable before. + if (ReachableEdges.insert({From, To}).second) { + // If this block wasn't reachable before, all instructions are touched. + if (ReachableBlocks.insert(To).second) { + DEBUG(dbgs() << "Block " << getBlockName(To) << " marked reachable\n"); + const auto &InstRange = BlockInstRange.lookup(To); + TouchedInstructions.set(InstRange.first, InstRange.second); + } else { + DEBUG(dbgs() << "Block " << getBlockName(To) + << " was reachable, but new edge {" << getBlockName(From) + << "," << getBlockName(To) << "} to it found\n"); + + // We've made an edge reachable to an existing block, which may + // impact predicates. Otherwise, only mark the phi nodes as touched, as + // they are the only thing that depend on new edges. Anything using their + // values will get propagated to if necessary. 
+ auto BI = To->begin(); + while (isa(BI)) { + TouchedInstructions.set(InstrDFS[&*BI]); + ++BI; + } + } + } +} + +// Given a predicate condition (from a switch, cmp, or whatever) and a block, +// see if we know some constant value for it already. +Value *NewGVN::findConditionEquivalence(Value *Cond, BasicBlock *B) const { + auto Result = lookupOperandLeader(Cond, nullptr, B); + if (isa(Result)) + return Result; + return nullptr; +} + +// Process the outgoing edges of a block for reachability. +void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) { + // Evaluate reachability of terminator instruction. + BranchInst *BR; + if ((BR = dyn_cast(TI)) && BR->isConditional()) { + Value *Cond = BR->getCondition(); + Value *CondEvaluated = findConditionEquivalence(Cond, B); + if (!CondEvaluated) { + if (auto *I = dyn_cast(Cond)) { + const Expression *E = createExpression(I, B); + if (const auto *CE = dyn_cast(E)) { + CondEvaluated = CE->getConstantValue(); + } + } else if (isa(Cond)) { + CondEvaluated = Cond; + } + } + ConstantInt *CI; + BasicBlock *TrueSucc = BR->getSuccessor(0); + BasicBlock *FalseSucc = BR->getSuccessor(1); + if (CondEvaluated && (CI = dyn_cast(CondEvaluated))) { + if (CI->isOne()) { + DEBUG(dbgs() << "Condition for Terminator " << *TI + << " evaluated to true\n"); + updateReachableEdge(B, TrueSucc); + } else if (CI->isZero()) { + DEBUG(dbgs() << "Condition for Terminator " << *TI + << " evaluated to false\n"); + updateReachableEdge(B, FalseSucc); + } + } else { + updateReachableEdge(B, TrueSucc); + updateReachableEdge(B, FalseSucc); + } + } else if (auto *SI = dyn_cast(TI)) { + // For switches, propagate the case values into the case + // destinations. + + // Remember how many outgoing edges there are to every successor. 
+ SmallDenseMap SwitchEdges; + + bool MultipleEdgesOneReachable = false; + Value *SwitchCond = SI->getCondition(); + Value *CondEvaluated = findConditionEquivalence(SwitchCond, B); + // See if we were able to turn this switch statement into a constant. + if (CondEvaluated && isa(CondEvaluated)) { + ConstantInt *CondVal = cast(CondEvaluated); + // We should be able to get case value for this. + auto CaseVal = SI->findCaseValue(CondVal); + if (CaseVal.getCaseSuccessor() == SI->getDefaultDest()) { + // We proved the value is outside of the range of the case. + // We can't do anything other than mark the default dest as reachable, + // and go home. + updateReachableEdge(B, SI->getDefaultDest()); + return; + } + // Now get where it goes and mark it reachable. + BasicBlock *TargetBlock = CaseVal.getCaseSuccessor(); + updateReachableEdge(B, TargetBlock); + unsigned WhichSucc = CaseVal.getSuccessorIndex(); + // Calculate whether our single reachable edge is really a single edge to + // the target block. If not, and the block has multiple predecessors, we + // can only replace phi node values. + for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) { + if (i == WhichSucc) + continue; + BasicBlock *Block = SI->getSuccessor(i); + if (Block == TargetBlock) + MultipleEdgesOneReachable = true; + } + } else { + for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) { + BasicBlock *TargetBlock = SI->getSuccessor(i); + ++SwitchEdges[TargetBlock]; + updateReachableEdge(B, TargetBlock); + } + } + } else { + // Otherwise this is either unconditional, or a type we have no + // idea about. Just mark successors as reachable. + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + BasicBlock *TargetBlock = TI->getSuccessor(i); + updateReachableEdge(B, TargetBlock); + } + } +} + +// The algorithm initially places the values of the routine in the INITIAL congruence +// class. The leader of INITIAL is the undetermined value `TOP`. 
+// When the algorithm has finished, values still in INITIAL are unreachable. +void NewGVN::initializeCongruenceClasses(Function &F) { + // FIXME now i can't remember why this is 2 + NextCongruenceNum = 2; + // Initialize all other instructions to be in INITIAL class. + CongruenceClass::MemberSet InitialValues; + for (auto &B : F) + for (auto &I : B) + InitialValues.insert(&I); + + InitialClass = createCongruenceClass(NULL, NULL); + for (auto L : InitialValues) + ValueToClass[L] = InitialClass; + InitialClass->Members.swap(InitialValues); + + // Initialize arguments to be in their own unique congruence classes + for (auto &FA : F.args()) + createSingletonCongruenceClass(&FA); +} + +void NewGVN::cleanupTables() { + for (unsigned i = 0, e = CongruenceClasses.size(); i != e; ++i) { + DEBUG(dbgs() << "Congruence class " << CongruenceClasses[i]->ID << " has " + << CongruenceClasses[i]->Members.size() << " members\n"); + // Make sure we delete the congruence class (probably worth switching to + // a unique_ptr at some point. + delete CongruenceClasses[i]; + CongruenceClasses[i] = NULL; + } + + ValueToClass.clear(); + ArgRecycler.clear(ExpressionAllocator); + ExpressionAllocator.Reset(); + CongruenceClasses.clear(); + ExpressionToClass.clear(); + ValueToExpression.clear(); + ReachableBlocks.clear(); + ReachableEdges.clear(); +#ifndef NDEBUG + ProcessedCount.clear(); +#endif + DFSDomMap.clear(); + InstrDFS.clear(); + InstructionsToErase.clear(); + + DFSToInstr.clear(); + BlockInstRange.clear(); + TouchedInstructions.clear(); + DominatedInstRange.clear(); +} + +std::pair NewGVN::assignDFSNumbers(BasicBlock *B, + unsigned Start) { + unsigned End = Start; + for (auto &I : *B) { + InstrDFS[&I] = End++; + DFSToInstr.emplace_back(&I); + } + + // All of the range functions taken half-open ranges (open on the end side). + // So we do not subtract one from count, because at this point it is one + // greater than the last instruction. 
+ return std::make_pair(Start, End); +} + +void NewGVN::updateProcessedCount(Value *V) { +#ifndef NDEBUG + if (ProcessedCount.count(V) == 0) { + ProcessedCount.insert({V, 1}); + } else { + ProcessedCount[V] += 1; + assert(ProcessedCount[V] < 100 && + "Seem to have processed the same Value a lot\n"); + } +#endif +} + +// This is the main transformation entry point. +bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC, + TargetLibraryInfo *_TLI, AliasAnalysis *_AA, + MemorySSA *_MSSA) { + bool Changed = false; + DT = _DT; + AC = _AC; + TLI = _TLI; + AA = _AA; + MSSA = _MSSA; + DL = &F.getParent()->getDataLayout(); + MSSAWalker = MSSA->getWalker(); + + // Count number of instructions for sizing of hash tables, and come + // up with a global dfs numbering for instructions. + unsigned ICount = 0; + SmallPtrSet VisitedBlocks; + + // Note: We want RPO traversal of the blocks, which is not quite the same as + // dominator tree order, particularly with regard whether backedges get + // visited first or second, given a block with multiple successors. + // If we visit in the wrong order, we will end up performing N times as many + // iterations. + ReversePostOrderTraversal RPOT(&F); + for (auto &B : RPOT) { + VisitedBlocks.insert(B); + const auto &BlockRange = assignDFSNumbers(B, ICount); + BlockInstRange.insert({B, BlockRange}); + ICount += BlockRange.second - BlockRange.first; + } + + // Handle forward unreachable blocks and figure out which blocks + // have single preds. + for (auto &B : F) { + // Assign numbers to unreachable blocks. + if (!VisitedBlocks.count(&B)) { + const auto &BlockRange = assignDFSNumbers(&B, ICount); + BlockInstRange.insert({&B, BlockRange}); + ICount += BlockRange.second - BlockRange.first; + } + } + + TouchedInstructions.resize(ICount + 1); + DominatedInstRange.reserve(F.size()); + // Ensure we don't end up resizing the expressionToClass map, as + // that can be quite expensive. 
At most, we have one expression per + // instruction. + ExpressionToClass.reserve(ICount + 1); + + // Initialize the touched instructions to include the entry block. + const auto &InstRange = BlockInstRange.lookup(&F.getEntryBlock()); + TouchedInstructions.set(InstRange.first, InstRange.second); + ReachableBlocks.insert(&F.getEntryBlock()); + + initializeCongruenceClasses(F); + + // We start out in the entry block. + BasicBlock *LastBlock = &F.getEntryBlock(); + while (TouchedInstructions.any()) { + // Walk through all the instructions in all the blocks in RPO. + for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1; + InstrNum = TouchedInstructions.find_next(InstrNum)) { + Instruction *I = DFSToInstr[InstrNum]; + BasicBlock *CurrBlock = I->getParent(); + + // If we hit a new block, do reachability processing. + if (CurrBlock != LastBlock) { + LastBlock = CurrBlock; + bool BlockReachable = ReachableBlocks.count(CurrBlock); + const auto &CurrInstRange = BlockInstRange.lookup(CurrBlock); + + // If it's not reachable, erase any touched instructions and move on. + if (!BlockReachable) { + TouchedInstructions.reset(CurrInstRange.first, CurrInstRange.second); + DEBUG(dbgs() << "Skipping instructions in block " + << getBlockName(CurrBlock) + << " because it is unreachable\n"); + continue; + } + updateProcessedCount(CurrBlock); + } + DEBUG(dbgs() << "Processing instruction " << *I << "\n"); + if (I->use_empty() && !I->getType()->isVoidTy()) { + DEBUG(dbgs() << "Skipping unused instruction\n"); + if (isInstructionTriviallyDead(I, TLI)) + markInstructionForDeletion(I); + TouchedInstructions.reset(InstrNum); + continue; + } + updateProcessedCount(I); + + if (!I->isTerminator()) { + const Expression *Symbolized = performSymbolicEvaluation(I, CurrBlock); + performCongruenceFinding(I, Symbolized); + } else { + processOutgoingEdges(dyn_cast(I), CurrBlock); + } + // Reset after processing (because we may mark ourselves as touched when + // we propagate equalities). 
+ TouchedInstructions.reset(InstrNum); + } + } + + Changed |= eliminateInstructions(F); + + // Delete all instructions marked for deletion. + for (Instruction *ToErase : InstructionsToErase) { + if (!ToErase->use_empty()) + ToErase->replaceAllUsesWith(UndefValue::get(ToErase->getType())); + + ToErase->eraseFromParent(); + } + + // Delete all unreachable blocks. + for (auto &B : F) { + BasicBlock *BB = &B; + if (!ReachableBlocks.count(BB)) { + DEBUG(dbgs() << "We believe block " << getBlockName(BB) + << " is unreachable\n"); + deleteInstructionsInBlock(BB); + Changed = true; + } + } + + cleanupTables(); + return Changed; +} + +bool NewGVN::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + return runGVN(F, &getAnalysis().getDomTree(), + &getAnalysis().getAssumptionCache(F), + &getAnalysis().getTLI(), + &getAnalysis().getAAResults(), + &getAnalysis().getMSSA()); +} + +PreservedAnalyses NewGVNPass::run(Function &F, + AnalysisManager &AM) { + NewGVN Impl; + + // Apparently the order in which we get these results matter for + // the old GVN (see Chandler's comment in GVN.cpp). I'll keep + // the same order here, just in case. + auto &AC = AM.getResult(F); + auto &DT = AM.getResult(F); + auto &TLI = AM.getResult(F); + auto &AA = AM.getResult(F); + auto &MSSA = AM.getResult(F).getMSSA(); + bool Changed = Impl.runGVN(F, &DT, &AC, &TLI, &AA, &MSSA); + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve(); + PA.preserve(); + return PA; +} + +// Return true if V is a value that will always be available (IE can +// be placed anywhere) in the function. We don't do globals here +// because they are often worse to put in place. +// TODO: Separate cost from availability +static bool alwaysAvailable(Value *V) { + return isa(V) || isa(V); +} + +// Get the basic block from an instruction/value. 
+static BasicBlock *getBlockForValue(Value *V) { + if (auto *I = dyn_cast(V)) + return I->getParent(); + return nullptr; +} + +struct NewGVN::ValueDFS { + int DFSIn; + int DFSOut; + int LocalNum; + // Only one of these will be set. + Value *Val; + Use *U; + ValueDFS() + : DFSIn(0), DFSOut(0), LocalNum(0), Val(nullptr), U(nullptr) {} + + bool operator<(const ValueDFS &Other) const { + // It's not enough that any given field be less than - we have sets + // of fields that need to be evaluated together to give a proper ordering. + // For example, if you have; + // DFS (1, 3) + // Val 0 + // DFS (1, 2) + // Val 50 + // We want the second to be less than the first, but if we just go field + // by field, we will get to Val 0 < Val 50 and say the first is less than + // the second. We only want it to be less than if the DFS orders are equal. + // + // Each LLVM instruction only produces one value, and thus the lowest-level + // differentiator that really matters for the stack (and what we use as as a + // replacement) is the local dfs number. + // Everything else in the structure is instruction level, and only affects the + // order in which we will replace operands of a given instruction. + // + // For a given instruction (IE things with equal dfsin, dfsout, localnum), + // the order of replacement of uses does not matter. + // IE given, + // a = 5 + // b = a + a + // When you hit b, you will have two valuedfs with the same dfsin, out, and localnum. + // The .val will be the same as well. + // The .u's will be different. + // You will replace both, and it does not matter what order you replace them in + // (IE whether you replace operand 2, then operand 1, or operand 1, then operand 2). + // Similarly for the case of same dfsin, dfsout, localnum, but different .val's + // a = 5 + // b = 6 + // c = a + b + // in c, we will a valuedfs for a, and one for b,with everything the same but + // .val and .u. + // It does not matter what order we replace these operands in. 
+ // You will always end up with the same IR, and this is guaranteed. + return std::tie(DFSIn, DFSOut, LocalNum, Val, U) < + std::tie(Other.DFSIn, Other.DFSOut, Other.LocalNum, Other.Val, + Other.U); + } +}; + +void NewGVN::convertDenseToDFSOrdered(CongruenceClass::MemberSet &Dense, + std::vector &DFSOrderedSet) { + for (auto D : Dense) { + // First add the value. + BasicBlock *BB = getBlockForValue(D); + // Constants are handled prior to ever calling this function, so + // we should only be left with instructions as members. + assert(BB || "Should have figured out a basic block for value"); + ValueDFS VD; + + std::pair DFSPair = DFSDomMap[BB]; + assert(DFSPair.first != -1 && DFSPair.second != -1 && "Invalid DFS Pair"); + VD.DFSIn = DFSPair.first; + VD.DFSOut = DFSPair.second; + VD.Val = D; + // If it's an instruction, use the real local dfs number. + if (auto *I = dyn_cast(D)) + VD.LocalNum = InstrDFS[I]; + else + llvm_unreachable("Should have been an instruction"); + + DFSOrderedSet.emplace_back(VD); + + // Now add the users. + for (auto &U : D->uses()) { + if (auto *I = dyn_cast(U.getUser())) { + ValueDFS VD; + // Put the phi node uses in the incoming block. + BasicBlock *IBlock; + if (auto *P = dyn_cast(I)) { + IBlock = P->getIncomingBlock(U); + // Make phi node users appear last in the incoming block + // they are from. + VD.LocalNum = InstrDFS.size() + 1; + } else { + IBlock = I->getParent(); + VD.LocalNum = InstrDFS[I]; + } + std::pair DFSPair = DFSDomMap[IBlock]; + VD.DFSIn = DFSPair.first; + VD.DFSOut = DFSPair.second; + VD.U = &U; + DFSOrderedSet.emplace_back(VD); + } + } + } +} + +static void patchReplacementInstruction(Instruction *I, Value *Repl) { + // Patch the replacement so that it is not more restrictive than the value + // being replaced. 
+ auto *Op = dyn_cast(I); + auto *ReplOp = dyn_cast(Repl); + + if (Op && ReplOp) + ReplOp->andIRFlags(Op); + + if (auto *ReplInst = dyn_cast(Repl)) { + // FIXME: If both the original and replacement value are part of the + // same control-flow region (meaning that the execution of one + // guarentees the executation of the other), then we can combine the + // noalias scopes here and do better than the general conservative + // answer used in combineMetadata(). + + // In general, GVN unifies expressions over different control-flow + // regions, and so we need a conservative combination of the noalias + // scopes. + unsigned KnownIDs[] = { + LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, LLVMContext::MD_range, + LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, + LLVMContext::MD_invariant_group}; + combineMetadata(ReplInst, I, KnownIDs); + } +} + +static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) { + patchReplacementInstruction(I, Repl); + I->replaceAllUsesWith(Repl); +} + +void NewGVN::deleteInstructionsInBlock(BasicBlock *BB) { + DEBUG(dbgs() << " BasicBlock Dead:" << *BB); + ++NumGVNBlocksDeleted; + + // Check to see if there are non-terminating instructions to delete. + if (isa(BB->begin())) + return; + + // Delete the instructions backwards, as it has a reduced likelihood of having + // to update as many def-use and use-def chains. Start after the terminator. + auto StartPoint = BB->rbegin(); + ++StartPoint; + // Note that we explicitly recalculate BB->rend() on each iteration, + // as it may change when we remove the first instruction. 
+ for (BasicBlock::reverse_iterator I(StartPoint); I != BB->rend();) { + Instruction &Inst = *I++; + if (!Inst.use_empty()) + Inst.replaceAllUsesWith(UndefValue::get(Inst.getType())); + if (isa(Inst)) + continue; + + Inst.eraseFromParent(); + ++NumGVNInstrDeleted; + } +} + +void NewGVN::markInstructionForDeletion(Instruction *I) { + DEBUG(dbgs() << "Marking " << *I << " for deletion\n"); + InstructionsToErase.insert(I); +} + +void NewGVN::replaceInstruction(Instruction *I, Value *V) { + + DEBUG(dbgs() << "Replacing " << *I << " with " << *V << "\n"); + patchAndReplaceAllUsesWith(I, V); + // We save the actual erasing to avoid invalidating memory + // dependencies until we are done with everything. + markInstructionForDeletion(I); +} + +namespace { + +// This is a stack that contains both the value and dfs info of where +// that value is valid. +class ValueDFSStack { +public: + Value *back() const { return ValueStack.back(); } + std::pair dfs_back() const { return DFSStack.back(); } + + void push_back(Value *V, int DFSIn, int DFSOut) { + ValueStack.emplace_back(V); + DFSStack.emplace_back(DFSIn, DFSOut); + } + bool empty() const { return DFSStack.empty(); } + bool isInScope(int DFSIn, int DFSOut) const { + if (empty()) + return false; + return DFSIn >= DFSStack.back().first && DFSOut <= DFSStack.back().second; + } + + void popUntilDFSScope(int DFSIn, int DFSOut) { + + // These two should always be in sync at this point. + assert(ValueStack.size() == DFSStack.size() && + "Mismatch between ValueStack and DFSStack"); + while ( + !DFSStack.empty() && + !(DFSIn >= DFSStack.back().first && DFSOut <= DFSStack.back().second)) { + DFSStack.pop_back(); + ValueStack.pop_back(); + } + } + +private: + SmallVector ValueStack; + SmallVector, 8> DFSStack; +}; +} + +bool NewGVN::eliminateInstructions(Function &F) { + // This is a non-standard eliminator. 
The normal way to eliminate is + // to walk the dominator tree in order, keeping track of available + // values, and eliminating them. However, this is mildly + // pointless. It requires doing lookups on every instruction, + // regardless of whether we will ever eliminate it. For + // instructions part of most singleton congruence class, we know we + // will never eliminate it. + + // Instead, this eliminator looks at the congruence classes directly, sorts + // them into a DFS ordering of the dominator tree, and then we just + // perform eliminate straight on the sets by walking the congruence + // class member uses in order, and eliminate the ones dominated by the + // last member. This is technically O(N log N) where N = number of + // instructions (since in theory all instructions may be in the same + // congruence class). + // When we find something not dominated, it becomes the new leader + // for elimination purposes + + bool AnythingReplaced = false; + + // Since we are going to walk the domtree anyway, and we can't guarantee the + // DFS numbers are updated, we compute some ourselves. + DT->updateDFSNumbers(); + + for (auto &B : F) { + if (!ReachableBlocks.count(&B)) { + for (const auto S : successors(&B)) { + for (auto II = S->begin(); isa(II); ++II) { + PHINode &Phi = cast(*II); + DEBUG(dbgs() << "Replacing incoming value of " << *II << " for block " + << getBlockName(&B) + << " with undef due to it being unreachable\n"); + for (auto &Operand : Phi.incoming_values()) + if (Phi.getIncomingBlock(Operand) == &B) + Operand.set(UndefValue::get(Phi.getType())); + } + } + } + DomTreeNode *Node = DT->getNode(&B); + if (Node) + DFSDomMap[&B] = {Node->getDFSNumIn(), Node->getDFSNumOut()}; + } + + for (CongruenceClass *CC : CongruenceClasses) { + // FIXME: We should eventually be able to replace everything still + // in the initial class with undef, as they should be unreachable. 
+ // Right now, initial still contains some things we skip value + // numbering of (UNREACHABLE's, for example). + if (CC == InitialClass || CC->Dead) + continue; + assert(CC->RepLeader && "We should have had a leader"); + + // If this is a leader that is always available, and it's a + // constant or has no equivalences, just replace everything with + // it. We then update the congruence class with whatever members + // are left. + if (alwaysAvailable(CC->RepLeader)) { + SmallPtrSet MembersLeft; + for (auto M : CC->Members) { + + Value *Member = M; + + // Void things have no uses we can replace. + if (Member == CC->RepLeader || Member->getType()->isVoidTy()) { + MembersLeft.insert(Member); + continue; + } + + DEBUG(dbgs() << "Found replacement " << *(CC->RepLeader) << " for " + << *Member << "\n"); + // Due to equality propagation, these may not always be + // instructions, they may be real values. We don't really + // care about trying to replace the non-instructions. + if (auto *I = dyn_cast(Member)) { + assert(CC->RepLeader != I && + "About to accidentally remove our leader"); + replaceInstruction(I, CC->RepLeader); + AnythingReplaced = true; + + continue; + } else { + MembersLeft.insert(I); + } + } + CC->Members.swap(MembersLeft); + + } else { + DEBUG(dbgs() << "Eliminating in congruence class " << CC->ID << "\n"); + // If this is a singleton, we can skip it. + if (CC->Members.size() != 1) { + + // This is a stack because equality replacement/etc may place + // constants in the middle of the member list, and we want to use + // those constant values in preference to the current leader, over + // the scope of those constants. + ValueDFSStack EliminationStack; + + // Convert the members to DFS ordered sets and then merge them. + std::vector DFSOrderedSet; + convertDenseToDFSOrdered(CC->Members, DFSOrderedSet); + + // Sort the whole thing. 
+ sort(DFSOrderedSet.begin(), DFSOrderedSet.end()); + + for (auto &C : DFSOrderedSet) { + int MemberDFSIn = C.DFSIn; + int MemberDFSOut = C.DFSOut; + Value *Member = C.Val; + Use *MemberUse = C.U; + + // We ignore void things because we can't get a value from them. + if (Member && Member->getType()->isVoidTy()) + continue; + + if (EliminationStack.empty()) { + DEBUG(dbgs() << "Elimination Stack is empty\n"); + } else { + DEBUG(dbgs() << "Elimination Stack Top DFS numbers are (" + << EliminationStack.dfs_back().first << "," + << EliminationStack.dfs_back().second << ")\n"); + } + if (Member && isa(Member)) + assert(isa(CC->RepLeader)); + + DEBUG(dbgs() << "Current DFS numbers are (" << MemberDFSIn << "," + << MemberDFSOut << ")\n"); + // First, we see if we are out of scope or empty. If so, + // and there equivalences, we try to replace the top of + // stack with equivalences (if it's on the stack, it must + // not have been eliminated yet). + // Then we synchronize to our current scope, by + // popping until we are back within a DFS scope that + // dominates the current member. + // Then, what happens depends on a few factors + // If the stack is now empty, we need to push + // If we have a constant or a local equivalence we want to + // start using, we also push. + // Otherwise, we walk along, processing members who are + // dominated by this scope, and eliminate them. + bool ShouldPush = + Member && (EliminationStack.empty() || isa(Member)); + bool OutOfScope = + !EliminationStack.isInScope(MemberDFSIn, MemberDFSOut); + + if (OutOfScope || ShouldPush) { + // Sync to our current scope. + EliminationStack.popUntilDFSScope(MemberDFSIn, MemberDFSOut); + ShouldPush |= Member && EliminationStack.empty(); + if (ShouldPush) { + EliminationStack.push_back(Member, MemberDFSIn, MemberDFSOut); + } + } + + // If we get to this point, and the stack is empty we must have a use + // with nothing we can use to eliminate it, just skip it. 
+ if (EliminationStack.empty()) + continue; + + // Skip the Value's, we only want to eliminate on their uses. + if (Member) + continue; + Value *Result = EliminationStack.back(); + + // Don't replace our existing users with ourselves. + if (MemberUse->get() == Result) + continue; + + DEBUG(dbgs() << "Found replacement " << *Result << " for " + << *MemberUse->get() << " in " << *(MemberUse->getUser()) + << "\n"); + + // If we replaced something in an instruction, handle the patching of + // metadata. + if (auto *ReplacedInst = + dyn_cast(MemberUse->get())) + patchReplacementInstruction(ReplacedInst, Result); + + assert(isa(MemberUse->getUser())); + MemberUse->set(Result); + AnythingReplaced = true; + } + } + } + + // Cleanup the congruence class. + SmallPtrSet MembersLeft; + for (auto MI = CC->Members.begin(), ME = CC->Members.end(); MI != ME;) { + auto CurrIter = MI; + ++MI; + Value *Member = *CurrIter; + if (Member->getType()->isVoidTy()) { + MembersLeft.insert(Member); + continue; + } + + if (auto *MemberInst = dyn_cast(Member)) { + if (isInstructionTriviallyDead(MemberInst)) { + // TODO: Don't mark loads of undefs. 
+ markInstructionForDeletion(MemberInst); + continue; + } + } + MembersLeft.insert(Member); + } + CC->Members.swap(MembersLeft); + } + + return AnythingReplaced; +} + Index: llvm/trunk/lib/Transforms/Scalar/Scalar.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/Scalar.cpp +++ llvm/trunk/lib/Transforms/Scalar/Scalar.cpp @@ -43,6 +43,7 @@ initializeDSELegacyPassPass(Registry); initializeGuardWideningLegacyPassPass(Registry); initializeGVNLegacyPassPass(Registry); + initializeNewGVNPass(Registry); initializeEarlyCSELegacyPassPass(Registry); initializeEarlyCSEMemSSALegacyPassPass(Registry); initializeGVNHoistLegacyPassPass(Registry); @@ -126,6 +127,10 @@ unwrap(PM)->add(createGVNPass()); } +void LLVMAddNewGVNPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createNewGVNPass()); +} + void LLVMAddMergedLoadStoreMotionPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createMergedLoadStoreMotionPass()); } Index: llvm/trunk/test/Transforms/NewGVN/2007-07-25-DominatedLoop.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2007-07-25-DominatedLoop.ll +++ llvm/trunk/test/Transforms/NewGVN/2007-07-25-DominatedLoop.ll @@ -0,0 +1,86 @@ +; RUN: opt < %s -newgvn | llvm-dis + + %struct.PerlInterpreter = type { i8 } +@PL_sv_count = external global i32 ; [#uses=2] + +define void @perl_destruct(%struct.PerlInterpreter* %sv_interp) { +entry: + br i1 false, label %cond_next25, label %cond_true16 + +cond_true16: ; preds = %entry + ret void + +cond_next25: ; preds = %entry + br i1 false, label %cond_next33, label %cond_true32 + +cond_true32: ; preds = %cond_next25 + ret void + +cond_next33: ; preds = %cond_next25 + br i1 false, label %cond_next61, label %cond_true.i46 + +cond_true.i46: ; preds = %cond_next33 + ret void + +cond_next61: ; preds = %cond_next33 + br i1 false, label %cond_next69, label %cond_true66 + +cond_true66: ; preds = %cond_next61 + ret void + +cond_next69: ; 
preds = %cond_next61 + br i1 false, label %Perl_safefree.exit52, label %cond_true.i50 + +cond_true.i50: ; preds = %cond_next69 + ret void + +Perl_safefree.exit52: ; preds = %cond_next69 + br i1 false, label %cond_next80, label %cond_true77 + +cond_true77: ; preds = %Perl_safefree.exit52 + ret void + +cond_next80: ; preds = %Perl_safefree.exit52 + br i1 false, label %Perl_safefree.exit56, label %cond_true.i54 + +cond_true.i54: ; preds = %cond_next80 + ret void + +Perl_safefree.exit56: ; preds = %cond_next80 + br i1 false, label %Perl_safefree.exit60, label %cond_true.i58 + +cond_true.i58: ; preds = %Perl_safefree.exit56 + ret void + +Perl_safefree.exit60: ; preds = %Perl_safefree.exit56 + br i1 false, label %Perl_safefree.exit64, label %cond_true.i62 + +cond_true.i62: ; preds = %Perl_safefree.exit60 + ret void + +Perl_safefree.exit64: ; preds = %Perl_safefree.exit60 + br i1 false, label %Perl_safefree.exit68, label %cond_true.i66 + +cond_true.i66: ; preds = %Perl_safefree.exit64 + ret void + +Perl_safefree.exit68: ; preds = %Perl_safefree.exit64 + br i1 false, label %cond_next150, label %cond_true23.i + +cond_true23.i: ; preds = %Perl_safefree.exit68 + ret void + +cond_next150: ; preds = %Perl_safefree.exit68 + %tmp16092 = load i32, i32* @PL_sv_count, align 4 ; [#uses=0] + br label %cond_next165 + +bb157: ; preds = %cond_next165 + %tmp158 = load i32, i32* @PL_sv_count, align 4 ; [#uses=0] + br label %cond_next165 + +cond_next165: ; preds = %bb157, %cond_next150 + br i1 false, label %bb171, label %bb157 + +bb171: ; preds = %cond_next165 + ret void +} Index: llvm/trunk/test/Transforms/NewGVN/2007-07-25-InfiniteLoop.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2007-07-25-InfiniteLoop.ll +++ llvm/trunk/test/Transforms/NewGVN/2007-07-25-InfiniteLoop.ll @@ -0,0 +1,15 @@ +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s + + %struct.INT2 = type { i32, i32 } +@blkshifts = external global %struct.INT2* ; 
<%struct.INT2**> [#uses=2] + +define i32 @xcompact() { +entry: + store %struct.INT2* null, %struct.INT2** @blkshifts, align 4 + br label %bb + +bb: ; preds = %bb, %entry + %tmp10 = load %struct.INT2*, %struct.INT2** @blkshifts, align 4 ; <%struct.INT2*> [#uses=0] +; CHECK-NOT: %tmp10 + br label %bb +} Index: llvm/trunk/test/Transforms/NewGVN/2007-07-25-Loop.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2007-07-25-Loop.ll +++ llvm/trunk/test/Transforms/NewGVN/2007-07-25-Loop.ll @@ -0,0 +1,15 @@ +; RUN: opt < %s -newgvn | llvm-dis + + %struct.s_segment_inf = type { float, i32, i16, i16, float, float, i32, float, float } + +define void @print_arch(i8* %arch_file, i32 %route_type, i64 %det_routing_arch.0.0, i64 %det_routing_arch.0.1, i64 %det_routing_arch.0.2, i64 %det_routing_arch.0.3, i64 %det_routing_arch.0.4, %struct.s_segment_inf* %segment_inf, i64 %timing_inf.0.0, i64 %timing_inf.0.1, i64 %timing_inf.0.2, i64 %timing_inf.0.3, i64 %timing_inf.0.4, i32 %timing_inf.1) { +entry: + br i1 false, label %bb278, label %bb344 + +bb278: ; preds = %bb278, %entry + br i1 false, label %bb278, label %bb344 + +bb344: ; preds = %bb278, %entry + %tmp38758 = load i16, i16* null, align 2 ; [#uses=0] + ret void +} Index: llvm/trunk/test/Transforms/NewGVN/2007-07-25-NestedLoop.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2007-07-25-NestedLoop.ll +++ llvm/trunk/test/Transforms/NewGVN/2007-07-25-NestedLoop.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -newgvn | llvm-dis + + %struct.TypHeader = type { i32, %struct.TypHeader**, [3 x i8], i8 } + +define %struct.TypHeader* @LtRec(%struct.TypHeader* %hdL, %struct.TypHeader* %hdR) { +entry: + br i1 false, label %bb556.preheader, label %bb534.preheader + +bb534.preheader: ; preds = %entry + ret %struct.TypHeader* null + +bb556.preheader: ; preds = %entry + %tmp56119 = getelementptr %struct.TypHeader, %struct.TypHeader* %hdR, 
i32 0, i32 0 ; [#uses=1] + %tmp56220 = load i32, i32* %tmp56119 ; [#uses=0] + br i1 false, label %bb.nph23, label %bb675.preheader + +bb.nph23: ; preds = %bb556.preheader + ret %struct.TypHeader* null + +bb656: ; preds = %bb675.outer, %bb656 + %tmp678 = load i32, i32* %tmp677 ; [#uses=0] + br i1 false, label %bb684, label %bb656 + +bb684: ; preds = %bb675.outer, %bb656 + br i1 false, label %bb924.preheader, label %bb675.outer + +bb675.outer: ; preds = %bb675.preheader, %bb684 + %tmp67812 = load i32, i32* %tmp67711 ; [#uses=0] + br i1 false, label %bb684, label %bb656 + +bb675.preheader: ; preds = %bb556.preheader + %tmp67711 = getelementptr %struct.TypHeader, %struct.TypHeader* %hdR, i32 0, i32 0 ; [#uses=1] + %tmp677 = getelementptr %struct.TypHeader, %struct.TypHeader* %hdR, i32 0, i32 0 ; [#uses=1] + br label %bb675.outer + +bb924.preheader: ; preds = %bb684 + ret %struct.TypHeader* null +} Index: llvm/trunk/test/Transforms/NewGVN/2007-07-25-SinglePredecessor.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2007-07-25-SinglePredecessor.ll +++ llvm/trunk/test/Transforms/NewGVN/2007-07-25-SinglePredecessor.ll @@ -0,0 +1,29 @@ +; RUN: opt < %s -newgvn | llvm-dis + + %struct.ggBRDF = type { i32 (...)** } + %struct.ggBox3 = type { %struct.ggPoint3, %struct.ggPoint3 } + %struct.ggMaterialRecord = type { %struct.ggPoint2, %struct.ggBox3, %struct.ggBox3, %struct.ggSpectrum, %struct.ggSpectrum, %struct.ggSpectrum, %struct.ggBRDF*, i32, i32, i32, i32 } + %struct.ggONB3 = type { %struct.ggPoint3, %struct.ggPoint3, %struct.ggPoint3 } + %struct.ggPoint2 = type { [2 x double] } + %struct.ggPoint3 = type { [3 x double] } + %struct.ggSpectrum = type { [8 x float] } + %struct.mrViewingHitRecord = type { double, %struct.ggPoint3, %struct.ggONB3, %struct.ggPoint2, double, %struct.ggSpectrum, %struct.ggSpectrum, i32, i32, i32, i32 } + %struct.mrXEllipticalCylinder = type { %struct.ggBRDF, float, float, float, float, float, 
float } + +define i32 @_ZNK21mrZEllipticalCylinder10viewingHitERK6ggRay3dddR18mrViewingHitRecordR16ggMaterialRecord(%struct.mrXEllipticalCylinder* %this, %struct.ggBox3* %ray, double %unnamed_arg, double %tmin, double %tmax, %struct.mrViewingHitRecord* %VHR, %struct.ggMaterialRecord* %unnamed_arg2) { +entry: + %tmp80.i = getelementptr %struct.mrViewingHitRecord, %struct.mrViewingHitRecord* %VHR, i32 0, i32 1, i32 0, i32 0 ; [#uses=1] + store double 0.000000e+00, double* %tmp80.i + br i1 false, label %return, label %cond_next.i + +cond_next.i: ; preds = %entry + br i1 false, label %return, label %cond_true + +cond_true: ; preds = %cond_next.i + %tmp3.i8 = getelementptr %struct.mrViewingHitRecord, %struct.mrViewingHitRecord* %VHR, i32 0, i32 1, i32 0, i32 0 ; [#uses=1] + %tmp46 = load double, double* %tmp3.i8 ; [#uses=0] + ret i32 1 + +return: ; preds = %cond_next.i, %entry + ret i32 0 +} Index: llvm/trunk/test/Transforms/NewGVN/2007-07-26-InterlockingLoops.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2007-07-26-InterlockingLoops.ll +++ llvm/trunk/test/Transforms/NewGVN/2007-07-26-InterlockingLoops.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s + +@last = external global [65 x i32*] + +define i32 @NextRootMove(i32 %wtm, i32 %x, i32 %y, i32 %z) { +entry: + %A = alloca i32* + %tmp17618 = load i32*, i32** getelementptr ([65 x i32*], [65 x i32*]* @last, i32 0, i32 1), align 4 + store i32* %tmp17618, i32** %A +; CHECK: entry: +; CHECK-NEXT: alloca i32 +; CHECK-NEXT: %tmp17618 = load +; CHECK-NOT: load +; CHECK-NOT: phi + br label %cond_true116 + +cond_true116: + %cmp = icmp eq i32 %x, %y + br i1 %cmp, label %cond_true128, label %cond_true145 + +cond_true128: + %tmp17625 = load i32*, i32** getelementptr ([65 x i32*], [65 x i32*]* @last, i32 0, i32 1), align 4 + store i32* %tmp17625, i32** %A + %cmp1 = icmp eq i32 %x, %z + br i1 %cmp1 , label %bb98.backedge, label %return.loopexit + 
+bb98.backedge: + br label %cond_true116 + +cond_true145: + %tmp17631 = load i32*, i32** getelementptr ([65 x i32*], [65 x i32*]* @last, i32 0, i32 1), align 4 + store i32* %tmp17631, i32** %A + br i1 false, label %bb98.backedge, label %return.loopexit + +return.loopexit: + br label %return + +return: + ret i32 0 +} Index: llvm/trunk/test/Transforms/NewGVN/2007-07-26-NonRedundant.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2007-07-26-NonRedundant.ll +++ llvm/trunk/test/Transforms/NewGVN/2007-07-26-NonRedundant.ll @@ -0,0 +1,16 @@ +; RUN: opt < %s -newgvn | llvm-dis + +@bsLive = external global i32 ; [#uses=2] + +define i32 @bsR(i32 %n) { +entry: + br i1 false, label %cond_next, label %bb19 + +cond_next: ; preds = %entry + store i32 0, i32* @bsLive, align 4 + br label %bb19 + +bb19: ; preds = %cond_next, %entry + %tmp29 = load i32, i32* @bsLive, align 4 ; [#uses=0] + ret i32 0 +} Index: llvm/trunk/test/Transforms/NewGVN/2007-07-26-PhiErasure.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2007-07-26-PhiErasure.ll +++ llvm/trunk/test/Transforms/NewGVN/2007-07-26-PhiErasure.ll @@ -0,0 +1,45 @@ +; XFAIL: * +; RUN: opt < %s -newgvn -S | FileCheck %s + + %struct..0anon = type { i32 } + %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } + %struct.__sFILEX = type opaque + %struct.__sbuf = type { i8*, i32 } + %struct.rtx_def = type { i16, i8, i8, [1 x %struct..0anon] } +@n_spills = external global i32 ; [#uses=2] + +define i32 @reload(%struct.rtx_def* %first, i32 %global, %struct.FILE* %dumpfile) { +cond_next2835.1: ; preds = %cond_next2861 + %tmp2922 = load i32, i32* @n_spills, align 4 ; [#uses=0] + br label %bb2928 + +bb2928: ; preds = %cond_next2835.1, 
%cond_next2943 + br i1 false, label %cond_next2943, label %cond_true2935 + +cond_true2935: ; preds = %bb2928 + br label %cond_next2943 + +cond_next2943: ; preds = %cond_true2935, %bb2928 + br i1 false, label %bb2982.preheader, label %bb2928 + +bb2982.preheader: ; preds = %cond_next2943 + %tmp298316 = load i32, i32* @n_spills, align 4 ; [#uses=0] + ret i32 %tmp298316 + +} + +; CHECK: define i32 @reload(%struct.rtx_def* %first, i32 %global, %struct.FILE* %dumpfile) { +; CHECK-NEXT: cond_next2835.1: +; CHECK-NEXT: br label %bb2928 +; CHECK: bb2928: +; CHECK-NEXT: br i1 false, label %bb2928.cond_next2943_crit_edge, label %cond_true2935 +; CHECK: bb2928.cond_next2943_crit_edge: +; CHECK-NEXT: br label %cond_next2943 +; CHECK: cond_true2935: +; CHECK-NEXT: br label %cond_next2943 +; CHECK: cond_next2943: +; CHECK-NEXT: br i1 false, label %bb2982.preheader, label %bb2928 +; CHECK: bb2982.preheader: +; CHECK-NEXT: %tmp298316 = load i32, i32* @n_spills, align 4 +; CHECK-NEXT: ret i32 %tmp298316 +; CHECK-NEXT: } Index: llvm/trunk/test/Transforms/NewGVN/2007-07-30-PredIDom.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2007-07-30-PredIDom.ll +++ llvm/trunk/test/Transforms/NewGVN/2007-07-30-PredIDom.ll @@ -0,0 +1,274 @@ +; RUN: opt < %s -newgvn | llvm-dis + + %"struct.Block::$_16" = type { i32 } + %struct.Exp = type { %struct.Exp_*, i32, i32, i32, %struct.Exp*, %struct.Exp*, %"struct.Exp::$_10", %"struct.Block::$_16", %"struct.Exp::$_12" } + %"struct.Exp::$_10" = type { %struct.Exp* } + %"struct.Exp::$_12" = type { %struct.Exp** } + %struct.Exp_ = type { i32, i32, i32, i32, %struct.Id* } + %struct.Id = type { i8*, i32, i32, i32, %"struct.Id::$_13" } + %"struct.Id::$_13" = type { double } + +define i8* @_ZN3Exp8toStringEj(%struct.Exp* %this, i32 %nextpc) { +entry: + switch i32 0, label %bb970 [ + i32 1, label %bb + i32 2, label %bb39 + i32 3, label %bb195 + i32 4, label %bb270 + i32 5, label %bb418 + i32 6, label 
%bb633 + i32 7, label %bb810 + i32 8, label %bb882 + i32 9, label %bb925 + ] + +bb: ; preds = %entry + store i8* null, i8** null + br label %return + +bb39: ; preds = %entry + br i1 false, label %cond_true, label %cond_false132 + +cond_true: ; preds = %bb39 + br i1 false, label %cond_true73, label %cond_false + +cond_true73: ; preds = %cond_true + br i1 false, label %cond_true108, label %cond_next + +cond_true108: ; preds = %cond_true73 + br label %cond_next + +cond_next: ; preds = %cond_true108, %cond_true73 + br label %cond_next131 + +cond_false: ; preds = %cond_true + br label %cond_next131 + +cond_next131: ; preds = %cond_false, %cond_next + br label %cond_next141 + +cond_false132: ; preds = %bb39 + br label %cond_next141 + +cond_next141: ; preds = %cond_false132, %cond_next131 + br i1 false, label %cond_true169, label %cond_false175 + +cond_true169: ; preds = %cond_next141 + br label %cond_next181 + +cond_false175: ; preds = %cond_next141 + br label %cond_next181 + +cond_next181: ; preds = %cond_false175, %cond_true169 + br i1 false, label %cond_true189, label %cond_next191 + +cond_true189: ; preds = %cond_next181 + br label %cond_next191 + +cond_next191: ; preds = %cond_true189, %cond_next181 + store i8* null, i8** null + br label %return + +bb195: ; preds = %entry + br i1 false, label %cond_true248, label %cond_false250 + +cond_true248: ; preds = %bb195 + br label %cond_next252 + +cond_false250: ; preds = %bb195 + br label %cond_next252 + +cond_next252: ; preds = %cond_false250, %cond_true248 + br i1 false, label %cond_true265, label %cond_next267 + +cond_true265: ; preds = %cond_next252 + br label %cond_next267 + +cond_next267: ; preds = %cond_true265, %cond_next252 + store i8* null, i8** null + br label %return + +bb270: ; preds = %entry + br i1 false, label %cond_true338, label %cond_false340 + +cond_true338: ; preds = %bb270 + br label %cond_next342 + +cond_false340: ; preds = %bb270 + br label %cond_next342 + +cond_next342: ; preds = %cond_false340, 
%cond_true338 + br i1 false, label %cond_true362, label %cond_false364 + +cond_true362: ; preds = %cond_next342 + br label %cond_next366 + +cond_false364: ; preds = %cond_next342 + br label %cond_next366 + +cond_next366: ; preds = %cond_false364, %cond_true362 + br i1 false, label %cond_true393, label %cond_next395 + +cond_true393: ; preds = %cond_next366 + br label %cond_next395 + +cond_next395: ; preds = %cond_true393, %cond_next366 + br i1 false, label %cond_true406, label %cond_next408 + +cond_true406: ; preds = %cond_next395 + br label %cond_next408 + +cond_next408: ; preds = %cond_true406, %cond_next395 + br i1 false, label %cond_true413, label %cond_next415 + +cond_true413: ; preds = %cond_next408 + br label %cond_next415 + +cond_next415: ; preds = %cond_true413, %cond_next408 + store i8* null, i8** null + br label %return + +bb418: ; preds = %entry + br i1 false, label %cond_true512, label %cond_false514 + +cond_true512: ; preds = %bb418 + br label %cond_next516 + +cond_false514: ; preds = %bb418 + br label %cond_next516 + +cond_next516: ; preds = %cond_false514, %cond_true512 + br i1 false, label %cond_true536, label %cond_false538 + +cond_true536: ; preds = %cond_next516 + br label %cond_next540 + +cond_false538: ; preds = %cond_next516 + br label %cond_next540 + +cond_next540: ; preds = %cond_false538, %cond_true536 + br i1 false, label %cond_true560, label %cond_false562 + +cond_true560: ; preds = %cond_next540 + br label %cond_next564 + +cond_false562: ; preds = %cond_next540 + br label %cond_next564 + +cond_next564: ; preds = %cond_false562, %cond_true560 + br i1 false, label %cond_true597, label %cond_next599 + +cond_true597: ; preds = %cond_next564 + br label %cond_next599 + +cond_next599: ; preds = %cond_true597, %cond_next564 + br i1 false, label %cond_true614, label %cond_next616 + +cond_true614: ; preds = %cond_next599 + br label %cond_next616 + +cond_next616: ; preds = %cond_true614, %cond_next599 + br i1 false, label %cond_true621, label 
%cond_next623 + +cond_true621: ; preds = %cond_next616 + br label %cond_next623 + +cond_next623: ; preds = %cond_true621, %cond_next616 + br i1 false, label %cond_true628, label %cond_next630 + +cond_true628: ; preds = %cond_next623 + br label %cond_next630 + +cond_next630: ; preds = %cond_true628, %cond_next623 + store i8* null, i8** null + br label %return + +bb633: ; preds = %entry + br i1 false, label %cond_true667, label %cond_next669 + +cond_true667: ; preds = %bb633 + br label %cond_next669 + +cond_next669: ; preds = %cond_true667, %bb633 + br i1 false, label %cond_true678, label %cond_next791 + +cond_true678: ; preds = %cond_next669 + br label %bb735 + +bb679: ; preds = %bb735 + br i1 false, label %cond_true729, label %cond_next731 + +cond_true729: ; preds = %bb679 + br label %cond_next731 + +cond_next731: ; preds = %cond_true729, %bb679 + br label %bb735 + +bb735: ; preds = %cond_next731, %cond_true678 + br i1 false, label %bb679, label %bb743 + +bb743: ; preds = %bb735 + br i1 false, label %cond_true788, label %cond_next790 + +cond_true788: ; preds = %bb743 + br label %cond_next790 + +cond_next790: ; preds = %cond_true788, %bb743 + br label %cond_next791 + +cond_next791: ; preds = %cond_next790, %cond_next669 + br i1 false, label %cond_true805, label %cond_next807 + +cond_true805: ; preds = %cond_next791 + br label %cond_next807 + +cond_next807: ; preds = %cond_true805, %cond_next791 + store i8* null, i8** null + br label %return + +bb810: ; preds = %entry + br i1 false, label %cond_true870, label %cond_next872 + +cond_true870: ; preds = %bb810 + br label %cond_next872 + +cond_next872: ; preds = %cond_true870, %bb810 + br i1 false, label %cond_true877, label %cond_next879 + +cond_true877: ; preds = %cond_next872 + br label %cond_next879 + +cond_next879: ; preds = %cond_true877, %cond_next872 + store i8* null, i8** null + br label %return + +bb882: ; preds = %entry + br i1 false, label %cond_true920, label %cond_next922 + +cond_true920: ; preds = %bb882 + 
br label %cond_next922 + +cond_next922: ; preds = %cond_true920, %bb882 + store i8* null, i8** null + br label %return + +bb925: ; preds = %entry + br i1 false, label %cond_true965, label %cond_next967 + +cond_true965: ; preds = %bb925 + br label %cond_next967 + +cond_next967: ; preds = %cond_true965, %bb925 + store i8* null, i8** null + br label %return + +bb970: ; preds = %entry + unreachable + ; No predecessors! + store i8* null, i8** null + br label %return + +return: ; preds = %0, %cond_next967, %cond_next922, %cond_next879, %cond_next807, %cond_next630, %cond_next415, %cond_next267, %cond_next191, %bb + %retval980 = load i8*, i8** null ; [#uses=1] + ret i8* %retval980 +} Index: llvm/trunk/test/Transforms/NewGVN/2007-07-31-NoDomInherit.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2007-07-31-NoDomInherit.ll +++ llvm/trunk/test/Transforms/NewGVN/2007-07-31-NoDomInherit.ll @@ -0,0 +1,315 @@ +; XFAIL: * +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s + + %struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* } +@debug = external constant i32 ; [#uses=0] +@counters = external constant i32 ; [#uses=1] +@trialx = external global [17 x i32] ; <[17 x i32]*> [#uses=1] +@dummy1 = external global [7 x i32] ; <[7 x i32]*> [#uses=0] +@dummy2 = external global [4 x i32] ; <[4 x i32]*> [#uses=0] +@unacceptable = external global i32 ; [#uses=0] +@isa = external global [13 x %struct.anon] ; <[13 x %struct.anon]*> [#uses=3] +@.str = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str1 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@.str2 = external constant [1 x i8] ; <[1 x i8]*> [#uses=0] +@.str3 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str4 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@.str5 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str6 = external constant [2 x i8] ; <[2 x i8]*> [#uses=0] +@.str7 = external constant [4 x i8] ; <[4 x 
i8]*> [#uses=0] +@.str8 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str9 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str10 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str11 = external constant [2 x i8] ; <[2 x i8]*> [#uses=0] +@.str12 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str13 = external constant [2 x i8] ; <[2 x i8]*> [#uses=0] +@.str14 = external constant [5 x i8] ; <[5 x i8]*> [#uses=0] +@.str15 = external constant [5 x i8] ; <[5 x i8]*> [#uses=0] +@.str16 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str17 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str18 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@.str19 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str20 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str21 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str22 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str23 = external constant [5 x i8] ; <[5 x i8]*> [#uses=0] +@.str24 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str25 = external constant [6 x i8] ; <[6 x i8]*> [#uses=0] +@.str26 = external constant [5 x i8] ; <[5 x i8]*> [#uses=0] +@.str27 = external constant [6 x i8] ; <[6 x i8]*> [#uses=0] +@r = external global [17 x i32] ; <[17 x i32]*> [#uses=0] +@.str28 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@.str29 = external constant [5 x i8] ; <[5 x i8]*> [#uses=0] +@pgm = external global [5 x { i32, [3 x i32] }] ; <[5 x { i32, [3 x i32] }]*> [#uses=4] +@.str30 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@.str31 = external constant [13 x i8] ; <[13 x i8]*> [#uses=0] +@.str32 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@.str33 = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] +@.str34 = external constant [20 x i8] ; <[20 x i8]*> [#uses=0] +@numi = external global i32 ; [#uses=7] +@.str35 = external constant [10 x i8] ; <[10 x i8]*> [#uses=0] +@counter = external global [5 x i32] ; <[5 x i32]*> 
[#uses=2] +@itrialx.2510 = external global i32 ; [#uses=0] +@.str36 = external constant [43 x i8] ; <[43 x i8]*> [#uses=0] +@.str37 = external constant [42 x i8] ; <[42 x i8]*> [#uses=0] +@corr_result = external global i32 ; [#uses=0] +@.str38 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@.str39 = external constant [5 x i8] ; <[5 x i8]*> [#uses=0] +@.str40 = external constant [47 x i8] ; <[47 x i8]*> [#uses=0] +@correct_result = external global [17 x i32] ; <[17 x i32]*> [#uses=1] +@.str41 = external constant [46 x i8] ; <[46 x i8]*> [#uses=0] +@.str42 = external constant [32 x i8] ; <[32 x i8]*> [#uses=0] +@.str43 = external constant [44 x i8] ; <[44 x i8]*> [#uses=1] +@.str44 = external constant [21 x i8] ; <[21 x i8]*> [#uses=1] +@.str45 = external constant [12 x i8] ; <[12 x i8]*> [#uses=1] +@.str46 = external constant [5 x i8] ; <[5 x i8]*> [#uses=1] +@.str47 = external constant [12 x i8] ; <[12 x i8]*> [#uses=1] + +declare i32 @neg(i32, i32, i32) + +declare i32 @Not(i32, i32, i32) + +declare i32 @pop(i32, i32, i32) + +declare i32 @nlz(i32, i32, i32) + +declare i32 @rev(i32, i32, i32) + +declare i32 @add(i32, i32, i32) + +declare i32 @sub(i32, i32, i32) + +declare i32 @mul(i32, i32, i32) + +declare i32 @divide(i32, i32, i32) + +declare i32 @divu(i32, i32, i32) + +declare i32 @And(i32, i32, i32) + +declare i32 @Or(i32, i32, i32) + +declare i32 @Xor(i32, i32, i32) + +declare i32 @rotl(i32, i32, i32) + +declare i32 @shl(i32, i32, i32) + +declare i32 @shr(i32, i32, i32) + +declare i32 @shrs(i32, i32, i32) + +declare i32 @cmpeq(i32, i32, i32) + +declare i32 @cmplt(i32, i32, i32) + +declare i32 @cmpltu(i32, i32, i32) + +declare i32 @seleq(i32, i32, i32) + +declare i32 @sellt(i32, i32, i32) + +declare i32 @selle(i32, i32, i32) + +declare void @print_expr(i32) + +declare i32 @printf(i8*, ...) 
+ +declare i32 @putchar(i32) + +declare void @print_pgm() + +declare void @simulate_one_instruction(i32) + +declare i32 @check(i32) + +declare i32 @puts(i8*) + +declare void @fix_operands(i32) + +declare void @abort() + +declare i32 @increment() + +declare i32 @search() + +define i32 @main(i32 %argc, i8** %argv) { +entry: + %argc_addr = alloca i32 ; [#uses=1] + %argv_addr = alloca i8** ; [#uses=1] + %retval = alloca i32, align 4 ; [#uses=2] + %tmp = alloca i32, align 4 ; [#uses=2] + %i = alloca i32, align 4 ; [#uses=21] + %num_sol = alloca i32, align 4 ; [#uses=4] + %total = alloca i32, align 4 ; [#uses=4] + %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] + store i32 %argc, i32* %argc_addr + store i8** %argv, i8*** %argv_addr + store i32 0, i32* %num_sol + store i32 1, i32* @numi + br label %bb91 + +bb: ; preds = %cond_next97 + %tmp1 = load i32, i32* @numi ; [#uses=1] + %tmp2 = getelementptr [44 x i8], [44 x i8]* @.str43, i32 0, i32 0 ; [#uses=1] + %tmp3 = call i32 (i8*, ...) @printf( i8* %tmp2, i32 %tmp1 ) ; [#uses=0] + store i32 0, i32* %i + br label %bb13 + +bb4: ; preds = %bb13 + %tmp5 = load i32, i32* %i ; [#uses=1] + %tmp6 = load i32, i32* %i ; [#uses=1] + %tmp7 = getelementptr [17 x i32], [17 x i32]* @trialx, i32 0, i32 %tmp6 ; [#uses=1] + %tmp8 = load i32, i32* %tmp7 ; [#uses=1] + %tmp9 = call i32 @userfun( i32 %tmp8 ) ; [#uses=1] + %tmp10 = getelementptr [17 x i32], [17 x i32]* @correct_result, i32 0, i32 %tmp5 ; [#uses=1] + store i32 %tmp9, i32* %tmp10 + %tmp11 = load i32, i32* %i ; [#uses=1] + %tmp12 = add i32 %tmp11, 1 ; [#uses=1] + store i32 %tmp12, i32* %i + br label %bb13 + +bb13: ; preds = %bb4, %bb + %tmp14 = load i32, i32* %i ; [#uses=1] + %tmp15 = icmp sle i32 %tmp14, 16 ; [#uses=1] + %tmp1516 = zext i1 %tmp15 to i32 ; [#uses=1] + %toBool = icmp ne i32 %tmp1516, 0 ; [#uses=1] + br i1 %toBool, label %bb4, label %bb17 + +bb17: ; preds = %bb13 + store i32 0, i32* %i + br label %bb49 + +bb18: ; preds = %bb49 + %tmp19 = load i32, i32* %i ; 
[#uses=1] + %tmp20 = getelementptr [5 x { i32, [3 x i32] }], [5 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %tmp19 ; <{ i32, [3 x i32] }*> [#uses=1] + %tmp21 = getelementptr { i32, [3 x i32] }, { i32, [3 x i32] }* %tmp20, i32 0, i32 0 ; [#uses=1] + store i32 0, i32* %tmp21 + %tmp22 = load i32, i32* %i ; [#uses=1] + %tmp23 = getelementptr [13 x %struct.anon], [13 x %struct.anon]* @isa, i32 0, i32 0 ; <%struct.anon*> [#uses=1] + %tmp24 = getelementptr %struct.anon, %struct.anon* %tmp23, i32 0, i32 3 ; <[3 x i32]*> [#uses=1] + %tmp25 = getelementptr [3 x i32], [3 x i32]* %tmp24, i32 0, i32 0 ; [#uses=1] + %tmp26 = load i32, i32* %tmp25 ; [#uses=1] + %tmp27 = getelementptr [5 x { i32, [3 x i32] }], [5 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %tmp22 ; <{ i32, [3 x i32] }*> [#uses=1] + %tmp28 = getelementptr { i32, [3 x i32] }, { i32, [3 x i32] }* %tmp27, i32 0, i32 1 ; <[3 x i32]*> [#uses=1] + %tmp29 = getelementptr [3 x i32], [3 x i32]* %tmp28, i32 0, i32 0 ; [#uses=1] + store i32 %tmp26, i32* %tmp29 + %tmp30 = load i32, i32* %i ; [#uses=1] + %tmp31 = getelementptr [13 x %struct.anon], [13 x %struct.anon]* @isa, i32 0, i32 0 ; <%struct.anon*> [#uses=1] + %tmp32 = getelementptr %struct.anon, %struct.anon* %tmp31, i32 0, i32 3 ; <[3 x i32]*> [#uses=1] + %tmp33 = getelementptr [3 x i32], [3 x i32]* %tmp32, i32 0, i32 1 ; [#uses=1] + %tmp34 = load i32, i32* %tmp33 ; [#uses=1] + %tmp35 = getelementptr [5 x { i32, [3 x i32] }], [5 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %tmp30 ; <{ i32, [3 x i32] }*> [#uses=1] + %tmp36 = getelementptr { i32, [3 x i32] }, { i32, [3 x i32] }* %tmp35, i32 0, i32 1 ; <[3 x i32]*> [#uses=1] + %tmp37 = getelementptr [3 x i32], [3 x i32]* %tmp36, i32 0, i32 1 ; [#uses=1] + store i32 %tmp34, i32* %tmp37 + %tmp38 = load i32, i32* %i ; [#uses=1] + %tmp39 = getelementptr [13 x %struct.anon], [13 x %struct.anon]* @isa, i32 0, i32 0 ; <%struct.anon*> [#uses=1] + %tmp40 = getelementptr %struct.anon, %struct.anon* %tmp39, i32 0, i32 3 ; <[3 x i32]*> [#uses=1] + 
%tmp41 = getelementptr [3 x i32], [3 x i32]* %tmp40, i32 0, i32 2 ; [#uses=1] + %tmp42 = load i32, i32* %tmp41 ; [#uses=1] + %tmp43 = getelementptr [5 x { i32, [3 x i32] }], [5 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %tmp38 ; <{ i32, [3 x i32] }*> [#uses=1] + %tmp44 = getelementptr { i32, [3 x i32] }, { i32, [3 x i32] }* %tmp43, i32 0, i32 1 ; <[3 x i32]*> [#uses=1] + %tmp45 = getelementptr [3 x i32], [3 x i32]* %tmp44, i32 0, i32 2 ; [#uses=1] + store i32 %tmp42, i32* %tmp45 + %tmp46 = load i32, i32* %i ; [#uses=1] + call void @fix_operands( i32 %tmp46 ) + %tmp47 = load i32, i32* %i ; [#uses=1] +; CHECK: %tmp47 = phi i32 [ %tmp48, %bb18 ], [ 0, %bb17 ] + %tmp48 = add i32 %tmp47, 1 ; [#uses=1] + store i32 %tmp48, i32* %i + br label %bb49 + +bb49: ; preds = %bb18, %bb17 + %tmp50 = load i32, i32* @numi ; [#uses=1] + %tmp51 = load i32, i32* %i ; [#uses=1] + %tmp52 = icmp slt i32 %tmp51, %tmp50 ; [#uses=1] + %tmp5253 = zext i1 %tmp52 to i32 ; [#uses=1] + %toBool54 = icmp ne i32 %tmp5253, 0 ; [#uses=1] + br i1 %toBool54, label %bb18, label %bb55 + +bb55: ; preds = %bb49 + %tmp56 = call i32 @search( ) ; [#uses=1] + store i32 %tmp56, i32* %num_sol + %tmp57 = getelementptr [21 x i8], [21 x i8]* @.str44, i32 0, i32 0 ; [#uses=1] + %tmp58 = load i32, i32* %num_sol ; [#uses=1] + %tmp59 = call i32 (i8*, ...) @printf( i8* %tmp57, i32 %tmp58 ) ; [#uses=0] + %tmp60 = load i32, i32* @counters ; [#uses=1] + %tmp61 = icmp ne i32 %tmp60, 0 ; [#uses=1] + %tmp6162 = zext i1 %tmp61 to i32 ; [#uses=1] + %toBool63 = icmp ne i32 %tmp6162, 0 ; [#uses=1] + br i1 %toBool63, label %cond_true, label %cond_next + +cond_true: ; preds = %bb55 + store i32 0, i32* %total + %tmp64 = getelementptr [12 x i8], [12 x i8]* @.str45, i32 0, i32 0 ; [#uses=1] + %tmp65 = call i32 (i8*, ...) 
@printf( i8* %tmp64 ) ; [#uses=0] + store i32 0, i32* %i + br label %bb79 + +bb66: ; preds = %bb79 + %tmp67 = load i32, i32* %i ; [#uses=1] + %tmp68 = getelementptr [5 x i32], [5 x i32]* @counter, i32 0, i32 %tmp67 ; [#uses=1] + %tmp69 = load i32, i32* %tmp68 ; [#uses=1] + %tmp70 = getelementptr [5 x i8], [5 x i8]* @.str46, i32 0, i32 0 ; [#uses=1] + %tmp71 = call i32 (i8*, ...) @printf( i8* %tmp70, i32 %tmp69 ) ; [#uses=0] + %tmp72 = load i32, i32* %i ; [#uses=1] + %tmp73 = getelementptr [5 x i32], [5 x i32]* @counter, i32 0, i32 %tmp72 ; [#uses=1] + %tmp74 = load i32, i32* %tmp73 ; [#uses=1] + %tmp75 = load i32, i32* %total ; [#uses=1] + %tmp76 = add i32 %tmp74, %tmp75 ; [#uses=1] + store i32 %tmp76, i32* %total + %tmp77 = load i32, i32* %i ; [#uses=1] + %tmp78 = add i32 %tmp77, 1 ; [#uses=1] + store i32 %tmp78, i32* %i + br label %bb79 + +bb79: ; preds = %bb66, %cond_true + %tmp80 = load i32, i32* @numi ; [#uses=1] + %tmp81 = load i32, i32* %i ; [#uses=1] + %tmp82 = icmp slt i32 %tmp81, %tmp80 ; [#uses=1] + %tmp8283 = zext i1 %tmp82 to i32 ; [#uses=1] + %toBool84 = icmp ne i32 %tmp8283, 0 ; [#uses=1] + br i1 %toBool84, label %bb66, label %bb85 + +bb85: ; preds = %bb79 + %tmp86 = getelementptr [12 x i8], [12 x i8]* @.str47, i32 0, i32 0 ; [#uses=1] + %tmp87 = load i32, i32* %total ; [#uses=1] + %tmp88 = call i32 (i8*, ...) 
@printf( i8* %tmp86, i32 %tmp87 ) ; [#uses=0] + br label %cond_next + +cond_next: ; preds = %bb85, %bb55 + %tmp89 = load i32, i32* @numi ; [#uses=1] + %tmp90 = add i32 %tmp89, 1 ; [#uses=1] + store i32 %tmp90, i32* @numi + br label %bb91 + +bb91: ; preds = %cond_next, %entry + %tmp92 = load i32, i32* @numi ; [#uses=1] + %tmp93 = icmp sgt i32 %tmp92, 5 ; [#uses=1] + %tmp9394 = zext i1 %tmp93 to i32 ; [#uses=1] + %toBool95 = icmp ne i32 %tmp9394, 0 ; [#uses=1] + br i1 %toBool95, label %cond_true96, label %cond_next97 + +cond_true96: ; preds = %bb91 + br label %bb102 + +cond_next97: ; preds = %bb91 + %tmp98 = load i32, i32* %num_sol ; [#uses=1] + %tmp99 = icmp eq i32 %tmp98, 0 ; [#uses=1] + %tmp99100 = zext i1 %tmp99 to i32 ; [#uses=1] + %toBool101 = icmp ne i32 %tmp99100, 0 ; [#uses=1] + br i1 %toBool101, label %bb, label %bb102 + +bb102: ; preds = %cond_next97, %cond_true96 + store i32 0, i32* %tmp + %tmp103 = load i32, i32* %tmp ; [#uses=1] + store i32 %tmp103, i32* %retval + br label %return + +return: ; preds = %bb102 + %retval104 = load i32, i32* %retval ; [#uses=1] + ret i32 %retval104 +} + +declare i32 @userfun(i32) Index: llvm/trunk/test/Transforms/NewGVN/2007-07-31-RedundantPhi.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2007-07-31-RedundantPhi.ll +++ llvm/trunk/test/Transforms/NewGVN/2007-07-31-RedundantPhi.ll @@ -0,0 +1,23 @@ +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s + +@img_width = external global i16 ; [#uses=2] + +define i32 @smpUMHEXBipredIntegerPelBlockMotionSearch(i16* %cur_pic, i16 signext %ref, i32 %list, i32 %pic_pix_x, i32 %pic_pix_y, i32 %blocktype, i16 signext %pred_mv_x1, i16 signext %pred_mv_y1, i16 signext %pred_mv_x2, i16 signext %pred_mv_y2, i16* %mv_x, i16* %mv_y, i16* %s_mv_x, i16* %s_mv_y, i32 %search_range, i32 %min_mcost, i32 %lambda_factor) { +cond_next143: ; preds = %entry + store i16 0, i16* @img_width, align 2 + br i1 false, label %cond_next449, label 
%cond_false434 + +cond_false434: ; preds = %cond_true415 + br label %cond_next449 + +cond_next449: ; preds = %cond_false434, %cond_true415 + br i1 false, label %cond_next698, label %cond_false470 + +cond_false470: ; preds = %cond_next449 + br label %cond_next698 + +cond_next698: ; preds = %cond_true492 + %tmp701 = load i16, i16* @img_width, align 2 ; [#uses=0] +; CHECK-NOT: %tmp701 = + ret i32 0 +} Index: llvm/trunk/test/Transforms/NewGVN/2008-02-12-UndefLoad.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2008-02-12-UndefLoad.ll +++ llvm/trunk/test/Transforms/NewGVN/2008-02-12-UndefLoad.ll @@ -0,0 +1,22 @@ +; XFAIL: * +; RUN: opt < %s -newgvn -S | FileCheck %s +; PR1996 + +%struct.anon = type { i32, i8, i8, i8, i8 } + +define i32 @a() { +entry: + %c = alloca %struct.anon ; <%struct.anon*> [#uses=2] + %tmp = getelementptr %struct.anon, %struct.anon* %c, i32 0, i32 0 ; [#uses=1] + %tmp1 = getelementptr i32, i32* %tmp, i32 1 ; [#uses=2] + %tmp2 = load i32, i32* %tmp1, align 4 ; [#uses=1] +; CHECK-NOT: load + %tmp3 = or i32 %tmp2, 11 ; [#uses=1] + %tmp4 = and i32 %tmp3, -21 ; [#uses=1] + store i32 %tmp4, i32* %tmp1, align 4 + %call = call i32 (...) @x( %struct.anon* %c ) ; [#uses=0] + ret i32 undef +} + + +declare i32 @x(...) 
Index: llvm/trunk/test/Transforms/NewGVN/2008-02-13-NewPHI.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2008-02-13-NewPHI.ll +++ llvm/trunk/test/Transforms/NewGVN/2008-02-13-NewPHI.ll @@ -0,0 +1,22 @@ +; RUN: opt < %s -newgvn +; PR2032 + +define i32 @sscal(i32 %n, double %sa1, float* %sx, i32 %incx) { +entry: + %sx_addr = alloca float* ; [#uses=3] + store float* %sx, float** %sx_addr, align 4 + br label %bb33 + +bb: ; preds = %bb33 + %tmp27 = load float*, float** %sx_addr, align 4 ; [#uses=1] + store float 0.000000e+00, float* %tmp27, align 4 + store float* null, float** %sx_addr, align 4 + br label %bb33 + +bb33: ; preds = %bb, %entry + br i1 false, label %bb, label %return + +return: ; preds = %bb33 + %retval59 = load i32, i32* null, align 4 ; [#uses=1] + ret i32 %retval59 +} Index: llvm/trunk/test/Transforms/NewGVN/2008-07-02-Unreachable.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2008-07-02-Unreachable.ll +++ llvm/trunk/test/Transforms/NewGVN/2008-07-02-Unreachable.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s +; PR2503 + +@g_3 = external global i8 ; [#uses=2] + +define i8 @func_1(i32 %x, i32 %y) nounwind { +entry: + %A = alloca i8 + %cmp = icmp eq i32 %x, %y + br i1 %cmp, label %ifelse, label %ifthen + +ifthen: ; preds = %entry + br label %ifend + +ifelse: ; preds = %entry + %tmp3 = load i8, i8* @g_3 ; [#uses=0] + store i8 %tmp3, i8* %A + br label %afterfor + +forcond: ; preds = %forinc + br i1 false, label %afterfor, label %forbody + +forbody: ; preds = %forcond + br label %forinc + +forinc: ; preds = %forbody + br label %forcond + +afterfor: ; preds = %forcond, %forcond.thread + %tmp10 = load i8, i8* @g_3 ; [#uses=0] + ret i8 %tmp10 +; CHECK: ret i8 %tmp3 + +ifend: ; preds = %afterfor, %ifthen + ret i8 0 +} Index: llvm/trunk/test/Transforms/NewGVN/2008-12-09-SelfRemove.ll 
=================================================================== --- llvm/trunk/test/Transforms/NewGVN/2008-12-09-SelfRemove.ll +++ llvm/trunk/test/Transforms/NewGVN/2008-12-09-SelfRemove.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -newgvn -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.5" + %struct.anon = type { i8*, i32 } + %struct.d_print_info = type { i32, i8*, i32, i32, %struct.d_print_template*, %struct.d_print_mod*, i32 } + %struct.d_print_mod = type { %struct.d_print_mod*, %struct.demangle_component*, i32, %struct.d_print_template* } + %struct.d_print_template = type { %struct.d_print_template*, %struct.demangle_component* } + %struct.demangle_component = type { i32, { %struct.anon } } + +define void @d_print_mod_list(%struct.d_print_info* %dpi, %struct.d_print_mod* %mods, i32 %suffix) nounwind { +entry: + %0 = getelementptr %struct.d_print_info, %struct.d_print_info* %dpi, i32 0, i32 1 ; [#uses=1] + br i1 false, label %return, label %bb + +bb: ; preds = %entry + %1 = load i8*, i8** %0, align 4 ; [#uses=0] + %2 = getelementptr %struct.d_print_info, %struct.d_print_info* %dpi, i32 0, i32 1 ; [#uses=0] + br label %bb21 + +bb21: ; preds = %bb21, %bb + br label %bb21 + +return: ; preds = %entry + ret void +} + +; CHECK: define void @d_print_mod_list(%struct.d_print_info* %dpi, %struct.d_print_mod* %mods, i32 %suffix) #0 { +; CHECK: entry: +; CHECK: %0 = getelementptr %struct.d_print_info, %struct.d_print_info* %dpi, i32 0, i32 1 +; CHECK: br i1 false, label %return, label %bb +; CHECK: bb: +; CHECK: br label %bb21 +; CHECK: bb21: +; CHECK: br label %bb21 +; CHECK: return: +; CHECK: ret void +; CHECK: } Index: llvm/trunk/test/Transforms/NewGVN/2008-12-12-RLE-Crash.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2008-12-12-RLE-Crash.ll +++ 
llvm/trunk/test/Transforms/NewGVN/2008-12-12-RLE-Crash.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -newgvn | llvm-dis +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin7" + +define i32 @main(i32 %argc, i8** %argv) nounwind { +entry: + br label %bb84 + +bb41: ; preds = %bb82 + %tmp = load i8, i8* %opt.0, align 1 ; [#uses=0] + %tmp1 = getelementptr i8, i8* %opt.0, i32 1 ; [#uses=2] + switch i32 0, label %bb81 [ + i32 102, label %bb82 + i32 110, label %bb79 + i32 118, label %bb80 + ] + +bb79: ; preds = %bb41 + br label %bb82 + +bb80: ; preds = %bb41 + ret i32 0 + +bb81: ; preds = %bb41 + ret i32 1 + +bb82: ; preds = %bb84, %bb79, %bb41 + %opt.0 = phi i8* [ %tmp3, %bb84 ], [ %tmp1, %bb79 ], [ %tmp1, %bb41 ] ; [#uses=3] + %tmp2 = load i8, i8* %opt.0, align 1 ; [#uses=0] + br i1 false, label %bb84, label %bb41 + +bb84: ; preds = %bb82, %entry + %tmp3 = getelementptr i8, i8* null, i32 1 ; [#uses=1] + br label %bb82 +} Index: llvm/trunk/test/Transforms/NewGVN/2008-12-14-rle-reanalyze.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2008-12-14-rle-reanalyze.ll +++ llvm/trunk/test/Transforms/NewGVN/2008-12-14-rle-reanalyze.ll @@ -0,0 +1,18 @@ +; RUN: opt < %s -newgvn | llvm-dis +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin7" +@sort_value = external global [256 x i32], align 32 ; <[256 x i32]*> [#uses=2] + +define i32 @Quiesce(i32 %alpha, i32 %beta, i32 %wtm, i32 %ply) nounwind { +entry: + br label %bb22 + +bb22: ; preds = %bb23, %bb22, %entry + br i1 false, label %bb23, label %bb22 + +bb23: ; preds = %bb23, %bb22 + %sortv.233 = phi i32* [ getelementptr ([256 x i32], [256 x i32]* @sort_value, i32 0, i32 0), %bb22 ], [ %sortv.2, %bb23 ] ; [#uses=1] + %0 = load i32, 
i32* %sortv.233, align 4 ; [#uses=0] + %sortv.2 = getelementptr [256 x i32], [256 x i32]* @sort_value, i32 0, i32 0 ; [#uses=1] + br i1 false, label %bb23, label %bb22 +} Index: llvm/trunk/test/Transforms/NewGVN/2008-12-15-CacheVisited.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2008-12-15-CacheVisited.ll +++ llvm/trunk/test/Transforms/NewGVN/2008-12-15-CacheVisited.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -newgvn | llvm-dis +; Cached results must be added to and verified against the visited sets. +; PR3217 + +define fastcc void @gen_field_die(i32* %decl) nounwind { +entry: + br i1 false, label %bb203, label %bb202 + +bb202: ; preds = %entry + unreachable + +bb203: ; preds = %entry + %tmp = getelementptr i32, i32* %decl, i32 1 ; [#uses=1] + %tmp1 = load i32, i32* %tmp, align 4 ; [#uses=0] + br i1 false, label %bb207, label %bb204 + +bb204: ; preds = %bb203 + %tmp2 = getelementptr i32, i32* %decl, i32 1 ; [#uses=1] + br label %bb208 + +bb207: ; preds = %bb203 + br label %bb208 + +bb208: ; preds = %bb207, %bb204 + %iftmp.1374.0.in = phi i32* [ null, %bb207 ], [ %tmp2, %bb204 ] ; [#uses=1] + %iftmp.1374.0 = load i32, i32* %iftmp.1374.0.in ; [#uses=0] + unreachable +} Index: llvm/trunk/test/Transforms/NewGVN/2009-01-21-SortInvalidation.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2009-01-21-SortInvalidation.ll +++ llvm/trunk/test/Transforms/NewGVN/2009-01-21-SortInvalidation.ll @@ -0,0 +1,55 @@ +; RUN: opt < %s -newgvn | llvm-dis +; PR3358 +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + %struct.re_pattern_buffer = type { i8*, i64, i64, i64, i8*, i8*, i64, i8 } + %struct.re_registers = type { i32, i32*, i32* } + +define fastcc i32 @byte_re_match_2_internal(%struct.re_pattern_buffer* nocapture %bufp, 
i8* %string1, i32 %size1, i8* %string2, i32 %size2, i32 %pos, %struct.re_registers* %regs, i32 %stop) nounwind { +entry: + br label %bb159 + +succeed_label: ; preds = %bb159 + ret i32 0 + +bb159: ; preds = %bb664, %bb554, %bb159, %bb159, %bb159, %entry + %d.0 = phi i8* [ null, %entry ], [ %d.0, %bb159 ], [ %d.0, %bb554 ], [ %d.0, %bb159 ], [ %d.0, %bb159 ], [ %d.12, %bb664 ] ; [#uses=5] + switch i32 0, label %bb661 [ + i32 0, label %bb159 + i32 1, label %succeed_label + i32 13, label %bb159 + i32 14, label %bb159 + i32 16, label %bb411 + i32 24, label %bb622 + i32 28, label %bb543 + ] + +bb411: ; preds = %bb411, %bb159 + br label %bb411 + +bb543: ; preds = %bb159 + br i1 false, label %bb549, label %bb550 + +bb549: ; preds = %bb543 + br label %bb554 + +bb550: ; preds = %bb543 + br i1 false, label %bb554, label %bb552 + +bb552: ; preds = %bb550 + %0 = load i8, i8* %d.0, align 8 ; [#uses=0] + br label %bb554 + +bb554: ; preds = %bb552, %bb550, %bb549 + br i1 false, label %bb159, label %bb661 + +bb622: ; preds = %bb622, %bb159 + br label %bb622 + +bb661: ; preds = %bb554, %bb159 + %d.12 = select i1 false, i8* null, i8* null ; [#uses=1] + br label %bb664 + +bb664: ; preds = %bb664, %bb661 + br i1 false, label %bb159, label %bb664 +} Index: llvm/trunk/test/Transforms/NewGVN/2009-01-22-SortInvalidation.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2009-01-22-SortInvalidation.ll +++ llvm/trunk/test/Transforms/NewGVN/2009-01-22-SortInvalidation.ll @@ -0,0 +1,100 @@ +; RUN: opt < %s -newgvn | llvm-dis + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin7" + %struct..4sPragmaType = type { i8*, i32 } + %struct.AggInfo = type { i8, i8, i32, %struct.ExprList*, i32, %struct.AggInfo_col*, i32, i32, i32, %struct.AggInfo_func*, i32, i32 } + %struct.AggInfo_col = type { %struct.Table*, i32, i32, 
i32, i32, %struct.Expr* } + %struct.AggInfo_func = type { %struct.Expr*, %struct.FuncDef*, i32, i32 } + %struct.AuxData = type { i8*, void (i8*)* } + %struct.Bitvec = type { i32, i32, i32, { [125 x i32] } } + %struct.BtCursor = type { %struct.Btree*, %struct.BtShared*, %struct.BtCursor*, %struct.BtCursor*, i32 (i8*, i32, i8*, i32, i8*)*, i8*, i32, %struct.MemPage*, i32, %struct.CellInfo, i8, i8, i8*, i64, i32, i8, i32* } + %struct.BtLock = type { %struct.Btree*, i32, i8, %struct.BtLock* } + %struct.BtShared = type { %struct.Pager*, %struct.sqlite3*, %struct.BtCursor*, %struct.MemPage*, i8, i8, i8, i8, i8, i8, i8, i8, i32, i16, i16, i32, i32, i32, i32, i8, i32, i8*, void (i8*)*, %struct.sqlite3_mutex*, %struct.BusyHandler, i32, %struct.BtShared*, %struct.BtLock*, %struct.Btree* } + %struct.Btree = type { %struct.sqlite3*, %struct.BtShared*, i8, i8, i8, i32, %struct.Btree*, %struct.Btree* } + %struct.BtreeMutexArray = type { i32, [11 x %struct.Btree*] } + %struct.BusyHandler = type { i32 (i8*, i32)*, i8*, i32 } + %struct.CellInfo = type { i8*, i64, i32, i32, i16, i16, i16, i16 } + %struct.CollSeq = type { i8*, i8, i8, i8*, i32 (i8*, i32, i8*, i32, i8*)*, void (i8*)* } + %struct.Column = type { i8*, %struct.Expr*, i8*, i8*, i8, i8, i8, i8 } + %struct.Context = type { i64, i32, %struct.Fifo } + %struct.CountCtx = type { i64 } + %struct.Cursor = type { %struct.BtCursor*, i32, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i64, %struct.Btree*, i32, i8*, i64, i8*, %struct.KeyInfo*, i32, i64, %struct.sqlite3_vtab_cursor*, %struct.sqlite3_module*, i32, i32, i32*, i32*, i8* } + %struct.Db = type { i8*, %struct.Btree*, i8, i8, i8*, void (i8*)*, %struct.Schema* } + %struct.Expr = type { i8, i8, i16, %struct.CollSeq*, %struct.Expr*, %struct.Expr*, %struct.ExprList*, %struct..4sPragmaType, %struct..4sPragmaType, i32, i32, %struct.AggInfo*, i32, i32, %struct.Select*, %struct.Table*, i32 } + %struct.ExprList = type { i32, i32, i32, %struct.ExprList_item* } + 
%struct.ExprList_item = type { %struct.Expr*, i8*, i8, i8, i8 } + %struct.FKey = type { %struct.Table*, %struct.FKey*, i8*, %struct.FKey*, i32, %struct.sColMap*, i8, i8, i8, i8 } + %struct.Fifo = type { i32, %struct.FifoPage*, %struct.FifoPage* } + %struct.FifoPage = type { i32, i32, i32, %struct.FifoPage*, [1 x i64] } + %struct.FuncDef = type { i16, i8, i8, i8, i8*, %struct.FuncDef*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*)*, [1 x i8] } + %struct.Hash = type { i8, i8, i32, i32, %struct.HashElem*, %struct._ht* } + %struct.HashElem = type { %struct.HashElem*, %struct.HashElem*, i8*, i8*, i32 } + %struct.IdList = type { %struct..4sPragmaType*, i32, i32 } + %struct.Index = type { i8*, i32, i32*, i32*, %struct.Table*, i32, i8, i8, i8*, %struct.Index*, %struct.Schema*, i8*, i8** } + %struct.KeyInfo = type { %struct.sqlite3*, i8, i8, i8, i32, i8*, [1 x %struct.CollSeq*] } + %struct.Mem = type { %struct.CountCtx, double, %struct.sqlite3*, i8*, i32, i16, i8, i8, void (i8*)* } + %struct.MemPage = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i16, i16, i16, [5 x %struct._OvflCell], %struct.BtShared*, i8*, %struct.PgHdr*, i32, %struct.MemPage* } + %struct.Module = type { %struct.sqlite3_module*, i8*, i8*, void (i8*)* } + %struct.Op = type { i8, i8, i8, i8, i32, i32, i32, { i32 } } + %struct.Pager = type { %struct.sqlite3_vfs*, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Bitvec*, %struct.Bitvec*, i8*, i8*, i8*, i8*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.BusyHandler*, %struct.PagerLruList, %struct.PgHdr*, %struct.PgHdr*, %struct.PgHdr*, i64, i64, i64, i64, i64, i32, void (%struct.PgHdr*, i32)*, void (%struct.PgHdr*, i32)*, i32, %struct.PgHdr**, i8*, [16 x i8] } + %struct.PagerLruLink = type { %struct.PgHdr*, 
%struct.PgHdr* } + %struct.PagerLruList = type { %struct.PgHdr*, %struct.PgHdr*, %struct.PgHdr* } + %struct.Parse = type { %struct.sqlite3*, i32, i8*, %struct.Vdbe*, i8, i8, i8, i8, i8, i8, i8, [8 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [12 x i32], i32, %struct.TableLock*, i32, i32, i32, i32, i32, %struct.Expr**, i8, %struct..4sPragmaType, %struct..4sPragmaType, %struct..4sPragmaType, i8*, i8*, %struct.Table*, %struct.Trigger*, %struct.TriggerStack*, i8*, %struct..4sPragmaType, i8, %struct.Table*, i32 } + %struct.PgHdr = type { %struct.Pager*, i32, %struct.PgHdr*, %struct.PgHdr*, %struct.PagerLruLink, %struct.PgHdr*, i8, i8, i8, i8, i8, i16, %struct.PgHdr*, %struct.PgHdr*, i8* } + %struct.Schema = type { i32, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Table*, i8, i8, i16, i32, %struct.sqlite3* } + %struct.Select = type { %struct.ExprList*, i8, i8, i8, i8, i8, i8, i8, %struct.SrcList*, %struct.Expr*, %struct.ExprList*, %struct.Expr*, %struct.ExprList*, %struct.Select*, %struct.Select*, %struct.Select*, %struct.Expr*, %struct.Expr*, i32, i32, [3 x i32] } + %struct.SrcList = type { i16, i16, [1 x %struct.SrcList_item] } + %struct.SrcList_item = type { i8*, i8*, i8*, %struct.Table*, %struct.Select*, i8, i8, i32, %struct.Expr*, %struct.IdList*, i64 } + %struct.Table = type { i8*, i32, %struct.Column*, i32, %struct.Index*, i32, %struct.Select*, i32, %struct.Trigger*, %struct.FKey*, i8*, %struct.Expr*, i32, i8, i8, i8, i8, i8, i8, i8, %struct.Module*, %struct.sqlite3_vtab*, i32, i8**, %struct.Schema* } + %struct.TableLock = type { i32, i32, i8, i8* } + %struct.Trigger = type { i8*, i8*, i8, i8, %struct.Expr*, %struct.IdList*, %struct..4sPragmaType, %struct.Schema*, %struct.Schema*, %struct.TriggerStep*, %struct.Trigger* } + %struct.TriggerStack = type { %struct.Table*, i32, i32, i32, i32, i32, i32, %struct.Trigger*, %struct.TriggerStack* } + %struct.TriggerStep = type { i32, i32, %struct.Trigger*, %struct.Select*, 
%struct..4sPragmaType, %struct.Expr*, %struct.ExprList*, %struct.IdList*, %struct.TriggerStep*, %struct.TriggerStep* } + %struct.Vdbe = type { %struct.sqlite3*, %struct.Vdbe*, %struct.Vdbe*, i32, i32, %struct.Op*, i32, i32, i32*, %struct.Mem**, %struct.Mem*, i32, %struct.Cursor**, i32, %struct.Mem*, i8**, i32, i32, i32, %struct.Mem*, i32, i32, %struct.Fifo, i32, i32, %struct.Context*, i32, i32, i32, i32, i32, [25 x i32], i32, i32, i8**, i8*, %struct.Mem*, i8, i8, i8, i8, i8, i8, i32, i64, i32, %struct.BtreeMutexArray, i32, i8*, i32 } + %struct.VdbeFunc = type { %struct.FuncDef*, i32, [1 x %struct.AuxData] } + %struct._OvflCell = type { i8*, i16 } + %struct._ht = type { i32, %struct.HashElem* } + %struct.anon = type { double } + %struct.sColMap = type { i32, i8* } + %struct.sqlite3 = type { %struct.sqlite3_vfs*, i32, %struct.Db*, i32, i32, i32, i32, i8, i8, i8, i8, i32, %struct.CollSeq*, i64, i64, i32, i32, i32, %struct.sqlite3_mutex*, %struct.sqlite3InitInfo, i32, i8**, %struct.Vdbe*, i32, void (i8*, i8*)*, i8*, void (i8*, i8*, i64)*, i8*, i8*, i32 (i8*)*, i8*, void (i8*)*, i8*, void (i8*, i32, i8*, i8*, i64)*, void (i8*, %struct.sqlite3*, i32, i8*)*, void (i8*, %struct.sqlite3*, i32, i8*)*, i8*, %struct.Mem*, i8*, i8*, %struct.anon, i32 (i8*, i32, i8*, i8*, i8*, i8*)*, i8*, i32 (i8*)*, i8*, i32, %struct.Hash, %struct.Table*, %struct.sqlite3_vtab**, i32, %struct.Hash, %struct.Hash, %struct.BusyHandler, i32, [2 x %struct.Db], i8 } + %struct.sqlite3InitInfo = type { i32, i32, i8 } + %struct.sqlite3_context = type { %struct.FuncDef*, %struct.VdbeFunc*, %struct.Mem, %struct.Mem*, i32, %struct.CollSeq* } + %struct.sqlite3_file = type { %struct.sqlite3_io_methods* } + %struct.sqlite3_index_constraint = type { i32, i8, i8, i32 } + %struct.sqlite3_index_constraint_usage = type { i32, i8 } + %struct.sqlite3_index_info = type { i32, %struct.sqlite3_index_constraint*, i32, %struct.sqlite3_index_constraint_usage*, %struct.sqlite3_index_constraint_usage*, i32, i8*, i32, i32, 
double } + %struct.sqlite3_io_methods = type { i32, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i64)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i64*)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i32, i8*)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*)* } + %struct.sqlite3_module = type { i32, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_index_info*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_vtab_cursor**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, i32, i8*, i32, %struct.Mem**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, %struct.sqlite3_context*, i32)*, i32 (%struct.sqlite3_vtab_cursor*, i64*)*, i32 (%struct.sqlite3_vtab*, i32, %struct.Mem**, i64*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, i32, i8*, void (%struct.sqlite3_context*, i32, %struct.Mem**)**, i8**)*, i32 (%struct.sqlite3_vtab*, i8*)* } + %struct.sqlite3_mutex = type opaque + %struct.sqlite3_vfs = type { i32, i32, i32, %struct.sqlite3_vfs*, i8*, i8*, i32 (%struct.sqlite3_vfs*, i8*, %struct.sqlite3_file*, i32, i32*)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i8*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*)*, void (%struct.sqlite3_vfs*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*, i8*)*, void (%struct.sqlite3_vfs*, i8*)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 
(%struct.sqlite3_vfs*, i32)*, i32 (%struct.sqlite3_vfs*, double*)* } + %struct.sqlite3_vtab = type { %struct.sqlite3_module*, i32, i8* } + %struct.sqlite3_vtab_cursor = type { %struct.sqlite3_vtab* } + +define fastcc void @sqlite3Insert(%struct.Parse* %pParse, %struct.SrcList* %pTabList, %struct.ExprList* %pList, %struct.Select* %pSelect, %struct.IdList* %pColumn, i32 %onError) nounwind { +entry: + br i1 false, label %bb54, label %bb69.loopexit + +bb54: ; preds = %entry + br label %bb69.loopexit + +bb59: ; preds = %bb63.preheader + %0 = load %struct..4sPragmaType*, %struct..4sPragmaType** %3, align 4 ; <%struct..4sPragmaType*> [#uses=0] + br label %bb65 + +bb65: ; preds = %bb63.preheader, %bb59 + %1 = load %struct..4sPragmaType*, %struct..4sPragmaType** %4, align 4 ; <%struct..4sPragmaType*> [#uses=0] + br i1 false, label %bb67, label %bb63.preheader + +bb67: ; preds = %bb65 + %2 = getelementptr %struct.IdList, %struct.IdList* %pColumn, i32 0, i32 0 ; <%struct..4sPragmaType**> [#uses=0] + unreachable + +bb69.loopexit: ; preds = %bb54, %entry + %3 = getelementptr %struct.IdList, %struct.IdList* %pColumn, i32 0, i32 0 ; <%struct..4sPragmaType**> [#uses=1] + %4 = getelementptr %struct.IdList, %struct.IdList* %pColumn, i32 0, i32 0 ; <%struct..4sPragmaType**> [#uses=1] + br label %bb63.preheader + +bb63.preheader: ; preds = %bb69.loopexit, %bb65 + br i1 false, label %bb59, label %bb65 +} Index: llvm/trunk/test/Transforms/NewGVN/2009-03-10-PREOnVoid.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2009-03-10-PREOnVoid.ll +++ llvm/trunk/test/Transforms/NewGVN/2009-03-10-PREOnVoid.ll @@ -0,0 +1,110 @@ +; RUN: opt < %s -newgvn -disable-output +; PR3775 + +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-pc-linux-gnu" + %llvm.dbg.anchor.type = type { i32, 
i32 } + %"struct.__gnu_cxx::hash" = type <{ i8 }> + %struct.__sched_param = type { i32 } + %struct._pthread_descr_struct = type opaque + %struct.pthread_attr_t = type { i32, i32, %struct.__sched_param, i32, i32, i32, i32, i8*, i32 } + %struct.pthread_mutex_t = type { i32, i32, %struct._pthread_descr_struct*, i32, %llvm.dbg.anchor.type } + %"struct.std::_Rb_tree > >,std::_Select1st > > >,std::less,std::allocator > > > >" = type { %"struct.std::_Rb_tree > >,std::_Select1st > > >,std::less,std::allocator > > > >::_Rb_tree_impl,false>" } + %"struct.std::_Rb_tree > >,std::_Select1st > > >,std::less,std::allocator > > > >::_Rb_tree_impl,false>" = type { %"struct.__gnu_cxx::hash", %"struct.std::_Rb_tree_node_base", i32 } + %"struct.std::_Rb_tree_iterator > > >" = type { %"struct.std::_Rb_tree_node_base"* } + %"struct.std::_Rb_tree_node_base" = type { i32, %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* } + %"struct.std::pair > > >,bool>" = type { %"struct.std::_Rb_tree_iterator > > >", i8 } + %"struct.std::pair" = type { i8*, i8* } + +@_ZL20__gthrw_pthread_oncePiPFvvE = weak alias i32 (i32*, void ()*), i32 (i32*, void ()*)* @pthread_once ; [#uses=0] +@_ZL27__gthrw_pthread_getspecificj = weak alias i8* (i32), i8* (i32)* @pthread_getspecific ; [#uses=0] +@_ZL27__gthrw_pthread_setspecificjPKv = weak alias i32 (i32, i8*), i32 (i32, i8*)* @pthread_setspecific ; [#uses=0] +@_ZL22__gthrw_pthread_createPmPK16__pthread_attr_sPFPvS3_ES3_ = weak alias i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*), i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create ; [#uses=0] +@_ZL22__gthrw_pthread_cancelm = weak alias i32 (i32), i32 (i32)* @pthread_cancel ; [#uses=0] +@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*), i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock ; [#uses=0] +@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = weak alias i32 
(%struct.pthread_mutex_t*), i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock ; [#uses=0] +@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*), i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock ; [#uses=0] +@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = weak alias i32 (%struct.pthread_mutex_t*, %struct.__sched_param*), i32 (%struct.pthread_mutex_t*, %struct.__sched_param*)* @pthread_mutex_init ; [#uses=0] +@_ZL26__gthrw_pthread_key_createPjPFvPvE = weak alias i32 (i32*, void (i8*)*), i32 (i32*, void (i8*)*)* @pthread_key_create ; [#uses=0] +@_ZL26__gthrw_pthread_key_deletej = weak alias i32 (i32), i32 (i32)* @pthread_key_delete ; [#uses=0] +@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = weak alias i32 (%struct.__sched_param*), i32 (%struct.__sched_param*)* @pthread_mutexattr_init ; [#uses=0] +@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = weak alias i32 (%struct.__sched_param*, i32), i32 (%struct.__sched_param*, i32)* @pthread_mutexattr_settype ; [#uses=0] +@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = weak alias i32 (%struct.__sched_param*), i32 (%struct.__sched_param*)* @pthread_mutexattr_destroy ; [#uses=0] + +declare fastcc void @_ZNSt10_Select1stISt4pairIKPvS1_EEC1Ev() nounwind readnone + +define fastcc void @_ZNSt8_Rb_treeIPvSt4pairIKS0_S0_ESt10_Select1stIS3_ESt4lessIS0_ESaIS3_EE16_M_insert_uniqueERKS3_(%"struct.std::pair > > >,bool>"* noalias nocapture sret %agg.result, %"struct.std::_Rb_tree > >,std::_Select1st > > >,std::less,std::allocator > > > >"* %this, %"struct.std::pair"* %__v) nounwind { +entry: + br i1 false, label %bb7, label %bb + +bb: ; preds = %bb, %entry + br i1 false, label %bb5, label %bb + +bb5: ; preds = %bb + call fastcc void @_ZNSt10_Select1stISt4pairIKPvS1_EEC1Ev() nounwind + br i1 false, label %bb11, label %bb7 + +bb7: ; preds = %bb5, %entry + br label %bb11 + +bb11: ; preds = %bb7, %bb5 + call fastcc 
void @_ZNSt10_Select1stISt4pairIKPvS1_EEC1Ev() nounwind + unreachable +} + +define i32 @pthread_once(i32*, void ()*) { + ret i32 0 +} + +define i8* @pthread_getspecific(i32) { + ret i8* null +} + +define i32 @pthread_setspecific(i32, i8*) { + ret i32 0 +} + +define i32 @pthread_create(i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*) { + ret i32 0 +} + +define i32 @pthread_cancel(i32) { + ret i32 0 +} + +define i32 @pthread_mutex_lock(%struct.pthread_mutex_t*) { + ret i32 0 +} + +define i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*) { + ret i32 0 +} + +define i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*) { + ret i32 0 +} + +define i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %struct.__sched_param*) { + ret i32 0 +} + +define i32 @pthread_key_create(i32*, void (i8*)*) { + ret i32 0 +} + +define i32 @pthread_key_delete(i32) { + ret i32 0 +} + +define i32 @pthread_mutexattr_init(%struct.__sched_param*) { + ret i32 0 +} + +define i32 @pthread_mutexattr_settype(%struct.__sched_param*, i32) { + ret i32 0 +} + +define i32 @pthread_mutexattr_destroy(%struct.__sched_param*) { + ret i32 0 +} Index: llvm/trunk/test/Transforms/NewGVN/2009-07-13-MemDepSortFail.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2009-07-13-MemDepSortFail.ll +++ llvm/trunk/test/Transforms/NewGVN/2009-07-13-MemDepSortFail.ll @@ -0,0 +1,67 @@ +; RUN: opt < %s -newgvn | llvm-dis +; PR4256 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" +target triple = "i386-pc-linux-gnu" + %llvm.dbg.anchor.type = type { i32, i32 } + %struct.cset = type { i8*, i8, i8, i32, i8* } + %struct.lmat = type { %struct.re_guts*, i32, %llvm.dbg.anchor.type*, i8*, i8*, i8*, i8*, i8**, i32, i8*, i8*, i8*, i8*, i8* } + %struct.re_guts = type { i32*, %struct.cset*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i32, [1 x i8] } + 
+define i8* @lbackref(%struct.lmat* %m, i8* %start, i8* %stop, i32 %startst, i32 %stopst, i32 %lev, i32 %rec) nounwind { +entry: + br label %bb63 + +bb: ; preds = %bb63 + switch i32 0, label %bb62 [ + i32 268435456, label %bb2 + i32 805306368, label %bb9 + i32 -1610612736, label %bb51 + ] + +bb2: ; preds = %bb + br label %bb62 + +bb9: ; preds = %bb + %0 = load i8, i8* %sp.1, align 1 ; [#uses=0] + br label %bb62 + +bb51: ; preds = %bb + %1 = load i8, i8* %sp.1, align 1 ; [#uses=0] + ret i8* null + +bb62: ; preds = %bb9, %bb2, %bb + br label %bb63 + +bb63: ; preds = %bb84, %bb69, %bb62, %entry + %sp.1 = phi i8* [ null, %bb62 ], [ %sp.1.lcssa, %bb84 ], [ %start, %entry ], [ %sp.1.lcssa, %bb69 ] ; [#uses=3] + br i1 false, label %bb, label %bb65 + +bb65: ; preds = %bb63 + %sp.1.lcssa = phi i8* [ %sp.1, %bb63 ] ; [#uses=4] + br i1 false, label %bb66, label %bb69 + +bb66: ; preds = %bb65 + ret i8* null + +bb69: ; preds = %bb65 + switch i32 0, label %bb108.loopexit2.loopexit.loopexit [ + i32 1342177280, label %bb63 + i32 1476395008, label %bb84 + i32 1879048192, label %bb104 + i32 2013265920, label %bb93 + ] + +bb84: ; preds = %bb69 + %2 = tail call i8* @lbackref(%struct.lmat* %m, i8* %sp.1.lcssa, i8* %stop, i32 0, i32 %stopst, i32 0, i32 0) nounwind ; [#uses=0] + br label %bb63 + +bb93: ; preds = %bb69 + ret i8* null + +bb104: ; preds = %bb69 + %sp.1.lcssa.lcssa33 = phi i8* [ %sp.1.lcssa, %bb69 ] ; [#uses=0] + unreachable + +bb108.loopexit2.loopexit.loopexit: ; preds = %bb69 + ret i8* null +} Index: llvm/trunk/test/Transforms/NewGVN/2009-11-12-MemDepMallocBitCast.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2009-11-12-MemDepMallocBitCast.ll +++ llvm/trunk/test/Transforms/NewGVN/2009-11-12-MemDepMallocBitCast.ll @@ -0,0 +1,15 @@ +; Test to make sure malloc's bitcast does not block detection of a store +; to aliased memory; GVN should not optimize away the load in this program. 
+; RUN: opt < %s -newgvn -S | FileCheck %s + +define i64 @test() { + %1 = tail call i8* @malloc(i64 mul (i64 4, i64 ptrtoint (i64* getelementptr (i64, i64* null, i64 1) to i64))) ; [#uses=2] + store i8 42, i8* %1 + %X = bitcast i8* %1 to i64* ; [#uses=1] + %Y = load i64, i64* %X ; [#uses=1] + ret i64 %Y +; CHECK: %Y = load i64, i64* %X +; CHECK: ret i64 %Y +} + +declare noalias i8* @malloc(i64) Index: llvm/trunk/test/Transforms/NewGVN/2010-03-31-RedundantPHIs.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2010-03-31-RedundantPHIs.ll +++ llvm/trunk/test/Transforms/NewGVN/2010-03-31-RedundantPHIs.ll @@ -0,0 +1,42 @@ +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s + +; CHECK-NOT: load +; CHECK-NOT: phi + +define i8* @cat(i8* %s1, ...) nounwind { +entry: + br i1 undef, label %bb, label %bb3 + +bb: ; preds = %entry + unreachable + +bb3: ; preds = %entry + store i8* undef, i8** undef, align 4 + br i1 undef, label %bb5, label %bb6 + +bb5: ; preds = %bb3 + unreachable + +bb6: ; preds = %bb3 + br label %bb12 + +bb8: ; preds = %bb12 + br i1 undef, label %bb9, label %bb10 + +bb9: ; preds = %bb8 + %0 = load i8*, i8** undef, align 4 ; [#uses=0] + %1 = load i8*, i8** undef, align 4 ; [#uses=0] + br label %bb11 + +bb10: ; preds = %bb8 + br label %bb11 + +bb11: ; preds = %bb10, %bb9 + br label %bb12 + +bb12: ; preds = %bb11, %bb6 + br i1 undef, label %bb8, label %bb13 + +bb13: ; preds = %bb12 + ret i8* undef +} Index: llvm/trunk/test/Transforms/NewGVN/2010-05-08-OneBit.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2010-05-08-OneBit.ll +++ llvm/trunk/test/Transforms/NewGVN/2010-05-08-OneBit.ll @@ -0,0 +1,67 @@ +; RUN: opt < %s -newgvn +; PR7052 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + 
+define i32 @main(i32 %argc, i8** nocapture %argv) personality i32 (...)* @__gxx_personality_v0 { +entry: + %0 = getelementptr inbounds i8, i8* undef, i64 5 ; [#uses=1] + %1 = bitcast i8* %0 to i32* ; [#uses=1] + store i32 undef, i32* %1, align 1 + br i1 undef, label %k121.i.i, label %l117.i.i + +l117.i.i: ; preds = %entry + invoke fastcc void @foo() + to label %.noexc5 unwind label %landing_pad + +.noexc5: ; preds = %l117.i.i + unreachable + +k121.i.i: ; preds = %entry + br i1 undef, label %l129.i.i, label %k133.i.i + +l129.i.i: ; preds = %k121.i.i + invoke fastcc void @foo() + to label %.noexc7 unwind label %landing_pad + +.noexc7: ; preds = %l129.i.i + unreachable + +k133.i.i: ; preds = %k121.i.i + %2 = getelementptr i8, i8* undef, i64 5 ; [#uses=1] + %3 = bitcast i8* %2 to i1* ; [#uses=1] + %4 = load i1, i1* %3 ; [#uses=1] + br i1 %4, label %k151.i.i, label %l147.i.i + +l147.i.i: ; preds = %k133.i.i + invoke fastcc void @foo() + to label %.noexc10 unwind label %landing_pad + +.noexc10: ; preds = %l147.i.i + unreachable + +k151.i.i: ; preds = %k133.i.i + ret i32 0 + +landing_pad: ; preds = %l147.i.i, %l129.i.i, %l117.i.i + %exn = landingpad {i8*, i32} + cleanup + switch i32 undef, label %fin [ + i32 1, label %catch1 + i32 2, label %catch + ] + +fin: ; preds = %landing_pad + unreachable + +catch: ; preds = %landing_pad + ret i32 1 + +catch1: ; preds = %landing_pad + ret i32 2 +} + +declare fastcc void @foo() + +declare i32 @__gxx_personality_v0(...) 
Index: llvm/trunk/test/Transforms/NewGVN/2010-11-13-Simplify.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2010-11-13-Simplify.ll +++ llvm/trunk/test/Transforms/NewGVN/2010-11-13-Simplify.ll @@ -0,0 +1,15 @@ +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s + +declare i32 @foo(i32) readnone + +define i1 @bar() { +; CHECK-LABEL: @bar( + %a = call i32 @foo (i32 0) readnone + %b = call i32 @foo (i32 0) readnone + %c = and i32 %a, %b + %x = call i32 @foo (i32 %a) readnone + %y = call i32 @foo (i32 %c) readnone + %z = icmp eq i32 %x, %y + ret i1 %z +; CHECK: ret i1 true +} Index: llvm/trunk/test/Transforms/NewGVN/2011-04-27-phioperands.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2011-04-27-phioperands.ll +++ llvm/trunk/test/Transforms/NewGVN/2011-04-27-phioperands.ll @@ -0,0 +1,106 @@ +; RUN: opt -newgvn -disable-output < %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64" + +@nuls = external global [10 x i8] + +define fastcc void @p_ere() nounwind { +entry: + br label %"" + +".i": + br i1 undef, label %".i30.i", label %doemit.exit51.i + +".i30.i": + unreachable + +doemit.exit51.i: + br label %".i" + +".i": + br i1 undef, label %".i55.i", label %doemit.exit76.i + +".i55.i": + unreachable + +doemit.exit76.i: + br label %".i" + +".i": + store i8* getelementptr inbounds ([10 x i8], [10 x i8]* @nuls, i64 0, i64 0), i8** undef, align 8 + br label %".i" + +".i": + br label %".i" + +".i": + br i1 undef, label %".i", label %".i" + +".i": + br label %".i" + +".i": + br label %".i" + +".i": + br label %".i" + +".i": + br label %".i" + +".i": + %wascaret_2.i = phi i32 [ 0, %".i" ], [ 0, %".i" ], [ 0, %".i" ], [ 0, %".i" ], [ 0, %".i" ], [ 0, %".i" ], [ 0, %doemit.exit76.i ], [ 1, %doemit.exit51.i ], [ 0, %".i" ] + 
%D.5496_84.i = load i8*, i8** undef, align 8 + br i1 undef, label %".i", label %"" + +".i": + br i1 undef, label %"", label %".i" + +".i": + br i1 undef, label %".i", label %".i" + +".i": + br label %".i" + +".i": + switch i32 undef, label %"" [ + i32 42, label %".i" + i32 43, label %".i" + i32 63, label %".i" + i32 123, label %".i258.i" + ] + +".i": + br i1 undef, label %".i105.i", label %doemit.exit127.i + +".i105.i": + unreachable + +doemit.exit127.i: + unreachable + +".i": + br i1 undef, label %".i157.i", label %"" + +".i157.i": + unreachable + +".i": + br label %"" + +".i258.i": + unreachable + +"": + switch i32 undef, label %".i" [ + i32 36, label %".i" + i32 94, label %".i" + i32 124, label %".i" + i32 42, label %".i" + i32 43, label %".i" + i32 46, label %".i" + i32 63, label %".i" + i32 91, label %".i" + i32 92, label %".i" + ] +} Index: llvm/trunk/test/Transforms/NewGVN/2011-07-07-MatchIntrinsicExtract.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2011-07-07-MatchIntrinsicExtract.ll +++ llvm/trunk/test/Transforms/NewGVN/2011-07-07-MatchIntrinsicExtract.ll @@ -0,0 +1,86 @@ +; XFAIL: * +; RUN: opt < %s -newgvn -S | FileCheck %s +; + +%0 = type { i64, i1 } + +define i64 @test1(i64 %a, i64 %b) nounwind ssp { +entry: + %uadd = tail call %0 @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %uadd.0 = extractvalue %0 %uadd, 0 + %add1 = add i64 %a, %b + ret i64 %add1 +} + +; CHECK-LABEL: @test1( +; CHECK-NOT: add1 +; CHECK: ret + +define i64 @test2(i64 %a, i64 %b) nounwind ssp { +entry: + %usub = tail call %0 @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %usub.0 = extractvalue %0 %usub, 0 + %sub1 = sub i64 %a, %b + ret i64 %sub1 +} + +; CHECK-LABEL: @test2( +; CHECK-NOT: sub1 +; CHECK: ret + +define i64 @test3(i64 %a, i64 %b) nounwind ssp { +entry: + %umul = tail call %0 @llvm.umul.with.overflow.i64(i64 %a, i64 %b) + %umul.0 = extractvalue %0 %umul, 0 + %mul1 = mul i64 %a, %b + ret i64 %mul1 +} + +; 
CHECK-LABEL: @test3( +; CHECK-NOT: mul1 +; CHECK: ret + +define i64 @test4(i64 %a, i64 %b) nounwind ssp { +entry: + %sadd = tail call %0 @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %sadd.0 = extractvalue %0 %sadd, 0 + %add1 = add i64 %a, %b + ret i64 %add1 +} + +; CHECK-LABEL: @test4( +; CHECK-NOT: add1 +; CHECK: ret + +define i64 @test5(i64 %a, i64 %b) nounwind ssp { +entry: + %ssub = tail call %0 @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %ssub.0 = extractvalue %0 %ssub, 0 + %sub1 = sub i64 %a, %b + ret i64 %sub1 +} + +; CHECK-LABEL: @test5( +; CHECK-NOT: sub1 +; CHECK: ret + +define i64 @test6(i64 %a, i64 %b) nounwind ssp { +entry: + %smul = tail call %0 @llvm.smul.with.overflow.i64(i64 %a, i64 %b) + %smul.0 = extractvalue %0 %smul, 0 + %mul1 = mul i64 %a, %b + ret i64 %mul1 +} + +; CHECK-LABEL: @test6( +; CHECK-NOT: mul1 +; CHECK: ret + +declare void @exit(i32) noreturn +declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone +declare %0 @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone +declare %0 @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone +declare %0 @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone +declare %0 @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone +declare %0 @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone + Index: llvm/trunk/test/Transforms/NewGVN/2011-09-07-TypeIdFor.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2011-09-07-TypeIdFor.ll +++ llvm/trunk/test/Transforms/NewGVN/2011-09-07-TypeIdFor.ll @@ -0,0 +1,81 @@ +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s +%struct.__fundamental_type_info_pseudo = type { %struct.__type_info_pseudo } +%struct.__type_info_pseudo = type { i8*, i8* } + +@_ZTIi = external constant %struct.__fundamental_type_info_pseudo +@_ZTIb = external constant %struct.__fundamental_type_info_pseudo + +declare void @_Z4barv() + +declare void @_Z7cleanupv() + +declare i32 @llvm.eh.typeid.for(i8*) 
nounwind readonly + +declare i8* @__cxa_begin_catch(i8*) nounwind + +declare void @__cxa_end_catch() + +declare i32 @__gxx_personality_v0(i32, i64, i8*, i8*) + +define void @_Z3foov() uwtable personality i32 (i32, i64, i8*, i8*)* @__gxx_personality_v0 { +entry: + invoke void @_Z4barv() + to label %return unwind label %lpad + +lpad: ; preds = %entry + %0 = landingpad { i8*, i32 } + catch %struct.__fundamental_type_info_pseudo* @_ZTIi + catch %struct.__fundamental_type_info_pseudo* @_ZTIb + catch %struct.__fundamental_type_info_pseudo* @_ZTIi + catch %struct.__fundamental_type_info_pseudo* @_ZTIb + %exc_ptr2.i = extractvalue { i8*, i32 } %0, 0 + %filter3.i = extractvalue { i8*, i32 } %0, 1 + %typeid.i = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%struct.__fundamental_type_info_pseudo* @_ZTIi to i8*)) +; CHECK: call i32 @llvm.eh.typeid.for + %1 = icmp eq i32 %filter3.i, %typeid.i + br i1 %1, label %ppad, label %next + +next: ; preds = %lpad + %typeid1.i = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%struct.__fundamental_type_info_pseudo* @_ZTIb to i8*)) +; CHECK: call i32 @llvm.eh.typeid.for + %2 = icmp eq i32 %filter3.i, %typeid1.i + br i1 %2, label %ppad2, label %next2 + +ppad: ; preds = %lpad + %3 = tail call i8* @__cxa_begin_catch(i8* %exc_ptr2.i) nounwind + tail call void @__cxa_end_catch() nounwind + br label %return + +ppad2: ; preds = %next + %D.2073_5.i = tail call i8* @__cxa_begin_catch(i8* %exc_ptr2.i) nounwind + tail call void @__cxa_end_catch() nounwind + br label %return + +next2: ; preds = %next + call void @_Z7cleanupv() + %typeid = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%struct.__fundamental_type_info_pseudo* @_ZTIi to i8*)) +; CHECK-NOT: call i32 @llvm.eh.typeid.for + %4 = icmp eq i32 %filter3.i, %typeid + br i1 %4, label %ppad3, label %next3 + +next3: ; preds = %next2 + %typeid1 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%struct.__fundamental_type_info_pseudo* @_ZTIb to i8*)) + %5 = icmp eq i32 %filter3.i, %typeid1 + br i1 %5, 
label %ppad4, label %unwind + +unwind: ; preds = %next3 + resume { i8*, i32 } %0 + +ppad3: ; preds = %next2 + %6 = tail call i8* @__cxa_begin_catch(i8* %exc_ptr2.i) nounwind + tail call void @__cxa_end_catch() nounwind + br label %return + +ppad4: ; preds = %next3 + %D.2080_5 = tail call i8* @__cxa_begin_catch(i8* %exc_ptr2.i) nounwind + tail call void @__cxa_end_catch() nounwind + br label %return + +return: ; preds = %ppad4, %ppad3, %ppad2, %ppad, %entry + ret void +} Index: llvm/trunk/test/Transforms/NewGVN/2012-05-22-PreCrash.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2012-05-22-PreCrash.ll +++ llvm/trunk/test/Transforms/NewGVN/2012-05-22-PreCrash.ll @@ -0,0 +1,33 @@ +; RUN: opt < %s -newgvn +; PR12858 + +define void @fn5(i16 signext %p1, i8 signext %p2) nounwind uwtable { +entry: + br i1 undef, label %if.else, label %if.then + +if.then: ; preds = %entry + br label %if.end + +if.else: ; preds = %entry + %conv = sext i16 %p1 to i32 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %conv1 = sext i16 %p1 to i32 + br i1 undef, label %if.then3, label %if.else4 + +if.then3: ; preds = %if.end + br label %if.end12 + +if.else4: ; preds = %if.end + %conv7 = sext i8 %p2 to i32 + %cmp8 = icmp eq i32 %conv1, %conv7 + br i1 %cmp8, label %if.then10, label %if.end12 + +if.then10: ; preds = %if.else4 + br label %if.end12 + +if.end12: ; preds = %if.then10, %if.else4, %if.then3 + %conv13 = sext i8 %p2 to i32 + ret void +} Index: llvm/trunk/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll +++ llvm/trunk/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll @@ -0,0 +1,43 @@ +; XFAIL: * +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s + +declare void @llvm.masked.scatter.v2i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> ) +declare <2 x i32> 
@llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>) + +; This test ensures that masked scatter and gather operations, which take vectors of pointers, +; do not have pointer aliasing ignored when being processed. +; No scatter/gather calls should end up eliminated +; CHECK: llvm.masked.gather +; CHECK: llvm.masked.gather +; CHECK: llvm.masked.scatter +; CHECK: llvm.masked.gather +; CHECK: llvm.masked.scatter +; CHECK: llvm.masked.gather +define spir_kernel void @test(<2 x i32*> %in1, <2 x i32*> %in2, i32* %out) { +entry: + ; Just some temporary storage + %tmp.0 = alloca i32 + %tmp.1 = alloca i32 + %tmp.i = insertelement <2 x i32*> undef, i32* %tmp.0, i32 0 + %tmp = insertelement <2 x i32*> %tmp.i, i32* %tmp.1, i32 1 + ; Read from in1 and in2 + %in1.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in1, i32 1, <2 x i1> , <2 x i32> undef) #1 + %in2.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in2, i32 1, <2 x i1> , <2 x i32> undef) #1 + ; Store in1 to the allocas + call void @llvm.masked.scatter.v2i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> ); + ; Read in1 from the allocas + ; This gather should alias the scatter we just saw + %tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> , <2 x i32> undef) #1 + ; Store in2 to the allocas + call void @llvm.masked.scatter.v2i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> ); + ; Read in2 from the allocas + ; This gather should alias the scatter we just saw, and not be eliminated + %tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> , <2 x i32> undef) #1 + ; Store in2 to out for good measure + %tmp.v.1.0 = extractelement <2 x i32> %tmp.v.1, i32 0 + %tmp.v.1.1 = extractelement <2 x i32> %tmp.v.1, i32 1 + store i32 %tmp.v.1.0, i32* %out + %out.1 = getelementptr i32, i32* %out, i32 1 + store i32 %tmp.v.1.1, i32* %out.1 + ret void +} Index: llvm/trunk/test/Transforms/NewGVN/MemdepMiscompile.ll 
=================================================================== --- llvm/trunk/test/Transforms/NewGVN/MemdepMiscompile.ll +++ llvm/trunk/test/Transforms/NewGVN/MemdepMiscompile.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-macosx10.7.0" + +; rdar://12801584 +; Value of %shouldExit can be changed by RunInMode. +; Make sure we do not replace load %shouldExit in while.cond.backedge +; with a phi node where the value from while.body is 0. +define i32 @test() nounwind ssp { +entry: +; CHECK: test() +; CHECK: while.body: +; CHECK: call void @RunInMode +; CHECK: br i1 %tobool, label %while.cond.backedge, label %if.then +; CHECK: while.cond.backedge: +; CHECK: load i32, i32* %shouldExit +; CHECK: br i1 %cmp, label %while.body + %shouldExit = alloca i32, align 4 + %tasksIdle = alloca i32, align 4 + store i32 0, i32* %shouldExit, align 4 + store i32 0, i32* %tasksIdle, align 4 + call void @CTestInitialize(i32* %tasksIdle) nounwind + %0 = load i32, i32* %shouldExit, align 4 + %cmp1 = icmp eq i32 %0, 0 + br i1 %cmp1, label %while.body.lr.ph, label %while.end + +while.body.lr.ph: + br label %while.body + +while.body: + call void @RunInMode(i32 100) nounwind + %1 = load i32, i32* %tasksIdle, align 4 + %tobool = icmp eq i32 %1, 0 + br i1 %tobool, label %while.cond.backedge, label %if.then + +if.then: + store i32 0, i32* %tasksIdle, align 4 + call void @TimerCreate(i32* %shouldExit) nounwind + br label %while.cond.backedge + +while.cond.backedge: + %2 = load i32, i32* %shouldExit, align 4 + %cmp = icmp eq i32 %2, 0 + br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge + +while.cond.while.end_crit_edge: + br label %while.end + +while.end: + ret i32 0 +} +declare void @CTestInitialize(i32*) +declare void @RunInMode(i32) +declare void 
@TimerCreate(i32*) Index: llvm/trunk/test/Transforms/NewGVN/assume-equal.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/assume-equal.ll +++ llvm/trunk/test/Transforms/NewGVN/assume-equal.ll @@ -0,0 +1,276 @@ +; XFAIL: * +; RUN: opt < %s -newgvn -S | FileCheck %s + +%struct.A = type { i32 (...)** } +@_ZTV1A = available_externally unnamed_addr constant [4 x i8*] [i8* null, i8* bitcast (i8** @_ZTI1A to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1A3barEv to i8*)], align 8 +@_ZTI1A = external constant i8* + +; Checks if indirect calls can be replaced with direct +; assuming that %vtable == @_ZTV1A (with alignment). +; Checking const propagation across other BBs +; CHECK-LABEL: define void @_Z1gb( + +define void @_Z1gb(i1 zeroext %p) { +entry: + %call = tail call noalias i8* @_Znwm(i64 8) #4 + %0 = bitcast i8* %call to %struct.A* + tail call void @_ZN1AC1Ev(%struct.A* %0) #1 + %1 = bitcast i8* %call to i8*** + %vtable = load i8**, i8*** %1, align 8 + %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTV1A, i64 0, i64 2) + tail call void @llvm.assume(i1 %cmp.vtables) + br i1 %p, label %if.then, label %if.else + +if.then: ; preds = %entry + %vtable1.cast = bitcast i8** %vtable to i32 (%struct.A*)** + %2 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable1.cast, align 8 + + ; CHECK: call i32 @_ZN1A3fooEv( + %call2 = tail call i32 %2(%struct.A* %0) #1 + + br label %if.end + +if.else: ; preds = %entry + %vfn47 = getelementptr inbounds i8*, i8** %vtable, i64 1 + %vfn4 = bitcast i8** %vfn47 to i32 (%struct.A*)** + + ; CHECK: call i32 @_ZN1A3barEv( + %3 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vfn4, align 8 + + %call5 = tail call i32 %3(%struct.A* %0) #1 + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +; Check integration with invariant.group handling +; CHECK-LABEL: define void 
@invariantGroupHandling(i1 zeroext %p) { +define void @invariantGroupHandling(i1 zeroext %p) { +entry: + %call = tail call noalias i8* @_Znwm(i64 8) #4 + %0 = bitcast i8* %call to %struct.A* + tail call void @_ZN1AC1Ev(%struct.A* %0) #1 + %1 = bitcast i8* %call to i8*** + %vtable = load i8**, i8*** %1, align 8, !invariant.group !0 + %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTV1A, i64 0, i64 2) + tail call void @llvm.assume(i1 %cmp.vtables) + br i1 %p, label %if.then, label %if.else + +if.then: ; preds = %entry + %vtable1.cast = bitcast i8** %vtable to i32 (%struct.A*)** + %2 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable1.cast, align 8 + +; CHECK: call i32 @_ZN1A3fooEv( + %call2 = tail call i32 %2(%struct.A* %0) #1 + %vtable1 = load i8**, i8*** %1, align 8, !invariant.group !0 + %vtable2.cast = bitcast i8** %vtable1 to i32 (%struct.A*)** + %call1 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable2.cast, align 8 +; FIXME: those loads could be also direct, but right now the invariant.group +; analysis works only on single block +; CHECK-NOT: call i32 @_ZN1A3fooEv( + %callx = tail call i32 %call1(%struct.A* %0) #1 + + %vtable2 = load i8**, i8*** %1, align 8, !invariant.group !0 + %vtable3.cast = bitcast i8** %vtable2 to i32 (%struct.A*)** + %call4 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable3.cast, align 8 +; CHECK-NOT: call i32 @_ZN1A3fooEv( + %cally = tail call i32 %call4(%struct.A* %0) #1 + + %b = bitcast i8* %call to %struct.A** + %vtable3 = load %struct.A*, %struct.A** %b, align 8, !invariant.group !0 + %vtable4.cast = bitcast %struct.A* %vtable3 to i32 (%struct.A*)** + %vfun = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable4.cast, align 8 +; CHECK-NOT: call i32 @_ZN1A3fooEv( + %unknown = tail call i32 %vfun(%struct.A* %0) #1 + + br label %if.end + +if.else: ; preds = %entry + %vfn47 = getelementptr inbounds i8*, i8** %vtable, i64 1 + %vfn4 = bitcast i8** %vfn47 to i32 (%struct.A*)** + + ; CHECK: call 
i32 @_ZN1A3barEv( + %3 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vfn4, align 8 + + %call5 = tail call i32 %3(%struct.A* %0) #1 + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + + +; Checking const propagation in the same BB +; CHECK-LABEL: define i32 @main() + +define i32 @main() { +entry: + %call = tail call noalias i8* @_Znwm(i64 8) + %0 = bitcast i8* %call to %struct.A* + tail call void @_ZN1AC1Ev(%struct.A* %0) + %1 = bitcast i8* %call to i8*** + %vtable = load i8**, i8*** %1, align 8 + %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTV1A, i64 0, i64 2) + tail call void @llvm.assume(i1 %cmp.vtables) + %vtable1.cast = bitcast i8** %vtable to i32 (%struct.A*)** + + ; CHECK: call i32 @_ZN1A3fooEv( + %2 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable1.cast, align 8 + + %call2 = tail call i32 %2(%struct.A* %0) + ret i32 0 +} + +; This test checks const propagation with fcmp instruction. +; CHECK-LABEL: define float @_Z1gf(float %p) + +define float @_Z1gf(float %p) { +entry: + %p.addr = alloca float, align 4 + %f = alloca float, align 4 + store float %p, float* %p.addr, align 4 + + store float 3.000000e+00, float* %f, align 4 + %0 = load float, float* %p.addr, align 4 + %1 = load float, float* %f, align 4 + %cmp = fcmp oeq float %1, %0 ; note const on lhs + call void @llvm.assume(i1 %cmp) + + ; CHECK: ret float 3.000000e+00 + ret float %0 +} + +; CHECK-LABEL: define float @_Z1hf(float %p) + +define float @_Z1hf(float %p) { +entry: + %p.addr = alloca float, align 4 + store float %p, float* %p.addr, align 4 + + %0 = load float, float* %p.addr, align 4 + %cmp = fcmp nnan ueq float %0, 3.000000e+00 + call void @llvm.assume(i1 %cmp) + + ; CHECK: ret float 3.000000e+00 + ret float %0 +} + +; CHECK-LABEL: define float @_Z1if(float %p) +define float @_Z1if(float %p) { +entry: + %p.addr = alloca float, align 4 + store float %p, float* %p.addr, align 4 + + %0 = load float, float* %p.addr, align 4 + %cmp = 
fcmp ueq float %0, 3.000000e+00 ; no nnan flag - can't propagate + call void @llvm.assume(i1 %cmp) + + ; CHECK-NOT: ret float 3.000000e+00 + ret float %0 +} + +; This test checks if constant propagation works for multiple node edges +; CHECK-LABEL: define i32 @_Z1ii(i32 %p) +define i32 @_Z1ii(i32 %p) { +entry: + %cmp = icmp eq i32 %p, 42 + call void @llvm.assume(i1 %cmp) + + ; CHECK: br i1 true, label %bb2, label %bb2 + br i1 %cmp, label %bb2, label %bb2 +bb2: + call void @llvm.assume(i1 true) + ; CHECK: br i1 true, label %bb2, label %bb2 + br i1 %cmp, label %bb2, label %bb2 + + ; CHECK: ret i32 42 + ret i32 %p +} + +; CHECK-LABEL: define i32 @_Z1ij(i32 %p) +define i32 @_Z1ij(i32 %p) { +entry: + %cmp = icmp eq i32 %p, 42 + call void @llvm.assume(i1 %cmp) + + ; CHECK: br i1 true, label %bb2, label %bb2 + br i1 %cmp, label %bb2, label %bb2 +bb2: + ; CHECK-NOT: %cmp2 = + %cmp2 = icmp eq i32 %p, 42 + ; CHECK-NOT: call void @llvm.assume( + call void @llvm.assume(i1 %cmp2) + + ; CHECK: br i1 true, label %bb2, label %bb2 + br i1 %cmp, label %bb2, label %bb2 + + ; CHECK: ret i32 42 + ret i32 %p +} + +; CHECK-LABEL: define i32 @_Z1ik(i32 %p) +define i32 @_Z1ik(i32 %p) { +entry: + %cmp = icmp eq i32 %p, 42 + call void @llvm.assume(i1 %cmp) + + ; CHECK: br i1 true, label %bb2, label %bb3 + br i1 %cmp, label %bb2, label %bb3 +bb2: + ; CHECK-NOT: %cmp3 = + %cmp3 = icmp eq i32 %p, 43 + ; CHECK: store i8 undef, i8* null + call void @llvm.assume(i1 %cmp3) + ret i32 15 +bb3: + ret i32 17 +} + +; This test checks if GVN can do the constant propagation correctly +; when there are multiple uses of the same assume value in the +; basic block that has a loop back-edge pointing to itself. 
+; +; CHECK-LABEL: define i32 @_Z1il(i32 %val, i1 %k) +define i32 @_Z1il(i32 %val, i1 %k) { + br label %next + +next: +; CHECK: tail call void @llvm.assume(i1 %k) +; CHECK-NEXT: %cmp = icmp eq i32 %val, 50 + tail call void @llvm.assume(i1 %k) + tail call void @llvm.assume(i1 %k) + %cmp = icmp eq i32 %val, 50 + br i1 %cmp, label %next, label %meh + +meh: + ret i32 0 +} + +; This test checks if GVN can prevent the constant propagation correctly +; in the successor blocks that are not dominated by the basic block +; with the assume instruction. +; +; CHECK-LABEL: define i1 @_z1im(i32 %val, i1 %k, i1 %j) +define i1 @_z1im(i32 %val, i1 %k, i1 %j) { + br i1 %j, label %next, label %meh + +next: +; CHECK: tail call void @llvm.assume(i1 %k) +; CHECK-NEXT: br label %meh + tail call void @llvm.assume(i1 %k) + tail call void @llvm.assume(i1 %k) + br label %meh + +meh: +; CHECK: ret i1 %k + ret i1 %k +} + +declare noalias i8* @_Znwm(i64) +declare void @_ZN1AC1Ev(%struct.A*) +declare void @llvm.assume(i1) +declare i32 @_ZN1A3fooEv(%struct.A*) +declare i32 @_ZN1A3barEv(%struct.A*) + +!0 = !{!"struct A"} Index: llvm/trunk/test/Transforms/NewGVN/basic-undef-test.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/basic-undef-test.ll +++ llvm/trunk/test/Transforms/NewGVN/basic-undef-test.ll @@ -0,0 +1,15 @@ +; RUN: opt -basicaa -newgvn -S < %s | FileCheck %s +; ModuleID = 'test3.ll' +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define i32 @main(i32 *%foo) { +entry: +; CHECK: load i32, i32* %foo, align 4 + %0 = load i32, i32* %foo, align 4 + store i32 5, i32* undef, align 4 +; CHECK-NOT: load i32, i32* %foo, align 4 + %1 = load i32, i32* %foo, align 4 +; CHECK: add i32 %0, %0 + %2 = add i32 %0, %1 + ret i32 %2 +} Index: llvm/trunk/test/Transforms/NewGVN/basic.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/basic.ll +++ 
llvm/trunk/test/Transforms/NewGVN/basic.ll @@ -0,0 +1,17 @@ +; XFAIL: * +; RUN: opt < %s -newgvn -S | FileCheck %s +; RUN: opt < %s -passes=gvn -S | FileCheck %s + +define i32 @main() { +block1: + %z1 = bitcast i32 0 to i32 + br label %block2 +block2: + %z2 = bitcast i32 0 to i32 + ret i32 %z2 +} + +; CHECK: define i32 @main() { +; CHECK-NEXT: block1: +; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: } Index: llvm/trunk/test/Transforms/NewGVN/big-endian.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/big-endian.ll +++ llvm/trunk/test/Transforms/NewGVN/big-endian.ll @@ -0,0 +1,40 @@ +; RUN: opt -newgvn -S < %s | FileCheck %s + +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +;; Make sure we use correct bit shift based on storage size for +;; loads reusing a load value. +define i64 @test1({ i1, i8 }* %predA, { i1, i8 }* %predB) { +; CHECK-LABEL: @test1 +; CHECK-NOT: [[V1:%.*]] = load i16, i16* %{{.*}} +; CHECK-NOT: [[V2:%.*]] = lshr i16 [[V1]], 8 +; CHECK-NOT: trunc i16 [[V2]] to i1 + + %valueLoadA.fca.0.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predA, i64 0, i32 0 + %valueLoadA.fca.0.load = load i1, i1* %valueLoadA.fca.0.gep, align 8 + %valueLoadB.fca.0.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predB, i64 0, i32 0 + %valueLoadB.fca.0.load = load i1, i1* %valueLoadB.fca.0.gep, align 8 + %isTrue = and i1 %valueLoadA.fca.0.load, %valueLoadB.fca.0.load + %valueLoadA.fca.1.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predA, i64 0, i32 1 + %valueLoadA.fca.1.load = load i8, i8* %valueLoadA.fca.1.gep, align 1 + %isNotNullA = icmp ne i8 %valueLoadA.fca.1.load, 0 + %valueLoadB.fca.1.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predB, i64 0, i32 1 + %valueLoadB.fca.1.load = load i8, i8* %valueLoadB.fca.1.gep, align 1 + %isNotNullB = icmp ne i8 %valueLoadB.fca.1.load, 0 + %isNotNull = and i1 %isNotNullA, %isNotNullB + %isTrueAndNotNull = and 
i1 %isTrue, %isNotNull + %ret = zext i1 %isTrueAndNotNull to i64 + ret i64 %ret +} + +;; And likewise for loads reusing a store value. +define i1 @test2(i8 %V, i8* %P) { +; CHECK-LABEL: @test2 +; CHECK-NOT: lshr + store i8 %V, i8* %P + %P2 = bitcast i8* %P to i1* + %A = load i1, i1* %P2 + ret i1 %A +} + Index: llvm/trunk/test/Transforms/NewGVN/bitcast-of-call.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/bitcast-of-call.ll +++ llvm/trunk/test/Transforms/NewGVN/bitcast-of-call.ll @@ -0,0 +1,14 @@ +; XFAIL: * +; RUN: opt < %s -newgvn -S | FileCheck %s +; PR2213 + +define i32* @f(i8* %x) { +entry: + %tmp = call i8* @m( i32 12 ) ; [#uses=2] + %tmp1 = bitcast i8* %tmp to i32* ; [#uses=0] + %tmp2 = bitcast i8* %tmp to i32* ; [#uses=0] +; CHECK-NOT: %tmp2 + ret i32* %tmp2 +} + +declare i8* @m(i32) Index: llvm/trunk/test/Transforms/NewGVN/br-identical.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/br-identical.ll +++ llvm/trunk/test/Transforms/NewGVN/br-identical.ll @@ -0,0 +1,38 @@ +; RUN: opt -newgvn -S -o - %s | FileCheck %s + +; If a branch has two identical successors, we cannot declare either dead. 
+ +define void @widget(i1 %p) { +entry: + br label %bb2 + +bb2: + %t1 = phi i64 [ 0, %entry ], [ %t5, %bb7 ] + %t2 = add i64 %t1, 1 + %t3 = icmp ult i64 0, %t2 + br i1 %t3, label %bb3, label %bb4 + +bb3: + %t4 = call i64 @f() + br label %bb4 + +bb4: + ; CHECK-NOT: phi {{.*}} undef + %foo = phi i64 [ %t4, %bb3 ], [ 0, %bb2 ] + br i1 %p, label %bb5, label %bb6 + +bb5: + br i1 true, label %bb7, label %bb7 + +bb6: + br i1 true, label %bb7, label %bb7 + +bb7: + %t5 = add i64 %t1, 1 + br i1 %p, label %bb2, label %bb8 + +bb8: + ret void +} + +declare i64 @f() Index: llvm/trunk/test/Transforms/NewGVN/calloc-load-removal.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/calloc-load-removal.ll +++ llvm/trunk/test/Transforms/NewGVN/calloc-load-removal.ll @@ -0,0 +1,26 @@ +; XFAIL: * +; RUN: opt -S -basicaa -newgvn < %s | FileCheck %s +; RUN: opt -S -basicaa -newgvn -disable-simplify-libcalls < %s | FileCheck %s -check-prefix=CHECK_NO_LIBCALLS +; Check that loads from calloc are recognized as being zero. 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; Function Attrs: nounwind uwtable +define i32 @test1() { + %1 = tail call noalias i8* @calloc(i64 1, i64 4) + %2 = bitcast i8* %1 to i32* + ; This load is trivially constant zero + %3 = load i32, i32* %2, align 4 + ret i32 %3 + +; CHECK-LABEL: @test1( +; CHECK-NOT: %3 = load i32, i32* %2, align 4 +; CHECK: ret i32 0 + +; CHECK_NO_LIBCALLS-LABEL: @test1( +; CHECK_NO_LIBCALLS: load +; CHECK_NO_LIBCALLS: ret i32 % + +} + +declare noalias i8* @calloc(i64, i64) Index: llvm/trunk/test/Transforms/NewGVN/calls-nonlocal.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/calls-nonlocal.ll +++ llvm/trunk/test/Transforms/NewGVN/calls-nonlocal.ll @@ -0,0 +1,76 @@ +; XFAIL: * +; Two occurrences of strlen should be zapped. +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9" + +define i32 @test(i32 %g, i8* %P) nounwind { +entry: + %tmp2 = call i32 @strlen( i8* %P ) nounwind readonly ; [#uses=1] + %tmp3 = icmp eq i32 %tmp2, 100 ; [#uses=1] + %tmp34 = zext i1 %tmp3 to i8 ; [#uses=1] + %toBool = icmp ne i8 %tmp34, 0 ; [#uses=1] + br i1 %toBool, label %bb, label %bb6 + +bb: ; preds = %entry + br label %bb27 + +bb6: ; preds = %entry + %tmp8 = add i32 %g, 42 ; [#uses=2] + %tmp10 = call i32 @strlen( i8* %P ) nounwind readonly ; [#uses=1] + %tmp11 = icmp eq i32 %tmp10, 100 ; [#uses=1] + %tmp1112 = zext i1 %tmp11 to i8 ; [#uses=1] + %toBool13 = icmp ne i8 %tmp1112, 0 ; [#uses=1] + br i1 %toBool13, label %bb14, label %bb16 + +bb14: ; preds = %bb6 + br label %bb27 + +bb16: ; preds = %bb6 + %tmp18 = mul i32 %tmp8, 2 ; [#uses=1] + %tmp20 = call i32 @strlen( i8* %P ) nounwind readonly ; 
[#uses=1] + %tmp21 = icmp eq i32 %tmp20, 100 ; [#uses=1] + %tmp2122 = zext i1 %tmp21 to i8 ; [#uses=1] + %toBool23 = icmp ne i8 %tmp2122, 0 ; [#uses=1] + br i1 %toBool23, label %bb24, label %bb26 + +bb24: ; preds = %bb16 + br label %bb27 + +bb26: ; preds = %bb16 + br label %bb27 + +bb27: ; preds = %bb26, %bb24, %bb14, %bb + %tmp.0 = phi i32 [ 11, %bb26 ], [ %tmp18, %bb24 ], [ %tmp8, %bb14 ], [ %g, %bb ] ; [#uses=1] + br label %return + +return: ; preds = %bb27 + ret i32 %tmp.0 +} + +; CHECK: define i32 @test(i32 %g, i8* %P) #0 { +; CHECK: entry: +; CHECK: %tmp2 = call i32 @strlen(i8* %P) #1 +; CHECK: %tmp3 = icmp eq i32 %tmp2, 100 +; CHECK: %tmp34 = zext i1 %tmp3 to i8 +; CHECK: br i1 %tmp3, label %bb, label %bb6 +; CHECK: bb: +; CHECK: br label %bb27 +; CHECK: bb6: +; CHECK: %tmp8 = add i32 %g, 42 +; CHECK: br i1 false, label %bb14, label %bb16 +; CHECK: bb14: +; CHECK: br label %bb27 +; CHECK: bb16: +; CHECK: %tmp18 = mul i32 %tmp8, 2 +; CHECK: br i1 false, label %bb24, label %bb26 +; CHECK: bb24: +; CHECK: br label %bb27 +; CHECK: bb26: +; CHECK: br label %bb27 +; CHECK: bb27: +; CHECK: %tmp.0 = phi i32 [ 11, %bb26 ], [ undef, %bb24 ], [ undef, %bb14 ], [ %g, %bb ] +; CHECK: ret i32 %tmp.0 +; CHECK: } + +declare i32 @strlen(i8*) nounwind readonly Index: llvm/trunk/test/Transforms/NewGVN/calls-readonly.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/calls-readonly.ll +++ llvm/trunk/test/Transforms/NewGVN/calls-readonly.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s +; Should delete the second call to strlen even though the intervening strchr call exists. 
+ +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin7" + +define i8* @test(i8* %P, i8* %Q, i32 %x, i32 %y) nounwind readonly { +entry: + %0 = tail call i32 @strlen(i8* %P) ; [#uses=2] + %1 = icmp eq i32 %0, 0 ; [#uses=1] + br i1 %1, label %bb, label %bb1 + +bb: ; preds = %entry + %2 = sdiv i32 %x, %y ; [#uses=1] + br label %bb1 + +bb1: ; preds = %bb, %entry + %x_addr.0 = phi i32 [ %2, %bb ], [ %x, %entry ] ; [#uses=1] + %3 = tail call i8* @strchr(i8* %Q, i32 97) ; [#uses=1] + %4 = tail call i32 @strlen(i8* %P) ; [#uses=1] + %5 = add i32 %x_addr.0, %0 ; [#uses=1] + %.sum = sub i32 %5, %4 ; [#uses=1] + %6 = getelementptr i8, i8* %3, i32 %.sum ; [#uses=1] + ret i8* %6 +} + +; CHECK: define i8* @test(i8* %P, i8* %Q, i32 %x, i32 %y) #0 { +; CHECK: entry: +; CHECK-NEXT: %0 = tail call i32 @strlen(i8* %P) +; CHECK-NEXT: %1 = icmp eq i32 %0, 0 +; CHECK-NEXT: br i1 %1, label %bb, label %bb1 +; CHECK: bb: +; CHECK-NEXT: %2 = sdiv i32 %x, %y +; CHECK-NEXT: br label %bb1 +; CHECK: bb1: +; CHECK-NEXT: %x_addr.0 = phi i32 [ %2, %bb ], [ %x, %entry ] +; CHECK-NEXT: %3 = tail call i8* @strchr(i8* %Q, i32 97) +; CHECK-NEXT: %4 = add i32 %x_addr.0, %0 +; CHECK-NEXT: %5 = getelementptr i8, i8* %3, i32 %x_addr.0 +; CHECK-NEXT: ret i8* %5 +; CHECK: } + +declare i32 @strlen(i8*) nounwind readonly + +declare i8* @strchr(i8*, i32) nounwind readonly Index: llvm/trunk/test/Transforms/NewGVN/commute.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/commute.ll +++ llvm/trunk/test/Transforms/NewGVN/commute.ll @@ -0,0 +1,23 @@ +; RUN: opt -newgvn -S < %s | FileCheck %s + +declare void @use(i32, i32) + +define void @foo(i32 %x, i32 %y) { + ; CHECK-LABEL: @foo( + %add1 = add i32 %x, %y + %add2 = add i32 %y, %x + call void @use(i32 %add1, i32 %add2) + ; CHECK: @use(i32 %add1, i32 %add1) + ret void +} + +declare 
void @vse(i1, i1) + +define void @bar(i32 %x, i32 %y) { + ; CHECK-LABEL: @bar( + %cmp1 = icmp ult i32 %x, %y + %cmp2 = icmp ugt i32 %y, %x + call void @vse(i1 %cmp1, i1 %cmp2) + ; CHECK: @vse(i1 %cmp1, i1 %cmp1) + ret void +} Index: llvm/trunk/test/Transforms/NewGVN/cond_br.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/cond_br.ll +++ llvm/trunk/test/Transforms/NewGVN/cond_br.ll @@ -0,0 +1,55 @@ +; RUN: opt -basicaa -newgvn -S < %s | FileCheck %s +@y = external global i32 +@z = external global i32 + +; Function Attrs: nounwind ssp uwtable +define void @foo(i32 %x) { +; CHECK: @foo(i32 %x) +; CHECK: %.pre = load i32, i32* @y +; CHECK: call void @bar(i32 %.pre) + + %t = sub i32 %x, %x + %.pre = load i32, i32* @y, align 4 + %cmp = icmp sgt i32 %t, 2 + br i1 %cmp, label %if.then, label %entry.if.end_crit_edge + +entry.if.end_crit_edge: ; preds = %entry + br label %if.end + +if.then: ; preds = %entry + %add = add nsw i32 %x, 3 + store i32 %add, i32* @y, align 4 + br label %if.end + +if.end: ; preds = %entry.if.end_crit_edge, %if.then + %1 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ %add, %if.then ] + tail call void @bar(i32 %1) + ret void +} + +define void @foo2(i32 %x) { +; CHECK: @foo2(i32 %x) +; CHECK: %.pre = load i32, i32* @y +; CHECK: tail call void @bar(i32 %.pre) +entry: + %t = sub i32 %x, %x + %.pre = load i32, i32* @y, align 4 + %cmp = icmp sgt i32 %t, 2 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %add = add nsw i32 %x, 3 + store i32 %add, i32* @y, align 4 + br label %if.end + +if.else: ; preds = %entry + store i32 1, i32* @z, align 4 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %0 = phi i32 [ %.pre, %if.else ], [ %add, %if.then ] + tail call void @bar(i32 %0) + ret void +} + +declare void @bar(i32) Index: llvm/trunk/test/Transforms/NewGVN/cond_br2.ll =================================================================== --- 
llvm/trunk/test/Transforms/NewGVN/cond_br2.ll +++ llvm/trunk/test/Transforms/NewGVN/cond_br2.ll @@ -0,0 +1,141 @@ +; XFAIL: * +; RUN: opt -basicaa -newgvn -S < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +%"class.llvm::SmallVector" = type { %"class.llvm::SmallVectorImpl", [1 x %"union.llvm::SmallVectorBase::U"] } +%"class.llvm::SmallVectorImpl" = type { %"class.llvm::SmallVectorTemplateBase" } +%"class.llvm::SmallVectorTemplateBase" = type { %"class.llvm::SmallVectorTemplateCommon" } +%"class.llvm::SmallVectorTemplateCommon" = type { %"class.llvm::SmallVectorBase" } +%"class.llvm::SmallVectorBase" = type { i8*, i8*, i8*, %"union.llvm::SmallVectorBase::U" } +%"union.llvm::SmallVectorBase::U" = type { x86_fp80 } + +; Function Attrs: ssp uwtable +define void @_Z4testv() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK: @_Z4testv() +; CHECK: invoke.cont: +; CHECK: br i1 true, label %new.notnull.i11, label %if.end.i14 +; CHECK: Retry.i10: + +entry: + %sv = alloca %"class.llvm::SmallVector", align 16 + %0 = bitcast %"class.llvm::SmallVector"* %sv to i8* + call void @llvm.lifetime.start(i64 64, i8* %0) #1 + %BeginX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0 + %FirstEl.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 3 + %1 = bitcast %"union.llvm::SmallVectorBase::U"* %FirstEl.i.i.i.i.i.i to i8* + store i8* %1, i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4 + %EndX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 1 + store i8* %1, i8** %EndX.i.i.i.i.i.i, align 8, !tbaa !4 + %CapacityX.i.i.i.i.i.i = getelementptr inbounds 
%"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 2 + %add.ptr.i.i.i.i2.i.i = getelementptr inbounds %"union.llvm::SmallVectorBase::U", %"union.llvm::SmallVectorBase::U"* %FirstEl.i.i.i.i.i.i, i64 2 + %add.ptr.i.i.i.i.i.i = bitcast %"union.llvm::SmallVectorBase::U"* %add.ptr.i.i.i.i2.i.i to i8* + store i8* %add.ptr.i.i.i.i.i.i, i8** %CapacityX.i.i.i.i.i.i, align 16, !tbaa !4 + %EndX.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 1 + %2 = load i8*, i8** %EndX.i, align 8, !tbaa !4 + %CapacityX.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 2 + %cmp.i = icmp ult i8* %2, %add.ptr.i.i.i.i.i.i + br i1 %cmp.i, label %Retry.i, label %if.end.i + +Retry.i: ; preds = %.noexc, %entry + %3 = phi i8* [ %2, %entry ], [ %.pre.i, %.noexc ] + %new.isnull.i = icmp eq i8* %3, null + br i1 %new.isnull.i, label %invoke.cont, label %new.notnull.i + +new.notnull.i: ; preds = %Retry.i + %4 = bitcast i8* %3 to i32* + store i32 1, i32* %4, align 4, !tbaa !5 + br label %invoke.cont + +if.end.i: ; preds = %entry + %5 = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0 + invoke void @_ZN4llvm15SmallVectorBase8grow_podEmm(%"class.llvm::SmallVectorBase"* %5, i64 0, i64 4) + to label %.noexc unwind label %lpad + +.noexc: ; preds = %if.end.i + %.pre.i = load i8*, i8** %EndX.i, align 8, !tbaa !4 + br label %Retry.i + +invoke.cont: ; preds = %new.notnull.i, %Retry.i + %add.ptr.i = getelementptr inbounds i8, i8* %3, i64 4 + store i8* %add.ptr.i, i8** %EndX.i, align 8, !tbaa !4 + %6 = load i8*, i8** %CapacityX.i, align 16, !tbaa !4 + %cmp.i8 = icmp ult i8* %add.ptr.i, %6 + br i1 %cmp.i8, label %new.notnull.i11, label %if.end.i14 + +Retry.i10: ; preds = %if.end.i14 + %.pre.i13 = load i8*, i8** %EndX.i, align 8, !tbaa !4 + 
%new.isnull.i9 = icmp eq i8* %.pre.i13, null + br i1 %new.isnull.i9, label %invoke.cont2, label %new.notnull.i11 + +new.notnull.i11: ; preds = %invoke.cont, %Retry.i10 + %7 = phi i8* [ %.pre.i13, %Retry.i10 ], [ %add.ptr.i, %invoke.cont ] + %8 = bitcast i8* %7 to i32* + store i32 2, i32* %8, align 4, !tbaa !5 + br label %invoke.cont2 + +if.end.i14: ; preds = %invoke.cont + %9 = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0 + invoke void @_ZN4llvm15SmallVectorBase8grow_podEmm(%"class.llvm::SmallVectorBase"* %9, i64 0, i64 4) + to label %Retry.i10 unwind label %lpad + +invoke.cont2: ; preds = %new.notnull.i11, %Retry.i10 + %10 = phi i8* [ null, %Retry.i10 ], [ %7, %new.notnull.i11 ] + %add.ptr.i12 = getelementptr inbounds i8, i8* %10, i64 4 + store i8* %add.ptr.i12, i8** %EndX.i, align 8, !tbaa !4 + invoke void @_Z1gRN4llvm11SmallVectorIiLj8EEE(%"class.llvm::SmallVector"* %sv) + to label %invoke.cont3 unwind label %lpad + +invoke.cont3: ; preds = %invoke.cont2 + %11 = load i8*, i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4 + %cmp.i.i.i.i19 = icmp eq i8* %11, %1 + br i1 %cmp.i.i.i.i19, label %_ZN4llvm11SmallVectorIiLj8EED1Ev.exit21, label %if.then.i.i.i20 + +if.then.i.i.i20: ; preds = %invoke.cont3 + call void @free(i8* %11) #1 + br label %_ZN4llvm11SmallVectorIiLj8EED1Ev.exit21 + +_ZN4llvm11SmallVectorIiLj8EED1Ev.exit21: ; preds = %invoke.cont3, %if.then.i.i.i20 + call void @llvm.lifetime.end(i64 64, i8* %0) #1 + ret void + +lpad: ; preds = %if.end.i14, %if.end.i, %invoke.cont2 + %12 = landingpad { i8*, i32 } + cleanup + %13 = load i8*, i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4 + %cmp.i.i.i.i = icmp eq i8* %13, %1 + br i1 %cmp.i.i.i.i, label %eh.resume, label %if.then.i.i.i + +if.then.i.i.i: ; preds = %lpad + call void @free(i8* %13) #1 + br label %eh.resume + +eh.resume: ; preds = %if.then.i.i.i, %lpad + resume { i8*, i32 } %12 +} + +; Function Attrs: nounwind +declare void 
@llvm.lifetime.start(i64, i8* nocapture) #1 + +declare i32 @__gxx_personality_v0(...) + +declare void @_Z1gRN4llvm11SmallVectorIiLj8EEE(%"class.llvm::SmallVector"*) #2 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #1 + +declare void @_ZN4llvm15SmallVectorBase8grow_podEmm(%"class.llvm::SmallVectorBase"*, i64, i64) #2 + +; Function Attrs: nounwind +declare void @free(i8* nocapture) #3 + +attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } +attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!0 = !{!"any pointer", !1} +!1 = !{!"omnipotent char", !2} +!2 = !{!"Simple C/C++ TBAA"} +!3 = !{!"int", !1} +!4 = !{!0, !0, i64 0} +!5 = !{!3, !3, i64 0} Index: llvm/trunk/test/Transforms/NewGVN/condprop.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/condprop.ll +++ llvm/trunk/test/Transforms/NewGVN/condprop.ll @@ -0,0 +1,300 @@ +; XFAIL: * +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s + +@a = external global i32 ; [#uses=7] + +; CHECK-LABEL: @test1( +define i32 @test1() nounwind { +entry: + %0 = load i32, i32* @a, align 4 + %1 = icmp eq i32 %0, 4 + br i1 %1, label %bb, label %bb1 + +bb: ; preds = %entry + br label %bb8 + +bb1: ; preds = %entry + %2 = load i32, i32* @a, align 4 + %3 = icmp eq 
i32 %2, 5 + br i1 %3, label %bb2, label %bb3 + +bb2: ; preds = %bb1 + br label %bb8 + +bb3: ; preds = %bb1 + %4 = load i32, i32* @a, align 4 + %5 = icmp eq i32 %4, 4 +; CHECK: br i1 false, label %bb4, label %bb5 + br i1 %5, label %bb4, label %bb5 + +bb4: ; preds = %bb3 + %6 = load i32, i32* @a, align 4 + %7 = add i32 %6, 5 + br label %bb8 + +bb5: ; preds = %bb3 + %8 = load i32, i32* @a, align 4 + %9 = icmp eq i32 %8, 5 +; CHECK: br i1 false, label %bb6, label %bb7 + br i1 %9, label %bb6, label %bb7 + +bb6: ; preds = %bb5 + %10 = load i32, i32* @a, align 4 + %11 = add i32 %10, 4 + br label %bb8 + +bb7: ; preds = %bb5 + %12 = load i32, i32* @a, align 4 + br label %bb8 + +bb8: ; preds = %bb7, %bb6, %bb4, %bb2, %bb + %.0 = phi i32 [ %12, %bb7 ], [ %11, %bb6 ], [ %7, %bb4 ], [ 4, %bb2 ], [ 5, %bb ] + br label %return + +return: ; preds = %bb8 + ret i32 %.0 +} + +declare void @foo(i1) +declare void @bar(i32) + +; CHECK-LABEL: @test3( +define void @test3(i32 %x, i32 %y) { + %xz = icmp eq i32 %x, 0 + %yz = icmp eq i32 %y, 0 + %z = and i1 %xz, %yz + br i1 %z, label %both_zero, label %nope +both_zero: + call void @foo(i1 %xz) +; CHECK: call void @foo(i1 true) + call void @foo(i1 %yz) +; CHECK: call void @foo(i1 true) + call void @bar(i32 %x) +; CHECK: call void @bar(i32 0) + call void @bar(i32 %y) +; CHECK: call void @bar(i32 0) + ret void +nope: + call void @foo(i1 %z) +; CHECK: call void @foo(i1 false) + ret void +} + +; CHECK-LABEL: @test4( +define void @test4(i1 %b, i32 %x) { + br i1 %b, label %sw, label %case3 +sw: + switch i32 %x, label %default [ + i32 0, label %case0 + i32 1, label %case1 + i32 2, label %case0 + i32 3, label %case3 + i32 4, label %default + ] +default: +; CHECK: default: + call void @bar(i32 %x) +; CHECK: call void @bar(i32 %x) + ret void +case0: +; CHECK: case0: + call void @bar(i32 %x) +; CHECK: call void @bar(i32 %x) + ret void +case1: +; CHECK: case1: + call void @bar(i32 %x) +; CHECK: call void @bar(i32 1) + ret void +case3: +; CHECK: case3: + 
call void @bar(i32 %x) +; CHECK: call void @bar(i32 %x) + ret void +} + +; CHECK-LABEL: @test5( +define i1 @test5(i32 %x, i32 %y) { + %cmp = icmp eq i32 %x, %y + br i1 %cmp, label %same, label %different + +same: + %cmp2 = icmp ne i32 %x, %y +; CHECK: ret i1 false + ret i1 %cmp2 + +different: + %cmp3 = icmp eq i32 %x, %y +; CHECK: ret i1 false + ret i1 %cmp3 +} + +; CHECK-LABEL: @test6( +define i1 @test6(i32 %x, i32 %y) { + %cmp2 = icmp ne i32 %x, %y + %cmp = icmp eq i32 %x, %y + %cmp3 = icmp eq i32 %x, %y + br i1 %cmp, label %same, label %different + +same: +; CHECK: ret i1 false + ret i1 %cmp2 + +different: +; CHECK: ret i1 false + ret i1 %cmp3 +} + +; CHECK-LABEL: @test6_fp( +define i1 @test6_fp(float %x, float %y) { + %cmp2 = fcmp une float %x, %y + %cmp = fcmp oeq float %x, %y + %cmp3 = fcmp oeq float %x, %y + br i1 %cmp, label %same, label %different + +same: +; CHECK: ret i1 false + ret i1 %cmp2 + +different: +; CHECK: ret i1 false + ret i1 %cmp3 +} + +; CHECK-LABEL: @test7( +define i1 @test7(i32 %x, i32 %y) { + %cmp = icmp sgt i32 %x, %y + br i1 %cmp, label %same, label %different + +same: + %cmp2 = icmp sle i32 %x, %y +; CHECK: ret i1 false + ret i1 %cmp2 + +different: + %cmp3 = icmp sgt i32 %x, %y +; CHECK: ret i1 false + ret i1 %cmp3 +} + +; CHECK-LABEL: @test7_fp( +define i1 @test7_fp(float %x, float %y) { + %cmp = fcmp ogt float %x, %y + br i1 %cmp, label %same, label %different + +same: + %cmp2 = fcmp ule float %x, %y +; CHECK: ret i1 false + ret i1 %cmp2 + +different: + %cmp3 = fcmp ogt float %x, %y +; CHECK: ret i1 false + ret i1 %cmp3 +} + +; CHECK-LABEL: @test8( +define i1 @test8(i32 %x, i32 %y) { + %cmp2 = icmp sle i32 %x, %y + %cmp = icmp sgt i32 %x, %y + %cmp3 = icmp sgt i32 %x, %y + br i1 %cmp, label %same, label %different + +same: +; CHECK: ret i1 false + ret i1 %cmp2 + +different: +; CHECK: ret i1 false + ret i1 %cmp3 +} + +; CHECK-LABEL: @test8_fp( +define i1 @test8_fp(float %x, float %y) { + %cmp2 = fcmp ule float %x, %y + %cmp = fcmp ogt 
float %x, %y + %cmp3 = fcmp ogt float %x, %y + br i1 %cmp, label %same, label %different + +same: +; CHECK: ret i1 false + ret i1 %cmp2 + +different: +; CHECK: ret i1 false + ret i1 %cmp3 +} + +; PR1768 +; CHECK-LABEL: @test9( +define i32 @test9(i32 %i, i32 %j) { + %cmp = icmp eq i32 %i, %j + br i1 %cmp, label %cond_true, label %ret + +cond_true: + %diff = sub i32 %i, %j + ret i32 %diff +; CHECK: ret i32 0 + +ret: + ret i32 5 +; CHECK: ret i32 5 +} + +; PR1768 +; CHECK-LABEL: @test10( +define i32 @test10(i32 %j, i32 %i) { + %cmp = icmp eq i32 %i, %j + br i1 %cmp, label %cond_true, label %ret + +cond_true: + %diff = sub i32 %i, %j + ret i32 %diff +; CHECK: ret i32 0 + +ret: + ret i32 5 +; CHECK: ret i32 5 +} + +declare i32 @yogibar() + +; CHECK-LABEL: @test11( +define i32 @test11(i32 %x) { + %v0 = call i32 @yogibar() + %v1 = call i32 @yogibar() + %cmp = icmp eq i32 %v0, %v1 + br i1 %cmp, label %cond_true, label %next + +cond_true: + ret i32 %v1 +; CHECK: ret i32 %v0 + +next: + %cmp2 = icmp eq i32 %x, %v0 + br i1 %cmp2, label %cond_true2, label %next2 + +cond_true2: + ret i32 %v0 +; CHECK: ret i32 %x + +next2: + ret i32 0 +} + +; CHECK-LABEL: @test12( +define i32 @test12(i32 %x) { + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %cond_true, label %cond_false + +cond_true: + br label %ret + +cond_false: + br label %ret + +ret: + %res = phi i32 [ %x, %cond_true ], [ %x, %cond_false ] +; CHECK: %res = phi i32 [ 0, %cond_true ], [ %x, %cond_false ] + ret i32 %res +} Index: llvm/trunk/test/Transforms/NewGVN/crash-no-aa.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/crash-no-aa.ll +++ llvm/trunk/test/Transforms/NewGVN/crash-no-aa.ll @@ -0,0 +1,15 @@ +; RUN: opt -disable-basicaa -newgvn -S < %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-freebsd8.0" + +; PR5744 
+define i32 @test1({i16, i32} *%P) { + %P2 = getelementptr {i16, i32}, {i16, i32} *%P, i32 0, i32 0 + store i16 42, i16* %P2 + + %P3 = getelementptr {i16, i32}, {i16, i32} *%P, i32 0, i32 1 + %V = load i32, i32* %P3 + ret i32 %V +} + Index: llvm/trunk/test/Transforms/NewGVN/crash.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/crash.ll +++ llvm/trunk/test/Transforms/NewGVN/crash.ll @@ -0,0 +1,201 @@ +; RUN: opt -newgvn -disable-output < %s + +; PR5631 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0" + +define i32* @test1(i8* %name, i32 %namelen, i32* %o, i32 %expected_type) nounwind ssp { +entry: + br i1 undef, label %if.end13, label %while.body.preheader + + +if.end13: ; preds = %if.then6 + br label %while.body.preheader + +while.body.preheader: ; preds = %if.end13, %if.end + br label %while.body + +while.body: ; preds = %while.body.backedge, %while.body.preheader + %o.addr.0 = phi i32* [ undef, %while.body.preheader ], [ %o.addr.0.be, %while.body.backedge ] ; [#uses=2] + br i1 false, label %return.loopexit, label %lor.lhs.false + +lor.lhs.false: ; preds = %while.body + %tmp20 = bitcast i32* %o.addr.0 to i32* ; [#uses=1] + %tmp22 = load i32, i32* %tmp20 ; [#uses=0] + br i1 undef, label %land.lhs.true24, label %if.end31 + +land.lhs.true24: ; preds = %lor.lhs.false + %call28 = call i32* @parse_object(i8* undef) nounwind ; [#uses=0] + br i1 undef, label %return.loopexit, label %if.end31 + +if.end31: ; preds = %land.lhs.true24, %lor.lhs.false + br i1 undef, label %return.loopexit, label %if.end41 + +if.end41: ; preds = %if.end31 + %tmp43 = bitcast i32* %o.addr.0 to i32* ; [#uses=1] + %tmp45 = load i32, i32* %tmp43 ; [#uses=0] + br i1 undef, label %if.then50, label %if.else + +if.then50: ; preds = %if.end41 + %tmp53 = load i32*, i32** undef ; [#uses=1] 
+ br label %while.body.backedge + +if.else: ; preds = %if.end41 + br i1 undef, label %if.then62, label %if.else67 + +if.then62: ; preds = %if.else + br label %while.body.backedge + +while.body.backedge: ; preds = %if.then62, %if.then50 + %o.addr.0.be = phi i32* [ %tmp53, %if.then50 ], [ undef, %if.then62 ] ; [#uses=1] + br label %while.body + +if.else67: ; preds = %if.else + ret i32* null + +return.loopexit: ; preds = %if.end31, %land.lhs.true24, %while.body + ret i32* undef +} + +declare i32* @parse_object(i8*) + + + + + + +%struct.attribute_spec = type { i8*, i32, i32, i8, i8, i8 } + +@attribute_tables = external global [4 x %struct.attribute_spec*] ; <[4 x %struct.attribute_spec*]*> [#uses=2] + +define void @test2() nounwind { +entry: + br label %bb69.i + +bb69.i: ; preds = %bb57.i.preheader + %tmp4 = getelementptr inbounds [4 x %struct.attribute_spec*], [4 x %struct.attribute_spec*]* @attribute_tables, i32 0, i32 undef ; <%struct.attribute_spec**> [#uses=1] + %tmp3 = load %struct.attribute_spec*, %struct.attribute_spec** %tmp4, align 4 ; <%struct.attribute_spec*> [#uses=1] + br label %bb65.i + +bb65.i: ; preds = %bb65.i.preheader, %bb64.i + %storemerge6.i = phi i32 [ 1, %bb64.i ], [ 0, %bb69.i ] ; [#uses=3] + %scevgep14 = getelementptr inbounds %struct.attribute_spec, %struct.attribute_spec* %tmp3, i32 %storemerge6.i, i32 0 ; [#uses=1] + %tmp2 = load i8*, i8** %scevgep14, align 4 ; [#uses=0] + %tmp = load %struct.attribute_spec*, %struct.attribute_spec** %tmp4, align 4 ; <%struct.attribute_spec*> [#uses=1] + %scevgep1516 = getelementptr inbounds %struct.attribute_spec, %struct.attribute_spec* %tmp, i32 %storemerge6.i, i32 0 ; [#uses=0] + unreachable + +bb64.i: ; Unreachable + br label %bb65.i + +bb66.i: ; Unreachable + br label %bb69.i +} + + + +; rdar://7438974 + +@g = external global i64, align 8 + +define i32* @test3() { +do.end17.i: + %tmp18.i = load i7*, i7** undef + %tmp1 = bitcast i7* %tmp18.i to i8* + br i1 undef, label %do.body36.i, label %if.then21.i 
+ +if.then21.i: + %tmp2 = bitcast i7* %tmp18.i to i8* + ret i32* undef + +do.body36.i: + %ivar38.i = load i64, i64* @g + %tmp3 = bitcast i7* %tmp18.i to i8* + %add.ptr39.sum.i = add i64 %ivar38.i, 8 + %tmp40.i = getelementptr inbounds i8, i8* %tmp3, i64 %add.ptr39.sum.i + %tmp4 = bitcast i8* %tmp40.i to i64* + %tmp41.i = load i64, i64* %tmp4 + br i1 undef, label %if.then48.i, label %do.body57.i + +if.then48.i: + %call54.i = call i32 @foo2() + br label %do.body57.i + +do.body57.i: + %tmp58.i = load i7*, i7** undef + %ivar59.i = load i64, i64* @g + %tmp5 = bitcast i7* %tmp58.i to i8* + %add.ptr65.sum.i = add i64 %ivar59.i, 8 + %tmp66.i = getelementptr inbounds i8, i8* %tmp5, i64 %add.ptr65.sum.i + %tmp6 = bitcast i8* %tmp66.i to i64* + %tmp67.i = load i64, i64* %tmp6 + ret i32* undef +} + +declare i32 @foo2() + + + +define i32 @test4() { +entry: + ret i32 0 + +dead: + %P2 = getelementptr i32, i32 *%P2, i32 52 + %Q2 = getelementptr i32, i32 *%Q2, i32 52 + store i32 4, i32* %P2 + %A = load i32, i32* %Q2 + br i1 true, label %dead, label %dead2 + +dead2: + ret i32 %A +} + + +; PR9841 +define fastcc i8 @test5(i8* %P) nounwind { +entry: + %0 = load i8, i8* %P, align 2 + + %Q = getelementptr i8, i8* %P, i32 1 + %1 = load i8, i8* %Q, align 1 + ret i8 %1 +} + + +; Test that a GEP in an unreachable block with the following form doesn't crash +; GVN: +; +; %x = gep %some.type %x, ... + +%struct.type = type { i64, i32, i32 } + +define fastcc void @func() nounwind uwtable ssp align 2 { +entry: + br label %reachable.bb + +;; Unreachable code. + +unreachable.bb: + %gep.val = getelementptr inbounds %struct.type, %struct.type* %gep.val, i64 1 + br i1 undef, label %u2.bb, label %u1.bb + +u1.bb: + %tmp1 = getelementptr inbounds %struct.type, %struct.type* %gep.val, i64 0, i32 0 + store i64 -1, i64* %tmp1, align 8 + br label %unreachable.bb + +u2.bb: + %0 = load i32, i32* undef, align 4 + %conv.i.i.i.i.i = zext i32 %0 to i64 + br label %u2.bb + +;; Reachable code. 
+ +reachable.bb: + br label %r1.bb + +r1.bb: + br label %u2.bb +} Index: llvm/trunk/test/Transforms/NewGVN/dbg-redundant-load.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/dbg-redundant-load.ll +++ llvm/trunk/test/Transforms/NewGVN/dbg-redundant-load.ll @@ -0,0 +1,52 @@ +; RUN: opt -newgvn -S < %s | FileCheck %s + +; Check that the redundant load from %if.then is removed. +; Also, check that the debug location associated to load %0 still refers to +; line 3 and not line 6. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; CHECK: @test_redundant_load( +; CHECK-LABEL: entry: +; CHECK-NEXT: load i32, i32* %Y, align 4, !dbg ![[LOC:[0-9]+]] +; CHECK-LABEL: if.then: +; CHECK-NOT: load +; CHECK-LABEL: if.end: +; CHECK: ![[LOC]] = !DILocation(line: 3, scope: !{{.*}}) + +define i32 @test_redundant_load(i32 %X, i32* %Y) !dbg !6 { +entry: + %0 = load i32, i32* %Y, align 4, !dbg !8 + %cmp = icmp sgt i32 %X, -1, !dbg !9 + br i1 %cmp, label %if.then, label %if.end, !dbg !9 + +if.then: ; preds = %entry + %1 = load i32, i32* %Y, align 4, !dbg !10 + %add = add nsw i32 %0, %1, !dbg !10 + call void @foo(), !dbg !11 + br label %if.end, !dbg !12 + +if.end: ; preds = %if.then, %entry + %Result.0 = phi i32 [ %add, %if.then ], [ %0, %entry ] + ret i32 %Result.0, !dbg !13 +} + +declare void @foo() + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2) +!1 = !DIFile(filename: "test.cpp", directory: "") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"PIC Level", i32 2} +!6 = distinct !DISubprogram(name: "test_redundant_load", scope: !1, file: !1, line: 2, type: !7, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) +!7 = 
!DISubroutineType(types: !2) +!8 = !DILocation(line: 3, scope: !6) +!9 = !DILocation(line: 5, scope: !6) +!10 = !DILocation(line: 6, scope: !6) +!11 = !DILocation(line: 7, scope: !6) +!12 = !DILocation(line: 8, scope: !6) +!13 = !DILocation(line: 10, scope: !6) Index: llvm/trunk/test/Transforms/NewGVN/edge.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/edge.ll +++ llvm/trunk/test/Transforms/NewGVN/edge.ll @@ -0,0 +1,171 @@ +; XFAIL: * +; RUN: opt -newgvn -S < %s | FileCheck %s + +define i32 @f1(i32 %x) { + ; CHECK-LABEL: define i32 @f1( +bb0: + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %bb2, label %bb1 +bb1: + br label %bb2 +bb2: + %cond = phi i32 [ %x, %bb0 ], [ 0, %bb1 ] + %foo = add i32 %cond, %x + ret i32 %foo + ; CHECK: bb2: + ; CHECK: ret i32 %x +} + +define i32 @f2(i32 %x) { + ; CHECK-LABEL: define i32 @f2( +bb0: + %cmp = icmp ne i32 %x, 0 + br i1 %cmp, label %bb1, label %bb2 +bb1: + br label %bb2 +bb2: + %cond = phi i32 [ %x, %bb0 ], [ 0, %bb1 ] + %foo = add i32 %cond, %x + ret i32 %foo + ; CHECK: bb2: + ; CHECK: ret i32 %x +} + +define i32 @f3(i32 %x) { + ; CHECK-LABEL: define i32 @f3( +bb0: + switch i32 %x, label %bb1 [ i32 0, label %bb2] +bb1: + br label %bb2 +bb2: + %cond = phi i32 [ %x, %bb0 ], [ 0, %bb1 ] + %foo = add i32 %cond, %x + ret i32 %foo + ; CHECK: bb2: + ; CHECK: ret i32 %x +} + +declare void @g(i1) +define void @f4(i8 * %x) { +; CHECK-LABEL: define void @f4( +bb0: + %y = icmp eq i8* null, %x + br i1 %y, label %bb2, label %bb1 +bb1: + br label %bb2 +bb2: + %zed = icmp eq i8* null, %x + call void @g(i1 %zed) +; CHECK: call void @g(i1 %y) + ret void +} + +define double @fcmp_oeq_not_zero(double %x, double %y) { +entry: + %cmp = fcmp oeq double %y, 2.0 + br i1 %cmp, label %if, label %return + +if: + %div = fdiv double %x, %y + br label %return + +return: + %retval = phi double [ %div, %if ], [ %x, %entry ] + ret double %retval + +; CHECK-LABEL: define double @fcmp_oeq_not_zero( +; 
CHECK: %div = fdiv double %x, 2.0 +} + +define double @fcmp_une_not_zero(double %x, double %y) { +entry: + %cmp = fcmp une double %y, 2.0 + br i1 %cmp, label %return, label %else + +else: + %div = fdiv double %x, %y + br label %return + +return: + %retval = phi double [ %div, %else ], [ %x, %entry ] + ret double %retval + +; CHECK-LABEL: define double @fcmp_une_not_zero( +; CHECK: %div = fdiv double %x, 2.0 +} + +; PR22376 - We can't propagate zero constants because -0.0 +; compares equal to 0.0. If %y is -0.0 in this test case, +; we would produce the wrong sign on the infinity return value. +define double @fcmp_oeq_zero(double %x, double %y) { +entry: + %cmp = fcmp oeq double %y, 0.0 + br i1 %cmp, label %if, label %return + +if: + %div = fdiv double %x, %y + br label %return + +return: + %retval = phi double [ %div, %if ], [ %x, %entry ] + ret double %retval + +; CHECK-LABEL: define double @fcmp_oeq_zero( +; CHECK: %div = fdiv double %x, %y +} + +define double @fcmp_une_zero(double %x, double %y) { +entry: + %cmp = fcmp une double %y, -0.0 + br i1 %cmp, label %return, label %else + +else: + %div = fdiv double %x, %y + br label %return + +return: + %retval = phi double [ %div, %else ], [ %x, %entry ] + ret double %retval + +; CHECK-LABEL: define double @fcmp_une_zero( +; CHECK: %div = fdiv double %x, %y +} + +; We also cannot propagate a value if it's not a constant. +; This is because the value could be 0.0 or -0.0. 
+ +define double @fcmp_oeq_maybe_zero(double %x, double %y, double %z1, double %z2) { +entry: + %z = fadd double %z1, %z2 + %cmp = fcmp oeq double %y, %z + br i1 %cmp, label %if, label %return + +if: + %div = fdiv double %x, %z + br label %return + +return: + %retval = phi double [ %div, %if ], [ %x, %entry ] + ret double %retval + +; CHECK-LABEL: define double @fcmp_oeq_maybe_zero( +; CHECK: %div = fdiv double %x, %z +} + +define double @fcmp_une_maybe_zero(double %x, double %y, double %z1, double %z2) { +entry: + %z = fadd double %z1, %z2 + %cmp = fcmp une double %y, %z + br i1 %cmp, label %return, label %else + +else: + %div = fdiv double %x, %z + br label %return + +return: + %retval = phi double [ %div, %else ], [ %x, %entry ] + ret double %retval + +; CHECK-LABEL: define double @fcmp_une_maybe_zero( +; CHECK: %div = fdiv double %x, %z +} Index: llvm/trunk/test/Transforms/NewGVN/fence.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/fence.ll +++ llvm/trunk/test/Transforms/NewGVN/fence.ll @@ -0,0 +1,70 @@ +; XFAIL: * +; RUN: opt -S -basicaa -newgvn < %s | FileCheck %s + +; We can value forward across the fence since we can (semantically) +; reorder the following load before the fence. +define i32 @test(i32* %addr.i) { +; CHECK-LABEL: @test +; CHECK: store +; CHECK: fence +; CHECK-NOT: load +; CHECK: ret + store i32 5, i32* %addr.i, align 4 + fence release + %a = load i32, i32* %addr.i, align 4 + ret i32 %a +} + +; Same as above +define i32 @test2(i32* %addr.i) { +; CHECK-LABEL: @test2 +; CHECK-NEXT: fence +; CHECK-NOT: load +; CHECK: ret + %a = load i32, i32* %addr.i, align 4 + fence release + %a2 = load i32, i32* %addr.i, align 4 + %res = sub i32 %a, %a2 + ret i32 %res +} + +; We can not value forward across an acquire barrier since we might +; be synchronizing with another thread storing to the same variable +; followed by a release fence. 
This is not so much enforcing an +; ordering property (though it is that too), but a liveness +; property. We expect to eventually see the value of a store by +; another thread when spinning on that location. +define i32 @test3(i32* noalias %addr.i, i32* noalias %otheraddr) { +; CHECK-LABEL: @test3 +; CHECK: load +; CHECK: fence +; CHECK: load +; CHECK: ret i32 %res + ; the following code is intended to model the unrolling of + ; two iterations in a spin loop of the form: + ; do { fence acquire; tmp = *%addr.i; } while (!tmp); + ; It's hopefully clear that allowing PRE to turn this into: + ; if (!*%addr.i) while(true) {} would be unfortunate + fence acquire + %a = load i32, i32* %addr.i, align 4 + fence acquire + %a2 = load i32, i32* %addr.i, align 4 + %res = sub i32 %a, %a2 + ret i32 %res +} + +; Another example of why forwarding across an acquire fence is problematic +; can be seen in a normal locking operation. Say we had: +; *p = 5; unlock(l); lock(l); use(p); +; forwarding the store to p would be invalid. A reasonable implementation +; of unlock and lock might be: +; unlock() { atomicrmw sub %l, 1 unordered; fence release } +; lock() { +; do { +; %res = cmpxchg %p, 0, 1, monotonic monotonic +; } while(!%res.success) +; fence acquire; +; } +; Given we chose to forward across the release fence, we clearly can't forward +; across the acquire fence as well. 
+ Index: llvm/trunk/test/Transforms/NewGVN/flags.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/flags.ll +++ llvm/trunk/test/Transforms/NewGVN/flags.ll @@ -0,0 +1,19 @@ +; XFAIL: * +; RUN: opt -newgvn -S < %s | FileCheck %s + +declare void @use(i1) + +define void @test1(float %x, float %y) { +entry: + %cmp1 = fcmp nnan oeq float %y, %x + %cmp2 = fcmp oeq float %x, %y + call void @use(i1 %cmp1) + call void @use(i1 %cmp2) + ret void +} + +; CHECK-LABEL: define void @test1( +; CHECK: %[[cmp:.*]] = fcmp oeq float %y, %x +; CHECK-NEXT: call void @use(i1 %[[cmp]]) +; CHECK-NEXT: call void @use(i1 %[[cmp]]) +; CHECK-NEXT: ret void Index: llvm/trunk/test/Transforms/NewGVN/fold-const-expr.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/fold-const-expr.ll +++ llvm/trunk/test/Transforms/NewGVN/fold-const-expr.ll @@ -0,0 +1,100 @@ +; XFAIL: * +; GVN failed to do constant expression folding and expanded +; them unfolded in many places, producing exponentially large const +; expressions. As a result, the compilation never finished. 
+; This test checks that we are folding constant expression +; PR 28418 +; RUN: opt -newgvn -S < %s | FileCheck %s + +%2 = type { i32, i32, i32, i32, i32 } +define i32 @_Z16vector3util_mainv(i32 %x, i32 %y) { + %tmp1 = alloca %2, align 4 + %tmp114 = getelementptr inbounds %2, %2* %tmp1, i64 0, i32 1 + %tmp115 = bitcast i32* %tmp114 to <4 x i32>* + store <4 x i32> , <4 x i32>* %tmp115, align 4 + %tmp1683 = getelementptr inbounds %2, %2* %tmp1, i64 0, i32 1 + %tmp1688 = load i32, i32* %tmp1683, align 4 + %tmp1693 = shl i32 %tmp1688, 5 + %tmp1694 = xor i32 %tmp1693, %tmp1688 + %tmp1695 = lshr i32 %tmp1694, 7 + %tmp1696 = xor i32 %tmp1695, %tmp1694 + %tmp1697 = shl i32 %tmp1696, 22 + %tmp1698 = xor i32 %tmp1697, %tmp1696 + %tmp1707 = shl i32 %tmp1698, 5 + %tmp1708 = xor i32 %tmp1707, %tmp1698 + %tmp1709 = lshr i32 %tmp1708, 7 + %tmp1710 = xor i32 %tmp1709, %tmp1708 + %tmp1711 = shl i32 %tmp1710, 22 + %tmp1712 = xor i32 %tmp1711, %tmp1710 + %tmp1721 = shl i32 %tmp1712, 5 + %tmp1722 = xor i32 %tmp1721, %tmp1712 + %tmp1723 = lshr i32 %tmp1722, 7 + %tmp1724 = xor i32 %tmp1723, %tmp1722 + %tmp1725 = shl i32 %tmp1724, 22 + %tmp1726 = xor i32 %tmp1725, %tmp1724 + %tmp1735 = shl i32 %tmp1726, 5 + %tmp1736 = xor i32 %tmp1735, %tmp1726 + %tmp1737 = lshr i32 %tmp1736, 7 + %tmp1738 = xor i32 %tmp1737, %tmp1736 + %tmp1739 = shl i32 %tmp1738, 22 + %tmp1740 = xor i32 %tmp1739, %tmp1738 + store i32 %tmp1740, i32* %tmp1683, align 4 +; CHECK: store i32 310393545, i32* %tmp114, align 4 + %tmp1756 = getelementptr inbounds %2, %2* %tmp1, i64 0, i32 1 + %tmp1761 = load i32, i32* %tmp1756, align 4 + %tmp1766 = shl i32 %tmp1761, 5 + %tmp1767 = xor i32 %tmp1766, %tmp1761 + %tmp1768 = lshr i32 %tmp1767, 7 + %tmp1769 = xor i32 %tmp1768, %tmp1767 + %tmp1770 = shl i32 %tmp1769, 22 + %tmp1771 = xor i32 %tmp1770, %tmp1769 + %tmp1780 = shl i32 %tmp1771, 5 + %tmp1781 = xor i32 %tmp1780, %tmp1771 + %tmp1782 = lshr i32 %tmp1781, 7 + %tmp1783 = xor i32 %tmp1782, %tmp1781 + %tmp1784 = shl i32 %tmp1783, 22 
+ %tmp1785 = xor i32 %tmp1784, %tmp1783 + %tmp1794 = shl i32 %tmp1785, 5 + %tmp1795 = xor i32 %tmp1794, %tmp1785 + %tmp1796 = lshr i32 %tmp1795, 7 + %tmp1797 = xor i32 %tmp1796, %tmp1795 + %tmp1798 = shl i32 %tmp1797, 22 + %tmp1799 = xor i32 %tmp1798, %tmp1797 + %tmp1808 = shl i32 %tmp1799, 5 + %tmp1809 = xor i32 %tmp1808, %tmp1799 + %tmp1810 = lshr i32 %tmp1809, 7 + %tmp1811 = xor i32 %tmp1810, %tmp1809 + %tmp1812 = shl i32 %tmp1811, 22 + %tmp1813 = xor i32 %tmp1812, %tmp1811 + store i32 %tmp1813, i32* %tmp1756, align 4 +; CHECK: store i32 -383584258, i32* %tmp114, align 4 + %tmp2645 = getelementptr inbounds %2, %2* %tmp1, i64 0, i32 1 + %tmp2650 = load i32, i32* %tmp2645, align 4 + %tmp2655 = shl i32 %tmp2650, 5 + %tmp2656 = xor i32 %tmp2655, %tmp2650 + %tmp2657 = lshr i32 %tmp2656, 7 + %tmp2658 = xor i32 %tmp2657, %tmp2656 + %tmp2659 = shl i32 %tmp2658, 22 + %tmp2660 = xor i32 %tmp2659, %tmp2658 + %tmp2669 = shl i32 %tmp2660, 5 + %tmp2670 = xor i32 %tmp2669, %tmp2660 + %tmp2671 = lshr i32 %tmp2670, 7 + %tmp2672 = xor i32 %tmp2671, %tmp2670 + %tmp2673 = shl i32 %tmp2672, 22 + %tmp2674 = xor i32 %tmp2673, %tmp2672 + %tmp2683 = shl i32 %tmp2674, 5 + %tmp2684 = xor i32 %tmp2683, %tmp2674 + %tmp2685 = lshr i32 %tmp2684, 7 + %tmp2686 = xor i32 %tmp2685, %tmp2684 + %tmp2687 = shl i32 %tmp2686, 22 + %tmp2688 = xor i32 %tmp2687, %tmp2686 + %tmp2697 = shl i32 %tmp2688, 5 + %tmp2698 = xor i32 %tmp2697, %tmp2688 + %tmp2699 = lshr i32 %tmp2698, 7 + %tmp2700 = xor i32 %tmp2699, %tmp2698 + %tmp2701 = shl i32 %tmp2700, 22 + %tmp2702 = xor i32 %tmp2701, %tmp2700 + store i32 %tmp2702, i32* %tmp2645, align 4 +; CHECK: store i32 -57163022, i32* %tmp114, align 4 + ret i32 0 +} Index: llvm/trunk/test/Transforms/NewGVN/fpmath.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/fpmath.ll +++ llvm/trunk/test/Transforms/NewGVN/fpmath.ll @@ -0,0 +1,45 @@ +; RUN: opt -newgvn -S < %s | FileCheck %s + +define double @test1(double %x, 
double %y) { +; CHECK: @test1(double %x, double %y) +; CHECK: %add1 = fadd double %x, %y +; CHECK-NOT: fpmath +; CHECK: %foo = fadd double %add1, %add1 + %add1 = fadd double %x, %y, !fpmath !0 + %add2 = fadd double %x, %y + %foo = fadd double %add1, %add2 + ret double %foo +} + +define double @test2(double %x, double %y) { +; CHECK: @test2(double %x, double %y) +; CHECK: %add1 = fadd double %x, %y, !fpmath !0 +; CHECK: %foo = fadd double %add1, %add1 + %add1 = fadd double %x, %y, !fpmath !0 + %add2 = fadd double %x, %y, !fpmath !0 + %foo = fadd double %add1, %add2 + ret double %foo +} + +define double @test3(double %x, double %y) { +; CHECK: @test3(double %x, double %y) +; CHECK: %add1 = fadd double %x, %y, !fpmath !1 +; CHECK: %foo = fadd double %add1, %add1 + %add1 = fadd double %x, %y, !fpmath !1 + %add2 = fadd double %x, %y, !fpmath !0 + %foo = fadd double %add1, %add2 + ret double %foo +} + +define double @test4(double %x, double %y) { +; CHECK: @test4(double %x, double %y) +; CHECK: %add1 = fadd double %x, %y, !fpmath !1 +; CHECK: %foo = fadd double %add1, %add1 + %add1 = fadd double %x, %y, !fpmath !0 + %add2 = fadd double %x, %y, !fpmath !1 + %foo = fadd double %add1, %add2 + ret double %foo +} + +!0 = !{ float 5.0 } +!1 = !{ float 2.5 } Index: llvm/trunk/test/Transforms/NewGVN/funclet.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/funclet.ll +++ llvm/trunk/test/Transforms/NewGVN/funclet.ll @@ -0,0 +1,44 @@ +; RUN: opt -basicaa -newgvn -S < %s | FileCheck %s +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" +target triple = "i686-pc-windows-msvc" + +%eh.ThrowInfo = type { i32, i8*, i8*, i8* } +%struct.A = type { i32* } + +@"_TI1?AUA@@" = external constant %eh.ThrowInfo + +define i8 @f() personality i32 (...)* @__CxxFrameHandler3 { +entry: + %b = alloca i8 + %c = alloca i8 + store i8 42, i8* %b + store i8 13, i8* %c + invoke void @_CxxThrowException(i8* %b, %eh.ThrowInfo* 
nonnull @"_TI1?AUA@@") + to label %unreachable unwind label %catch.dispatch + +catch.dispatch: ; preds = %entry + %cs1 = catchswitch within none [label %catch] unwind to caller + +catch: ; preds = %catch.dispatch + %catchpad = catchpad within %cs1 [i8* null, i32 64, i8* null] + store i8 5, i8* %b + catchret from %catchpad to label %try.cont + +try.cont: ; preds = %catch + %load_b = load i8, i8* %b + %load_c = load i8, i8* %c + %add = add i8 %load_b, %load_c + ret i8 %add + +unreachable: ; preds = %entry + unreachable +} +; CHECK-LABEL: define i8 @f( +; CHECK: %[[load_b:.*]] = load i8, i8* %b +; CHECK-NEXT: %[[load_c:.*]] = load i8, i8* %c +; CHECK-NEXT: %[[add:.*]] = add i8 %[[load_b]], %[[load_c]] +; CHECK-NEXT: ret i8 %[[add]] + +declare i32 @__CxxFrameHandler3(...) + +declare x86_stdcallcc void @_CxxThrowException(i8*, %eh.ThrowInfo*) Index: llvm/trunk/test/Transforms/NewGVN/invariant.group.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/invariant.group.ll +++ llvm/trunk/test/Transforms/NewGVN/invariant.group.ll @@ -0,0 +1,338 @@ +; XFAIL: * +; RUN: opt < %s -newgvn -S | FileCheck %s + +%struct.A = type { i32 (...)** } +@_ZTV1A = available_externally unnamed_addr constant [3 x i8*] [i8* null, i8* bitcast (i8** @_ZTI1A to i8*), i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)], align 8 +@_ZTI1A = external constant i8* + +@unknownPtr = external global i8 + +; CHECK-LABEL: define i8 @simple() { +define i8 @simple() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + call void @foo(i8* %ptr) + + %a = load i8, i8* %ptr, !invariant.group !0 + %b = load i8, i8* %ptr, !invariant.group !0 + %c = load i8, i8* %ptr, !invariant.group !0 +; CHECK: ret i8 42 + ret i8 %a +} + +; CHECK-LABEL: define i8 @optimizable1() { +define i8 @optimizable1() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + %ptr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr) + %a = load 
i8, i8* %ptr, !invariant.group !0 + + call void @foo(i8* %ptr2); call to use %ptr2 +; CHECK: ret i8 42 + ret i8 %a +} + +; CHECK-LABEL: define i8 @optimizable2() { +define i8 @optimizable2() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + call void @foo(i8* %ptr) + + store i8 13, i8* %ptr ; can't use this store with invariant.group + %a = load i8, i8* %ptr + call void @bar(i8 %a) ; call to use %a + + call void @foo(i8* %ptr) + %b = load i8, i8* %ptr, !invariant.group !0 + +; CHECK: ret i8 42 + ret i8 %b +} + +; CHECK-LABEL: define i8 @unoptimizable1() { +define i8 @unoptimizable1() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) + %a = load i8, i8* %ptr, !invariant.group !0 +; CHECK: ret i8 %a + ret i8 %a +} + +; CHECK-LABEL: define void @indirectLoads() { +define void @indirectLoads() { +entry: + %a = alloca %struct.A*, align 8 + %0 = bitcast %struct.A** %a to i8* + + %call = call i8* @getPointer(i8* null) + %1 = bitcast i8* %call to %struct.A* + call void @_ZN1AC1Ev(%struct.A* %1) + %2 = bitcast %struct.A* %1 to i8*** + +; CHECK: %vtable = load {{.*}} !invariant.group + %vtable = load i8**, i8*** %2, align 8, !invariant.group !2 + %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2) + call void @llvm.assume(i1 %cmp.vtables) + + store %struct.A* %1, %struct.A** %a, align 8 + %3 = load %struct.A*, %struct.A** %a, align 8 + %4 = bitcast %struct.A* %3 to void (%struct.A*)*** + +; CHECK: call void @_ZN1A3fooEv( + %vtable1 = load void (%struct.A*)**, void (%struct.A*)*** %4, align 8, !invariant.group !2 + %vfn = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable1, i64 0 + %5 = load void (%struct.A*)*, void (%struct.A*)** %vfn, align 8 + call void %5(%struct.A* %3) + %6 = load %struct.A*, %struct.A** %a, align 8 + %7 = bitcast %struct.A* %6 to void (%struct.A*)*** + +; CHECK: call void @_ZN1A3fooEv( + %vtable2 = load void (%struct.A*)**, 
void (%struct.A*)*** %7, align 8, !invariant.group !2 + %vfn3 = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable2, i64 0 + %8 = load void (%struct.A*)*, void (%struct.A*)** %vfn3, align 8 + + call void %8(%struct.A* %6) + %9 = load %struct.A*, %struct.A** %a, align 8 + %10 = bitcast %struct.A* %9 to void (%struct.A*)*** + + %vtable4 = load void (%struct.A*)**, void (%struct.A*)*** %10, align 8, !invariant.group !2 + %vfn5 = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable4, i64 0 + %11 = load void (%struct.A*)*, void (%struct.A*)** %vfn5, align 8 +; CHECK: call void @_ZN1A3fooEv( + call void %11(%struct.A* %9) + + %vtable5 = load i8**, i8*** %2, align 8, !invariant.group !2 + %vfn6 = getelementptr inbounds i8*, i8** %vtable5, i64 0 + %12 = bitcast i8** %vfn6 to void (%struct.A*)** + %13 = load void (%struct.A*)*, void (%struct.A*)** %12, align 8 +; CHECK: call void @_ZN1A3fooEv( + call void %13(%struct.A* %9) + + ret void +} + +; CHECK-LABEL: define void @combiningBitCastWithLoad() { +define void @combiningBitCastWithLoad() { +entry: + %a = alloca %struct.A*, align 8 + %0 = bitcast %struct.A** %a to i8* + + %call = call i8* @getPointer(i8* null) + %1 = bitcast i8* %call to %struct.A* + call void @_ZN1AC1Ev(%struct.A* %1) + %2 = bitcast %struct.A* %1 to i8*** + +; CHECK: %vtable = load {{.*}} !invariant.group + %vtable = load i8**, i8*** %2, align 8, !invariant.group !2 + %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2) + + store %struct.A* %1, %struct.A** %a, align 8 +; CHECK-NOT: !invariant.group + %3 = load %struct.A*, %struct.A** %a, align 8 + %4 = bitcast %struct.A* %3 to void (%struct.A*)*** + + %vtable1 = load void (%struct.A*)**, void (%struct.A*)*** %4, align 8, !invariant.group !2 + %vfn = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable1, i64 0 + %5 = load void (%struct.A*)*, void (%struct.A*)** %vfn, align 8 + call void %5(%struct.A* 
%3) + + ret void +} + +; CHECK-LABEL:define void @loadCombine() { +define void @loadCombine() { +enter: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) +; CHECK: %[[A:.*]] = load i8, i8* %ptr, !invariant.group + %a = load i8, i8* %ptr, !invariant.group !0 +; CHECK-NOT: load + %b = load i8, i8* %ptr, !invariant.group !1 +; CHECK: call void @bar(i8 %[[A]]) + call void @bar(i8 %a) +; CHECK: call void @bar(i8 %[[A]]) + call void @bar(i8 %b) + ret void +} + +; CHECK-LABEL: define void @loadCombine1() { +define void @loadCombine1() { +enter: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) +; CHECK: %[[D:.*]] = load i8, i8* %ptr, !invariant.group + %c = load i8, i8* %ptr +; CHECK-NOT: load + %d = load i8, i8* %ptr, !invariant.group !1 +; CHECK: call void @bar(i8 %[[D]]) + call void @bar(i8 %c) +; CHECK: call void @bar(i8 %[[D]]) + call void @bar(i8 %d) + ret void +} + +; CHECK-LABEL: define void @loadCombine2() { +define void @loadCombine2() { +enter: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) +; CHECK: %[[E:.*]] = load i8, i8* %ptr, !invariant.group + %e = load i8, i8* %ptr, !invariant.group !1 +; CHECK-NOT: load + %f = load i8, i8* %ptr +; CHECK: call void @bar(i8 %[[E]]) + call void @bar(i8 %e) +; CHECK: call void @bar(i8 %[[E]]) + call void @bar(i8 %f) + ret void +} + +; CHECK-LABEL: define void @loadCombine3() { +define void @loadCombine3() { +enter: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) +; CHECK: %[[E:.*]] = load i8, i8* %ptr, !invariant.group ![[OneMD:[0-9]]] + %e = load i8, i8* %ptr, !invariant.group !1 +; CHECK-NOT: load + %f = load i8, i8* %ptr, !invariant.group !1 +; CHECK: call void @bar(i8 %[[E]]) + call void @bar(i8 %e) +; CHECK: call void @bar(i8 %[[E]]) + call void @bar(i8 %f) + ret void +} + +; CHECK-LABEL: define i8 @unoptimizable2() { +define i8 @unoptimizable2() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr + call void @foo(i8* %ptr) + %a = 
load i8, i8* %ptr + call void @foo(i8* %ptr) + %b = load i8, i8* %ptr, !invariant.group !0 + +; CHECK: ret i8 %a + ret i8 %a +} + +; CHECK-LABEL: define i8 @unoptimizable3() { +define i8 @unoptimizable3() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + %ptr2 = call i8* @getPointer(i8* %ptr) + %a = load i8, i8* %ptr2, !invariant.group !0 + +; CHECK: ret i8 %a + ret i8 %a +} + +; CHECK-LABEL: define i8 @unoptimizable4() { +define i8 @unoptimizable4() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + %ptr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr) + %a = load i8, i8* %ptr2, !invariant.group !0 + +; CHECK: ret i8 %a + ret i8 %a +} + +; CHECK-LABEL: define i8 @volatile1() { +define i8 @volatile1() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + call void @foo(i8* %ptr) + %a = load i8, i8* %ptr, !invariant.group !0 + %b = load volatile i8, i8* %ptr +; CHECK: call void @bar(i8 %b) + call void @bar(i8 %b) + + %c = load volatile i8, i8* %ptr, !invariant.group !0 +; FIXME: we could change %c to 42, preserving volatile load +; CHECK: call void @bar(i8 %c) + call void @bar(i8 %c) +; CHECK: ret i8 42 + ret i8 %a +} + +; CHECK-LABEL: define i8 @volatile2() { +define i8 @volatile2() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + call void @foo(i8* %ptr) + %a = load i8, i8* %ptr, !invariant.group !0 + %b = load volatile i8, i8* %ptr +; CHECK: call void @bar(i8 %b) + call void @bar(i8 %b) + + %c = load volatile i8, i8* %ptr, !invariant.group !0 +; FIXME: we could change %c to 42, preserving volatile load +; CHECK: call void @bar(i8 %c) + call void @bar(i8 %c) +; CHECK: ret i8 42 + ret i8 %a +} + +; CHECK-LABEL: define i8 @fun() { +define i8 @fun() { +entry: + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + call void @foo(i8* %ptr) + + %a = load i8, i8* %ptr, !invariant.group !0 ; Can assume that value under %ptr didn't change +; CHECK: call void 
@bar(i8 42) + call void @bar(i8 %a) + + call void @foo(i8* %ptr) + %b = load i8, i8* %ptr, !invariant.group !1 ; Can't assume anything, because group changed +; CHECK: call void @bar(i8 %b) + call void @bar(i8 %b) + + %newPtr = call i8* @getPointer(i8* %ptr) + %c = load i8, i8* %newPtr, !invariant.group !0 ; Can't assume anything, because we only have information about %ptr +; CHECK: call void @bar(i8 %c) + call void @bar(i8 %c) + + %unknownValue = load i8, i8* @unknownPtr +; FIXME: Can assume that %unknownValue == 42 +; CHECK: store i8 %unknownValue, i8* %ptr, !invariant.group !0 + store i8 %unknownValue, i8* %ptr, !invariant.group !0 + + %newPtr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr) + %d = load i8, i8* %newPtr2, !invariant.group !0 ; Can't step through invariant.group.barrier to get value of %ptr +; CHECK: ret i8 %d + ret i8 %d +} + +declare void @foo(i8*) +declare void @bar(i8) +declare i8* @getPointer(i8*) +declare void @_ZN1A3fooEv(%struct.A*) +declare void @_ZN1AC1Ev(%struct.A*) +declare i8* @llvm.invariant.group.barrier(i8*) + +; Function Attrs: nounwind +declare void @llvm.assume(i1 %cmp.vtables) #0 + + +attributes #0 = { nounwind } +; CHECK: ![[OneMD]] = !{!"other ptr"} +!0 = !{!"magic ptr"} +!1 = !{!"other ptr"} +!2 = !{!"vtable_of_a"} Index: llvm/trunk/test/Transforms/NewGVN/invariant.start.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/invariant.start.ll +++ llvm/trunk/test/Transforms/NewGVN/invariant.start.ll @@ -0,0 +1,59 @@ +; Test to make sure llvm.invariant.start calls are not treated as clobbers. 
+; RUN: opt < %s -newgvn -S | FileCheck %s + + +declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly +declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind + +; We forward store to the load across the invariant.start intrinsic +define i8 @forward_store() { +; CHECK-LABEL: @forward_store +; CHECK: call {}* @llvm.invariant.start.p0i8(i64 1, i8* %a) +; CHECK-NOT: load +; CHECK: ret i8 0 + %a = alloca i8 + store i8 0, i8* %a + %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %a) + %r = load i8, i8* %a + ret i8 %r +} + +declare i8 @dummy(i8* nocapture) nounwind readonly + +; We forward store to the load in the non-local analysis case, +; i.e. invariant.start is in another basic block. +define i8 @forward_store_nonlocal(i1 %cond) { +; CHECK-LABEL: forward_store_nonlocal +; CHECK: call {}* @llvm.invariant.start.p0i8(i64 1, i8* %a) +; CHECK: ret i8 0 +; CHECK: ret i8 %val + %a = alloca i8 + store i8 0, i8* %a + %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %a) + br i1 %cond, label %loadblock, label %exit + +loadblock: + %r = load i8, i8* %a + ret i8 %r + +exit: + %val = call i8 @dummy(i8* %a) + ret i8 %val +} + +; We should not value forward %foo to the invariant.end corresponding to %bar. 
+define i8 @forward_store1() { +; CHECK-LABEL: forward_store1 +; CHECK: %foo = call {}* @llvm.invariant.start.p0i8 +; CHECK-NOT: load +; CHECK: %bar = call {}* @llvm.invariant.start.p0i8 +; CHECK: call void @llvm.invariant.end.p0i8({}* %bar, i64 1, i8* %a) +; CHECK: ret i8 0 + %a = alloca i8 + store i8 0, i8* %a + %foo = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %a) + %r = load i8, i8* %a + %bar = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %a) + call void @llvm.invariant.end.p0i8({}* %bar, i64 1, i8* %a) + ret i8 %r +} Index: llvm/trunk/test/Transforms/NewGVN/lifetime-simple.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/lifetime-simple.ll +++ llvm/trunk/test/Transforms/NewGVN/lifetime-simple.ll @@ -0,0 +1,20 @@ +; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin7" + +define i8 @test(i8* %P) nounwind { +; CHECK: lifetime.start +; CHECK-NOT: load +; CHECK: lifetime.end +entry: + call void @llvm.lifetime.start(i64 32, i8* %P) + %0 = load i8, i8* %P + store i8 1, i8* %P + call void @llvm.lifetime.end(i64 32, i8* %P) + %1 = load i8, i8* %P + ret i8 %1 +} + +declare void @llvm.lifetime.start(i64 %S, i8* nocapture %P) readonly +declare void @llvm.lifetime.end(i64 %S, i8* nocapture %P) Index: llvm/trunk/test/Transforms/NewGVN/load-constant-mem.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/load-constant-mem.ll +++ llvm/trunk/test/Transforms/NewGVN/load-constant-mem.ll @@ -0,0 +1,19 @@ +; RUN: opt < %s -basicaa -newgvn -instcombine -S | FileCheck %s +; PR4189 +@G = external constant [4 x i32] + +define i32 @test(i8* %p, i32 %i) nounwind { +entry: + %P = getelementptr [4 x i32], [4 x i32]* @G, i32 0, i32 %i + %A = load i32, i32* %P + store i8 4, i8* %p + %B = load 
i32, i32* %P + %C = sub i32 %A, %B + ret i32 %C +} + +; CHECK: define i32 @test(i8* %p, i32 %i) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: store i8 4, i8* %p, align 1 +; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: } Index: llvm/trunk/test/Transforms/NewGVN/load-from-unreachable-predecessor.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/load-from-unreachable-predecessor.ll +++ llvm/trunk/test/Transforms/NewGVN/load-from-unreachable-predecessor.ll @@ -0,0 +1,20 @@ +; RUN: opt -newgvn -S < %s | FileCheck %s + +; Check that an unreachable predecessor to a PHI node doesn't cause a crash. +; PR21625. + +define i32 @f(i32** %f) { +; CHECK: bb0: +; Load should be removed, since it's ignored. +; CHECK-NEXT: br label +bb0: + %bar = load i32*, i32** %f + br label %bb2 +bb1: + %zed = load i32*, i32** %f + br i1 false, label %bb1, label %bb2 +bb2: + %foo = phi i32* [ null, %bb0 ], [ %zed, %bb1 ] + %storemerge = load i32, i32* %foo + ret i32 %storemerge +} Index: llvm/trunk/test/Transforms/NewGVN/malloc-load-removal.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/malloc-load-removal.ll +++ llvm/trunk/test/Transforms/NewGVN/malloc-load-removal.ll @@ -0,0 +1,57 @@ +; XFAIL: * +; RUN: opt -S -basicaa -newgvn < %s | FileCheck %s +; RUN: opt -S -basicaa -newgvn -disable-simplify-libcalls < %s | FileCheck %s -check-prefix=CHECK_NO_LIBCALLS +; PR13694 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +declare i8* @malloc(i64) nounwind + +define noalias i8* @test1() nounwind uwtable ssp { +entry: + %call = tail call i8* @malloc(i64 100) nounwind + %0 = load i8, i8* %call, align 1 + %tobool = icmp eq i8 %0, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + store i8 0, i8* %call, 
align 1 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i8* %call + +; CHECK-LABEL: @test1( +; CHECK-NOT: load +; CHECK-NOT: icmp + +; CHECK_NO_LIBCALLS-LABEL: @test1( +; CHECK_NO_LIBCALLS: load +; CHECK_NO_LIBCALLS: icmp +} + +declare i8* @_Znwm(i64) nounwind + +define noalias i8* @test2() nounwind uwtable ssp { +entry: + %call = tail call i8* @_Znwm(i64 100) nounwind + %0 = load i8, i8* %call, align 1 + %tobool = icmp eq i8 %0, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + store i8 0, i8* %call, align 1 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i8* %call + +; CHECK-LABEL: @test2( +; CHECK-NOT: load +; CHECK-NOT: icmp + +; CHECK_NO_LIBCALLS-LABEL: @test2( +; CHECK_NO_LIBCALLS: load +; CHECK_NO_LIBCALLS: icmp +} Index: llvm/trunk/test/Transforms/NewGVN/no_speculative_loads_with_asan.ll =================================================================== --- llvm/trunk/test/Transforms/NewGVN/no_speculative_loads_with_asan.ll +++ llvm/trunk/test/Transforms/NewGVN/no_speculative_loads_with_asan.ll @@ -0,0 +1,55 @@ +; RUN: opt -O3 -S %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +declare noalias i8* @_Znam(i64) #1 + +define i32 @TestNoAsan() { + %1 = tail call noalias i8* @_Znam(i64 2) + %2 = getelementptr inbounds i8, i8* %1, i64 1 + store i8 0, i8* %2, align 1 + store i8 0, i8* %1, align 1 + %3 = bitcast i8* %1 to i16* + %4 = load i16, i16* %3, align 4 + %5 = icmp eq i16 %4, 0 + br i1 %5, label %11, label %6 + +;