Index: llvm/include/llvm/Transforms/IPO/Attributor.h =================================================================== --- llvm/include/llvm/Transforms/IPO/Attributor.h +++ llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1701,6 +1701,9 @@ /// Return true if "undefined behavior" is known. bool isKnownToCauseUB() const { return getKnown(); } + /// Return true if "undefined behavior" is known for a specific instruction. + virtual bool isKnownToCauseUB(Instruction *I) const = 0; + /// Return an IR position, see struct IRPosition. const IRPosition &getIRPosition() const override { return *this; } Index: llvm/lib/Transforms/IPO/Attributor.cpp =================================================================== --- llvm/lib/Transforms/IPO/Attributor.cpp +++ llvm/lib/Transforms/IPO/Attributor.cpp @@ -1993,28 +1993,31 @@ AAUndefinedBehaviorImpl(const IRPosition &IRP) : AAUndefinedBehavior(IRP) {} /// See AbstractAttribute::updateImpl(...). - // TODO: We should not only check instructions that access memory // through a pointer (i.e. also branches etc.) ChangeStatus updateImpl(Attributor &A) override { - const size_t PrevSize = NoUBMemAccessInsts.size(); + const size_t UBPrevSize = KnownUBInsts.size(); + const size_t NoUBPrevSize = AssumedNoUBInsts.size(); auto InspectMemAccessInstForUB = [&](Instruction &I) { // Skip instructions that are already saved. - if (NoUBMemAccessInsts.count(&I) || UBMemAccessInsts.count(&I)) + if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I)) return true; - // `InspectMemAccessInstForUB` is only called on instructions - // for which getPointerOperand() should give us their - // pointer operand unless they're volatile. + // If we reach here, we know we have an instruction + // that accesses memory through a pointer operand, + // for which getPointerOperand() should give it to us, + // unless it is volatile. 
const Value *PtrOp = getPointerOperand(&I); - if (!PtrOp) + if (!PtrOp) { + AssumedNoUBInsts.insert(&I); return true; + } // A memory access through a pointer is considered UB // only if the pointer has constant null value. // TODO: Expand it to not only check constant values. if (!isa<ConstantPointerNull>(PtrOp)) { - NoUBMemAccessInsts.insert(&I); + AssumedNoUBInsts.insert(&I); return true; } const Type *PtrTy = PtrOp->getType(); @@ -2025,10 +2028,50 @@ // A memory access using constant null pointer is only considered UB // if null pointer is _not_ defined for the target platform. - if (!llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace())) - UBMemAccessInsts.insert(&I); + if (llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace())) + AssumedNoUBInsts.insert(&I); else - NoUBMemAccessInsts.insert(&I); + KnownUBInsts.insert(&I); + return true; + }; + + auto InspectBrInstForUB = [&](Instruction &I) { + // A conditional branch instruction is considered UB if it has `undef` + // condition. + + // Skip instructions that are already saved. + if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I)) + return true; + + // We know we have a branch instruction. + auto BrInst = cast<BranchInst>(&I); + + // Unconditional branches are never considered UB. + if (BrInst->isUnconditional()) + return true; + + const Value *Cond = BrInst->getCondition(); + + const auto &ValueSimplifyAA = + A.getAAFor<AAValueSimplify>(*this, IRPosition::value(*Cond)); + Optional<Value *> SimplifiedV = + ValueSimplifyAA.getAssumedSimplifiedValue(A); + if (!ValueSimplifyAA.isKnown()) { + // Don't depend on assumed values. + return true; + } + if (!SimplifiedV.hasValue()) { + // If it is known (which we tested above) but it doesn't have a value, + // then we can assume `undef` and hence the instruction is UB. 
+ KnownUBInsts.insert(&I); + return true; + } + Value *CondVal = SimplifiedV.getValue(); + if (!isa<UndefValue>(CondVal)) { + AssumedNoUBInsts.insert(&I); + return true; + } + KnownUBInsts.insert(&I); return true; }; @@ -2036,20 +2079,48 @@ {Instruction::Load, Instruction::Store, Instruction::AtomicCmpXchg, Instruction::AtomicRMW}); - if (PrevSize != NoUBMemAccessInsts.size()) + A.checkForAllInstructions(InspectBrInstForUB, *this, {Instruction::Br}); + if (NoUBPrevSize != AssumedNoUBInsts.size() || + UBPrevSize != KnownUBInsts.size()) return ChangeStatus::CHANGED; return ChangeStatus::UNCHANGED; } + bool isKnownToCauseUB(Instruction *I) const override { + return KnownUBInsts.count(I); + } + bool isAssumedToCauseUB(Instruction *I) const override { - return UBMemAccessInsts.count(I); + // In simple words, if an instruction is not in the assumed to _not_ + // cause UB, then it is assumed UB (that includes those + // in the KnownUBInsts set). The rest is boilerplate + // to ensure that it is one of the instructions we test + // for UB. + + switch (I->getOpcode()) { + case Instruction::Load: + case Instruction::Store: + case Instruction::AtomicCmpXchg: + case Instruction::AtomicRMW: + return !AssumedNoUBInsts.count(I); + case Instruction::Br: { + auto BrInst = cast<BranchInst>(I); + if (BrInst->isUnconditional()) + return false; + return !AssumedNoUBInsts.count(I); + } break; + default: + return false; + } + return false; } ChangeStatus manifest(Attributor &A) override { - if (!UBMemAccessInsts.size()) + if (KnownUBInsts.empty()) return ChangeStatus::UNCHANGED; - for (Instruction *I : UBMemAccessInsts) + for (Instruction *I : KnownUBInsts) { changeToUnreachable(I, /* UseLLVMTrap */ false); + } return ChangeStatus::CHANGED; } @@ -2058,22 +2129,36 @@ return getAssumed() ? "undefined-behavior" : "no-ub"; } + /// Note: The correctness of this analysis depends on the fact that the + /// following 2 sets will stop changing after some point. + /// "Change" here means that their size changes. 
+ /// The size of each set is monotonically increasing + /// (we only add items to them) and it is upper bounded by the number of + /// instructions in the processed function (we can never save more + /// elements in either set than this number). Hence, at some point, + /// they will stop increasing. + /// Consequently, at some point, both sets will have stopped + /// changing, effectively making the analysis reach a fixpoint. + + /// Note: These 2 sets are disjoint and an instruction can be considered + /// one of 3 things: + /// 1) Known to cause UB (AAUndefinedBehavior could prove it) and put it in + /// the KnownUBInsts set. + /// 2) Assumed to cause UB (in every updateImpl, AAUndefinedBehavior + /// has a reason to assume it). + /// 3) Assumed to not cause UB. Every other instruction - AAUndefinedBehavior + /// could not find a reason to assume or prove that it can cause UB, + /// hence it assumes it doesn't. We have a set for these instructions + /// so that we don't reprocess them in every update. + /// Note however that instructions in this set may cause UB. + protected: - // A set of all the (live) memory accessing instructions that _are_ assumed to - // cause UB. - SmallPtrSet<Instruction *, 8> UBMemAccessInsts; + /// A set of all live instructions _known_ to cause UB. + SmallPtrSet<Instruction *, 8> KnownUBInsts; private: - // A set of all the (live) memory accessing instructions - // that are _not_ assumed to cause UB. - // Note: The correctness of the procedure depends on the fact that this - // set stops changing after some point. "Change" here means that the size - // of the set changes. The size of this set is monotonically increasing - // (we only add items to it) and is upper bounded by the number of memory - // accessing instructions in the processed function (we can never save more - // elements in this set than this number). Hence, the size of this set, at - // some point, will stop increasing, effectively reaching a fixpoint. 
- SmallPtrSet<Instruction *, 8> NoUBMemAccessInsts; + /// A set of all the (live) instructions that are assumed to _not_ cause UB. + SmallPtrSet<Instruction *, 8> AssumedNoUBInsts; }; struct AAUndefinedBehaviorFunction final : AAUndefinedBehaviorImpl { @@ -2085,7 +2170,7 @@ STATS_DECL(UndefinedBehaviorInstruction, Instruction, "Number of instructions known to have UB"); BUILD_STAT_NAME(UndefinedBehaviorInstruction, Instruction) += - UBMemAccessInsts.size(); + KnownUBInsts.size(); } }; @@ -5591,6 +5676,7 @@ case Instruction::Invoke: case Instruction::CleanupRet: case Instruction::CatchSwitch: + case Instruction::Br: case Instruction::AtomicRMW: case Instruction::AtomicCmpXchg: case Instruction::Resume: Index: llvm/test/Transforms/Attributor/undefined_behavior.ll =================================================================== --- llvm/test/Transforms/Attributor/undefined_behavior.ll +++ llvm/test/Transforms/Attributor/undefined_behavior.ll @@ -8,9 +8,10 @@ ; -- Load tests -- -; ATTRIBUTOR-LABEL: define void @load_wholly_unreachable() define void @load_wholly_unreachable() { -; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR-LABEL: @load_wholly_unreachable( +; ATTRIBUTOR-NEXT: unreachable +; %a = load i32, i32* null ret void } @@ -144,3 +145,135 @@ %a = cmpxchg i32* null, i32 2, i32 3 acq_rel monotonic ret void } + +; Note: The unreachable on %t and %e is _not_ from AAUndefinedBehavior + +define i32 @cond_br_on_undef() { +; ATTRIBUTOR-LABEL: @cond_br_on_undef( +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: t: +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: e: +; ATTRIBUTOR-NEXT: unreachable +; + + br i1 undef, label %t, label %e +t: + ret i32 1 +e: + ret i32 2 +} + +; More complicated branching +define void @cond_br_on_undef2(i1 %cond) { +; ATTRIBUTOR-LABEL: @cond_br_on_undef2( +; ATTRIBUTOR-NEXT: br i1 [[COND:%.*]], label [[T1:%.*]], label [[E1:%.*]] +; ATTRIBUTOR: t1: +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: t2: +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: e2: +; ATTRIBUTOR-NEXT: unreachable +; 
ATTRIBUTOR: e1: +; ATTRIBUTOR-NEXT: ret void +; + + ; Valid branch - verify that this is not converted + ; to unreachable. + br i1 %cond, label %t1, label %e1 +t1: + br i1 undef, label %t2, label %e2 +t2: + ret void +e2: + ret void +e1: + ret void +} + +define i1 @ret_undef() { + ret i1 undef +} + +define void @cond_br_on_undef_interproc() { +; ATTRIBUTOR-LABEL: @cond_br_on_undef_interproc( +; ATTRIBUTOR-NEXT: %cond = call i1 @ret_undef() +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: t: +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: e: +; ATTRIBUTOR-NEXT: unreachable + + %cond = call i1 @ret_undef() + br i1 %cond, label %t, label %e +t: + ret void +e: + ret void +} + +define i1 @ret_undef2() { + br i1 true, label %t, label %e +t: + ret i1 undef +e: + ret i1 undef +} + +; More complicated interproc deduction of undef +define void @cond_br_on_undef_interproc2() { +; ATTRIBUTOR-LABEL: @cond_br_on_undef_interproc2( +; ATTRIBUTOR-NEXT: %cond = call i1 @ret_undef2() +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: t: +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: e: +; ATTRIBUTOR-NEXT: unreachable + %cond = call i1 @ret_undef2() + br i1 %cond, label %t, label %e +t: + ret void +e: + ret void +} + +; Branch on undef that depends on propagation of +; undef of a previous instruction. +; FIXME: Currently it doesn't propagate the undef. +define i32 @cond_br_on_undef3() { +; ATTRIBUTOR-LABEL: @cond_br_on_undef3( +; ATTRIBUTOR-NEXT: %cond = icmp ne i32 1, undef +; ATTRIBUTOR-NEXT: br i1 %cond, label %t, label %e +; ATTRIBUTOR: t: +; ATTRIBUTOR-NEXT: ret i32 1 +; ATTRIBUTOR: e: +; ATTRIBUTOR-NEXT: ret i32 2 + + %cond = icmp ne i32 1, undef + br i1 %cond, label %t, label %e +t: + ret i32 1 +e: + ret i32 2 +} + +; Branch on undef because of uninitialized value. +; FIXME: Currently it doesn't propagate the undef. 
+define i32 @cond_br_on_undef_uninit() { +; ATTRIBUTOR-LABEL: @cond_br_on_undef_uninit( +; ATTRIBUTOR-NEXT: %alloc = alloca i1 +; ATTRIBUTOR-NEXT: %cond = load i1, i1* %alloc +; ATTRIBUTOR-NEXT: br i1 %cond, label %t, label %e +; ATTRIBUTOR: t: +; ATTRIBUTOR-NEXT: ret i32 1 +; ATTRIBUTOR: e: +; ATTRIBUTOR-NEXT: ret i32 2 + + %alloc = alloca i1 + %cond = load i1, i1* %alloc + br i1 %cond, label %t, label %e +t: + ret i32 1 +e: + ret i32 2 +}