diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h --- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h @@ -6,6 +6,26 @@ // //===----------------------------------------------------------------------===// +// +// !! FIXME FIXME FIXME !! +// +// Python APIs nearly all can return an exception. They do this +// by returning NULL, or -1, or some such value and setting +// the exception state with PyErr_Set*(). Exceptions must be +// handled before further python API functions are called. Failure +// to do so will result in asserts on debug builds of python. +// It will also sometimes, but not usually result in crashes of +// release builds. +// +// Nearly all the code in this header does not handle python exceptions +// correctly. It should all be converted to return Expected<> or +// Error types to capture the exception. +// +// Everything in this file except functions that return Error or +// Expected<> is considered deprecated and should not be +// used in new code. If you need to use it, fix it first. +// + #ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H #define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H @@ -21,11 +41,13 @@ namespace lldb_private { +class PythonObject; class PythonBytes; class PythonString; class PythonList; class PythonDictionary; class PythonInteger; +class PythonException; class StructuredPythonObject : public StructuredData::Generic { public: @@ -72,8 +94,67 @@ // not call Py_INCREF. }; +namespace python { + +// Take a reference that you already own, and turn it into +// a PythonObject. +// +// Most python API methods will return a +1 reference +// if they succeed or NULL if and only if +// they set an exception. Use this to collect such return +// values, after checking for NULL. +// +// If T is not just PythonObject, then obj must be already be +// checked to be of the correct type. +template T Take(PyObject *obj) { + assert(obj); + assert(!PyErr_Occurred()); + T thing(PyRefType::Owned, obj); + assert(thing.IsValid()); + return std::move(thing); +} + +// Retain a reference you have borrowed, and turn it into +// a PythonObject. +// +// A minority of python APIs return a borrowed reference +// instead of a +1. They will also return NULL if and only +// if they set an exception. Use this to collect such return +// values, after checking for NULL. +// +// If T is not just PythonObject, then obj must be already be +// checked to be of the correct type. +template T Retain(PyObject *obj) { + assert(obj); + assert(!PyErr_Occurred()); + T thing(PyRefType::Borrowed, obj); + assert(thing.IsValid()); + return std::move(thing); +} + +} // namespace python + enum class PyInitialValue { Invalid, Empty }; +template struct PythonFormat; + +template <> struct PythonFormat { + static constexpr char format = 'K'; + static auto get(unsigned long long value) { return value; } +}; + +template <> struct PythonFormat { + static constexpr char format = 'L'; + static auto get(long long value) { return value; } +}; + +template +struct PythonFormat< + T, typename std::enable_if::value>::type> { + static constexpr char format = 'O'; + static auto get(const T &value) { return value.get(); } +}; + class PythonObject { public: PythonObject() : m_py_obj(nullptr) {} @@ -84,14 +165,19 @@ PythonObject(const PythonObject &rhs) : m_py_obj(nullptr) { Reset(rhs); } + PythonObject(PythonObject &&rhs) { + m_py_obj = rhs.m_py_obj; + rhs.m_py_obj = nullptr; + } + virtual ~PythonObject() { Reset(); } void Reset() { // Avoid calling the virtual method since it's not necessary // to actually validate the type of the PyObject if we're // just setting to null. - if (Py_IsInitialized()) - Py_XDECREF(m_py_obj); + if (m_py_obj && Py_IsInitialized()) + Py_DECREF(m_py_obj); m_py_obj = nullptr; } @@ -110,6 +196,8 @@ // PyRefType doesn't make sense, and the copy constructor should be used. void Reset(PyRefType type, const PythonObject &ref) = delete; + // FIXME We shouldn't have virtual anything. PythonObject should be a + // strictly pass-by-value type. virtual void Reset(PyRefType type, PyObject *py_obj) { if (py_obj == m_py_obj) return; @@ -123,7 +211,7 @@ // an owned reference by incrementing it. If it is an owned // reference (for example the caller allocated it with PyDict_New() // then we must *not* increment it. - if (Py_IsInitialized() && type == PyRefType::Borrowed) + if (m_py_obj && Py_IsInitialized() && type == PyRefType::Borrowed) Py_XINCREF(m_py_obj); } @@ -149,6 +237,17 @@ return *this; } + void Reset(PythonObject &&other) { + Reset(); + m_py_obj = other.m_py_obj; + other.m_py_obj = nullptr; + } + + PythonObject &operator=(PythonObject &&other) { + Reset(std::move(other)); + return *this; + } + PyObjectType GetObjectType() const; PythonString Repr() const; @@ -174,11 +273,13 @@ PythonObject GetAttributeValue(llvm::StringRef attribute) const; - bool IsValid() const; + bool IsNone() const { return m_py_obj == Py_None; } + + bool IsValid() const { return m_py_obj != nullptr; } - bool IsAllocated() const; + bool IsAllocated() const { return IsValid() && !IsNone(); } - bool IsNone() const; + explicit operator bool() const { return IsValid() && !IsNone(); } template T AsType() const { if (!T::Check(m_py_obj)) @@ -189,9 +290,92 @@ StructuredData::ObjectSP CreateStructuredObject() const; protected: + static llvm::Error nullDeref() { + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "A NULL PyObject* was dereferenced"); + } + static llvm::Error exception(const char *s = nullptr) { + return llvm::make_error(s); + } + +public: + template + llvm::Expected CallMethod(const char *name, + const T &... t) const { + const char format[] = {'(', PythonFormat::format..., ')', 0}; +#if PY_MAJOR_VERSION < 3 + PyObject *obj = PyObject_CallMethod(m_py_obj, const_cast(name), + const_cast(format), + PythonFormat::get(t)...); +#else + PyObject *obj = + PyObject_CallMethod(m_py_obj, name, format, PythonFormat::get(t)...); +#endif + if (!obj) + return exception(); + return python::Take(obj); + } + + llvm::Expected GetAttribute(const char *name) const { + if (!m_py_obj) + return nullDeref(); + PyObject *obj = PyObject_GetAttrString(m_py_obj, name); + if (!obj) + return exception(); + return python::Take(obj); + } + + llvm::Expected IsTrue() { + if (!m_py_obj) + return nullDeref(); + int r = PyObject_IsTrue(m_py_obj); + if (r < 0) + return exception(); + return !!r; + } + + llvm::Expected AsLongLong() { + if (!m_py_obj) + return nullDeref(); + assert(!PyErr_Occurred()); + long long r = PyLong_AsLongLong(m_py_obj); + if (PyErr_Occurred()) + return exception(); + return r; + } + + llvm::Expected IsInstance(const PythonObject &cls) { + if (!m_py_obj || !cls.IsValid()) + return nullDeref(); + int r = PyObject_IsInstance(m_py_obj, cls.get()); + if (r < 0) + return exception(); + return !!r; + } + +protected: PyObject *m_py_obj; }; +namespace python { + +// This is why C++ needs monads. +template llvm::Expected As(llvm::Expected &&obj) { + if (!obj) + return obj.takeError(); + if (!T::Check(obj.get().get())) + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "type error"); + return T(PyRefType::Borrowed, std::move(obj.get().get())); +} + +template <> llvm::Expected As(llvm::Expected &&obj); + +template <> +llvm::Expected As(llvm::Expected &&obj); + +} // namespace python + class PythonBytes : public PythonObject { public: PythonBytes(); @@ -245,9 +429,10 @@ class PythonString : public PythonObject { public: + static llvm::Expected FromUTF8(llvm::StringRef string); + PythonString(); - explicit PythonString(llvm::StringRef string); - explicit PythonString(const char *string); + explicit PythonString(llvm::StringRef string); // safe, null on error PythonString(PyRefType type, PyObject *o); ~PythonString() override; @@ -259,11 +444,13 @@ void Reset(PyRefType type, PyObject *py_obj) override; - llvm::StringRef GetString() const; + llvm::StringRef GetString() const; // safe, empty string on error + + llvm::Expected AsUTF8() const; size_t GetSize() const; - void SetString(llvm::StringRef string); + void SetString(llvm::StringRef string); // safe, null on error StructuredData::StringSP CreateStructuredString() const; }; @@ -406,7 +593,20 @@ static PythonModule AddModule(llvm::StringRef module); - static PythonModule ImportModule(llvm::StringRef module); + // safe, returns invalid on error; + static PythonModule ImportModule(llvm::StringRef name) { + std::string s = name; + auto mod = Import(s.c_str()); + if (!mod) { + llvm::consumeError(mod.takeError()); + return PythonModule(); + } + return std::move(mod.get()); + } + + static llvm::Expected Import(const char *name); + + llvm::Expected Get(const char *name); // Bring in the no-argument base class version using PythonObject::Reset; @@ -471,8 +671,59 @@ void Reset(File &file, const char *mode); lldb::FileUP GetUnderlyingFile() const; + + llvm::Expected ConvertToFile(bool borrowed = false); + llvm::Expected + ConvertToFileForcingUseOfScriptingIOMethods(bool borrowed = false); +}; + +class PythonException : public llvm::ErrorInfo { +private: + PyObject *m_exception_type, *m_exception, *m_traceback; + PyObject *m_repr_bytes; + +public: + static char ID; + const char *toCString() const; + PythonException(const char *caller = nullptr); + void Restore(); + ~PythonException(); + void log(llvm::raw_ostream &OS) const override; + std::error_code convertToErrorCode() const override; }; +// This extracts the underlying T out of an Expected and returns it. +// If the Expected is an Error instead of a T, that error will be converted +// into a python exception, and this will return a default-constructed T. +// +// This is appropriate for use right at the boundary of python calling into +// C++, such as in a SWIG typemap. In such a context you should simply +// check if the returned T is valid, and if it is, return a NULL back +// to python. This will result in the Error being raised as an exception +// from python code's point of view. +// +// For example: +// ``` +// Expected efoop = some_cpp_function(); +// Foo *foop = unwrapOrSetPythonException(efoop); +// if (!foop) +// return NULL; +// do_something(*foop); +// +// If the Error returned was itself created because a python exception was +// raised when C++ code called into python, then the original exception +// will be restored. Otherwise a simple string exception will be raised. +template T unwrapOrSetPythonException(llvm::Expected expected) { + if (expected) + return expected.get(); + llvm::handleAllErrors( + expected.takeError(), [](PythonException &E) { E.Restore(); }, + [](const llvm::ErrorInfoBase &E) { + PyErr_SetString(PyExc_Exception, E.message().c_str()); + }); + return T(); +} + } // namespace lldb_private #endif diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp --- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp @@ -18,6 +18,7 @@ #include "lldb/Host/File.h" #include "lldb/Host/FileSystem.h" #include "lldb/Interpreter/ScriptInterpreter.h" +#include "lldb/Utility/Log.h" #include "lldb/Utility/Stream.h" #include "llvm/ADT/StringSwitch.h" @@ -28,6 +29,22 @@ using namespace lldb_private; using namespace lldb; +using namespace lldb_private::python; +using llvm::Error; +using llvm::Expected; + +template <> Expected python::As(Expected &&obj) { + if (!obj) + return obj.takeError(); + return obj.get().IsTrue(); +} + +template <> +Expected python::As(Expected &&obj) { + if (!obj) + return obj.takeError(); + return obj.get().AsLongLong(); +} void StructuredPythonObject::Serialize(llvm::json::OStream &s) const { s.value(llvm::formatv("Python Obj: {0:X}", GetValue()).str()); @@ -167,12 +184,6 @@ PyObject_GetAttr(m_py_obj, py_attr.get())); } -bool PythonObject::IsNone() const { return m_py_obj == Py_None; } - -bool PythonObject::IsValid() const { return m_py_obj != nullptr; } - -bool PythonObject::IsAllocated() const { return IsValid() && !IsNone(); } - StructuredData::ObjectSP PythonObject::CreateStructuredObject() const { switch (GetObjectType()) { case PyObjectType::Dictionary: @@ -334,6 +345,17 @@ // PythonString +Expected PythonString::FromUTF8(llvm::StringRef string) { +#if PY_MAJOR_VERSION >= 3 + PyObject *str = PyUnicode_FromStringAndSize(string.data(), string.size()); +#else + PyObject *str = PyString_FromStringAndSize(string.data(), string.size()); +#endif + if (!str) + return llvm::make_error(); + return Take(str); +} + PythonString::PythonString(PyRefType type, PyObject *py_obj) : PythonObject() { Reset(type, py_obj); // Use "Reset()" to ensure that py_obj is a string } @@ -342,10 +364,6 @@ SetString(string); } -PythonString::PythonString(const char *string) : PythonObject() { - SetString(llvm::StringRef(string)); -} - PythonString::PythonString() : PythonObject() {} PythonString::~PythonString() {} @@ -376,8 +394,12 @@ // In Python 2, Don't store PyUnicode objects directly, because we need // access to their underlying character buffers which Python 2 doesn't // provide. - if (PyUnicode_Check(py_obj)) - result.Reset(PyRefType::Owned, PyUnicode_AsUTF8String(result.get())); + if (PyUnicode_Check(py_obj)) { + PyObject *s = PyUnicode_AsUTF8String(result.get()); + if (s == NULL) + PyErr_Clear(); + result.Reset(PyRefType::Owned, s); + } #endif // Calling PythonObject::Reset(const PythonObject&) will lead to stack // overflow since it calls back into the virtual implementation. @@ -385,8 +407,17 @@ } llvm::StringRef PythonString::GetString() const { + auto s = AsUTF8(); + if (!s) { + llvm::consumeError(s.takeError()); + return llvm::StringRef(""); + } + return s.get(); +} + +Expected PythonString::AsUTF8() const { if (!IsValid()) - return llvm::StringRef(); + return nullDeref(); Py_ssize_t size; const char *data; @@ -394,10 +425,16 @@ #if PY_MAJOR_VERSION >= 3 data = PyUnicode_AsUTF8AndSize(m_py_obj, &size); #else - char *c; - PyString_AsStringAndSize(m_py_obj, &c, &size); + char *c = NULL; + int r = PyString_AsStringAndSize(m_py_obj, &c, &size); + if (r < 0) + c = NULL; data = c; #endif + + if (!data) + return exception(); + return llvm::StringRef(data, size); } @@ -413,13 +450,13 @@ } void PythonString::SetString(llvm::StringRef string) { -#if PY_MAJOR_VERSION >= 3 - PyObject *unicode = PyUnicode_FromStringAndSize(string.data(), string.size()); - PythonObject::Reset(PyRefType::Owned, unicode); -#else - PyObject *str = PyString_FromStringAndSize(string.data(), string.size()); - PythonObject::Reset(PyRefType::Owned, str); -#endif + auto s = FromUTF8(string); + if (!s) { + llvm::consumeError(s.takeError()); + Reset(); + } else { + PythonObject::Reset(std::move(s.get())); + } } StructuredData::StringSP PythonString::CreateStructuredString() const { @@ -826,9 +863,23 @@ return PythonModule(PyRefType::Borrowed, PyImport_AddModule(str.c_str())); } -PythonModule PythonModule::ImportModule(llvm::StringRef module) { - std::string str = module.str(); - return PythonModule(PyRefType::Owned, PyImport_ImportModule(str.c_str())); +Expected PythonModule::Import(const char *name) { + PyObject *mod = PyImport_ImportModule(name); + if (!mod) + return exception(); + return Take(mod); +} + +Expected PythonModule::Get(const char *name) { + if (!IsValid()) + return nullDeref(); + PyObject *dict = PyModule_GetDict(m_py_obj); + if (!dict) + return exception(); + PyObject *item = PyDict_GetItemString(dict, name); + if (!item) + return exception(); + return Retain(item); } bool PythonModule::Check(PyObject *py_obj) { @@ -1045,4 +1096,58 @@ return file; } +const char *PythonException::toCString() const { + if (!m_repr_bytes) + return "unknown exception"; + return PyBytes_AS_STRING(m_repr_bytes); +} + +PythonException::PythonException(const char *caller) { + assert(PyErr_Occurred()); + m_exception_type = m_exception = m_traceback = m_repr_bytes = NULL; + PyErr_Fetch(&m_exception_type, &m_exception, &m_traceback); + PyErr_NormalizeException(&m_exception_type, &m_exception, &m_traceback); + PyErr_Clear(); + if (m_exception) { + PyObject *repr = PyObject_Repr(m_exception); + if (repr) { + m_repr_bytes = PyUnicode_AsEncodedString(repr, "utf-8", nullptr); + if (!m_repr_bytes) { + PyErr_Clear(); + } + Py_XDECREF(repr); + } else { + PyErr_Clear(); + } + } + Log *log = GetLogIfAllCategoriesSet(LIBLLDB_LOG_SCRIPT); + if (caller) + LLDB_LOGF(log, "%s failed with exception: %s", caller, toCString()); + else + LLDB_LOGF(log, "python exception: %s", toCString()); +} +void PythonException::Restore() { + if (m_exception_type && m_exception) { + PyErr_Restore(m_exception_type, m_exception, m_traceback); + } else { + PyErr_SetString(PyExc_Exception, toCString()); + } + m_exception_type = m_exception = m_traceback = NULL; +} + +PythonException::~PythonException() { + Py_XDECREF(m_exception_type); + Py_XDECREF(m_exception); + Py_XDECREF(m_traceback); + Py_XDECREF(m_repr_bytes); +} + +void PythonException::log(llvm::raw_ostream &OS) const { OS << toCString(); } + +std::error_code PythonException::convertToErrorCode() const { + return llvm::inconvertibleErrorCode(); +} + +char PythonException::ID = 0; + #endif