descriptor_pool.cc 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444
  1. // Protocol Buffers - Google's data interchange format
  2. // Copyright 2008 Google Inc. All rights reserved.
  3. // https://developers.google.com/protocol-buffers/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. // Implements the DescriptorPool, which collects all descriptors.
  31. #include <Python.h>
  32. #include <google/protobuf/descriptor.pb.h>
  33. #include <google/protobuf/dynamic_message.h>
  34. #include <google/protobuf/pyext/descriptor_pool.h>
  35. #include <google/protobuf/pyext/descriptor.h>
  36. #include <google/protobuf/pyext/message.h>
  37. #include <google/protobuf/pyext/scoped_pyobject_ptr.h>
  // Python 3 compatibility shims: map the Python 2 "PyString" API names used
  // in this file onto their Python 3 equivalents.
  #if PY_MAJOR_VERSION >= 3
    #define PyString_FromStringAndSize PyUnicode_FromStringAndSize
    #if PY_VERSION_HEX < 0x03030000
      #error "Python 3.0 - 3.2 are not supported."
    #endif
    // Extracts a character buffer and its length from either a unicode object
    // (as UTF-8) or a bytes object.  Evaluates to a negative value on failure.
    // PyUnicode_AsUTF8AndSize returns "const char*" starting with Python 3.7,
    // so the result is const_cast to keep the char** out-parameter usable on
    // every supported Python version.
    #define PyString_AsStringAndSize(ob, charpp, sizep)                       \
      (PyUnicode_Check(ob)                                                    \
           ? ((*(charpp) = const_cast<char*>(                                 \
                   PyUnicode_AsUTF8AndSize(ob, (sizep)))) == NULL ? -1 : 0)   \
           : PyBytes_AsStringAndSize(ob, (charpp), (sizep)))
  #endif
  48. namespace google {
  49. namespace protobuf {
  50. namespace python {
  51. // A map to cache Python Pools per C++ pointer.
  52. // Pointers are not owned here, and belong to the PyDescriptorPool.
  53. static hash_map<const DescriptorPool*, PyDescriptorPool*> descriptor_pool_map;
  54. namespace cdescriptor_pool {
  55. static PyDescriptorPool* NewDescriptorPool() {
  56. PyDescriptorPool* cdescriptor_pool = PyObject_New(
  57. PyDescriptorPool, &PyDescriptorPool_Type);
  58. if (cdescriptor_pool == NULL) {
  59. return NULL;
  60. }
  61. // Build a DescriptorPool for messages only declared in Python libraries.
  62. // generated_pool() contains all messages linked in C++ libraries, and is used
  63. // as underlay.
  64. cdescriptor_pool->pool = new DescriptorPool(DescriptorPool::generated_pool());
  65. DynamicMessageFactory* message_factory = new DynamicMessageFactory();
  66. // This option might be the default some day.
  67. message_factory->SetDelegateToGeneratedFactory(true);
  68. cdescriptor_pool->message_factory = message_factory;
  69. // TODO(amauryfa): Rewrite the SymbolDatabase in C so that it uses the same
  70. // storage.
  71. cdescriptor_pool->classes_by_descriptor =
  72. new PyDescriptorPool::ClassesByMessageMap();
  73. cdescriptor_pool->descriptor_options =
  74. new hash_map<const void*, PyObject *>();
  75. if (!descriptor_pool_map.insert(
  76. std::make_pair(cdescriptor_pool->pool, cdescriptor_pool)).second) {
  77. // Should never happen -- would indicate an internal error / bug.
  78. PyErr_SetString(PyExc_ValueError, "DescriptorPool already registered");
  79. return NULL;
  80. }
  81. return cdescriptor_pool;
  82. }
  83. static void Dealloc(PyDescriptorPool* self) {
  84. typedef PyDescriptorPool::ClassesByMessageMap::iterator iterator;
  85. descriptor_pool_map.erase(self->pool);
  86. for (iterator it = self->classes_by_descriptor->begin();
  87. it != self->classes_by_descriptor->end(); ++it) {
  88. Py_DECREF(it->second);
  89. }
  90. delete self->classes_by_descriptor;
  91. for (hash_map<const void*, PyObject*>::iterator it =
  92. self->descriptor_options->begin();
  93. it != self->descriptor_options->end(); ++it) {
  94. Py_DECREF(it->second);
  95. }
  96. delete self->descriptor_options;
  97. delete self->message_factory;
  98. Py_TYPE(self)->tp_free(reinterpret_cast<PyObject*>(self));
  99. }
  100. PyObject* FindMessageByName(PyDescriptorPool* self, PyObject* arg) {
  101. Py_ssize_t name_size;
  102. char* name;
  103. if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) {
  104. return NULL;
  105. }
  106. const Descriptor* message_descriptor =
  107. self->pool->FindMessageTypeByName(string(name, name_size));
  108. if (message_descriptor == NULL) {
  109. PyErr_Format(PyExc_KeyError, "Couldn't find message %.200s", name);
  110. return NULL;
  111. }
  112. return PyMessageDescriptor_FromDescriptor(message_descriptor);
  113. }
  114. // Add a message class to our database.
  115. const Descriptor* RegisterMessageClass(
  116. PyDescriptorPool* self, PyObject *message_class, PyObject* descriptor) {
  117. ScopedPyObjectPtr full_message_name(
  118. PyObject_GetAttrString(descriptor, "full_name"));
  119. Py_ssize_t name_size;
  120. char* name;
  121. if (PyString_AsStringAndSize(full_message_name, &name, &name_size) < 0) {
  122. return NULL;
  123. }
  124. const Descriptor *message_descriptor =
  125. self->pool->FindMessageTypeByName(string(name, name_size));
  126. if (!message_descriptor) {
  127. PyErr_Format(PyExc_TypeError, "Could not find C++ descriptor for '%s'",
  128. name);
  129. return NULL;
  130. }
  131. Py_INCREF(message_class);
  132. typedef PyDescriptorPool::ClassesByMessageMap::iterator iterator;
  133. std::pair<iterator, bool> ret = self->classes_by_descriptor->insert(
  134. std::make_pair(message_descriptor, message_class));
  135. if (!ret.second) {
  136. // Update case: DECREF the previous value.
  137. Py_DECREF(ret.first->second);
  138. ret.first->second = message_class;
  139. }
  140. return message_descriptor;
  141. }
  142. // Retrieve the message class added to our database.
  143. PyObject *GetMessageClass(PyDescriptorPool* self,
  144. const Descriptor *message_descriptor) {
  145. typedef PyDescriptorPool::ClassesByMessageMap::iterator iterator;
  146. iterator ret = self->classes_by_descriptor->find(message_descriptor);
  147. if (ret == self->classes_by_descriptor->end()) {
  148. PyErr_Format(PyExc_TypeError, "No message class registered for '%s'",
  149. message_descriptor->full_name().c_str());
  150. return NULL;
  151. } else {
  152. return ret->second;
  153. }
  154. }
  155. PyObject* FindFileByName(PyDescriptorPool* self, PyObject* arg) {
  156. Py_ssize_t name_size;
  157. char* name;
  158. if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) {
  159. return NULL;
  160. }
  161. const FileDescriptor* file_descriptor =
  162. self->pool->FindFileByName(string(name, name_size));
  163. if (file_descriptor == NULL) {
  164. PyErr_Format(PyExc_KeyError, "Couldn't find file %.200s",
  165. name);
  166. return NULL;
  167. }
  168. return PyFileDescriptor_FromDescriptor(file_descriptor);
  169. }
  170. PyObject* FindFieldByName(PyDescriptorPool* self, PyObject* arg) {
  171. Py_ssize_t name_size;
  172. char* name;
  173. if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) {
  174. return NULL;
  175. }
  176. const FieldDescriptor* field_descriptor =
  177. self->pool->FindFieldByName(string(name, name_size));
  178. if (field_descriptor == NULL) {
  179. PyErr_Format(PyExc_KeyError, "Couldn't find field %.200s",
  180. name);
  181. return NULL;
  182. }
  183. return PyFieldDescriptor_FromDescriptor(field_descriptor);
  184. }
  185. PyObject* FindExtensionByName(PyDescriptorPool* self, PyObject* arg) {
  186. Py_ssize_t name_size;
  187. char* name;
  188. if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) {
  189. return NULL;
  190. }
  191. const FieldDescriptor* field_descriptor =
  192. self->pool->FindExtensionByName(string(name, name_size));
  193. if (field_descriptor == NULL) {
  194. PyErr_Format(PyExc_KeyError, "Couldn't find extension field %.200s", name);
  195. return NULL;
  196. }
  197. return PyFieldDescriptor_FromDescriptor(field_descriptor);
  198. }
  199. PyObject* FindEnumTypeByName(PyDescriptorPool* self, PyObject* arg) {
  200. Py_ssize_t name_size;
  201. char* name;
  202. if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) {
  203. return NULL;
  204. }
  205. const EnumDescriptor* enum_descriptor =
  206. self->pool->FindEnumTypeByName(string(name, name_size));
  207. if (enum_descriptor == NULL) {
  208. PyErr_Format(PyExc_KeyError, "Couldn't find enum %.200s", name);
  209. return NULL;
  210. }
  211. return PyEnumDescriptor_FromDescriptor(enum_descriptor);
  212. }
  213. PyObject* FindOneofByName(PyDescriptorPool* self, PyObject* arg) {
  214. Py_ssize_t name_size;
  215. char* name;
  216. if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) {
  217. return NULL;
  218. }
  219. const OneofDescriptor* oneof_descriptor =
  220. self->pool->FindOneofByName(string(name, name_size));
  221. if (oneof_descriptor == NULL) {
  222. PyErr_Format(PyExc_KeyError, "Couldn't find oneof %.200s", name);
  223. return NULL;
  224. }
  225. return PyOneofDescriptor_FromDescriptor(oneof_descriptor);
  226. }
  227. // The code below loads new Descriptors from a serialized FileDescriptorProto.
  228. // Collects errors that occur during proto file building to allow them to be
  229. // propagated in the python exception instead of only living in ERROR logs.
  230. class BuildFileErrorCollector : public DescriptorPool::ErrorCollector {
  231. public:
  232. BuildFileErrorCollector() : error_message(""), had_errors(false) {}
  233. void AddError(const string& filename, const string& element_name,
  234. const Message* descriptor, ErrorLocation location,
  235. const string& message) {
  236. // Replicates the logging behavior that happens in the C++ implementation
  237. // when an error collector is not passed in.
  238. if (!had_errors) {
  239. error_message +=
  240. ("Invalid proto descriptor for file \"" + filename + "\":\n");
  241. had_errors = true;
  242. }
  243. // As this only happens on failure and will result in the program not
  244. // running at all, no effort is made to optimize this string manipulation.
  245. error_message += (" " + element_name + ": " + message + "\n");
  246. }
  247. string error_message;
  248. bool had_errors;
  249. };
  250. PyObject* AddSerializedFile(PyDescriptorPool* self, PyObject* serialized_pb) {
  251. char* message_type;
  252. Py_ssize_t message_len;
  253. if (PyBytes_AsStringAndSize(serialized_pb, &message_type, &message_len) < 0) {
  254. return NULL;
  255. }
  256. FileDescriptorProto file_proto;
  257. if (!file_proto.ParseFromArray(message_type, message_len)) {
  258. PyErr_SetString(PyExc_TypeError, "Couldn't parse file content!");
  259. return NULL;
  260. }
  261. // If the file was already part of a C++ library, all its descriptors are in
  262. // the underlying pool. No need to do anything else.
  263. const FileDescriptor* generated_file =
  264. DescriptorPool::generated_pool()->FindFileByName(file_proto.name());
  265. if (generated_file != NULL) {
  266. return PyFileDescriptor_FromDescriptorWithSerializedPb(
  267. generated_file, serialized_pb);
  268. }
  269. BuildFileErrorCollector error_collector;
  270. const FileDescriptor* descriptor =
  271. self->pool->BuildFileCollectingErrors(file_proto,
  272. &error_collector);
  273. if (descriptor == NULL) {
  274. PyErr_Format(PyExc_TypeError,
  275. "Couldn't build proto file into descriptor pool!\n%s",
  276. error_collector.error_message.c_str());
  277. return NULL;
  278. }
  279. return PyFileDescriptor_FromDescriptorWithSerializedPb(
  280. descriptor, serialized_pb);
  281. }
  282. PyObject* Add(PyDescriptorPool* self, PyObject* file_descriptor_proto) {
  283. ScopedPyObjectPtr serialized_pb(
  284. PyObject_CallMethod(file_descriptor_proto, "SerializeToString", NULL));
  285. if (serialized_pb == NULL) {
  286. return NULL;
  287. }
  288. return AddSerializedFile(self, serialized_pb);
  289. }
  290. static PyMethodDef Methods[] = {
  291. { "Add", (PyCFunction)Add, METH_O,
  292. "Adds the FileDescriptorProto and its types to this pool." },
  293. { "AddSerializedFile", (PyCFunction)AddSerializedFile, METH_O,
  294. "Adds a serialized FileDescriptorProto to this pool." },
  295. { "FindFileByName", (PyCFunction)FindFileByName, METH_O,
  296. "Searches for a file descriptor by its .proto name." },
  297. { "FindMessageTypeByName", (PyCFunction)FindMessageByName, METH_O,
  298. "Searches for a message descriptor by full name." },
  299. { "FindFieldByName", (PyCFunction)FindFieldByName, METH_O,
  300. "Searches for a field descriptor by full name." },
  301. { "FindExtensionByName", (PyCFunction)FindExtensionByName, METH_O,
  302. "Searches for extension descriptor by full name." },
  303. { "FindEnumTypeByName", (PyCFunction)FindEnumTypeByName, METH_O,
  304. "Searches for enum type descriptor by full name." },
  305. { "FindOneofByName", (PyCFunction)FindOneofByName, METH_O,
  306. "Searches for oneof descriptor by full name." },
  307. {NULL}
  308. };
  309. } // namespace cdescriptor_pool
  310. PyTypeObject PyDescriptorPool_Type = {
  311. PyVarObject_HEAD_INIT(&PyType_Type, 0)
  312. FULL_MODULE_NAME ".DescriptorPool", // tp_name
  313. sizeof(PyDescriptorPool), // tp_basicsize
  314. 0, // tp_itemsize
  315. (destructor)cdescriptor_pool::Dealloc, // tp_dealloc
  316. 0, // tp_print
  317. 0, // tp_getattr
  318. 0, // tp_setattr
  319. 0, // tp_compare
  320. 0, // tp_repr
  321. 0, // tp_as_number
  322. 0, // tp_as_sequence
  323. 0, // tp_as_mapping
  324. 0, // tp_hash
  325. 0, // tp_call
  326. 0, // tp_str
  327. 0, // tp_getattro
  328. 0, // tp_setattro
  329. 0, // tp_as_buffer
  330. Py_TPFLAGS_DEFAULT, // tp_flags
  331. "A Descriptor Pool", // tp_doc
  332. 0, // tp_traverse
  333. 0, // tp_clear
  334. 0, // tp_richcompare
  335. 0, // tp_weaklistoffset
  336. 0, // tp_iter
  337. 0, // tp_iternext
  338. cdescriptor_pool::Methods, // tp_methods
  339. 0, // tp_members
  340. 0, // tp_getset
  341. 0, // tp_base
  342. 0, // tp_dict
  343. 0, // tp_descr_get
  344. 0, // tp_descr_set
  345. 0, // tp_dictoffset
  346. 0, // tp_init
  347. 0, // tp_alloc
  348. 0, // tp_new
  349. PyObject_Del, // tp_free
  350. };
  351. // This is the DescriptorPool which contains all the definitions from the
  352. // generated _pb2.py modules.
  353. static PyDescriptorPool* python_generated_pool = NULL;
  354. bool InitDescriptorPool() {
  355. if (PyType_Ready(&PyDescriptorPool_Type) < 0)
  356. return false;
  357. python_generated_pool = cdescriptor_pool::NewDescriptorPool();
  358. if (python_generated_pool == NULL) {
  359. return false;
  360. }
  361. // Register this pool to be found for C++-generated descriptors.
  362. descriptor_pool_map.insert(
  363. std::make_pair(DescriptorPool::generated_pool(),
  364. python_generated_pool));
  365. return true;
  366. }
  367. PyDescriptorPool* GetDescriptorPool() {
  368. return python_generated_pool;
  369. }
  370. PyDescriptorPool* GetDescriptorPool_FromPool(const DescriptorPool* pool) {
  371. // Fast path for standard descriptors.
  372. if (pool == python_generated_pool->pool ||
  373. pool == DescriptorPool::generated_pool()) {
  374. return python_generated_pool;
  375. }
  376. hash_map<const DescriptorPool*, PyDescriptorPool*>::iterator it =
  377. descriptor_pool_map.find(pool);
  378. if (it != descriptor_pool_map.end()) {
  379. PyErr_SetString(PyExc_KeyError, "Unknown descriptor pool");
  380. return NULL;
  381. }
  382. return it->second;
  383. }
  384. } // namespace python
  385. } // namespace protobuf
  386. } // namespace google