descriptor_pool.cc 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518
  1. // Protocol Buffers - Google's data interchange format
  2. // Copyright 2008 Google Inc. All rights reserved.
  3. // https://developers.google.com/protocol-buffers/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. // Implements the DescriptorPool, which collects all descriptors.
  31. #include <Python.h>
  32. #include <google/protobuf/descriptor.pb.h>
  33. #include <google/protobuf/dynamic_message.h>
  34. #include <google/protobuf/pyext/descriptor_pool.h>
  35. #include <google/protobuf/pyext/descriptor.h>
  36. #include <google/protobuf/pyext/message.h>
  37. #include <google/protobuf/pyext/scoped_pyobject_ptr.h>
  38. #if PY_MAJOR_VERSION >= 3
  39. #define PyString_FromStringAndSize PyUnicode_FromStringAndSize
  40. #if PY_VERSION_HEX < 0x03030000
  41. #error "Python 3.0 - 3.2 are not supported."
  42. #endif
  43. #define PyString_AsStringAndSize(ob, charpp, sizep) \
  44. (PyUnicode_Check(ob)? \
  45. ((*(charpp) = PyUnicode_AsUTF8AndSize(ob, (sizep))) == NULL? -1: 0): \
  46. PyBytes_AsStringAndSize(ob, (charpp), (sizep)))
  47. #endif
  48. namespace google {
  49. namespace protobuf {
  50. namespace python {
  51. // A map to cache Python Pools per C++ pointer.
  52. // Pointers are not owned here, and belong to the PyDescriptorPool.
  53. static hash_map<const DescriptorPool*, PyDescriptorPool*> descriptor_pool_map;
  54. namespace cdescriptor_pool {
  55. static PyDescriptorPool* NewDescriptorPool() {
  56. PyDescriptorPool* cdescriptor_pool = PyObject_New(
  57. PyDescriptorPool, &PyDescriptorPool_Type);
  58. if (cdescriptor_pool == NULL) {
  59. return NULL;
  60. }
  61. // Build a DescriptorPool for messages only declared in Python libraries.
  62. // generated_pool() contains all messages linked in C++ libraries, and is used
  63. // as underlay.
  64. cdescriptor_pool->pool = new DescriptorPool(DescriptorPool::generated_pool());
  65. DynamicMessageFactory* message_factory = new DynamicMessageFactory();
  66. // This option might be the default some day.
  67. message_factory->SetDelegateToGeneratedFactory(true);
  68. cdescriptor_pool->message_factory = message_factory;
  69. // TODO(amauryfa): Rewrite the SymbolDatabase in C so that it uses the same
  70. // storage.
  71. cdescriptor_pool->classes_by_descriptor =
  72. new PyDescriptorPool::ClassesByMessageMap();
  73. cdescriptor_pool->descriptor_options =
  74. new hash_map<const void*, PyObject *>();
  75. if (!descriptor_pool_map.insert(
  76. std::make_pair(cdescriptor_pool->pool, cdescriptor_pool)).second) {
  77. // Should never happen -- would indicate an internal error / bug.
  78. PyErr_SetString(PyExc_ValueError, "DescriptorPool already registered");
  79. return NULL;
  80. }
  81. return cdescriptor_pool;
  82. }
  83. static void Dealloc(PyDescriptorPool* self) {
  84. typedef PyDescriptorPool::ClassesByMessageMap::iterator iterator;
  85. descriptor_pool_map.erase(self->pool);
  86. for (iterator it = self->classes_by_descriptor->begin();
  87. it != self->classes_by_descriptor->end(); ++it) {
  88. Py_DECREF(it->second);
  89. }
  90. delete self->classes_by_descriptor;
  91. for (hash_map<const void*, PyObject*>::iterator it =
  92. self->descriptor_options->begin();
  93. it != self->descriptor_options->end(); ++it) {
  94. Py_DECREF(it->second);
  95. }
  96. delete self->descriptor_options;
  97. delete self->pool;
  98. delete self->message_factory;
  99. Py_TYPE(self)->tp_free(reinterpret_cast<PyObject*>(self));
  100. }
  101. PyObject* FindMessageByName(PyDescriptorPool* self, PyObject* arg) {
  102. Py_ssize_t name_size;
  103. char* name;
  104. if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) {
  105. return NULL;
  106. }
  107. const Descriptor* message_descriptor =
  108. self->pool->FindMessageTypeByName(string(name, name_size));
  109. if (message_descriptor == NULL) {
  110. PyErr_Format(PyExc_KeyError, "Couldn't find message %.200s", name);
  111. return NULL;
  112. }
  113. return PyMessageDescriptor_FromDescriptor(message_descriptor);
  114. }
  115. // Add a message class to our database.
  116. int RegisterMessageClass(PyDescriptorPool* self,
  117. const Descriptor *message_descriptor,
  118. PyObject *message_class) {
  119. Py_INCREF(message_class);
  120. typedef PyDescriptorPool::ClassesByMessageMap::iterator iterator;
  121. std::pair<iterator, bool> ret = self->classes_by_descriptor->insert(
  122. std::make_pair(message_descriptor, message_class));
  123. if (!ret.second) {
  124. // Update case: DECREF the previous value.
  125. Py_DECREF(ret.first->second);
  126. ret.first->second = message_class;
  127. }
  128. return 0;
  129. }
  130. // Retrieve the message class added to our database.
  131. PyObject *GetMessageClass(PyDescriptorPool* self,
  132. const Descriptor *message_descriptor) {
  133. typedef PyDescriptorPool::ClassesByMessageMap::iterator iterator;
  134. iterator ret = self->classes_by_descriptor->find(message_descriptor);
  135. if (ret == self->classes_by_descriptor->end()) {
  136. PyErr_Format(PyExc_TypeError, "No message class registered for '%s'",
  137. message_descriptor->full_name().c_str());
  138. return NULL;
  139. } else {
  140. return ret->second;
  141. }
  142. }
  143. PyObject* FindFileByName(PyDescriptorPool* self, PyObject* arg) {
  144. Py_ssize_t name_size;
  145. char* name;
  146. if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) {
  147. return NULL;
  148. }
  149. const FileDescriptor* file_descriptor =
  150. self->pool->FindFileByName(string(name, name_size));
  151. if (file_descriptor == NULL) {
  152. PyErr_Format(PyExc_KeyError, "Couldn't find file %.200s",
  153. name);
  154. return NULL;
  155. }
  156. return PyFileDescriptor_FromDescriptor(file_descriptor);
  157. }
  158. PyObject* FindFieldByName(PyDescriptorPool* self, PyObject* arg) {
  159. Py_ssize_t name_size;
  160. char* name;
  161. if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) {
  162. return NULL;
  163. }
  164. const FieldDescriptor* field_descriptor =
  165. self->pool->FindFieldByName(string(name, name_size));
  166. if (field_descriptor == NULL) {
  167. PyErr_Format(PyExc_KeyError, "Couldn't find field %.200s",
  168. name);
  169. return NULL;
  170. }
  171. return PyFieldDescriptor_FromDescriptor(field_descriptor);
  172. }
  173. PyObject* FindExtensionByName(PyDescriptorPool* self, PyObject* arg) {
  174. Py_ssize_t name_size;
  175. char* name;
  176. if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) {
  177. return NULL;
  178. }
  179. const FieldDescriptor* field_descriptor =
  180. self->pool->FindExtensionByName(string(name, name_size));
  181. if (field_descriptor == NULL) {
  182. PyErr_Format(PyExc_KeyError, "Couldn't find extension field %.200s", name);
  183. return NULL;
  184. }
  185. return PyFieldDescriptor_FromDescriptor(field_descriptor);
  186. }
  187. PyObject* FindEnumTypeByName(PyDescriptorPool* self, PyObject* arg) {
  188. Py_ssize_t name_size;
  189. char* name;
  190. if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) {
  191. return NULL;
  192. }
  193. const EnumDescriptor* enum_descriptor =
  194. self->pool->FindEnumTypeByName(string(name, name_size));
  195. if (enum_descriptor == NULL) {
  196. PyErr_Format(PyExc_KeyError, "Couldn't find enum %.200s", name);
  197. return NULL;
  198. }
  199. return PyEnumDescriptor_FromDescriptor(enum_descriptor);
  200. }
  201. PyObject* FindOneofByName(PyDescriptorPool* self, PyObject* arg) {
  202. Py_ssize_t name_size;
  203. char* name;
  204. if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) {
  205. return NULL;
  206. }
  207. const OneofDescriptor* oneof_descriptor =
  208. self->pool->FindOneofByName(string(name, name_size));
  209. if (oneof_descriptor == NULL) {
  210. PyErr_Format(PyExc_KeyError, "Couldn't find oneof %.200s", name);
  211. return NULL;
  212. }
  213. return PyOneofDescriptor_FromDescriptor(oneof_descriptor);
  214. }
  215. PyObject* FindFileContainingSymbol(PyDescriptorPool* self, PyObject* arg) {
  216. Py_ssize_t name_size;
  217. char* name;
  218. if (PyString_AsStringAndSize(arg, &name, &name_size) < 0) {
  219. return NULL;
  220. }
  221. const FileDescriptor* file_descriptor =
  222. self->pool->FindFileContainingSymbol(string(name, name_size));
  223. if (file_descriptor == NULL) {
  224. PyErr_Format(PyExc_KeyError, "Couldn't find symbol %.200s", name);
  225. return NULL;
  226. }
  227. return PyFileDescriptor_FromDescriptor(file_descriptor);
  228. }
  229. // These functions should not exist -- the only valid way to create
  230. // descriptors is to call Add() or AddSerializedFile().
  231. // But these AddDescriptor() functions were created in Python and some people
  232. // call them, so we support them for now for compatibility.
  233. // However we do check that the existing descriptor already exists in the pool,
  234. // which appears to always be true for existing calls -- but then why do people
  235. // call a function that will just be a no-op?
  236. // TODO(amauryfa): Need to investigate further.
  237. PyObject* AddFileDescriptor(PyDescriptorPool* self, PyObject* descriptor) {
  238. const FileDescriptor* file_descriptor =
  239. PyFileDescriptor_AsDescriptor(descriptor);
  240. if (!file_descriptor) {
  241. return NULL;
  242. }
  243. if (file_descriptor !=
  244. self->pool->FindFileByName(file_descriptor->name())) {
  245. PyErr_Format(PyExc_ValueError,
  246. "The file descriptor %s does not belong to this pool",
  247. file_descriptor->name().c_str());
  248. return NULL;
  249. }
  250. Py_RETURN_NONE;
  251. }
  252. PyObject* AddDescriptor(PyDescriptorPool* self, PyObject* descriptor) {
  253. const Descriptor* message_descriptor =
  254. PyMessageDescriptor_AsDescriptor(descriptor);
  255. if (!message_descriptor) {
  256. return NULL;
  257. }
  258. if (message_descriptor !=
  259. self->pool->FindMessageTypeByName(message_descriptor->full_name())) {
  260. PyErr_Format(PyExc_ValueError,
  261. "The message descriptor %s does not belong to this pool",
  262. message_descriptor->full_name().c_str());
  263. return NULL;
  264. }
  265. Py_RETURN_NONE;
  266. }
  267. PyObject* AddEnumDescriptor(PyDescriptorPool* self, PyObject* descriptor) {
  268. const EnumDescriptor* enum_descriptor =
  269. PyEnumDescriptor_AsDescriptor(descriptor);
  270. if (!enum_descriptor) {
  271. return NULL;
  272. }
  273. if (enum_descriptor !=
  274. self->pool->FindEnumTypeByName(enum_descriptor->full_name())) {
  275. PyErr_Format(PyExc_ValueError,
  276. "The enum descriptor %s does not belong to this pool",
  277. enum_descriptor->full_name().c_str());
  278. return NULL;
  279. }
  280. Py_RETURN_NONE;
  281. }
  282. // The code below loads new Descriptors from a serialized FileDescriptorProto.
  283. // Collects errors that occur during proto file building to allow them to be
  284. // propagated in the python exception instead of only living in ERROR logs.
  285. class BuildFileErrorCollector : public DescriptorPool::ErrorCollector {
  286. public:
  287. BuildFileErrorCollector() : error_message(""), had_errors(false) {}
  288. void AddError(const string& filename, const string& element_name,
  289. const Message* descriptor, ErrorLocation location,
  290. const string& message) {
  291. // Replicates the logging behavior that happens in the C++ implementation
  292. // when an error collector is not passed in.
  293. if (!had_errors) {
  294. error_message +=
  295. ("Invalid proto descriptor for file \"" + filename + "\":\n");
  296. had_errors = true;
  297. }
  298. // As this only happens on failure and will result in the program not
  299. // running at all, no effort is made to optimize this string manipulation.
  300. error_message += (" " + element_name + ": " + message + "\n");
  301. }
  302. string error_message;
  303. bool had_errors;
  304. };
  305. PyObject* AddSerializedFile(PyDescriptorPool* self, PyObject* serialized_pb) {
  306. char* message_type;
  307. Py_ssize_t message_len;
  308. if (PyBytes_AsStringAndSize(serialized_pb, &message_type, &message_len) < 0) {
  309. return NULL;
  310. }
  311. FileDescriptorProto file_proto;
  312. if (!file_proto.ParseFromArray(message_type, message_len)) {
  313. PyErr_SetString(PyExc_TypeError, "Couldn't parse file content!");
  314. return NULL;
  315. }
  316. // If the file was already part of a C++ library, all its descriptors are in
  317. // the underlying pool. No need to do anything else.
  318. const FileDescriptor* generated_file =
  319. DescriptorPool::generated_pool()->FindFileByName(file_proto.name());
  320. if (generated_file != NULL) {
  321. return PyFileDescriptor_FromDescriptorWithSerializedPb(
  322. generated_file, serialized_pb);
  323. }
  324. BuildFileErrorCollector error_collector;
  325. const FileDescriptor* descriptor =
  326. self->pool->BuildFileCollectingErrors(file_proto,
  327. &error_collector);
  328. if (descriptor == NULL) {
  329. PyErr_Format(PyExc_TypeError,
  330. "Couldn't build proto file into descriptor pool!\n%s",
  331. error_collector.error_message.c_str());
  332. return NULL;
  333. }
  334. return PyFileDescriptor_FromDescriptorWithSerializedPb(
  335. descriptor, serialized_pb);
  336. }
  337. PyObject* Add(PyDescriptorPool* self, PyObject* file_descriptor_proto) {
  338. ScopedPyObjectPtr serialized_pb(
  339. PyObject_CallMethod(file_descriptor_proto, "SerializeToString", NULL));
  340. if (serialized_pb == NULL) {
  341. return NULL;
  342. }
  343. return AddSerializedFile(self, serialized_pb);
  344. }
  345. static PyMethodDef Methods[] = {
  346. { "Add", (PyCFunction)Add, METH_O,
  347. "Adds the FileDescriptorProto and its types to this pool." },
  348. { "AddSerializedFile", (PyCFunction)AddSerializedFile, METH_O,
  349. "Adds a serialized FileDescriptorProto to this pool." },
  350. // TODO(amauryfa): Understand why the Python implementation differs from
  351. // this one, ask users to use another API and deprecate these functions.
  352. { "AddFileDescriptor", (PyCFunction)AddFileDescriptor, METH_O,
  353. "No-op. Add() must have been called before." },
  354. { "AddDescriptor", (PyCFunction)AddDescriptor, METH_O,
  355. "No-op. Add() must have been called before." },
  356. { "AddEnumDescriptor", (PyCFunction)AddEnumDescriptor, METH_O,
  357. "No-op. Add() must have been called before." },
  358. { "FindFileByName", (PyCFunction)FindFileByName, METH_O,
  359. "Searches for a file descriptor by its .proto name." },
  360. { "FindMessageTypeByName", (PyCFunction)FindMessageByName, METH_O,
  361. "Searches for a message descriptor by full name." },
  362. { "FindFieldByName", (PyCFunction)FindFieldByName, METH_O,
  363. "Searches for a field descriptor by full name." },
  364. { "FindExtensionByName", (PyCFunction)FindExtensionByName, METH_O,
  365. "Searches for extension descriptor by full name." },
  366. { "FindEnumTypeByName", (PyCFunction)FindEnumTypeByName, METH_O,
  367. "Searches for enum type descriptor by full name." },
  368. { "FindOneofByName", (PyCFunction)FindOneofByName, METH_O,
  369. "Searches for oneof descriptor by full name." },
  370. { "FindFileContainingSymbol", (PyCFunction)FindFileContainingSymbol, METH_O,
  371. "Gets the FileDescriptor containing the specified symbol." },
  372. {NULL}
  373. };
  374. } // namespace cdescriptor_pool
  375. PyTypeObject PyDescriptorPool_Type = {
  376. PyVarObject_HEAD_INIT(&PyType_Type, 0)
  377. FULL_MODULE_NAME ".DescriptorPool", // tp_name
  378. sizeof(PyDescriptorPool), // tp_basicsize
  379. 0, // tp_itemsize
  380. (destructor)cdescriptor_pool::Dealloc, // tp_dealloc
  381. 0, // tp_print
  382. 0, // tp_getattr
  383. 0, // tp_setattr
  384. 0, // tp_compare
  385. 0, // tp_repr
  386. 0, // tp_as_number
  387. 0, // tp_as_sequence
  388. 0, // tp_as_mapping
  389. 0, // tp_hash
  390. 0, // tp_call
  391. 0, // tp_str
  392. 0, // tp_getattro
  393. 0, // tp_setattro
  394. 0, // tp_as_buffer
  395. Py_TPFLAGS_DEFAULT, // tp_flags
  396. "A Descriptor Pool", // tp_doc
  397. 0, // tp_traverse
  398. 0, // tp_clear
  399. 0, // tp_richcompare
  400. 0, // tp_weaklistoffset
  401. 0, // tp_iter
  402. 0, // tp_iternext
  403. cdescriptor_pool::Methods, // tp_methods
  404. 0, // tp_members
  405. 0, // tp_getset
  406. 0, // tp_base
  407. 0, // tp_dict
  408. 0, // tp_descr_get
  409. 0, // tp_descr_set
  410. 0, // tp_dictoffset
  411. 0, // tp_init
  412. 0, // tp_alloc
  413. 0, // tp_new
  414. PyObject_Del, // tp_free
  415. };
  416. // This is the DescriptorPool which contains all the definitions from the
  417. // generated _pb2.py modules.
  418. static PyDescriptorPool* python_generated_pool = NULL;
  419. bool InitDescriptorPool() {
  420. if (PyType_Ready(&PyDescriptorPool_Type) < 0)
  421. return false;
  422. python_generated_pool = cdescriptor_pool::NewDescriptorPool();
  423. if (python_generated_pool == NULL) {
  424. return false;
  425. }
  426. // Register this pool to be found for C++-generated descriptors.
  427. descriptor_pool_map.insert(
  428. std::make_pair(DescriptorPool::generated_pool(),
  429. python_generated_pool));
  430. return true;
  431. }
  432. PyDescriptorPool* GetDefaultDescriptorPool() {
  433. return python_generated_pool;
  434. }
  435. PyDescriptorPool* GetDescriptorPool_FromPool(const DescriptorPool* pool) {
  436. // Fast path for standard descriptors.
  437. if (pool == python_generated_pool->pool ||
  438. pool == DescriptorPool::generated_pool()) {
  439. return python_generated_pool;
  440. }
  441. hash_map<const DescriptorPool*, PyDescriptorPool*>::iterator it =
  442. descriptor_pool_map.find(pool);
  443. if (it == descriptor_pool_map.end()) {
  444. PyErr_SetString(PyExc_KeyError, "Unknown descriptor pool");
  445. return NULL;
  446. }
  447. return it->second;
  448. }
  449. } // namespace python
  450. } // namespace protobuf
  451. } // namespace google