#! /usr/bin/python
#
# Protocol Buffers - Google's data interchange format
# Copyright 2008 Google Inc.  All rights reserved.
# http://code.google.com/p/protobuf/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Test for google.protobuf.text_format."""

__author__ = 'kenton@google.com (Kenton Varda)'

import difflib
import unittest

from google.protobuf import text_format
from google.protobuf.internal import test_util
from google.protobuf import unittest_pb2
from google.protobuf import unittest_mset_pb2
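
# Orientation: the round trip exercised by the tests below uses
# text_format.MessageToString() to render a message as text and
# text_format.Merge() to parse such text back into a message, e.g.:
#
#   message = unittest_pb2.ForeignMessage()
#   message.c = 123
#   text = text_format.MessageToString(message)   # 'c: 123\n' (see testMessageToString)
#   parsed = unittest_pb2.ForeignMessage()
#   text_format.Merge(text, parsed)               # parsed.c is now 123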


class TextFormatTest(unittest.TestCase):

  def ReadGolden(self, golden_filename):
    f = test_util.GoldenFile(golden_filename)
    golden_lines = f.readlines()
    f.close()
    return golden_lines

  def CompareToGoldenFile(self, text, golden_filename):
    golden_lines = self.ReadGolden(golden_filename)
    self.CompareToGoldenLines(text, golden_lines)

  def CompareToGoldenText(self, text, golden_text):
    self.CompareToGoldenLines(text, golden_text.splitlines(1))

  def CompareToGoldenLines(self, text, golden_lines):
    actual_lines = text.splitlines(1)
    self.assertEqual(golden_lines, actual_lines,
                     "Text doesn't match golden.  Diff:\n" +
                     ''.join(difflib.ndiff(golden_lines, actual_lines)))

  def testPrintAllFields(self):
    message = unittest_pb2.TestAllTypes()
    test_util.SetAllFields(message)
    self.CompareToGoldenFile(
        self.RemoveRedundantZeros(text_format.MessageToString(message)),
        'text_format_unittest_data.txt')

  def testPrintAllExtensions(self):
    message = unittest_pb2.TestAllExtensions()
    test_util.SetAllExtensions(message)
    self.CompareToGoldenFile(
        self.RemoveRedundantZeros(text_format.MessageToString(message)),
        'text_format_unittest_extensions_data.txt')

  def testPrintMessageSet(self):
    message = unittest_mset_pb2.TestMessageSetContainer()
    ext1 = unittest_mset_pb2.TestMessageSetExtension1.message_set_extension
    ext2 = unittest_mset_pb2.TestMessageSetExtension2.message_set_extension
    message.message_set.Extensions[ext1].i = 23
    message.message_set.Extensions[ext2].str = 'foo'
    self.CompareToGoldenText(
        text_format.MessageToString(message),
        'message_set {\n'
        '  [protobuf_unittest.TestMessageSetExtension1] {\n'
        '    i: 23\n'
        '  }\n'
        '  [protobuf_unittest.TestMessageSetExtension2] {\n'
        '    str: \"foo\"\n'
        '  }\n'
        '}\n')

  def testPrintExotic(self):
    message = unittest_pb2.TestAllTypes()
    message.repeated_int64.append(-9223372036854775808)
    message.repeated_uint64.append(18446744073709551615)
    message.repeated_double.append(123.456)
    message.repeated_double.append(1.23e22)
    message.repeated_double.append(1.23e-18)
    message.repeated_string.append('\000\001\a\b\f\n\r\t\v\\\'\"')
    self.CompareToGoldenText(
        self.RemoveRedundantZeros(text_format.MessageToString(message)),
        'repeated_int64: -9223372036854775808\n'
        'repeated_uint64: 18446744073709551615\n'
        'repeated_double: 123.456\n'
        'repeated_double: 1.23e+22\n'
        'repeated_double: 1.23e-18\n'
        'repeated_string: '
        '\"\\000\\001\\007\\010\\014\\n\\r\\t\\013\\\\\\\'\\\"\"\n')

  def testMessageToString(self):
    message = unittest_pb2.ForeignMessage()
    message.c = 123
    self.assertEqual('c: 123\n', str(message))

  def RemoveRedundantZeros(self, text):
    # Some platforms print 1e+5 as 1e+005.  This is fine, but we need to
    # remove these zeros in order to match the golden file.
    return text.replace('e+0','e+').replace('e+0','e+') \
               .replace('e-0','e-').replace('e-0','e-')
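
  # For example, if a platform printed 'repeated_double: 1.23e+022\n', the
  # chain above would rewrite it to 'repeated_double: 1.23e+22\n', matching
  # the golden output checked by testPrintExotic.  Each replace() call drops
  # at most one padding zero per exponent, so two passes cover exponents
  # padded out to three digits.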

  def testMergeGolden(self):
    golden_text = '\n'.join(self.ReadGolden('text_format_unittest_data.txt'))
    parsed_message = unittest_pb2.TestAllTypes()
    text_format.Merge(golden_text, parsed_message)

    message = unittest_pb2.TestAllTypes()
    test_util.SetAllFields(message)
    self.assertEquals(message, parsed_message)

  def testMergeGoldenExtensions(self):
    golden_text = '\n'.join(self.ReadGolden(
        'text_format_unittest_extensions_data.txt'))
    parsed_message = unittest_pb2.TestAllExtensions()
    text_format.Merge(golden_text, parsed_message)

    message = unittest_pb2.TestAllExtensions()
    test_util.SetAllExtensions(message)
    self.assertEquals(message, parsed_message)

  def testMergeAllFields(self):
    message = unittest_pb2.TestAllTypes()
    test_util.SetAllFields(message)
    ascii_text = text_format.MessageToString(message)

    parsed_message = unittest_pb2.TestAllTypes()
    text_format.Merge(ascii_text, parsed_message)
    self.assertEqual(message, parsed_message)
    test_util.ExpectAllFieldsSet(self, message)

  def testMergeAllExtensions(self):
    message = unittest_pb2.TestAllExtensions()
    test_util.SetAllExtensions(message)
    ascii_text = text_format.MessageToString(message)

    parsed_message = unittest_pb2.TestAllExtensions()
    text_format.Merge(ascii_text, parsed_message)
    self.assertEqual(message, parsed_message)

  def testMergeMessageSet(self):
    message = unittest_pb2.TestAllTypes()
    text = ('repeated_uint64: 1\n'
            'repeated_uint64: 2\n')
    text_format.Merge(text, message)
    self.assertEqual(1, message.repeated_uint64[0])
    self.assertEqual(2, message.repeated_uint64[1])

    message = unittest_mset_pb2.TestMessageSetContainer()
    text = ('message_set {\n'
            '  [protobuf_unittest.TestMessageSetExtension1] {\n'
            '    i: 23\n'
            '  }\n'
            '  [protobuf_unittest.TestMessageSetExtension2] {\n'
            '    str: \"foo\"\n'
            '  }\n'
            '}\n')
    text_format.Merge(text, message)
    ext1 = unittest_mset_pb2.TestMessageSetExtension1.message_set_extension
    ext2 = unittest_mset_pb2.TestMessageSetExtension2.message_set_extension
    self.assertEquals(23, message.message_set.Extensions[ext1].i)
    self.assertEquals('foo', message.message_set.Extensions[ext2].str)

  def testMergeExotic(self):
    message = unittest_pb2.TestAllTypes()
    text = ('repeated_int64: -9223372036854775808\n'
            'repeated_uint64: 18446744073709551615\n'
            'repeated_double: 123.456\n'
            'repeated_double: 1.23e+22\n'
            'repeated_double: 1.23e-18\n'
            'repeated_string: \n'
            '\"\\000\\001\\007\\010\\014\\n\\r\\t\\013\\\\\\\'\\\"\"\n'
            'repeated_string: "foo" \'corge\' "grault"')
    text_format.Merge(text, message)

    self.assertEqual(-9223372036854775808, message.repeated_int64[0])
    self.assertEqual(18446744073709551615, message.repeated_uint64[0])
    self.assertEqual(123.456, message.repeated_double[0])
    self.assertEqual(1.23e22, message.repeated_double[1])
    self.assertEqual(1.23e-18, message.repeated_double[2])
    self.assertEqual(
        '\000\001\a\b\f\n\r\t\v\\\'\"', message.repeated_string[0])
    self.assertEqual('foocorgegrault', message.repeated_string[1])

  def testMergeUnknownField(self):
    message = unittest_pb2.TestAllTypes()
    text = 'unknown_field: 8\n'
    self.assertRaisesWithMessage(
        text_format.ParseError,
        ('1:1 : Message type "protobuf_unittest.TestAllTypes" has no field '
         'named "unknown_field".'),
        text_format.Merge, text, message)

  def testMergeBadExtension(self):
    message = unittest_pb2.TestAllExtensions()
    text = '[unknown_extension]: 8\n'
    self.assertRaisesWithMessage(
        text_format.ParseError,
        '1:2 : Extension "unknown_extension" not registered.',
        text_format.Merge, text, message)

    message = unittest_pb2.TestAllTypes()
    self.assertRaisesWithMessage(
        text_format.ParseError,
        ('1:2 : Message type "protobuf_unittest.TestAllTypes" does not have '
         'extensions.'),
        text_format.Merge, text, message)

  def testMergeGroupNotClosed(self):
    message = unittest_pb2.TestAllTypes()
    text = 'RepeatedGroup: <'
    self.assertRaisesWithMessage(
        text_format.ParseError, '1:16 : Expected ">".',
        text_format.Merge, text, message)

    text = 'RepeatedGroup: {'
    self.assertRaisesWithMessage(
        text_format.ParseError, '1:16 : Expected "}".',
        text_format.Merge, text, message)

  def testMergeEmptyGroup(self):
    message = unittest_pb2.TestAllTypes()
    text = 'OptionalGroup: {}'
    text_format.Merge(text, message)
    self.assertTrue(message.HasField('optionalgroup'))

    message.Clear()

    message = unittest_pb2.TestAllTypes()
    text = 'OptionalGroup: <>'
    text_format.Merge(text, message)
    self.assertTrue(message.HasField('optionalgroup'))

  def testMergeBadEnumValue(self):
    message = unittest_pb2.TestAllTypes()
    text = 'optional_nested_enum: BARR'
    self.assertRaisesWithMessage(
        text_format.ParseError,
        ('1:23 : Enum type "protobuf_unittest.TestAllTypes.NestedEnum" '
         'has no value named BARR.'),
        text_format.Merge, text, message)

    message = unittest_pb2.TestAllTypes()
    text = 'optional_nested_enum: 100'
    self.assertRaisesWithMessage(
        text_format.ParseError,
        ('1:23 : Enum type "protobuf_unittest.TestAllTypes.NestedEnum" '
         'has no value with number 100.'),
        text_format.Merge, text, message)

  def assertRaisesWithMessage(self, e_class, e, func, *args, **kwargs):
    """Same as assertRaises, but also compares the exception message."""

    if hasattr(e_class, '__name__'):
      exc_name = e_class.__name__
    else:
      exc_name = str(e_class)

    try:
      func(*args, **kwargs)
    except e_class, expr:
      if str(expr) != e:
        msg = '%s raised, but with wrong message: "%s" instead of "%s"'
        raise self.failureException(msg % (exc_name,
                                           str(expr).encode('string_escape'),
                                           e.encode('string_escape')))
      return
    else:
      raise self.failureException('%s not raised' % exc_name)


class TokenizerTest(unittest.TestCase):

  def testSimpleTokenCases(self):
    text = ('identifier1:"string1"\n \n\n'
            'identifier2 : \n \n123 \n identifier3 :\'string\'\n'
            'identifiER_4 : 1.1e+2 ID5:-0.23 ID6:\'aaaa\\\'bbbb\'\n'
            'ID7 : "aa\\"bb"\n\n\n\n ID8: {A:inf B:-inf C:true D:false}\n'
            'ID9: 22 ID10: -111111111111111111 ID11: -22\n'
            'ID12: 2222222222222222222')
    tokenizer = text_format._Tokenizer(text)
    methods = [(tokenizer.ConsumeIdentifier, 'identifier1'),
               ':',
               (tokenizer.ConsumeString, 'string1'),
               (tokenizer.ConsumeIdentifier, 'identifier2'),
               ':',
               (tokenizer.ConsumeInt32, 123),
               (tokenizer.ConsumeIdentifier, 'identifier3'),
               ':',
               (tokenizer.ConsumeString, 'string'),
               (tokenizer.ConsumeIdentifier, 'identifiER_4'),
               ':',
               (tokenizer.ConsumeFloat, 1.1e+2),
               (tokenizer.ConsumeIdentifier, 'ID5'),
               ':',
               (tokenizer.ConsumeFloat, -0.23),
               (tokenizer.ConsumeIdentifier, 'ID6'),
               ':',
               (tokenizer.ConsumeString, 'aaaa\'bbbb'),
               (tokenizer.ConsumeIdentifier, 'ID7'),
               ':',
               (tokenizer.ConsumeString, 'aa\"bb'),
               (tokenizer.ConsumeIdentifier, 'ID8'),
               ':',
               '{',
               (tokenizer.ConsumeIdentifier, 'A'),
               ':',
               (tokenizer.ConsumeFloat, text_format._INFINITY),
               (tokenizer.ConsumeIdentifier, 'B'),
               ':',
               (tokenizer.ConsumeFloat, -text_format._INFINITY),
               (tokenizer.ConsumeIdentifier, 'C'),
               ':',
               (tokenizer.ConsumeBool, True),
               (tokenizer.ConsumeIdentifier, 'D'),
               ':',
               (tokenizer.ConsumeBool, False),
               '}',
               (tokenizer.ConsumeIdentifier, 'ID9'),
               ':',
               (tokenizer.ConsumeUint32, 22),
               (tokenizer.ConsumeIdentifier, 'ID10'),
               ':',
               (tokenizer.ConsumeInt64, -111111111111111111),
               (tokenizer.ConsumeIdentifier, 'ID11'),
               ':',
               (tokenizer.ConsumeInt32, -22),
               (tokenizer.ConsumeIdentifier, 'ID12'),
               ':',
               (tokenizer.ConsumeUint64, 2222222222222222222)]

    i = 0
    while not tokenizer.AtEnd():
      m = methods[i]
      if type(m) == str:
        token = tokenizer.token
        self.assertEqual(token, m)
        tokenizer.NextToken()
      else:
        self.assertEqual(m[1], m[0]())
      i += 1

  def testConsumeIntegers(self):
    # This test only tests the failures in the integer parsing methods as well
    # as the '0' special cases.
    int64_max = (1 << 63) - 1
    uint32_max = (1 << 32) - 1
    text = '-1 %d %d' % (uint32_max + 1, int64_max + 1)
    tokenizer = text_format._Tokenizer(text)
    self.assertRaises(text_format.ParseError, tokenizer.ConsumeUint32)
    self.assertRaises(text_format.ParseError, tokenizer.ConsumeUint64)
    self.assertEqual(-1, tokenizer.ConsumeInt32())

    self.assertRaises(text_format.ParseError, tokenizer.ConsumeUint32)
    self.assertRaises(text_format.ParseError, tokenizer.ConsumeInt32)
    self.assertEqual(uint32_max + 1, tokenizer.ConsumeInt64())

    self.assertRaises(text_format.ParseError, tokenizer.ConsumeInt64)
    self.assertEqual(int64_max + 1, tokenizer.ConsumeUint64())
    self.assertTrue(tokenizer.AtEnd())

    text = '-0 -0 0 0'
    tokenizer = text_format._Tokenizer(text)
    self.assertEqual(0, tokenizer.ConsumeUint32())
    self.assertEqual(0, tokenizer.ConsumeUint64())
    self.assertEqual(0, tokenizer.ConsumeUint32())
    self.assertEqual(0, tokenizer.ConsumeUint64())
    self.assertTrue(tokenizer.AtEnd())
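
  # Concretely (worked out from the shifts above): uint32_max is 4294967295
  # and int64_max is 9223372036854775807, so the first text parsed above is
  # '-1 4294967296 9223372036854775808'.  -1 fits only the signed types,
  # 4294967296 overflows int32/uint32 but fits int64, and
  # 9223372036854775808 overflows int64 but fits uint64.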

  def testConsumeByteString(self):
    text = '"string1\''
    tokenizer = text_format._Tokenizer(text)
    self.assertRaises(text_format.ParseError, tokenizer.ConsumeByteString)

    text = 'string1"'
    tokenizer = text_format._Tokenizer(text)
    self.assertRaises(text_format.ParseError, tokenizer.ConsumeByteString)

    text = '\n"\\xt"'
    tokenizer = text_format._Tokenizer(text)
    self.assertRaises(text_format.ParseError, tokenizer.ConsumeByteString)

    text = '\n"\\"'
    tokenizer = text_format._Tokenizer(text)
    self.assertRaises(text_format.ParseError, tokenizer.ConsumeByteString)

    text = '\n"\\x"'
    tokenizer = text_format._Tokenizer(text)
    self.assertRaises(text_format.ParseError, tokenizer.ConsumeByteString)

  def testConsumeBool(self):
    text = 'not-a-bool'
    tokenizer = text_format._Tokenizer(text)
    self.assertRaises(text_format.ParseError, tokenizer.ConsumeBool)

  def testInfNan(self):
    # Make sure our infinity and NaN definitions are sound.
    self.assertEquals(float, type(text_format._INFINITY))
    self.assertEquals(float, type(text_format._NAN))
    self.assertTrue(text_format._NAN != text_format._NAN)

    inf_times_zero = text_format._INFINITY * 0
    self.assertTrue(inf_times_zero != inf_times_zero)

    self.assertTrue(text_format._INFINITY > 0)
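
  # The assertions above rely on IEEE 754 semantics: NaN compares unequal to
  # everything, including itself, and infinity multiplied by zero is NaN.
  # Plain Python floats behave the same way, e.g. float('nan') != float('nan')
  # and float('inf') > 0 are both true.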


if __name__ == '__main__':
  unittest.main()