TextFormat.cs 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631
  1. // Protocol Buffers - Google's data interchange format
  2. // Copyright 2008 Google Inc.
  3. // http://code.google.com/p/protobuf/
  4. //
  5. // Licensed under the Apache License, Version 2.0 (the "License");
  6. // you may not use this file except in compliance with the License.
  7. // You may obtain a copy of the License at
  8. //
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. //
  11. // Unless required by applicable law or agreed to in writing, software
  12. // distributed under the License is distributed on an "AS IS" BASIS,
  13. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. // See the License for the specific language governing permissions and
  15. // limitations under the License.
  16. using System;
  17. using System.Collections.Generic;
  18. using System.Globalization;
  19. using System.IO;
  20. using System.Text;
  21. using Google.ProtocolBuffers.Descriptors;
  22. using System.Collections;
  23. namespace Google.ProtocolBuffers {
  24. /// <summary>
  25. /// Provides ASCII text formatting support for messages.
  26. /// TODO(jonskeet): Parsing support.
  27. /// </summary>
  28. public static class TextFormat {
  /// <summary>
  /// Writes the text-format representation of <paramref name="message"/>
  /// to <paramref name="output"/>.
  /// </summary>
  /// <param name="message">The message to print.</param>
  /// <param name="output">The writer that receives the generated text.</param>
  public static void Print(IMessage message, TextWriter output) {
    // Wrap the writer in a TextGenerator and hand over to the private overload.
    Print(message, new TextGenerator(output));
  }

  /// <summary>
  /// Writes the text-format representation of <paramref name="fields"/>
  /// to <paramref name="output"/>.
  /// </summary>
  /// <param name="fields">The unknown field set to print.</param>
  /// <param name="output">The writer that receives the generated text.</param>
  public static void Print(UnknownFieldSet fields, TextWriter output) {
    PrintUnknownFields(fields, new TextGenerator(output));
  }
  /// <summary>
  /// Returns the text-format representation of <paramref name="message"/> as a string.
  /// </summary>
  /// <param name="message">The message to print.</param>
  /// <returns>The text accumulated while printing the message.</returns>
  public static string PrintToString(IMessage message) {
    StringWriter buffer = new StringWriter();
    Print(message, buffer);
    return buffer.ToString();
  }

  /// <summary>
  /// Returns the text-format representation of <paramref name="fields"/> as a string.
  /// </summary>
  /// <param name="fields">The unknown field set to print.</param>
  /// <returns>The text accumulated while printing the field set.</returns>
  public static string PrintToString(UnknownFieldSet fields) {
    StringWriter buffer = new StringWriter();
    Print(fields, buffer);
    return buffer.ToString();
  }
  /// <summary>
  /// Prints every entry of <c>message.AllFields</c> through
  /// <see cref="PrintField"/>, then prints the message's unknown fields.
  /// </summary>
  /// <param name="message">The message to print.</param>
  /// <param name="generator">The generator that receives the text.</param>
  private static void Print(IMessage message, TextGenerator generator) {
    // The message descriptor is not needed here: each entry of AllFields
    // already carries the descriptor of the field it belongs to.
    foreach (KeyValuePair<FieldDescriptor, object> entry in message.AllFields) {
      PrintField(entry.Key, entry.Value, generator);
    }
    PrintUnknownFields(message.UnknownFields, generator);
  }
  /// <summary>
  /// Prints one field. A repeated field is printed as one entry per element;
  /// any other field is printed as a single entry.
  /// </summary>
  /// <param name="field">Descriptor of the field being printed.</param>
  /// <param name="value">The field value; enumerated when the field is repeated.</param>
  /// <param name="generator">The generator that receives the text.</param>
  internal static void PrintField(FieldDescriptor field, object value, TextGenerator generator) {
    if (!field.IsRepeated) {
      PrintSingleField(field, value, generator);
      return;
    }
    // Repeated field: the value is a sequence, print each item on its own.
    foreach (object item in (IEnumerable) value) {
      PrintSingleField(field, item, generator);
    }
  }
  /// <summary>
  /// Prints a single field entry: the field name, then either
  /// <c>": value"</c> or, for message-mapped fields, an indented
  /// <c>" { ... }"</c> block, followed by a newline.
  /// </summary>
  /// <param name="field">Descriptor of the field being printed.</param>
  /// <param name="value">The single (non-repeated) value to print.</param>
  /// <param name="generator">The generator that receives the text.</param>
  private static void PrintSingleField(FieldDescriptor field, Object value, TextGenerator generator) {
    if (!field.IsExtension) {
      // Groups must be serialized with their original capitalization, which
      // is the name of their message type rather than the field name.
      generator.Print(field.FieldType == FieldType.Group ? field.MessageType.Name : field.Name);
    } else {
      // Extensions are written as "[name]".
      generator.Print("[");
      // MessageSet elements are special-cased for compatibility with proto1:
      // they are identified by the full name of their message type.
      // NOTE(review): the final clause looks intended as an identity
      // comparison between the two descriptors - confirm.
      bool isMessageSetElement = field.ContainingType.Options.MessageSetWireFormat
          && field.FieldType == FieldType.Message
          && field.IsOptional
          && field.ExtensionScope == field.MessageType;
      generator.Print(isMessageSetElement ? field.MessageType.FullName : field.FullName);
      generator.Print("]");
    }

    bool isMessage = field.MappedType == MappedType.Message;
    if (isMessage) {
      generator.Print(" {\n");
      generator.Indent();
    } else {
      generator.Print(": ");
    }

    PrintFieldValue(field, value, generator);

    if (isMessage) {
      generator.Outdent();
      generator.Print("}");
    }
    generator.Print("\n");
  }
  /// <summary>
  /// Prints just the value of a field (no name, no separators), formatted
  /// according to the field's declared type. Field types not listed in the
  /// switch produce no output.
  /// </summary>
  /// <param name="field">Descriptor whose <c>FieldType</c> selects the formatting.</param>
  /// <param name="value">The value to print; cast according to the field type.</param>
  /// <param name="generator">The generator that receives the text.</param>
  private static void PrintFieldValue(FieldDescriptor field, object value, TextGenerator generator) {
    switch (field.FieldType) {
      case FieldType.Int32:
      case FieldType.Int64:
      case FieldType.SInt32:
      case FieldType.SInt64:
      case FieldType.SFixed32:
      case FieldType.SFixed64:
      case FieldType.UInt32:
      case FieldType.UInt64:
      case FieldType.Fixed32:
      case FieldType.Fixed64:
        // The simple Object.ToString converts using the current culture.
        // We want to always use the invariant culture so it's predictable.
        generator.Print(((IConvertible) value).ToString(CultureInfo.InvariantCulture));
        break;
      case FieldType.Float:
      case FieldType.Double:
        // Use the round-trip format so that the printed text parses back to
        // the same value; the default general format may drop significant
        // digits on older runtimes. Invariant culture keeps "." as the
        // decimal separator.
        generator.Print(((IFormattable) value).ToString("R", CultureInfo.InvariantCulture));
        break;
      case FieldType.Bool:
        // Explicitly use the Java true/false (lower case).
        generator.Print((bool) value ? "true" : "false");
        break;
      case FieldType.String:
        generator.Print("\"");
        generator.Print(EscapeText((string) value));
        generator.Print("\"");
        break;
      case FieldType.Bytes:
        generator.Print("\"");
        generator.Print(EscapeBytes((ByteString) value));
        generator.Print("\"");
        break;
      case FieldType.Enum:
        // Enum values are printed by name.
        generator.Print(((EnumValueDescriptor) value).Name);
        break;
      case FieldType.Message:
      case FieldType.Group:
        // Nested messages recurse; the caller has already written the braces.
        Print((IMessage) value, generator);
        break;
    }
  }
  /// <summary>
  /// Prints every entry of an unknown field set, keyed by field number.
  /// Varints are printed in decimal, fixed32/fixed64 values as zero-padded
  /// hexadecimal, length-delimited values as escaped quoted strings, and
  /// groups as nested, indented blocks.
  /// </summary>
  /// <param name="unknownFields">The unknown field set to print.</param>
  /// <param name="generator">The generator that receives the text.</param>
  private static void PrintUnknownFields(UnknownFieldSet unknownFields, TextGenerator generator) {
    foreach (KeyValuePair<int, UnknownField> entry in unknownFields.FieldDictionary) {
      // Format numbers with the invariant culture so the output never depends
      // on the current thread's culture (same policy as PrintFieldValue).
      string number = entry.Key.ToString(CultureInfo.InvariantCulture);
      string prefix = number + ": ";
      UnknownField field = entry.Value;

      foreach (ulong value in field.VarintList) {
        generator.Print(prefix);
        generator.Print(value.ToString(CultureInfo.InvariantCulture));
        generator.Print("\n");
      }
      foreach (uint value in field.Fixed32List) {
        generator.Print(prefix);
        generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x8}", value));
        generator.Print("\n");
      }
      foreach (ulong value in field.Fixed64List) {
        generator.Print(prefix);
        generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x16}", value));
        generator.Print("\n");
      }
      foreach (ByteString value in field.LengthDelimitedList) {
        generator.Print(prefix);
        generator.Print("\"");
        generator.Print(EscapeBytes(value));
        generator.Print("\"\n");
      }
      foreach (UnknownFieldSet value in field.GroupList) {
        // Groups have no ':' separator; their content is printed recursively.
        generator.Print(number);
        generator.Print(" {\n");
        generator.Indent();
        PrintUnknownFields(value, generator);
        generator.Outdent();
        generator.Print("}\n");
      }
    }
  }
  // TODO(jonskeet): InternalsVisibleTo
  /// <summary>
  /// Parses <paramref name="text"/> as an unsigned 64-bit integer using the
  /// hex/decimal/octal rules of <c>ParseInteger</c>.
  /// </summary>
  /// <param name="text">The number to parse; a leading '-' is rejected.</param>
  /// <returns>The parsed value.</returns>
  public static ulong ParseUInt64(string text) {
    // ParseInteger hands back the 64-bit pattern in a long; the cast restores
    // the unsigned interpretation.
    long bits = ParseInteger(text, false, true);
    return (ulong) bits;
  }
  // TODO(jonskeet): InternalsVisibleTo
  /// <summary>
  /// Parses <paramref name="text"/> as a signed 64-bit integer using the
  /// hex/decimal/octal rules of <c>ParseInteger</c>.
  /// </summary>
  /// <param name="text">The number to parse, optionally preceded by '-'.</param>
  /// <returns>The parsed value.</returns>
  public static long ParseInt64(string text) {
    long parsed = ParseInteger(text, true, true);
    return parsed;
  }
  // TODO(jonskeet): InternalsVisibleTo
  /// <summary>
  /// Parses <paramref name="text"/> as an unsigned 32-bit integer using the
  /// hex/decimal/octal rules of <c>ParseInteger</c>.
  /// </summary>
  /// <param name="text">The number to parse; a leading '-' is rejected.</param>
  /// <returns>The parsed value.</returns>
  public static uint ParseUInt32(string text) {
    // ParseInteger has already range-checked the value against 32 bits.
    long parsed = ParseInteger(text, false, false);
    return (uint) parsed;
  }
  // TODO(jonskeet): InternalsVisibleTo
  /// <summary>
  /// Parses <paramref name="text"/> as a signed 32-bit integer using the
  /// hex/decimal/octal rules of <c>ParseInteger</c>.
  /// </summary>
  /// <param name="text">The number to parse, optionally preceded by '-'.</param>
  /// <returns>The parsed value.</returns>
  public static int ParseInt32(string text) {
    // ParseInteger has already range-checked the value against 32 bits.
    long parsed = ParseInteger(text, true, false);
    return (int) parsed;
  }
  /// <summary>
  /// Parses an integer in hex (leading 0x), decimal (no prefix) or octal (leading 0).
  /// Only a negative sign is permitted, and it must come before the radix indicator.
  /// </summary>
  /// <param name="text">The text to parse.</param>
  /// <param name="isSigned">Whether the target type is signed; when false a leading '-' is rejected.</param>
  /// <param name="isLong">True to range-check against 64 bits, false for 32 bits.</param>
  /// <returns>
  /// The parsed value. For unsigned 64-bit values above <see cref="long.MaxValue"/>
  /// the result carries the same bit pattern, so callers cast it back to <c>ulong</c>.
  /// </returns>
  /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
  /// <exception cref="FormatException">
  /// The text is not a well-formed number, carries a sign that is not
  /// permitted, or is out of range for the requested type.
  /// </exception>
  private static long ParseInteger(string text, bool isSigned, bool isLong) {
    if (text == null) {
      throw new ArgumentNullException("text");
    }
    string original = text;
    bool negative = false;
    // Ordinal comparisons: this is a machine-readable format, so the prefix
    // checks must not depend on the current culture.
    if (text.StartsWith("-", StringComparison.Ordinal)) {
      if (!isSigned) {
        throw new FormatException("Number must be positive: " + original);
      }
      negative = true;
      text = text.Substring(1);
    }

    int radix = 10;
    if (text.StartsWith("0x", StringComparison.Ordinal)) {
      radix = 16;
      text = text.Substring(2);
    } else if (text.StartsWith("0", StringComparison.Ordinal)) {
      radix = 8;
    }

    // Validate the digits up front. Convert.ToUInt64 reports some malformed
    // input (no digits, an embedded '-', a repeated "0x") with exception types
    // other than FormatException or accepts it outright, and ulong.Parse
    // would otherwise accept a '+' sign or surrounding whitespace.
    if (text.Length == 0) {
      throw new FormatException("Number has no digits: " + original);
    }
    foreach (char c in text) {
      bool valid;
      if (radix == 16) {
        valid = ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F');
      } else if (radix == 8) {
        valid = '0' <= c && c <= '7';
      } else {
        valid = '0' <= c && c <= '9';
      }
      if (!valid) {
        throw new FormatException("Invalid character '" + c + "' in number: " + original);
      }
    }

    ulong result;
    try {
      // Workaround for https://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=278448
      // We should be able to use Convert.ToUInt64 for all cases.
      result = radix == 10
          ? ulong.Parse(text, NumberStyles.None, CultureInfo.InvariantCulture)
          : Convert.ToUInt64(text, radix);
    } catch (OverflowException) {
      // Convert OverflowException to FormatException so there's a single exception type this method can throw.
      throw CreateOutOfRangeException(original, isSigned, isLong);
    }

    if (negative) {
      // The magnitude of the most negative value is one more than the maximum positive value.
      ulong max = isLong ? 0x8000000000000000UL : 0x80000000L;
      if (result > max) {
        throw CreateOutOfRangeException(original, isSigned, isLong);
      }
      // Explicitly unchecked: negating the magnitude of long.MinValue must
      // wrap back to long.MinValue even if the project is compiled with /checked.
      return unchecked(-((long) result));
    } else {
      ulong max = isSigned
          ? (isLong ? (ulong) long.MaxValue : int.MaxValue)
          : (isLong ? ulong.MaxValue : uint.MaxValue);
      if (result > max) {
        throw CreateOutOfRangeException(original, isSigned, isLong);
      }
      // Explicitly unchecked: large unsigned 64-bit values are returned as their bit pattern.
      return unchecked((long) result);
    }
  }

  /// <summary>
  /// Builds the exception reported when a number does not fit the requested integer type.
  /// </summary>
  private static FormatException CreateOutOfRangeException(string original, bool isSigned, bool isLong) {
    string numberDescription = string.Format(CultureInfo.InvariantCulture,
        "{0}-bit {1}signed integer", isLong ? 64 : 32, isSigned ? "" : "un");
    return new FormatException("Number out of range for " + numberDescription + ": " + original);
  }
  /// <summary>
  /// Returns true when <paramref name="c"/> is one of the octal digits '0' through '7'.
  /// </summary>
  private static bool IsOctal(char c) {
    return c >= '0' && c <= '7';
  }
  /// <summary>
  /// Returns true when <paramref name="c"/> is a hexadecimal digit:
  /// '0'-'9', 'a'-'f' or 'A'-'F'.
  /// </summary>
  private static bool IsHex(char c) {
    if (c >= '0' && c <= '9') {
      return true;
    }
    if (c >= 'a' && c <= 'f') {
      return true;
    }
    return c >= 'A' && c <= 'F';
  }
  /// <summary>
  /// Interprets a character as a digit (in any base up to 36) and returns the
  /// numeric value: '0'-'9' map to 0-9, and letters map to 10 upwards.
  /// </summary>
  /// <remarks>
  /// No validation is performed: any character that is neither a decimal
  /// digit nor a lower-case letter is treated as if it were an upper-case letter.
  /// </remarks>
  private static int ParseDigit(char c) {
    if (c >= '0' && c <= '9') {
      return c - '0';
    }
    // Letters count from 10; pick the alphabet start that matches the case.
    char alphabetStart = (c >= 'a' && c <= 'z') ? 'a' : 'A';
    return c - alphabetStart + 10;
  }
  /// <summary>
  /// Unescapes a text string as escaped using <see cref="EscapeText(string)" />.
  /// Two-digit hex escapes (starting with "\x") are also recognised.
  /// The unescaped bytes are decoded as UTF-8.
  /// TODO(jonskeet): InternalsVisibleTo
  /// </summary>
  /// <param name="input">The escaped text.</param>
  /// <returns>The unescaped string.</returns>
  public static string UnescapeText(string input) {
    ByteString unescaped = UnescapeBytes(input);
    return unescaped.ToStringUtf8();
  }
  /// <summary>
  /// Like <see cref="EscapeBytes" /> but escapes a text string.
  /// The string is first encoded as UTF-8, then each byte is escaped individually.
  /// The returned value is guaranteed to be entirely ASCII.
  /// TODO(jonskeet): InternalsVisibleTo
  /// </summary>
  /// <param name="input">The text to escape.</param>
  /// <returns>The escaped representation of the text.</returns>
  public static string EscapeText(string input) {
    ByteString utf8 = ByteString.CopyFromUtf8(input);
    return EscapeBytes(utf8);
  }
  /// <summary>
  /// Escapes bytes in the format used in protocol buffer text format, which
  /// is the same as the format used for C string literals. All bytes
  /// that are not printable 7-bit ASCII characters are escaped, as well as
  /// backslash, single-quote, and double-quote characters. Bytes for which
  /// no short-hand escape sequence is defined are escaped using 3-digit
  /// octal sequences.
  /// The returned value is guaranteed to be entirely ASCII.
  /// TODO(jonskeet): InternalsVisibleTo
  /// </summary>
  /// <param name="input">The bytes to escape.</param>
  /// <returns>The escaped text.</returns>
  public static String EscapeBytes(ByteString input) {
    StringBuilder escaped = new StringBuilder(input.Length);
    foreach (byte current in input) {
      // Look for a short-hand escape first.
      string shorthand = null;
      switch (current) {
        // C# does not use \a or \v, so those two are given numerically.
        case 0x07: shorthand = "\\a"; break;
        case (byte) '\b': shorthand = "\\b"; break;
        case (byte) '\f': shorthand = "\\f"; break;
        case (byte) '\n': shorthand = "\\n"; break;
        case (byte) '\r': shorthand = "\\r"; break;
        case (byte) '\t': shorthand = "\\t"; break;
        case 0x0b: shorthand = "\\v"; break;
        case (byte) '\\': shorthand = "\\\\"; break;
        case (byte) '\'': shorthand = "\\'"; break;
        case (byte) '"': shorthand = "\\\""; break;
      }
      if (shorthand != null) {
        escaped.Append(shorthand);
        continue;
      }

      if (current >= 0x20 && current < 128) {
        // Printable ASCII passes through unchanged.
        escaped.Append((char) current);
        continue;
      }

      // Everything else becomes a backslash followed by three octal digits.
      escaped.Append('\\');
      escaped.Append((char) ('0' + ((current >> 6) & 3)));
      escaped.Append((char) ('0' + ((current >> 3) & 7)));
      escaped.Append((char) ('0' + (current & 7)));
    }
    return escaped.ToString();
  }
  /// <summary>
  /// Performs string unescaping from C style (octal, hex, form feeds, tab etc) into a byte string.
  /// TODO(jonskeet): Make this internal again, and use InternalsVisibleTo.
  /// </summary>
  /// <param name="input">
  /// The escaped text. Unescaped characters must lie in the range 32 to 127.
  /// </param>
  /// <returns>The unescaped bytes.</returns>
  /// <exception cref="FormatException">
  /// The input contains an unescaped character outside the permitted range,
  /// ends with a lone backslash, or contains an unrecognised escape sequence.
  /// </exception>
  public static ByteString UnescapeBytes(string input) {
    // Every input character yields at most one byte, so the input length is
    // an upper bound for the output length.
    byte[] buffer = new byte[input.Length];
    int count = 0;
    int index = 0;  // Always the position of the next unread character.

    while (index < input.Length) {
      char current = input[index++];
      if (current < 32 || current > 127) {
        throw new FormatException("Escaped string must only contain ASCII");
      }
      if (current != '\\') {
        buffer[count++] = (byte) current;
        continue;
      }
      if (index >= input.Length) {
        throw new FormatException("Invalid escape sequence: '\\' at end of string.");
      }

      char escape = input[index++];
      int code;
      if (IsOctal(escape)) {
        // Octal escape: one digit, optionally followed by up to two more.
        code = ParseDigit(escape);
        for (int extra = 0; extra < 2 && index < input.Length && IsOctal(input[index]); extra++) {
          code = code * 8 + ParseDigit(input[index++]);
        }
      } else {
        switch (escape) {
          case 'a': code = 0x07; break;
          case 'b': code = '\b'; break;
          case 'f': code = '\f'; break;
          case 'n': code = '\n'; break;
          case 'r': code = '\r'; break;
          case 't': code = '\t'; break;
          case 'v': code = 0x0b; break;
          case '\\': code = '\\'; break;
          case '\'': code = '\''; break;
          case '"': code = '\"'; break;
          case 'x':
            // Hex escape: one mandatory digit, optionally followed by a second.
            if (index >= input.Length || !IsHex(input[index])) {
              throw new FormatException("Invalid escape sequence: '\\x' with no digits");
            }
            code = ParseDigit(input[index++]);
            if (index < input.Length && IsHex(input[index])) {
              code = code * 16 + ParseDigit(input[index++]);
            }
            break;
          default:
            throw new FormatException("Invalid escape sequence: '\\" + escape + "'");
        }
      }
      // Three-digit octal escapes can exceed 255; the cast keeps the low 8 bits.
      buffer[count++] = (byte) code;
    }
    return ByteString.CopyFrom(buffer, 0, count);
  }
  /// <summary>
  /// Parses text-format <paramref name="text"/> and merges it into
  /// <paramref name="builder"/>, using an empty extension registry.
  /// </summary>
  public static void Merge(string text, IBuilder builder) {
    Merge(text, ExtensionRegistry.Empty, builder);
  }

  /// <summary>
  /// Parses text-format input from <paramref name="reader"/> and merges it
  /// into <paramref name="builder"/>, using an empty extension registry.
  /// </summary>
  public static void Merge(TextReader reader, IBuilder builder) {
    Merge(reader, ExtensionRegistry.Empty, builder);
  }

  /// <summary>
  /// Reads <paramref name="reader"/> to its end, then parses the text and
  /// merges it into <paramref name="builder"/>.
  /// </summary>
  /// <param name="reader">The source of the text; consumed completely.</param>
  /// <param name="registry">The registry used to look up extensions by name.</param>
  /// <param name="builder">The builder that receives the parsed fields.</param>
  public static void Merge(TextReader reader, ExtensionRegistry registry, IBuilder builder) {
    string content = reader.ReadToEnd();
    Merge(content, registry, builder);
  }

  /// <summary>
  /// Parses text-format <paramref name="text"/> one field at a time and
  /// merges each field into <paramref name="builder"/>.
  /// </summary>
  /// <param name="text">The text to parse.</param>
  /// <param name="registry">The registry used to look up extensions by name.</param>
  /// <param name="builder">The builder that receives the parsed fields.</param>
  public static void Merge(string text, ExtensionRegistry registry, IBuilder builder) {
    TextTokenizer tokens = new TextTokenizer(text);
    while (!tokens.AtEnd) {
      MergeField(tokens, registry, builder);
    }
  }
  /// <summary>
  /// Parses a single field from the specified tokenizer and merges it into
  /// the builder.
  /// </summary>
  /// <param name="tokenizer">The tokenizer positioned at the start of a field.</param>
  /// <param name="extensionRegistry">The registry used to resolve "[extension.name]" fields.</param>
  /// <param name="builder">
  /// The builder that receives the field; its descriptor determines which
  /// field names are valid.
  /// </param>
  private static void MergeField(TextTokenizer tokenizer, ExtensionRegistry extensionRegistry,
      IBuilder builder) {
    MessageDescriptor messageType = builder.DescriptorForType;
    ExtensionInfo extension = null;
    FieldDescriptor field;

    // Step 1: work out which field is being set.
    if (tokenizer.TryConsume("[")) {
      // An extension, written as "[dotted.qualified.name]".
      StringBuilder qualifiedName = new StringBuilder(tokenizer.ConsumeIdentifier());
      while (tokenizer.TryConsume(".")) {
        qualifiedName.Append(".");
        qualifiedName.Append(tokenizer.ConsumeIdentifier());
      }

      extension = extensionRegistry[qualifiedName.ToString()];
      if (extension == null) {
        throw tokenizer.CreateFormatExceptionPreviousToken("Extension \"" + qualifiedName + "\" not found in the ExtensionRegistry.");
      }
      if (extension.Descriptor.ContainingType != messageType) {
        throw tokenizer.CreateFormatExceptionPreviousToken("Extension \"" + qualifiedName + "\" does not extend message type \"" +
            messageType.FullName + "\".");
      }

      tokenizer.Consume("]");
      field = extension.Descriptor;
    } else {
      String fieldName = tokenizer.ConsumeIdentifier();
      field = messageType.FindDescriptor<FieldDescriptor>(fieldName);

      // Group names are expected to be capitalized as they appear in the
      // .proto file, which actually matches their type names, not their field
      // names.
      if (field == null) {
        // Explicitly specify the invariant culture so that this code does not break when
        // executing in Turkey.
        FieldDescriptor candidate = messageType.FindDescriptor<FieldDescriptor>(fieldName.ToLowerInvariant());
        // A lower-cased match only counts when the field is a group; for any
        // other field type the lookup is treated as a miss.
        if (candidate != null && candidate.FieldType == FieldType.Group) {
          field = candidate;
        }
      }

      // Again, special-case group names as described above: a group must be
      // referred to by exactly the name of its message type.
      if (field != null && field.FieldType == FieldType.Group && field.MessageType.Name != fieldName) {
        field = null;
      }

      if (field == null) {
        throw tokenizer.CreateFormatExceptionPreviousToken(
            "Message type \"" + messageType.FullName + "\" has no field named \"" + fieldName + "\".");
      }
    }

    // Step 2: parse the value.
    object value = null;
    if (field.MappedType == MappedType.Message) {
      tokenizer.TryConsume(":");  // optional

      // A nested message is delimited by either <...> or {...}.
      String closingToken;
      if (tokenizer.TryConsume("<")) {
        closingToken = ">";
      } else {
        tokenizer.Consume("{");
        closingToken = "}";
      }

      IBuilder subBuilder;
      if (extension != null) {
        subBuilder = extension.DefaultInstance.WeakCreateBuilderForType();
      } else {
        subBuilder = builder.CreateBuilderForField(field);
      }

      // Merge nested fields until the closing delimiter is consumed.
      while (!tokenizer.TryConsume(closingToken)) {
        if (tokenizer.AtEnd) {
          throw tokenizer.CreateFormatException("Expected \"" + closingToken + "\".");
        }
        MergeField(tokenizer, extensionRegistry, subBuilder);
      }

      value = subBuilder.WeakBuild();
    } else {
      tokenizer.Consume(":");

      switch (field.FieldType) {
        case FieldType.Int32:
        case FieldType.SInt32:
        case FieldType.SFixed32:
          value = tokenizer.ConsumeInt32();
          break;

        case FieldType.Int64:
        case FieldType.SInt64:
        case FieldType.SFixed64:
          value = tokenizer.ConsumeInt64();
          break;

        case FieldType.UInt32:
        case FieldType.Fixed32:
          value = tokenizer.ConsumeUInt32();
          break;

        case FieldType.UInt64:
        case FieldType.Fixed64:
          value = tokenizer.ConsumeUInt64();
          break;

        case FieldType.Float:
          value = tokenizer.consumeFloat();
          break;

        case FieldType.Double:
          value = tokenizer.ConsumeDouble();
          break;

        case FieldType.Bool:
          value = tokenizer.ConsumeBoolean();
          break;

        case FieldType.String:
          value = tokenizer.ConsumeString();
          break;

        case FieldType.Bytes:
          value = tokenizer.ConsumeByteString();
          break;

        case FieldType.Enum: {
          // Enum values may be given either by number or by name.
          EnumDescriptor enumType = field.EnumType;
          if (tokenizer.LookingAtInteger()) {
            int number = tokenizer.ConsumeInt32();
            value = enumType.FindValueByNumber(number);
            if (value == null) {
              throw tokenizer.CreateFormatExceptionPreviousToken(
                  "Enum type \"" + enumType.FullName +
                  "\" has no value with number " + number + ".");
            }
          } else {
            String id = tokenizer.ConsumeIdentifier();
            value = enumType.FindValueByName(id);
            if (value == null) {
              throw tokenizer.CreateFormatExceptionPreviousToken(
                  "Enum type \"" + enumType.FullName +
                  "\" has no value named \"" + id + "\".");
            }
          }
          break;
        }

        case FieldType.Message:
        case FieldType.Group:
          // Message-mapped fields were handled by the branch above.
          throw new InvalidOperationException("Can't get here.");
      }
    }

    // Step 3: store the value in the builder.
    if (!field.IsRepeated) {
      builder.SetField(field, value);
    } else {
      builder.WeakAddRepeatedField(field, value);
    }
  }
  559. }
  560. }