TextFormat.cs 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Globalization;
  4. using System.IO;
  5. using System.Text;
  6. using Google.ProtocolBuffers.Descriptors;
  7. using System.Collections;
  8. namespace Google.ProtocolBuffers {
  9. /// <summary>
  /// Provides ASCII text formatting (the Print methods) and parsing (the Merge methods)
  /// support for messages.
  12. /// </summary>
  13. public static class TextFormat {
  /// <summary>
  /// Writes the text-format representation of <paramref name="message"/>
  /// to <paramref name="output"/>.
  /// </summary>
  /// <param name="message">The message to print.</param>
  /// <param name="output">The writer that receives the generated text.</param>
  public static void Print(IMessage message, TextWriter output) {
    // Wrap the writer in a generator and hand off to the generator-based overload.
    Print(message, new TextGenerator(output));
  }
  /// <summary>
  /// Writes the text-format representation of the unknown fields in
  /// <paramref name="fields"/> to <paramref name="output"/>.
  /// </summary>
  /// <param name="fields">The unknown field set to print.</param>
  /// <param name="output">The writer that receives the generated text.</param>
  public static void Print(UnknownFieldSet fields, TextWriter output) {
    PrintUnknownFields(fields, new TextGenerator(output));
  }
  /// <summary>
  /// Returns the text-format representation of <paramref name="message"/> as a string.
  /// </summary>
  /// <param name="message">The message to print.</param>
  /// <returns>Everything the TextWriter-based Print overload wrote for the message.</returns>
  public static string PrintToString(IMessage message) {
    StringBuilder buffer = new StringBuilder();
    Print(message, new StringWriter(buffer));
    return buffer.ToString();
  }
  /// <summary>
  /// Returns the text-format representation of the unknown fields in
  /// <paramref name="fields"/> as a string.
  /// </summary>
  /// <param name="fields">The unknown field set to print.</param>
  /// <returns>Everything the TextWriter-based Print overload wrote for the set.</returns>
  public static string PrintToString(UnknownFieldSet fields) {
    StringBuilder buffer = new StringBuilder();
    Print(fields, new StringWriter(buffer));
    return buffer.ToString();
  }
  /// <summary>
  /// Prints each entry of <c>message.AllFields</c> via <see cref="PrintField"/>, then
  /// prints the message's unknown fields.
  /// </summary>
  /// <param name="message">The message to print.</param>
  /// <param name="generator">The generator that receives the output.</param>
  private static void Print(IMessage message, TextGenerator generator) {
    // The message descriptor is not needed here: every entry in AllFields already
    // carries its own FieldDescriptor as the key.
    foreach (KeyValuePair<FieldDescriptor, object> entry in message.AllFields) {
      PrintField(entry.Key, entry.Value, generator);
    }
    PrintUnknownFields(message.UnknownFields, generator);
  }
  /// <summary>
  /// Prints one field. For a repeated field, <paramref name="value"/> is enumerated
  /// and every element is printed as its own entry.
  /// </summary>
  /// <param name="field">Descriptor of the field being printed.</param>
  /// <param name="value">The field value; an <see cref="IEnumerable"/> when the field is repeated.</param>
  /// <param name="generator">The generator that receives the output.</param>
  internal static void PrintField(FieldDescriptor field, object value, TextGenerator generator) {
    if (!field.IsRepeated) {
      PrintSingleField(field, value, generator);
      return;
    }

    // Repeated field: one printed entry per element.
    foreach (object item in (IEnumerable) value) {
      PrintSingleField(field, item, generator);
    }
  }
  /// <summary>
  /// Prints a single (non-repeated, or one element of a repeated) field: its name,
  /// then either <c>": value"</c> or an indented <c>" { ... }"</c> block for
  /// message-typed fields, then a newline.
  /// </summary>
  /// <param name="field">Descriptor of the field being printed.</param>
  /// <param name="value">The single value to print.</param>
  /// <param name="generator">The generator that receives the output.</param>
  private static void PrintSingleField(FieldDescriptor field, Object value, TextGenerator generator) {
    if (!field.IsExtension) {
      // Groups must be serialized with their original capitalization, which is
      // the name of their message type rather than the field name.
      bool isGroup = field.FieldType == FieldType.Group;
      generator.Print(isGroup ? field.MessageType.Name : field.Name);
    } else {
      generator.Print("[");
      // MessageSet elements are special-cased for compatibility with proto1:
      // they are printed under the message type's full name. The last condition
      // compares the extension scope and the message type with ==.
      bool isMessageSetElement = field.ContainingType.Options.MessageSetWireFormat
          && field.FieldType == FieldType.Message
          && field.IsOptional
          && field.ExtensionScope == field.MessageType;
      generator.Print(isMessageSetElement ? field.MessageType.FullName : field.FullName);
      generator.Print("]");
    }

    bool isMessage = field.MappedType == MappedType.Message;
    if (isMessage) {
      generator.Print(" {\n");
      generator.Indent();
    } else {
      generator.Print(": ");
    }

    PrintFieldValue(field, value, generator);

    if (isMessage) {
      generator.Outdent();
      generator.Print("}");
    }
    generator.Print("\n");
  }
  /// <summary>
  /// Prints only the value part of a field, formatted according to the field's type.
  /// Field types not listed in the switch produce no output.
  /// </summary>
  /// <param name="field">Descriptor whose FieldType selects the formatting.</param>
  /// <param name="value">The value to print; cast according to the field type.</param>
  /// <param name="generator">The generator that receives the output.</param>
  private static void PrintFieldValue(FieldDescriptor field, object value, TextGenerator generator) {
    switch (field.FieldType) {
      case FieldType.Group:
      case FieldType.Message:
        // Nested messages recurse into the message printer.
        Print((IMessage) value, generator);
        break;

      case FieldType.Enum:
        generator.Print(((EnumValueDescriptor) value).Name);
        break;

      case FieldType.Bytes:
        generator.Print("\"");
        generator.Print(EscapeBytes((ByteString) value));
        generator.Print("\"");
        break;

      case FieldType.String:
        generator.Print("\"");
        generator.Print(EscapeText((string) value));
        generator.Print("\"");
        break;

      case FieldType.Bool:
        // Spell the literals out in lower case rather than relying on ToString.
        generator.Print((bool) value ? "true" : "false");
        break;

      case FieldType.Double:
      case FieldType.Float:
      case FieldType.Fixed32:
      case FieldType.Fixed64:
      case FieldType.SFixed32:
      case FieldType.SFixed64:
      case FieldType.Int32:
      case FieldType.Int64:
      case FieldType.SInt32:
      case FieldType.SInt64:
      case FieldType.UInt32:
      case FieldType.UInt64:
        // Plain Object.ToString would format with the current culture; force the
        // invariant culture so the output is predictable everywhere.
        IConvertible number = (IConvertible) value;
        generator.Print(number.ToString(CultureInfo.InvariantCulture));
        break;
    }
  }
  /// <summary>
  /// Prints every unknown field in <paramref name="unknownFields"/>, keyed by field number.
  /// Varints are printed in decimal, fixed32/fixed64 values as zero-padded hex,
  /// length-delimited values as escaped quoted strings, and groups as nested,
  /// indented blocks (recursively).
  /// </summary>
  /// <param name="unknownFields">The unknown field set to print.</param>
  /// <param name="generator">The generator that receives the output.</param>
  private static void PrintUnknownFields(UnknownFieldSet unknownFields, TextGenerator generator) {
    foreach (KeyValuePair<int, UnknownField> entry in unknownFields.FieldDictionary) {
      // Format numbers with the invariant culture, matching PrintFieldValue, so
      // the output never depends on the culture of the executing thread.
      string fieldNumber = entry.Key.ToString(CultureInfo.InvariantCulture);
      string prefix = fieldNumber + ": ";
      UnknownField field = entry.Value;

      foreach (ulong value in field.VarintList) {
        generator.Print(prefix);
        generator.Print(value.ToString(CultureInfo.InvariantCulture));
        generator.Print("\n");
      }
      foreach (uint value in field.Fixed32List) {
        generator.Print(prefix);
        generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x8}", value));
        generator.Print("\n");
      }
      foreach (ulong value in field.Fixed64List) {
        generator.Print(prefix);
        generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x16}", value));
        generator.Print("\n");
      }
      foreach (ByteString value in field.LengthDelimitedList) {
        generator.Print(fieldNumber);
        generator.Print(": \"");
        generator.Print(EscapeBytes(value));
        generator.Print("\"\n");
      }
      foreach (UnknownFieldSet value in field.GroupList) {
        generator.Print(fieldNumber);
        generator.Print(" {\n");
        generator.Indent();
        PrintUnknownFields(value, generator);
        generator.Outdent();
        generator.Print("}\n");
      }
    }
  }
  /// <summary>
  /// Parses <paramref name="text"/> as an unsigned 64-bit integer using the
  /// hex/decimal/octal rules of ParseInteger.
  /// </summary>
  // TODO(jonskeet): InternalsVisibleTo
  public static ulong ParseUInt64(string text) {
    // ParseInteger hands the value back in a long; reinterpret it as unsigned.
    long parsed = ParseInteger(text, false, true);
    return (ulong) parsed;
  }
  /// <summary>
  /// Parses <paramref name="text"/> as a signed 64-bit integer using the
  /// hex/decimal/octal rules of ParseInteger.
  /// </summary>
  // TODO(jonskeet): InternalsVisibleTo
  public static long ParseInt64(string text) {
    long parsed = ParseInteger(text, true, true);
    return parsed;
  }
  /// <summary>
  /// Parses <paramref name="text"/> as an unsigned 32-bit integer using the
  /// hex/decimal/octal rules of ParseInteger.
  /// </summary>
  // TODO(jonskeet): InternalsVisibleTo
  public static uint ParseUInt32(string text) {
    long parsed = ParseInteger(text, false, false);
    return (uint) parsed;
  }
  /// <summary>
  /// Parses <paramref name="text"/> as a signed 32-bit integer using the
  /// hex/decimal/octal rules of ParseInteger.
  /// </summary>
  // TODO(jonskeet): InternalsVisibleTo
  public static int ParseInt32(string text) {
    long parsed = ParseInteger(text, true, false);
    return (int) parsed;
  }
  /// <summary>
  /// Parses an integer in hex (leading 0x), decimal (no prefix) or octal (leading 0).
  /// Only a negative sign is permitted, and it must come before the radix indicator.
  /// No whitespace, plus sign, or repeated radix prefix is accepted.
  /// </summary>
  /// <param name="text">The literal to parse.</param>
  /// <param name="isSigned">True to allow a leading '-' and apply the signed range.</param>
  /// <param name="isLong">True for the 64-bit range, false for the 32-bit range.</param>
  /// <returns>
  /// The parsed value. Unsigned 64-bit values above long.MaxValue are returned as
  /// the same bit pattern; callers cast the result back to ulong.
  /// </returns>
  /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
  /// <exception cref="FormatException">
  /// The text is not a valid literal, is negative when <paramref name="isSigned"/> is
  /// false, or is out of range for the requested type.
  /// </exception>
  private static long ParseInteger(string text, bool isSigned, bool isLong) {
    if (text == null) {
      throw new ArgumentNullException("text");
    }
    string original = text;
    bool negative = false;
    // Ordinal comparisons: the prefixes are syntax, not linguistic text.
    if (text.StartsWith("-", StringComparison.Ordinal)) {
      if (!isSigned) {
        throw new FormatException("Number must be positive: " + original);
      }
      negative = true;
      text = text.Substring(1);
    }

    int radix = 10;
    if (text.StartsWith("0x", StringComparison.Ordinal)) {
      radix = 16;
      text = text.Substring(2);
    } else if (text.StartsWith("0", StringComparison.Ordinal)) {
      radix = 8;
    }

    // Validate the digits up front. Without this, the framework parsers would accept
    // input outside the documented grammar (a '+' sign or whitespace in decimal, a
    // second "0x" prefix in hex) or throw ArgumentException-derived exceptions
    // (e.g. for "0x" or "0x-5") instead of FormatException.
    if (text.Length == 0) {
      throw new FormatException("Invalid number: " + original);
    }
    foreach (char c in text) {
      bool validDigit = radix == 16
          ? (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
          : c >= '0' && c < '0' + radix;
      if (!validDigit) {
        throw new FormatException("Invalid number: " + original);
      }
    }

    ulong result;
    try {
      // Workaround for https://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=278448
      // We should be able to use Convert.ToUInt64 for all cases.
      result = radix == 10
          ? ulong.Parse(text, NumberStyles.None, CultureInfo.InvariantCulture)
          : Convert.ToUInt64(text, radix);
    } catch (OverflowException) {
      // Convert OverflowException to FormatException so there's a single exception type this method can throw.
      string numberDescription = string.Format("{0}-bit {1}signed integer", isLong ? 64 : 32, isSigned ? "" : "un");
      throw new FormatException("Number out of range for " + numberDescription + ": " + original);
    }

    if (negative) {
      // The magnitude of the most negative value is one more than the largest positive value.
      ulong max = isLong ? 0x8000000000000000UL : 0x80000000L;
      if (result > max) {
        string numberDescription = string.Format("{0}-bit signed integer", isLong ? 64 : 32);
        throw new FormatException("Number out of range for " + numberDescription + ": " + original);
      }
      // unchecked: negating a magnitude of 2^63 must wrap to long.MinValue, even if
      // the project is compiled with overflow checking enabled.
      return unchecked(-((long) result));
    } else {
      ulong max = isSigned
          ? (isLong ? (ulong) long.MaxValue : int.MaxValue)
          : (isLong ? ulong.MaxValue : uint.MaxValue);
      if (result > max) {
        string numberDescription = string.Format("{0}-bit {1}signed integer", isLong ? 64 : 32, isSigned ? "" : "un");
        throw new FormatException("Number out of range for " + numberDescription + ": " + original);
      }
      // unchecked: values above long.MaxValue are deliberately returned as a bit pattern.
      return unchecked((long) result);
    }
  }
  /// <summary>
  /// Returns true when <paramref name="c"/> is one of the octal digits '0' through '7'.
  /// </summary>
  private static bool IsOctal(char c) {
    int offset = c - '0';
    return offset >= 0 && offset <= 7;
  }
  /// <summary>
  /// Returns true when <paramref name="c"/> is an ASCII hex digit:
  /// '0'-'9', 'a'-'f' or 'A'-'F'.
  /// </summary>
  private static bool IsHex(char c) {
    if (c >= '0' && c <= '9') {
      return true;
    }
    if (c >= 'a' && c <= 'f') {
      return true;
    }
    return c >= 'A' && c <= 'F';
  }
  /// <summary>
  /// Converts a digit character (in any base up to 36) to its numeric value:
  /// '0'-'9' map to 0-9 and letters of either case map to 10-35.
  /// The character is not validated; anything that is not a decimal digit or a
  /// lower-case letter is treated as if it were an upper-case letter.
  /// </summary>
  private static int ParseDigit(char c) {
    if (c >= '0' && c <= '9') {
      return c - '0';
    }
    char letterBase = (c >= 'a' && c <= 'z') ? 'a' : 'A';
    return c - letterBase + 10;
  }
  /// <summary>
  /// Unescapes a text string that was escaped using <see cref="EscapeText(string)" />:
  /// the escapes are decoded to bytes, which are then read as UTF-8.
  /// Two-digit hex escapes (starting with "\x") are also recognised.
  /// TODO(jonskeet): InternalsVisibleTo
  /// </summary>
  public static string UnescapeText(string input) {
    ByteString decoded = UnescapeBytes(input);
    return decoded.ToStringUtf8();
  }
  /// <summary>
  /// Text counterpart of <see cref="EscapeBytes" />: the string is encoded as UTF-8
  /// and each resulting byte is then escaped individually.
  /// The returned value is guaranteed to be entirely ASCII.
  /// TODO(jonskeet): InternalsVisibleTo
  /// </summary>
  public static string EscapeText(string input) {
    ByteString utf8 = ByteString.CopyFromUtf8(input);
    return EscapeBytes(utf8);
  }
  /// <summary>
  /// Escapes bytes in the format used in protocol buffer text format, which
  /// is the same as the format used for C string literals. All bytes
  /// that are not printable 7-bit ASCII characters (0x20 through 0x7e) are
  /// escaped, as well as backslash, single-quote, and double-quote characters.
  /// Bytes with no short-hand escape sequence are escaped using 3-digit octal
  /// sequences; this includes DEL (0x7f), which is written as "\177".
  /// The returned value is guaranteed to be entirely ASCII.
  /// TODO(jonskeet): InternalsVisibleTo
  /// </summary>
  /// <param name="input">The bytes to escape.</param>
  /// <returns>The escaped, printable representation of <paramref name="input"/>.</returns>
  public static String EscapeBytes(ByteString input) {
    StringBuilder builder = new StringBuilder(input.Length);
    foreach (byte b in input) {
      switch (b) {
        // BEL and VT are matched by their numeric byte values.
        case 0x07: builder.Append("\\a" ); break;
        case (byte)'\b': builder.Append("\\b" ); break;
        case (byte)'\f': builder.Append("\\f" ); break;
        case (byte)'\n': builder.Append("\\n" ); break;
        case (byte)'\r': builder.Append("\\r" ); break;
        case (byte)'\t': builder.Append("\\t" ); break;
        case 0x0b: builder.Append("\\v" ); break;
        case (byte)'\\': builder.Append("\\\\"); break;
        case (byte)'\'': builder.Append("\\\'"); break;
        case (byte)'"' : builder.Append("\\\""); break;
        default:
          // Only 0x20 (space) through 0x7e (tilde) are printable. DEL (0x7f) is a
          // control character and must be escaped like the bytes at 0x80 and above.
          if (b >= 0x20 && b < 0x7f) {
            builder.Append((char) b);
          } else {
            // Three octal digits: 2 bits, then 3 bits, then 3 bits.
            builder.Append('\\');
            builder.Append((char) ('0' + ((b >> 6) & 3)));
            builder.Append((char) ('0' + ((b >> 3) & 7)));
            builder.Append((char) ('0' + (b & 7)));
          }
          break;
      }
    }
    return builder.ToString();
  }
  /// <summary>
  /// Decodes a C-style escaped string (octal and hex escapes, plus the short-hand
  /// escapes \a \b \f \n \r \t \v \\ \' \") into a byte string.
  /// TODO(jonskeet): Make this internal again, and use InternalsVisibleTo.
  /// </summary>
  /// <param name="input">The escaped text; unescaped characters must be in the range 32-127.</param>
  /// <returns>The decoded bytes.</returns>
  /// <exception cref="FormatException">
  /// The input contains a character outside the range 32-127, ends with a lone
  /// backslash, has "\x" without a hex digit, or uses an unknown escape character.
  /// </exception>
  public static ByteString UnescapeBytes(string input) {
    // Each escape sequence decodes to a single byte, so the output can never be
    // longer than the input.
    byte[] buffer = new byte[input.Length];
    int written = 0;
    int index = 0;
    while (index < input.Length) {
      char current = input[index++];
      if (current < 32 || current > 127) {
        throw new FormatException("Escaped string must only contain ASCII");
      }
      if (current != '\\') {
        buffer[written++] = (byte) current;
        continue;
      }

      if (index >= input.Length) {
        throw new FormatException("Invalid escape sequence: '\\' at end of string.");
      }
      char escape = input[index++];

      if (IsOctal(escape)) {
        // Octal escape: one digit, optionally followed by up to two more.
        int code = ParseDigit(escape);
        for (int extra = 0; extra < 2 && index < input.Length && IsOctal(input[index]); extra++) {
          code = code * 8 + ParseDigit(input[index++]);
        }
        buffer[written++] = (byte) code;
        continue;
      }

      if (escape == 'x') {
        // Hex escape: one mandatory digit, optionally followed by a second.
        if (index >= input.Length || !IsHex(input[index])) {
          throw new FormatException("Invalid escape sequence: '\\x' with no digits");
        }
        int code = ParseDigit(input[index++]);
        if (index < input.Length && IsHex(input[index])) {
          code = code * 16 + ParseDigit(input[index++]);
        }
        buffer[written++] = (byte)code;
        continue;
      }

      // Short-hand escapes.
      byte decoded;
      switch (escape) {
        case 'a': decoded = 0x07; break;
        case 'b': decoded = (byte) '\b'; break;
        case 'f': decoded = (byte) '\f'; break;
        case 'n': decoded = (byte) '\n'; break;
        case 'r': decoded = (byte) '\r'; break;
        case 't': decoded = (byte) '\t'; break;
        case 'v': decoded = 0x0b; break;
        case '\\': decoded = (byte) '\\'; break;
        case '\'': decoded = (byte) '\''; break;
        case '"': decoded = (byte) '\"'; break;
        default:
          throw new FormatException("Invalid escape sequence: '\\" + escape + "'");
      }
      buffer[written++] = decoded;
    }
    return ByteString.CopyFrom(buffer, 0, written);
  }
  /// <summary>
  /// Parses text-format <paramref name="text"/> and merges the fields into
  /// <paramref name="builder"/>, using an empty extension registry.
  /// </summary>
  public static void Merge(string text, IBuilder builder) {
    ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
    Merge(text, noExtensions, builder);
  }
  /// <summary>
  /// Reads text-format input from <paramref name="reader"/> and merges the fields
  /// into <paramref name="builder"/>, using an empty extension registry.
  /// </summary>
  public static void Merge(TextReader reader, IBuilder builder) {
    ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
    Merge(reader, noExtensions, builder);
  }
  /// <summary>
  /// Reads all remaining text from <paramref name="reader"/>, parses it as text format
  /// and merges the fields into <paramref name="builder"/>. Extensions are looked up
  /// in <paramref name="registry"/>.
  /// </summary>
  public static void Merge(TextReader reader, ExtensionRegistry registry, IBuilder builder) {
    // The tokenizer works on a string, so the reader is drained up front.
    string text = reader.ReadToEnd();
    Merge(text, registry, builder);
  }
  /// <summary>
  /// Parses text-format <paramref name="text"/> field by field until the tokenizer
  /// reaches the end, merging each field into <paramref name="builder"/>. Extensions
  /// are looked up in <paramref name="registry"/>.
  /// </summary>
  public static void Merge(string text, ExtensionRegistry registry, IBuilder builder) {
    for (TextTokenizer tokenizer = new TextTokenizer(text); !tokenizer.AtEnd; ) {
      MergeField(tokenizer, registry, builder);
    }
  }
  /// <summary>
  /// Parses a single field from the specified tokenizer and merges it into
  /// the builder. The field is either an extension written in square brackets or a
  /// regular field name; its value is either a nested message block or a scalar
  /// after a colon. Repeated fields are appended, other fields are set.
  /// </summary>
  /// <param name="tokenizer">Token source positioned at the start of a field.</param>
  /// <param name="extensionRegistry">Registry used to resolve bracketed extension names.</param>
  /// <param name="builder">Builder that receives the parsed field.</param>
  private static void MergeField(TextTokenizer tokenizer, ExtensionRegistry extensionRegistry,
      IBuilder builder) {
    MessageDescriptor type = builder.DescriptorForType;
    ExtensionInfo extension = null;
    FieldDescriptor field;

    if (tokenizer.TryConsume("[")) {
      extension = ParseExtensionName(tokenizer, extensionRegistry, type);
      field = extension.Descriptor;
    } else {
      field = ParseFieldName(tokenizer, type);
    }

    object value;
    if (field.MappedType == MappedType.Message) {
      value = ParseMessageValue(tokenizer, extensionRegistry, builder, field, extension);
    } else {
      value = ParseScalarValue(tokenizer, field);
    }

    if (field.IsRepeated) {
      builder.WeakAddRepeatedField(field, value);
    } else {
      builder.SetField(field, value);
    }
  }

  /// <summary>
  /// Parses the dotted extension name and closing "]" that follow an already-consumed "[",
  /// and resolves the name to an extension of <paramref name="type"/>.
  /// </summary>
  private static ExtensionInfo ParseExtensionName(TextTokenizer tokenizer,
      ExtensionRegistry extensionRegistry, MessageDescriptor type) {
    StringBuilder name = new StringBuilder(tokenizer.ConsumeIdentifier());
    while (tokenizer.TryConsume(".")) {
      name.Append(".");
      name.Append(tokenizer.ConsumeIdentifier());
    }

    ExtensionInfo extension = extensionRegistry[name.ToString()];
    if (extension == null) {
      throw tokenizer.CreateFormatExceptionPreviousToken("Extension \"" + name + "\" not found in the ExtensionRegistry.");
    } else if (extension.Descriptor.ContainingType != type) {
      throw tokenizer.CreateFormatExceptionPreviousToken("Extension \"" + name + "\" does not extend message type \"" +
          type.FullName + "\".");
    }

    tokenizer.Consume("]");
    return extension;
  }

  /// <summary>
  /// Parses a regular field name and resolves it to a field of <paramref name="type"/>,
  /// applying the special naming rules for groups.
  /// </summary>
  private static FieldDescriptor ParseFieldName(TextTokenizer tokenizer, MessageDescriptor type) {
    String name = tokenizer.ConsumeIdentifier();
    FieldDescriptor field = type.FindDescriptor<FieldDescriptor>(name);

    // Group names are expected to be capitalized as they appear in the
    // .proto file, which actually matches their type names, not their field
    // names.
    if (field == null) {
      // Explicitly specify the invariant culture so that this code does not break when
      // executing in Turkey.
      String lowerName = name.ToLowerInvariant();
      field = type.FindDescriptor<FieldDescriptor>(lowerName);
      // A match on the lower-cased name only counts for groups; for any other
      // field type the lookup is treated as a miss.
      if (field != null && field.FieldType != FieldType.Group) {
        field = null;
      }
    }

    // Again, special-case group names as described above: a group must be
    // written exactly as its message type is named.
    if (field != null && field.FieldType == FieldType.Group && field.MessageType.Name != name) {
      field = null;
    }

    if (field == null) {
      throw tokenizer.CreateFormatExceptionPreviousToken(
          "Message type \"" + type.FullName + "\" has no field named \"" + name + "\".");
    }
    return field;
  }

  /// <summary>
  /// Parses a nested message value delimited by angle brackets or braces (after an
  /// optional ":") and returns the built sub-message.
  /// </summary>
  private static object ParseMessageValue(TextTokenizer tokenizer, ExtensionRegistry extensionRegistry,
      IBuilder builder, FieldDescriptor field, ExtensionInfo extension) {
    tokenizer.TryConsume(":");  // optional

    String endToken;
    if (tokenizer.TryConsume("<")) {
      endToken = ">";
    } else {
      tokenizer.Consume("{");
      endToken = "}";
    }

    // Extensions build from their registered default instance; regular fields
    // ask the parent builder for a matching sub-builder.
    IBuilder subBuilder;
    if (extension == null) {
      subBuilder = builder.CreateBuilderForField(field);
    } else {
      subBuilder = extension.DefaultInstance.WeakCreateBuilderForType();
    }

    while (!tokenizer.TryConsume(endToken)) {
      if (tokenizer.AtEnd) {
        throw tokenizer.CreateFormatException("Expected \"" + endToken + "\".");
      }
      MergeField(tokenizer, extensionRegistry, subBuilder);
    }
    return subBuilder.WeakBuild();
  }

  /// <summary>
  /// Parses the mandatory ":" and the scalar (non-message) value that follows it,
  /// according to the field's type. Returns null for a field type the switch does not list.
  /// </summary>
  private static object ParseScalarValue(TextTokenizer tokenizer, FieldDescriptor field) {
    tokenizer.Consume(":");

    switch (field.FieldType) {
      case FieldType.Int32:
      case FieldType.SInt32:
      case FieldType.SFixed32:
        return tokenizer.ConsumeInt32();

      case FieldType.Int64:
      case FieldType.SInt64:
      case FieldType.SFixed64:
        return tokenizer.ConsumeInt64();

      case FieldType.UInt32:
      case FieldType.Fixed32:
        return tokenizer.ConsumeUInt32();

      case FieldType.UInt64:
      case FieldType.Fixed64:
        return tokenizer.ConsumeUInt64();

      case FieldType.Float:
        return tokenizer.consumeFloat();

      case FieldType.Double:
        return tokenizer.ConsumeDouble();

      case FieldType.Bool:
        return tokenizer.ConsumeBoolean();

      case FieldType.String:
        return tokenizer.ConsumeString();

      case FieldType.Bytes:
        return tokenizer.ConsumeByteString();

      case FieldType.Enum: {
        // Enum values may be written either by number or by name.
        EnumDescriptor enumType = field.EnumType;
        object enumValue;
        if (tokenizer.LookingAtInteger()) {
          int number = tokenizer.ConsumeInt32();
          enumValue = enumType.FindValueByNumber(number);
          if (enumValue == null) {
            throw tokenizer.CreateFormatExceptionPreviousToken(
                "Enum type \"" + enumType.FullName +
                "\" has no value with number " + number + ".");
          }
        } else {
          String id = tokenizer.ConsumeIdentifier();
          enumValue = enumType.FindValueByName(id);
          if (enumValue == null) {
            throw tokenizer.CreateFormatExceptionPreviousToken(
                "Enum type \"" + enumType.FullName +
                "\" has no value named \"" + id + "\".");
          }
        }
        return enumValue;
      }

      case FieldType.Message:
      case FieldType.Group:
        // Message-mapped fields are handled by ParseMessageValue.
        throw new InvalidOperationException("Can't get here.");
    }
    return null;
  }
  544. }
  545. }