TextFormat.cs 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. using System;
  2. using System.Collections.Generic;
  3. using System.IO;
  4. using System.Text;
  5. using Google.ProtocolBuffers.Descriptors;
  6. using System.Collections;
  7. namespace Google.ProtocolBuffers {
  8. /// <summary>
  9. /// Provides ASCII text formatting support for messages.
  10. /// TODO(jonskeet): Parsing support.
  11. /// </summary>
  12. public static class TextFormat {
  /// <summary>
  /// Writes a textual representation of <paramref name="message"/> to
  /// <paramref name="output"/>.
  /// </summary>
  /// <param name="message">The message to print.</param>
  /// <param name="output">The writer that receives the text.</param>
  public static void Print(IMessage message, TextWriter output) {
    // Wrap the writer in a generator and hand off to the generator-based overload.
    Print(message, new TextGenerator(output));
  }
  /// <summary>
  /// Writes a textual representation of <paramref name="fields"/> to
  /// <paramref name="output"/>.
  /// </summary>
  /// <param name="fields">The unknown fields to print.</param>
  /// <param name="output">The writer that receives the text.</param>
  public static void Print(UnknownFieldSet fields, TextWriter output) {
    // Wrap the writer in a generator and delegate to the unknown-field printer.
    PrintUnknownFields(fields, new TextGenerator(output));
  }
  /// <summary>
  /// Prints <paramref name="message"/> into an in-memory writer and returns
  /// the accumulated text.
  /// </summary>
  /// <param name="message">The message to print.</param>
  /// <returns>The text produced by printing the message.</returns>
  public static string PrintToString(IMessage message) {
    StringWriter buffer = new StringWriter();
    Print(message, buffer);
    return buffer.ToString();
  }
  /// <summary>
  /// Prints <paramref name="fields"/> into an in-memory writer and returns
  /// the accumulated text.
  /// </summary>
  /// <param name="fields">The unknown fields to print.</param>
  /// <returns>The text produced by printing the unknown fields.</returns>
  public static string PrintToString(UnknownFieldSet fields) {
    StringWriter buffer = new StringWriter();
    Print(fields, buffer);
    return buffer.ToString();
  }
  /// <summary>
  /// Prints each entry of <c>message.AllFields</c> via <see cref="PrintField"/>,
  /// then prints <c>message.UnknownFields</c>.
  /// </summary>
  /// <param name="message">The message to print.</param>
  /// <param name="generator">The generator that receives the output.</param>
  private static void Print(IMessage message, TextGenerator generator) {
    // The message descriptor is not needed here: every field entry carries its
    // own FieldDescriptor, so the previously unused local has been removed.
    foreach (KeyValuePair<FieldDescriptor, object> entry in message.AllFields) {
      PrintField(entry.Key, entry.Value, generator);
    }
    PrintUnknownFields(message.UnknownFields, generator);
  }
  /// <summary>
  /// Prints a field. For a repeated field, <paramref name="value"/> is enumerated
  /// and every element is printed as its own entry; otherwise the value is
  /// printed once.
  /// </summary>
  /// <param name="field">Descriptor of the field being printed.</param>
  /// <param name="value">
  /// The field value; must be an <see cref="IEnumerable"/> when the field is repeated.
  /// </param>
  /// <param name="generator">The generator that receives the output.</param>
  internal static void PrintField(FieldDescriptor field, object value, TextGenerator generator) {
    if (!field.IsRepeated) {
      PrintSingleField(field, value, generator);
      return;
    }

    // Repeated field: one printed entry per element.
    IEnumerable elements = (IEnumerable) value;
    foreach (object element in elements) {
      PrintSingleField(field, element, generator);
    }
  }
  /// <summary>
  /// Prints one field entry: the field's label, then either <c>": value"</c> or,
  /// for message-mapped fields, an indented <c>{ ... }</c> block, then a newline.
  /// </summary>
  /// <param name="field">Descriptor of the field being printed.</param>
  /// <param name="value">The single (non-repeated) value to print.</param>
  /// <param name="generator">The generator that receives the output.</param>
  private static void PrintSingleField(FieldDescriptor field, Object value, TextGenerator generator) {
    if (field.IsExtension) {
      // Extensions are written in square brackets.
      generator.Print("[");
      // We special-case MessageSet elements for compatibility with proto1:
      // such an extension is labelled with its message type's full name.
      // NOTE(review): the last comparison is presumably reference equality
      // between descriptors - confirm no operator overload applies.
      bool isMessageSetElement =
          field.ContainingType.Options.MessageSetWireFormat
          && field.FieldType == FieldType.Message
          && field.IsOptional
          && field.ExtensionScope == field.MessageType;
      generator.Print(isMessageSetElement ? field.MessageType.FullName : field.FullName);
      generator.Print("]");
    } else {
      // Groups must be serialized with their original capitalization, which
      // is taken from the message type's name rather than the field's name.
      generator.Print(field.FieldType == FieldType.Group ? field.MessageType.Name : field.Name);
    }

    bool isMessage = field.MappedType == MappedType.Message;
    if (isMessage) {
      generator.Print(" {\n");
      generator.Indent();
    } else {
      generator.Print(": ");
    }

    PrintFieldValue(field, value, generator);

    if (isMessage) {
      generator.Outdent();
      generator.Print("}");
    }
    generator.Print("\n");
  }
  /// <summary>
  /// Prints the value part of a field entry, chosen by <c>field.FieldType</c>:
  /// numbers as invariant-culture text, booleans as <c>true</c>/<c>false</c>,
  /// strings and bytes as escaped double-quoted literals, enums by value name,
  /// and messages/groups by recursively printing the nested message.
  /// Field types not listed in the switch print nothing.
  /// </summary>
  /// <param name="field">Descriptor of the field whose value is printed.</param>
  /// <param name="value">The value; its runtime type must match the field type.</param>
  /// <param name="generator">The generator that receives the output.</param>
  private static void PrintFieldValue(FieldDescriptor field, object value, TextGenerator generator) {
    switch (field.FieldType) {
      case FieldType.Int32:
      case FieldType.Int64:
      case FieldType.SInt32:
      case FieldType.SInt64:
      case FieldType.SFixed32:
      case FieldType.SFixed64:
      case FieldType.Float:
      case FieldType.Double:
      case FieldType.UInt32:
      case FieldType.UInt64:
      case FieldType.Fixed32:
      case FieldType.Fixed64:
        // The text format is machine-readable, so numbers must not depend on
        // the current thread's culture (e.g. "1,5" instead of "1.5", or a
        // culture-specific negative sign). Format with the invariant culture.
        generator.Print(Convert.ToString(value, System.Globalization.CultureInfo.InvariantCulture));
        break;
      case FieldType.Bool:
        // Explicitly use the Java true/false (bool.ToString() would give "True"/"False").
        generator.Print((bool) value ? "true" : "false");
        break;
      case FieldType.String:
        generator.Print("\"");
        generator.Print(EscapeText((string) value));
        generator.Print("\"");
        break;
      case FieldType.Bytes: {
        generator.Print("\"");
        generator.Print(EscapeBytes((ByteString) value));
        generator.Print("\"");
        break;
      }
      case FieldType.Enum: {
        generator.Print(((EnumValueDescriptor) value).Name);
        break;
      }
      case FieldType.Message:
      case FieldType.Group:
        Print((IMessage) value, generator);
        break;
    }
  }
  /// <summary>
  /// Prints every entry of <c>unknownFields.FieldDictionary</c>, labelled by its
  /// field number. Varints are printed in decimal, fixed32/fixed64 values as
  /// zero-padded hex (8 and 16 digits), length-delimited values as escaped
  /// quoted strings, and groups as nested, indented <c>{ ... }</c> blocks.
  /// </summary>
  /// <param name="unknownFields">The unknown fields to print.</param>
  /// <param name="generator">The generator that receives the output.</param>
  private static void PrintUnknownFields(UnknownFieldSet unknownFields, TextGenerator generator) {
    foreach (KeyValuePair<int, UnknownField> entry in unknownFields.FieldDictionary) {
      UnknownField field = entry.Value;
      String number = entry.Key.ToString();
      String prefix = number + ": ";

      foreach (ulong varint in field.VarintList) {
        generator.Print(prefix);
        generator.Print(varint.ToString());
        generator.Print("\n");
      }

      foreach (uint fixed32 in field.Fixed32List) {
        generator.Print(prefix);
        generator.Print("0x" + fixed32.ToString("x8"));
        generator.Print("\n");
      }

      foreach (ulong fixed64 in field.Fixed64List) {
        generator.Print(prefix);
        generator.Print("0x" + fixed64.ToString("x16"));
        generator.Print("\n");
      }

      foreach (ByteString bytes in field.LengthDelimitedList) {
        generator.Print(number);
        generator.Print(": \"");
        generator.Print(EscapeBytes(bytes));
        generator.Print("\"\n");
      }

      foreach (UnknownFieldSet group in field.GroupList) {
        generator.Print(number);
        generator.Print(" {\n");
        generator.Indent();
        // Groups nest, so recurse with one extra level of indentation.
        PrintUnknownFields(group, generator);
        generator.Outdent();
        generator.Print("}\n");
      }
    }
  }
  /// <summary>
  /// Parses <paramref name="text"/> as an unsigned 64-bit integer by calling
  /// <c>ParseInteger</c> with <c>isSigned = false</c> and <c>isLong = true</c>.
  /// </summary>
  /// <param name="text">The text to parse.</param>
  /// <returns>The parsed value.</returns>
  // TODO(jonskeet): InternalsVisibleTo
  public static ulong ParseUInt64(string text) {
    // ParseInteger hands back the 64 bits as a long; reinterpret them as unsigned.
    long bits = ParseInteger(text, false, true);
    return (ulong) bits;
  }
  /// <summary>
  /// Parses <paramref name="text"/> as a signed 64-bit integer by calling
  /// <c>ParseInteger</c> with <c>isSigned = true</c> and <c>isLong = true</c>.
  /// </summary>
  /// <param name="text">The text to parse.</param>
  /// <returns>The parsed value.</returns>
  // TODO(jonskeet): InternalsVisibleTo
  public static long ParseInt64(string text) {
    long parsed = ParseInteger(text, true, true);
    return parsed;
  }
  /// <summary>
  /// Parses <paramref name="text"/> as an unsigned 32-bit integer by calling
  /// <c>ParseInteger</c> with <c>isSigned = false</c> and <c>isLong = false</c>.
  /// </summary>
  /// <param name="text">The text to parse.</param>
  /// <returns>The parsed value.</returns>
  // TODO(jonskeet): InternalsVisibleTo
  public static uint ParseUInt32(string text) {
    long parsed = ParseInteger(text, false, false);
    return (uint) parsed;
  }
  /// <summary>
  /// Parses <paramref name="text"/> as a signed 32-bit integer by calling
  /// <c>ParseInteger</c> with <c>isSigned = true</c> and <c>isLong = false</c>.
  /// </summary>
  /// <param name="text">The text to parse.</param>
  /// <returns>The parsed value.</returns>
  // TODO(jonskeet): InternalsVisibleTo
  public static int ParseInt32(string text) {
    long parsed = ParseInteger(text, true, false);
    return (int) parsed;
  }
  /// <summary>
  /// Parses an integer in hex (leading 0x), decimal (no prefix) or octal (leading 0).
  /// Only a negative sign is permitted, and it must come before the radix indicator.
  /// No whitespace, '+' sign or other decoration is accepted.
  /// </summary>
  /// <param name="text">The text to parse.</param>
  /// <param name="isSigned">
  /// True to allow a leading '-' and to limit positive values to the signed maximum.
  /// </param>
  /// <param name="isLong">True for the 64-bit range, false for the 32-bit range.</param>
  /// <returns>
  /// The parsed value. Unsigned 64-bit values above <c>long.MaxValue</c> are returned
  /// as the same 64 bits reinterpreted as a <c>long</c>; callers cast back to <c>ulong</c>.
  /// </returns>
  /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
  /// <exception cref="FormatException">
  /// The text is empty, contains a character that is not a digit of the detected
  /// radix, has a '-' when <paramref name="isSigned"/> is false, or is out of range.
  /// </exception>
  private static long ParseInteger(string text, bool isSigned, bool isLong) {
    if (text == null) {
      throw new ArgumentNullException("text");
    }

    string original = text;
    bool negative = false;
    // Ordinal comparisons: the prefixes are syntax, not linguistic text.
    if (text.StartsWith("-", StringComparison.Ordinal)) {
      if (!isSigned) {
        throw new FormatException("Number must be positive: " + original);
      }
      negative = true;
      text = text.Substring(1);
    }

    int radix = 10;
    if (text.StartsWith("0x", StringComparison.Ordinal)) {
      radix = 16;
      text = text.Substring(2);
    } else if (text.StartsWith("0", StringComparison.Ordinal)) {
      // The leading '0' is itself a valid octal digit, so it stays in the text.
      radix = 8;
    }

    if (text.Length == 0) {
      throw new FormatException("Invalid integer: " + original);
    }

    // Accumulate digit by digit. This is stricter than ulong.Parse / Convert.ToUInt64
    // (which tolerate whitespace, '+', or a repeated "0x") and guarantees that
    // FormatException is the only exception type thrown for bad input.
    ulong result = 0;
    ulong bigRadix = (ulong) radix;
    for (int i = 0; i < text.Length; i++) {
      char c = text[i];
      int digit;
      if (c >= '0' && c <= '9') {
        digit = c - '0';
      } else if (c >= 'a' && c <= 'f') {
        digit = c - 'a' + 10;
      } else if (c >= 'A' && c <= 'F') {
        digit = c - 'A' + 10;
      } else {
        digit = radix; // Not a digit in any supported radix; rejected below.
      }
      if (digit >= radix) {
        throw new FormatException("Invalid integer: " + original);
      }
      // result * radix + digit must not exceed ulong.MaxValue.
      if (result > (ulong.MaxValue - (ulong) digit) / bigRadix) {
        throw new FormatException("Number of out range: " + original);
      }
      result = result * bigRadix + (ulong) digit;
    }

    if (negative) {
      // The magnitude of the most negative value is one more than the positive maximum.
      ulong max = isLong ? 0x8000000000000000UL : 0x80000000UL;
      if (result > max) {
        throw new FormatException("Number of out range: " + original);
      }
      // unchecked: a magnitude of 2^63 deliberately wraps to long.MinValue.
      return unchecked(-((long) result));
    } else {
      ulong max = isSigned
          ? (isLong ? (ulong) long.MaxValue : (ulong) int.MaxValue)
          : (isLong ? ulong.MaxValue : (ulong) uint.MaxValue);
      if (result > max) {
        throw new FormatException("Number of out range: " + original);
      }
      // unchecked: unsigned 64-bit values above long.MaxValue are returned bit-for-bit.
      return unchecked((long) result);
    }
  }
  /// <summary>
  /// Returns true when <paramref name="c"/> is one of the octal digits '0' to '7'.
  /// </summary>
  private static bool IsOctal(char c) {
    return c >= '0' && c <= '7';
  }
  /// <summary>
  /// Returns true when <paramref name="c"/> is a hexadecimal digit:
  /// '0'-'9', 'a'-'f' or 'A'-'F'.
  /// </summary>
  private static bool IsHex(char c) {
    if (c >= '0' && c <= '9') {
      return true;
    }
    if (c >= 'a' && c <= 'f') {
      return true;
    }
    return c >= 'A' && c <= 'F';
  }
  /// <summary>
  /// Interprets a character as a digit (in any base up to 36) and returns the
  /// numeric value: '0'-'9' map to 0-9 and letters map to 10-35 regardless of case.
  /// The character is not validated; anything that is neither a decimal digit
  /// nor a lower-case letter is treated as if it were an upper-case letter.
  /// </summary>
  private static int ParseDigit(char c) {
    if (c >= 'a' && c <= 'z') {
      return c - 'a' + 10;
    }
    if (c >= '0' && c <= '9') {
      return c - '0';
    }
    return c - 'A' + 10;
  }
  /// <summary>
  /// Unescapes a text string as escaped using <see cref="EscapeText(string)" />:
  /// the input is unescaped to bytes and those bytes are then decoded as UTF-8.
  /// Two-digit hex escapes (starting with "\x") are also recognised.
  /// TODO(jonskeet): InternalsVisibleTo
  /// </summary>
  public static string UnescapeText(string input) {
    ByteString utf8 = UnescapeBytes(input);
    return utf8.ToStringUtf8();
  }
  /// <summary>
  /// Like <see cref="EscapeBytes" /> but escapes a text string.
  /// The string is first encoded as UTF-8, then each byte is escaped individually.
  /// The returned value is guaranteed to be entirely ASCII.
  /// TODO(jonskeet): InternalsVisibleTo
  /// </summary>
  public static string EscapeText(string input) {
    ByteString utf8 = ByteString.CopyFromUtf8(input);
    return EscapeBytes(utf8);
  }
  /// <summary>
  /// Escapes bytes in the format used in protocol buffer text format, which
  /// is the same as the format used for C string literals. Backslash,
  /// single-quote and double-quote, and the control bytes with a short-hand
  /// escape (\a \b \f \n \r \t \v), are written as two-character escapes.
  /// Any other byte in the range 0x20-0x7F is written as-is, and every
  /// remaining byte is written as a 3-digit octal escape.
  /// The returned value is guaranteed to be entirely ASCII.
  /// TODO(jonskeet): InternalsVisibleTo
  /// </summary>
  public static String EscapeBytes(ByteString input) {
    StringBuilder escaped = new StringBuilder(input.Length);
    foreach (byte b in input) {
      // Look up the short-hand escape, if this byte has one.
      string shortHand = null;
      switch (b) {
        // C# does not use \a or \v, hence the numeric values.
        case 0x07:        shortHand = "\\a";  break;
        case (byte) '\b': shortHand = "\\b";  break;
        case (byte) '\f': shortHand = "\\f";  break;
        case (byte) '\n': shortHand = "\\n";  break;
        case (byte) '\r': shortHand = "\\r";  break;
        case (byte) '\t': shortHand = "\\t";  break;
        case 0x0b:        shortHand = "\\v";  break;
        case (byte) '\\': shortHand = "\\\\"; break;
        case (byte) '\'': shortHand = "\\\'"; break;
        case (byte) '"':  shortHand = "\\\""; break;
      }

      if (shortHand != null) {
        escaped.Append(shortHand);
        continue;
      }

      if (b >= 0x20 && b < 128) {
        escaped.Append((char) b);
        continue;
      }

      // Everything else becomes a backslash followed by three octal digits.
      char high = (char) ('0' + ((b >> 6) & 3));
      char middle = (char) ('0' + ((b >> 3) & 7));
      char low = (char) ('0' + (b & 7));
      escaped.Append('\\').Append(high).Append(middle).Append(low);
    }
    return escaped.ToString();
  }
  /// <summary>
  /// Performs string unescaping from C style (octal, hex, form feeds, tab etc) into a byte string.
  /// Unescaped characters are copied as single bytes. A backslash introduces
  /// either one to three octal digits, "x" followed by one or two hex digits,
  /// or one of the short-hand escapes a b f n r t v \ ' ".
  /// TODO(jonskeet): Make this internal again, and use InternalsVisibleTo.
  /// </summary>
  /// <param name="input">The escaped text.</param>
  /// <returns>The unescaped bytes.</returns>
  /// <exception cref="FormatException">
  /// An unescaped character (including the backslash itself) is outside the
  /// range 32-127, a backslash ends the input, "\x" is not followed by a hex
  /// digit, or the escape character is not recognised.
  /// </exception>
  public static ByteString UnescapeBytes(string input) {
    // Every escape sequence shrinks, so the input length is an upper bound.
    byte[] buffer = new byte[input.Length];
    int count = 0;
    int next = 0; // Index of the next unread character.

    while (next < input.Length) {
      char current = input[next++];
      if (current > 127 || current < 32) {
        throw new FormatException("Escaped string must only contain ASCII");
      }
      if (current != '\\') {
        buffer[count++] = (byte) current;
        continue;
      }

      if (next >= input.Length) {
        throw new FormatException("Invalid escape sequence: '\\' at end of string.");
      }
      char escape = input[next++];

      if (IsOctal(escape)) {
        // Octal escape: the first digit plus up to two more.
        int octal = ParseDigit(escape);
        for (int extra = 0; extra < 2 && next < input.Length && IsOctal(input[next]); extra++) {
          octal = octal * 8 + ParseDigit(input[next++]);
        }
        // Values above 255 (e.g. \777) are truncated to the low 8 bits by the cast.
        buffer[count++] = (byte) octal;
        continue;
      }

      switch (escape) {
        case 'a': buffer[count++] = 0x07; break;
        case 'b': buffer[count++] = (byte) '\b'; break;
        case 'f': buffer[count++] = (byte) '\f'; break;
        case 'n': buffer[count++] = (byte) '\n'; break;
        case 'r': buffer[count++] = (byte) '\r'; break;
        case 't': buffer[count++] = (byte) '\t'; break;
        case 'v': buffer[count++] = 0x0b; break;
        case '\\': buffer[count++] = (byte) '\\'; break;
        case '\'': buffer[count++] = (byte) '\''; break;
        case '"': buffer[count++] = (byte) '\"'; break;
        case 'x': {
          // Hex escape: one mandatory digit, one optional digit.
          if (next >= input.Length || !IsHex(input[next])) {
            throw new FormatException("Invalid escape sequence: '\\x' with no digits");
          }
          int hex = ParseDigit(input[next++]);
          if (next < input.Length && IsHex(input[next])) {
            hex = hex * 16 + ParseDigit(input[next++]);
          }
          buffer[count++] = (byte) hex;
          break;
        }
        default:
          throw new FormatException("Invalid escape sequence: '\\" + escape + "'");
      }
    }

    return ByteString.CopyFrom(buffer, 0, count);
  }
  /// <summary>
  /// Not implemented: always throws <see cref="NotImplementedException"/>.
  /// NOTE(review): presumably intended to parse <paramref name="text"/> into
  /// <paramref name="builder"/> once parsing support exists - confirm.
  /// </summary>
  /// <exception cref="NotImplementedException">Always.</exception>
  public static void Merge(string text, IBuilder builder) {
    throw new NotImplementedException();
  }
  /// <summary>
  /// Not implemented: always throws <see cref="NotImplementedException"/>.
  /// NOTE(review): presumably intended to parse the text read from
  /// <paramref name="reader"/> into <paramref name="builder"/> once parsing
  /// support exists - confirm.
  /// </summary>
  /// <exception cref="NotImplementedException">Always.</exception>
  public static void Merge(TextReader reader, IBuilder builder) {
    throw new NotImplementedException();
  }
  /// <summary>
  /// Not implemented: always throws <see cref="NotImplementedException"/>.
  /// NOTE(review): presumably intended to parse <paramref name="text"/> into
  /// <paramref name="builder"/>, resolving extensions through
  /// <paramref name="registry"/>, once parsing support exists - confirm.
  /// </summary>
  /// <exception cref="NotImplementedException">Always.</exception>
  public static void Merge(string text, ExtensionRegistry registry, IBuilder builder) {
    throw new NotImplementedException();
  }
  /// <summary>
  /// Not implemented: always throws <see cref="NotImplementedException"/>.
  /// NOTE(review): presumably intended to parse the text read from
  /// <paramref name="reader"/> into <paramref name="builder"/>, resolving
  /// extensions through <paramref name="registry"/>, once parsing support
  /// exists - confirm.
  /// </summary>
  /// <exception cref="NotImplementedException">Always.</exception>
  public static void Merge(TextReader reader, ExtensionRegistry registry, IBuilder builder) {
    throw new NotImplementedException();
  }
  395. }
  396. }