// JsonFormatter.cs (24 KB)
  1. #region Copyright notice and license
  2. // Protocol Buffers - Google's data interchange format
  3. // Copyright 2015 Google Inc. All rights reserved.
  4. // https://developers.google.com/protocol-buffers/
  5. //
  6. // Redistribution and use in source and binary forms, with or without
  7. // modification, are permitted provided that the following conditions are
  8. // met:
  9. //
  10. // * Redistributions of source code must retain the above copyright
  11. // notice, this list of conditions and the following disclaimer.
  12. // * Redistributions in binary form must reproduce the above
  13. // copyright notice, this list of conditions and the following disclaimer
  14. // in the documentation and/or other materials provided with the
  15. // distribution.
  16. // * Neither the name of Google Inc. nor the names of its
  17. // contributors may be used to endorse or promote products derived from
  18. // this software without specific prior written permission.
  19. //
  20. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. #endregion
  32. using System;
  33. using System.Collections;
  34. using System.Globalization;
  35. using System.Text;
  36. using Google.Protobuf.Reflection;
  37. using Google.Protobuf.WellKnownTypes;
  38. namespace Google.Protobuf
  39. {
  40. /// <summary>
  41. /// Reflection-based converter from messages to JSON.
  42. /// </summary>
  43. /// <remarks>
  44. /// <para>
  45. /// Instances of this class are thread-safe, with no mutable state.
  46. /// </para>
  47. /// <para>
  48. /// This is a simple start to get JSON formatting working. As it's reflection-based,
  49. /// it's not as quick as baking calls into generated messages - but is a simpler implementation.
  50. /// (This code is generally not heavily optimized.)
  51. /// </para>
  52. /// </remarks>
  53. public sealed class JsonFormatter
  54. {
  /// <summary>
  /// Shared instance returned by <see cref="Default"/>, built from <see cref="Settings.Default"/>.
  /// Marked readonly so the singleton cannot be swapped after type initialization.
  /// </summary>
  private static readonly JsonFormatter defaultInstance = new JsonFormatter(Settings.Default);

  /// <summary>
  /// Returns a formatter using the default settings.
  /// </summary>
  public static JsonFormatter Default { get { return defaultInstance; } }
  /// <summary>
  /// JSON text for each of the first 160 UTF-16 code units (U+0000 to U+009F), indexed by code unit.
  /// An empty entry marks a character that is written as-is; the static constructor
  /// replaces every empty entry with the character itself.
  /// </summary>
  private static readonly string[] CommonRepresentations =
  {
      // C0 (ASCII and derivatives) control characters: 0x00 - 0x1f
      "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", "\\u0007",
      "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", "\\u000e", "\\u000f",
      "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", "\\u0015", "\\u0016", "\\u0017",
      "\\u0018", "\\u0019", "\\u001a", "\\u001b", "\\u001c", "\\u001d", "\\u001e", "\\u001f",

      // Printable ASCII: 0x20 - 0x7f, sixteen entries per row.
      // Escaping of " and \ are required by www.json.org string definition.
      // Escaping of < and > are required for HTML security.
      "", "", "\\\"", "", "", "", "", "", "", "", "", "", "", "", "", "",           // 0x20
      "", "", "", "", "", "", "", "", "", "", "", "", "\\u003c", "", "\\u003e", "", // 0x30
      "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",               // 0x40
      "", "", "", "", "", "", "", "", "", "", "", "", "\\\\", "", "", "",           // 0x50
      "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",               // 0x60
      "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "\\u007f",        // 0x70

      // C1 (ISO 8859 and Unicode) extended control characters: 0x80 - 0x9f
      "\\u0080", "\\u0081", "\\u0082", "\\u0083", "\\u0084", "\\u0085", "\\u0086", "\\u0087",
      "\\u0088", "\\u0089", "\\u008a", "\\u008b", "\\u008c", "\\u008d", "\\u008e", "\\u008f",
      "\\u0090", "\\u0091", "\\u0092", "\\u0093", "\\u0094", "\\u0095", "\\u0096", "\\u0097",
      "\\u0098", "\\u0099", "\\u009a", "\\u009b", "\\u009c", "\\u009d", "\\u009e", "\\u009f"
  };
  /// <summary>
  /// Completes <see cref="CommonRepresentations"/>: each entry left as an empty string
  /// is replaced by the one-character string for its own index.
  /// </summary>
  static JsonFormatter()
  {
      int codeUnit = 0;
      while (codeUnit < CommonRepresentations.Length)
      {
          // Only empty placeholders are touched; explicit escape sequences stay as declared.
          if (CommonRepresentations[codeUnit] == "")
          {
              CommonRepresentations[codeUnit] = char.ToString((char) codeUnit);
          }
          codeUnit++;
      }
  }
  /// <summary>
  /// Settings consulted while formatting; assigned once in the constructor.
  /// </summary>
  private readonly Settings settings;

  /// <summary>
  /// Creates a new formatter with the given settings.
  /// </summary>
  /// <param name="settings">The settings to format with. Validated with
  /// Preconditions.CheckNotNull so that a null reference is rejected here rather than
  /// surfacing later while a message is being written.</param>
  public JsonFormatter(Settings settings)
  {
      Preconditions.CheckNotNull(settings, "settings");
      this.settings = settings;
  }
  /// <summary>
  /// Formats the specified message as JSON.
  /// </summary>
  /// <param name="message">The message to format; checked with Preconditions.CheckNotNull.</param>
  /// <returns>The text accumulated by writing the message into a fresh StringBuilder.</returns>
  public string Format(IMessage message)
  {
      Preconditions.CheckNotNull(message, "message");

      // TODO(jonskeet): Handle well-known types here.
      // Our reflection support needs improving so that we can get at the descriptor
      // to find out whether *this* message is a well-known type.
      var json = new StringBuilder();
      WriteMessage(json, message);
      return json.ToString();
  }
  /// <summary>
  /// Appends a message as a JSON object, or "null" when the message reference is null.
  /// Fields outside any oneof are written first, in field-number order; the currently
  /// set field of each oneof is written afterwards.
  /// </summary>
  /// <param name="builder">The builder receiving the JSON text.</param>
  /// <param name="message">The message to write; may be null.</param>
  private void WriteMessage(StringBuilder builder, IMessage message)
  {
      if (message == null)
      {
          WriteNull(builder);
          return;
      }

      builder.Append("{ ");
      bool wroteAny = false;

      // Pass 1: fields that are not part of a oneof.
      foreach (var field in message.Descriptor.Fields.InFieldNumberOrder())
      {
          var fieldAccessor = field.Accessor;

          // Oneofs are written later
          // TODO: Change to write out fields in order, interleaving oneofs appropriately (as per binary format)
          if (field.ContainingOneof != null)
          {
              continue;
          }

          object fieldValue = fieldAccessor.GetValue(message);

          // Omit default values unless we're asked to format them
          bool omittedAsDefault = !settings.FormatDefaultValues && IsDefaultValue(fieldAccessor, fieldValue);
          if (omittedAsDefault)
          {
              continue;
          }

          // Omit awkward (single) values such as unknown enum values
          bool isSingular = !field.IsRepeated && !field.IsMap;
          if (isSingular && !CanWriteSingleValue(fieldAccessor.Descriptor, fieldValue))
          {
              continue;
          }

          if (wroteAny)
          {
              builder.Append(", ");
          }
          WriteString(builder, ToCamelCase(fieldAccessor.Descriptor.Name));
          builder.Append(": ");
          WriteValue(builder, fieldAccessor, fieldValue);
          wroteAny = true;
      }

      // Pass 2: whichever field is currently set in each oneof (none => nothing written).
      foreach (var oneof in message.Descriptor.Oneofs)
      {
          var activeField = oneof.Accessor.GetCaseFieldDescriptor(message);
          if (activeField == null)
          {
              continue;
          }

          object activeValue = activeField.Accessor.GetValue(message);

          // Omit awkward (single) values such as unknown enum values
          bool isSingular = !activeField.IsRepeated && !activeField.IsMap;
          if (isSingular && !CanWriteSingleValue(activeField, activeValue))
          {
              continue;
          }

          if (wroteAny)
          {
              builder.Append(", ");
          }
          WriteString(builder, ToCamelCase(activeField.Name));
          builder.Append(": ");
          WriteValue(builder, activeField.Accessor, activeValue);
          wroteAny = true;
      }

      // "{ " was already emitted, so an empty object ends up as "{ }".
      builder.Append(wroteAny ? " }" : "}");
  }
  /// <summary>
  /// Converts a field name to lower camel case: underscores are dropped, the first word is
  /// lower-cased, and a lower-case letter following an underscore is upper-cased.
  /// </summary>
  /// <remarks>
  /// Converted from src/google/protobuf/util/internal/utility.cc ToCamelCase.
  /// </remarks>
  /// <param name="input">The name to convert.</param>
  /// <returns>The converted name.</returns>
  internal static string ToCamelCase(string input)
  {
      var output = new StringBuilder(input.Length);
      bool inFirstWord = true;
      bool pendingCapital = false;
      bool previousWasUpper = true;

      for (int index = 0; index < input.Length; index++)
      {
          char current = input[index];
          bool currentIsUpper = char.IsUpper(current);
          // Capture the previous character's case, then record ours for the next iteration.
          bool precededByUpper = previousWasUpper;
          previousWasUpper = currentIsUpper;

          if (current == '_')
          {
              pendingCapital = true;
              // Leading underscores do not end the first word; any later underscore does.
              inFirstWord = inFirstWord && output.Length == 0;
              continue;
          }

          if (inFirstWord)
          {
              // Consider when the current character B is capitalized,
              // first word ends when:
              // 1) following a lowercase: "...aB..."
              // 2) followed by a lowercase: "...ABc..."
              bool nextIsLower = index + 1 < input.Length && char.IsLower(input[index + 1]);
              bool startsNextWord = output.Length != 0 && currentIsUpper && (!precededByUpper || nextIsLower);
              if (startsNextWord)
              {
                  inFirstWord = false;
                  output.Append(current);
              }
              else
              {
                  output.Append(char.ToLowerInvariant(current));
              }
              continue;
          }

          if (pendingCapital)
          {
              pendingCapital = false;
              if (char.IsLower(current))
              {
                  output.Append(char.ToUpperInvariant(current));
                  continue;
              }
          }

          output.Append(current);
      }

      return output.ToString();
  }
  /// <summary>
  /// Appends the JSON null literal to the builder.
  /// </summary>
  private static void WriteNull(StringBuilder builder)
  {
      const string nullLiteral = "null";
      builder.Append(nullLiteral);
  }
  /// <summary>
  /// Returns whether <paramref name="value"/> is the default for the accessor's field:
  /// an empty map or list, false, empty bytes or string, numeric zero (also used for enums),
  /// or a null message reference.
  /// </summary>
  /// <exception cref="ArgumentException">The field type is not one of the handled cases.</exception>
  private static bool IsDefaultValue(IFieldAccessor accessor, object value)
  {
      // Collections are judged by emptiness, regardless of element type.
      if (accessor.Descriptor.IsMap)
      {
          return ((IDictionary) value).Count == 0;
      }
      if (accessor.Descriptor.IsRepeated)
      {
          return ((IList) value).Count == 0;
      }

      switch (accessor.Descriptor.FieldType)
      {
          case FieldType.Message:
          case FieldType.Group: // Never expect to get this, but...
              return value == null;

          case FieldType.String:
              return (string) value == "";
          case FieldType.Bytes:
              return (ByteString) value == ByteString.Empty;
          case FieldType.Bool:
              return (bool) value == false;

          case FieldType.Float:
              return (float) value == 0f;
          case FieldType.Double:
              return (double) value == 0.0;

          // Each integer group unboxes to the CLR type the value is boxed as.
          case FieldType.Int32:
          case FieldType.SInt32:
          case FieldType.SFixed32:
          case FieldType.Enum:
              return (int) value == 0;
          case FieldType.UInt32:
          case FieldType.Fixed32:
              return (uint) value == 0;
          case FieldType.Int64:
          case FieldType.SInt64:
          case FieldType.SFixed64:
              return (long) value == 0;
          case FieldType.UInt64:
          case FieldType.Fixed64:
              return (ulong) value == 0;

          default:
              throw new ArgumentException("Invalid field type");
      }
  }
  /// <summary>
  /// Writes a field value, dispatching on the field's shape: map fields go to
  /// WriteDictionary, repeated fields to WriteList, and everything else to WriteSingleValue.
  /// </summary>
  private void WriteValue(StringBuilder builder, IFieldAccessor accessor, object value)
  {
      // The map check comes first, as in the original ordering of the tests.
      if (accessor.Descriptor.IsMap)
      {
          WriteDictionary(builder, accessor, (IDictionary) value);
          return;
      }

      if (accessor.Descriptor.IsRepeated)
      {
          WriteList(builder, accessor, (IList) value);
          return;
      }

      WriteSingleValue(builder, accessor.Descriptor, value);
  }
  /// <summary>
  /// Writes one non-repeated value according to the descriptor's field type:
  /// booleans as true/false, bytes as quoted Base64, 32-bit integers as bare numbers,
  /// 64-bit integers as quoted numbers, enums as their quoted value name, and
  /// floating-point values in round-trip format (NaN and the infinities are quoted).
  /// Message values are delegated to WriteWellKnownTypeValue or WriteMessage.
  /// </summary>
  /// <exception cref="ArgumentException">The field type is not one of the handled cases.</exception>
  private void WriteSingleValue(StringBuilder builder, FieldDescriptor descriptor, object value)
  {
      switch (descriptor.FieldType)
      {
          case FieldType.Bool:
              if ((bool) value)
              {
                  builder.Append("true");
              }
              else
              {
                  builder.Append("false");
              }
              return;

          case FieldType.String:
              WriteString(builder, (string) value);
              return;

          case FieldType.Bytes:
          {
              // Nothing in Base64 needs escaping
              string base64 = ((ByteString) value).ToBase64();
              builder.Append('"').Append(base64).Append('"');
              return;
          }

          case FieldType.Int32:
          case FieldType.SInt32:
          case FieldType.SFixed32:
          case FieldType.UInt32:
          case FieldType.Fixed32:
          {
              string digits = ((IFormattable) value).ToString("d", CultureInfo.InvariantCulture);
              builder.Append(digits);
              return;
          }

          case FieldType.Int64:
          case FieldType.SInt64:
          case FieldType.SFixed64:
          case FieldType.UInt64:
          case FieldType.Fixed64:
          {
              // 64-bit values are emitted inside double quotes.
              builder.Append('"');
              string digits = ((IFormattable) value).ToString("d", CultureInfo.InvariantCulture);
              builder.Append(digits).Append('"');
              return;
          }

          case FieldType.Enum:
              // We will already have validated that this is a known value.
              WriteString(builder, descriptor.EnumType.FindValueByNumber((int) value).Name);
              return;

          case FieldType.Float:
          case FieldType.Double:
          {
              string number = ((IFormattable) value).ToString("r", CultureInfo.InvariantCulture);
              bool isNonFinite = number == "NaN" || number == "Infinity" || number == "-Infinity";
              if (isNonFinite)
              {
                  builder.Append('"').Append(number).Append('"');
              }
              else
              {
                  builder.Append(number);
              }
              return;
          }

          case FieldType.Message:
          case FieldType.Group: // Never expect to get this, but...
              if (descriptor.MessageType.IsWellKnownType)
              {
                  WriteWellKnownTypeValue(builder, descriptor, value);
              }
              else
              {
                  WriteMessage(builder, (IMessage) value);
              }
              return;

          default:
              throw new ArgumentException("Invalid field type: " + descriptor.FieldType);
      }
  }
  /// <summary>
  /// Central interception point for well-known type formatting. Any well-known types which
  /// don't need special handling fall back to WriteMessage.
  /// </summary>
  /// <remarks>
  /// A non-null value whose message type is declared in the same file as Int32Value is treated
  /// as a wrapper: the value is the (possibly boxed) "native" value, so it is written as if
  /// field number 1 of the wrapper type were being written directly.
  /// </remarks>
  private void WriteWellKnownTypeValue(StringBuilder builder, FieldDescriptor descriptor, object value)
  {
      bool isWrapperWithValue = descriptor.MessageType.File == Int32Value.Descriptor.File && value != null;
      if (isWrapperWithValue)
      {
          FieldDescriptor wrappedField = descriptor.MessageType.FindFieldByNumber(1);
          WriteSingleValue(builder, wrappedField, value);
      }
      else
      {
          WriteMessage(builder, (IMessage) value);
      }
  }
  /// <summary>
  /// Writes a repeated field as a JSON array. Elements rejected by CanWriteSingleValue
  /// are skipped entirely, so they leave no separator behind.
  /// </summary>
  private void WriteList(StringBuilder builder, IFieldAccessor accessor, IList list)
  {
      builder.Append("[ ");
      int written = 0;

      foreach (var element in list)
      {
          if (CanWriteSingleValue(accessor.Descriptor, element))
          {
              if (written > 0)
              {
                  builder.Append(", ");
              }
              WriteSingleValue(builder, accessor.Descriptor, element);
              written++;
          }
      }

      // "[ " was already emitted, so an empty array ends up as "[ ]".
      builder.Append(written == 0 ? "]" : " ]");
  }
  /// <summary>
  /// Writes a map field as a JSON object. Keys are always written as JSON strings;
  /// entries whose value is rejected by CanWriteSingleValue are skipped.
  /// </summary>
  /// <exception cref="ArgumentException">The map's key field type is not a string, bool or integer type.</exception>
  private void WriteDictionary(StringBuilder builder, IFieldAccessor accessor, IDictionary dictionary)
  {
      builder.Append("{ ");
      bool wroteAny = false;

      // Fields 1 and 2 of the map field's message type describe the key and the value.
      FieldDescriptor keyType = accessor.Descriptor.MessageType.FindFieldByNumber(1);
      FieldDescriptor valueType = accessor.Descriptor.MessageType.FindFieldByNumber(2);

      // This will box each pair. Could use IDictionaryEnumerator, but that's ugly in terms of disposal.
      foreach (DictionaryEntry entry in dictionary)
      {
          if (!CanWriteSingleValue(valueType, entry.Value))
          {
              continue;
          }

          if (wroteAny)
          {
              builder.Append(", ");
          }

          WriteString(builder, FormatMapKey(keyType, entry.Key));
          builder.Append(": ");
          WriteSingleValue(builder, valueType, entry.Value);
          wroteAny = true;
      }

      // "{ " was already emitted, so an empty map ends up as "{ }".
      builder.Append(wroteAny ? " }" : "}");
  }

  /// <summary>
  /// Converts a map key to its unescaped, unquoted text form based on the key field type.
  /// </summary>
  private static string FormatMapKey(FieldDescriptor keyType, object key)
  {
      switch (keyType.FieldType)
      {
          case FieldType.Bool:
              return (bool) key ? "true" : "false";

          case FieldType.String:
              return (string) key;

          case FieldType.Int32:
          case FieldType.Int64:
          case FieldType.SInt32:
          case FieldType.SInt64:
          case FieldType.UInt32:
          case FieldType.UInt64:
          case FieldType.Fixed32:
          case FieldType.Fixed64:
          case FieldType.SFixed32:
          case FieldType.SFixed64:
              return ((IFormattable) key).ToString("d", CultureInfo.InvariantCulture);

          default:
              throw new ArgumentException("Invalid key type: " + keyType.FieldType);
      }
  }
  /// <summary>
  /// Returns whether or not a singular value can be represented in JSON.
  /// Only enum-typed descriptors are inspected: the value is unboxed as an int and is
  /// writable when the enum type has a value with that number. Every other field type
  /// is reported as writable.
  /// </summary>
  private bool CanWriteSingleValue(FieldDescriptor descriptor, object value)
  {
      if (descriptor.FieldType != FieldType.Enum)
      {
          return true;
      }

      // Unknown enum numbers have no name to write.
      return descriptor.EnumType.FindValueByNumber((int) value) != null;
  }
  /// <summary>
  /// Writes a string (including leading and trailing double quotes) to a builder, escaping as required.
  /// </summary>
  /// <remarks>
  /// Other than surrogate pair handling, this code is mostly taken from src/google/protobuf/util/internal/json_escaping.cc.
  /// Code units below U+00A0 are looked up in CommonRepresentations; surrogate pairs and a
  /// fixed set of invisible/formatting characters are written as \uXXXX escapes; everything
  /// else is appended unchanged.
  /// </remarks>
  /// <param name="builder">The builder receiving the quoted, escaped text.</param>
  /// <param name="text">The text to write.</param>
  /// <exception cref="ArgumentException">The text contains a high surrogate that is not followed
  /// by a low surrogate, or a low surrogate that is not preceded by a high surrogate.</exception>
  private void WriteString(StringBuilder builder, string text)
  {
      builder.Append('"');
      for (int i = 0; i < text.Length; i++)
      {
          char c = text[i];
          if (c < 0xa0)
          {
              // Table lookup covers the C0/C1 controls, ASCII and the required escapes.
              builder.Append(CommonRepresentations[c]);
              continue;
          }
          if (char.IsHighSurrogate(c))
          {
              // Encountered first part of a surrogate pair.
              // Check that we have the whole pair, and encode both parts as hex.
              i++;
              if (i == text.Length || !char.IsLowSurrogate(text[i]))
              {
                  throw new ArgumentException("String contains high surrogate not followed by low surrogate");
              }
              HexEncodeUtf16CodeUnit(builder, c);
              HexEncodeUtf16CodeUnit(builder, text[i]);
              continue;
          }
          else if (char.IsLowSurrogate(c))
          {
              // A low surrogate reached here was not consumed as the second half of a pair.
              throw new ArgumentException("String contains low surrogate not preceded by high surrogate");
          }
          switch ((uint) c)
          {
              // These are not required by json spec
              // but used to prevent security bugs in javascript.
              case 0xfeff: // Zero width no-break space
              case 0xfff9: // Interlinear annotation anchor
              case 0xfffa: // Interlinear annotation separator
              case 0xfffb: // Interlinear annotation terminator
              case 0x00ad: // Soft-hyphen
              case 0x06dd: // Arabic end of ayah
              case 0x070f: // Syriac abbreviation mark
              case 0x17b4: // Khmer vowel inherent Aq
              case 0x17b5: // Khmer vowel inherent Aa
                  HexEncodeUtf16CodeUnit(builder, c);
                  break;
              default:
                  if ((c >= 0x0600 && c <= 0x0603) || // Arabic signs
                      (c >= 0x200b && c <= 0x200f) || // Zero width etc.
                      (c >= 0x2028 && c <= 0x202e) || // Separators etc.
                      (c >= 0x2060 && c <= 0x2064) || // Invisible etc.
                      (c >= 0x206a && c <= 0x206f))
                  {
                      HexEncodeUtf16CodeUnit(builder, c);
                  }
                  else
                  {
                      // No handling of surrogates here - that's done earlier
                      builder.Append(c);
                  }
                  break;
          }
      }
      builder.Append('"');
  }
  /// <summary>
  /// Lower-case hexadecimal digits, indexed by nibble value.
  /// </summary>
  private const string Hex = "0123456789abcdef";

  /// <summary>
  /// Appends a single UTF-16 code unit as a JSON escape of the form \uXXXX,
  /// using four lower-case hexadecimal digits, most significant nibble first.
  /// </summary>
  /// <param name="builder">The builder receiving the escape sequence.</param>
  /// <param name="c">The code unit to encode.</param>
  private static void HexEncodeUtf16CodeUnit(StringBuilder builder, char c)
  {
      builder.Append("\\u");
      builder.Append(Hex[(c >> 12) & 0xf]);
      builder.Append(Hex[(c >> 8) & 0xf]);
      builder.Append(Hex[(c >> 4) & 0xf]);
      builder.Append(Hex[(c >> 0) & 0xf]);
  }
  /// <summary>
  /// Settings controlling JSON formatting.
  /// </summary>
  public sealed class Settings
  {
      // Single shared instance handed out by Default; constructed with default-value formatting off.
      private static readonly Settings sharedDefault = new Settings(false);

      // Backing store for FormatDefaultValues; fixed at construction.
      private readonly bool includeDefaults;

      /// <summary>
      /// Creates settings with the given choice for default-valued fields.
      /// </summary>
      /// <param name="formatDefaultValues">true to format fields holding their default value; false to omit them.</param>
      public Settings(bool formatDefaultValues)
      {
          includeDefaults = formatDefaultValues;
      }

      /// <summary>
      /// Default settings, as used by <see cref="JsonFormatter.Default"/>
      /// </summary>
      public static Settings Default
      {
          get { return sharedDefault; }
      }

      /// <summary>
      /// Whether fields whose values are the default for the field type (e.g. 0 for integers)
      /// should be formatted (true) or omitted (false).
      /// </summary>
      public bool FormatDefaultValues
      {
          get { return includeDefaults; }
      }
  }
  572. }
  573. }