// JsonFormatter.cs (24 KB)
  1. #region Copyright notice and license
  2. // Protocol Buffers - Google's data interchange format
  3. // Copyright 2015 Google Inc. All rights reserved.
  4. // https://developers.google.com/protocol-buffers/
  5. //
  6. // Redistribution and use in source and binary forms, with or without
  7. // modification, are permitted provided that the following conditions are
  8. // met:
  9. //
  10. // * Redistributions of source code must retain the above copyright
  11. // notice, this list of conditions and the following disclaimer.
  12. // * Redistributions in binary form must reproduce the above
  13. // copyright notice, this list of conditions and the following disclaimer
  14. // in the documentation and/or other materials provided with the
  15. // distribution.
  16. // * Neither the name of Google Inc. nor the names of its
  17. // contributors may be used to endorse or promote products derived from
  18. // this software without specific prior written permission.
  19. //
  20. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. #endregion
  32. using System;
  33. using System.Collections;
  34. using System.Globalization;
  35. using System.Text;
  36. using Google.Protobuf.Reflection;
  37. using Google.Protobuf.WellKnownTypes;
  38. namespace Google.Protobuf
  39. {
  40. /// <summary>
  41. /// Reflection-based converter from messages to JSON.
  42. /// </summary>
  43. /// <remarks>
  44. /// <para>
  45. /// Instances of this class are thread-safe, with no mutable state.
  46. /// </para>
  47. /// <para>
  48. /// This is a simple start to get JSON formatting working. As it's reflection-based,
  49. /// it's not as quick as baking calls into generated messages - but is a simpler implementation.
  50. /// (This code is generally not heavily optimized.)
  51. /// </para>
  52. /// </remarks>
  53. public sealed class JsonFormatter
  54. {
  /// <summary>
  /// The single shared formatter built from <see cref="Settings.Default"/>.
  /// Declared readonly so the shared instance can never be swapped out after type initialization.
  /// </summary>
  private static readonly JsonFormatter defaultInstance = new JsonFormatter(Settings.Default);

  /// <summary>
  /// Returns a formatter using the default settings.
  /// </summary>
  public static JsonFormatter Default { get { return defaultInstance; } }
  /// <summary>
  /// JSON text for each of the first 160 UTF-16 code units (U+0000 to U+009F), indexed by code unit.
  /// Entries written as the empty string below are placeholders: the static constructor swaps each
  /// one for the single-character string of its own index, i.e. the character is emitted unescaped.
  /// </summary>
  private static readonly string[] CommonRepresentations = {
      // C0 (ASCII and derivatives) control characters
      "\\u0000", "\\u0001", "\\u0002", "\\u0003",  // 0x00
      "\\u0004", "\\u0005", "\\u0006", "\\u0007",
      "\\b",     "\\t",     "\\n",     "\\u000b",
      "\\f",     "\\r",     "\\u000e", "\\u000f",
      "\\u0010", "\\u0011", "\\u0012", "\\u0013",  // 0x10
      "\\u0014", "\\u0015", "\\u0016", "\\u0017",
      "\\u0018", "\\u0019", "\\u001a", "\\u001b",
      "\\u001c", "\\u001d", "\\u001e", "\\u001f",
      // Escaping of " and \ are required by www.json.org string definition.
      // Escaping of < and > are required for HTML security.
      "", "", "\\\"", "", "", "", "", "",  // 0x20
      "", "", "", "", "", "", "", "",
      "", "", "", "", "", "", "", "",  // 0x30
      "", "", "", "", "\\u003c", "", "\\u003e", "",
      "", "", "", "", "", "", "", "",  // 0x40
      "", "", "", "", "", "", "", "",
      "", "", "", "", "", "", "", "",  // 0x50
      "", "", "", "", "\\\\", "", "", "",
      "", "", "", "", "", "", "", "",  // 0x60
      "", "", "", "", "", "", "", "",
      "", "", "", "", "", "", "", "",  // 0x70
      "", "", "", "", "", "", "", "\\u007f",
      // C1 (ISO 8859 and Unicode) extended control characters
      "\\u0080", "\\u0081", "\\u0082", "\\u0083",  // 0x80
      "\\u0084", "\\u0085", "\\u0086", "\\u0087",
      "\\u0088", "\\u0089", "\\u008a", "\\u008b",
      "\\u008c", "\\u008d", "\\u008e", "\\u008f",
      "\\u0090", "\\u0091", "\\u0092", "\\u0093",  // 0x90
      "\\u0094", "\\u0095", "\\u0096", "\\u0097",
      "\\u0098", "\\u0099", "\\u009a", "\\u009b",
      "\\u009c", "\\u009d", "\\u009e", "\\u009f"
  };

  /// <summary>
  /// Completes <see cref="CommonRepresentations"/> by filling every placeholder slot with the
  /// character whose code unit equals the slot index.
  /// </summary>
  static JsonFormatter()
  {
      int codeUnit = 0;
      while (codeUnit < CommonRepresentations.Length)
      {
          // A zero-length entry marks a character that is written to JSON as itself.
          if (CommonRepresentations[codeUnit].Length == 0)
          {
              CommonRepresentations[codeUnit] = char.ToString((char) codeUnit);
          }
          codeUnit++;
      }
  }
  /// <summary>
  /// The settings consulted while formatting (currently only whether default values are written).
  /// </summary>
  private readonly Settings settings;

  /// <summary>
  /// Creates a new formatter with the given settings.
  /// </summary>
  /// <param name="settings">The settings to format with. Must not be null.</param>
  public JsonFormatter(Settings settings)
  {
      // Reject null up front, using the same helper Format uses for its argument, rather than
      // letting it surface later as a NullReferenceException when the settings are first read.
      Preconditions.CheckNotNull(settings, "settings");
      this.settings = settings;
  }
  /// <summary>
  /// Formats the specified message as JSON.
  /// </summary>
  /// <param name="message">The message to format. Passed through Preconditions.CheckNotNull first.</param>
  /// <returns>The JSON text produced for <paramref name="message"/>.</returns>
  public string Format(IMessage message)
  {
      Preconditions.CheckNotNull(message, "message");

      // TODO(jonskeet): Handle well-known types here.
      // Our reflection support needs improving so that we can get at the descriptor
      // to find out whether *this* message is a well-known type.
      var json = new StringBuilder();
      WriteMessage(json, message);
      return json.ToString();
  }
  /// <summary>
  /// Appends the JSON object for <paramref name="message"/> to <paramref name="builder"/>,
  /// or the JSON null literal when the message is null.
  /// </summary>
  /// <remarks>
  /// Fields are visited in field-number order. An object with no written fields is emitted
  /// as "{ }"; otherwise members are separated by ", " and wrapped in "{ " and " }".
  /// </remarks>
  private void WriteMessage(StringBuilder builder, IMessage message)
  {
      if (message == null)
      {
          WriteNull(builder);
          return;
      }

      builder.Append("{ ");
      bool wroteAny = false;
      foreach (var field in message.Descriptor.Fields.InFieldNumberOrder())
      {
          var accessor = field.Accessor;
          var oneof = field.ContainingOneof;
          bool inOneof = oneof != null;

          // A oneof member is only written when it is the case currently set on the message.
          if (inOneof && oneof.Accessor.GetCaseFieldDescriptor(message) != field)
          {
              continue;
          }

          object value = accessor.GetValue(message);

          // Default values are dropped unless the settings ask for them. The set member of a
          // oneof is always kept, because writing it is what preserves the oneof case.
          bool omitAsDefault = !inOneof && !settings.FormatDefaultValues && IsDefaultValue(accessor, value);
          if (omitAsDefault)
          {
              continue;
          }

          // Singular values that have no JSON form (e.g. unknown enum numbers) are dropped too.
          bool isSingular = !field.IsRepeated && !field.IsMap;
          if (isSingular && !CanWriteSingleValue(accessor.Descriptor, value))
          {
              continue;
          }

          if (wroteAny)
          {
              builder.Append(", ");
          }
          WriteString(builder, ToCamelCase(accessor.Descriptor.Name));
          builder.Append(": ");
          WriteValue(builder, accessor, value);
          wroteAny = true;
      }
      builder.Append(wroteAny ? " }" : "}");
  }
  /// <summary>
  /// Converts a field name to the lower-camel-case form used for JSON keys.
  /// Converted from src/google/protobuf/util/internal/utility.cc ToCamelCase.
  /// </summary>
  /// <remarks>
  /// Underscores are removed. The first word is lower-cased in full; after an underscore the
  /// next lower-case letter is upper-cased; everything else is copied unchanged.
  /// </remarks>
  /// <param name="input">The name to convert.</param>
  /// <returns>The converted name.</returns>
  internal static string ToCamelCase(string input)
  {
      StringBuilder output = new StringBuilder(input.Length);
      bool inFirstWord = true;
      bool upperPending = false;
      // Treated as "upper" before the first character, so a leading capital cannot end the first word.
      bool previousWasUpper = true;

      int index = 0;
      while (index < input.Length)
      {
          char current = input[index];
          bool currentIsUpper = char.IsUpper(current);
          char emitted = current;
          bool emit = true;

          if (current == '_')
          {
              // Underscores are dropped; they request capitalisation of what follows and
              // end the first word once something has already been written.
              upperPending = true;
              inFirstWord = inFirstWord && output.Length == 0;
              emit = false;
          }
          else if (inFirstWord)
          {
              // A capital ends the first word when it follows a lower-case character ("...aB...")
              // or is itself followed by a lower-case character ("...ABc...").
              bool nextIsLower = index + 1 < input.Length && char.IsLower(input[index + 1]);
              bool endsFirstWord = output.Length != 0 && currentIsUpper && (!previousWasUpper || nextIsLower);
              if (endsFirstWord)
              {
                  inFirstWord = false;
              }
              else
              {
                  emitted = char.ToLowerInvariant(current);
              }
          }
          else if (upperPending)
          {
              upperPending = false;
              if (char.IsLower(current))
              {
                  emitted = char.ToUpperInvariant(current);
              }
          }

          if (emit)
          {
              output.Append(emitted);
          }
          previousWasUpper = currentIsUpper;
          index++;
      }
      return output.ToString();
  }
  /// <summary>
  /// Appends the JSON null literal to <paramref name="builder"/>.
  /// </summary>
  private static void WriteNull(StringBuilder builder)
  {
      const string nullLiteral = "null";
      builder.Append(nullLiteral);
  }
  /// <summary>
  /// Determines whether <paramref name="value"/> is the default for the field behind
  /// <paramref name="accessor"/>: an empty map or list, false, an empty string or ByteString,
  /// a numeric or enum zero, or a null message.
  /// </summary>
  /// <exception cref="ArgumentException">The field type is not one of the handled types.</exception>
  private static bool IsDefaultValue(IFieldAccessor accessor, object value)
  {
      var descriptor = accessor.Descriptor;

      // Collections are checked first: their default is simply "no entries".
      if (descriptor.IsMap)
      {
          return ((IDictionary) value).Count == 0;
      }
      if (descriptor.IsRepeated)
      {
          return ((IList) value).Count == 0;
      }

      switch (descriptor.FieldType)
      {
          case FieldType.Message:
          case FieldType.Group: // Never expect to get this, but...
              return value == null;

          case FieldType.String:
              return (string) value == "";
          case FieldType.Bytes:
              return (ByteString) value == ByteString.Empty;
          case FieldType.Bool:
              return !(bool) value;

          case FieldType.Float:
              return (float) value == 0f;
          case FieldType.Double:
              return (double) value == 0.0;

          // 32-bit signed integers; enums are carried as their int number.
          case FieldType.Int32:
          case FieldType.SInt32:
          case FieldType.SFixed32:
          case FieldType.Enum:
              return (int) value == 0;

          case FieldType.UInt32:
          case FieldType.Fixed32:
              return (uint) value == 0;

          case FieldType.Int64:
          case FieldType.SInt64:
          case FieldType.SFixed64:
              return (long) value == 0;

          case FieldType.UInt64:
          case FieldType.Fixed64:
              return (ulong) value == 0;

          default:
              throw new ArgumentException("Invalid field type");
      }
  }
  /// <summary>
  /// Writes a field value, dispatching on the field's shape: map fields go to WriteDictionary,
  /// repeated fields to WriteList and everything else to WriteSingleValue.
  /// </summary>
  private void WriteValue(StringBuilder builder, IFieldAccessor accessor, object value)
  {
      // The map test comes before the repeated test, matching the order used by IsDefaultValue.
      if (accessor.Descriptor.IsMap)
      {
          WriteDictionary(builder, accessor, (IDictionary) value);
          return;
      }
      if (accessor.Descriptor.IsRepeated)
      {
          WriteList(builder, accessor, (IList) value);
          return;
      }
      WriteSingleValue(builder, accessor.Descriptor, value);
  }
  /// <summary>
  /// Writes one non-collection value in the JSON form chosen by the descriptor's field type.
  /// </summary>
  /// <remarks>
  /// 32-bit integers are written as bare numbers; 64-bit integers are written inside double
  /// quotes. Floating-point values are bare unless they format as NaN, Infinity or -Infinity,
  /// which are quoted. Enums are written as the quoted name of the matching enum value.
  /// </remarks>
  /// <exception cref="ArgumentException">The field type is not one of the handled types.</exception>
  private void WriteSingleValue(StringBuilder builder, FieldDescriptor descriptor, object value)
  {
      switch (descriptor.FieldType)
      {
          case FieldType.Bool:
          {
              bool flag = (bool) value;
              builder.Append(flag ? "true" : "false");
              break;
          }
          case FieldType.String:
          {
              WriteString(builder, (string) value);
              break;
          }
          case FieldType.Bytes:
          {
              // Nothing in Base64 needs escaping
              builder.Append('"').Append(((ByteString) value).ToBase64()).Append('"');
              break;
          }
          case FieldType.Enum:
          {
              // We will already have validated that this is a known value.
              EnumValueDescriptor known = descriptor.EnumType.FindValueByNumber((int) value);
              WriteString(builder, known.Name);
              break;
          }
          case FieldType.Int32:
          case FieldType.SInt32:
          case FieldType.SFixed32:
          case FieldType.UInt32:
          case FieldType.Fixed32:
          {
              builder.Append(((IFormattable) value).ToString("d", CultureInfo.InvariantCulture));
              break;
          }
          case FieldType.Int64:
          case FieldType.SInt64:
          case FieldType.SFixed64:
          case FieldType.UInt64:
          case FieldType.Fixed64:
          {
              // 64-bit values are emitted as quoted decimal text.
              builder.Append('"')
                     .Append(((IFormattable) value).ToString("d", CultureInfo.InvariantCulture))
                     .Append('"');
              break;
          }
          case FieldType.Float:
          case FieldType.Double:
          {
              string rendered = ((IFormattable) value).ToString("r", CultureInfo.InvariantCulture);
              bool needsQuotes = rendered == "NaN" || rendered == "Infinity" || rendered == "-Infinity";
              if (needsQuotes)
              {
                  builder.Append('"').Append(rendered).Append('"');
              }
              else
              {
                  builder.Append(rendered);
              }
              break;
          }
          case FieldType.Message:
          case FieldType.Group: // Never expect to get this, but...
          {
              if (!descriptor.MessageType.IsWellKnownType)
              {
                  WriteMessage(builder, (IMessage) value);
              }
              else
              {
                  WriteWellKnownTypeValue(builder, descriptor, value);
              }
              break;
          }
          default:
              throw new ArgumentException("Invalid field type: " + descriptor.FieldType);
      }
  }
  /// <summary>
  /// Single entry point for formatting values of well-known message types. Types without
  /// special handling are written as ordinary messages via WriteMessage.
  /// </summary>
  /// <remarks>
  /// A message type declared in the same file as <c>Int32Value</c> is treated as a wrapper.
  /// For those, a non-null value is the (possibly boxed) "native" value, so it is written
  /// as though it were field number 1 of the wrapper message.
  /// </remarks>
  private void WriteWellKnownTypeValue(StringBuilder builder, FieldDescriptor descriptor, object value)
  {
      bool isWrapper = descriptor.MessageType.File == Int32Value.Descriptor.File;
      if (!isWrapper || value == null)
      {
          WriteMessage(builder, (IMessage) value);
          return;
      }
      WriteSingleValue(builder, descriptor.MessageType.FindFieldByNumber(1), value);
  }
  /// <summary>
  /// Writes a repeated field as a JSON array. Elements rejected by CanWriteSingleValue are
  /// skipped. An array with no written elements is emitted as "[ ]".
  /// </summary>
  private void WriteList(StringBuilder builder, IFieldAccessor accessor, IList list)
  {
      builder.Append("[ ");
      bool nothingWritten = true;
      foreach (var element in list)
      {
          if (CanWriteSingleValue(accessor.Descriptor, element))
          {
              if (!nothingWritten)
              {
                  builder.Append(", ");
              }
              WriteSingleValue(builder, accessor.Descriptor, element);
              nothingWritten = false;
          }
      }
      builder.Append(nothingWritten ? "]" : " ]");
  }
  /// <summary>
  /// Writes a map field as a JSON object. Keys are always written as JSON strings; entries
  /// whose value is rejected by CanWriteSingleValue are skipped.
  /// </summary>
  /// <exception cref="ArgumentException">The map's key field type is not string, bool or an integer type.</exception>
  private void WriteDictionary(StringBuilder builder, IFieldAccessor accessor, IDictionary dictionary)
  {
      builder.Append("{ ");

      // Fields 1 and 2 of the field's message type describe the key and the value respectively.
      var entryType = accessor.Descriptor.MessageType;
      FieldDescriptor keyDescriptor = entryType.FindFieldByNumber(1);
      FieldDescriptor valueDescriptor = entryType.FindFieldByNumber(2);

      bool wroteAny = false;
      // This will box each pair. Could use IDictionaryEnumerator, but that's ugly in terms of disposal.
      foreach (DictionaryEntry entry in dictionary)
      {
          if (!CanWriteSingleValue(valueDescriptor, entry.Value))
          {
              continue;
          }
          if (wroteAny)
          {
              builder.Append(", ");
          }
          WriteString(builder, FormatMapKey(keyDescriptor.FieldType, entry.Key));
          builder.Append(": ");
          WriteSingleValue(builder, valueDescriptor, entry.Value);
          wroteAny = true;
      }
      builder.Append(wroteAny ? " }" : "}");
  }

  /// <summary>
  /// Produces the unquoted, unescaped text of a map key for the given key field type.
  /// </summary>
  private static string FormatMapKey(FieldType keyFieldType, object key)
  {
      switch (keyFieldType)
      {
          case FieldType.String:
              return (string) key;
          case FieldType.Bool:
              return (bool) key ? "true" : "false";
          case FieldType.Int32:
          case FieldType.Int64:
          case FieldType.SInt32:
          case FieldType.SInt64:
          case FieldType.UInt32:
          case FieldType.UInt64:
          case FieldType.Fixed32:
          case FieldType.Fixed64:
          case FieldType.SFixed32:
          case FieldType.SFixed64:
              return ((IFormattable) key).ToString("d", CultureInfo.InvariantCulture);
          default:
              throw new ArgumentException("Invalid key type: " + keyFieldType);
      }
  }
  /// <summary>
  /// Returns whether a single value can be represented in JSON. Only enum-typed descriptors
  /// are inspected: the value is writable when its number maps to a value known to the
  /// descriptor's enum type. Every other field type is always writable.
  /// </summary>
  private bool CanWriteSingleValue(FieldDescriptor descriptor, object value)
  {
      if (descriptor.FieldType != FieldType.Enum)
      {
          return true;
      }
      EnumValueDescriptor known = descriptor.EnumType.FindValueByNumber((int) value);
      return known != null;
  }
  /// <summary>
  /// Writes a string (including leading and trailing double quotes) to a builder, escaping as required.
  /// </summary>
  /// <remarks>
  /// Other than surrogate pair handling, this code is mostly taken from src/google/protobuf/util/internal/json_escaping.cc.
  /// </remarks>
  /// <param name="builder">The builder to append to.</param>
  /// <param name="text">The text to write.</param>
  /// <exception cref="ArgumentException">
  /// <paramref name="text"/> contains a high surrogate that is not immediately followed by a
  /// low surrogate, or a low surrogate that is not immediately preceded by a high surrogate.
  /// </exception>
  private void WriteString(StringBuilder builder, string text)
  {
      builder.Append('"');
      for (int i = 0; i < text.Length; i++)
      {
          char c = text[i];

          // Code units below 0xa0 are looked up in the precomputed table.
          if (c < 0xa0)
          {
              builder.Append(CommonRepresentations[c]);
              continue;
          }

          if (char.IsHighSurrogate(c))
          {
              // Encountered first part of a surrogate pair.
              // Check that we have the whole pair, and encode both parts as hex.
              i++;
              if (i == text.Length || !char.IsLowSurrogate(text[i]))
              {
                  throw new ArgumentException("String contains high surrogate not followed by low surrogate");
              }
              HexEncodeUtf16CodeUnit(builder, c);
              HexEncodeUtf16CodeUnit(builder, text[i]);
              continue;
          }
          else if (char.IsLowSurrogate(c))
          {
              // A low surrogate reached here was not consumed as the second half of a pair.
              throw new ArgumentException("String contains low surrogate not preceded by high surrogate");
          }

          switch ((uint) c)
          {
              // These are not required by json spec
              // but used to prevent security bugs in javascript.
              case 0xfeff: // Zero width no-break space
              case 0xfff9: // Interlinear annotation anchor
              case 0xfffa: // Interlinear annotation separator
              case 0xfffb: // Interlinear annotation terminator
              case 0x00ad: // Soft-hyphen
              case 0x06dd: // Arabic end of ayah
              case 0x070f: // Syriac abbreviation mark
              case 0x17b4: // Khmer vowel inherent Aq
              case 0x17b5: // Khmer vowel inherent Aa
                  HexEncodeUtf16CodeUnit(builder, c);
                  break;
              default:
                  if ((c >= 0x0600 && c <= 0x0603) || // Arabic signs
                      (c >= 0x200b && c <= 0x200f) || // Zero width etc.
                      (c >= 0x2028 && c <= 0x202e) || // Separators etc.
                      (c >= 0x2060 && c <= 0x2064) || // Invisible etc.
                      (c >= 0x206a && c <= 0x206f))
                  {
                      HexEncodeUtf16CodeUnit(builder, c);
                  }
                  else
                  {
                      // No handling of surrogates here - that's done earlier
                      builder.Append(c);
                  }
                  break;
          }
      }
      builder.Append('"');
  }
  /// <summary>
  /// Lower-case hexadecimal digits, indexed by nibble value.
  /// </summary>
  private const string Hex = "0123456789abcdef";

  /// <summary>
  /// Appends <paramref name="c"/> to <paramref name="builder"/> as a backslash-u escape
  /// followed by four lower-case hex digits, most significant nibble first.
  /// </summary>
  private static void HexEncodeUtf16CodeUnit(StringBuilder builder, char c)
  {
      builder.Append("\\u");
      builder.Append(Hex[(c >> 12) & 0xf]);
      builder.Append(Hex[(c >> 8) & 0xf]);
      builder.Append(Hex[(c >> 4) & 0xf]);
      builder.Append(Hex[(c >> 0) & 0xf]);
  }
  /// <summary>
  /// Settings controlling JSON formatting.
  /// </summary>
  public sealed class Settings
  {
      // Shared instance handed out by Default; constructed with formatDefaultValues = false.
      private static readonly Settings sharedDefault = new Settings(false);

      // Backing store for FormatDefaultValues; assigned once, in the constructor.
      private readonly bool includeDefaultValues;

      /// <summary>
      /// Creates settings with the given choice for formatting default values.
      /// </summary>
      /// <param name="formatDefaultValues">
      /// <c>true</c> to format fields holding their type's default value; <c>false</c> to omit them.
      /// </param>
      public Settings(bool formatDefaultValues)
      {
          includeDefaultValues = formatDefaultValues;
      }

      /// <summary>
      /// Default settings, as used by <see cref="JsonFormatter.Default"/>
      /// </summary>
      public static Settings Default
      {
          get { return sharedDefault; }
      }

      /// <summary>
      /// Whether fields whose values are the default for the field type (e.g. 0 for integers)
      /// should be formatted (true) or omitted (false).
      /// </summary>
      public bool FormatDefaultValues
      {
          get { return includeDefaultValues; }
      }
  }
  549. }
  550. }