JsonParser.cs 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018
  1. #region Copyright notice and license
  2. // Protocol Buffers - Google's data interchange format
  3. // Copyright 2015 Google Inc. All rights reserved.
  4. // https://developers.google.com/protocol-buffers/
  5. //
  6. // Redistribution and use in source and binary forms, with or without
  7. // modification, are permitted provided that the following conditions are
  8. // met:
  9. //
  10. // * Redistributions of source code must retain the above copyright
  11. // notice, this list of conditions and the following disclaimer.
  12. // * Redistributions in binary form must reproduce the above
  13. // copyright notice, this list of conditions and the following disclaimer
  14. // in the documentation and/or other materials provided with the
  15. // distribution.
  16. // * Neither the name of Google Inc. nor the names of its
  17. // contributors may be used to endorse or promote products derived from
  18. // this software without specific prior written permission.
  19. //
  20. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. #endregion
  32. using Google.Protobuf.Reflection;
  33. using Google.Protobuf.WellKnownTypes;
  34. using System;
  35. using System.Collections;
  36. using System.Collections.Generic;
  37. using System.Globalization;
  38. using System.IO;
  39. using System.Text;
  40. using System.Text.RegularExpressions;
  41. namespace Google.Protobuf
  42. {
  43. /// <summary>
  44. /// Reflection-based converter from JSON to messages.
  45. /// </summary>
  46. /// <remarks>
  47. /// <para>
  48. /// Instances of this class are thread-safe, with no mutable state.
  49. /// </para>
  50. /// <para>
  51. /// This is a simple start to get JSON parsing working. As it's reflection-based,
  52. /// it's not as quick as baking calls into generated messages - but is a simpler implementation.
  53. /// (This code is generally not heavily optimized.)
  54. /// </para>
  55. /// </remarks>
  56. public sealed class JsonParser
  57. {
  // The patterns below spell out [0-9] instead of \d so that only ASCII digits are accepted.
  // They are a first-pass filter rather than a full validator: most malformed text is rejected here
  // and the subsequent parsing step is relied upon to reject the rest.
  private static readonly Regex TimestampRegex = new Regex(
      @"^(?<datetime>[0-9]{4}-[01][0-9]-[0-3][0-9]T[012][0-9]:[0-5][0-9]:[0-5][0-9])(?<subseconds>\.[0-9]{1,9})?(?<offset>(Z|[+-][0-1][0-9]:[0-5][0-9]))$",
      FrameworkPortability.CompiledRegexWhereAvailable);

  private static readonly Regex DurationRegex = new Regex(
      @"^(?<sign>-)?(?<int>[0-9]{1,12})(?<subseconds>\.[0-9]{1,9})?s$",
      FrameworkPortability.CompiledRegexWhereAvailable);

  private static readonly int[] SubsecondScalingFactors = { 0, 100000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1 };
  private static readonly char[] FieldMaskPathSeparators = { ',' };
  private static readonly JsonParser defaultInstance = new JsonParser(Settings.Default);

  // TODO: Consider introducing a class containing parse state of the parser, tokenizer and depth. That would
  // simplify these handlers and the signatures of various methods.
  //
  // Maps the full name of a well-known type to the routine that merges its JSON form into a message.
  private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>> WellKnownTypeHandlers =
      new Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
      {
          [Timestamp.Descriptor.FullName] = (parser, message, tokenizer) => MergeTimestamp(message, tokenizer.Next()),
          [Duration.Descriptor.FullName] = (parser, message, tokenizer) => MergeDuration(message, tokenizer.Next()),
          [Value.Descriptor.FullName] = (parser, message, tokenizer) => parser.MergeStructValue(message, tokenizer),
          [ListValue.Descriptor.FullName] = (parser, message, tokenizer) =>
              parser.MergeRepeatedField(message, message.Descriptor.Fields[ListValue.ValuesFieldNumber], tokenizer),
          [Struct.Descriptor.FullName] = (parser, message, tokenizer) => parser.MergeStruct(message, tokenizer),
          [Any.Descriptor.FullName] = (parser, message, tokenizer) => parser.MergeAny(message, tokenizer),
          [FieldMask.Descriptor.FullName] = (parser, message, tokenizer) => MergeFieldMask(message, tokenizer.Next()),
          [Int32Value.Descriptor.FullName] = MergeWrapperField,
          [Int64Value.Descriptor.FullName] = MergeWrapperField,
          [UInt32Value.Descriptor.FullName] = MergeWrapperField,
          [UInt64Value.Descriptor.FullName] = MergeWrapperField,
          [FloatValue.Descriptor.FullName] = MergeWrapperField,
          [DoubleValue.Descriptor.FullName] = MergeWrapperField,
          [BytesValue.Descriptor.FullName] = MergeWrapperField,
          [StringValue.Descriptor.FullName] = MergeWrapperField
      };
  // Handler shared by all wrapper entries in WellKnownTypeHandlers, so the table does not have to
  // repeat the same lambda: merges the field numbered WrappersReflection.WrapperValueFieldNumber.
  private static void MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer)
  {
      var wrappedField = message.Descriptor.Fields[WrappersReflection.WrapperValueFieldNumber];
      parser.MergeField(message, wrappedField, tokenizer);
  }
  /// <summary>
  /// Returns a parser using the default settings.
  /// </summary>
  public static JsonParser Default => defaultInstance;
  private readonly Settings settings;

  /// <summary>
  /// Creates a new parser with the given settings.
  /// </summary>
  /// <param name="settings">The settings. Must not be null.</param>
  public JsonParser(Settings settings)
  {
      // Reject a null argument here (via the same precondition helper the Parse methods use) instead of
      // letting it surface later as a NullReferenceException the first time the settings are read.
      ProtoPreconditions.CheckNotNull(settings, nameof(settings));
      this.settings = settings;
  }
  /// <summary>
  /// Parses the text in <paramref name="json"/> and merges the result into <paramref name="message"/>.
  /// </summary>
  /// <param name="message">The message that receives the parsed data.</param>
  /// <param name="json">The JSON text to parse.</param>
  internal void Merge(IMessage message, string json)
  {
      // Wrap the text in a reader and delegate to the TextReader overload.
      TextReader reader = new StringReader(json);
      Merge(message, reader);
  }
  /// <summary>
  /// Parses the JSON supplied by <paramref name="jsonReader"/> and merges the result into <paramref name="message"/>.
  /// </summary>
  /// <param name="message">The message that receives the parsed data.</param>
  /// <param name="jsonReader">Reader supplying the JSON text.</param>
  internal void Merge(IMessage message, TextReader jsonReader)
  {
      var tokenizer = JsonTokenizer.FromTextReader(jsonReader);
      Merge(message, tokenizer);

      // After the top-level value, the only acceptable token is the end-of-document marker.
      var trailing = tokenizer.Next();
      if (trailing != JsonToken.EndDocument)
      {
          throw new InvalidProtocolBufferException("Expected end of JSON after object");
      }
  }
  /// <summary>
  /// Merges data read from the tokenizer into the given message. The next token is usually a
  /// "start object" token, although wrapper types and nullity can break that expectation.
  /// The method is an LL(1) recursive descent parser over the token stream. That stream is assumed
  /// to be valid JSON (the tokenizer performs that validation), but not every valid JSON token
  /// stream is valid "protobuf JSON".
  /// </summary>
  private void Merge(IMessage message, JsonTokenizer tokenizer)
  {
      if (tokenizer.ObjectDepth > settings.RecursionLimit)
      {
          throw InvalidProtocolBufferException.JsonRecursionLimitExceeded();
      }

      var descriptor = message.Descriptor;
      if (descriptor.IsWellKnownType)
      {
          Action<JsonParser, IMessage, JsonTokenizer> wellKnownHandler;
          if (WellKnownTypeHandlers.TryGetValue(descriptor.FullName, out wellKnownHandler))
          {
              wellKnownHandler(this, message, tokenizer);
              return;
          }
          // A well-known type without a dedicated handler is parsed like any other message.
      }

      var opening = tokenizer.Next();
      if (opening.Type != JsonToken.TokenType.StartObject)
      {
          throw new InvalidProtocolBufferException("Expected an object");
      }

      var fieldsByJsonName = descriptor.Fields.ByJsonName();
      // Oneofs for which a field has already been read; each may appear only once. Allocated on
      // first use, because oneofs are relatively rare and most messages never need the set.
      HashSet<OneofDescriptor> oneofsAlreadySet = null;

      for (var current = tokenizer.Next();
           current.Type != JsonToken.TokenType.EndObject;
           current = tokenizer.Next())
      {
          if (current.Type != JsonToken.TokenType.Name)
          {
              throw new InvalidOperationException("Unexpected token type " + current.Type);
          }

          string name = current.StringValue;
          FieldDescriptor field;
          if (!fieldsByJsonName.TryGetValue(name, out field))
          {
              // TODO: Is this what we want to do? If not, we'll need to skip the value,
              // which may be an object or array. (We might want to put code in the tokenizer
              // to do that.)
              throw new InvalidProtocolBufferException("Unknown field: " + name);
          }

          if (field.ContainingOneof != null)
          {
              if (oneofsAlreadySet == null)
              {
                  oneofsAlreadySet = new HashSet<OneofDescriptor>();
              }
              bool firstOccurrence = oneofsAlreadySet.Add(field.ContainingOneof);
              if (!firstOccurrence)
              {
                  throw new InvalidProtocolBufferException($"Multiple values specified for oneof {field.ContainingOneof.Name}");
              }
          }

          MergeField(message, field, tokenizer);
      }
  }
  /// <summary>
  /// Reads the value for <paramref name="field"/> from the tokenizer and stores it in <paramref name="message"/>,
  /// dispatching to the map, repeated or singular handling as appropriate.
  /// </summary>
  private void MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
  {
      var token = tokenizer.Next();

      // A JSON null clears the field, except for a singular google.protobuf.Value field,
      // which falls through to normal value parsing.
      // Note: different from Java API, which just ignores it.
      // TODO: Bring it more in line? Discuss...
      if (token.Type == JsonToken.TokenType.Null &&
          (field.IsMap || field.IsRepeated || !IsGoogleProtobufValueField(field)))
      {
          field.Accessor.Clear(message);
          return;
      }

      // The token was only peeked at; hand it back for the specialised routine to consume.
      tokenizer.PushBack(token);

      if (field.IsMap)
      {
          MergeMapField(message, field, tokenizer);
          return;
      }
      if (field.IsRepeated)
      {
          MergeRepeatedField(message, field, tokenizer);
          return;
      }

      object parsed = ParseSingleValue(field, tokenizer);
      field.Accessor.SetValue(message, parsed);
  }
  /// <summary>
  /// Reads a JSON array from the tokenizer and appends each element to the repeated field's list.
  /// </summary>
  private void MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
  {
      var opening = tokenizer.Next();
      if (opening.Type != JsonToken.TokenType.StartArray)
      {
          throw new InvalidProtocolBufferException("Repeated field value was not an array. Token type: " + opening.Type);
      }

      IList elements = (IList) field.Accessor.GetValue(message);
      for (var next = tokenizer.Next();
           next.Type != JsonToken.TokenType.EndArray;
           next = tokenizer.Next())
      {
          // The element token is returned to the tokenizer so ParseSingleValue can read it itself.
          tokenizer.PushBack(next);
          if (next.Type == JsonToken.TokenType.Null)
          {
              throw new InvalidProtocolBufferException("Repeated field elements cannot be null");
          }
          elements.Add(ParseSingleValue(field, tokenizer));
      }
  }
  /// <summary>
  /// Reads a JSON object from the tokenizer and stores each property as an entry in the map field.
  /// </summary>
  private void MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
  {
      // A map is always written as a JSON object, even when its values are well-known types;
      // ParseSingleValue takes care of those.
      var opening = tokenizer.Next();
      if (opening.Type != JsonToken.TokenType.StartObject)
      {
          throw new InvalidProtocolBufferException("Expected an object to populate a map");
      }

      // The map entry message carries its key as field 1 and its value as field 2.
      var entryType = field.MessageType;
      var keyField = entryType.FindFieldByNumber(1);
      var valueField = entryType.FindFieldByNumber(2);
      if (keyField == null || valueField == null)
      {
          throw new InvalidProtocolBufferException("Invalid map field: " + field.FullName);
      }

      IDictionary entries = (IDictionary) field.Accessor.GetValue(message);
      for (var property = tokenizer.Next();
           property.Type != JsonToken.TokenType.EndObject;
           property = tokenizer.Next())
      {
          object key = ParseMapKey(keyField, property.StringValue);
          object value = ParseSingleValue(valueField, tokenizer);
          if (value == null)
          {
              throw new InvalidProtocolBufferException("Map values must not be null");
          }
          entries[key] = value;
      }
  }
  /// <summary>
  /// Determines whether <paramref name="field"/> is a message field whose message type has the
  /// same full name as <c>Value.Descriptor</c>.
  /// </summary>
  private static bool IsGoogleProtobufValueField(FieldDescriptor field)
  {
      if (field.FieldType != FieldType.Message)
      {
          return false;
      }
      return field.MessageType.FullName == Value.Descriptor.FullName;
  }
  /// <summary>
  /// Reads a single JSON value from the tokenizer and converts it to the value to store for
  /// <paramref name="field"/>.
  /// </summary>
  /// <param name="field">The field the value is destined for.</param>
  /// <param name="tokenizer">The source of tokens; the value's first token is read from it.</param>
  /// <returns>
  /// The converted value. A JSON null yields <c>Value.ForNull()</c> for a google.protobuf.Value field
  /// and a null reference for every other field.
  /// </returns>
  /// <exception cref="InvalidProtocolBufferException">The token type is not supported for the field type.</exception>
  private object ParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer)
  {
      var token = tokenizer.Next();
      if (token.Type == JsonToken.TokenType.Null)
      {
          // TODO: In order to support dynamic messages, we should really build this up
          // dynamically.
          if (IsGoogleProtobufValueField(field))
          {
              return Value.ForNull();
          }
          return null;
      }

      var fieldType = field.FieldType;
      if (fieldType == FieldType.Message)
      {
          // Parse wrapper types as their constituent types.
          // TODO: What does this mean for null?
          if (field.MessageType.IsWrapperType)
          {
              field = field.MessageType.Fields[WrappersReflection.WrapperValueFieldNumber];
              fieldType = field.FieldType;
          }
          else
          {
              // TODO: Merge the current value in message? (Public API currently doesn't make this relevant as we don't expose merging.)
              // Return the token so the nested Merge call sees the whole value.
              tokenizer.PushBack(token);
              IMessage subMessage = NewMessageForField(field);
              Merge(subMessage, tokenizer);
              return subMessage;
          }
      }

      // No case for JsonToken.TokenType.Null is needed: null tokens have already returned above.
      switch (token.Type)
      {
          case JsonToken.TokenType.True:
          case JsonToken.TokenType.False:
              if (fieldType == FieldType.Bool)
              {
                  return token.Type == JsonToken.TokenType.True;
              }
              // Fall through to "we don't support this type for this case"; could duplicate the behaviour of the default
              // case instead, but this way we'd only need to change one place.
              goto default;
          case JsonToken.TokenType.StringValue:
              return ParseSingleStringValue(field, token.StringValue);
          // Note: not passing the number value itself here, as we may end up storing the string value in the token too.
          case JsonToken.TokenType.Number:
              return ParseSingleNumberValue(field, token);
          default:
              throw new InvalidProtocolBufferException("Unsupported JSON token type " + token.Type + " for field type " + fieldType);
      }
  }
  /// <summary>
  /// Creates a new message of type <typeparamref name="T"/> from the text in <paramref name="json"/>.
  /// </summary>
  /// <typeparam name="T">The type of message to create.</typeparam>
  /// <param name="json">The JSON to parse.</param>
  /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
  /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
  public T Parse<T>(string json) where T : IMessage, new()
  {
      ProtoPreconditions.CheckNotNull(json, nameof(json));
      TextReader reader = new StringReader(json);
      return Parse<T>(reader);
  }
  /// <summary>
  /// Creates a new message of type <typeparamref name="T"/> from the JSON supplied by <paramref name="jsonReader"/>.
  /// </summary>
  /// <typeparam name="T">The type of message to create.</typeparam>
  /// <param name="jsonReader">Reader providing the JSON to parse.</param>
  /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
  /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
  public T Parse<T>(TextReader jsonReader) where T : IMessage, new()
  {
      ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
      // Start from an empty instance and merge the parsed data into it.
      T result = new T();
      Merge(result, jsonReader);
      return result;
  }
  /// <summary>
  /// Creates a new message of the type described by <paramref name="descriptor"/> from the text in <paramref name="json"/>.
  /// </summary>
  /// <param name="json">The JSON to parse.</param>
  /// <param name="descriptor">Descriptor of message type to parse.</param>
  /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
  /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
  public IMessage Parse(string json, MessageDescriptor descriptor)
  {
      ProtoPreconditions.CheckNotNull(json, nameof(json));
      ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
      TextReader reader = new StringReader(json);
      return Parse(reader, descriptor);
  }
  /// <summary>
  /// Creates a new message of the type described by <paramref name="descriptor"/> from the JSON
  /// supplied by <paramref name="jsonReader"/>.
  /// </summary>
  /// <param name="jsonReader">Reader providing the JSON to parse.</param>
  /// <param name="descriptor">Descriptor of message type to parse.</param>
  /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
  /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
  public IMessage Parse(TextReader jsonReader, MessageDescriptor descriptor)
  {
      ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
      ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
      // The descriptor's parser supplies the instance that the JSON is merged into.
      IMessage result = descriptor.Parser.CreateTemplate();
      Merge(result, jsonReader);
      return result;
  }
  /// <summary>
  /// Merges one JSON value into a google.protobuf.Value message, choosing which field of the
  /// message to set from the type of the first token.
  /// </summary>
  private void MergeStructValue(IMessage message, JsonTokenizer tokenizer)
  {
      var first = tokenizer.Next();
      var valueFields = message.Descriptor.Fields;
      switch (first.Type)
      {
          case JsonToken.TokenType.Null:
              valueFields[Value.NullValueFieldNumber].Accessor.SetValue(message, 0);
              return;
          case JsonToken.TokenType.StringValue:
              valueFields[Value.StringValueFieldNumber].Accessor.SetValue(message, first.StringValue);
              return;
          case JsonToken.TokenType.Number:
              valueFields[Value.NumberValueFieldNumber].Accessor.SetValue(message, first.NumberValue);
              return;
          case JsonToken.TokenType.True:
          case JsonToken.TokenType.False:
              valueFields[Value.BoolValueFieldNumber].Accessor.SetValue(message, first.Type == JsonToken.TokenType.True);
              return;
          case JsonToken.TokenType.StartObject:
          case JsonToken.TokenType.StartArray:
          {
              // Objects go into the struct field and arrays into the list field; apart from the
              // choice of field, both are handled the same way.
              int nestedNumber = first.Type == JsonToken.TokenType.StartObject
                  ? Value.StructValueFieldNumber
                  : Value.ListValueFieldNumber;
              var nestedField = valueFields[nestedNumber];
              var nestedMessage = NewMessageForField(nestedField);
              // Give the opening token back so the nested merge reads the complete value.
              tokenizer.PushBack(first);
              Merge(nestedMessage, tokenizer);
              nestedField.Accessor.SetValue(message, nestedMessage);
              return;
          }
          default:
              throw new InvalidOperationException("Unexpected token type: " + first.Type);
      }
  }
  /// <summary>
  /// Merges a JSON object into a google.protobuf.Struct message by treating the object as the
  /// content of the message's map field.
  /// </summary>
  private void MergeStruct(IMessage message, JsonTokenizer tokenizer)
  {
      var first = tokenizer.Next();
      if (first.Type != JsonToken.TokenType.StartObject)
      {
          throw new InvalidProtocolBufferException("Expected object value for Struct");
      }
      // The token was only inspected; MergeMapField expects to read the start-object itself.
      tokenizer.PushBack(first);

      var mapField = message.Descriptor.Fields[Struct.FieldsFieldNumber];
      MergeMapField(message, mapField, tokenizer);
  }
  /// <summary>
  /// Merges a JSON object into a google.protobuf.Any message. The object's @type property selects
  /// the message type from the settings' type registry; the rest of the object is parsed as that
  /// type and stored in the Any in serialized form.
  /// </summary>
  private void MergeAny(IMessage message, JsonTokenizer tokenizer)
  {
      // Tokens are recorded until the @type property shows up. Its value identifies the message type
      // in the type registry, after which the recorded stream is replayed without the @type property.
      var recorded = new List<JsonToken>();

      var token = tokenizer.Next();
      if (token.Type != JsonToken.TokenType.StartObject)
      {
          throw new InvalidProtocolBufferException("Expected object value for Any");
      }
      int typeUrlObjectDepth = tokenizer.ObjectDepth;

      while (true)
      {
          // Comparing depths guards against a nested Any whose own @type property appears before
          // the type URL of *this* Any.
          bool atOwnTypeUrl =
              token.Type == JsonToken.TokenType.Name &&
              token.StringValue == JsonFormatter.AnyTypeUrlField &&
              tokenizer.ObjectDepth == typeUrlObjectDepth;
          if (atOwnTypeUrl)
          {
              break;
          }

          recorded.Add(token);
          token = tokenizer.Next();
          if (tokenizer.ObjectDepth < typeUrlObjectDepth)
          {
              throw new InvalidProtocolBufferException("Any message with no @type");
          }
      }

      // Neither the @type property name nor its value is added to the recorded tokens.
      token = tokenizer.Next();
      if (token.Type != JsonToken.TokenType.StringValue)
      {
          throw new InvalidProtocolBufferException("Expected string value for Any.@type");
      }
      string typeUrl = token.StringValue;
      string typeName = Any.GetTypeName(typeUrl);

      MessageDescriptor descriptor = settings.TypeRegistry.Find(typeName);
      if (descriptor == null)
      {
          throw new InvalidOperationException($"Type registry has no descriptor for type name '{typeName}'");
      }

      // Replay what has been recorded, followed by whatever is left of the object, and parse it as
      // usual. The original tokenizer should end up at the end of the object.
      var replay = JsonTokenizer.FromReplayedTokens(recorded, tokenizer);
      var body = descriptor.Parser.CreateTemplate();
      if (descriptor.IsWellKnownType)
      {
          MergeWellKnownTypeAnyBody(body, replay);
      }
      else
      {
          Merge(body, replay);
      }
      var data = body.ToByteString();

      // With the serialized body in hand, populate the Any that was passed in.
      var anyFields = message.Descriptor.Fields;
      anyFields[Any.TypeUrlFieldNumber].Accessor.SetValue(message, typeUrl);
      anyFields[Any.ValueFieldNumber].Accessor.SetValue(message, data);
  }
  // In JSON, a well-known type inside an Any is carried by a property called "value". Because the
  // @type property has already been removed from the token stream passed in, the stream should consist
  // *only* of start-object, name("value"), the value itself, and end-object.
  private void MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer)
  {
      // Discard the start-object token; the caller has already verified it.
      tokenizer.Next();

      var propertyName = tokenizer.Next();
      // TODO: What about an absent Int32Value, for example?
      bool isValueProperty =
          propertyName.Type == JsonToken.TokenType.Name &&
          propertyName.StringValue == JsonFormatter.AnyWellKnownTypeValueField;
      if (!isValueProperty)
      {
          throw new InvalidProtocolBufferException($"Expected '{JsonFormatter.AnyWellKnownTypeValueField}' property for well-known type Any body");
      }

      Merge(body, tokenizer);

      var closing = tokenizer.Next();
      if (closing.Type != JsonToken.TokenType.EndObject)
      {
          throw new InvalidProtocolBufferException("Expected end-object token after @type/value for well-known type");
      }
  }
  524. #region Utility methods which don't depend on the state (or settings) of the parser.
  /// <summary>
  /// Converts the text of a JSON property name into a map key of the type required by <paramref name="field"/>.
  /// </summary>
  /// <exception cref="InvalidProtocolBufferException">
  /// The text is not "true" or "false" for a bool key, or the field type is not handled as a map key type.
  /// </exception>
  private static object ParseMapKey(FieldDescriptor field, string keyText)
  {
      switch (field.FieldType)
      {
          case FieldType.String:
              return keyText;

          case FieldType.Bool:
              // Only the exact lower-case spellings are accepted.
              switch (keyText)
              {
                  case "true":
                      return true;
                  case "false":
                      return false;
                  default:
                      throw new InvalidProtocolBufferException("Invalid string for bool map key: " + keyText);
              }

          case FieldType.Int32:
          case FieldType.SInt32:
          case FieldType.SFixed32:
              return ParseNumericString(keyText, int.Parse);

          case FieldType.UInt32:
          case FieldType.Fixed32:
              return ParseNumericString(keyText, uint.Parse);

          case FieldType.Int64:
          case FieldType.SInt64:
          case FieldType.SFixed64:
              return ParseNumericString(keyText, long.Parse);

          case FieldType.UInt64:
          case FieldType.Fixed64:
              return ParseNumericString(keyText, ulong.Parse);

          default:
              throw new InvalidProtocolBufferException("Invalid field type for map: " + field.FieldType);
      }
  }
  /// <summary>
  /// Converts the numeric value of a JSON number token into the value type required by <paramref name="field"/>.
  /// </summary>
  /// <exception cref="InvalidProtocolBufferException">
  /// The number is not an integer where one is required, is out of range for the field type,
  /// or the field type cannot be populated from a JSON number.
  /// </exception>
  private static object ParseSingleNumberValue(FieldDescriptor field, JsonToken token)
  {
      double number = token.NumberValue;
      try
      {
          // Conversions run in a checked context so an out-of-range cast raises OverflowException,
          // which is translated below.
          checked
          {
              switch (field.FieldType)
              {
                  case FieldType.Double:
                      return number;

                  case FieldType.Float:
                      if (double.IsNaN(number))
                      {
                          return float.NaN;
                      }
                      // Infinities map to the float infinities; any other value beyond the float range is an error.
                      if (double.IsPositiveInfinity(number))
                      {
                          return float.PositiveInfinity;
                      }
                      if (double.IsNegativeInfinity(number))
                      {
                          return float.NegativeInfinity;
                      }
                      if (number > float.MaxValue || number < float.MinValue)
                      {
                          throw new InvalidProtocolBufferException($"Value out of range: {number}");
                      }
                      return (float) number;

                  case FieldType.Int32:
                  case FieldType.SInt32:
                  case FieldType.SFixed32:
                      CheckInteger(number);
                      return (int) number;

                  case FieldType.UInt32:
                  case FieldType.Fixed32:
                      CheckInteger(number);
                      return (uint) number;

                  case FieldType.Int64:
                  case FieldType.SInt64:
                  case FieldType.SFixed64:
                      CheckInteger(number);
                      return (long) number;

                  case FieldType.UInt64:
                  case FieldType.Fixed64:
                      CheckInteger(number);
                      return (ulong) number;

                  case FieldType.Enum:
                      CheckInteger(number);
                      // Just return it as an int, and let the CLR convert it.
                      // Note that we deliberately don't check that it's a known value.
                      return (int) number;

                  default:
                      throw new InvalidProtocolBufferException($"Unsupported conversion from JSON number for field type {field.FieldType}");
              }
          }
      }
      catch (OverflowException)
      {
          throw new InvalidProtocolBufferException($"Value out of range: {number}");
      }
  }
  /// <summary>
  /// Throws <see cref="InvalidProtocolBufferException"/> unless <paramref name="value"/> is a finite
  /// number with no fractional part.
  /// </summary>
  private static void CheckInteger(double value)
  {
      bool isWholeNumber =
          !double.IsInfinity(value) &&
          !double.IsNaN(value) &&
          value == Math.Floor(value);
      if (!isWholeNumber)
      {
          throw new InvalidProtocolBufferException($"Value not an integer: {value}");
      }
  }
  /// <summary>
  /// Converts the text of a JSON string into the value type required by <paramref name="field"/>.
  /// </summary>
  /// <exception cref="InvalidProtocolBufferException">
  /// The text names no value of the field's enum type, is not valid base64 for a bytes field,
  /// or the field type cannot be populated from a JSON string.
  /// </exception>
  private static object ParseSingleStringValue(FieldDescriptor field, string text)
  {
      switch (field.FieldType)
      {
          case FieldType.String:
              return text;

          case FieldType.Bytes:
              try
              {
                  return ByteString.FromBase64(text);
              }
              catch (FormatException e)
              {
                  throw InvalidProtocolBufferException.InvalidBase64(e);
              }

          case FieldType.Int32:
          case FieldType.SInt32:
          case FieldType.SFixed32:
              return ParseNumericString(text, int.Parse);

          case FieldType.UInt32:
          case FieldType.Fixed32:
              return ParseNumericString(text, uint.Parse);

          case FieldType.Int64:
          case FieldType.SInt64:
          case FieldType.SFixed64:
              return ParseNumericString(text, long.Parse);

          case FieldType.UInt64:
          case FieldType.Fixed64:
              return ParseNumericString(text, ulong.Parse);

          case FieldType.Double:
          {
              double parsedDouble = ParseNumericString(text, double.Parse);
              ValidateInfinityAndNan(
                  text,
                  double.IsPositiveInfinity(parsedDouble),
                  double.IsNegativeInfinity(parsedDouble),
                  double.IsNaN(parsedDouble));
              return parsedDouble;
          }

          case FieldType.Float:
          {
              float parsedFloat = ParseNumericString(text, float.Parse);
              ValidateInfinityAndNan(
                  text,
                  float.IsPositiveInfinity(parsedFloat),
                  float.IsNegativeInfinity(parsedFloat),
                  float.IsNaN(parsedFloat));
              return parsedFloat;
          }

          case FieldType.Enum:
          {
              // The text is looked up as the name of a value in the field's enum type.
              var namedValue = field.EnumType.FindValueByName(text);
              if (namedValue == null)
              {
                  throw new InvalidProtocolBufferException($"Invalid enum value: {text} for enum type: {field.EnumType.FullName}");
              }
              // Just return it as an int, and let the CLR convert it.
              return namedValue.Number;
          }

          default:
              throw new InvalidProtocolBufferException($"Unsupported conversion from JSON string for field type {field.FieldType}");
      }
  }
  /// <summary>
  /// Obtains a fresh message for the given field by asking the parser of the
  /// field's message type to create a template.
  /// </summary>
  /// <param name="field">Descriptor of a field whose <c>MessageType</c> supplies the parser.</param>
  /// <returns>The message returned by the parser's <c>CreateTemplate</c> call.</returns>
  private static IMessage NewMessageForField(FieldDescriptor field) =>
      field.MessageType.Parser.CreateTemplate();
  /// <summary>
  /// Parses numeric text with the supplied BCL parser, after rejecting forms the
  /// parser would accept but this method does not: a leading '+' and leading zeroes.
  /// </summary>
  /// <typeparam name="T">The numeric type produced by <paramref name="parser"/>.</typeparam>
  /// <param name="text">The text to parse.</param>
  /// <param name="parser">
  /// A parse method such as <c>int.Parse</c>. It is invoked with leading sign, decimal point
  /// and exponent allowed, using the invariant culture.
  /// </param>
  /// <returns>The parsed value.</returns>
  /// <exception cref="InvalidProtocolBufferException">
  /// The text starts with '+', has a redundant leading zero, is rejected by the parser,
  /// or is out of range for <typeparamref name="T"/>.
  /// </exception>
  private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser)
  {
      // The prefix checks below compare individual characters (ordinal semantics) rather than using
      // the culture-sensitive string.StartsWith(string) overload: this is machine-readable text, and
      // linguistic comparison rules must not influence what counts as a '+', '-' or '0' prefix.

      // Can't prohibit this with NumberStyles.
      if (text.Length > 0 && text[0] == '+')
      {
          throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
      }

      // Reject a leading zero followed by another digit ("01", "-007"), while still
      // allowing "0", "-0", "0.5" and "-0.5". Skip over an optional leading '-'.
      int firstDigitIndex = (text.Length > 0 && text[0] == '-') ? 1 : 0;
      if (text.Length > firstDigitIndex + 1 && text[firstDigitIndex] == '0')
      {
          char afterZero = text[firstDigitIndex + 1];
          if (afterZero >= '0' && afterZero <= '9')
          {
              throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
          }
      }

      try
      {
          return parser(text, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture);
      }
      catch (FormatException)
      {
          throw new InvalidProtocolBufferException($"Invalid numeric value for type: {text}");
      }
      catch (OverflowException)
      {
          throw new InvalidProtocolBufferException($"Value out of range: {text}");
      }
  }
  /// <summary>
  /// Ensures that an infinite or NaN parse result came from exactly the expected text
  /// ("Infinity", "-Infinity" or "NaN"). This compensates for the lenient whitespace handling
  /// of double.Parse/float.Parse, and for Mono parsing out-of-range values as infinity.
  /// </summary>
  /// <param name="text">The original text that was parsed.</param>
  /// <param name="isPositiveInfinity">Whether the parsed value is positive infinity.</param>
  /// <param name="isNegativeInfinity">Whether the parsed value is negative infinity.</param>
  /// <param name="isNaN">Whether the parsed value is NaN.</param>
  /// <exception cref="InvalidProtocolBufferException">
  /// A flag is set but <paramref name="text"/> is not the matching literal.
  /// </exception>
  private static void ValidateInfinityAndNan(string text, bool isPositiveInfinity, bool isNegativeInfinity, bool isNaN)
  {
      bool badPositiveInfinity = isPositiveInfinity && text != "Infinity";
      bool badNegativeInfinity = isNegativeInfinity && text != "-Infinity";
      bool badNaN = isNaN && text != "NaN";

      if (badPositiveInfinity || badNegativeInfinity || badNaN)
      {
          throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
      }
  }
  /// <summary>
  /// Parses a JSON string token as a timestamp and stores the resulting seconds and nanos
  /// in the corresponding fields of <paramref name="message"/>.
  /// </summary>
  /// <remarks>
  /// The text must match <c>TimestampRegex</c>, which supplies the "datetime", "subseconds" and
  /// "offset" groups. The date/time part is parsed as UTC; any fractional seconds and UTC offset
  /// are then applied as a duration adjustment.
  /// </remarks>
  /// <param name="message">The message whose seconds and nanos fields are set.</param>
  /// <param name="token">The JSON token to read; must be a string value.</param>
  /// <exception cref="InvalidProtocolBufferException">
  /// The token is not a string, the text does not match the expected format, the offset exceeds
  /// 18 hours or is "-00:00", or the adjusted timestamp falls outside the supported range.
  /// </exception>
  private static void MergeTimestamp(IMessage message, JsonToken token)
  {
      if (token.Type != JsonToken.TokenType.StringValue)
      {
          throw new InvalidProtocolBufferException("Expected string value for Timestamp");
      }
      var match = TimestampRegex.Match(token.StringValue);
      if (!match.Success)
      {
          throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
      }
      var dateTime = match.Groups["datetime"].Value;
      var subseconds = match.Groups["subseconds"].Value;
      var offset = match.Groups["offset"].Value;
      try
      {
          DateTime parsed = DateTime.ParseExact(
              dateTime,
              "yyyy-MM-dd'T'HH:mm:ss",
              CultureInfo.InvariantCulture,
              DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
          // TODO: It would be nice not to have to create all these objects... easy to optimize later though.
          Timestamp timestamp = Timestamp.FromDateTime(parsed);

          int nanosToAdd = 0;
          if (subseconds != "")
          {
              // This should always work, as we've got 1-9 digits.
              // The first character of the group is skipped before parsing the digits; the scaling
              // factor is looked up by the full group length (including that first character).
              int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
              nanosToAdd = parsedFraction * SubsecondScalingFactors[subseconds.Length];
          }

          int secondsToAdd = 0;
          if (offset != "Z")
          {
              // This is the amount we need to *subtract* from the local time to get to UTC - hence - => +1 and vice versa.
              int sign = offset[0] == '-' ? 1 : -1;
              // All numeric parsing here uses the invariant culture: the text is machine-readable,
              // so the current thread culture must never be consulted.
              int hours = int.Parse(offset.Substring(1, 2), CultureInfo.InvariantCulture);
              int minutes = int.Parse(offset.Substring(4, 2), CultureInfo.InvariantCulture);
              int totalMinutes = hours * 60 + minutes;
              if (totalMinutes > 18 * 60)
              {
                  throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
              }
              if (totalMinutes == 0 && sign == 1)
              {
                  // This is an offset of -00:00, which means "unknown local offset". It makes no sense for a timestamp.
                  throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
              }
              // We need to *subtract* the offset from local time to get UTC.
              secondsToAdd = sign * totalMinutes * 60;
          }

          // Ensure we've got the right signs. Currently unnecessary, but easy to do.
          if (secondsToAdd < 0 && nanosToAdd > 0)
          {
              secondsToAdd++;
              nanosToAdd = nanosToAdd - Duration.NanosecondsPerSecond;
          }

          if (secondsToAdd != 0 || nanosToAdd != 0)
          {
              timestamp += new Duration { Nanos = nanosToAdd, Seconds = secondsToAdd };
              // The resulting timestamp after offset change would be out of our expected range. Currently the Timestamp message doesn't validate this
              // anywhere, but we shouldn't parse it.
              if (timestamp.Seconds < Timestamp.UnixSecondsAtBclMinValue || timestamp.Seconds > Timestamp.UnixSecondsAtBclMaxValue)
              {
                  throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
              }
          }

          // Values are written through the descriptor's accessors rather than typed properties.
          message.Descriptor.Fields[Timestamp.SecondsFieldNumber].Accessor.SetValue(message, timestamp.Seconds);
          message.Descriptor.Fields[Timestamp.NanosFieldNumber].Accessor.SetValue(message, timestamp.Nanos);
      }
      catch (FormatException)
      {
          // Raised by DateTime.ParseExact / int.Parse for text that matched the regex but is not a real value.
          throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
      }
  }
  /// <summary>
  /// Parses a JSON string token as a duration and stores the resulting seconds and nanos
  /// in the corresponding fields of <paramref name="message"/>.
  /// </summary>
  /// <remarks>
  /// The text must match <c>DurationRegex</c>, which supplies the "sign", "int" and "subseconds"
  /// groups. A "-" sign negates both the seconds and the nanos.
  /// </remarks>
  /// <param name="message">The message whose seconds and nanos fields are set.</param>
  /// <param name="token">The JSON token to read; must be a string value.</param>
  /// <exception cref="InvalidProtocolBufferException">
  /// The token is not a string, the text does not match the expected format, the seconds part has
  /// an insignificant leading zero, or the result fails <c>Duration.IsNormalized</c>.
  /// </exception>
  private static void MergeDuration(IMessage message, JsonToken token)
  {
      if (token.Type != JsonToken.TokenType.StringValue)
      {
          throw new InvalidProtocolBufferException("Expected string value for Duration");
      }
      var match = DurationRegex.Match(token.StringValue);
      if (!match.Success)
      {
          throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
      }
      var sign = match.Groups["sign"].Value;
      var secondsText = match.Groups["int"].Value;
      // Prohibit leading insignificant zeroes
      if (secondsText[0] == '0' && secondsText.Length > 1)
      {
          throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
      }
      var subseconds = match.Groups["subseconds"].Value;
      var multiplier = sign == "-" ? -1 : 1;

      try
      {
          // All numeric parsing here uses the invariant culture: the text is machine-readable,
          // so the current thread culture must never be consulted.
          long seconds = long.Parse(secondsText, CultureInfo.InvariantCulture) * multiplier;
          int nanos = 0;
          if (subseconds != "")
          {
              // This should always work, as we've got 1-9 digits.
              // The first character of the group is skipped before parsing the digits; the scaling
              // factor is looked up by the full group length (including that first character).
              int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
              nanos = parsedFraction * SubsecondScalingFactors[subseconds.Length] * multiplier;
          }
          if (!Duration.IsNormalized(seconds, nanos))
          {
              throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
          }
          // Values are written through the descriptor's accessors rather than typed properties.
          message.Descriptor.Fields[Duration.SecondsFieldNumber].Accessor.SetValue(message, seconds);
          message.Descriptor.Fields[Duration.NanosFieldNumber].Accessor.SetValue(message, nanos);
      }
      catch (FormatException)
      {
          throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
      }
  }
  /// <summary>
  /// Reads a JSON string token as a field mask: splits it on <c>FieldMaskPathSeparators</c>,
  /// converts each path with <c>ToSnakeCase</c>, and appends the results to the message's
  /// paths field.
  /// </summary>
  /// <param name="message">The message whose paths collection receives the converted entries.</param>
  /// <param name="token">The JSON token to read; must be a string value.</param>
  /// <exception cref="InvalidProtocolBufferException">
  /// The token is not a string, or a path is rejected by <c>ToSnakeCase</c>.
  /// </exception>
  private static void MergeFieldMask(IMessage message, JsonToken token)
  {
      bool isString = token.Type == JsonToken.TokenType.StringValue;
      if (!isString)
      {
          throw new InvalidProtocolBufferException("Expected string value for FieldMask");
      }

      // TODO: Do we *want* to remove empty entries? Probably okay to treat "" as "no paths", but "foo,,bar"?
      string[] rawPaths = token.StringValue.Split(FieldMaskPathSeparators, StringSplitOptions.RemoveEmptyEntries);

      var pathsField = message.Descriptor.Fields[FieldMask.PathsFieldNumber];
      IList target = (IList) pathsField.Accessor.GetValue(message);

      for (int index = 0; index < rawPaths.Length; index++)
      {
          target.Add(ToSnakeCase(rawPaths[index]));
      }
  }
  // Ported from src/google/protobuf/util/internal/utility.cc
  /// <summary>
  /// Converts text to snake_case by lower-casing ASCII capitals and inserting
  /// underscores at word boundaries. Input that already contains an underscore is rejected.
  /// </summary>
  /// <param name="text">The text to convert.</param>
  /// <returns>The converted text.</returns>
  /// <exception cref="InvalidProtocolBufferException"><paramref name="text"/> contains '_'.</exception>
  private static string ToSnakeCase(string text)
  {
      var result = new StringBuilder(text.Length * 2);
      // Note: the first flag is probably unnecessary now, but is retained to stay as close as
      // possible to the C++, whilst still throwing an exception on underscores.
      bool previousWasNotUnderscore = false; // false only before the first character (case 1 below)
      bool previousWasNotCapital = false;

      for (int index = 0; index < text.Length; index++)
      {
          char current = text[index];
          bool isAsciiUpper = current >= 'A' && current <= 'Z';

          if (!isAsciiUpper)
          {
              if (current == '_')
              {
                  throw new InvalidProtocolBufferException($"Invalid field mask: {text}");
              }
              result.Append(current);
              previousWasNotUnderscore = true;
              previousWasNotCapital = true;
              continue;
          }

          // The current character B is an ASCII capital. Cases:
          //   1) At beginning of input: "B..." => "b..."            (e.g. "Biscuit" => "biscuit")
          //   2) Following a lowercase: "...aB..." => "...a_b..."   (e.g. "gBike" => "g_bike")
          //   3) At the end of input: "...AB" => "...ab"            (e.g. "GoogleLAB" => "google_lab")
          //   4) Followed by a lowercase: "...ABc..." => "...a_bc..." (e.g. "GBike" => "g_bike")
          bool nextIsAsciiLower =
              index + 1 < text.Length &&
              text[index + 1] >= 'a' &&
              text[index + 1] <= 'z';

          // An underscore is inserted for cases 2 and 4 only; never at the very start (case 1).
          if (previousWasNotUnderscore && (previousWasNotCapital || nextIsAsciiLower))
          {
              result.Append('_');
          }

          // Lower-case by offset; current is already known to be an ASCII capital.
          result.Append((char) (current - 'A' + 'a'));
          previousWasNotUnderscore = true;
          previousWasNotCapital = false;
      }

      return result.ToString();
  }
  914. #endregion
  /// <summary>
  /// Options that control how JSON is parsed.
  /// </summary>
  public sealed class Settings
  {
      /// <summary>
      /// The settings used by <see cref="JsonParser.Default"/>: the default recursion limit of
      /// <see cref="CodedInputStream"/>, combined with an empty type registry.
      /// </summary>
      public static Settings Default { get; }

      // Default is assigned in a static constructor as a workaround for the Mono compiler
      // complaining about XML comments not being on valid language elements.
      static Settings()
      {
          Default = new Settings(CodedInputStream.DefaultRecursionLimit);
      }

      /// <summary>
      /// The maximum nesting depth of messages that will be parsed. Only messages count towards
      /// this limit, not collections - a message inside a collection inside a message is
      /// depth 2, not 3.
      /// </summary>
      public int RecursionLimit { get; }

      /// <summary>
      /// The registry consulted when parsing <see cref="Any"/> messages.
      /// </summary>
      public TypeRegistry TypeRegistry { get; }

      /// <summary>
      /// Creates a new <see cref="Settings"/> object with the specified recursion limit and
      /// an empty type registry.
      /// </summary>
      /// <param name="recursionLimit">The maximum depth of messages to parse</param>
      public Settings(int recursionLimit) : this(recursionLimit, TypeRegistry.Empty)
      {
      }

      /// <summary>
      /// Creates a new <see cref="Settings"/> object with the specified recursion limit and type registry.
      /// </summary>
      /// <param name="recursionLimit">The maximum depth of messages to parse</param>
      /// <param name="typeRegistry">The type registry used to parse <see cref="Any"/> messages; checked for null</param>
      public Settings(int recursionLimit, TypeRegistry typeRegistry)
      {
          var checkedRegistry = ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry));
          TypeRegistry = checkedRegistry;
          RecursionLimit = recursionLimit;
      }
  }
  959. }
  960. }