JsonParser.cs 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019
  1. #region Copyright notice and license
  2. // Protocol Buffers - Google's data interchange format
  3. // Copyright 2015 Google Inc. All rights reserved.
  4. // https://developers.google.com/protocol-buffers/
  5. //
  6. // Redistribution and use in source and binary forms, with or without
  7. // modification, are permitted provided that the following conditions are
  8. // met:
  9. //
  10. // * Redistributions of source code must retain the above copyright
  11. // notice, this list of conditions and the following disclaimer.
  12. // * Redistributions in binary form must reproduce the above
  13. // copyright notice, this list of conditions and the following disclaimer
  14. // in the documentation and/or other materials provided with the
  15. // distribution.
  16. // * Neither the name of Google Inc. nor the names of its
  17. // contributors may be used to endorse or promote products derived from
  18. // this software without specific prior written permission.
  19. //
  20. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. #endregion
  32. using Google.Protobuf.Reflection;
  33. using Google.Protobuf.WellKnownTypes;
  34. using System;
  35. using System.Collections;
  36. using System.Collections.Generic;
  37. using System.Globalization;
  38. using System.IO;
  39. using System.Text;
  40. using System.Text.RegularExpressions;
  41. namespace Google.Protobuf
  42. {
  43. /// <summary>
  44. /// Reflection-based converter from JSON to messages.
  45. /// </summary>
  46. /// <remarks>
  47. /// <para>
  48. /// Instances of this class are thread-safe, with no mutable state.
  49. /// </para>
  50. /// <para>
  51. /// This is a simple start to get JSON parsing working. As it's reflection-based,
  52. /// it's not as quick as baking calls into generated messages - but is a simpler implementation.
  53. /// (This code is generally not heavily optimized.)
  54. /// </para>
  55. /// </remarks>
  56. public sealed class JsonParser
  57. {
  // Both patterns spell digits as [0-9] rather than \d so that non-ASCII digits are never accepted.
  // The timestamp pattern is not a complete validator: it rejects *most* invalid input, and the
  // subsequent parsing step is relied upon to reject the rest.
  private static readonly Regex TimestampRegex = new Regex(
      @"^(?<datetime>[0-9]{4}-[01][0-9]-[0-3][0-9]T[012][0-9]:[0-5][0-9]:[0-5][0-9])(?<subseconds>\.[0-9]{1,9})?(?<offset>(Z|[+-][0-1][0-9]:[0-5][0-9]))$",
      FrameworkPortability.CompiledRegexWhereAvailable);
  private static readonly Regex DurationRegex = new Regex(
      @"^(?<sign>-)?(?<int>[0-9]{1,12})(?<subseconds>\.[0-9]{1,9})?s$",
      FrameworkPortability.CompiledRegexWhereAvailable);
  // NOTE(review): presumably indexed by the length of a matched "subseconds" group (leading '.' included)
  // to scale the parsed digits to nanoseconds - confirm against the code that reads this table.
  private static readonly int[] SubsecondScalingFactors =
  {
      0, 100000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1
  };
  private static readonly char[] FieldMaskPathSeparators = { ',' };
  // Shared instance exposed through the Default property.
  private static readonly JsonParser defaultInstance = new JsonParser(Settings.Default);
  // TODO: Consider introducing a class containing parse state of the parser, tokenizer and depth. That would simplify these handlers
  // and the signatures of various methods.
  // Lookup from the full name of a well-known type to the routine that merges its JSON form into a message.
  // Each handler receives the parser in use, the message being populated and the tokenizer positioned at the value.
  private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>> WellKnownTypeHandlers =
      new Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
      {
          // Types whose handler consumes exactly one token from the tokenizer.
          { Timestamp.Descriptor.FullName, (p, msg, tok) => MergeTimestamp(msg, tok.Next()) },
          { Duration.Descriptor.FullName, (p, msg, tok) => MergeDuration(msg, tok.Next()) },
          // Struct-family types and Any, which need the parser to recurse.
          { Value.Descriptor.FullName, (p, msg, tok) => p.MergeStructValue(msg, tok) },
          {
              ListValue.Descriptor.FullName,
              (p, msg, tok) => p.MergeRepeatedField(msg, msg.Descriptor.Fields[ListValue.ValuesFieldNumber], tok)
          },
          { Struct.Descriptor.FullName, (p, msg, tok) => p.MergeStruct(msg, tok) },
          { Any.Descriptor.FullName, (p, msg, tok) => p.MergeAny(msg, tok) },
          { FieldMask.Descriptor.FullName, (p, msg, tok) => MergeFieldMask(msg, tok.Next()) },
          // Wrapper types all share one handler.
          { Int32Value.Descriptor.FullName, MergeWrapperField },
          { Int64Value.Descriptor.FullName, MergeWrapperField },
          { UInt32Value.Descriptor.FullName, MergeWrapperField },
          { UInt64Value.Descriptor.FullName, MergeWrapperField },
          { FloatValue.Descriptor.FullName, MergeWrapperField },
          { DoubleValue.Descriptor.FullName, MergeWrapperField },
          { BytesValue.Descriptor.FullName, MergeWrapperField },
          { StringValue.Descriptor.FullName, MergeWrapperField },
          { BoolValue.Descriptor.FullName, MergeWrapperField }
      };
  // Handler shared by all wrapper-type entries of WellKnownTypeHandlers, so the same lambda does not
  // have to be repeated there: it merges the wrapper's value field
  // (WrappersReflection.WrapperValueFieldNumber) from the tokenizer.
  private static void MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer)
  {
      var wrappedValueField = message.Descriptor.Fields[WrappersReflection.WrapperValueFieldNumber];
      parser.MergeField(message, wrappedValueField, tokenizer);
  }
  /// <summary>
  /// Returns a parser using the default settings.
  /// </summary>
  public static JsonParser Default => defaultInstance;
  // Settings supplied at construction; read for the recursion limit and the type registry.
  private readonly Settings settings;
  /// <summary>
  /// Creates a new parser with the given settings.
  /// </summary>
  /// <param name="settings">The settings. Must not be null.</param>
  public JsonParser(Settings settings)
  {
      // Fail fast with the same precondition helper the Parse overloads use, rather than
      // surfacing a NullReferenceException later when settings.RecursionLimit is first read.
      ProtoPreconditions.CheckNotNull(settings, nameof(settings));
      this.settings = settings;
  }
  /// <summary>
  /// Parses the JSON text in <paramref name="json"/> and merges the result into the given message.
  /// </summary>
  /// <param name="message">The message that receives the parsed information.</param>
  /// <param name="json">The JSON text to parse.</param>
  internal void Merge(IMessage message, string json)
  {
      // Wrap the text in a reader and delegate to the reader-based overload.
      TextReader jsonReader = new StringReader(json);
      Merge(message, jsonReader);
  }
  /// <summary>
  /// Reads JSON from <paramref name="jsonReader"/> and merges the result into the given message.
  /// </summary>
  /// <param name="message">The message that receives the parsed information.</param>
  /// <param name="jsonReader">Reader supplying the JSON to parse.</param>
  /// <exception cref="InvalidProtocolBufferException">Further tokens follow the merged value.</exception>
  internal void Merge(IMessage message, TextReader jsonReader)
  {
      JsonTokenizer tokenizer = JsonTokenizer.FromTextReader(jsonReader);
      Merge(message, tokenizer);
      // Once the message has been merged, the only token allowed is the end of the document.
      if (tokenizer.Next() != JsonToken.EndDocument)
      {
          throw new InvalidProtocolBufferException("Expected end of JSON after object");
      }
  }
  /// <summary>
  /// Populates <paramref name="message"/> from the token stream produced by <paramref name="tokenizer"/>.
  /// The next token is usually a "start object" token, although wrapper types and nullity can break
  /// that assumption. The implementation is an LL(1) recursive descent parser over the tokens. The
  /// tokenizer is trusted to have validated that the stream is well-formed JSON - but a stream that is
  /// valid JSON is not necessarily valid "protobuf JSON".
  /// </summary>
  private void Merge(IMessage message, JsonTokenizer tokenizer)
  {
      if (tokenizer.ObjectDepth > settings.RecursionLimit)
      {
          throw InvalidProtocolBufferException.JsonRecursionLimitExceeded();
      }

      // Well-known types with a registered handler are delegated entirely to it; those
      // without one are parsed like any other message below.
      Action<JsonParser, IMessage, JsonTokenizer> wellKnownHandler;
      if (message.Descriptor.IsWellKnownType &&
          WellKnownTypeHandlers.TryGetValue(message.Descriptor.FullName, out wellKnownHandler))
      {
          wellKnownHandler(this, message, tokenizer);
          return;
      }

      var openingToken = tokenizer.Next();
      if (openingToken.Type != JsonToken.TokenType.StartObject)
      {
          throw new InvalidProtocolBufferException("Expected an object");
      }

      var fieldsByJsonName = message.Descriptor.Fields.ByJsonName();
      // Oneofs already given a value in this object - each may be set at most once. Allocated
      // lazily because oneofs are relatively rare and most messages never need the set.
      HashSet<OneofDescriptor> oneofsAlreadySet = null;

      for (var token = tokenizer.Next(); token.Type != JsonToken.TokenType.EndObject; token = tokenizer.Next())
      {
          if (token.Type != JsonToken.TokenType.Name)
          {
              throw new InvalidOperationException("Unexpected token type " + token.Type);
          }

          string jsonName = token.StringValue;
          FieldDescriptor field;
          if (!fieldsByJsonName.TryGetValue(jsonName, out field))
          {
              // TODO: Is this what we want to do? If not, we'll need to skip the value,
              // which may be an object or array. (We might want to put code in the tokenizer
              // to do that.)
              throw new InvalidProtocolBufferException("Unknown field: " + jsonName);
          }

          var oneof = field.ContainingOneof;
          if (oneof != null)
          {
              oneofsAlreadySet = oneofsAlreadySet ?? new HashSet<OneofDescriptor>();
              if (!oneofsAlreadySet.Add(oneof))
              {
                  throw new InvalidProtocolBufferException($"Multiple values specified for oneof {oneof.Name}");
              }
          }

          MergeField(message, field, tokenizer);
      }
  }
  // Merges the next JSON value from the tokenizer into the given field of the message,
  // dispatching on whether the field is a map, a repeated field or a single value.
  private void MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
  {
      var lookahead = tokenizer.Next();
      bool isNullLiteral = lookahead.Type == JsonToken.TokenType.Null;

      // A null token clears the field, except for a singular field of type
      // google.protobuf.Value, where it falls through to be parsed as a value.
      // Note: different from Java API, which just ignores it.
      // TODO: Bring it more in line? Discuss...
      if (isNullLiteral && (field.IsMap || field.IsRepeated || !IsGoogleProtobufValueField(field)))
      {
          field.Accessor.Clear(message);
          return;
      }

      // The token was only peeked at; hand it back for the specific merge routine to consume.
      tokenizer.PushBack(lookahead);

      if (field.IsMap)
      {
          MergeMapField(message, field, tokenizer);
          return;
      }
      if (field.IsRepeated)
      {
          MergeRepeatedField(message, field, tokenizer);
          return;
      }

      object parsed = ParseSingleValue(field, tokenizer);
      field.Accessor.SetValue(message, parsed);
  }
  // Reads a JSON array from the tokenizer and appends each parsed element to the list
  // currently held by the repeated field. Null elements are rejected.
  private void MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
  {
      var openingToken = tokenizer.Next();
      if (openingToken.Type != JsonToken.TokenType.StartArray)
      {
          throw new InvalidProtocolBufferException("Repeated field value was not an array. Token type: " + openingToken.Type);
      }

      var elements = (IList) field.Accessor.GetValue(message);
      for (var next = tokenizer.Next(); next.Type != JsonToken.TokenType.EndArray; next = tokenizer.Next())
      {
          // Hand the token back first: ParseSingleValue reads the element from the tokenizer itself.
          tokenizer.PushBack(next);
          if (next.Type == JsonToken.TokenType.Null)
          {
              throw new InvalidProtocolBufferException("Repeated field elements cannot be null");
          }
          elements.Add(ParseSingleValue(field, tokenizer));
      }
  }
  // Reads a JSON object from the tokenizer and stores each property as an entry in the
  // dictionary currently held by the map field. Null values are rejected.
  private void MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
  {
      // Map fields are always objects, even if the values are well-known types: ParseSingleValue handles those.
      var openingToken = tokenizer.Next();
      if (openingToken.Type != JsonToken.TokenType.StartObject)
      {
          throw new InvalidProtocolBufferException("Expected an object to populate a map");
      }

      // The key and value descriptors are looked up as fields 1 and 2 of the field's message type.
      var entryType = field.MessageType;
      var keyField = entryType.FindFieldByNumber(1);
      var valueField = entryType.FindFieldByNumber(2);
      if (keyField == null || valueField == null)
      {
          throw new InvalidProtocolBufferException("Invalid map field: " + field.FullName);
      }

      var entries = (IDictionary) field.Accessor.GetValue(message);
      for (var nameToken = tokenizer.Next(); nameToken.Type != JsonToken.TokenType.EndObject; nameToken = tokenizer.Next())
      {
          object entryKey = ParseMapKey(keyField, nameToken.StringValue);
          object entryValue = ParseSingleValue(valueField, tokenizer);
          if (entryValue == null)
          {
              throw new InvalidProtocolBufferException("Map values must not be null");
          }
          entries[entryKey] = entryValue;
      }
  }
  // True when the field is a message field whose message type has the same full name as
  // google.protobuf.Value (Value.Descriptor).
  private static bool IsGoogleProtobufValueField(FieldDescriptor field)
  {
      if (field.FieldType != FieldType.Message)
      {
          return false;
      }
      return field.MessageType.FullName == Value.Descriptor.FullName;
  }
  /// <summary>
  /// Reads one JSON value from <paramref name="tokenizer"/> and converts it into the object
  /// used to represent a single value of <paramref name="field"/>.
  /// </summary>
  /// <param name="field">The field the value is destined for. Wrapper message types are parsed as their value field.</param>
  /// <param name="tokenizer">Tokenizer positioned at the value to read.</param>
  /// <returns>
  /// The parsed value. A JSON null yields <c>null</c>, except for a google.protobuf.Value field
  /// where it yields <c>Value.ForNull()</c>.
  /// </returns>
  /// <exception cref="InvalidProtocolBufferException">The token type is not supported for the field type.</exception>
  private object ParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer)
  {
      var token = tokenizer.Next();
      if (token.Type == JsonToken.TokenType.Null)
      {
          // TODO: In order to support dynamic messages, we should really build this up
          // dynamically.
          if (IsGoogleProtobufValueField(field))
          {
              return Value.ForNull();
          }
          return null;
      }

      var fieldType = field.FieldType;
      if (fieldType == FieldType.Message)
      {
          // Parse wrapper types as their constituent types.
          // TODO: What does this mean for null?
          if (field.MessageType.IsWrapperType)
          {
              field = field.MessageType.Fields[WrappersReflection.WrapperValueFieldNumber];
              fieldType = field.FieldType;
          }
          else
          {
              // TODO: Merge the current value in message? (Public API currently doesn't make this relevant as we don't expose merging.)
              // The token belongs to the nested message; hand it back before recursing.
              tokenizer.PushBack(token);
              IMessage subMessage = NewMessageForField(field);
              Merge(subMessage, tokenizer);
              return subMessage;
          }
      }

      // Null tokens have already returned above, so no case is needed for them here.
      switch (token.Type)
      {
          case JsonToken.TokenType.True:
          case JsonToken.TokenType.False:
              if (fieldType == FieldType.Bool)
              {
                  return token.Type == JsonToken.TokenType.True;
              }
              // Fall through to "we don't support this type for this case"; could duplicate the behaviour of the default
              // case instead, but this way we'd only need to change one place.
              goto default;
          case JsonToken.TokenType.StringValue:
              return ParseSingleStringValue(field, token.StringValue);
          // Note: not passing the number value itself here, as we may end up storing the string value in the token too.
          case JsonToken.TokenType.Number:
              return ParseSingleNumberValue(field, token);
          default:
              throw new InvalidProtocolBufferException("Unsupported JSON token type " + token.Type + " for field type " + fieldType);
      }
  }
  /// <summary>
  /// Creates a new message of type <typeparamref name="T"/> from the JSON text in <paramref name="json"/>.
  /// </summary>
  /// <typeparam name="T">The type of message to create.</typeparam>
  /// <param name="json">The JSON to parse.</param>
  /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
  /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
  public T Parse<T>(string json) where T : IMessage, new()
  {
      ProtoPreconditions.CheckNotNull(json, nameof(json));
      TextReader jsonReader = new StringReader(json);
      return Parse<T>(jsonReader);
  }
  /// <summary>
  /// Creates a new message of type <typeparamref name="T"/> from JSON read from <paramref name="jsonReader"/>.
  /// </summary>
  /// <typeparam name="T">The type of message to create.</typeparam>
  /// <param name="jsonReader">Reader providing the JSON to parse.</param>
  /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
  /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
  public T Parse<T>(TextReader jsonReader) where T : IMessage, new()
  {
      ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
      // Start from an empty message and merge the parsed JSON into it.
      var parsed = new T();
      Merge(parsed, jsonReader);
      return parsed;
  }
  /// <summary>
  /// Creates a new message of the type described by <paramref name="descriptor"/> from the JSON text in <paramref name="json"/>.
  /// </summary>
  /// <param name="json">The JSON to parse.</param>
  /// <param name="descriptor">Descriptor of message type to parse.</param>
  /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
  /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
  public IMessage Parse(string json, MessageDescriptor descriptor)
  {
      ProtoPreconditions.CheckNotNull(json, nameof(json));
      ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
      TextReader jsonReader = new StringReader(json);
      return Parse(jsonReader, descriptor);
  }
  /// <summary>
  /// Creates a new message of the type described by <paramref name="descriptor"/> from JSON read from <paramref name="jsonReader"/>.
  /// </summary>
  /// <param name="jsonReader">Reader providing the JSON to parse.</param>
  /// <param name="descriptor">Descriptor of message type to parse.</param>
  /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
  /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
  public IMessage Parse(TextReader jsonReader, MessageDescriptor descriptor)
  {
      ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
      ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
      // The descriptor's parser supplies the message instance that the JSON is merged into.
      IMessage parsed = descriptor.Parser.CreateTemplate();
      Merge(parsed, jsonReader);
      return parsed;
  }
  // Handler for google.protobuf.Value: sets the field of the Value message that corresponds
  // to the kind of JSON token found next in the tokenizer.
  private void MergeStructValue(IMessage message, JsonTokenizer tokenizer)
  {
      var token = tokenizer.Next();
      var valueFields = message.Descriptor.Fields;
      switch (token.Type)
      {
          case JsonToken.TokenType.Null:
              valueFields[Value.NullValueFieldNumber].Accessor.SetValue(message, 0);
              return;
          case JsonToken.TokenType.StringValue:
              valueFields[Value.StringValueFieldNumber].Accessor.SetValue(message, token.StringValue);
              return;
          case JsonToken.TokenType.Number:
              valueFields[Value.NumberValueFieldNumber].Accessor.SetValue(message, token.NumberValue);
              return;
          case JsonToken.TokenType.True:
          case JsonToken.TokenType.False:
              valueFields[Value.BoolValueFieldNumber].Accessor.SetValue(message, token.Type == JsonToken.TokenType.True);
              return;
          case JsonToken.TokenType.StartObject:
          case JsonToken.TokenType.StartArray:
          {
              // Objects populate the struct field and arrays the list field; both are parsed by
              // creating the nested message and merging into it from the pushed-back start token.
              int fieldNumber = token.Type == JsonToken.TokenType.StartObject
                  ? Value.StructValueFieldNumber
                  : Value.ListValueFieldNumber;
              var nestedField = valueFields[fieldNumber];
              var nestedMessage = NewMessageForField(nestedField);
              tokenizer.PushBack(token);
              Merge(nestedMessage, tokenizer);
              nestedField.Accessor.SetValue(message, nestedMessage);
              return;
          }
          default:
              throw new InvalidOperationException("Unexpected token type: " + token.Type);
      }
  }
  // Handler for google.protobuf.Struct: checks that the value is a JSON object and then
  // merges it into the Struct's "fields" map field.
  private void MergeStruct(IMessage message, JsonTokenizer tokenizer)
  {
      var openingToken = tokenizer.Next();
      if (openingToken.Type != JsonToken.TokenType.StartObject)
      {
          throw new InvalidProtocolBufferException("Expected object value for Struct");
      }
      // MergeMapField reads the start-object token itself, so hand it back.
      tokenizer.PushBack(openingToken);
      MergeMapField(message, message.Descriptor.Fields[Struct.FieldsFieldNumber], tokenizer);
  }
  // Handler for google.protobuf.Any: finds the @type property, parses the rest of the
  // object as the message it names, and packs that message into the Any.
  private void MergeAny(IMessage message, JsonTokenizer tokenizer)
  {
      // Tokens are recorded until the @type property is seen. Its value is then used to look up
      // the message type in the type registry, and the recorded tokens are replayed without @type.
      var recordedTokens = new List<JsonToken>();
      var token = tokenizer.Next();
      if (token.Type != JsonToken.TokenType.StartObject)
      {
          throw new InvalidProtocolBufferException("Expected object value for Any");
      }
      int anyObjectDepth = tokenizer.ObjectDepth;

      while (true)
      {
          // Requiring the same object depth protects us from nested Any values which occur
          // before the type URL for *this* Any.
          bool isOwnTypeUrlName =
              token.Type == JsonToken.TokenType.Name &&
              token.StringValue == JsonFormatter.AnyTypeUrlField &&
              tokenizer.ObjectDepth == anyObjectDepth;
          if (isOwnTypeUrlName)
          {
              break;
          }
          recordedTokens.Add(token);
          token = tokenizer.Next();
          // Dropping below the starting depth means the object closed without an @type property.
          if (tokenizer.ObjectDepth < anyObjectDepth)
          {
              throw new InvalidProtocolBufferException("Any message with no @type");
          }
      }

      // Neither the @type name nor its value is added to the recorded tokens.
      var typeUrlToken = tokenizer.Next();
      if (typeUrlToken.Type != JsonToken.TokenType.StringValue)
      {
          throw new InvalidProtocolBufferException("Expected string value for Any.@type");
      }
      string typeUrl = typeUrlToken.StringValue;
      string typeName = Any.GetTypeName(typeUrl);
      MessageDescriptor descriptor = settings.TypeRegistry.Find(typeName);
      if (descriptor == null)
      {
          throw new InvalidOperationException($"Type registry has no descriptor for type name '{typeName}'");
      }

      // Replay what was recorded, followed by whatever remains of the object, parsing it as
      // normal. The original tokenizer should end up at the end of the object.
      var replay = JsonTokenizer.FromReplayedTokens(recordedTokens, tokenizer);
      var body = descriptor.Parser.CreateTemplate();
      if (descriptor.IsWellKnownType)
      {
          MergeWellKnownTypeAnyBody(body, replay);
      }
      else
      {
          Merge(body, replay);
      }

      // Pack the parsed body into the Any received as a parameter: type URL plus serialized bytes.
      var data = body.ToByteString();
      message.Descriptor.Fields[Any.TypeUrlFieldNumber].Accessor.SetValue(message, typeUrl);
      message.Descriptor.Fields[Any.ValueFieldNumber].Accessor.SetValue(message, data);
  }
  // Inside an Any, a well-known type is carried in a JSON property called "value". The @type property
  // has already been removed from the token stream passed in, so the *only* tokens expected are
  // start-object, name("value"), the value itself, and end-object.
  private void MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer)
  {
      // Skip the start-object token; the caller has already checked it.
      tokenizer.Next();

      var nameToken = tokenizer.Next();
      // TODO: What about an absent Int32Value, for example?
      bool isValueProperty =
          nameToken.Type == JsonToken.TokenType.Name &&
          nameToken.StringValue == JsonFormatter.AnyWellKnownTypeValueField;
      if (!isValueProperty)
      {
          throw new InvalidProtocolBufferException($"Expected '{JsonFormatter.AnyWellKnownTypeValueField}' property for well-known type Any body");
      }

      Merge(body, tokenizer);

      if (tokenizer.Next().Type != JsonToken.TokenType.EndObject)
      {
          throw new InvalidProtocolBufferException("Expected end-object token after @type/value for well-known type");
      }
  }
  525. #region Utility methods which don't depend on the state (or settings) of the parser.
  // Converts the text of a JSON property name into a map key of the type required by the
  // given key field. Only string, bool and integer key types are accepted.
  private static object ParseMapKey(FieldDescriptor field, string keyText)
  {
      switch (field.FieldType)
      {
          case FieldType.String:
              return keyText;
          case FieldType.Bool:
              // Only the exact strings "true" and "false" are accepted.
              switch (keyText)
              {
                  case "true":
                      return true;
                  case "false":
                      return false;
                  default:
                      throw new InvalidProtocolBufferException("Invalid string for bool map key: " + keyText);
              }
          case FieldType.Int32:
          case FieldType.SInt32:
          case FieldType.SFixed32:
              return ParseNumericString(keyText, int.Parse);
          case FieldType.Int64:
          case FieldType.SInt64:
          case FieldType.SFixed64:
              return ParseNumericString(keyText, long.Parse);
          case FieldType.UInt32:
          case FieldType.Fixed32:
              return ParseNumericString(keyText, uint.Parse);
          case FieldType.UInt64:
          case FieldType.Fixed64:
              return ParseNumericString(keyText, ulong.Parse);
          default:
              throw new InvalidProtocolBufferException("Invalid field type for map: " + field.FieldType);
      }
  }
  // Converts the numeric value of a JSON number token into the boxed type required by the
  // field. Integer and enum fields require a whole number; values that do not fit the target
  // type are reported as InvalidProtocolBufferException.
  private static object ParseSingleNumberValue(FieldDescriptor field, JsonToken token)
  {
      double number = token.NumberValue;
      try
      {
          // Checked context: an integer cast that overflows raises OverflowException,
          // which is translated below.
          checked
          {
              switch (field.FieldType)
              {
                  case FieldType.Int32:
                  case FieldType.SInt32:
                  case FieldType.SFixed32:
                  case FieldType.Enum:
                      // Enums are returned as an int, and the CLR is left to convert it.
                      // Note that we deliberately don't check that it's a known value.
                      CheckInteger(number);
                      return (int) number;
                  case FieldType.UInt32:
                  case FieldType.Fixed32:
                      CheckInteger(number);
                      return (uint) number;
                  case FieldType.Int64:
                  case FieldType.SInt64:
                  case FieldType.SFixed64:
                      CheckInteger(number);
                      return (long) number;
                  case FieldType.UInt64:
                  case FieldType.Fixed64:
                      CheckInteger(number);
                      return (ulong) number;
                  case FieldType.Double:
                      return number;
                  case FieldType.Float:
                      // NaN and the infinities map to their float counterparts; any other
                      // value beyond the float range is an error.
                      if (double.IsNaN(number))
                      {
                          return float.NaN;
                      }
                      if (double.IsPositiveInfinity(number))
                      {
                          return float.PositiveInfinity;
                      }
                      if (double.IsNegativeInfinity(number))
                      {
                          return float.NegativeInfinity;
                      }
                      if (number > float.MaxValue || number < float.MinValue)
                      {
                          throw new InvalidProtocolBufferException($"Value out of range: {number}");
                      }
                      return (float) number;
                  default:
                      throw new InvalidProtocolBufferException($"Unsupported conversion from JSON number for field type {field.FieldType}");
              }
          }
      }
      catch (OverflowException)
      {
          throw new InvalidProtocolBufferException($"Value out of range: {number}");
      }
  }
  // Throws InvalidProtocolBufferException unless the value is a finite whole number.
  private static void CheckInteger(double value)
  {
      // Infinity must be excluded explicitly, since it compares equal to its own floor.
      bool isWholeNumber =
          !double.IsInfinity(value) &&
          !double.IsNaN(value) &&
          value == Math.Floor(value);
      if (!isWholeNumber)
      {
          throw new InvalidProtocolBufferException($"Value not an integer: {value}");
      }
  }
  // Converts the text of a JSON string token into the boxed type required by the field:
  // the text itself, base64-decoded bytes, a number parsed from the text, or the number of
  // the enum value with that name.
  private static object ParseSingleStringValue(FieldDescriptor field, string text)
  {
      switch (field.FieldType)
      {
          case FieldType.String:
          {
              return text;
          }
          case FieldType.Bytes:
          {
              try
              {
                  return ByteString.FromBase64(text);
              }
              catch (FormatException e)
              {
                  throw InvalidProtocolBufferException.InvalidBase64(e);
              }
          }
          case FieldType.Enum:
          {
              var namedValue = field.EnumType.FindValueByName(text);
              if (namedValue == null)
              {
                  throw new InvalidProtocolBufferException($"Invalid enum value: {text} for enum type: {field.EnumType.FullName}");
              }
              // Just return it as an int, and let the CLR convert it.
              return namedValue.Number;
          }
          case FieldType.Int32:
          case FieldType.SInt32:
          case FieldType.SFixed32:
          {
              return ParseNumericString(text, int.Parse);
          }
          case FieldType.UInt32:
          case FieldType.Fixed32:
          {
              return ParseNumericString(text, uint.Parse);
          }
          case FieldType.Int64:
          case FieldType.SInt64:
          case FieldType.SFixed64:
          {
              return ParseNumericString(text, long.Parse);
          }
          case FieldType.UInt64:
          case FieldType.Fixed64:
          {
              return ParseNumericString(text, ulong.Parse);
          }
          case FieldType.Double:
          {
              double parsedDouble = ParseNumericString(text, double.Parse);
              ValidateInfinityAndNan(text, double.IsPositiveInfinity(parsedDouble), double.IsNegativeInfinity(parsedDouble), double.IsNaN(parsedDouble));
              return parsedDouble;
          }
          case FieldType.Float:
          {
              float parsedFloat = ParseNumericString(text, float.Parse);
              ValidateInfinityAndNan(text, float.IsPositiveInfinity(parsedFloat), float.IsNegativeInfinity(parsedFloat), float.IsNaN(parsedFloat));
              return parsedFloat;
          }
          default:
          {
              throw new InvalidProtocolBufferException($"Unsupported conversion from JSON string for field type {field.FieldType}");
          }
      }
  }
  /// <summary>
  /// Produces a new message instance for the given field by asking the parser of the
  /// field's message type for a template.
  /// </summary>
  /// <param name="field">The field whose message type determines what is created.</param>
  /// <returns>The message returned by the parser's <c>CreateTemplate</c> call.</returns>
  private static IMessage NewMessageForField(FieldDescriptor field)
  {
      var messageType = field.MessageType;
      var parser = messageType.Parser;
      return parser.CreateTemplate();
  }
  /// <summary>
  /// Parses numeric text with the supplied BCL-style parser, after rejecting forms that the
  /// parser would accept but which this JSON parser prohibits: a leading '+' and redundant
  /// leading zeroes.
  /// </summary>
  /// <typeparam name="T">The numeric type produced by <paramref name="parser"/>.</typeparam>
  /// <param name="text">The text to parse.</param>
  /// <param name="parser">
  /// The parse function (for example <c>int.Parse</c>); it is invoked with leading-sign,
  /// decimal-point and exponent styles and the invariant culture.
  /// </param>
  /// <returns>The parsed value.</returns>
  /// <exception cref="InvalidProtocolBufferException">
  /// The text has a prohibited form, is not a valid number for the type, or is out of range.
  /// </exception>
  private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser)
  {
      // All prefix checks below are ordinal character comparisons. The culture-sensitive
      // string.StartsWith(string) overload depends on the current culture and may skip
      // ignorable characters, which is wrong for validating machine-readable text.

      // Can't prohibit this with NumberStyles.
      if (text.Length > 0 && text[0] == '+')
      {
          throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
      }

      // Reject a zero followed directly by another digit ("01", "-01"), while still allowing
      // "0", "-0", "0.5", "-0.5", "0e1" and so on. An optional '-' just shifts the position.
      int digitsStart = (text.Length > 0 && text[0] == '-') ? 1 : 0;
      if (text.Length > digitsStart + 1 && text[digitsStart] == '0')
      {
          char afterZero = text[digitsStart + 1];
          if (afterZero >= '0' && afterZero <= '9')
          {
              throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
          }
      }

      try
      {
          return parser(text, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture);
      }
      catch (FormatException)
      {
          throw new InvalidProtocolBufferException($"Invalid numeric value for type: {text}");
      }
      catch (OverflowException)
      {
          throw new InvalidProtocolBufferException($"Value out of range: {text}");
      }
  }
  /// <summary>
  /// Ensures that an infinite or NaN parse result came from exactly the text that is allowed
  /// to produce it ("Infinity", "-Infinity" or "NaN").
  /// This corrects the lenient whitespace handling of double.Parse/float.Parse, as well as the
  /// way that Mono parses out-of-range values as infinity.
  /// </summary>
  /// <param name="text">The original text that was parsed.</param>
  /// <param name="isPositiveInfinity">Whether the parsed value is positive infinity.</param>
  /// <param name="isNegativeInfinity">Whether the parsed value is negative infinity.</param>
  /// <param name="isNaN">Whether the parsed value is NaN.</param>
  /// <exception cref="InvalidProtocolBufferException">
  /// A flag is set but the text is not the exact spelling for that special value.
  /// </exception>
  private static void ValidateInfinityAndNan(string text, bool isPositiveInfinity, bool isNegativeInfinity, bool isNaN)
  {
      // Each special value is acceptable only when it is absent or spelled exactly.
      bool positiveInfinityOk = !isPositiveInfinity || text == "Infinity";
      bool negativeInfinityOk = !isNegativeInfinity || text == "-Infinity";
      bool nanOk = !isNaN || text == "NaN";
      if (positiveInfinityOk && negativeInfinityOk && nanOk)
      {
          return;
      }
      throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
  }
  /// <summary>
  /// Parses a JSON string token as a timestamp and stores the resulting seconds and nanoseconds
  /// in the corresponding fields of <paramref name="message"/>.
  /// </summary>
  /// <remarks>
  /// The text must match <c>TimestampRegex</c>, which supplies three groups: a date/time part
  /// (parsed as <c>yyyy-MM-dd'T'HH:mm:ss</c> and treated as UTC), an optional subsecond part,
  /// and an offset that is either <c>Z</c> or a sign followed by hours and minutes.
  /// </remarks>
  /// <param name="message">The message whose seconds and nanos fields are set.</param>
  /// <param name="token">The JSON token to read; it must be a string value.</param>
  /// <exception cref="InvalidProtocolBufferException">
  /// The token is not a string, the text is not a valid timestamp, the offset exceeds 18 hours
  /// or is "-00:00", or the adjusted value falls outside the supported range.
  /// </exception>
  private static void MergeTimestamp(IMessage message, JsonToken token)
  {
      if (token.Type != JsonToken.TokenType.StringValue)
      {
          throw new InvalidProtocolBufferException("Expected string value for Timestamp");
      }
      var match = TimestampRegex.Match(token.StringValue);
      if (!match.Success)
      {
          throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
      }
      var dateTime = match.Groups["datetime"].Value;
      var subseconds = match.Groups["subseconds"].Value;
      var offset = match.Groups["offset"].Value;
      try
      {
          // ParseExact throws FormatException for impossible dates; that is converted to an
          // InvalidProtocolBufferException by the catch block at the end of this method.
          DateTime parsed = DateTime.ParseExact(
              dateTime,
              "yyyy-MM-dd'T'HH:mm:ss",
              CultureInfo.InvariantCulture,
              DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
          // TODO: It would be nice not to have to create all these objects... easy to optimize later though.
          Timestamp timestamp = Timestamp.FromDateTime(parsed);
          int nanosToAdd = 0;
          if (subseconds != "")
          {
              // This should always work, as we've got 1-9 digits.
              // The first character of the group is skipped; the scaling table is indexed by the
              // full group length (including that first character).
              int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
              nanosToAdd = parsedFraction * SubsecondScalingFactors[subseconds.Length];
          }
          int secondsToAdd = 0;
          if (offset != "Z")
          {
              // This is the amount we need to *subtract* from the local time to get to UTC - hence - => +1 and vice versa.
              int sign = offset[0] == '-' ? 1 : -1;
              int hours = int.Parse(offset.Substring(1, 2), CultureInfo.InvariantCulture);
              // Parsed with the invariant culture, like every other number in this method, so
              // that the result never depends on the thread's current culture.
              int minutes = int.Parse(offset.Substring(4, 2), CultureInfo.InvariantCulture);
              int totalMinutes = hours * 60 + minutes;
              if (totalMinutes > 18 * 60)
              {
                  throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
              }
              if (totalMinutes == 0 && sign == 1)
              {
                  // This is an offset of -00:00, which means "unknown local offset". It makes no sense for a timestamp.
                  throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
              }
              // We need to *subtract* the offset from local time to get UTC.
              secondsToAdd = sign * totalMinutes * 60;
          }
          // Ensure we've got the right signs. Currently unnecessary, but easy to do.
          if (secondsToAdd < 0 && nanosToAdd > 0)
          {
              secondsToAdd++;
              nanosToAdd = nanosToAdd - Duration.NanosecondsPerSecond;
          }
          if (secondsToAdd != 0 || nanosToAdd != 0)
          {
              timestamp += new Duration { Nanos = nanosToAdd, Seconds = secondsToAdd };
              // The resulting timestamp after offset change would be out of our expected range. Currently the Timestamp message doesn't validate this
              // anywhere, but we shouldn't parse it.
              if (timestamp.Seconds < Timestamp.UnixSecondsAtBclMinValue || timestamp.Seconds > Timestamp.UnixSecondsAtBclMaxValue)
              {
                  throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
              }
          }
          // Values are written through the descriptor's accessors rather than a typed reference.
          message.Descriptor.Fields[Timestamp.SecondsFieldNumber].Accessor.SetValue(message, timestamp.Seconds);
          message.Descriptor.Fields[Timestamp.NanosFieldNumber].Accessor.SetValue(message, timestamp.Nanos);
      }
      catch (FormatException)
      {
          throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
      }
  }
  /// <summary>
  /// Parses a JSON string token as a duration and stores the resulting seconds and nanoseconds
  /// in the corresponding fields of <paramref name="message"/>.
  /// </summary>
  /// <remarks>
  /// The text must match <c>DurationRegex</c>, which supplies an optional sign, an integer
  /// seconds part and an optional subsecond part. The sign applies to both seconds and nanos.
  /// </remarks>
  /// <param name="message">The message whose seconds and nanos fields are set.</param>
  /// <param name="token">The JSON token to read; it must be a string value.</param>
  /// <exception cref="InvalidProtocolBufferException">
  /// The token is not a string, the text is not a valid duration, the seconds part has
  /// redundant leading zeroes, or the resulting value is not a normalized duration.
  /// </exception>
  private static void MergeDuration(IMessage message, JsonToken token)
  {
      if (token.Type != JsonToken.TokenType.StringValue)
      {
          throw new InvalidProtocolBufferException("Expected string value for Duration");
      }
      var match = DurationRegex.Match(token.StringValue);
      if (!match.Success)
      {
          throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
      }
      var sign = match.Groups["sign"].Value;
      var secondsText = match.Groups["int"].Value;
      // Prohibit leading insignificant zeroes
      if (secondsText[0] == '0' && secondsText.Length > 1)
      {
          throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
      }
      var subseconds = match.Groups["subseconds"].Value;
      var multiplier = sign == "-" ? -1 : 1;
      try
      {
          long seconds = long.Parse(secondsText, CultureInfo.InvariantCulture) * multiplier;
          int nanos = 0;
          if (subseconds != "")
          {
              // This should always work, as we've got 1-9 digits.
              // Parsed with the invariant culture, matching the seconds parse above, so that the
              // result never depends on the thread's current culture.
              int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
              // The scaling table is indexed by the full group length, including the skipped first character.
              nanos = parsedFraction * SubsecondScalingFactors[subseconds.Length] * multiplier;
          }
          if (!Duration.IsNormalized(seconds, nanos))
          {
              throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
          }
          // Values are written through the descriptor's accessors rather than a typed reference.
          message.Descriptor.Fields[Duration.SecondsFieldNumber].Accessor.SetValue(message, seconds);
          message.Descriptor.Fields[Duration.NanosFieldNumber].Accessor.SetValue(message, nanos);
      }
      catch (FormatException)
      {
          throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
      }
  }
  /// <summary>
  /// Reads a JSON string token as a field mask: the text is split on the field mask path
  /// separators, and each non-empty entry is converted with <c>ToSnakeCase</c> and appended
  /// to the message's paths list.
  /// </summary>
  /// <param name="message">The message whose paths field receives the converted entries.</param>
  /// <param name="token">The JSON token to read; it must be a string value.</param>
  /// <exception cref="InvalidProtocolBufferException">
  /// The token is not a string, or a path is rejected by <c>ToSnakeCase</c>.
  /// </exception>
  private static void MergeFieldMask(IMessage message, JsonToken token)
  {
      bool isString = token.Type == JsonToken.TokenType.StringValue;
      if (!isString)
      {
          throw new InvalidProtocolBufferException("Expected string value for FieldMask");
      }

      // TODO: Do we *want* to remove empty entries? Probably okay to treat "" as "no paths", but "foo,,bar"?
      string[] rawPaths = token.StringValue.Split(FieldMaskPathSeparators, StringSplitOptions.RemoveEmptyEntries);

      var pathsField = message.Descriptor.Fields[FieldMask.PathsFieldNumber];
      IList destination = (IList) pathsField.Accessor.GetValue(message);
      for (int index = 0; index < rawPaths.Length; index++)
      {
          destination.Add(ToSnakeCase(rawPaths[index]));
      }
  }
  // Ported from src/google/protobuf/util/internal/utility.cc
  //
  // Converts a path to snake_case by lower-casing ASCII capitals and inserting underscores at
  // word boundaries. Input that already contains an underscore is rejected with an
  // InvalidProtocolBufferException.
  private static string ToSnakeCase(string text)
  {
      var output = new StringBuilder(text.Length * 2);
      // Note: the first flag is probably unnecessary now, but is retained to stay as close as
      // possible to the C++, whilst still throwing an exception on underscores.
      bool previousWasNotUnderscore = false; // false at the start of input, see case 1 below
      bool previousWasNotCapital = false;
      for (int index = 0; index < text.Length; index++)
      {
          char current = text[index];
          bool isAsciiUpper = current >= 'A' && current <= 'Z';

          if (!isAsciiUpper)
          {
              if (current == '_')
              {
                  throw new InvalidProtocolBufferException($"Invalid field mask: {text}");
              }
              output.Append(current);
              previousWasNotUnderscore = true;
              previousWasNotCapital = true;
              continue;
          }

          // The current character is a capital. The possibilities are:
          // 1) Start of input:            "B..."     => "b..."        ("Biscuit"   => "biscuit")
          // 2) After a lowercase:         "...aB..." => "...a_b..."   ("gBike"     => "g_bike")
          // 3) End of input:              "...AB"    => "...ab"       ("GoogleLAB" => "google_lab")
          // 4) Followed by a lowercase:   "...ABc.." => "...a_bc..."  ("GBike"     => "g_bike")
          // Only cases 2 and 4 get an underscore in front of the letter.
          bool nextIsAsciiLower =
              index + 1 < text.Length &&
              text[index + 1] >= 'a' &&
              text[index + 1] <= 'z';
          bool needsSeparator =
              previousWasNotUnderscore &&                     // excludes case 1
              (previousWasNotCapital || nextIsAsciiLower);    // case 2 or case 4; excludes case 3
          if (needsSeparator)
          {
              output.Append('_');
          }

          // We already know the character is an upper-case ASCII letter, so shift it to lower case.
          output.Append((char) (current + 'a' - 'A'));
          previousWasNotUnderscore = true;
          previousWasNotCapital = false;
      }
      return output.ToString();
  }
  915. #endregion
  /// <summary>
  /// Options that control how JSON is parsed.
  /// </summary>
  public sealed class Settings
  {
      // Workaround for the Mono compiler complaining about XML comments not being on
      // valid language elements: Default is assigned here instead of inline.
      static Settings()
      {
          Default = new Settings(CodedInputStream.DefaultRecursionLimit);
      }

      /// <summary>
      /// Creates a new <see cref="Settings"/> object with the given recursion limit and an
      /// empty type registry.
      /// </summary>
      /// <param name="recursionLimit">The maximum depth of messages to parse</param>
      public Settings(int recursionLimit) : this(recursionLimit, TypeRegistry.Empty)
      {
      }

      /// <summary>
      /// Creates a new <see cref="Settings"/> object with the given recursion limit and type registry.
      /// </summary>
      /// <param name="recursionLimit">The maximum depth of messages to parse</param>
      /// <param name="typeRegistry">The type registry used to parse <see cref="Any"/> messages; must not be null</param>
      public Settings(int recursionLimit, TypeRegistry typeRegistry)
      {
          // Null check first: a null registry is rejected before any state is stored.
          TypeRegistry = ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry));
          RecursionLimit = recursionLimit;
      }

      /// <summary>
      /// The settings used by <see cref="JsonParser.Default"/>: the default recursion limit of
      /// <see cref="CodedInputStream"/>, combined with an empty type registry.
      /// </summary>
      public static Settings Default { get; }

      /// <summary>
      /// The maximum depth of messages to parse. Only messages count towards this limit, not
      /// collections - so a message within a collection within a message is at depth 2, not 3.
      /// </summary>
      public int RecursionLimit { get; }

      /// <summary>
      /// The type registry consulted when parsing <see cref="Any"/> messages.
      /// </summary>
      public TypeRegistry TypeRegistry { get; }
  }
  960. }
  961. }