JsonParser.cs 50 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073
  1. #region Copyright notice and license
  2. // Protocol Buffers - Google's data interchange format
  3. // Copyright 2015 Google Inc. All rights reserved.
  4. // https://developers.google.com/protocol-buffers/
  5. //
  6. // Redistribution and use in source and binary forms, with or without
  7. // modification, are permitted provided that the following conditions are
  8. // met:
  9. //
  10. // * Redistributions of source code must retain the above copyright
  11. // notice, this list of conditions and the following disclaimer.
  12. // * Redistributions in binary form must reproduce the above
  13. // copyright notice, this list of conditions and the following disclaimer
  14. // in the documentation and/or other materials provided with the
  15. // distribution.
  16. // * Neither the name of Google Inc. nor the names of its
  17. // contributors may be used to endorse or promote products derived from
  18. // this software without specific prior written permission.
  19. //
  20. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. #endregion
  32. using Google.Protobuf.Reflection;
  33. using Google.Protobuf.WellKnownTypes;
  34. using System;
  35. using System.Collections;
  36. using System.Collections.Generic;
  37. using System.Globalization;
  38. using System.IO;
  39. using System.Linq;
  40. using System.Text;
  41. using System.Text.RegularExpressions;
  42. namespace Google.Protobuf
  43. {
  44. /// <summary>
  45. /// Reflection-based converter from JSON to messages.
  46. /// </summary>
  47. /// <remarks>
  48. /// <para>
  49. /// Instances of this class are thread-safe, with no mutable state.
  50. /// </para>
  51. /// <para>
  52. /// This is a simple start to get JSON parsing working. As it's reflection-based,
  53. /// it's not as quick as baking calls into generated messages - but is a simpler implementation.
  54. /// (This code is generally not heavily optimized.)
  55. /// </para>
  56. /// </remarks>
  57. public sealed class JsonParser
  58. {
  // The patterns below spell out [0-9] instead of \d so that only ASCII digits are accepted.
  // Neither regex is a complete validator: each one filters out *most* invalid input, and the
  // parsing that follows is relied upon to reject the rest.
  private static readonly Regex TimestampRegex = new Regex(
      @"^(?<datetime>[0-9]{4}-[01][0-9]-[0-3][0-9]T[012][0-9]:[0-5][0-9]:[0-5][0-9])(?<subseconds>\.[0-9]{1,9})?(?<offset>(Z|[+-][0-1][0-9]:[0-5][0-9]))$",
      FrameworkPortability.CompiledRegexWhereAvailable);
  private static readonly Regex DurationRegex = new Regex(
      @"^(?<sign>-)?(?<int>[0-9]{1,12})(?<subseconds>\.[0-9]{1,9})?s$",
      FrameworkPortability.CompiledRegexWhereAvailable);
  // NOTE(review): presumably indexed by the length of the "subseconds" capture, which includes
  // the leading '.'; that would explain the placeholder entry at index 0 and the repeated
  // factor at indexes 1 and 2. Confirm against the code that reads this table.
  private static readonly int[] SubsecondScalingFactors =
  {
      0, 100000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1
  };
  private static readonly char[] FieldMaskPathSeparators = { ',' };
  private static readonly EnumDescriptor NullValueDescriptor =
      StructReflection.Descriptor.EnumTypes.Single(enumType => enumType.ClrType == typeof(NullValue));
  private static readonly JsonParser defaultInstance = new JsonParser(Settings.Default);

  // Maps the full name of each specially-handled well-known type to the routine that merges
  // its JSON representation into a message of that type.
  // TODO: Consider introducing a class containing parse state of the parser, tokenizer and depth.
  // That would simplify these handlers and the signatures of various methods.
  private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>> WellKnownTypeHandlers =
      new Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
      {
          { Timestamp.Descriptor.FullName, (p, msg, tok) => MergeTimestamp(msg, tok.Next()) },
          { Duration.Descriptor.FullName, (p, msg, tok) => MergeDuration(msg, tok.Next()) },
          { Value.Descriptor.FullName, (p, msg, tok) => p.MergeStructValue(msg, tok) },
          {
              ListValue.Descriptor.FullName,
              (p, msg, tok) => p.MergeRepeatedField(msg, msg.Descriptor.Fields[ListValue.ValuesFieldNumber], tok)
          },
          { Struct.Descriptor.FullName, (p, msg, tok) => p.MergeStruct(msg, tok) },
          { Any.Descriptor.FullName, (p, msg, tok) => p.MergeAny(msg, tok) },
          { FieldMask.Descriptor.FullName, (p, msg, tok) => MergeFieldMask(msg, tok.Next()) },
          // Every wrapper type shares one handler.
          { Int32Value.Descriptor.FullName, MergeWrapperField },
          { Int64Value.Descriptor.FullName, MergeWrapperField },
          { UInt32Value.Descriptor.FullName, MergeWrapperField },
          { UInt64Value.Descriptor.FullName, MergeWrapperField },
          { FloatValue.Descriptor.FullName, MergeWrapperField },
          { DoubleValue.Descriptor.FullName, MergeWrapperField },
          { BytesValue.Descriptor.FullName, MergeWrapperField },
          { StringValue.Descriptor.FullName, MergeWrapperField },
          { BoolValue.Descriptor.FullName, MergeWrapperField }
      };
  /// <summary>
  /// Handler shared by every wrapper entry in the well-known type handler dictionary, so the
  /// same code does not have to be repeated there. It merges the next JSON value into the
  /// wrapper message's value field.
  /// </summary>
  private static void MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer)
  {
      var wrappedValueField = message.Descriptor.Fields[WrappersReflection.WrapperValueFieldNumber];
      parser.MergeField(message, wrappedValueField, tokenizer);
  }
  /// <summary>
  /// Gets the shared parser instance that was created with <c>Settings.Default</c>.
  /// </summary>
  public static JsonParser Default => defaultInstance;
  // Settings captured by the constructor; assigned once and only read afterwards.
  private readonly Settings settings;

  /// <summary>
  /// Creates a new parser with the given settings.
  /// </summary>
  /// <param name="settings">The settings; checked with <c>ProtoPreconditions.CheckNotNull</c>.</param>
  public JsonParser(Settings settings)
  {
      ProtoPreconditions.CheckNotNull(settings, nameof(settings));
      this.settings = settings;
  }
  /// <summary>
  /// Parses the text in <paramref name="json"/> and merges the result into the given message.
  /// </summary>
  /// <param name="message">The message that receives the parsed JSON information.</param>
  /// <param name="json">The JSON text to parse.</param>
  internal void Merge(IMessage message, string json)
  {
      // Wrap the text in a reader and delegate to the TextReader overload.
      TextReader reader = new StringReader(json);
      Merge(message, reader);
  }
  /// <summary>
  /// Reads JSON from <paramref name="jsonReader"/> and merges the result into the given message.
  /// </summary>
  /// <param name="message">The message that receives the parsed JSON information.</param>
  /// <param name="jsonReader">Reader supplying the JSON to parse.</param>
  internal void Merge(IMessage message, TextReader jsonReader)
  {
      var tokenizer = JsonTokenizer.FromTextReader(jsonReader);
      Merge(message, tokenizer);

      // Once the message has been merged, the only acceptable next token is end-of-document.
      if (tokenizer.Next() == JsonToken.EndDocument)
      {
          return;
      }
      throw new InvalidProtocolBufferException("Expected end of JSON after object");
  }
  /// <summary>
  /// Populates <paramref name="message"/> from the tokens supplied by <paramref name="tokenizer"/>.
  /// The next token is usually "start object", although wrapper types and nullity can break that
  /// assumption. The method is an LL(1) recursive descent parser over the token stream. The
  /// tokenizer is assumed to have validated the stream as JSON, but not every valid JSON token
  /// stream is valid "protobuf JSON".
  /// </summary>
  private void Merge(IMessage message, JsonTokenizer tokenizer)
  {
      if (tokenizer.ObjectDepth > settings.RecursionLimit)
      {
          throw InvalidProtocolBufferException.JsonRecursionLimitExceeded();
      }

      var descriptor = message.Descriptor;
      if (descriptor.IsWellKnownType)
      {
          Action<JsonParser, IMessage, JsonTokenizer> wellKnownHandler;
          if (WellKnownTypeHandlers.TryGetValue(descriptor.FullName, out wellKnownHandler))
          {
              wellKnownHandler(this, message, tokenizer);
              return;
          }
          // A well-known type without a dedicated handler is parsed like any other message.
      }

      if (tokenizer.Next().Type != JsonToken.TokenType.StartObject)
      {
          throw new InvalidProtocolBufferException("Expected an object");
      }

      var fieldsByJsonName = descriptor.Fields.ByJsonName();
      // Oneofs that already received a value; each may appear only once. Allocated on first
      // use because oneofs are relatively rare and most messages never need the set.
      HashSet<OneofDescriptor> populatedOneofs = null;

      for (var token = tokenizer.Next();
           token.Type != JsonToken.TokenType.EndObject;
           token = tokenizer.Next())
      {
          if (token.Type != JsonToken.TokenType.Name)
          {
              throw new InvalidOperationException("Unexpected token type " + token.Type);
          }

          string jsonName = token.StringValue;
          FieldDescriptor field;
          if (!fieldsByJsonName.TryGetValue(jsonName, out field))
          {
              // Unknown property: either skip its value or reject it, depending on the settings.
              if (!settings.IgnoreUnknownFields)
              {
                  throw new InvalidProtocolBufferException("Unknown field: " + jsonName);
              }
              tokenizer.SkipValue();
              continue;
          }

          if (field.ContainingOneof != null)
          {
              if (populatedOneofs == null)
              {
                  populatedOneofs = new HashSet<OneofDescriptor>();
              }
              if (!populatedOneofs.Add(field.ContainingOneof))
              {
                  throw new InvalidProtocolBufferException($"Multiple values specified for oneof {field.ContainingOneof.Name}");
              }
          }
          MergeField(message, field, tokenizer);
      }
  }
  /// <summary>
  /// Reads the JSON value for <paramref name="field"/> from the tokenizer and stores it in
  /// <paramref name="message"/>, dispatching to the map, repeated or singular path.
  /// </summary>
  private void MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
  {
      var lookahead = tokenizer.Next();
      if (lookahead.Type == JsonToken.TokenType.Null)
      {
          // A null token clears the field, except for a singular field of type
          // google.protobuf.Value or google.protobuf.NullValue, where null is a real value.
          // Note: different from Java API, which just ignores it.
          // TODO: Bring it more in line? Discuss...
          bool nullIsAValue = !field.IsMap
              && !field.IsRepeated
              && (IsGoogleProtobufValueField(field) || IsGoogleProtobufNullValueField(field));
          if (!nullIsAValue)
          {
              field.Accessor.Clear(message);
              return;
          }
      }

      // The helpers below expect to read the value's first token themselves.
      tokenizer.PushBack(lookahead);
      if (field.IsMap)
      {
          MergeMapField(message, field, tokenizer);
          return;
      }
      if (field.IsRepeated)
      {
          MergeRepeatedField(message, field, tokenizer);
          return;
      }
      var parsed = ParseSingleValue(field, tokenizer);
      field.Accessor.SetValue(message, parsed);
  }
  /// <summary>
  /// Reads a JSON array from the tokenizer and appends each parsed element to the list
  /// currently held by the repeated <paramref name="field"/> of <paramref name="message"/>.
  /// </summary>
  private void MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
  {
      var opening = tokenizer.Next();
      if (opening.Type != JsonToken.TokenType.StartArray)
      {
          throw new InvalidProtocolBufferException("Repeated field value was not an array. Token type: " + opening.Type);
      }

      var elements = (IList) field.Accessor.GetValue(message);
      for (var next = tokenizer.Next();
           next.Type != JsonToken.TokenType.EndArray;
           next = tokenizer.Next())
      {
          // Not the end of the array: return the token so the element parser sees it.
          tokenizer.PushBack(next);
          object element = ParseSingleValue(field, tokenizer);
          if (element == null)
          {
              throw new InvalidProtocolBufferException("Repeated field elements cannot be null");
          }
          elements.Add(element);
      }
  }
  /// <summary>
  /// Reads a JSON object from the tokenizer and stores each property as an entry in the
  /// dictionary currently held by the map <paramref name="field"/> of <paramref name="message"/>.
  /// </summary>
  private void MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
  {
      // A map is always a JSON object, even when its values are well-known types;
      // ParseSingleValue takes care of those.
      var opening = tokenizer.Next();
      if (opening.Type != JsonToken.TokenType.StartObject)
      {
          throw new InvalidProtocolBufferException("Expected an object to populate a map");
      }

      // The key and value descriptors are fields 1 and 2 of the map field's message type.
      var entryType = field.MessageType;
      var keyField = entryType.FindFieldByNumber(1);
      var valueField = entryType.FindFieldByNumber(2);
      if (keyField == null || valueField == null)
      {
          throw new InvalidProtocolBufferException("Invalid map field: " + field.FullName);
      }

      var entries = (IDictionary) field.Accessor.GetValue(message);
      for (var nameToken = tokenizer.Next();
           nameToken.Type != JsonToken.TokenType.EndObject;
           nameToken = tokenizer.Next())
      {
          object entryKey = ParseMapKey(keyField, nameToken.StringValue);
          object entryValue = ParseSingleValue(valueField, tokenizer);
          if (entryValue == null)
          {
              throw new InvalidProtocolBufferException("Map values must not be null");
          }
          entries[entryKey] = entryValue;
      }
  }
  /// <summary>
  /// Returns true when <paramref name="field"/> is a message field whose message type has the
  /// same full name as <c>Value.Descriptor</c>.
  /// </summary>
  private static bool IsGoogleProtobufValueField(FieldDescriptor field)
  {
      if (field.FieldType != FieldType.Message)
      {
          return false;
      }
      return field.MessageType.FullName == Value.Descriptor.FullName;
  }
  /// <summary>
  /// Returns true when <paramref name="field"/> is an enum field whose enum type has the same
  /// full name as the <c>NullValue</c> enum descriptor.
  /// </summary>
  private static bool IsGoogleProtobufNullValueField(FieldDescriptor field)
  {
      if (field.FieldType != FieldType.Enum)
      {
          return false;
      }
      return field.EnumType.FullName == NullValueDescriptor.FullName;
  }
  /// <summary>
  /// Parses a single JSON value from the tokenizer into the object to store for
  /// <paramref name="field"/>.
  /// </summary>
  /// <param name="field">Descriptor of the field (or map value / repeated element) being parsed.</param>
  /// <param name="tokenizer">Token source, positioned at the start of the value.</param>
  /// <returns>
  /// The parsed value. For a JSON null this is <c>Value.ForNull()</c> for google.protobuf.Value
  /// fields, <c>NullValue.NullValue</c> for google.protobuf.NullValue fields, and a null
  /// reference for every other field.
  /// </returns>
  /// <exception cref="InvalidProtocolBufferException">
  /// The token type cannot be converted to the field's type.
  /// </exception>
  private object ParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer)
  {
      var token = tokenizer.Next();
      if (token.Type == JsonToken.TokenType.Null)
      {
          // TODO: In order to support dynamic messages, we should really build this up
          // dynamically.
          if (IsGoogleProtobufValueField(field))
          {
              return Value.ForNull();
          }
          if (IsGoogleProtobufNullValueField(field))
          {
              return NullValue.NullValue;
          }
          return null;
      }

      var fieldType = field.FieldType;
      if (fieldType == FieldType.Message)
      {
          if (!field.MessageType.IsWrapperType)
          {
              // Ordinary message: hand the token back and parse the nested message recursively.
              // TODO: Merge the current value in message? (Public API currently doesn't make this relevant as we don't expose merging.)
              tokenizer.PushBack(token);
              IMessage subMessage = NewMessageForField(field);
              Merge(subMessage, tokenizer);
              return subMessage;
          }

          // Parse wrapper types as their constituent types.
          // TODO: What does this mean for null?
          field = field.MessageType.Fields[WrappersReflection.WrapperValueFieldNumber];
          fieldType = field.FieldType;
      }

      // No case for TokenType.Null is needed here: every null token has already returned above.
      switch (token.Type)
      {
          case JsonToken.TokenType.True:
          case JsonToken.TokenType.False:
              if (fieldType == FieldType.Bool)
              {
                  return token.Type == JsonToken.TokenType.True;
              }
              // Fall through to "we don't support this type for this case"; could duplicate the behaviour of the default
              // case instead, but this way we'd only need to change one place.
              goto default;
          case JsonToken.TokenType.StringValue:
              return ParseSingleStringValue(field, token.StringValue);
          // Note: not passing the number value itself here, as we may end up storing the string value in the token too.
          case JsonToken.TokenType.Number:
              return ParseSingleNumberValue(field, token);
          default:
              throw new InvalidProtocolBufferException("Unsupported JSON token type " + token.Type + " for field type " + fieldType);
      }
  }
  /// <summary>
  /// Creates a new message of type <typeparamref name="T"/> from the text in <paramref name="json"/>.
  /// </summary>
  /// <typeparam name="T">The type of message to create.</typeparam>
  /// <param name="json">The JSON to parse.</param>
  /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
  /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
  public T Parse<T>(string json) where T : IMessage, new()
  {
      ProtoPreconditions.CheckNotNull(json, nameof(json));
      TextReader reader = new StringReader(json);
      return Parse<T>(reader);
  }
  /// <summary>
  /// Creates a new message of type <typeparamref name="T"/> from JSON read from
  /// <paramref name="jsonReader"/>.
  /// </summary>
  /// <typeparam name="T">The type of message to create.</typeparam>
  /// <param name="jsonReader">Reader providing the JSON to parse.</param>
  /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
  /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
  public T Parse<T>(TextReader jsonReader) where T : IMessage, new()
  {
      ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
      // Start from a freshly constructed message and merge the JSON into it.
      T result = new T();
      Merge(result, jsonReader);
      return result;
  }
  /// <summary>
  /// Creates a new message of the type described by <paramref name="descriptor"/> from the
  /// text in <paramref name="json"/>.
  /// </summary>
  /// <param name="json">The JSON to parse.</param>
  /// <param name="descriptor">Descriptor of message type to parse.</param>
  /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
  /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
  public IMessage Parse(string json, MessageDescriptor descriptor)
  {
      ProtoPreconditions.CheckNotNull(json, nameof(json));
      ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
      TextReader reader = new StringReader(json);
      return Parse(reader, descriptor);
  }
  /// <summary>
  /// Creates a new message of the type described by <paramref name="descriptor"/> from JSON
  /// read from <paramref name="jsonReader"/>.
  /// </summary>
  /// <param name="jsonReader">Reader providing the JSON to parse.</param>
  /// <param name="descriptor">Descriptor of message type to parse.</param>
  /// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
  /// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
  public IMessage Parse(TextReader jsonReader, MessageDescriptor descriptor)
  {
      ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
      ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
      // The descriptor's parser supplies the message instance that the JSON is merged into.
      IMessage result = descriptor.Parser.CreateTemplate();
      Merge(result, jsonReader);
      return result;
  }
  /// <summary>
  /// Merges the next JSON value into a google.protobuf.Value message, setting the field of
  /// <paramref name="message"/> that corresponds to the kind of the first token.
  /// </summary>
  private void MergeStructValue(IMessage message, JsonTokenizer tokenizer)
  {
      var first = tokenizer.Next();
      var valueFields = message.Descriptor.Fields;
      switch (first.Type)
      {
          case JsonToken.TokenType.Null:
              valueFields[Value.NullValueFieldNumber].Accessor.SetValue(message, 0);
              return;
          case JsonToken.TokenType.StringValue:
              valueFields[Value.StringValueFieldNumber].Accessor.SetValue(message, first.StringValue);
              return;
          case JsonToken.TokenType.Number:
              valueFields[Value.NumberValueFieldNumber].Accessor.SetValue(message, first.NumberValue);
              return;
          case JsonToken.TokenType.False:
          case JsonToken.TokenType.True:
              valueFields[Value.BoolValueFieldNumber].Accessor.SetValue(message, first.Type == JsonToken.TokenType.True);
              return;
          case JsonToken.TokenType.StartObject:
          case JsonToken.TokenType.StartArray:
          {
              // Objects go to the struct field and arrays to the list field; apart from the
              // target field, both are handled by recursively merging a nested message.
              int fieldNumber = first.Type == JsonToken.TokenType.StartObject
                  ? Value.StructValueFieldNumber
                  : Value.ListValueFieldNumber;
              var targetField = valueFields[fieldNumber];
              var nested = NewMessageForField(targetField);
              tokenizer.PushBack(first);
              Merge(nested, tokenizer);
              targetField.Accessor.SetValue(message, nested);
              return;
          }
          default:
              throw new InvalidOperationException("Unexpected token type: " + first.Type);
      }
  }
  /// <summary>
  /// Merges a JSON object into a google.protobuf.Struct message by treating it as the
  /// contents of the message's map field.
  /// </summary>
  private void MergeStruct(IMessage message, JsonTokenizer tokenizer)
  {
      var first = tokenizer.Next();
      if (first.Type != JsonToken.TokenType.StartObject)
      {
          throw new InvalidProtocolBufferException("Expected object value for Struct");
      }
      // MergeMapField reads the start-object token itself, so hand it back first.
      tokenizer.PushBack(first);
      MergeMapField(message, message.Descriptor.Fields[Struct.FieldsFieldNumber], tokenizer);
  }
  /// <summary>
  /// Merges a JSON object into a google.protobuf.Any message. Tokens are recorded until the
  /// @type property is found. Its value is then resolved through the type registry, and the
  /// recorded tokens plus the remainder of the object are replayed (without @type) to parse
  /// the packed message.
  /// </summary>
  private void MergeAny(IMessage message, JsonTokenizer tokenizer)
  {
      var recorded = new List<JsonToken>();
      var current = tokenizer.Next();
      if (current.Type != JsonToken.TokenType.StartObject)
      {
          throw new InvalidProtocolBufferException("Expected object value for Any");
      }

      int anyObjectDepth = tokenizer.ObjectDepth;
      while (true)
      {
          // The depth comparison guards against nested Any values that appear before the
          // type URL of *this* Any.
          bool atOwnTypeProperty = current.Type == JsonToken.TokenType.Name
              && current.StringValue == JsonFormatter.AnyTypeUrlField
              && tokenizer.ObjectDepth == anyObjectDepth;
          if (atOwnTypeProperty)
          {
              break;
          }
          recorded.Add(current);
          current = tokenizer.Next();
          if (tokenizer.ObjectDepth < anyObjectDepth)
          {
              throw new InvalidProtocolBufferException("Any message with no @type");
          }
      }

      // Neither the @type name nor its value is added to the recorded tokens.
      var typeUrlToken = tokenizer.Next();
      if (typeUrlToken.Type != JsonToken.TokenType.StringValue)
      {
          throw new InvalidProtocolBufferException("Expected string value for Any.@type");
      }
      string typeUrl = typeUrlToken.StringValue;
      string typeName = Any.GetTypeName(typeUrl);
      MessageDescriptor packedDescriptor = settings.TypeRegistry.Find(typeName);
      if (packedDescriptor == null)
      {
          throw new InvalidOperationException($"Type registry has no descriptor for type name '{typeName}'");
      }

      // Replay what was recorded, followed by whatever is left of the object, and parse it
      // as normal. The original tokenizer should end up at the end of the object.
      var replay = JsonTokenizer.FromReplayedTokens(recorded, tokenizer);
      var packedMessage = packedDescriptor.Parser.CreateTemplate();
      if (packedDescriptor.IsWellKnownType)
      {
          MergeWellKnownTypeAnyBody(packedMessage, replay);
      }
      else
      {
          Merge(packedMessage, replay);
      }
      var packedBytes = packedMessage.ToByteString();

      // With the serialized data available, fill in the Any received as a parameter.
      var anyFields = message.Descriptor.Fields;
      anyFields[Any.TypeUrlFieldNumber].Accessor.SetValue(message, typeUrl);
      anyFields[Any.ValueFieldNumber].Accessor.SetValue(message, packedBytes);
  }
  /// <summary>
  /// Parses the body of an Any whose packed type is a well-known type. Such values live in a
  /// property called "value" in the JSON. Because the @type property has already been removed
  /// from the token stream, the only tokens expected are start-object, name("value"), the
  /// value itself, and end-object.
  /// </summary>
  private void MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer)
  {
      // Discard the start-object token; MergeAny already checked it.
      tokenizer.Next();

      var nameToken = tokenizer.Next();
      // TODO: What about an absent Int32Value, for example?
      bool isValueProperty = nameToken.Type == JsonToken.TokenType.Name
          && nameToken.StringValue == JsonFormatter.AnyWellKnownTypeValueField;
      if (!isValueProperty)
      {
          throw new InvalidProtocolBufferException($"Expected '{JsonFormatter.AnyWellKnownTypeValueField}' property for well-known type Any body");
      }

      Merge(body, tokenizer);

      if (tokenizer.Next().Type != JsonToken.TokenType.EndObject)
      {
          throw new InvalidProtocolBufferException($"Expected end-object token after @type/value for well-known type");
      }
  }
  542. #region Utility methods which don't depend on the state (or settings) of the parser.
  /// <summary>
  /// Converts the text of a JSON property name into a map key of the type required by
  /// <paramref name="field"/>.
  /// </summary>
  /// <exception cref="InvalidProtocolBufferException">
  /// The text is not "true"/"false" for a bool key, or the field type is not handled here.
  /// </exception>
  private static object ParseMapKey(FieldDescriptor field, string keyText)
  {
      switch (field.FieldType)
      {
          case FieldType.String:
              return keyText;
          case FieldType.Bool:
              // Only the exact lower-case literals are accepted.
              switch (keyText)
              {
                  case "true":
                      return true;
                  case "false":
                      return false;
                  default:
                      throw new InvalidProtocolBufferException("Invalid string for bool map key: " + keyText);
              }
          case FieldType.Int32:
          case FieldType.SInt32:
          case FieldType.SFixed32:
              return ParseNumericString(keyText, int.Parse);
          case FieldType.UInt32:
          case FieldType.Fixed32:
              return ParseNumericString(keyText, uint.Parse);
          case FieldType.Int64:
          case FieldType.SInt64:
          case FieldType.SFixed64:
              return ParseNumericString(keyText, long.Parse);
          case FieldType.UInt64:
          case FieldType.Fixed64:
              return ParseNumericString(keyText, ulong.Parse);
          default:
              throw new InvalidProtocolBufferException("Invalid field type for map: " + field.FieldType);
      }
  }
  /// <summary>
  /// Converts the numeric value of a JSON number token into the boxed CLR value for
  /// <paramref name="field"/>.
  /// </summary>
  /// <exception cref="InvalidProtocolBufferException">
  /// The number is not an integer where one is required, is out of range for the target
  /// type, or the field type cannot be populated from a JSON number.
  /// </exception>
  private static object ParseSingleNumberValue(FieldDescriptor field, JsonToken token)
  {
      double number = token.NumberValue;
      try
      {
          // The integer casts run in a checked context, so an out-of-range value raises
          // OverflowException, which is translated by the catch block below.
          checked
          {
              switch (field.FieldType)
              {
                  case FieldType.Int32:
                  case FieldType.SInt32:
                  case FieldType.SFixed32:
                      CheckInteger(number);
                      return (int) number;
                  case FieldType.UInt32:
                  case FieldType.Fixed32:
                      CheckInteger(number);
                      return (uint) number;
                  case FieldType.Int64:
                  case FieldType.SInt64:
                  case FieldType.SFixed64:
                      CheckInteger(number);
                      return (long) number;
                  case FieldType.UInt64:
                  case FieldType.Fixed64:
                      CheckInteger(number);
                      return (ulong) number;
                  case FieldType.Double:
                      return number;
                  case FieldType.Float:
                      // NaN and the infinities map to their float counterparts; any other
                      // value outside the float range is rejected.
                      if (double.IsNaN(number))
                      {
                          return float.NaN;
                      }
                      if (double.IsPositiveInfinity(number))
                      {
                          return float.PositiveInfinity;
                      }
                      if (double.IsNegativeInfinity(number))
                      {
                          return float.NegativeInfinity;
                      }
                      if (number > float.MaxValue || number < float.MinValue)
                      {
                          throw new InvalidProtocolBufferException($"Value out of range: {number}");
                      }
                      return (float) number;
                  case FieldType.Enum:
                      CheckInteger(number);
                      // Just return it as an int, and let the CLR convert it.
                      // Note that we deliberately don't check that it's a known value.
                      return (int) number;
                  default:
                      throw new InvalidProtocolBufferException($"Unsupported conversion from JSON number for field type {field.FieldType}");
              }
          }
      }
      catch (OverflowException)
      {
          throw new InvalidProtocolBufferException($"Value out of range: {number}");
      }
  }
  /// <summary>
  /// Throws unless <paramref name="value"/> is a finite number with no fractional part.
  /// </summary>
  /// <exception cref="InvalidProtocolBufferException">
  /// The value is infinite, NaN, or differs from its floor.
  /// </exception>
  private static void CheckInteger(double value)
  {
      bool isWholeNumber = !double.IsInfinity(value)
          && !double.IsNaN(value)
          && value == Math.Floor(value);
      if (!isWholeNumber)
      {
          throw new InvalidProtocolBufferException($"Value not an integer: {value}");
      }
  }
  /// <summary>
  /// Converts the text of a JSON string token into the boxed CLR value for
  /// <paramref name="field"/>.
  /// </summary>
  /// <exception cref="InvalidProtocolBufferException">
  /// The text is not valid base64 for a bytes field, does not name a value of the field's
  /// enum type, or the field type cannot be populated from a JSON string.
  /// </exception>
  private static object ParseSingleStringValue(FieldDescriptor field, string text)
  {
      switch (field.FieldType)
      {
          case FieldType.String:
              return text;
          case FieldType.Bytes:
              try
              {
                  return ByteString.FromBase64(text);
              }
              catch (FormatException formatError)
              {
                  throw InvalidProtocolBufferException.InvalidBase64(formatError);
              }
          case FieldType.Int32:
          case FieldType.SInt32:
          case FieldType.SFixed32:
              return ParseNumericString(text, int.Parse);
          case FieldType.UInt32:
          case FieldType.Fixed32:
              return ParseNumericString(text, uint.Parse);
          case FieldType.Int64:
          case FieldType.SInt64:
          case FieldType.SFixed64:
              return ParseNumericString(text, long.Parse);
          case FieldType.UInt64:
          case FieldType.Fixed64:
              return ParseNumericString(text, ulong.Parse);
          case FieldType.Double:
          {
              double doubleValue = ParseNumericString(text, double.Parse);
              ValidateInfinityAndNan(
                  text,
                  double.IsPositiveInfinity(doubleValue),
                  double.IsNegativeInfinity(doubleValue),
                  double.IsNaN(doubleValue));
              return doubleValue;
          }
          case FieldType.Float:
          {
              float floatValue = ParseNumericString(text, float.Parse);
              ValidateInfinityAndNan(
                  text,
                  float.IsPositiveInfinity(floatValue),
                  float.IsNegativeInfinity(floatValue),
                  float.IsNaN(floatValue));
              return floatValue;
          }
          case FieldType.Enum:
          {
              var namedValue = field.EnumType.FindValueByName(text);
              if (namedValue == null)
              {
                  throw new InvalidProtocolBufferException($"Invalid enum value: {text} for enum type: {field.EnumType.FullName}");
              }
              // Just return it as an int, and let the CLR convert it.
              return namedValue.Number;
          }
          default:
              throw new InvalidProtocolBufferException($"Unsupported conversion from JSON string for field type {field.FieldType}");
      }
  }
  /// <summary>
  /// Produces a fresh message for a message-typed field by asking the parser of the
  /// field's message type for a template instance.
  /// </summary>
  /// <param name="field">The field whose <c>MessageType</c> supplies the parser.</param>
  /// <returns>The message returned by the parser's <c>CreateTemplate</c> call.</returns>
  private static IMessage NewMessageForField(FieldDescriptor field) =>
      field.MessageType.Parser.CreateTemplate();
  /// <summary>
  /// Parses numeric text with the supplied BCL parse method, after rejecting forms that
  /// <see cref="NumberStyles"/> cannot prohibit: a leading '+' and redundant leading zeroes.
  /// </summary>
  /// <typeparam name="T">The numeric type produced by <paramref name="parser"/>.</typeparam>
  /// <param name="text">The text to parse.</param>
  /// <param name="parser">
  /// A parse method such as <c>int.Parse</c> or <c>double.Parse</c>; it is invoked with a leading sign,
  /// decimal point and exponent allowed, using the invariant culture.
  /// </param>
  /// <returns>The parsed value.</returns>
  /// <exception cref="InvalidProtocolBufferException">
  /// The text has a leading '+', has a redundant leading zero, is not a valid number, or is out of range
  /// for <typeparamref name="T"/>.
  /// </exception>
  private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser)
  {
      // All prefix checks are ordinal: this is machine-readable data, and culture-sensitive
      // StartsWith can ignore zero-width characters or vary with the current culture.

      // Can't prohibit this with NumberStyles.
      if (text.StartsWith("+", StringComparison.Ordinal))
      {
          throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
      }

      // Reject a zero that is immediately followed by another digit ("01", "-007").
      // A lone "0" or "-0", and values such as "0.5", are still accepted.
      int digitsStart = text.StartsWith("-", StringComparison.Ordinal) ? 1 : 0;
      if (text.Length > digitsStart + 1
          && text[digitsStart] == '0'
          && text[digitsStart + 1] >= '0'
          && text[digitsStart + 1] <= '9')
      {
          throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
      }

      try
      {
          return parser(text, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture);
      }
      catch (FormatException)
      {
          throw new InvalidProtocolBufferException($"Invalid numeric value for type: {text}");
      }
      catch (OverflowException)
      {
          throw new InvalidProtocolBufferException($"Value out of range: {text}");
      }
  }
  /// <summary>
  /// Ensures that an infinite or NaN parse result came from exactly the text that is allowed to
  /// produce it ("Infinity", "-Infinity" or "NaN").
  /// This compensates for the lenient whitespace handling of double.Parse/float.Parse, as well as the
  /// way that Mono parses out-of-range values as infinity.
  /// </summary>
  /// <param name="text">The original text that was parsed.</param>
  /// <param name="isPositiveInfinity">Whether the parsed value is positive infinity.</param>
  /// <param name="isNegativeInfinity">Whether the parsed value is negative infinity.</param>
  /// <param name="isNaN">Whether the parsed value is NaN.</param>
  /// <exception cref="InvalidProtocolBufferException">
  /// A flag is set but <paramref name="text"/> is not the matching literal.
  /// </exception>
  private static void ValidateInfinityAndNan(string text, bool isPositiveInfinity, bool isNegativeInfinity, bool isNaN)
  {
      // Each special value is acceptable only when the text is exactly its literal spelling.
      bool positiveOk = !isPositiveInfinity || text == "Infinity";
      bool negativeOk = !isNegativeInfinity || text == "-Infinity";
      bool nanOk = !isNaN || text == "NaN";
      if (positiveOk && negativeOk && nanOk)
      {
          return;
      }
      throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
  }
  /// <summary>
  /// Parses a JSON string token as a timestamp and stores the result in the seconds and nanos
  /// fields of <paramref name="message"/>.
  /// </summary>
  /// <param name="message">The message whose seconds and nanos fields are set through its descriptor's accessors.</param>
  /// <param name="token">The JSON token; it must be a string value matching <c>TimestampRegex</c>.</param>
  /// <exception cref="InvalidProtocolBufferException">
  /// The token is not a string, does not match the expected pattern, has a UTC offset larger than
  /// 18 hours or equal to -00:00, or yields a value outside the supported range once the offset is applied.
  /// </exception>
  private static void MergeTimestamp(IMessage message, JsonToken token)
  {
      if (token.Type != JsonToken.TokenType.StringValue)
      {
          throw new InvalidProtocolBufferException("Expected string value for Timestamp");
      }
      var match = TimestampRegex.Match(token.StringValue);
      if (!match.Success)
      {
          throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
      }
      var dateTime = match.Groups["datetime"].Value;
      var subseconds = match.Groups["subseconds"].Value;
      var offset = match.Groups["offset"].Value;
      try
      {
          DateTime parsed = DateTime.ParseExact(
              dateTime,
              "yyyy-MM-dd'T'HH:mm:ss",
              CultureInfo.InvariantCulture,
              DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
          // TODO: It would be nice not to have to create all these objects... easy to optimize later though.
          Timestamp timestamp = Timestamp.FromDateTime(parsed);
          int nanosToAdd = 0;
          if (subseconds != "")
          {
              // This should always work, as we've got 1-9 digits.
              // The captured group includes its leading separator, hence Substring(1) here and the
              // use of the full group length as the index into the scaling table.
              int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
              nanosToAdd = parsedFraction * SubsecondScalingFactors[subseconds.Length];
          }
          int secondsToAdd = 0;
          if (offset != "Z")
          {
              // This is the amount we need to *subtract* from the local time to get to UTC - hence - => +1 and vice versa.
              int sign = offset[0] == '-' ? 1 : -1;
              // Both offset components are machine-readable digits, so parse them culture-invariantly.
              int hours = int.Parse(offset.Substring(1, 2), CultureInfo.InvariantCulture);
              int minutes = int.Parse(offset.Substring(4, 2), CultureInfo.InvariantCulture);
              int totalMinutes = hours * 60 + minutes;
              if (totalMinutes > 18 * 60)
              {
                  throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
              }
              if (totalMinutes == 0 && sign == 1)
              {
                  // This is an offset of -00:00, which means "unknown local offset". It makes no sense for a timestamp.
                  throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
              }
              // We need to *subtract* the offset from local time to get UTC.
              secondsToAdd = sign * totalMinutes * 60;
          }
          // Ensure we've got the right signs. Currently unnecessary, but easy to do.
          if (secondsToAdd < 0 && nanosToAdd > 0)
          {
              secondsToAdd++;
              nanosToAdd = nanosToAdd - Duration.NanosecondsPerSecond;
          }
          if (secondsToAdd != 0 || nanosToAdd != 0)
          {
              timestamp += new Duration { Nanos = nanosToAdd, Seconds = secondsToAdd };
              // The resulting timestamp after offset change would be out of our expected range. Currently the Timestamp message doesn't validate this
              // anywhere, but we shouldn't parse it.
              if (timestamp.Seconds < Timestamp.UnixSecondsAtBclMinValue || timestamp.Seconds > Timestamp.UnixSecondsAtBclMaxValue)
              {
                  throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
              }
          }
          message.Descriptor.Fields[Timestamp.SecondsFieldNumber].Accessor.SetValue(message, timestamp.Seconds);
          message.Descriptor.Fields[Timestamp.NanosFieldNumber].Accessor.SetValue(message, timestamp.Nanos);
      }
      catch (FormatException)
      {
          // Raised by DateTime.ParseExact / int.Parse when the text matched the pattern but is not a real value.
          throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
      }
  }
  /// <summary>
  /// Parses a JSON string token as a duration and stores the result in the seconds and nanos
  /// fields of <paramref name="message"/>.
  /// </summary>
  /// <param name="message">The message whose seconds and nanos fields are set through its descriptor's accessors.</param>
  /// <param name="token">The JSON token; it must be a string value matching <c>DurationRegex</c>.</param>
  /// <exception cref="InvalidProtocolBufferException">
  /// The token is not a string, does not match the expected pattern, has insignificant leading zeroes
  /// in its integer part, or does not describe a normalized duration.
  /// </exception>
  private static void MergeDuration(IMessage message, JsonToken token)
  {
      if (token.Type != JsonToken.TokenType.StringValue)
      {
          throw new InvalidProtocolBufferException("Expected string value for Duration");
      }
      var match = DurationRegex.Match(token.StringValue);
      if (!match.Success)
      {
          throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
      }
      var sign = match.Groups["sign"].Value;
      var secondsText = match.Groups["int"].Value;
      // Prohibit leading insignificant zeroes
      if (secondsText[0] == '0' && secondsText.Length > 1)
      {
          throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
      }
      var subseconds = match.Groups["subseconds"].Value;
      // The sign applies to both the seconds and the nanos components.
      var multiplier = sign == "-" ? -1 : 1;
      try
      {
          long seconds = long.Parse(secondsText, CultureInfo.InvariantCulture) * multiplier;
          int nanos = 0;
          if (subseconds != "")
          {
              // This should always work, as we've got 1-9 digits.
              // Parsed culture-invariantly, like the seconds part above.
              int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
              nanos = parsedFraction * SubsecondScalingFactors[subseconds.Length] * multiplier;
          }
          if (!Duration.IsNormalized(seconds, nanos))
          {
              throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
          }
          message.Descriptor.Fields[Duration.SecondsFieldNumber].Accessor.SetValue(message, seconds);
          message.Descriptor.Fields[Duration.NanosFieldNumber].Accessor.SetValue(message, nanos);
      }
      catch (FormatException)
      {
          throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
      }
  }
  /// <summary>
  /// Parses a JSON string token as a field mask and appends each path, converted with
  /// <c>ToSnakeCase</c>, to the paths field of <paramref name="message"/>.
  /// </summary>
  /// <param name="message">The message whose paths list is read through its descriptor's accessor and appended to.</param>
  /// <param name="token">The JSON token; it must be a string value.</param>
  /// <exception cref="InvalidProtocolBufferException">
  /// The token is not a string, or a path is rejected by <c>ToSnakeCase</c>.
  /// </exception>
  private static void MergeFieldMask(IMessage message, JsonToken token)
  {
      if (token.Type != JsonToken.TokenType.StringValue)
      {
          throw new InvalidProtocolBufferException("Expected string value for FieldMask");
      }

      // TODO: Do we *want* to remove empty entries? Probably okay to treat "" as "no paths", but "foo,,bar"?
      string rawMask = token.StringValue;
      string[] camelCasePaths = rawMask.Split(FieldMaskPathSeparators, StringSplitOptions.RemoveEmptyEntries);

      var pathsAccessor = message.Descriptor.Fields[FieldMask.PathsFieldNumber].Accessor;
      IList target = (IList) pathsAccessor.GetValue(message);
      for (int index = 0; index < camelCasePaths.Length; index++)
      {
          string snakeCasePath = ToSnakeCase(camelCasePaths[index]);
          target.Add(snakeCasePath);
      }
  }
  // Ported from src/google/protobuf/util/internal/utility.cc
  /// <summary>
  /// Converts a lowerCamelCase path to snake_case, lower-casing ASCII capitals and inserting
  /// underscores at word boundaries.
  /// </summary>
  /// <param name="text">The text to convert; it must not contain an underscore.</param>
  /// <returns>The converted text.</returns>
  /// <exception cref="InvalidProtocolBufferException"><paramref name="text"/> contains an underscore.</exception>
  private static string ToSnakeCase(string text)
  {
      int length = text.Length;
      var result = new StringBuilder(length * 2);
      for (int i = 0; i < length; i++)
      {
          char current = text[i];
          bool currentIsUpper = current >= 'A' && current <= 'Z'; // ascii_isupper
          if (!currentIsUpper)
          {
              if (current == '_')
              {
                  throw new InvalidProtocolBufferException($"Invalid field mask: {text}");
              }
              result.Append(current);
              continue;
          }

          // The current character is an ASCII capital. An underscore goes in front of it when:
          //   - it follows a non-capital:    "...aB..."  => "...a_b..."   (e.g. "gBike" => "g_bike")
          //   - it is followed by lowercase: "...ABc..." => "...a_bc..."  (e.g. "GBike" => "g_bike")
          // and never when:
          //   - it starts the input:         "B..."      => "b..."        (e.g. "Biscuit" => "biscuit")
          //   - it ends a run of capitals:   "...AB"     => "...ab"       (e.g. "GoogleLAB" => "google_lab")
          bool hasPrevious = i > 0;
          bool previousIsUpper = hasPrevious && text[i - 1] >= 'A' && text[i - 1] <= 'Z';
          bool nextIsLower = i + 1 < length && text[i + 1] >= 'a' && text[i + 1] <= 'z'; // ascii_islower
          if (hasPrevious && (!previousIsUpper || nextIsLower))
          {
              result.Append('_');
          }

          // ascii_tolower, but we already know that the character *is* an upper case ASCII character...
          result.Append((char) (current - 'A' + 'a'));
      }
      return result.ToString();
  }
  932. #endregion
  /// <summary>
  /// Immutable options that control how JSON is parsed.
  /// </summary>
  public sealed class Settings
  {
      /// <summary>
      /// Default settings, as used by <see cref="JsonParser.Default"/>. This has the same default
      /// recursion limit as <see cref="CodedInputStream"/>, and an empty type registry.
      /// </summary>
      public static Settings Default { get; }

      /// <summary>
      /// The maximum depth of messages to parse. Note that this limit only applies to parsing
      /// messages, not collections - so a message within a collection within a message only counts as
      /// depth 2, not 3.
      /// </summary>
      public int RecursionLimit { get; }

      /// <summary>
      /// The type registry used to parse <see cref="Any"/> messages.
      /// </summary>
      public TypeRegistry TypeRegistry { get; }

      /// <summary>
      /// Whether the parser should ignore unknown fields (<c>true</c>) or throw an exception when
      /// they are encountered (<c>false</c>).
      /// </summary>
      public bool IgnoreUnknownFields { get; }

      // Workaround for the Mono compiler complaining about XML comments not being on
      // valid language elements.
      static Settings()
      {
          Default = new Settings(CodedInputStream.DefaultRecursionLimit);
      }

      /// <summary>
      /// Creates a new <see cref="Settings"/> object with the specified recursion limit.
      /// The type registry is empty and unknown fields are not ignored.
      /// </summary>
      /// <param name="recursionLimit">The maximum depth of messages to parse</param>
      public Settings(int recursionLimit) : this(recursionLimit, TypeRegistry.Empty, false)
      {
      }

      /// <summary>
      /// Creates a new <see cref="Settings"/> object with the specified recursion limit and type registry.
      /// Unknown fields are not ignored.
      /// </summary>
      /// <param name="recursionLimit">The maximum depth of messages to parse</param>
      /// <param name="typeRegistry">The type registry used to parse <see cref="Any"/> messages</param>
      public Settings(int recursionLimit, TypeRegistry typeRegistry) : this(recursionLimit, typeRegistry, false)
      {
      }

      // Single place where every option is assigned; all other constructors and the With* methods end up here.
      private Settings(int recursionLimit, TypeRegistry typeRegistry, bool ignoreUnknownFields)
      {
          TypeRegistry = ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry));
          RecursionLimit = recursionLimit;
          IgnoreUnknownFields = ignoreUnknownFields;
      }

      /// <summary>
      /// Creates a new <see cref="Settings"/> object based on this one, set to either ignore unknown fields,
      /// or throw an exception when unknown fields are encountered.
      /// </summary>
      /// <param name="ignoreUnknownFields"><c>true</c> if unknown fields should be ignored when parsing; <c>false</c> to throw an exception.</param>
      public Settings WithIgnoreUnknownFields(bool ignoreUnknownFields)
      {
          return new Settings(RecursionLimit, TypeRegistry, ignoreUnknownFields);
      }

      /// <summary>
      /// Creates a new <see cref="Settings"/> object based on this one, but with the specified recursion limit.
      /// </summary>
      /// <param name="recursionLimit">The new recursion limit.</param>
      public Settings WithRecursionLimit(int recursionLimit)
      {
          return new Settings(recursionLimit, TypeRegistry, IgnoreUnknownFields);
      }

      /// <summary>
      /// Creates a new <see cref="Settings"/> object based on this one, but with the specified type registry.
      /// </summary>
      /// <param name="typeRegistry">The new type registry. Must not be null.</param>
      public Settings WithTypeRegistry(TypeRegistry typeRegistry)
      {
          var checkedRegistry = ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry));
          return new Settings(RecursionLimit, checkedRegistry, IgnoreUnknownFields);
      }
  }
  1008. }
  1009. }