JsonTokenizer.cs 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766
  1. #region Copyright notice and license
  2. // Protocol Buffers - Google's data interchange format
  3. // Copyright 2008 Google Inc. All rights reserved.
  4. // https://developers.google.com/protocol-buffers/
  5. //
  6. // Redistribution and use in source and binary forms, with or without
  7. // modification, are permitted provided that the following conditions are
  8. // met:
  9. //
  10. // * Redistributions of source code must retain the above copyright
  11. // notice, this list of conditions and the following disclaimer.
  12. // * Redistributions in binary form must reproduce the above
  13. // copyright notice, this list of conditions and the following disclaimer
  14. // in the documentation and/or other materials provided with the
  15. // distribution.
  16. // * Neither the name of Google Inc. nor the names of its
  17. // contributors may be used to endorse or promote products derived from
  18. // this software without specific prior written permission.
  19. //
  20. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. #endregion
  32. using System;
  33. using System.Collections.Generic;
  34. using System.Globalization;
  35. using System.IO;
  36. using System.Text;
  37. namespace Google.Protobuf
  38. {
  39. /// <summary>
  40. /// Simple but strict JSON tokenizer, rigidly following RFC 7159.
  41. /// </summary>
  42. /// <remarks>
  43. /// <para>
  44. /// This tokenizer is stateful, and only returns "useful" tokens - names, values etc.
  45. /// It does not create tokens for the separator between names and values, or for the comma
  46. /// between values. It validates the token stream as it goes - so callers can assume that the
  47. /// tokens it produces are appropriate. For example, it would never produce "start object, end array."
  48. /// </para>
  49. /// <para>Implementation details: the base class handles single token push-back and object depth tracking; subclasses only implement <see cref="NextImpl"/>.</para>
  50. /// <para>Not thread-safe.</para>
  51. /// </remarks>
  52. internal abstract class JsonTokenizer
  53. {
  // Token handed back through PushBack and waiting to be returned again by Next();
  // null whenever nothing is buffered.
  private JsonToken bufferedToken;

  /// <summary>
  /// Builds a tokenizer which parses JSON text pulled from <paramref name="reader"/>.
  /// </summary>
  /// <param name="reader">The source of the JSON text.</param>
  /// <returns>A new text-backed tokenizer.</returns>
  internal static JsonTokenizer FromTextReader(TextReader reader)
  {
      JsonTokenizer textTokenizer = new JsonTextTokenizer(reader);
      return textTokenizer;
  }
  /// <summary>
  /// Builds a tokenizer which hands out <paramref name="tokens"/> in order and, once they are used up,
  /// carries on with whatever <paramref name="continuation"/> produces.
  /// </summary>
  /// <remarks>
  /// Push-back is tracked per tokenizer: pushing a token back on the returned tokenizer does not push it
  /// back on <paramref name="continuation"/>, and vice versa. Use with care - this exists for the sake of
  /// Any parsing.
  /// </remarks>
  /// <param name="tokens">Tokens to replay first.</param>
  /// <param name="continuation">Tokenizer consulted after the replayed tokens are exhausted.</param>
  /// <returns>A new replaying tokenizer.</returns>
  internal static JsonTokenizer FromReplayedTokens(IList<JsonToken> tokens, JsonTokenizer continuation)
  {
      JsonTokenizer replaying = new JsonReplayTokenizer(tokens, continuation);
      return replaying;
  }
  /// <summary>
  /// The current nesting depth counted in objects only (arrays are ignored): informally, the number of
  /// '{' characters seen so far which have not yet been closed.
  /// </summary>
  /// <remarks>
  /// Maintained by <see cref="Next"/> (StartObject adds one, EndObject removes one) and adjusted the
  /// opposite way by <see cref="PushBack"/>.
  /// </remarks>
  internal int ObjectDepth { get; private set; }

  // TODO: Why do we allow a different token to be pushed back? It might be better to always remember the previous
  // token returned, and allow a parameterless Rewind() method (which could only be called once, just like the current PushBack).
  /// <summary>
  /// Buffers <paramref name="token"/> so that the next call to <see cref="Next"/> returns it instead of
  /// reading a fresh token. Only a single token may be buffered at a time.
  /// </summary>
  /// <param name="token">The token to return from the next call to <see cref="Next"/>.</param>
  /// <exception cref="InvalidOperationException">A token is already buffered.</exception>
  internal void PushBack(JsonToken token)
  {
      if (bufferedToken != null)
      {
          throw new InvalidOperationException("Can't push back twice");
      }
      bufferedToken = token;

      // Apply the mirror image of the depth adjustment Next() performs for this kind of token,
      // so that returning the token again leaves ObjectDepth where it was.
      switch (token.Type)
      {
          case JsonToken.TokenType.StartObject:
              ObjectDepth--;
              break;
          case JsonToken.TokenType.EndObject:
              ObjectDepth++;
              break;
      }
  }
  /// <summary>
  /// Produces the next JSON token. An EndDocument token signals the end of the stream; <c>Next()</c>
  /// should not be called again after that.
  /// </summary>
  /// <remarks>
  /// A token previously handed to <see cref="PushBack"/> takes priority; otherwise the work is delegated
  /// to <see cref="NextImpl"/>. Either way <see cref="ObjectDepth"/> is updated for StartObject/EndObject tokens.
  /// </remarks>
  /// <returns>The next token in the stream. This is never null.</returns>
  /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
  /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
  internal JsonToken Next()
  {
      JsonToken result;
      if (bufferedToken == null)
      {
          result = NextImpl();
      }
      else
      {
          // Hand out the pushed-back token exactly once.
          result = bufferedToken;
          bufferedToken = null;
      }

      switch (result.Type)
      {
          case JsonToken.TokenType.StartObject:
              ObjectDepth++;
              break;
          case JsonToken.TokenType.EndObject:
              ObjectDepth--;
              break;
      }
      return result;
  }

  /// <summary>
  /// Supplies the next JSON token from the underlying source. Called by <see cref="Next"/> only when
  /// no pushed-back token is waiting.
  /// </summary>
  /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
  /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
  protected abstract JsonToken NextImpl();
  /// <summary>
  /// Discards the value that is about to be read. Call this only directly after a property name has
  /// been read. A nested object or array is discarded in its entirety.
  /// </summary>
  internal void SkipValue()
  {
      // Next() is trusted to reject mismatched end-object/end-array tokens, so the only thing
      // tracked here is how many containers are still open.
      int openContainers = 0;

      // At least one token is always consumed, hence do/while.
      do
      {
          var kind = Next().Type;
          if (kind == JsonToken.TokenType.StartArray || kind == JsonToken.TokenType.StartObject)
          {
              openContainers++;
          }
          else if (kind == JsonToken.TokenType.EndArray || kind == JsonToken.TokenType.EndObject)
          {
              openContainers--;
          }
      } while (openContainers != 0);
  }
  /// <summary>
  /// Tokenizer that replays a fixed list of tokens and, once the list is used up, defers to a second tokenizer.
  /// </summary>
  private class JsonReplayTokenizer : JsonTokenizer
  {
      // Tokens to hand out first, in list order.
      private readonly IList<JsonToken> tokens;
      // Source of tokens once the list has been exhausted.
      private readonly JsonTokenizer nextTokenizer;
      // Index into tokens of the next token to replay.
      private int nextTokenIndex;

      /// <summary>
      /// Creates a tokenizer replaying <paramref name="tokens"/> and then continuing with <paramref name="nextTokenizer"/>.
      /// </summary>
      internal JsonReplayTokenizer(IList<JsonToken> tokens, JsonTokenizer nextTokenizer)
      {
          this.nextTokenizer = nextTokenizer;
          this.tokens = tokens;
      }

      // FIXME: Object depth not maintained...
      /// <summary>
      /// Returns the next replayed token while any remain; afterwards returns whatever the continuation
      /// tokenizer's <see cref="JsonTokenizer.Next"/> yields.
      /// </summary>
      protected override JsonToken NextImpl()
      {
          if (nextTokenIndex < tokens.Count)
          {
              JsonToken replayed = tokens[nextTokenIndex];
              nextTokenIndex++;
              return replayed;
          }
          return nextTokenizer.Next();
      }
  }
  181. /// <summary>
  182. /// Tokenizer which does all the *real* work of parsing JSON.
  183. /// </summary>
  184. private sealed class JsonTextTokenizer : JsonTokenizer
  185. {
  // Every state in which the next token may legitimately be a value.
  private static readonly State ValueStates =
      State.StartOfDocument | State.ObjectAfterColon | State.ArrayStart | State.ArrayAfterComma;

  // Containers currently open, innermost on top; the bottom entry is always the document itself.
  private readonly Stack<ContainerType> containerStack = new Stack<ContainerType>();
  // Character source with one character of push-back.
  private readonly PushBackReader reader;
  // Where we are in the grammar; always a single State value, never a combination.
  private State state;

  /// <summary>
  /// Creates a tokenizer positioned at the start of a document read from <paramref name="reader"/>.
  /// </summary>
  internal JsonTextTokenizer(TextReader reader)
  {
      containerStack.Push(ContainerType.Document);
      state = State.StartOfDocument;
      this.reader = new PushBackReader(reader);
  }
  /// <summary>
  /// Reads characters until a complete "useful" token has been recognised, then returns that token.
  /// </summary>
  /// <remarks>
  /// This method essentially just loops through characters skipping whitespace, validating and
  /// changing state (e.g. from ObjectBeforeColon to ObjectAfterColon)
  /// until it reaches something which will be a genuine token (e.g. a start object, or a value) at which point
  /// it returns the token. Colons and commas are validated and consumed but never produce a token.
  /// Although the method is large, it would be relatively hard to break down further... most
  /// of it is the large switch statement, which sometimes returns and sometimes doesn't.
  /// </remarks>
  /// <returns>The next token; EndDocument once the reader is exhausted in a valid final state.</returns>
  /// <exception cref="InvalidOperationException">Called again after EndDocument has been returned.</exception>
  /// <exception cref="InvalidJsonException">The text is not valid JSON at the current position.</exception>
  protected override JsonToken NextImpl()
  {
      if (state == State.ReaderExhausted)
      {
          throw new InvalidOperationException("Next() called after end of document");
      }
      while (true)
      {
          var next = reader.Read();
          if (next == null)
          {
              // Running out of text is only legal once the single top-level value is complete.
              ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: ");
              state = State.ReaderExhausted;
              return JsonToken.EndDocument;
          }
          switch (next.Value)
          {
              // Skip whitespace between tokens
              case ' ':
              case '\t':
              case '\r':
              case '\n':
                  break;
              case ':':
                  ValidateState(State.ObjectBeforeColon, "Invalid state to read a colon: ");
                  state = State.ObjectAfterColon;
                  break;
              case ',':
                  // The message previously said "colon" here (copied from the case above), which
                  // produced a misleading diagnostic for a misplaced comma.
                  ValidateState(State.ObjectAfterProperty | State.ArrayAfterValue, "Invalid state to read a comma: ");
                  state = state == State.ObjectAfterProperty ? State.ObjectAfterComma : State.ArrayAfterComma;
                  break;
              case '"':
                  string stringValue = ReadString();
                  // A string where a property is expected is a name; anywhere else it must be a value.
                  if ((state & (State.ObjectStart | State.ObjectAfterComma)) != 0)
                  {
                      state = State.ObjectBeforeColon;
                      return JsonToken.Name(stringValue);
                  }
                  else
                  {
                      ValidateAndModifyStateForValue("Invalid state to read a double quote: ");
                      return JsonToken.Value(stringValue);
                  }
              case '{':
                  ValidateState(ValueStates, "Invalid state to read an open brace: ");
                  state = State.ObjectStart;
                  containerStack.Push(ContainerType.Object);
                  return JsonToken.StartObject;
              case '}':
                  // Not valid straight after a comma, hence ObjectAfterComma is excluded.
                  ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: ");
                  PopContainer();
                  return JsonToken.EndObject;
              case '[':
                  ValidateState(ValueStates, "Invalid state to read an open square bracket: ");
                  state = State.ArrayStart;
                  containerStack.Push(ContainerType.Array);
                  return JsonToken.StartArray;
              case ']':
                  // Not valid straight after a comma, hence ArrayAfterComma is excluded.
                  ValidateState(State.ArrayAfterValue | State.ArrayStart, "Invalid state to read a close square bracket: ");
                  PopContainer();
                  return JsonToken.EndArray;
              case 'n': // Start of null
                  ConsumeLiteral("null");
                  ValidateAndModifyStateForValue("Invalid state to read a null literal: ");
                  return JsonToken.Null;
              case 't': // Start of true
                  ConsumeLiteral("true");
                  ValidateAndModifyStateForValue("Invalid state to read a true literal: ");
                  return JsonToken.True;
              case 'f': // Start of false
                  ConsumeLiteral("false");
                  ValidateAndModifyStateForValue("Invalid state to read a false literal: ");
                  return JsonToken.False;
              case '-': // Start of a number
              case '0':
              case '1':
              case '2':
              case '3':
              case '4':
              case '5':
              case '6':
              case '7':
              case '8':
              case '9':
                  double number = ReadNumber(next.Value);
                  ValidateAndModifyStateForValue("Invalid state to read a number token: ");
                  return JsonToken.Value(number);
              default:
                  // Routed through the reader like every other parse error, so that any location
                  // information the reader gains later is applied here too.
                  throw reader.CreateException("Invalid first character of token: " + next.Value);
          }
      }
  }
  /// <summary>
  /// Checks that the current state is one of <paramref name="validStates"/>.
  /// </summary>
  /// <param name="validStates">Bitwise combination of the acceptable states.</param>
  /// <param name="errorPrefix">Message prefix; the current state is appended to it on failure.</param>
  /// <exception cref="InvalidJsonException">The current state is not in <paramref name="validStates"/>.</exception>
  private void ValidateState(State validStates, string errorPrefix)
  {
      bool permitted = (state & validStates) != 0;
      if (permitted)
      {
          return;
      }
      throw reader.CreateException(errorPrefix + state);
  }
  /// <summary>
  /// Reads the remainder of a string token, the opening double quote having been consumed already.
  /// </summary>
  /// <returns>The decoded string contents, with escape sequences resolved.</returns>
  /// <exception cref="InvalidJsonException">
  /// The text ends before the closing quote, contains a raw character below U+0020, contains an invalid
  /// escape sequence, or uses surrogate code units that do not form high/low pairs.
  /// </exception>
  private string ReadString()
  {
      StringBuilder text = new StringBuilder();
      // True when the previously appended code unit was a high surrogate still awaiting its partner.
      bool pendingHighSurrogate = false;
      for (;;)
      {
          char current = reader.ReadOrFail("Unexpected end of text while reading string");
          if (current < ' ')
          {
              throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in string literal: U+{0:x4}", (int) current));
          }
          switch (current)
          {
              case '"':
                  // An unescaped quote ends the string; a dangling high surrogate is an error.
                  if (pendingHighSurrogate)
                  {
                      throw reader.CreateException("Invalid use of surrogate pair code units");
                  }
                  return text.ToString();
              case '\\':
                  current = ReadEscapedCharacter();
                  break;
          }
          // TODO: Consider only allowing surrogate pairs that are either both escaped,
          // or both not escaped. It would be a very odd text stream that contained a "lone" high surrogate
          // followed by an escaped low surrogate or vice versa... and that couldn't even be represented in UTF-8.
          // A low surrogate must appear exactly when a high surrogate is pending, and never otherwise.
          bool isLowSurrogate = char.IsLowSurrogate(current);
          if (isLowSurrogate != pendingHighSurrogate)
          {
              throw reader.CreateException("Invalid use of surrogate pair code units");
          }
          pendingHighSurrogate = char.IsHighSurrogate(current);
          text.Append(current);
      }
  }
  /// <summary>
  /// Decodes one escape sequence, the leading backslash having been consumed already.
  /// </summary>
  /// <returns>The character the escape sequence stands for.</returns>
  /// <exception cref="InvalidJsonException">The text ends mid-sequence or the escape is not recognised.</exception>
  private char ReadEscapedCharacter()
  {
      char escape = reader.ReadOrFail("Unexpected end of text while reading character escape sequence");
      if (escape == 'u')
      {
          return ReadUnicodeEscape();
      }
      switch (escape)
      {
          // These three simply stand for themselves.
          case '"':
          case '\\':
          case '/':
              return escape;
          case 'b':
              return '\b';
          case 'f':
              return '\f';
          case 'n':
              return '\n';
          case 'r':
              return '\r';
          case 't':
              return '\t';
          default:
              throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) escape));
      }
  }
  /// <summary>
  /// Decodes the four hexadecimal digits of a Unicode escape, the leading \u having been consumed already.
  /// </summary>
  /// <returns>The UTF-16 code unit spelled out by the four digits.</returns>
  /// <exception cref="InvalidJsonException">The text ends early or a character is not a hex digit.</exception>
  private char ReadUnicodeEscape()
  {
      int codeUnit = 0;
      for (int remaining = 4; remaining > 0; remaining--)
      {
          char digit = reader.ReadOrFail("Unexpected end of text while reading Unicode escape sequence");
          int digitValue;
          if ('0' <= digit && digit <= '9')
          {
              digitValue = digit - '0';
          }
          else if ('a' <= digit && digit <= 'f')
          {
              digitValue = 10 + (digit - 'a');
          }
          else if ('A' <= digit && digit <= 'F')
          {
              digitValue = 10 + (digit - 'A');
          }
          else
          {
              throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) digit));
          }
          // Shift the digits read so far up by one hex place and add the new one.
          codeUnit = (codeUnit * 16) + digitValue;
      }
      return (char) codeUnit;
  }
  /// <summary>
  /// Reads the rest of a keyword literal and checks it against <paramref name="text"/>. The first
  /// letter of the literal is taken to have been consumed already, so matching starts at index 1.
  /// </summary>
  /// <param name="text">The complete literal expected, e.g. "null".</param>
  /// <exception cref="InvalidJsonException">The text ends early or differs from the literal.</exception>
  private void ConsumeLiteral(string text)
  {
      int position = 1;
      while (position < text.Length)
      {
          char? actual = reader.Read();
          if (!actual.HasValue)
          {
              throw reader.CreateException("Unexpected end of text while reading literal token " + text);
          }
          char expected = text[position];
          if (actual.Value != expected)
          {
              throw reader.CreateException("Unexpected character while reading literal token " + text);
          }
          position++;
      }
  }
  /// <summary>
  /// Reads a complete numeric literal and converts it to a double.
  /// </summary>
  /// <param name="initialCharacter">
  /// The first character of the literal, already consumed by the caller: either '-' or a digit.
  /// </param>
  /// <returns>The parsed value, which is always finite.</returns>
  /// <exception cref="InvalidJsonException">
  /// The literal is malformed, or its magnitude is too large to be represented as a finite double.
  /// </exception>
  private double ReadNumber(char initialCharacter)
  {
      StringBuilder builder = new StringBuilder();
      if (initialCharacter == '-')
      {
          builder.Append("-");
      }
      else
      {
          // A leading digit belongs to the integer part; hand it back so ReadInt sees it.
          reader.PushBack(initialCharacter);
      }
      // Each method returns the character it read that doesn't belong in that part,
      // so we know what to do next, including pushing the character back at the end.
      // null is returned for "end of text".
      char? next = ReadInt(builder);
      if (next == '.')
      {
          next = ReadFrac(builder);
      }
      if (next == 'e' || next == 'E')
      {
          next = ReadExp(builder);
      }
      // If we read a character which wasn't part of the number, push it back so we can read it again
      // to parse the next token.
      if (next != null)
      {
          reader.PushBack(next.Value);
      }

      string text = builder.ToString();
      double result;
      try
      {
          result = double.Parse(text,
              NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent,
              CultureInfo.InvariantCulture);
      }
      catch (OverflowException)
      {
          // Older runtimes signal an out-of-range literal by throwing.
          throw reader.CreateException("Numeric value out of range: " + text);
      }
      // Newer runtimes (.NET Core 3.0 onwards) return an infinity instead of throwing. The grammar
      // accepted above cannot spell an infinity, so an infinite result always means overflow and
      // must be rejected in the same way.
      if (double.IsInfinity(result))
      {
          throw reader.CreateException("Numeric value out of range: " + text);
      }
      return result;
  }
  /// <summary>
  /// Reads the integer part of a number (without any sign) and appends it to <paramref name="builder"/>.
  /// </summary>
  /// <param name="builder">Receives the digits read.</param>
  /// <returns>The first character after the integer part, or null at end of text.</returns>
  /// <exception cref="InvalidJsonException">
  /// There is no leading digit, or a leading zero is followed by further digits.
  /// </exception>
  private char? ReadInt(StringBuilder builder)
  {
      char leading = reader.ReadOrFail("Invalid numeric literal");
      bool leadingIsDigit = leading >= '0' && leading <= '9';
      if (!leadingIsDigit)
      {
          throw reader.CreateException("Invalid numeric literal");
      }
      builder.Append(leading);

      int furtherDigits;
      char? terminator = ConsumeDigits(builder, out furtherDigits);
      // "0" alone is fine; "01", "007" and the like are not.
      if (furtherDigits != 0 && leading == '0')
      {
          throw reader.CreateException("Invalid numeric literal: leading 0 for non-zero value.");
      }
      return terminator;
  }
  476. private char? ReadFrac(StringBuilder builder)
  477. {
  478. builder.Append('.'); // Already consumed this
  479. int digitCount;
  480. char? next = ConsumeDigits(builder, out digitCount);
  481. if (digitCount == 0)
  482. {
  483. throw reader.CreateException("Invalid numeric literal: fraction with no trailing digits");
  484. }
  485. return next;
  486. }
  /// <summary>
  /// Reads the exponent part of a number, the 'e' or 'E' having been consumed already, and appends it
  /// to <paramref name="builder"/> (always using an upper-case 'E').
  /// </summary>
  /// <param name="builder">Receives the exponent marker, optional sign and digits.</param>
  /// <returns>The first character after the exponent, or null at end of text.</returns>
  /// <exception cref="InvalidJsonException">The text ends after the marker, or no digits follow it.</exception>
  private char? ReadExp(StringBuilder builder)
  {
      // The caller has consumed the marker (in either case) but not recorded it.
      builder.Append('E');

      char signOrDigit = reader.ReadOrFail("Invalid numeric literal: exponent with no trailing digits");
      bool isSign = signOrDigit == '-' || signOrDigit == '+';
      if (isSign)
      {
          builder.Append(signOrDigit);
      }
      else
      {
          // Not a sign, so let ConsumeDigits decide what to make of it.
          reader.PushBack(signOrDigit);
      }

      int exponentDigits;
      char? terminator = ConsumeDigits(builder, out exponentDigits);
      if (exponentDigits == 0)
      {
          throw reader.CreateException("Invalid numeric literal: exponent without value");
      }
      return terminator;
  }
  /// <summary>
  /// Reads a (possibly empty) run of ASCII digits, appending each to <paramref name="builder"/>.
  /// </summary>
  /// <param name="builder">Receives the digits read.</param>
  /// <param name="count">Set to the number of digits appended.</param>
  /// <returns>The character which ended the run (already consumed), or null at end of text.</returns>
  private char? ConsumeDigits(StringBuilder builder, out int count)
  {
      count = 0;
      char? candidate = reader.Read();
      while (candidate != null && candidate.Value >= '0' && candidate.Value <= '9')
      {
          builder.Append(candidate.Value);
          count++;
          candidate = reader.Read();
      }
      return candidate;
  }
  /// <summary>
  /// Checks that a value may appear in the current state and then moves to the state which follows
  /// a completed value, e.g. from ObjectAfterColon to ObjectAfterProperty.
  /// </summary>
  /// <param name="errorPrefix">Message prefix used if a value is not permitted here.</param>
  /// <exception cref="InvalidJsonException">A value is not valid in the current state.</exception>
  /// <exception cref="InvalidOperationException">
  /// A state listed in ValueStates has no transition here (an internal error).
  /// </exception>
  private void ValidateAndModifyStateForValue(string errorPrefix)
  {
      ValidateState(ValueStates, errorPrefix);

      if (state == State.StartOfDocument)
      {
          state = State.ExpectedEndOfDocument;
      }
      else if (state == State.ObjectAfterColon)
      {
          state = State.ObjectAfterProperty;
      }
      else if (state == State.ArrayStart || state == State.ArrayAfterComma)
      {
          state = State.ArrayAfterValue;
      }
      else
      {
          throw new InvalidOperationException("ValidateAndModifyStateForValue does not handle all value states (and should)");
      }
  }
  /// <summary>
  /// Removes the innermost container from the stack and moves to the state which follows a completed
  /// value inside the container that is now innermost.
  /// </summary>
  /// <exception cref="InvalidOperationException">The enclosing container type is not recognised.</exception>
  private void PopContainer()
  {
      containerStack.Pop();
      ContainerType enclosing = containerStack.Peek();

      if (enclosing == ContainerType.Object)
      {
          state = State.ObjectAfterProperty;
      }
      else if (enclosing == ContainerType.Array)
      {
          state = State.ArrayAfterValue;
      }
      else if (enclosing == ContainerType.Document)
      {
          state = State.ExpectedEndOfDocument;
      }
      else
      {
          throw new InvalidOperationException("Unexpected container type: " + enclosing);
      }
  }
  /// <summary>
  /// The kinds of entry held on the container stack.
  /// </summary>
  private enum ContainerType
  {
      // The document as a whole; pushed once by the constructor.
      Document,
      // A JSON object, pushed on '{'.
      Object,
      // A JSON array, pushed on '['.
      Array
  }
  /// <summary>
  /// The positions within the JSON grammar that the tokenizer can be in.
  /// </summary>
  /// <remarks>
  /// <para>The enum is marked as flags only so that a set of acceptable states can be expressed as a
  /// single value and tested with one bitwise operation; each member occupies its own bit.</para>
  /// <para>
  /// Every member is illustrated with a sample in which ^ marks the current position in the text.
  /// The samples use string values, but any value - including a nested object or array - could stand
  /// in their place. The stack of open containers is a further part of the tokenizer's complete state.
  /// "AfterValue" below is a notional state only: completing a value moves straight to
  /// ExpectedEndOfDocument, ObjectAfterProperty or ArrayAfterValue.
  /// </para>
  /// <para>
  /// These states were derived manually by reading RFC 7159 carefully.
  /// </para>
  /// </remarks>
  [Flags]
  private enum State
  {
      /// <summary>
      /// ^ { "foo": "bar" }
      /// Nothing read yet; the document's value is expected.
      /// Next states: ObjectStart, ArrayStart, "AfterValue"
      /// </summary>
      StartOfDocument = 0x001,
      /// <summary>
      /// { "foo": "bar" } ^
      /// The document's value is complete; only end of text may follow.
      /// Next states: ReaderExhausted
      /// </summary>
      ExpectedEndOfDocument = 0x002,
      /// <summary>
      /// { "foo": "bar" } ^ (with the reader already read to its end)
      /// Terminal state.
      /// </summary>
      ReaderExhausted = 0x004,
      /// <summary>
      /// { ^ "foo": "bar" }
      /// Just inside an object, before its *first* property.
      /// Next states:
      /// "AfterValue" (the object is empty)
      /// ObjectBeforeColon (a name is read)
      /// </summary>
      ObjectStart = 0x008,
      /// <summary>
      /// { "foo" ^ : "bar", "x": "y" }
      /// A property name has been read; the colon is expected.
      /// Next state: ObjectAfterColon
      /// </summary>
      ObjectBeforeColon = 0x010,
      /// <summary>
      /// { "foo" : ^ "bar", "x": "y" }
      /// The colon after a property name has been read; the property's value is expected.
      /// Next states:
      /// "AfterValue" (value is simple)
      /// ObjectStart (value is object)
      /// ArrayStart (value is array)
      /// </summary>
      ObjectAfterColon = 0x020,
      /// <summary>
      /// { "foo" : "bar" ^ , "x" : "y" }
      /// A property is complete; either a comma or the end of the object is expected.
      /// Next states: ObjectAfterComma or "AfterValue"
      /// </summary>
      ObjectAfterProperty = 0x040,
      /// <summary>
      /// { "foo":"bar", ^ "x":"y" }
      /// The comma after a property has been read; another property is expected.
      /// Similar to ObjectStart, except that a closing brace is not valid here.
      /// Next state: ObjectBeforeColon.
      /// </summary>
      ObjectAfterComma = 0x080,
      /// <summary>
      /// [ ^ "foo", "bar" ]
      /// Just inside an array, before its *first* value.
      /// Next states:
      /// "AfterValue" (a value is read)
      /// "AfterValue" (the array ends; the stack is popped)
      /// </summary>
      ArrayStart = 0x100,
      /// <summary>
      /// [ "foo" ^ , "bar" ]
      /// An array element is complete; either a comma or the end of the array is expected.
      /// Next states: ArrayAfterComma or "AfterValue"
      /// </summary>
      ArrayAfterValue = 0x200,
      /// <summary>
      /// [ "foo", ^ "bar" ]
      /// The comma after an array element has been read; another value (simple or complex) *must* follow.
      /// Next states: "AfterValue" (simple value), ObjectStart, ArrayStart
      /// </summary>
      ArrayAfterComma = 0x400
  }
  /// <summary>
  /// Thin layer over a text reader which adds a single character of push-back and is the one place
  /// where parse exceptions are created.
  /// </summary>
  private class PushBackReader
  {
      // TODO: Add locations for errors etc.

      private readonly TextReader reader;

      /// <summary>
      /// The character waiting to be returned by the next read, if one has been pushed back.
      /// </summary>
      private char? nextChar;

      /// <summary>
      /// Wraps <paramref name="reader"/>.
      /// </summary>
      internal PushBackReader(TextReader reader)
      {
          // TODO: Wrap the reader in a BufferedReader?
          this.reader = reader;
      }

      /// <summary>
      /// Reads one character, preferring a pushed-back character when there is one.
      /// </summary>
      /// <returns>The character read, or null once the end of the text has been reached.</returns>
      internal char? Read()
      {
          if (nextChar == null)
          {
              int raw = reader.Read();
              if (raw == -1)
              {
                  return null;
              }
              return (char) raw;
          }
          char? buffered = nextChar;
          nextChar = null;
          return buffered;
      }

      /// <summary>
      /// Reads one character, treating end of text as an error.
      /// </summary>
      /// <param name="messageOnFailure">Message of the exception thrown at end of text.</param>
      /// <returns>The character read.</returns>
      /// <exception cref="InvalidJsonException">The end of the text has been reached.</exception>
      internal char ReadOrFail(string messageOnFailure)
      {
          char? result = Read();
          if (result.HasValue)
          {
              return result.Value;
          }
          throw CreateException(messageOnFailure);
      }

      /// <summary>
      /// Makes <paramref name="c"/> the character returned by the next read.
      /// </summary>
      /// <exception cref="InvalidOperationException">A character is already pushed back.</exception>
      internal void PushBack(char c)
      {
          if (nextChar != null)
          {
              throw new InvalidOperationException("Cannot push back when already buffering a character");
          }
          nextChar = c;
      }

      /// <summary>
      /// Builds (without throwing) the exception used to report invalid JSON text.
      /// </summary>
      /// <param name="message">The exception message.</param>
      internal InvalidJsonException CreateException(string message)
      {
          // TODO: Keep track of and use the location.
          InvalidJsonException failure = new InvalidJsonException(message);
          return failure;
      }
  }
  725. }
  726. }
  727. }