JsonTokenizer.cs 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738
  1. #region Copyright notice and license
  2. // Protocol Buffers - Google's data interchange format
  3. // Copyright 2008 Google Inc. All rights reserved.
  4. // https://developers.google.com/protocol-buffers/
  5. //
  6. // Redistribution and use in source and binary forms, with or without
  7. // modification, are permitted provided that the following conditions are
  8. // met:
  9. //
  10. // * Redistributions of source code must retain the above copyright
  11. // notice, this list of conditions and the following disclaimer.
  12. // * Redistributions in binary form must reproduce the above
  13. // copyright notice, this list of conditions and the following disclaimer
  14. // in the documentation and/or other materials provided with the
  15. // distribution.
  16. // * Neither the name of Google Inc. nor the names of its
  17. // contributors may be used to endorse or promote products derived from
  18. // this software without specific prior written permission.
  19. //
  20. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. #endregion
  32. using System;
  33. using System.Collections.Generic;
  34. using System.Globalization;
  35. using System.IO;
  36. using System.Text;
  37. namespace Google.Protobuf
  38. {
  39. /// <summary>
  40. /// Simple but strict JSON tokenizer, rigidly following RFC 7159.
  41. /// </summary>
  42. /// <remarks>
  43. /// <para>
  44. /// This tokenizer is stateful, and only returns "useful" tokens - names, values etc.
  45. /// It does not create tokens for the separator between names and values, or for the comma
  46. /// between values. It validates the token stream as it goes - so callers can assume that the
  47. /// tokens it produces are appropriate. For example, it would never produce "start object, end array."
  48. /// </para>
  49. /// <para>Implementation details: the base class handles single token push-back and object depth tracking; derived classes supply the tokens by implementing <c>NextImpl</c>.</para>
  50. /// <para>Not thread-safe.</para>
  51. /// </remarks>
  52. internal abstract class JsonTokenizer
  53. {
  54. private JsonToken bufferedToken;
  /// <summary>
  /// Builds a tokenizer which takes its JSON text from the supplied text reader.
  /// </summary>
  /// <param name="reader">The reader providing the characters to tokenize.</param>
  /// <returns>A new text-based tokenizer wrapping <paramref name="reader"/>.</returns>
  internal static JsonTokenizer FromTextReader(TextReader reader)
  {
      JsonTokenizer textTokenizer = new JsonTextTokenizer(reader);
      return textTokenizer;
  }
  /// <summary>
  /// Builds a tokenizer which hands out the tokens in <paramref name="tokens"/> first, and once those
  /// are used up, carries on reading from <paramref name="continuation"/>.
  /// </summary>
  /// <remarks>
  /// Pushing a token back on the returned tokenizer does not push it back on the continuation
  /// tokenizer, nor the other way round, so use this with care. It exists for the sake of Any parsing.
  /// </remarks>
  /// <param name="tokens">The tokens to replay, in order.</param>
  /// <param name="continuation">The tokenizer consulted after the replayed tokens are exhausted.</param>
  /// <returns>A new replaying tokenizer.</returns>
  internal static JsonTokenizer FromReplayedTokens(IList<JsonToken> tokens, JsonTokenizer continuation)
  {
      JsonTokenizer replayTokenizer = new JsonReplayTokenizer(tokens, continuation);
      return replayTokenizer;
  }
  72. /// <summary>
  73. /// Returns the depth of the stack, purely in objects (not collections).
  74. /// Informally, this is the number of remaining unclosed '{' characters we have.
  75. /// </summary>
  76. internal int ObjectDepth { get; private set; }
  // TODO: Why do we allow a different token to be pushed back? It might be better to always remember the previous
  // token returned, and allow a parameterless Rewind() method (which could only be called once, just like the current PushBack).
  /// <summary>
  /// Buffers a single token so that the next call to <see cref="Next"/> returns it instead of
  /// requesting a new token. Only one token can be buffered at a time.
  /// </summary>
  /// <remarks>
  /// <see cref="Next"/> adjusts <c>ObjectDepth</c> whenever it returns a StartObject or EndObject token,
  /// so pushing such a token back applies the opposite adjustment here; returning the buffered token
  /// again later then re-applies the original change.
  /// </remarks>
  /// <param name="token">The token to buffer. Must not be null.</param>
  /// <exception cref="ArgumentNullException"><paramref name="token"/> is null.</exception>
  /// <exception cref="InvalidOperationException">A token is already buffered.</exception>
  internal void PushBack(JsonToken token)
  {
      // Reject null up front: a null buffer slot means "nothing buffered", and the
      // type checks below would otherwise fail with a NullReferenceException.
      if (token == null)
      {
          throw new ArgumentNullException("token");
      }
      if (bufferedToken != null)
      {
          throw new InvalidOperationException("Can't push back twice");
      }
      bufferedToken = token;
      // Undo the depth change that Next() applied when it handed this token out.
      if (token.Type == JsonToken.TokenType.StartObject)
      {
          ObjectDepth--;
      }
      else if (token.Type == JsonToken.TokenType.EndObject)
      {
          ObjectDepth++;
      }
  }
  /// <summary>
  /// Returns the next JSON token in the stream. An EndDocument token signals the end of the stream,
  /// after which <c>Next()</c> should not be called again.
  /// </summary>
  /// <remarks>
  /// A token previously handed to <c>PushBack</c> is returned first (and the buffer cleared); only when
  /// nothing is buffered is <see cref="NextImpl"/> asked for a token. <c>ObjectDepth</c> is updated for
  /// StartObject and EndObject tokens whichever source they came from.
  /// </remarks>
  /// <returns>The next token in the stream. This is never null.</returns>
  /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
  /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
  internal JsonToken Next()
  {
      JsonToken result = bufferedToken;
      if (result == null)
      {
          // Nothing was pushed back, so fetch a fresh token from the implementation.
          result = NextImpl();
      }
      else
      {
          // Consume the pushed-back token; it can only be returned once.
          bufferedToken = null;
      }

      var kind = result.Type;
      if (kind == JsonToken.TokenType.StartObject)
      {
          ObjectDepth++;
      }
      else if (kind == JsonToken.TokenType.EndObject)
      {
          ObjectDepth--;
      }
      return result;
  }
  125. /// <summary>
  126. /// Returns the next JSON token in the stream, when requested by the base class. (The <see cref="Next"/> method delegates
  127. /// to this if it doesn't have a buffered token.)
  128. /// </summary>
  129. /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
  130. /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
  131. protected abstract JsonToken NextImpl();
  /// <summary>
  /// Tokenizer that replays a fixed list of tokens and, once the list is exhausted,
  /// defers to another tokenizer for every subsequent token.
  /// </summary>
  private class JsonReplayTokenizer : JsonTokenizer
  {
      // Tokens to hand out before falling through to the continuation tokenizer.
      private readonly IList<JsonToken> replayedTokens;
      // Source of tokens once the replay list has been used up.
      private readonly JsonTokenizer continuationTokenizer;
      // Index of the next entry of replayedTokens to return.
      private int replayPosition;

      /// <summary>
      /// Creates a tokenizer replaying <paramref name="tokens"/> and then continuing
      /// with <paramref name="nextTokenizer"/>.
      /// </summary>
      internal JsonReplayTokenizer(IList<JsonToken> tokens, JsonTokenizer nextTokenizer)
      {
          replayedTokens = tokens;
          continuationTokenizer = nextTokenizer;
      }

      // FIXME: Object depth not maintained...
      /// <summary>
      /// Returns the next replayed token if any remain, otherwise the next token of the continuation tokenizer.
      /// </summary>
      protected override JsonToken NextImpl()
      {
          if (replayPosition < replayedTokens.Count)
          {
              return replayedTokens[replayPosition++];
          }
          return continuationTokenizer.Next();
      }
  }
  155. /// <summary>
  156. /// Tokenizer which does all the *real* work of parsing JSON.
  157. /// </summary>
  158. private sealed class JsonTextTokenizer : JsonTokenizer
  159. {
  160. // The set of states in which a value is valid next token.
  161. private static readonly State ValueStates = State.ArrayStart | State.ArrayAfterComma | State.ObjectAfterColon | State.StartOfDocument;
  162. private readonly Stack<ContainerType> containerStack = new Stack<ContainerType>();
  163. private readonly PushBackReader reader;
  164. private State state;
  /// <summary>
  /// Creates a tokenizer over the given reader, positioned at the start of the document.
  /// </summary>
  /// <param name="reader">The text reader to tokenize; it is wrapped in a push-back reader.</param>
  internal JsonTextTokenizer(TextReader reader)
  {
      // The document itself is the outermost container; PopContainer relies on it
      // always being at the bottom of the stack.
      containerStack.Push(ContainerType.Document);
      state = State.StartOfDocument;
      this.reader = new PushBackReader(reader);
  }
  /// <summary>
  /// Reads characters from the underlying reader until a complete token has been recognised, then returns it.
  /// </summary>
  /// <remarks>
  /// This method essentially just loops through characters skipping whitespace, validating and
  /// changing state (e.g. from ObjectBeforeColon to ObjectAfterColon)
  /// until it reaches something which will be a genuine token (e.g. a start object, or a value) at which point
  /// it returns the token. Colons and commas never produce tokens; they only move the state machine on.
  /// Although the method is large, it would be relatively hard to break down further... most
  /// of it is the large switch statement, which sometimes returns and sometimes doesn't.
  /// </remarks>
  /// <returns>The next token; EndDocument once the reader is exhausted in a valid end state.</returns>
  /// <exception cref="InvalidOperationException">Called again after EndDocument has been returned.</exception>
  /// <exception cref="InvalidJsonException">A character is not valid in the current state, or a token is malformed.</exception>
  protected override JsonToken NextImpl()
  {
      if (state == State.ReaderExhausted)
      {
          throw new InvalidOperationException("Next() called after end of document");
      }
      while (true)
      {
          var next = reader.Read();
          if (next == null)
          {
              // End of input is only acceptable once the single top-level value is complete.
              ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: ");
              state = State.ReaderExhausted;
              return JsonToken.EndDocument;
          }
          switch (next.Value)
          {
              // Skip whitespace between tokens
              case ' ':
              case '\t':
              case '\r':
              case '\n':
                  break;
              case ':':
                  ValidateState(State.ObjectBeforeColon, "Invalid state to read a colon: ");
                  state = State.ObjectAfterColon;
                  break;
              case ',':
                  ValidateState(State.ObjectAfterProperty | State.ArrayAfterValue, "Invalid state to read a comma: ");
                  state = state == State.ObjectAfterProperty ? State.ObjectAfterComma : State.ArrayAfterComma;
                  break;
              case '"':
                  // A string is a property name when a name is expected, otherwise it is a value.
                  string stringValue = ReadString();
                  if ((state & (State.ObjectStart | State.ObjectAfterComma)) != 0)
                  {
                      state = State.ObjectBeforeColon;
                      return JsonToken.Name(stringValue);
                  }
                  else
                  {
                      ValidateAndModifyStateForValue("Invalid state to read a double quote: ");
                      return JsonToken.Value(stringValue);
                  }
              case '{':
                  ValidateState(ValueStates, "Invalid state to read an open brace: ");
                  state = State.ObjectStart;
                  containerStack.Push(ContainerType.Object);
                  return JsonToken.StartObject;
              case '}':
                  ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: ");
                  PopContainer();
                  return JsonToken.EndObject;
              case '[':
                  ValidateState(ValueStates, "Invalid state to read an open square bracket: ");
                  state = State.ArrayStart;
                  containerStack.Push(ContainerType.Array);
                  return JsonToken.StartArray;
              case ']':
                  ValidateState(State.ArrayAfterValue | State.ArrayStart, "Invalid state to read a close square bracket: ");
                  PopContainer();
                  return JsonToken.EndArray;
              case 'n': // Start of null
                  ConsumeLiteral("null");
                  ValidateAndModifyStateForValue("Invalid state to read a null literal: ");
                  return JsonToken.Null;
              case 't': // Start of true
                  ConsumeLiteral("true");
                  ValidateAndModifyStateForValue("Invalid state to read a true literal: ");
                  return JsonToken.True;
              case 'f': // Start of false
                  ConsumeLiteral("false");
                  ValidateAndModifyStateForValue("Invalid state to read a false literal: ");
                  return JsonToken.False;
              case '-': // Start of a number
              case '0':
              case '1':
              case '2':
              case '3':
              case '4':
              case '5':
              case '6':
              case '7':
              case '8':
              case '9':
                  double number = ReadNumber(next.Value);
                  ValidateAndModifyStateForValue("Invalid state to read a number token: ");
                  return JsonToken.Value(number);
              default:
                  // Go through the reader like every other error path, so that any location
                  // information added to CreateException later is picked up here too.
                  throw reader.CreateException("Invalid first character of token: " + next.Value);
          }
      }
  }
  /// <summary>
  /// Checks that the current state is one of the states in <paramref name="validStates"/>.
  /// </summary>
  /// <param name="validStates">The set (flags combination) of acceptable states.</param>
  /// <param name="errorPrefix">Message prefix; the current state is appended to it on failure.</param>
  /// <exception cref="InvalidJsonException">The current state is not in the valid set.</exception>
  private void ValidateState(State validStates, string errorPrefix)
  {
      State overlap = validStates & state;
      if (overlap != 0)
      {
          return;
      }
      throw reader.CreateException(errorPrefix + state);
  }
  /// <summary>
  /// Reads the remainder of a string token, up to and including the closing double quote.
  /// The opening double quote must already have been consumed.
  /// </summary>
  /// <returns>The decoded string contents, with escape sequences resolved.</returns>
  /// <exception cref="InvalidJsonException">
  /// The text ends before the closing quote, contains an unescaped character below U+0020,
  /// or contains surrogate code units that do not form a high/low pair.
  /// </exception>
  private string ReadString()
  {
      StringBuilder text = new StringBuilder();
      // True when the previously appended code unit was a high surrogate awaiting its low partner.
      bool pendingHighSurrogate = false;
      for (;;)
      {
          char current = reader.ReadOrFail("Unexpected end of text while reading string");
          if (current < ' ')
          {
              throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in string literal: U+{0:x4}", (int) current));
          }
          switch (current)
          {
              case '"':
                  // Closing quote: a dangling high surrogate makes the string invalid.
                  if (pendingHighSurrogate)
                  {
                      throw reader.CreateException("Invalid use of surrogate pair code units");
                  }
                  return text.ToString();
              case '\\':
                  // The decoded character is treated as plain content, even if it is a quote.
                  current = ReadEscapedCharacter();
                  break;
          }
          // TODO: Consider only allowing surrogate pairs that are either both escaped,
          // or both not escaped. It would be a very odd text stream that contained a "lone" high surrogate
          // followed by an escaped low surrogate or vice versa... and that couldn't even be represented in UTF-8.
          bool isLowSurrogate = char.IsLowSurrogate(current);
          if (isLowSurrogate != pendingHighSurrogate)
          {
              // Either a low surrogate without a preceding high one, or a high one not followed by a low one.
              throw reader.CreateException("Invalid use of surrogate pair code units");
          }
          pendingHighSurrogate = char.IsHighSurrogate(current);
          text.Append(current);
      }
  }
  /// <summary>
  /// Decodes a single escape sequence. The leading backslash must already have been consumed.
  /// </summary>
  /// <returns>The character represented by the escape sequence.</returns>
  /// <exception cref="InvalidJsonException">
  /// The text ends inside the sequence, or the character after the backslash is not a recognised escape.
  /// </exception>
  private char ReadEscapedCharacter()
  {
      char marker = reader.ReadOrFail("Unexpected end of text while reading character escape sequence");
      if (marker == 'u')
      {
          return ReadUnicodeEscape();
      }
      switch (marker)
      {
          // These escapes stand for themselves.
          case '"':
          case '\\':
          case '/':
              return marker;
          case 'b':
              return '\b';
          case 'f':
              return '\f';
          case 'n':
              return '\n';
          case 'r':
              return '\r';
          case 't':
              return '\t';
          default:
              throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) marker));
      }
  }
  /// <summary>
  /// Reads the four hexadecimal digits of a Unicode escape. The leading \u must already have been consumed.
  /// </summary>
  /// <returns>The UTF-16 code unit encoded by the four digits.</returns>
  /// <exception cref="InvalidJsonException">
  /// The text ends before four digits are read, or a character is not a hexadecimal digit.
  /// </exception>
  private char ReadUnicodeEscape()
  {
      int codeUnit = 0;
      int digitsLeft = 4;
      while (digitsLeft > 0)
      {
          char hex = reader.ReadOrFail("Unexpected end of text while reading Unicode escape sequence");
          int digitValue;
          if ('0' <= hex && hex <= '9')
          {
              digitValue = hex - '0';
          }
          else if ('a' <= hex && hex <= 'f')
          {
              digitValue = 10 + (hex - 'a');
          }
          else if ('A' <= hex && hex <= 'F')
          {
              digitValue = 10 + (hex - 'A');
          }
          else
          {
              throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) hex));
          }
          // Shift the accumulated value up by one hex digit and slot the new digit into the low four bits.
          codeUnit = (codeUnit * 16) | digitValue;
          digitsLeft--;
      }
      return (char) codeUnit;
  }
  /// <summary>
  /// Consumes the rest of a text-only literal (such as null, true or false), whose first letter
  /// has already been read, and fails if the text read does not match.
  /// </summary>
  /// <param name="text">The complete literal expected, including its already-consumed first letter.</param>
  /// <exception cref="InvalidJsonException">The text ends early or a character differs from the literal.</exception>
  private void ConsumeLiteral(string text)
  {
      // Position 0 was consumed by the caller, so matching starts at position 1.
      int position = 1;
      while (position < text.Length)
      {
          char? actual = reader.Read();
          if (!actual.HasValue)
          {
              throw reader.CreateException("Unexpected end of text while reading literal token " + text);
          }
          if (actual.Value != text[position])
          {
              throw reader.CreateException("Unexpected character while reading literal token " + text);
          }
          position++;
      }
  }
  /// <summary>
  /// Reads a numeric literal whose first character has already been consumed, and parses it as a double.
  /// </summary>
  /// <param name="initialCharacter">The character already read: either '-' or the first digit.</param>
  /// <returns>The parsed, finite value.</returns>
  /// <exception cref="InvalidJsonException">
  /// The literal is malformed, or its value cannot be represented as a finite double.
  /// </exception>
  private double ReadNumber(char initialCharacter)
  {
      StringBuilder builder = new StringBuilder();
      if (initialCharacter == '-')
      {
          builder.Append("-");
      }
      else
      {
          // Let ReadInt see the first digit so it can apply the leading-zero rule.
          reader.PushBack(initialCharacter);
      }
      // Each method returns the character it read that doesn't belong in that part,
      // so we know what to do next, including pushing the character back at the end.
      // null is returned for "end of text".
      char? next = ReadInt(builder);
      if (next == '.')
      {
          next = ReadFrac(builder);
      }
      if (next == 'e' || next == 'E')
      {
          next = ReadExp(builder);
      }
      // If we read a character which wasn't part of the number, push it back so we can read it again
      // to parse the next token.
      if (next != null)
      {
          reader.PushBack(next.Value);
      }
      // TODO: What exception should we throw if the value can't be represented as a double?
      double result;
      try
      {
          result = double.Parse(builder.ToString(),
              NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent,
              CultureInfo.InvariantCulture);
      }
      catch (OverflowException)
      {
          // Older runtimes signal an out-of-range value by throwing.
          throw reader.CreateException("Numeric value out of range: " + builder);
      }
      // .NET Core 3.0 and later return positive or negative infinity instead of throwing when the
      // value is out of range. The grammar accepted above can never spell infinity, so an infinite
      // result always means overflow and is reported the same way.
      if (double.IsInfinity(result))
      {
          throw reader.CreateException("Numeric value out of range: " + builder);
      }
      return result;
  }
  /// <summary>
  /// Reads the integer part of a number into <paramref name="builder"/>.
  /// </summary>
  /// <param name="builder">Receives the digits read.</param>
  /// <returns>The first character after the integer part, or null at end of text.</returns>
  /// <exception cref="InvalidJsonException">
  /// There is no leading digit, or a leading 0 is followed by further digits.
  /// </exception>
  private char? ReadInt(StringBuilder builder)
  {
      char leading = reader.ReadOrFail("Invalid numeric literal");
      bool leadingIsDigit = leading >= '0' && leading <= '9';
      if (!leadingIsDigit)
      {
          throw reader.CreateException("Invalid numeric literal");
      }
      builder.Append(leading);

      int followingDigits;
      char? terminator = ConsumeDigits(builder, out followingDigits);
      // "0" on its own is fine, but "0" followed by more digits is rejected.
      if (followingDigits != 0 && leading == '0')
      {
          throw reader.CreateException("Invalid numeric literal: leading 0 for non-zero value.");
      }
      return terminator;
  }
  /// <summary>
  /// Reads the fractional part of a number into <paramref name="builder"/>. The decimal point itself
  /// has already been consumed by the caller.
  /// </summary>
  /// <param name="builder">Receives the decimal point and the digits read.</param>
  /// <returns>The first character after the fraction, or null at end of text.</returns>
  /// <exception cref="InvalidJsonException">No digit follows the decimal point.</exception>
  private char? ReadFrac(StringBuilder builder)
  {
      // The '.' was read by the caller but not yet recorded.
      builder.Append('.');
      int fractionDigits;
      char? terminator = ConsumeDigits(builder, out fractionDigits);
      if (fractionDigits != 0)
      {
          return terminator;
      }
      throw reader.CreateException("Invalid numeric literal: fraction with no trailing digits");
  }
  /// <summary>
  /// Reads the exponent part of a number into <paramref name="builder"/>. The 'e' or 'E' marker
  /// has already been consumed by the caller.
  /// </summary>
  /// <param name="builder">Receives 'E', the optional sign and the exponent digits.</param>
  /// <returns>The first character after the exponent, or null at end of text.</returns>
  /// <exception cref="InvalidJsonException">
  /// The text ends straight after the marker, or no digits follow the marker and optional sign.
  /// </exception>
  private char? ReadExp(StringBuilder builder)
  {
      // The marker was read by the caller; it is always recorded as upper-case 'E'.
      builder.Append('E');
      char signOrDigit = reader.ReadOrFail("Invalid numeric literal: exponent with no trailing digits");
      if (signOrDigit == '+' || signOrDigit == '-')
      {
          builder.Append(signOrDigit);
      }
      else
      {
          // Not a sign: hand it back so the digit loop (or the error check) sees it.
          reader.PushBack(signOrDigit);
      }

      int exponentDigits;
      char? terminator = ConsumeDigits(builder, out exponentDigits);
      if (exponentDigits == 0)
      {
          throw reader.CreateException("Invalid numeric literal: exponent without value");
      }
      return terminator;
  }
  /// <summary>
  /// Appends consecutive ASCII digits ('0' to '9') from the reader to <paramref name="builder"/>.
  /// </summary>
  /// <param name="builder">Receives each digit read.</param>
  /// <param name="count">Set to the number of digits appended.</param>
  /// <returns>
  /// The first non-digit character read (which has been consumed from the reader), or null at end of text.
  /// </returns>
  private char? ConsumeDigits(StringBuilder builder, out int count)
  {
      count = 0;
      char? current = reader.Read();
      while (current != null && current.Value >= '0' && current.Value <= '9')
      {
          count++;
          builder.Append(current.Value);
          current = reader.Read();
      }
      return current;
  }
  /// <summary>
  /// Checks that a value may appear in the current state (failing with the given error prefix if not),
  /// then moves to the state that follows a completed value, e.g. ObjectAfterColon to ObjectAfterProperty.
  /// </summary>
  /// <param name="errorPrefix">Message prefix used if the current state does not allow a value.</param>
  /// <exception cref="InvalidJsonException">A value is not valid in the current state.</exception>
  /// <exception cref="InvalidOperationException">
  /// The state passed validation but has no transition here, i.e. this method is out of step with ValueStates.
  /// </exception>
  private void ValidateAndModifyStateForValue(string errorPrefix)
  {
      ValidateState(ValueStates, errorPrefix);
      if (state == State.StartOfDocument)
      {
          // A top-level simple value: nothing but end of input may follow.
          state = State.ExpectedEndOfDocument;
      }
      else if (state == State.ObjectAfterColon)
      {
          state = State.ObjectAfterProperty;
      }
      else if (state == State.ArrayStart || state == State.ArrayAfterComma)
      {
          state = State.ArrayAfterValue;
      }
      else
      {
          throw new InvalidOperationException("ValidateAndModifyStateForValue does not handle all value states (and should)");
      }
  }
  /// <summary>
  /// Removes the innermost container from the stack and sets the state to the one that follows
  /// a completed value in the container now on top.
  /// </summary>
  /// <exception cref="InvalidOperationException">The enclosing container type is not recognised.</exception>
  private void PopContainer()
  {
      containerStack.Pop();
      ContainerType enclosing = containerStack.Peek();
      if (enclosing == ContainerType.Object)
      {
          state = State.ObjectAfterProperty;
          return;
      }
      if (enclosing == ContainerType.Array)
      {
          state = State.ArrayAfterValue;
          return;
      }
      if (enclosing == ContainerType.Document)
      {
          state = State.ExpectedEndOfDocument;
          return;
      }
      throw new InvalidOperationException("Unexpected container type: " + enclosing);
  }
  /// <summary>
  /// The kinds of container tracked on the container stack.
  /// </summary>
  private enum ContainerType
  {
      /// <summary>The whole document; pushed once by the constructor.</summary>
      Document,
      /// <summary>A JSON object, pushed when '{' is read.</summary>
      Object,
      /// <summary>A JSON array, pushed when '[' is read.</summary>
      Array
  }
  /// <summary>
  /// Possible states of the tokenizer.
  /// </summary>
  /// <remarks>
  /// <para>This is a flags enum purely so that a set of valid states can be represented and checked
  /// simply and efficiently; the tokenizer itself is only ever in one state at a time.</para>
  /// <para>
  /// Each state is documented with an example, where ^ represents the current position within the
  /// text stream. The examples all use string values, but could be any value, including nested
  /// objects/arrays. The complete state of the tokenizer also includes a stack to indicate the
  /// contexts (arrays/objects). The notional state "AfterValue" means a value has been completed,
  /// at which point there's an immediate transition to ExpectedEndOfDocument, ObjectAfterProperty
  /// or ArrayAfterValue depending on the enclosing container.
  /// </para>
  /// <para>
  /// These states were derived manually by reading RFC 7159 carefully.
  /// </para>
  /// </remarks>
  [Flags]
  private enum State
  {
      /// <summary>
      /// ^ { "foo": "bar" }
      /// Before the value in a document. Next states: ObjectStart, ArrayStart, "AfterValue"
      /// </summary>
      StartOfDocument = 0x001,
      /// <summary>
      /// { "foo": "bar" } ^
      /// After the value in a document. Next states: ReaderExhausted
      /// </summary>
      ExpectedEndOfDocument = 0x002,
      /// <summary>
      /// { "foo": "bar" } ^ (and already read to the end of the reader)
      /// Terminal state.
      /// </summary>
      ReaderExhausted = 0x004,
      /// <summary>
      /// { ^ "foo": "bar" }
      /// Before the *first* property in an object.
      /// Next states:
      /// "AfterValue" (empty object)
      /// ObjectBeforeColon (read a name)
      /// </summary>
      ObjectStart = 0x008,
      /// <summary>
      /// { "foo" ^ : "bar", "x": "y" }
      /// After a property name, so expecting a colon.
      /// Next state: ObjectAfterColon
      /// </summary>
      ObjectBeforeColon = 0x010,
      /// <summary>
      /// { "foo" : ^ "bar", "x": "y" }
      /// After the colon following a property name, so expecting the property's value.
      /// Next states:
      /// "AfterValue" (value is simple)
      /// ObjectStart (value is object)
      /// ArrayStart (value is array)
      /// </summary>
      ObjectAfterColon = 0x020,
      /// <summary>
      /// { "foo" : "bar" ^ , "x" : "y" }
      /// At the end of a property, so expecting either a comma or end-of-object
      /// Next states: ObjectAfterComma or "AfterValue"
      /// </summary>
      ObjectAfterProperty = 0x040,
      /// <summary>
      /// { "foo":"bar", ^ "x":"y" }
      /// Read the comma after the previous property, so expecting another property.
      /// This is like ObjectStart, but closing brace isn't valid here
      /// Next state: ObjectBeforeColon.
      /// </summary>
      ObjectAfterComma = 0x080,
      /// <summary>
      /// [ ^ "foo", "bar" ]
      /// Before the *first* value in an array.
      /// Next states:
      /// "AfterValue" (read a value)
      /// "AfterValue" (end of array; will pop stack)
      /// </summary>
      ArrayStart = 0x100,
      /// <summary>
      /// [ "foo" ^ , "bar" ]
      /// After any value in an array, so expecting either a comma or end-of-array
      /// Next states: ArrayAfterComma or "AfterValue"
      /// </summary>
      ArrayAfterValue = 0x200,
      /// <summary>
      /// [ "foo", ^ "bar" ]
      /// After a comma in an array, so there *must* be another value (simple or complex).
      /// Next states: "AfterValue" (simple value), ObjectStart, ArrayStart
      /// </summary>
      ArrayAfterComma = 0x400
  }
  /// <summary>
  /// Thin wrapper over a text reader that can hold one character which has been "un-read",
  /// and which is the single place where parse exceptions are created.
  /// </summary>
  private class PushBackReader
  {
      // TODO: Add locations for errors etc.
      private readonly TextReader reader;

      /// <summary>
      /// The character that was pushed back and not yet re-read, or null when nothing is buffered.
      /// </summary>
      private char? nextChar;

      /// <summary>
      /// Wraps the given reader. Nothing is read from it until <c>Read</c> is called.
      /// </summary>
      internal PushBackReader(TextReader reader)
      {
          // TODO: Wrap the reader in a BufferedReader?
          this.reader = reader;
      }

      /// <summary>
      /// Returns the next character: the pushed-back one if there is one (clearing the buffer),
      /// otherwise the next character of the underlying reader.
      /// </summary>
      /// <returns>The character read, or null if the end of the text has been reached.</returns>
      internal char? Read()
      {
          if (nextChar == null)
          {
              int raw = reader.Read();
              if (raw == -1)
              {
                  return null;
              }
              return (char) raw;
          }
          char? buffered = nextChar;
          nextChar = null;
          return buffered;
      }

      /// <summary>
      /// Reads the next character, failing if the end of the text has been reached.
      /// </summary>
      /// <param name="messageOnFailure">The message for the exception thrown at end of text.</param>
      /// <returns>The character read.</returns>
      /// <exception cref="InvalidJsonException">There are no more characters to read.</exception>
      internal char ReadOrFail(string messageOnFailure)
      {
          char? next = Read();
          if (next.HasValue)
          {
              return next.Value;
          }
          throw CreateException(messageOnFailure);
      }

      /// <summary>
      /// Stores a character so that the next call to <c>Read</c> returns it.
      /// </summary>
      /// <param name="c">The character to return from the next read.</param>
      /// <exception cref="InvalidOperationException">A character is already buffered.</exception>
      internal void PushBack(char c)
      {
          if (nextChar.HasValue)
          {
              throw new InvalidOperationException("Cannot push back when already buffering a character");
          }
          nextChar = c;
      }

      /// <summary>
      /// Creates a new exception appropriate for the current state of the reader.
      /// </summary>
      /// <param name="message">The message for the exception.</param>
      /// <returns>The exception, for the caller to throw.</returns>
      internal InvalidJsonException CreateException(string message)
      {
          // TODO: Keep track of and use the location.
          InvalidJsonException failure = new InvalidJsonException(message);
          return failure;
      }
  }
  699. }
  700. }
  701. }