// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.
// http://code.google.com/p/protobuf/
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using System;
using System.Globalization;
using System.Text.RegularExpressions;

namespace Google.ProtocolBuffers {
  /// <summary>
  /// Represents a stream of tokens parsed from a string.
  /// </summary>
  /// <remarks>
  /// The tokenizer always holds one "current" token. The Consume* methods
  /// validate and convert that token and then advance to the next one;
  /// an empty current token means the end of the input has been reached.
  /// </remarks>
  internal sealed class TextTokenizer {
    private readonly string text;
    private string currentToken;

    /// <summary>
    /// The character index within the text to perform the next regex match at.
    /// </summary>
    private int matchPos = 0;

    /// <summary>
    /// The character index within the text at which the current token begins.
    /// </summary>
    private int pos = 0;

    /// <summary>
    /// The line number of the current token.
    /// </summary>
    private int line = 0;

    /// <summary>
    /// The column number of the current token.
    /// </summary>
    private int column = 0;

    /// <summary>
    /// The line number of the previous token.
    /// </summary>
    private int previousLine = 0;

    /// <summary>
    /// The column number of the previous token.
    /// </summary>
    private int previousColumn = 0;

    // All patterns are anchored with \G so that they only match exactly at
    // the start position handed to Regex.Match, never further ahead.
    private static readonly Regex WhitespaceAndCommentPattern = new Regex("\\G(\\s|(#.*$))+",
        RegexOptions.Compiled | RegexOptions.Multiline);

    private static readonly Regex TokenPattern = new Regex(
      "\\G[a-zA-Z_][0-9a-zA-Z_+-]*|" +              // an identifier
      "\\G[0-9+-][0-9a-zA-Z_.+-]*|" +                  // a number
      "\\G\"([^\"\\\n\\\\]|\\\\.)*(\"|\\\\?$)|" +    // a double-quoted string
      // The body of a single-quoted string must exclude the single quote
      // (not the double quote); otherwise 'a"b' cannot be tokenized and
      // two strings on one line ('a' 'b') are merged into a single token.
      "\\G\'([^\'\\\n\\\\]|\\\\.)*(\'|\\\\?$)",      // a single-quoted string
      RegexOptions.Compiled | RegexOptions.Multiline);

    private static readonly Regex DoubleInfinity = new Regex("^-?inf(inity)?$",
        RegexOptions.Compiled | RegexOptions.IgnoreCase);
    private static readonly Regex FloatInfinity = new Regex("^-?inf(inity)?f?$",
        RegexOptions.Compiled | RegexOptions.IgnoreCase);
    private static readonly Regex FloatNan = new Regex("^nanf?$",
        RegexOptions.Compiled | RegexOptions.IgnoreCase);

    /// <summary>
    /// Constructs a tokenizer that parses tokens from the given text and
    /// positions it on the first token.
    /// </summary>
    /// <param name="text">The text to tokenize; must not be null.</param>
    /// <exception cref="ArgumentNullException">text is null.</exception>
    public TextTokenizer(string text) {
      if (text == null) {
        throw new ArgumentNullException("text");
      }
      this.text = text;
      SkipWhitespace();
      NextToken();
    }

    /// <summary>
    /// Are we at the end of the input?
    /// </summary>
    public bool AtEnd {
      get { return currentToken.Length == 0; }
    }

    /// <summary>
    /// Advances to the next token.
    /// </summary>
    public void NextToken() {
      previousLine = line;
      previousColumn = column;

      // Advance the line counter to the current position.
      while (pos < matchPos) {
        if (text[pos] == '\n') {
          ++line;
          column = 0;
        } else {
          ++column;
        }
        ++pos;
      }

      // Match the next token.
      if (matchPos == text.Length) {
        // EOF
        currentToken = "";
      } else {
        Match match = TokenPattern.Match(text, matchPos);
        if (match.Success) {
          currentToken = match.Value;
          matchPos += match.Length;
        } else {
          // Take one character.
          currentToken = text[matchPos].ToString();
          matchPos++;
        }
        SkipWhitespace();
      }
    }

    /// <summary>
    /// Skip over any whitespace so that matchPos starts at the next token.
    /// </summary>
    private void SkipWhitespace() {
      Match match = WhitespaceAndCommentPattern.Match(text, matchPos);
      if (match.Success) {
        matchPos += match.Length;
      }
    }

    /// <summary>
    /// If the next token exactly matches the given token, consume it and return
    /// true. Otherwise, return false without doing anything.
    /// </summary>
    public bool TryConsume(string token) {
      if (currentToken == token) {
        NextToken();
        return true;
      }
      return false;
    }

    /// <summary>
    /// If the next token exactly matches the specified one, consume it.
    /// Otherwise, throw a FormatException.
    /// </summary>
    /// <param name="token">The exact token text that is expected next.</param>
    public void Consume(string token) {
      if (!TryConsume(token)) {
        throw CreateFormatException("Expected \"" + token + "\".");
      }
    }

    /// <summary>
    /// Returns true if the next token is an integer, but does not consume it.
    /// </summary>
    /// <remarks>
    /// Only the first character is inspected: a digit or a sign.
    /// </remarks>
    public bool LookingAtInteger() {
      if (currentToken.Length == 0) {
        return false;
      }

      char c = currentToken[0];
      return ('0' <= c && c <= '9') || c == '-' || c == '+';
    }

    /// <summary>
    /// If the next token is an identifier, consume it and return its value.
    /// Otherwise, throw a FormatException.
    /// </summary>
    public string ConsumeIdentifier() {
      // An empty token means end of input. The character loop below would
      // accept it vacuously, so it has to be rejected explicitly.
      if (currentToken.Length == 0) {
        throw CreateFormatException("Expected identifier.");
      }

      foreach (char c in currentToken) {
        if (('a' <= c && c <= 'z') ||
            ('A' <= c && c <= 'Z') ||
            ('0' <= c && c <= '9') ||
            (c == '_') || (c == '.')) {
          // OK
        } else {
          throw CreateFormatException("Expected identifier.");
        }
      }

      string result = currentToken;
      NextToken();
      return result;
    }

    /// <summary>
    /// If the next token is a 32-bit signed integer, consume it and return its
    /// value. Otherwise, throw a FormatException.
    /// </summary>
    public int ConsumeInt32() {
      try {
        int result = TextFormat.ParseInt32(currentToken);
        NextToken();
        return result;
      } catch (FormatException e) {
        throw CreateIntegerParseException(e);
      }
    }

    /// <summary>
    /// If the next token is a 32-bit unsigned integer, consume it and return its
    /// value. Otherwise, throw a FormatException.
    /// </summary>
    public uint ConsumeUInt32() {
      try {
        uint result = TextFormat.ParseUInt32(currentToken);
        NextToken();
        return result;
      } catch (FormatException e) {
        throw CreateIntegerParseException(e);
      }
    }

    /// <summary>
    /// If the next token is a 64-bit signed integer, consume it and return its
    /// value. Otherwise, throw a FormatException.
    /// </summary>
    public long ConsumeInt64() {
      try {
        long result = TextFormat.ParseInt64(currentToken);
        NextToken();
        return result;
      } catch (FormatException e) {
        throw CreateIntegerParseException(e);
      }
    }

    /// <summary>
    /// If the next token is a 64-bit unsigned integer, consume it and return its
    /// value. Otherwise, throw a FormatException.
    /// </summary>
    public ulong ConsumeUInt64() {
      try {
        ulong result = TextFormat.ParseUInt64(currentToken);
        NextToken();
        return result;
      } catch (FormatException e) {
        throw CreateIntegerParseException(e);
      }
    }

    /// <summary>
    /// If the next token is a double, consume it and return its value.
    /// Otherwise, throw a FormatException.
    /// </summary>
    public double ConsumeDouble() {
      // We need to parse infinity and nan separately because
      // double.Parse() does not accept "inf", "infinity", or "nan".
      if (DoubleInfinity.IsMatch(currentToken)) {
        // Tokens are machine-readable text, so compare ordinally.
        bool negative = currentToken.StartsWith("-", StringComparison.Ordinal);
        NextToken();
        return negative ? double.NegativeInfinity : double.PositiveInfinity;
      }
      if (currentToken.Equals("nan", StringComparison.OrdinalIgnoreCase)) {
        NextToken();
        return Double.NaN;
      }

      try {
        double result = double.Parse(currentToken, CultureInfo.InvariantCulture);
        NextToken();
        return result;
      } catch (FormatException e) {
        throw CreateFloatParseException(e);
      } catch (OverflowException e) {
        throw CreateFloatParseException(e);
      }
    }

    /// <summary>
    /// If the next token is a float, consume it and return its value.
    /// Otherwise, throw a FormatException.
    /// </summary>
    /// <remarks>
    /// A single trailing lowercase 'f' suffix (as in "1.5f") is accepted and
    /// ignored. The current token is left untouched when parsing fails.
    /// </remarks>
    public float ConsumeFloat() {
      // We need to parse infinity and nan separately because
      // float.Parse() does not accept "inf", "infinity", or "nan".
      if (FloatInfinity.IsMatch(currentToken)) {
        bool negative = currentToken.StartsWith("-", StringComparison.Ordinal);
        NextToken();
        return negative ? float.NegativeInfinity : float.PositiveInfinity;
      }
      if (FloatNan.IsMatch(currentToken)) {
        NextToken();
        return float.NaN;
      }

      // Strip the suffix on a local copy so that a failed parse does not
      // alter the tokenizer's state, and strip exactly one 'f' so that
      // malformed input such as "1.5ff" is rejected rather than accepted.
      string number = currentToken;
      if (number.EndsWith("f", StringComparison.Ordinal)) {
        number = number.Substring(0, number.Length - 1);
      }

      try {
        float result = float.Parse(number, CultureInfo.InvariantCulture);
        NextToken();
        return result;
      } catch (FormatException e) {
        throw CreateFloatParseException(e);
      } catch (OverflowException e) {
        throw CreateFloatParseException(e);
      }
    }

    /// <summary>
    /// If the next token is a Boolean, consume it and return its value.
    /// Otherwise, throw a FormatException.
    /// </summary>
    public bool ConsumeBoolean() {
      if (currentToken == "true") {
        NextToken();
        return true;
      }
      if (currentToken == "false") {
        NextToken();
        return false;
      }
      throw CreateFormatException("Expected \"true\" or \"false\".");
    }

    /// <summary>
    /// If the next token is a string, consume it and return its (unescaped) value.
    /// Otherwise, throw a FormatException.
    /// </summary>
    public string ConsumeString() {
      return ConsumeByteString().ToStringUtf8();
    }

    /// <summary>
    /// If the next token is a string, consume it, unescape it as a
    /// ByteString and return it. Otherwise, throw a FormatException.
    /// </summary>
    public ByteString ConsumeByteString() {
      char quote = currentToken.Length > 0 ? currentToken[0] : '\0';
      if (quote != '\"' && quote != '\'') {
        throw CreateFormatException("Expected string.");
      }

      // The token pattern also matches strings that run to the end of the
      // line without a closing quote, so the terminator is checked here.
      if (currentToken.Length < 2 ||
          currentToken[currentToken.Length - 1] != quote) {
        throw CreateFormatException("String missing ending quote.");
      }

      try {
        string escaped = currentToken.Substring(1, currentToken.Length - 2);
        ByteString result = TextFormat.UnescapeBytes(escaped);
        NextToken();
        return result;
      } catch (FormatException e) {
        // Re-wrap so that the message carries the line and column numbers.
        throw CreateFormatException(e.Message);
      }
    }

    /// <summary>
    /// Returns a format exception with the current line and column numbers
    /// in the description, suitable for throwing.
    /// </summary>
    public FormatException CreateFormatException(string description) {
      // Note:  People generally prefer one-based line and column numbers.
      return new FormatException((line + 1) + ":" + (column + 1) + ": " + description);
    }

    /// <summary>
    /// Returns a format exception with the line and column numbers of the
    /// previous token in the description, suitable for throwing.
    /// </summary>
    public FormatException CreateFormatExceptionPreviousToken(string description) {
      // Note:  People generally prefer one-based line and column numbers.
      return new FormatException((previousLine + 1) + ":" + (previousColumn + 1) + ": " + description);
    }

    /// <summary>
    /// Constructs an appropriate FormatException for the given existing exception
    /// when trying to parse an integer.
    /// </summary>
    private FormatException CreateIntegerParseException(FormatException e) {
      return CreateFormatException("Couldn't parse integer: " + e.Message);
    }

    /// <summary>
    /// Constructs an appropriate FormatException for the given existing exception
    /// when trying to parse a float or double.
    /// </summary>
    private FormatException CreateFloatParseException(Exception e) {
      return CreateFormatException("Couldn't parse number: " + e.Message);
    }
  }
}