|  | @@ -0,0 +1,341 @@
 | 
	
		
			
				|  |  | +using System;
 | 
	
		
			
				|  |  | +using System.Globalization;
 | 
	
		
			
				|  |  | +using System.Text.RegularExpressions;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +namespace Google.ProtocolBuffers {
 | 
	
		
			
				|  |  | +  /// <summary>
 | 
	
		
			
				|  |  | +  /// Represents a stream of tokens parsed from a string.
 | 
	
		
			
				|  |  | +  /// </summary>
 | 
	
		
			
				|  |  | +  internal sealed class TextTokenizer {
 | 
	
		
			
				|  |  | +    private readonly string text;
 | 
	
		
			
				|  |  | +    private string currentToken;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// The character index within the text to perform the next regex match at.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    private int matchPos = 0;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// The character index within the text at which the current token begins.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    private int pos = 0;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// The line number of the current token.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    private int line = 0;
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// The column number of the current token.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    private int column = 0;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// The line number of the previous token.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    private int previousLine = 0;
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// The column number of the previous token.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    private int previousColumn = 0;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    private static Regex WhitespaceAndCommentPattern = new Regex("\\G(\\s|(#[^\\\n]*\\n))+", RegexOptions.Compiled);
 | 
	
		
			
				|  |  | +    private static Regex TokenPattern = new Regex(
 | 
	
		
			
				|  |  | +      "\\G[a-zA-Z_][0-9a-zA-Z_+-]*|" +              // an identifier
 | 
	
		
			
				|  |  | +      "\\G[0-9+-][0-9a-zA-Z_.+-]*|" +                  // a number
 | 
	
		
			
				|  |  | +      "\\G\"([^\"\\\n\\\\]|\\\\[^\\\n])*(\"|\\\\?$)|" +    // a double-quoted string
 | 
	
		
			
				|  |  | +      "\\G\'([^\"\\\n\\\\]|\\\\[^\\\n])*(\'|\\\\?$)",      // a single-quoted string
 | 
	
		
			
				|  |  | +      RegexOptions.Compiled);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /** Construct a tokenizer that parses tokens from the given text. */
 | 
	
		
			
				|  |  | +    public TextTokenizer(string text) {
 | 
	
		
			
				|  |  | +      this.text = text;
 | 
	
		
			
				|  |  | +      SkipWhitespace();
 | 
	
		
			
				|  |  | +      NextToken();
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// Are we at the end of the input?
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    public bool AtEnd {
 | 
	
		
			
				|  |  | +      get { return currentToken.Length == 0; }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// Advances to the next token.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    public void NextToken() {
 | 
	
		
			
				|  |  | +      previousLine = line;
 | 
	
		
			
				|  |  | +      previousColumn = column;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +      // Advance the line counter to the current position.
 | 
	
		
			
				|  |  | +      while (pos < matchPos) {
 | 
	
		
			
				|  |  | +        if (text[pos] == '\n') {
 | 
	
		
			
				|  |  | +          ++line;
 | 
	
		
			
				|  |  | +          column = 0;
 | 
	
		
			
				|  |  | +        } else {
 | 
	
		
			
				|  |  | +          ++column;
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +        ++pos;
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +      // Match the next token.
 | 
	
		
			
				|  |  | +      if (matchPos == text.Length) {
 | 
	
		
			
				|  |  | +        // EOF
 | 
	
		
			
				|  |  | +        currentToken = "";
 | 
	
		
			
				|  |  | +      } else {
 | 
	
		
			
				|  |  | +        Match match = TokenPattern.Match(text, matchPos);
 | 
	
		
			
				|  |  | +        if (match.Success) {
 | 
	
		
			
				|  |  | +          currentToken = match.Value;
 | 
	
		
			
				|  |  | +          matchPos += match.Length;
 | 
	
		
			
				|  |  | +        } else {
 | 
	
		
			
				|  |  | +          // Take one character.
 | 
	
		
			
				|  |  | +          currentToken = text[matchPos].ToString();
 | 
	
		
			
				|  |  | +          matchPos++;
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        SkipWhitespace();
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// Skip over any whitespace so that matchPos starts at the next token.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    private void SkipWhitespace() {
 | 
	
		
			
				|  |  | +      Match match = WhitespaceAndCommentPattern.Match(text, matchPos);
 | 
	
		
			
				|  |  | +      if (match.Success) {
 | 
	
		
			
				|  |  | +        matchPos += match.Length;
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// If the next token exactly matches the given token, consume it and return
 | 
	
		
			
				|  |  | +    /// true. Otherwise, return false without doing anything.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    public bool TryConsume(string token) {
 | 
	
		
			
				|  |  | +      if (currentToken == token) {
 | 
	
		
			
				|  |  | +        NextToken();
 | 
	
		
			
				|  |  | +        return true;
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +      return false;
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /*
 | 
	
		
			
				|  |  | +     * If the next token exactly matches {@code token}, consume it.  Otherwise,
 | 
	
		
			
				|  |  | +     * throw a {@link ParseException}.
 | 
	
		
			
				|  |  | +     */
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// If the next token exactly matches the specified one, consume it.
 | 
	
		
			
				|  |  | +    /// Otherwise, throw a FormatException.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    /// <param name="token"></param>
 | 
	
		
			
				|  |  | +    public void Consume(string token) {
 | 
	
		
			
				|  |  | +      if (!TryConsume(token)) {
 | 
	
		
			
				|  |  | +        throw CreateFormatException("Expected \"" + token + "\".");
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// Returns true if the next token is an integer, but does not consume it.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    public bool LookingAtInteger() {
 | 
	
		
			
				|  |  | +      if (currentToken.Length == 0) {
 | 
	
		
			
				|  |  | +        return false;
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +      char c = currentToken[0];
 | 
	
		
			
				|  |  | +      return ('0' <= c && c <= '9') || c == '-' || c == '+';
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// If the next token is an identifier, consume it and return its value.
 | 
	
		
			
				|  |  | +    /// Otherwise, throw a FormatException.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    public string ConsumeIdentifier() {
 | 
	
		
			
				|  |  | +      foreach (char c in currentToken) {
 | 
	
		
			
				|  |  | +        if (('a' <= c && c <= 'z') ||
 | 
	
		
			
				|  |  | +            ('A' <= c && c <= 'Z') ||
 | 
	
		
			
				|  |  | +            ('0' <= c && c <= '9') ||
 | 
	
		
			
				|  |  | +            (c == '_') || (c == '.')) {
 | 
	
		
			
				|  |  | +          // OK
 | 
	
		
			
				|  |  | +        } else {
 | 
	
		
			
				|  |  | +          throw CreateFormatException("Expected identifier.");
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +      string result = currentToken;
 | 
	
		
			
				|  |  | +      NextToken();
 | 
	
		
			
				|  |  | +      return result;
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// If the next token is a 32-bit signed integer, consume it and return its 
 | 
	
		
			
				|  |  | +    /// value. Otherwise, throw a FormatException.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    public int ConsumeInt32()  {
 | 
	
		
			
				|  |  | +      try {
 | 
	
		
			
				|  |  | +        int result = TextFormat.ParseInt32(currentToken);
 | 
	
		
			
				|  |  | +        NextToken();
 | 
	
		
			
				|  |  | +        return result;
 | 
	
		
			
				|  |  | +      } catch (FormatException e) {
 | 
	
		
			
				|  |  | +        throw CreateIntegerParseException(e);
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// If the next token is a 32-bit unsigned integer, consume it and return its
 | 
	
		
			
				|  |  | +    /// value. Otherwise, throw a FormatException.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    public uint ConsumeUInt32() {
 | 
	
		
			
				|  |  | +      try {
 | 
	
		
			
				|  |  | +        uint result = TextFormat.ParseUInt32(currentToken);
 | 
	
		
			
				|  |  | +        NextToken();
 | 
	
		
			
				|  |  | +        return result;
 | 
	
		
			
				|  |  | +      } catch (FormatException e) {
 | 
	
		
			
				|  |  | +        throw CreateIntegerParseException(e);
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// If the next token is a 64-bit signed integer, consume it and return its
 | 
	
		
			
				|  |  | +    /// value. Otherwise, throw a FormatException.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    public long ConsumeInt64() {
 | 
	
		
			
				|  |  | +      try {
 | 
	
		
			
				|  |  | +        long result = TextFormat.ParseInt64(currentToken);
 | 
	
		
			
				|  |  | +        NextToken();
 | 
	
		
			
				|  |  | +        return result;
 | 
	
		
			
				|  |  | +      } catch (FormatException e) {
 | 
	
		
			
				|  |  | +        throw CreateIntegerParseException(e);
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// If the next token is a 64-bit unsigned integer, consume it and return its
 | 
	
		
			
				|  |  | +    /// value. Otherwise, throw a FormatException.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    public ulong ConsumeUInt64() {
 | 
	
		
			
				|  |  | +      try {
 | 
	
		
			
				|  |  | +        ulong result = TextFormat.ParseUInt64(currentToken);
 | 
	
		
			
				|  |  | +        NextToken();
 | 
	
		
			
				|  |  | +        return result;
 | 
	
		
			
				|  |  | +      } catch (FormatException e) {
 | 
	
		
			
				|  |  | +        throw CreateIntegerParseException(e);
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// If the next token is a double, consume it and return its value.
 | 
	
		
			
				|  |  | +    /// Otherwise, throw a FormatException.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    public double ConsumeDouble() {
 | 
	
		
			
				|  |  | +      try {
 | 
	
		
			
				|  |  | +        double result = double.Parse(currentToken, CultureInfo.InvariantCulture);
 | 
	
		
			
				|  |  | +        NextToken();
 | 
	
		
			
				|  |  | +        return result;
 | 
	
		
			
				|  |  | +      } catch (FormatException e) {
 | 
	
		
			
				|  |  | +        throw CreateFloatParseException(e);
 | 
	
		
			
				|  |  | +      } catch (OverflowException e) {
 | 
	
		
			
				|  |  | +        throw CreateFloatParseException(e);
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// If the next token is a float, consume it and return its value.
 | 
	
		
			
				|  |  | +    /// Otherwise, throw a FormatException.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    public float consumeFloat() {
 | 
	
		
			
				|  |  | +      try {
 | 
	
		
			
				|  |  | +        float result = float.Parse(currentToken, CultureInfo.InvariantCulture);
 | 
	
		
			
				|  |  | +        NextToken();
 | 
	
		
			
				|  |  | +        return result;
 | 
	
		
			
				|  |  | +      } catch (FormatException e) {
 | 
	
		
			
				|  |  | +        throw CreateFloatParseException(e);
 | 
	
		
			
				|  |  | +      } catch (OverflowException e) {
 | 
	
		
			
				|  |  | +        throw CreateFloatParseException(e);
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// If the next token is a Boolean, consume it and return its value.
 | 
	
		
			
				|  |  | +    /// Otherwise, throw a FormatException.    
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    public bool ConsumeBoolean() {
 | 
	
		
			
				|  |  | +      if (currentToken == "true") {
 | 
	
		
			
				|  |  | +        NextToken();
 | 
	
		
			
				|  |  | +        return true;
 | 
	
		
			
				|  |  | +      } 
 | 
	
		
			
				|  |  | +      if (currentToken == "false") {
 | 
	
		
			
				|  |  | +        NextToken();
 | 
	
		
			
				|  |  | +        return false;
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +      throw CreateFormatException("Expected \"true\" or \"false\".");
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// If the next token is a string, consume it and return its (unescaped) value.
 | 
	
		
			
				|  |  | +    /// Otherwise, throw a FormatException.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    public string ConsumeString() {
 | 
	
		
			
				|  |  | +      return ConsumeByteString().ToStringUtf8();
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// If the next token is a string, consume it, unescape it as a
 | 
	
		
			
				|  |  | +    /// ByteString and return it. Otherwise, throw a FormatException.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    public ByteString ConsumeByteString() {
 | 
	
		
			
				|  |  | +      char quote = currentToken.Length > 0 ? currentToken[0] : '\0';
 | 
	
		
			
				|  |  | +      if (quote != '\"' && quote != '\'') {
 | 
	
		
			
				|  |  | +        throw CreateFormatException("Expected string.");
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +      if (currentToken.Length < 2 ||
 | 
	
		
			
				|  |  | +          currentToken[currentToken.Length-1] != quote) {
 | 
	
		
			
				|  |  | +        throw CreateFormatException("String missing ending quote.");
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +      try {
 | 
	
		
			
				|  |  | +        string escaped = currentToken.Substring(1, currentToken.Length - 2);
 | 
	
		
			
				|  |  | +        ByteString result = TextFormat.UnescapeBytes(escaped);
 | 
	
		
			
				|  |  | +        NextToken();
 | 
	
		
			
				|  |  | +        return result;
 | 
	
		
			
				|  |  | +      } catch (FormatException e) {
 | 
	
		
			
				|  |  | +        throw CreateFormatException(e.Message);
 | 
	
		
			
				|  |  | +      }
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// Returns a format exception with the current line and column numbers
 | 
	
		
			
				|  |  | +    /// in the description, suitable for throwing.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    public FormatException CreateFormatException(string description) {
 | 
	
		
			
				|  |  | +      // Note:  People generally prefer one-based line and column numbers.
 | 
	
		
			
				|  |  | +      return new FormatException((line + 1) + ":" + (column + 1) + ": " + description);
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// Returns a format exception with the line and column numbers of the
 | 
	
		
			
				|  |  | +    /// previous token in the description, suitable for throwing.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    public FormatException CreateFormatExceptionPreviousToken(string description) {
 | 
	
		
			
				|  |  | +      // Note:  People generally prefer one-based line and column numbers.
 | 
	
		
			
				|  |  | +      return new FormatException((previousLine + 1) + ":" + (previousColumn + 1) + ": " + description);
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// Constructs an appropriate FormatException for the given existing exception
 | 
	
		
			
				|  |  | +    /// when trying to parse an integer.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    private FormatException CreateIntegerParseException(FormatException e) {
 | 
	
		
			
				|  |  | +      return CreateFormatException("Couldn't parse integer: " + e.Message);
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    /// <summary>
 | 
	
		
			
				|  |  | +    /// Constructs an appropriate FormatException for the given existing exception
 | 
	
		
			
				|  |  | +    /// when trying to parse a float or double.
 | 
	
		
			
				|  |  | +    /// </summary>
 | 
	
		
			
				|  |  | +    private FormatException CreateFloatParseException(Exception e) {
 | 
	
		
			
				|  |  | +      return CreateFormatException("Couldn't parse number: " + e.Message);
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +  }
 | 
	
		
			
				|  |  | +}
 |