فهرست منبع

Improve number handling in JSON parsing

- Tighten up on Infinity/NaN handling in terms of whitespace handling (and test casing)
- Validate that values are genuinely integers when they've been parsed from a JSON number (ignoring the fact that 1.0000000000000000001 == 1 as a double...)
- Allow exponents and decimal points in string representations
Jon Skeet 9 سال پیش
والد
کامیت
aa431a0481
2فایلهای تغییر یافته به همراه91 افزوده شده و 51 حذف شده
  1. 35 12
      csharp/src/Google.Protobuf.Test/JsonParserTest.cs
  2. 56 39
      csharp/src/Google.Protobuf/JsonParser.cs

+ 35 - 12
csharp/src/Google.Protobuf.Test/JsonParserTest.cs

@@ -39,9 +39,7 @@ using System;
 namespace Google.Protobuf
 {
     /// <summary>
-    /// Unit tests for JSON parsing. Some tests are ignored at the moment as the desired behaviour
-    /// isn't fully known, either in terms of which exceptions should be thrown or whether they should
-    /// count as valid values.
+    /// Unit tests for JSON parsing.
     /// </summary>
     public class JsonParserTest
     {
@@ -205,6 +203,8 @@ namespace Google.Protobuf
 
         [Test]
         [TestCase("+0")]
+        [TestCase(" 1")]
+        [TestCase("1 ")]
         [TestCase("00")]
         [TestCase("-00")]
         [TestCase("--1")]
@@ -318,7 +318,18 @@ namespace Google.Protobuf
         [TestCase("1.0.0")]
         [TestCase("+1")]
         [TestCase("00")]
+        [TestCase("01")]
+        [TestCase("-00")]
+        [TestCase("-01")]
         [TestCase("--1")]
+        [TestCase(" Infinity")]
+        [TestCase(" -Infinity")]
+        [TestCase("NaN ")]
+        [TestCase("Infinity ")]
+        [TestCase("-Infinity ")]
+        [TestCase(" NaN")]
+        [TestCase("INFINITY")]
+        [TestCase("nan")]
         [TestCase("\u00BD")] // 1/2 as a single Unicode character. Just sanity checking...
         public void StringToDouble_Invalid(string jsonValue)
         {
@@ -363,6 +374,10 @@ namespace Google.Protobuf
         [TestCase("-1", -1)]
         [TestCase("2147483647", 2147483647)]
         [TestCase("-2147483648", -2147483648)]
+        [TestCase("1e1", 10)]
+        [TestCase("-1e1", -10)]
+        [TestCase("10.00", 10)]
+        [TestCase("-10.00", -10)]
         public void NumberToInt32_Valid(string jsonValue, int expectedParsedValue)
         {
             string json = "{ \"singleInt32\": " + jsonValue + "}";
@@ -376,7 +391,8 @@ namespace Google.Protobuf
         [TestCase("-00", typeof(InvalidJsonException))]
         [TestCase("--1", typeof(InvalidJsonException))]
         [TestCase("+1", typeof(InvalidJsonException))]
-        [TestCase("1.5", typeof(InvalidProtocolBufferException), Ignore = true, Reason = "Desired behaviour unclear")]
+        [TestCase("1.5", typeof(InvalidProtocolBufferException))]
+        // Value is out of range
         [TestCase("1e10", typeof(InvalidProtocolBufferException))]
         [TestCase("2147483648", typeof(InvalidProtocolBufferException))]
         [TestCase("-2147483649", typeof(InvalidProtocolBufferException))]
@@ -411,8 +427,10 @@ namespace Google.Protobuf
         [TestCase("0", 0L)]
         [TestCase("1", 1L)]
         [TestCase("-1", -1L)]
-        [TestCase("9223372036854775807", 9223372036854775807, Ignore = true, Reason = "Desired behaviour unclear")]
-        [TestCase("-9223372036854775808", -9223372036854775808, Ignore = true, Reason = "Desired behaviour unclear")]
+        // long.MaxValue isn't actually representable as a double. This string value is the highest
+        // representable value which isn't greater than long.MaxValue.
+        [TestCase("9223372036854769664", 9223372036854769664)]
+        [TestCase("-9223372036854775808", -9223372036854775808)]
         public void NumberToInt64_Valid(string jsonValue, long expectedParsedValue)
         {
             string json = "{ \"singleInt64\": " + jsonValue + "}";
@@ -422,8 +440,11 @@ namespace Google.Protobuf
 
         // Assume that anything non-bounds-related is covered in the Int32 case
         [Test]
-        [TestCase("-9223372036854775809", Ignore = true, Reason = "Desired behaviour unclear")]
-        [TestCase("9223372036854775808", Ignore = true, Reason = "Desired behaviour unclear")]
+        [TestCase("9223372036854775808")]
+        // Theoretical bound would be -9223372036854775809, but when that is parsed to a double
+        // we end up with the exact value of long.MinValue due to lack of precision. The value here
+        // is the "next double down".
+        [TestCase("-9223372036854780000")]
         public void NumberToInt64_Invalid(string jsonValue)
         {
             string json = "{ \"singleInt64\": " + jsonValue + "}";
@@ -433,7 +454,9 @@ namespace Google.Protobuf
         [Test]
         [TestCase("0", 0UL)]
         [TestCase("1", 1UL)]
-        [TestCase("18446744073709551615", 18446744073709551615, Ignore = true, Reason = "Desired behaviour unclear")]
+        // ulong.MaxValue isn't representable as a double. This value is the largest double within
+        // the range of ulong.
+        [TestCase("18446744073709500416", 18446744073709500416UL)]
         public void NumberToUInt64_Valid(string jsonValue, ulong expectedParsedValue)
         {
             string json = "{ \"singleUint64\": " + jsonValue + "}";
@@ -475,9 +498,9 @@ namespace Google.Protobuf
         }
 
         [Test]
-        [TestCase("1.7977e308", Ignore = true, Reason = "Desired behaviour unclear")]
-        [TestCase("-1.7977e308", Ignore = true, Reason = "Desired behaviour unclear")]
-        [TestCase("1e309", Ignore = true, Reason = "Desired behaviour unclear")]
+        [TestCase("1.7977e308")]
+        [TestCase("-1.7977e308")]
+        [TestCase("1e309")]
         [TestCase("1,0")]
         [TestCase("1.0.0")]
         [TestCase("+1")]

+ 56 - 39
csharp/src/Google.Protobuf/JsonParser.cs

@@ -540,17 +540,17 @@ namespace Google.Protobuf
                 case FieldType.Int32:
                 case FieldType.SInt32:
                 case FieldType.SFixed32:
-                    return ParseNumericString(keyText, int.Parse, false);
+                    return ParseNumericString(keyText, int.Parse);
                 case FieldType.UInt32:
                 case FieldType.Fixed32:
-                    return ParseNumericString(keyText, uint.Parse, false);
+                    return ParseNumericString(keyText, uint.Parse);
                 case FieldType.Int64:
                 case FieldType.SInt64:
                 case FieldType.SFixed64:
-                    return ParseNumericString(keyText, long.Parse, false);
+                    return ParseNumericString(keyText, long.Parse);
                 case FieldType.UInt64:
                 case FieldType.Fixed64:
-                    return ParseNumericString(keyText, ulong.Parse, false);
+                    return ParseNumericString(keyText, ulong.Parse);
                 default:
                     throw new InvalidProtocolBufferException("Invalid field type for map: " + field.FieldType);
             }
@@ -561,7 +561,6 @@ namespace Google.Protobuf
             double value = token.NumberValue;
             checked
             {
-                // TODO: Validate that it's actually an integer, possibly in terms of the textual representation?                
                 try
                 {
                     switch (field.FieldType)
@@ -569,16 +568,20 @@ namespace Google.Protobuf
                         case FieldType.Int32:
                         case FieldType.SInt32:
                         case FieldType.SFixed32:
+                            CheckInteger(value);
                             return (int) value;
                         case FieldType.UInt32:
                         case FieldType.Fixed32:
+                            CheckInteger(value);
                             return (uint) value;
                         case FieldType.Int64:
                         case FieldType.SInt64:
                         case FieldType.SFixed64:
+                            CheckInteger(value);
                             return (long) value;
                         case FieldType.UInt64:
                         case FieldType.Fixed64:
+                            CheckInteger(value);
                             return (ulong) value;
                         case FieldType.Double:
                             return value;
@@ -597,20 +600,32 @@ namespace Google.Protobuf
                                 {
                                     return float.NegativeInfinity;
                                 }
-                                throw new InvalidProtocolBufferException("Value out of range: " + value);
+                                throw new InvalidProtocolBufferException($"Value out of range: {value}");
                             }
                             return (float) value;
                         default:
-                            throw new InvalidProtocolBufferException("Unsupported conversion from JSON number for field type " + field.FieldType);
+                            throw new InvalidProtocolBufferException($"Unsupported conversion from JSON number for field type {field.FieldType}");
                     }
                 }
                 catch (OverflowException)
                 {
-                    throw new InvalidProtocolBufferException("Value out of range: " + value);
+                    throw new InvalidProtocolBufferException($"Value out of range: {value}");
                 }
             }
         }
 
+        private static void CheckInteger(double value)
+        {
+            if (double.IsInfinity(value) || double.IsNaN(value))
+            {
+                throw new InvalidProtocolBufferException($"Value not an integer: {value}");
+            }
+            if (value != Math.Floor(value))
+            {
+                throw new InvalidProtocolBufferException($"Value not an integer: {value}");
+            }            
+        }
+
         private static object ParseSingleStringValue(FieldDescriptor field, string text)
         {
             switch (field.FieldType)
@@ -622,43 +637,35 @@ namespace Google.Protobuf
                 case FieldType.Int32:
                 case FieldType.SInt32:
                 case FieldType.SFixed32:
-                    return ParseNumericString(text, int.Parse, false);
+                    return ParseNumericString(text, int.Parse);
                 case FieldType.UInt32:
                 case FieldType.Fixed32:
-                    return ParseNumericString(text, uint.Parse, false);
+                    return ParseNumericString(text, uint.Parse);
                 case FieldType.Int64:
                 case FieldType.SInt64:
                 case FieldType.SFixed64:
-                    return ParseNumericString(text, long.Parse, false);
+                    return ParseNumericString(text, long.Parse);
                 case FieldType.UInt64:
                 case FieldType.Fixed64:
-                    return ParseNumericString(text, ulong.Parse, false);
+                    return ParseNumericString(text, ulong.Parse);
                 case FieldType.Double:
-                    double d = ParseNumericString(text, double.Parse, true);
-                    // double.Parse can return +/- infinity on Mono for non-infinite values which are out of range for double.
-                    if (double.IsInfinity(d) && !text.Contains("Infinity"))
-                    {
-                        throw new InvalidProtocolBufferException("Invalid numeric value: " + text);
-                    }
+                    double d = ParseNumericString(text, double.Parse);
+                    ValidateInfinityAndNan(text, double.IsPositiveInfinity(d), double.IsNegativeInfinity(d), double.IsNaN(d));
                     return d;
                 case FieldType.Float:
-                    float f = ParseNumericString(text, float.Parse, true);
-                    // float.Parse can return +/- infinity on Mono for non-infinite values which are out of range for float.
-                    if (float.IsInfinity(f) && !text.Contains("Infinity"))
-                    {
-                        throw new InvalidProtocolBufferException("Invalid numeric value: " + text);
-                    }
+                    float f = ParseNumericString(text, float.Parse);
+                    ValidateInfinityAndNan(text, float.IsPositiveInfinity(f), float.IsNegativeInfinity(f), float.IsNaN(f));
                     return f;
                 case FieldType.Enum:
                     var enumValue = field.EnumType.FindValueByName(text);
                     if (enumValue == null)
                     {
-                        throw new InvalidProtocolBufferException("Invalid enum value: " + text + " for enum type: " + field.EnumType.FullName);
+                        throw new InvalidProtocolBufferException($"Invalid enum value: {text} for enum type: {field.EnumType.FullName}");
                     }
                     // Just return it as an int, and let the CLR convert it.
                     return enumValue.Number;
                 default:
-                    throw new InvalidProtocolBufferException("Unsupported conversion from JSON string for field type " + field.FieldType);
+                    throw new InvalidProtocolBufferException($"Unsupported conversion from JSON string for field type {field.FieldType}");
             }
         }
 
@@ -670,43 +677,53 @@ namespace Google.Protobuf
             return field.MessageType.Parser.CreateTemplate();
         }
 
-        private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser, bool floatingPoint)
+        private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser)
         {
-            // TODO: Prohibit leading zeroes (but allow 0!)
-            // TODO: Validate handling of "Infinity" etc. (Should be case sensitive, no leading whitespace etc)
             // Can't prohibit this with NumberStyles.
             if (text.StartsWith("+"))
             {
-                throw new InvalidProtocolBufferException("Invalid numeric value: " + text);
+                throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
             }
             if (text.StartsWith("0") && text.Length > 1)
             {
                 if (text[1] >= '0' && text[1] <= '9')
                 {
-                    throw new InvalidProtocolBufferException("Invalid numeric value: " + text);
+                    throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
                 }
             }
             else if (text.StartsWith("-0") && text.Length > 2)
             {
                 if (text[2] >= '0' && text[2] <= '9')
                 {
-                    throw new InvalidProtocolBufferException("Invalid numeric value: " + text);
+                    throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
                 }
             }
             try
             {
-                var styles = floatingPoint
-                    ? NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent
-                    : NumberStyles.AllowLeadingSign;
-                return parser(text, styles, CultureInfo.InvariantCulture);
+                return parser(text, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture);
             }
             catch (FormatException)
             {
-                throw new InvalidProtocolBufferException("Invalid numeric value for type: " + text);
+                throw new InvalidProtocolBufferException($"Invalid numeric value for type: {text}");
             }
             catch (OverflowException)
             {
-                throw new InvalidProtocolBufferException("Value out of range: " + text);
+                throw new InvalidProtocolBufferException($"Value out of range: {text}");
+            }
+        }
+
+        /// <summary>
+        /// Checks that any infinite/NaN values originated from the correct text.
+        /// This corrects the lenient whitespace handling of double.Parse/float.Parse, as well as the
+        /// way that Mono parses out-of-range values as infinity.
+        /// </summary>
+        private static void ValidateInfinityAndNan(string text, bool isPositiveInfinity, bool isNegativeInfinity, bool isNaN)
+        {
+            if ((isPositiveInfinity && text != "Infinity") ||
+                (isNegativeInfinity && text != "-Infinity") ||
+                (isNaN && text != "NaN"))
+            {
+                throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
             }
         }
 
@@ -719,7 +736,7 @@ namespace Google.Protobuf
             var match = TimestampRegex.Match(token.StringValue);
             if (!match.Success)
             {
-                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
+                throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
             }
             var dateTime = match.Groups["datetime"].Value;
             var subseconds = match.Groups["subseconds"].Value;