Просмотр исходного кода

Merge pull request #941 from jskeet/recursion-limit

Add recursion limit handling to JSON parsing.
Jan Tattermusch 10 лет назад
Родитель
Сommit
ffe25c76ea

+ 2 - 2
csharp/src/Google.Protobuf.Test/CodedInputStreamTest.cs

@@ -284,7 +284,7 @@ namespace Google.Protobuf
             Assert.Throws<InvalidProtocolBufferException>(() => input.ReadBytes());
         }
 
-        private static TestRecursiveMessage MakeRecursiveMessage(int depth)
+        internal static TestRecursiveMessage MakeRecursiveMessage(int depth)
         {
             if (depth == 0)
             {
@@ -296,7 +296,7 @@ namespace Google.Protobuf
             }
         }
 
-        private static void AssertMessageDepth(TestRecursiveMessage message, int depth)
+        internal static void AssertMessageDepth(TestRecursiveMessage message, int depth)
         {
             if (depth == 0)
             {

+ 18 - 0
csharp/src/Google.Protobuf.Test/JsonParserTest.cs

@@ -723,5 +723,23 @@ namespace Google.Protobuf
             string json = "{} 10";
             Assert.Throws<InvalidJsonException>(() => TestAllTypes.Parser.ParseJson(json));
         }
+
+        /// <summary>
+        /// JSON equivalent to <see cref="CodedInputStreamTest.MaliciousRecursion"/>
+        /// </summary>
+        [Test]
+        public void MaliciousRecursion()
+        {
+            string data64 = CodedInputStreamTest.MakeRecursiveMessage(64).ToString();
+            string data65 = CodedInputStreamTest.MakeRecursiveMessage(65).ToString();
+
+            var parser64 = new JsonParser(new JsonParser.Settings(64));
+            CodedInputStreamTest.AssertMessageDepth(parser64.Parse<TestRecursiveMessage>(data64), 64);
+            Assert.Throws<InvalidProtocolBufferException>(() => parser64.Parse<TestRecursiveMessage>(data65));
+
+            var parser63 = new JsonParser(new JsonParser.Settings(63));
+            Assert.Throws<InvalidProtocolBufferException>(() => parser63.Parse<TestRecursiveMessage>(data64));
+
+        }
     }
 }

+ 57 - 0
csharp/src/Google.Protobuf.Test/JsonTokenizerTest.cs

@@ -81,6 +81,63 @@ namespace Google.Protobuf
             AssertTokens("'\ud800\\udc00'", JsonToken.Value(expected));
         }
 
+        [Test]
+        public void ObjectDepth()
+        {
+            string json = "{ \"foo\": { \"x\": 1, \"y\": [ 0 ] } }";
+            var tokenizer = new JsonTokenizer(new StringReader(json));
+            // If we had more tests like this, I'd introduce a helper method... but for one test, it's not worth it.
+            Assert.AreEqual(0, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.StartObject, tokenizer.Next());
+            Assert.AreEqual(1, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.Name("foo"), tokenizer.Next());
+            Assert.AreEqual(1, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.StartObject, tokenizer.Next());
+            Assert.AreEqual(2, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.Name("x"), tokenizer.Next());
+            Assert.AreEqual(2, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.Value(1), tokenizer.Next());
+            Assert.AreEqual(2, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.Name("y"), tokenizer.Next());
+            Assert.AreEqual(2, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.StartArray, tokenizer.Next());
+            Assert.AreEqual(2, tokenizer.ObjectDepth); // Depth hasn't changed in array
+            Assert.AreEqual(JsonToken.Value(0), tokenizer.Next());
+            Assert.AreEqual(2, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.EndArray, tokenizer.Next());
+            Assert.AreEqual(2, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.EndObject, tokenizer.Next());
+            Assert.AreEqual(1, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.EndObject, tokenizer.Next());
+            Assert.AreEqual(0, tokenizer.ObjectDepth);
+            Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
+            Assert.AreEqual(0, tokenizer.ObjectDepth);
+        }
+
+        [Test]
+        public void ObjectDepth_WithPushBack()
+        {
+            string json = "{}";
+            var tokenizer = new JsonTokenizer(new StringReader(json));
+            Assert.AreEqual(0, tokenizer.ObjectDepth);
+            var token = tokenizer.Next();
+            Assert.AreEqual(1, tokenizer.ObjectDepth);
+            // When we push back a "start object", we should effectively be back to the previous depth.
+            tokenizer.PushBack(token);
+            Assert.AreEqual(0, tokenizer.ObjectDepth);
+            // Read the same token again, and get back to depth 1
+            token = tokenizer.Next();
+            Assert.AreEqual(1, tokenizer.ObjectDepth);
+
+            // Now the same in reverse, with EndObject
+            token = tokenizer.Next();
+            Assert.AreEqual(0, tokenizer.ObjectDepth);
+            tokenizer.PushBack(token);
+            Assert.AreEqual(1, tokenizer.ObjectDepth);
+            tokenizer.Next();
+            Assert.AreEqual(0, tokenizer.ObjectDepth);
+        }
+
         [Test]
         [TestCase("embedded tab\t")]
         [TestCase("embedded CR\r")]

+ 7 - 0
csharp/src/Google.Protobuf/InvalidProtocolBufferException.cs

@@ -95,6 +95,13 @@ namespace Google.Protobuf
                 "Use CodedInputStream.SetRecursionLimit() to increase the depth limit.");
         }
 
+        internal static InvalidProtocolBufferException JsonRecursionLimitExceeded()
+        {
+            return new InvalidProtocolBufferException(
+                "Protocol message had too many levels of nesting.  May be malicious.  " +
+                "Use JsonParser.Settings to increase the depth limit.");
+        }
+
         internal static InvalidProtocolBufferException SizeLimitExceeded()
         {
             return new InvalidProtocolBufferException(

+ 24 - 13
csharp/src/Google.Protobuf/JsonParser.cs

@@ -66,13 +66,16 @@ namespace Google.Protobuf
 
         private static readonly JsonParser defaultInstance = new JsonParser(Settings.Default);
 
+        // TODO: Consider introducing a class containing parse state of the parser, tokenizer and depth. That would simplify these handlers
+        // and the signatures of various methods.
         private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
             WellKnownTypeHandlers = new Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
         {
             { Timestamp.Descriptor.FullName, (parser, message, tokenizer) => MergeTimestamp(message, tokenizer.Next()) },
             { Duration.Descriptor.FullName, (parser, message, tokenizer) => MergeDuration(message, tokenizer.Next()) },
             { Value.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStructValue(message, tokenizer) },
-            { ListValue.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeRepeatedField(message, message.Descriptor.Fields[ListValue.ValuesFieldNumber], tokenizer) },
+            { ListValue.Descriptor.FullName, (parser, message, tokenizer) =>
+                parser.MergeRepeatedField(message, message.Descriptor.Fields[ListValue.ValuesFieldNumber], tokenizer) },
             { Struct.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStruct(message, tokenizer) },
             { FieldMask.Descriptor.FullName, (parser, message, tokenizer) => MergeFieldMask(message, tokenizer.Next()) },
             { Int32Value.Descriptor.FullName, MergeWrapperField },
@@ -93,15 +96,11 @@ namespace Google.Protobuf
         }
 
         /// <summary>
-        /// Returns a formatter using the default settings.        /// </summary>
+        /// Returns a formatter using the default settings.
+        /// </summary>
         public static JsonParser Default { get { return defaultInstance; } }
 
-// Currently the settings are unused.
-// TODO: When we've implemented Any (and the json spec is finalized), revisit whether they're
-// needed at all.
-#pragma warning disable 0414
         private readonly Settings settings;
-#pragma warning restore 0414
 
         /// <summary>
         /// Creates a new formatted with the given settings.
@@ -147,6 +146,10 @@ namespace Google.Protobuf
         /// </summary>
         private void Merge(IMessage message, JsonTokenizer tokenizer)
         {
+            if (tokenizer.ObjectDepth > settings.RecursionLimit)
+            {
+                throw InvalidProtocolBufferException.JsonRecursionLimitExceeded();
+            }
             if (message.Descriptor.IsWellKnownType)
             {
                 Action<JsonParser, IMessage, JsonTokenizer> handler;
@@ -787,14 +790,13 @@ namespace Google.Protobuf
         #endregion
 
         /// <summary>
-        /// Settings controlling JSON parsing. (Currently doesn't have any actual settings, but I suspect
-        /// we'll want them for levels of strictness, descriptor pools for Any handling, etc.)
+        /// Settings controlling JSON parsing.
         /// </summary>
         public sealed class Settings
         {
-            private static readonly Settings defaultInstance = new Settings();
+            private static readonly Settings defaultInstance = new Settings(CodedInputStream.DefaultRecursionLimit);
 
-            // TODO: Add recursion limit.
+            private readonly int recursionLimit;
 
             /// <summary>
             /// Default settings, as used by <see cref="JsonParser.Default"/>
@@ -802,10 +804,19 @@ namespace Google.Protobuf
             public static Settings Default { get { return defaultInstance; } }
 
             /// <summary>
-            /// Creates a new <see cref="Settings"/> object.
+            /// The maximum depth of messages to parse. Note that this limit only applies to parsing
+            /// messages, not collections - so a message within a collection within a message only counts as
+            /// depth 2, not 3.
+            /// </summary>
+            public int RecursionLimit { get { return recursionLimit; } }
+
+            /// <summary>
+            /// Creates a new <see cref="Settings"/> object with the specified recursion limit.
             /// </summary>
-            public Settings()
+            /// <param name="recursionLimit">The maximum depth of messages to parse</param>
+            public Settings(int recursionLimit)
             {
+                this.recursionLimit = recursionLimit;
             }
         }
     }

+ 27 - 0
csharp/src/Google.Protobuf/JsonTokenizer.cs

@@ -58,6 +58,13 @@ namespace Google.Protobuf
         private readonly PushBackReader reader;
         private JsonToken bufferedToken;
         private State state;
+        private int objectDepth = 0;
+
+        /// <summary>
+        /// Returns the depth of the stack, purely in objects (not collections).
+        /// Informally, this is the number of remaining unclosed '{' characters we have.
+        /// </summary>
+        internal int ObjectDepth { get { return objectDepth; } }
 
         internal JsonTokenizer(TextReader reader)
         {
@@ -66,6 +73,8 @@ namespace Google.Protobuf
             containerStack.Push(ContainerType.Document);
         }
 
+        // TODO: Why do we allow a different token to be pushed back? It might be better to always remember the previous
+        // token returned, and allow a parameterless Rewind() method (which could only be called once, just like the current PushBack).
         internal void PushBack(JsonToken token)
         {
             if (bufferedToken != null)
@@ -73,6 +82,14 @@ namespace Google.Protobuf
                 throw new InvalidOperationException("Can't push back twice");
             }
             bufferedToken = token;
+            if (token.Type == JsonToken.TokenType.StartObject)
+            {
+                objectDepth--;
+            }
+            else if (token.Type == JsonToken.TokenType.EndObject)
+            {
+                objectDepth++;
+            }
         }
 
         /// <summary>
@@ -95,6 +112,14 @@ namespace Google.Protobuf
             {
                 var ret = bufferedToken;
                 bufferedToken = null;
+                if (ret.Type == JsonToken.TokenType.StartObject)
+                {
+                    objectDepth++;
+                }
+                else if (ret.Type == JsonToken.TokenType.EndObject)
+                {
+                    objectDepth--;
+                }
                 return ret;
             }
             if (state == State.ReaderExhausted)
@@ -142,10 +167,12 @@ namespace Google.Protobuf
                         ValidateState(ValueStates, "Invalid state to read an open brace: ");
                         state = State.ObjectStart;
                         containerStack.Push(ContainerType.Object);
+                        objectDepth++;
                         return JsonToken.StartObject;
                     case '}':
                         ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: ");
                         PopContainer();
+                        objectDepth--;
                         return JsonToken.EndObject;
                     case '[':
                         ValidateState(ValueStates, "Invalid state to read an open square bracket: ");