Przeglądaj źródła

Down integrate to GitHub (#6893)

Rafi Kamal 5 lat temu
rodzic
commit
4e93585e8b

+ 5 - 5
js/map.js

@@ -208,14 +208,14 @@ jspb.Map.ArrayIteratorIterable_.prototype.next = function() {
   }
 };
 
-
 if (typeof(Symbol) != 'undefined') {
-/** @override */
-jspb.Map.ArrayIteratorIterable_.prototype[Symbol.iterator] = function() {
-  return this;
-};
+  /** @override */
+  jspb.Map.ArrayIteratorIterable_.prototype[Symbol.iterator] = function() {
+    return this;
+  };
 }
 
+
 /**
  * Returns the map's length (number of key/value pairs).
  * @return {number}

+ 8 - 3
python/google/protobuf/descriptor.py

@@ -873,9 +873,14 @@ class FileDescriptor(DescriptorBase):
                 syntax=None, pool=None):
       # FileDescriptor() is called from various places, not only from generated
       # files, to register dynamic proto files and messages.
-      if serialized_pb:
-        # TODO(amauryfa): use the pool passed as argument. This will work only
-        # for C++-implemented DescriptorPools.
+      # pylint: disable=g-explicit-bool-comparison
+      if serialized_pb == '':
+        # Cpp generated code must be linked in if serialized_pb is ''
+        try:
+          return _message.default_pool.FindFileByName(name)
+        except KeyError:
+          raise RuntimeError('Please link in cpp generated lib for %s' % (name))
+      elif serialized_pb:
         return _message.default_pool.AddSerializedFile(serialized_pb)
       else:
         return super(FileDescriptor, cls).__new__(cls)

+ 32 - 11
python/google/protobuf/pyext/message.cc

@@ -216,15 +216,41 @@ static PyObject* New(PyTypeObject* type,
   }
 
   // Check dict['DESCRIPTOR']
-  PyObject* py_descriptor = PyDict_GetItem(dict, kDESCRIPTOR);
-  if (py_descriptor == NULL) {
+  PyObject* descriptor_or_name = PyDict_GetItem(dict, kDESCRIPTOR);
+  if (descriptor_or_name == nullptr) {
     PyErr_SetString(PyExc_TypeError, "Message class has no DESCRIPTOR");
     return NULL;
   }
-  if (!PyObject_TypeCheck(py_descriptor, &PyMessageDescriptor_Type)) {
-    PyErr_Format(PyExc_TypeError, "Expected a message Descriptor, got %s",
-                 py_descriptor->ob_type->tp_name);
-    return NULL;
+
+  Py_ssize_t name_size;
+  char* full_name;
+  const Descriptor* message_descriptor;
+  PyObject* py_descriptor;
+
+  if (PyObject_TypeCheck(descriptor_or_name, &PyMessageDescriptor_Type)) {
+    py_descriptor = descriptor_or_name;
+    message_descriptor = PyMessageDescriptor_AsDescriptor(py_descriptor);
+    if (message_descriptor == nullptr) {
+      return nullptr;
+    }
+  } else {
+    if (PyString_AsStringAndSize(descriptor_or_name, &full_name, &name_size) <
+        0) {
+      return nullptr;
+    }
+    message_descriptor =
+        GetDefaultDescriptorPool()->pool->FindMessageTypeByName(
+            std::string(full_name, name_size));
+    if (message_descriptor == nullptr) {
+      PyErr_Format(PyExc_KeyError,
+                   "Can not find message descriptor %s "
+                   "from pool",
+                   full_name);
+      return nullptr;
+    }
+    py_descriptor = PyMessageDescriptor_FromDescriptor(message_descriptor);
+    // reset the dict['DESCRIPTOR'] to py_descriptor.
+    PyDict_SetItem(dict, kDESCRIPTOR, py_descriptor);
   }
 
   // Messages have no __dict__
@@ -236,11 +262,6 @@ static PyObject* New(PyTypeObject* type,
   // Build the arguments to the base metaclass.
   // We change the __bases__ classes.
   ScopedPyObjectPtr new_args;
-  const Descriptor* message_descriptor =
-      PyMessageDescriptor_AsDescriptor(py_descriptor);
-  if (message_descriptor == NULL) {
-    return NULL;
-  }
 
   if (WKT_classes == NULL) {
     ScopedPyObjectPtr well_known_types(PyImport_ImportModule(

+ 17 - 1
python/google/protobuf/service_reflection.py

@@ -38,6 +38,12 @@ compiler at compile-time.
 
 __author__ = 'petar@google.com (Petar Petrov)'
 
+from google.protobuf.internal import api_implementation
+
+if api_implementation.Type() == 'cpp':
+  # pylint: disable=g-import-not-at-top
+  from google.protobuf.pyext import _message
+
 
 class GeneratedServiceType(type):
 
@@ -76,9 +82,15 @@ class GeneratedServiceType(type):
     # when a service class is subclassed.
     if GeneratedServiceType._DESCRIPTOR_KEY not in dictionary:
       return
+
     descriptor = dictionary[GeneratedServiceType._DESCRIPTOR_KEY]
+    if isinstance(descriptor, str):
+      descriptor = _message.default_pool.FindServiceByName(descriptor)
+      dictionary[GeneratedServiceType._DESCRIPTOR_KEY] = descriptor
+
     service_builder = _ServiceBuilder(descriptor)
     service_builder.BuildService(cls)
+    cls.DESCRIPTOR = descriptor
 
 
 class GeneratedServiceStubType(GeneratedServiceType):
@@ -101,12 +113,16 @@ class GeneratedServiceStubType(GeneratedServiceType):
         dictionary[_DESCRIPTOR_KEY] must contain a ServiceDescriptor object
         describing this protocol service type.
     """
+    descriptor = dictionary.get(cls._DESCRIPTOR_KEY)
+    if isinstance(descriptor, str):
+      descriptor = _message.default_pool.FindServiceByName(descriptor)
+      dictionary[GeneratedServiceStubType._DESCRIPTOR_KEY] = descriptor
     super(GeneratedServiceStubType, cls).__init__(name, bases, dictionary)
     # Don't do anything if this class doesn't have a descriptor. This happens
     # when a service stub is subclassed.
     if GeneratedServiceStubType._DESCRIPTOR_KEY not in dictionary:
       return
-    descriptor = dictionary[GeneratedServiceStubType._DESCRIPTOR_KEY]
+
     service_stub_builder = _ServiceStubBuilder(descriptor)
     service_stub_builder.BuildServiceStub(cls)
 

+ 1 - 0
src/google/protobuf/compiler/command_line_interface.cc

@@ -811,6 +811,7 @@ void CommandLineInterface::AllowPlugins(const std::string& exe_name_prefix) {
   plugin_prefix_ = exe_name_prefix;
 }
 
+
 int CommandLineInterface::Run(int argc, const char* const argv[]) {
   Clear();
   switch (ParseArguments(argc, argv)) {

+ 3 - 1
src/google/protobuf/compiler/csharp/csharp_message.cc

@@ -152,7 +152,9 @@ void MessageGenerator::Generate(io::Printer* printer) {
 
     // a read-only property for fast
     // retrieval of the set in IsInitialized
-    printer->Print(vars, "private pb::ExtensionSet<$class_name$> _Extensions { get { return _extensions; } }\n");
+    printer->Print(vars,
+                   "private pb::ExtensionSet<$class_name$> _Extensions { get { "
+                   "return _extensions; } }\n");
   }
 
   for (int i = 0; i < has_bit_field_count_; i++) {

+ 1 - 0
src/google/protobuf/compiler/java/java_helpers.h

@@ -160,6 +160,7 @@ inline bool MultipleJavaFiles(const FileDescriptor* descriptor,
   return descriptor->options().java_multiple_files();
 }
 
+
 // Returns true if `descriptor` will be written to its own .java file.
 // `immutable` should be set to true if we're generating for the immutable API.
 template <typename Descriptor>

+ 3 - 3
src/google/protobuf/compiler/java/java_name_resolver.cc

@@ -210,9 +210,9 @@ std::string ClassNameResolver::GetClassName(const FileDescriptor* descriptor,
 // or outer class name.
 std::string ClassNameResolver::GetClassFullName(
     const std::string& name_without_package, const FileDescriptor* file,
-    bool immutable, bool multiple_files) {
+    bool immutable, bool is_own_file) {
   std::string result;
-  if (multiple_files) {
+  if (is_own_file) {
     result = FileJavaPackage(file, immutable);
   } else {
     result = GetClassName(file, immutable);
@@ -242,7 +242,7 @@ std::string ClassNameResolver::GetClassName(const ServiceDescriptor* descriptor,
                                             bool immutable) {
   return GetClassFullName(ClassNameWithoutPackage(descriptor, immutable),
                           descriptor->file(), immutable,
-                          MultipleJavaFiles(descriptor->file(), immutable));
+                          IsOwnFile(descriptor, immutable));
 }
 
 // Get the Java Class style full name of a message.

+ 1 - 1
src/google/protobuf/compiler/java/java_name_resolver.h

@@ -108,7 +108,7 @@ class ClassNameResolver {
   // or outer class name.
   std::string GetClassFullName(const std::string& name_without_package,
                                const FileDescriptor* file, bool immutable,
-                               bool multiple_files);
+                               bool is_own_file);
   // Get the Java Class style full name of a message.
   std::string GetJavaClassFullName(const std::string& name_without_package,
                                    const FileDescriptor* file, bool immutable);

+ 95 - 61
src/google/protobuf/compiler/python/python_generator.cc

@@ -28,8 +28,6 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-//#PY25 compatible generated code for GAE.
-// Copyright 2007 Google Inc. All Rights Reserved.
 // Author: robinson@google.com (Will Robinson)
 //
 // This module outputs pure-Python protocol message classes that will
@@ -185,8 +183,6 @@ void PrintTopBoilerplate(io::Printer* printer, const FileDescriptor* file,
       "# -*- coding: utf-8 -*-\n"
       "# Generated by the protocol buffer compiler.  DO NOT EDIT!\n"
       "# source: $filename$\n"
-      "\nimport sys\n_b=sys.version_info[0]<3 and (lambda x:x) or (lambda "
-      "x:x.encode('latin1'))"  //##PY25
       "\n",
       "filename", file->name());
   if (HasTopLevelEnums(file)) {
@@ -272,14 +268,10 @@ std::string StringifyDefaultValue(const FieldDescriptor& field) {
     case FieldDescriptor::CPPTYPE_ENUM:
       return StrCat(field.default_value_enum()->number());
     case FieldDescriptor::CPPTYPE_STRING:
-      //##!PY25      return "b\"" + CEscape(field.default_value_string())
-      //+
-      //##!PY25             (field.type() != FieldDescriptor::TYPE_STRING ? "\""
-      //:
-      //##!PY25               "\".decode('utf-8')");
-      return "_b(\"" + CEscape(field.default_value_string()) +  //##PY25
-             (field.type() != FieldDescriptor::TYPE_STRING ? "\")" :  //##PY25
-                  "\").decode('utf-8')");                             //##PY25
+      return "b\"" + CEscape(field.default_value_string()) +
+             (field.type() != FieldDescriptor::TYPE_STRING
+                  ? "\""
+                  : "\".decode('utf-8')");
     case FieldDescriptor::CPPTYPE_MESSAGE:
       return "None";
   }
@@ -305,13 +297,29 @@ std::string StringifySyntax(FileDescriptor::Syntax syntax) {
 
 }  // namespace
 
-Generator::Generator() : file_(NULL) {}
+Generator::Generator() : file_(nullptr) {}
 
 Generator::~Generator() {}
 
 bool Generator::Generate(const FileDescriptor* file,
                          const std::string& parameter,
                          GeneratorContext* context, std::string* error) const {
+  // -----------------------------------------------------------------
+  // parse generator options
+  bool cpp_generated_lib_linked = false;
+
+  std::vector<std::pair<std::string, std::string> > options;
+  ParseGeneratorParameter(parameter, &options);
+
+  for (int i = 0; i < options.size(); i++) {
+    if (options[i].first == "cpp_generated_lib_linked") {
+      cpp_generated_lib_linked = true;
+    } else {
+      *error = "Unknown generator option: " + options[i].first;
+      return false;
+    }
+  }
+
 
   // Completely serialize all Generate() calls on this instance.  The
   // thread-safety constraints of the CodeGenerator interface aren't clear so
@@ -327,6 +335,11 @@ bool Generator::Generate(const FileDescriptor* file,
   ReplaceCharacters(&filename, ".", '/');
   filename += ".py";
 
+  pure_python_workable_ = !cpp_generated_lib_linked;
+  if (HasPrefixString(file->name(), "google/protobuf/")) {
+    pure_python_workable_ = true;
+  }
+
   FileDescriptorProto fdp;
   file_->CopyTo(&fdp);
   fdp.SerializeToString(&file_descriptor_serialized_);
@@ -338,25 +351,31 @@ bool Generator::Generate(const FileDescriptor* file,
   printer_ = &printer;
 
   PrintTopBoilerplate(printer_, file_, GeneratingDescriptorProto());
-  PrintImports();
+  if (pure_python_workable_) {
+    PrintImports();
+  }
   PrintFileDescriptor();
   PrintTopLevelEnums();
   PrintTopLevelExtensions();
-  PrintAllNestedEnumsInFile();
-  PrintMessageDescriptors();
-  FixForeignFieldsInDescriptors();
+  if (pure_python_workable_) {
+    PrintAllNestedEnumsInFile();
+    PrintMessageDescriptors();
+    FixForeignFieldsInDescriptors();
+  }
   PrintMessages();
-  // We have to fix up the extensions after the message classes themselves,
-  // since they need to call static RegisterExtension() methods on these
-  // classes.
-  FixForeignFieldsInExtensions();
-  // Descriptor options may have custom extensions. These custom options
-  // can only be successfully parsed after we register corresponding
-  // extensions. Therefore we parse all options again here to recognize
-  // custom options that may be unknown when we define the descriptors.
-  // This does not apply to services because they are not used by extensions.
-  FixAllDescriptorOptions();
-  PrintServiceDescriptors();
+  if (pure_python_workable_) {
+    // We have to fix up the extensions after the message classes themselves,
+    // since they need to call static RegisterExtension() methods on these
+    // classes.
+    FixForeignFieldsInExtensions();
+    // Descriptor options may have custom extensions. These custom options
+    // can only be successfully parsed after we register corresponding
+    // extensions. Therefore we parse all options again here to recognize
+    // custom options that may be unknown when we define the descriptors.
+    // This does not apply to services because they are not used by extensions.
+    FixAllDescriptorOptions();
+    PrintServiceDescriptors();
+  }
   if (HasGenericServices(file)) {
     PrintServices();
   }
@@ -425,28 +444,30 @@ void Generator::PrintFileDescriptor() const {
       "  serialized_options=$options$,\n";
   printer_->Print(m, file_descriptor_template);
   printer_->Indent();
-  printer_->Print(
-      //##!PY25      "serialized_pb=b'$value$'\n",
-      "serialized_pb=_b('$value$')\n",  //##PY25
-      "value", strings::CHexEscape(file_descriptor_serialized_));
-  if (file_->dependency_count() != 0) {
-    printer_->Print(",\ndependencies=[");
-    for (int i = 0; i < file_->dependency_count(); ++i) {
-      std::string module_alias = ModuleAlias(file_->dependency(i)->name());
-      printer_->Print("$module_alias$.DESCRIPTOR,", "module_alias",
-                      module_alias);
+  if (pure_python_workable_) {
+    printer_->Print("serialized_pb=b'$value$'\n", "value",
+                    strings::CHexEscape(file_descriptor_serialized_));
+    if (file_->dependency_count() != 0) {
+      printer_->Print(",\ndependencies=[");
+      for (int i = 0; i < file_->dependency_count(); ++i) {
+        std::string module_alias = ModuleAlias(file_->dependency(i)->name());
+        printer_->Print("$module_alias$.DESCRIPTOR,", "module_alias",
+                        module_alias);
+      }
+      printer_->Print("]");
     }
-    printer_->Print("]");
-  }
-  if (file_->public_dependency_count() > 0) {
-    printer_->Print(",\npublic_dependencies=[");
-    for (int i = 0; i < file_->public_dependency_count(); ++i) {
-      std::string module_alias =
-          ModuleAlias(file_->public_dependency(i)->name());
-      printer_->Print("$module_alias$.DESCRIPTOR,", "module_alias",
-                      module_alias);
+    if (file_->public_dependency_count() > 0) {
+      printer_->Print(",\npublic_dependencies=[");
+      for (int i = 0; i < file_->public_dependency_count(); ++i) {
+        std::string module_alias =
+            ModuleAlias(file_->public_dependency(i)->name());
+        printer_->Print("$module_alias$.DESCRIPTOR,", "module_alias",
+                        module_alias);
+      }
+      printer_->Print("]");
     }
-    printer_->Print("]");
+  } else {
+    printer_->Print("serialized_pb=''\n");
   }
 
   // TODO(falk): Also print options and fix the message_type, enum_type,
@@ -516,10 +537,14 @@ void Generator::PrintEnum(const EnumDescriptor& enum_descriptor) const {
   printer_->Print(m, enum_descriptor_template);
   printer_->Indent();
   printer_->Indent();
-  for (int i = 0; i < enum_descriptor.value_count(); ++i) {
-    PrintEnumValueDescriptor(*enum_descriptor.value(i));
-    printer_->Print(",\n");
+
+  if (pure_python_workable_) {
+    for (int i = 0; i < enum_descriptor.value_count(); ++i) {
+      PrintEnumValueDescriptor(*enum_descriptor.value(i));
+      printer_->Print(",\n");
+    }
   }
+
   printer_->Outdent();
   printer_->Print("],\n");
   printer_->Print("containing_type=None,\n");
@@ -529,8 +554,10 @@ void Generator::PrintEnum(const EnumDescriptor& enum_descriptor) const {
   PrintSerializedPbInterval(enum_descriptor, edp);
   printer_->Outdent();
   printer_->Print(")\n");
-  printer_->Print("_sym_db.RegisterEnumDescriptor($name$)\n", "name",
-                  module_level_descriptor_name);
+  if (pure_python_workable_) {
+    printer_->Print("_sym_db.RegisterEnumDescriptor($name$)\n", "name",
+                    module_level_descriptor_name);
+  }
   printer_->Print("\n");
 }
 
@@ -650,9 +677,12 @@ void Generator::PrintServiceDescriptor(
 
 void Generator::PrintDescriptorKeyAndModuleName(
     const ServiceDescriptor& descriptor) const {
+  std::string name = ModuleLevelServiceDescriptorName(descriptor);
+  if (!pure_python_workable_) {
+    name = "'" + descriptor.full_name() + "'";
+  }
   printer_->Print("$descriptor_key$ = $descriptor_name$,\n", "descriptor_key",
-                  kDescriptorKey, "descriptor_name",
-                  ModuleLevelServiceDescriptorName(descriptor));
+                  kDescriptorKey, "descriptor_name", name);
   std::string module_name = ModuleName(file_->name());
   printer_->Print("__module__ = '$module_name$'\n", "module_name", module_name);
 }
@@ -841,7 +871,11 @@ void Generator::PrintMessage(const Descriptor& message_descriptor,
   PrintNestedMessages(message_descriptor, qualified_name, to_register);
   std::map<std::string, std::string> m;
   m["descriptor_key"] = kDescriptorKey;
-  m["descriptor_name"] = ModuleLevelDescriptorName(message_descriptor);
+  if (pure_python_workable_) {
+    m["descriptor_name"] = ModuleLevelDescriptorName(message_descriptor);
+  } else {
+    m["descriptor_name"] = "'" + message_descriptor.full_name() + "'";
+  }
   printer_->Print(m, "'$descriptor_key$' : $descriptor_name$,\n");
   std::string module_name = ModuleName(file_->name());
   printer_->Print("'__module__' : '$module_name$'\n", "module_name",
@@ -1012,7 +1046,7 @@ template <typename DescriptorT>
 void Generator::FixContainingTypeInDescriptor(
     const DescriptorT& descriptor,
     const Descriptor* containing_descriptor) const {
-  if (containing_descriptor != NULL) {
+  if (containing_descriptor != nullptr) {
     const std::string nested_name = ModuleLevelDescriptorName(descriptor);
     const std::string parent_name =
         ModuleLevelDescriptorName(*containing_descriptor);
@@ -1027,7 +1061,7 @@ void Generator::FixContainingTypeInDescriptor(
 // just set everything in the initial assignment statements).
 void Generator::FixForeignFieldsInDescriptors() const {
   for (int i = 0; i < file_->message_type_count(); ++i) {
-    FixForeignFieldsInDescriptor(*file_->message_type(i), NULL);
+    FixForeignFieldsInDescriptor(*file_->message_type(i), nullptr);
   }
   for (int i = 0; i < file_->message_type_count(); ++i) {
     AddMessageToFileDescriptor(*file_->message_type(i));
@@ -1038,6 +1072,7 @@ void Generator::FixForeignFieldsInDescriptors() const {
   for (int i = 0; i < file_->extension_count(); ++i) {
     AddExtensionToFileDescriptor(*file_->extension(i));
   }
+
   // TODO(jieluo): Move this register to PrintFileDescriptor() when
   // FieldDescriptor.file is added in generated file.
   printer_->Print("_sym_db.RegisterFileDescriptor($name$)\n", "name",
@@ -1118,8 +1153,7 @@ std::string Generator::OptionsValue(
   if (serialized_options.length() == 0 || GeneratingDescriptorProto()) {
     return "None";
   } else {
-    //##!PY25    return "b'('" + CEscape(serialized_options)+ "')";
-    return "_b('" + CEscape(serialized_options) + "')";  //##PY25
+    return "b'" + CEscape(serialized_options) + "'";
   }
 }
 
@@ -1351,7 +1385,7 @@ void Generator::FixOptionsForField(const FieldDescriptor& field) const {
   if (field_options != "None") {
     std::string field_name;
     if (field.is_extension()) {
-      if (field.extension_scope() == NULL) {
+      if (field.extension_scope() == nullptr) {
         // Top level extensions.
         field_name = field.name();
       } else {

+ 1 - 0
src/google/protobuf/compiler/python/python_generator.h

@@ -166,6 +166,7 @@ class PROTOC_EXPORT Generator : public CodeGenerator {
   mutable const FileDescriptor* file_;  // Set in Generate().  Under mutex_.
   mutable std::string file_descriptor_serialized_;
   mutable io::Printer* printer_;  // Set in Generate().  Under mutex_.
+  mutable bool pure_python_workable_;
 
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Generator);
 };

+ 0 - 1
src/google/protobuf/descriptor.cc

@@ -4602,7 +4602,6 @@ void DescriptorBuilder::BuildMessage(const DescriptorProto& proto,
 
   AddSymbol(result->full_name(), parent, result->name(), proto, Symbol(result));
 
-
   for (int i = 0; i < proto.reserved_range_size(); i++) {
     const DescriptorProto_ReservedRange& range1 = proto.reserved_range(i);
     for (int j = i + 1; j < proto.reserved_range_size(); j++) {

+ 12 - 9
src/google/protobuf/parse_context.cc

@@ -197,22 +197,25 @@ const char* EpsCopyInputStream::SkipFallback(const char* ptr, int size) {
 }
 
 const char* EpsCopyInputStream::ReadStringFallback(const char* ptr, int size,
-                                                   std::string* s) {
-  s->clear();
-  // TODO(gerbens) assess security. At the moment its parity with
-  // CodedInputStream but it allows a payload to reserve large memory.
+                                                   std::string* str) {
+  str->clear();
   if (PROTOBUF_PREDICT_TRUE(size <= buffer_end_ - ptr + limit_)) {
-    s->reserve(size);
+    // Reserve the string up to a static safe size. If strings are bigger than
+    // this we proceed by growing the string as needed. This protects against
+    // malicious payloads making protobuf hold on to a lot of memory.
+    str->reserve(str->size() + std::min<int>(size, kSafeStringSize));
   }
-  return AppendStringFallback(ptr, size, s);
+  return AppendSize(ptr, size,
+                    [str](const char* p, int s) { str->append(p, s); });
 }
 
 const char* EpsCopyInputStream::AppendStringFallback(const char* ptr, int size,
                                                      std::string* str) {
-  // TODO(gerbens) assess security. At the moment its parity with
-  // CodedInputStream but it allows a payload to reserve large memory.
   if (PROTOBUF_PREDICT_TRUE(size <= buffer_end_ - ptr + limit_)) {
-    str->reserve(size);
+    // Reserve the string up to a static safe size. If strings are bigger than
+    // this we proceed by growing the string as needed. This protects against
+    // malicious payloads making protobuf hold on to a lot of memory.
+    str->reserve(str->size() + std::min<int>(size, kSafeStringSize));
   }
   return AppendSize(ptr, size,
                     [str](const char* p, int s) { str->append(p, s); });

+ 3 - 0
src/google/protobuf/parse_context.h

@@ -271,6 +271,9 @@ class PROTOBUF_EXPORT EpsCopyInputStream {
   // DoneFallback.
   uint32 last_tag_minus_1_ = 0;
   int overall_limit_ = INT_MAX;  // Overall limit independent of pushed limits.
+  // Pretty random large number that seems like a safe allocation on most
+  // systems. TODO(gerbens) do we need to set this as build flag?
+  enum { kSafeStringSize = 50000000 };
 
   std::pair<const char*, bool> DoneFallback(const char* ptr, int d);
   const char* Next(int overrun, int d);