Browse Source

Down-integrate from internal code base.

Feng Xiao 10 năm trước cách đây
mục cha
commit
b17ec3ca11

+ 13 - 4
cmake/tests.cmake

@@ -99,6 +99,12 @@ set(common_test_files
   ${protobuf_source_dir}/src/google/protobuf/testing/googletest.cc
 )
 
+set(common_lite_test_files
+  ${protobuf_source_dir}/src/google/protobuf/arena_test_util.cc
+  ${protobuf_source_dir}/src/google/protobuf/map_lite_test_util.cc
+  ${protobuf_source_dir}/src/google/protobuf/test_util_lite.cc
+)
+
 set(tests_files
   ${protobuf_source_dir}/src/google/protobuf/any_test.cc
   ${protobuf_source_dir}/src/google/protobuf/arena_unittest.cc
@@ -179,10 +185,13 @@ add_executable(test_plugin ${test_plugin_files})
 target_link_libraries(test_plugin libprotoc libprotobuf gmock)
 
 set(lite_test_files
-  ${protobuf_source_dir}/src/google/protobuf/arena_test_util.cc
   ${protobuf_source_dir}/src/google/protobuf/lite_unittest.cc
-  ${protobuf_source_dir}/src/google/protobuf/map_lite_test_util.cc
-  ${protobuf_source_dir}/src/google/protobuf/test_util_lite.cc
 )
-add_executable(lite-test ${lite_test_files} ${lite_test_proto_files})
+add_executable(lite-test ${lite_test_files} ${common_lite_test_files} ${lite_test_proto_files})
 target_link_libraries(lite-test libprotobuf-lite)
+
+set(lite_arena_test_files
+  ${protobuf_source_dir}/src/google/protobuf/lite_arena_unittest.cc
+)
+add_executable(lite-arena-test ${lite_arena_test_files} ${common_lite_test_files} ${lite_test_proto_files})
+target_link_libraries(lite-arena-test libprotobuf-lite gmock_main)

+ 1 - 0
java/pom.xml

@@ -142,6 +142,7 @@
                   <arg value="../src/google/protobuf/unittest_enormous_descriptor.proto" />
                   <arg value="../src/google/protobuf/unittest_no_generic_services.proto" />
                   <arg value="../src/google/protobuf/unittest_well_known_types.proto" />
+                  <arg value="src/test/java/com/google/protobuf/any_test.proto" />
                   <arg value="src/test/java/com/google/protobuf/field_presence_test.proto" />
                   <arg value="src/test/java/com/google/protobuf/map_for_proto2_lite_test.proto" />
                   <arg value="src/test/java/com/google/protobuf/map_for_proto2_test.proto" />

+ 92 - 0
java/src/test/java/com/google/protobuf/AnyTest.java

@@ -0,0 +1,92 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import any_test.AnyTestProto.TestAny;
+import protobuf_unittest.UnittestProto.TestAllTypes;
+
+import junit.framework.TestCase;
+
+/**
+ * Unit tests for Any message.
+ */
+public class AnyTest extends TestCase {
+  public void testAnyGeneratedApi() throws Exception {
+    TestAllTypes.Builder builder = TestAllTypes.newBuilder();
+    TestUtil.setAllFields(builder);
+    TestAllTypes message = builder.build();
+
+    TestAny container = TestAny.newBuilder()
+        .setValue(Any.pack(message)).build();
+
+    assertTrue(container.getValue().is(TestAllTypes.class));
+    assertFalse(container.getValue().is(TestAny.class));
+
+    TestAllTypes result = container.getValue().unpack(TestAllTypes.class);
+    TestUtil.assertAllFieldsSet(result);
+
+
+    // Unpacking to a wrong type will throw an exception.
+    try {
+      TestAny wrongMessage = container.getValue().unpack(TestAny.class);
+      fail("Exception is expected.");
+    } catch (InvalidProtocolBufferException e) {
+      // expected.
+    }
+
+    // Test that unpacking throws an exception if parsing fails.
+    TestAny.Builder containerBuilder = container.toBuilder();
+    containerBuilder.getValueBuilder().setValue(
+        ByteString.copyFrom(new byte[]{0x11}));
+    container = containerBuilder.build();
+    try {
+      TestAllTypes parsingFailed = container.getValue().unpack(TestAllTypes.class);
+      fail("Exception is expected.");
+    } catch (InvalidProtocolBufferException e) {
+      // expected.
+    }
+  }
+
+  public void testCachedUnpackResult() throws Exception {
+    TestAllTypes.Builder builder = TestAllTypes.newBuilder();
+    TestUtil.setAllFields(builder);
+    TestAllTypes message = builder.build();
+
+    TestAny container = TestAny.newBuilder()
+        .setValue(Any.pack(message)).build();
+
+    assertTrue(container.getValue().is(TestAllTypes.class));
+
+    TestAllTypes result1 = container.getValue().unpack(TestAllTypes.class);
+    TestAllTypes result2 = container.getValue().unpack(TestAllTypes.class);
+    assertTrue(result1 == result2);
+  }
+}

+ 42 - 0
java/src/test/java/com/google/protobuf/any_test.proto

@@ -0,0 +1,42 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto3";
+
+package any_test;
+
+option java_package = "any_test";
+option java_outer_classname = "AnyTestProto";
+
+import "google/protobuf/any.proto";
+
+message TestAny {
+  google.protobuf.Any value = 1;
+}

+ 57 - 9
python/google/protobuf/pyext/descriptor.cc

@@ -62,6 +62,14 @@ namespace google {
 namespace protobuf {
 namespace python {
 
+// Store interned descriptors, so that the same C++ descriptor yields the same
+// Python object. Objects are not immortal: this map does not own the
+// references, and items are deleted when the last reference to the object is
+// released.
+// This is enough to support the "is" operator on live objects.
+// All descriptors are stored here.
+hash_map<const void*, PyObject*> interned_descriptors;
+
 PyObject* PyString_FromCppString(const string& str) {
   return PyString_FromStringAndSize(str.c_str(), str.size());
 }
@@ -147,6 +155,24 @@ static int CheckCalledFromGeneratedFile(const char* attr_name) {
 
 // Helper functions for descriptor objects.
 
+// A set of templates to retrieve the C++ FileDescriptor of any descriptor.
+template<class DescriptorClass>
+const FileDescriptor* GetFileDescriptor(const DescriptorClass* descriptor) {
+  return descriptor->file();
+}
+template<>
+const FileDescriptor* GetFileDescriptor(const FileDescriptor* descriptor) {
+  return descriptor;
+}
+template<>
+const FileDescriptor* GetFileDescriptor(const EnumValueDescriptor* descriptor) {
+  return descriptor->type()->file();
+}
+template<>
+const FileDescriptor* GetFileDescriptor(const OneofDescriptor* descriptor) {
+  return descriptor->containing_type()->file();
+}
+
 // Converts options into a Python protobuf, and cache the result.
 //
 // This is a bit tricky because options can contain extension fields defined in
@@ -156,8 +182,13 @@ static int CheckCalledFromGeneratedFile(const char* attr_name) {
 // Always returns a new reference.
 template<class DescriptorClass>
 static PyObject* GetOrBuildOptions(const DescriptorClass *descriptor) {
+  // Options (and their extensions) are completely resolved in the proto file
+  // containing the descriptor.
+  PyDescriptorPool* pool = GetDescriptorPool_FromPool(
+      GetFileDescriptor(descriptor)->pool());
+
   hash_map<const void*, PyObject*>* descriptor_options =
-      GetDescriptorPool()->descriptor_options;
+      pool->descriptor_options;
   // First search in the cache.
   if (descriptor_options->find(descriptor) != descriptor_options->end()) {
     PyObject *value = (*descriptor_options)[descriptor];
@@ -170,7 +201,7 @@ static PyObject* GetOrBuildOptions(const DescriptorClass *descriptor) {
   const Message& options(descriptor->options());
   const Descriptor *message_type = options.GetDescriptor();
   PyObject* message_class(cdescriptor_pool::GetMessageClass(
-      GetDescriptorPool(), message_type));
+      pool, message_type));
   if (message_class == NULL) {
     PyErr_Format(PyExc_TypeError, "Could not retrieve class for Options: %s",
                  message_type->full_name().c_str());
@@ -192,7 +223,7 @@ static PyObject* GetOrBuildOptions(const DescriptorClass *descriptor) {
     options.SerializeToString(&serialized);
     io::CodedInputStream input(
         reinterpret_cast<const uint8*>(serialized.c_str()), serialized.size());
-    input.SetExtensionRegistry(GetDescriptorPool()->pool,
+    input.SetExtensionRegistry(pool->pool,
                                GetDescriptorPool()->message_factory);
     bool success = cmsg->message->MergePartialFromCodedStream(&input);
     if (!success) {
@@ -203,7 +234,7 @@ static PyObject* GetOrBuildOptions(const DescriptorClass *descriptor) {
 
   // Cache the result.
   Py_INCREF(value);
-  (*GetDescriptorPool()->descriptor_options)[descriptor] = value.get();
+  (*pool->descriptor_options)[descriptor] = value.get();
 
   return value.release();
 }
@@ -237,6 +268,9 @@ typedef struct PyBaseDescriptor {
   // Pointer to the C++ proto2 descriptor.
   // Like all descriptors, it is owned by the global DescriptorPool.
   const void* descriptor;
+
+  // Owned reference to the DescriptorPool, to ensure it is kept alive.
+  PyDescriptorPool* pool;
 } PyBaseDescriptor;
 
 
@@ -258,7 +292,9 @@ namespace descriptor {
 // 'was_created' is an optional pointer to a bool, and is set to true if a new
 // object was allocated.
 // Always return a new reference.
-PyObject* NewInternedDescriptor(PyTypeObject* type, const void* descriptor,
+template<class DescriptorClass>
+PyObject* NewInternedDescriptor(PyTypeObject* type,
+                                const DescriptorClass* descriptor,
                                 bool* was_created) {
   if (was_created) {
     *was_created = false;
@@ -270,8 +306,8 @@ PyObject* NewInternedDescriptor(PyTypeObject* type, const void* descriptor,
 
   // See if the object is in the map of interned descriptors
   hash_map<const void*, PyObject*>::iterator it =
-      GetDescriptorPool()->interned_descriptors->find(descriptor);
-  if (it != GetDescriptorPool()->interned_descriptors->end()) {
+      interned_descriptors.find(descriptor);
+  if (it != interned_descriptors.end()) {
     GOOGLE_DCHECK(Py_TYPE(it->second) == type);
     Py_INCREF(it->second);
     return it->second;
@@ -283,10 +319,21 @@ PyObject* NewInternedDescriptor(PyTypeObject* type, const void* descriptor,
     return NULL;
   }
   py_descriptor->descriptor = descriptor;
+
   // and cache it.
-  GetDescriptorPool()->interned_descriptors->insert(
+  interned_descriptors.insert(
       std::make_pair(descriptor, reinterpret_cast<PyObject*>(py_descriptor)));
 
+  // Ensures that the DescriptorPool stays alive.
+  PyDescriptorPool* pool = GetDescriptorPool_FromPool(
+      GetFileDescriptor(descriptor)->pool());
+  if (pool == NULL) {
+    Py_DECREF(py_descriptor);
+    return NULL;
+  }
+  Py_INCREF(pool);
+  py_descriptor->pool = pool;
+
   if (was_created) {
     *was_created = true;
   }
@@ -295,7 +342,8 @@ PyObject* NewInternedDescriptor(PyTypeObject* type, const void* descriptor,
 
 static void Dealloc(PyBaseDescriptor* self) {
   // Remove from interned dictionary
-  GetDescriptorPool()->interned_descriptors->erase(self->descriptor);
+  interned_descriptors.erase(self->descriptor);
+  Py_CLEAR(self->pool);
   Py_TYPE(self)->tp_free(reinterpret_cast<PyObject*>(self));
 }
 

+ 38 - 8
python/google/protobuf/pyext/descriptor_pool.cc

@@ -54,9 +54,13 @@ namespace google {
 namespace protobuf {
 namespace python {
 
+// A map to cache Python Pools per C++ pointer.
+// Pointers are not owned here, and belong to the PyDescriptorPool.
+static hash_map<const DescriptorPool*, PyDescriptorPool*> descriptor_pool_map;
+
 namespace cdescriptor_pool {
 
-PyDescriptorPool* NewDescriptorPool() {
+static PyDescriptorPool* NewDescriptorPool() {
   PyDescriptorPool* cdescriptor_pool = PyObject_New(
       PyDescriptorPool, &PyDescriptorPool_Type);
   if (cdescriptor_pool == NULL) {
@@ -77,22 +81,27 @@ PyDescriptorPool* NewDescriptorPool() {
   // storage.
   cdescriptor_pool->classes_by_descriptor =
       new PyDescriptorPool::ClassesByMessageMap();
-  cdescriptor_pool->interned_descriptors =
-      new hash_map<const void*, PyObject *>();
   cdescriptor_pool->descriptor_options =
       new hash_map<const void*, PyObject *>();
 
+  if (!descriptor_pool_map.insert(
+      std::make_pair(cdescriptor_pool->pool, cdescriptor_pool)).second) {
+    // Should never happen -- would indicate an internal error / bug.
+    PyErr_SetString(PyExc_ValueError, "DescriptorPool already registered");
+    return NULL;
+  }
+
   return cdescriptor_pool;
 }
 
 static void Dealloc(PyDescriptorPool* self) {
   typedef PyDescriptorPool::ClassesByMessageMap::iterator iterator;
+  descriptor_pool_map.erase(self->pool);
   for (iterator it = self->classes_by_descriptor->begin();
        it != self->classes_by_descriptor->end(); ++it) {
     Py_DECREF(it->second);
   }
   delete self->classes_by_descriptor;
-  delete self->interned_descriptors;  // its references were borrowed.
   for (hash_map<const void*, PyObject*>::iterator it =
            self->descriptor_options->begin();
        it != self->descriptor_options->end(); ++it) {
@@ -391,22 +400,43 @@ PyTypeObject PyDescriptorPool_Type = {
   PyObject_Del,                        // tp_free
 };
 
-static PyDescriptorPool* global_cdescriptor_pool = NULL;
+// This is the DescriptorPool which contains all the definitions from the
+// generated _pb2.py modules.
+static PyDescriptorPool* python_generated_pool = NULL;
 
 bool InitDescriptorPool() {
   if (PyType_Ready(&PyDescriptorPool_Type) < 0)
     return false;
 
-  global_cdescriptor_pool = cdescriptor_pool::NewDescriptorPool();
-  if (global_cdescriptor_pool == NULL) {
+  python_generated_pool = cdescriptor_pool::NewDescriptorPool();
+  if (python_generated_pool == NULL) {
     return false;
   }
+  // Register this pool to be found for C++-generated descriptors.
+  descriptor_pool_map.insert(
+      std::make_pair(DescriptorPool::generated_pool(),
+                     python_generated_pool));
 
   return true;
 }
 
 PyDescriptorPool* GetDescriptorPool() {
-  return global_cdescriptor_pool;
+  return python_generated_pool;
+}
+
+PyDescriptorPool* GetDescriptorPool_FromPool(const DescriptorPool* pool) {
+  // Fast path for standard descriptors.
+  if (pool == python_generated_pool->pool ||
+      pool == DescriptorPool::generated_pool()) {
+    return python_generated_pool;
+  }
+  hash_map<const DescriptorPool*, PyDescriptorPool*>::iterator it =
+      descriptor_pool_map.find(pool);
+  if (it != descriptor_pool_map.end()) {
+    PyErr_SetString(PyExc_KeyError, "Unknown descriptor pool");
+    return NULL;
+  }
+  return it->second;
 }
 
 }  // namespace python

+ 4 - 11
python/google/protobuf/pyext/descriptor_pool.h

@@ -72,14 +72,6 @@ typedef struct PyDescriptorPool {
   typedef hash_map<const Descriptor*, PyObject*> ClassesByMessageMap;
   ClassesByMessageMap* classes_by_descriptor;
 
-  // Store interned descriptors, so that the same C++ descriptor yields the same
-  // Python object. Objects are not immortal: this map does not own the
-  // references, and items are deleted when the last reference to the object is
-  // released.
-  // This is enough to support the "is" operator on live objects.
-  // All descriptors are stored here.
-  hash_map<const void*, PyObject*>* interned_descriptors;
-
   // Cache the options for any kind of descriptor.
   // Descriptor pointers are owned by the DescriptorPool above.
   // Python objects are owned by the map.
@@ -91,9 +83,6 @@ extern PyTypeObject PyDescriptorPool_Type;
 
 namespace cdescriptor_pool {
 
-// Builds a new DescriptorPool. Normally called only once per process.
-PyDescriptorPool* NewDescriptorPool();
-
 // Looks up a message by name.
 // Returns a message Descriptor, or NULL if not found.
 const Descriptor* FindMessageTypeByName(PyDescriptorPool* self,
@@ -150,6 +139,10 @@ PyObject* FindOneofByName(PyDescriptorPool* self, PyObject* arg);
 // Returns a *borrowed* reference.
 PyDescriptorPool* GetDescriptorPool();
 
+// Retrieve the python descriptor pool owning a C++ descriptor pool.
+// Returns a *borrowed* reference.
+PyDescriptorPool* GetDescriptorPool_FromPool(const DescriptorPool* pool);
+
 // Initialize objects used by this module.
 bool InitDescriptorPool();
 

+ 31 - 10
src/Makefile.am

@@ -191,6 +191,7 @@ libprotobuf_lite_la_SOURCES =                                  \
   google/protobuf/stubs/stringpiece.h                          \
   google/protobuf/stubs/stringprintf.cc                        \
   google/protobuf/stubs/stringprintf.h                         \
+  google/protobuf/stubs/structurally_valid.cc                  \
   google/protobuf/stubs/strutil.cc                             \
   google/protobuf/stubs/strutil.h                              \
   google/protobuf/stubs/time.cc                                \
@@ -232,7 +233,6 @@ libprotobuf_la_SOURCES =                                       \
   google/protobuf/service.cc                                   \
   google/protobuf/source_context.pb.cc                         \
   google/protobuf/struct.pb.cc                                 \
-  google/protobuf/stubs/structurally_valid.cc                  \
   google/protobuf/stubs/substitute.cc                          \
   google/protobuf/stubs/substitute.h                           \
   google/protobuf/text_format.cc                               \
@@ -659,7 +659,8 @@ COMMON_TEST_SOURCES =                                          \
   google/protobuf/testing/file.h
 
 check_PROGRAMS = protoc protobuf-test protobuf-lazy-descriptor-test \
-                 protobuf-lite-test test_plugin $(GZCHECKPROGRAMS)
+                 protobuf-lite-test test_plugin protobuf-lite-arena-test \
+                 $(GZCHECKPROGRAMS)
 protobuf_test_LDADD = $(PTHREAD_LIBS) libprotobuf.la libprotoc.la \
                       ../gmock/gtest/lib/libgtest.la              \
                       ../gmock/lib/libgmock.la                    \
@@ -756,21 +757,40 @@ protobuf_lazy_descriptor_test_SOURCES =                        \
   $(COMMON_TEST_SOURCES)
 nodist_protobuf_lazy_descriptor_test_SOURCES = $(protoc_outputs)
 
-# Build lite_unittest separately, since it doesn't use gtest.
-protobuf_lite_test_LDADD = $(PTHREAD_LIBS) libprotobuf-lite.la
-protobuf_lite_test_CXXFLAGS = $(NO_OPT_CXXFLAGS)
-protobuf_lite_test_SOURCES =                                           \
+COMMON_LITE_TEST_SOURCES =                                             \
   google/protobuf/arena_test_util.cc                                   \
   google/protobuf/arena_test_util.h                                    \
-  google/protobuf/lite_unittest.cc                                     \
   google/protobuf/map_lite_test_util.cc                                \
   google/protobuf/map_lite_test_util.h                                 \
   google/protobuf/test_util_lite.cc                                    \
   google/protobuf/test_util_lite.h
-  # TODO(teboring) add the file back and make the test build.
-  # google/protobuf/map_lite_test.cc
+
+# Build lite_unittest separately, since it doesn't use gtest. It can't
+# depend on gtest because our internal version of gtest depend on proto
+# full runtime and we want to make sure this test builds without full
+# runtime.
+protobuf_lite_test_LDADD = $(PTHREAD_LIBS) libprotobuf-lite.la
+protobuf_lite_test_CXXFLAGS = $(NO_OPT_CXXFLAGS)
+protobuf_lite_test_SOURCES =                                           \
+  google/protobuf/lite_unittest.cc                                     \
+  $(COMMON_LITE_TEST_SOURCES)
 nodist_protobuf_lite_test_SOURCES = $(protoc_lite_outputs)
 
+# lite_arena_unittest depends on gtest because teboring@ found that without
+# gtest when building the test internally our memory sanitizer doesn't detect
+# memory leaks (don't know why).
+protobuf_lite_arena_test_LDADD = $(PTHREAD_LIBS) libprotobuf-lite.la \
+                      ../gmock/gtest/lib/libgtest.la                 \
+                      ../gmock/lib/libgmock.la                       \
+                      ../gmock/lib/libgmock_main.la
+protobuf_lite_arena_test_CPPFLAGS = -I$(srcdir)/../gmock/include       \
+                                    -I$(srcdir)/../gmock/gtest/include
+protobuf_lite_arena_test_CXXFLAGS = $(NO_OPT_CXXFLAGS)
+protobuf_lite_arena_test_SOURCES =       \
+  google/protobuf/lite_arena_unittest.cc \
+  $(COMMON_LITE_TEST_SOURCES)
+nodist_protobuf_lite_arena_test_SOURCES = $(protoc_lite_outputs)
+
 # Test plugin binary.
 test_plugin_LDADD = $(PTHREAD_LIBS) libprotobuf.la libprotoc.la \
                     ../gmock/gtest/lib/libgtest.la
@@ -790,4 +810,5 @@ zcgunzip_SOURCES = google/protobuf/testing/zcgunzip.cc
 endif
 
 TESTS = protobuf-test protobuf-lazy-descriptor-test protobuf-lite-test \
-        google/protobuf/compiler/zip_output_unittest.sh $(GZTESTS)
+        google/protobuf/compiler/zip_output_unittest.sh $(GZTESTS)     \
+        protobuf-lite-arena-test

+ 88 - 0
src/google/protobuf/compiler/cpp/cpp_helpers.cc

@@ -600,6 +600,94 @@ bool IsAnyMessage(const Descriptor* descriptor) {
          descriptor->file()->name() == kAnyProtoFile;
 }
 
+enum Utf8CheckMode {
+  STRICT = 0,  // Parsing will fail if non UTF-8 data is in string fields.
+  VERIFY = 1,  // Only log an error but parsing will succeed.
+  NONE = 2,  // No UTF-8 check.
+};
+
+// Which level of UTF-8 enforcemant is placed on this file.
+static Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field) {
+  if (field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3) {
+    return STRICT;
+  } else if (field->file()->options().optimize_for() !=
+             FileOptions::LITE_RUNTIME) {
+    return VERIFY;
+  } else {
+    return NONE;
+  }
+}
+
+static void GenerateUtf8CheckCode(const FieldDescriptor* field,
+                                  bool for_parse,
+                                  const map<string, string>& variables,
+                                  const char* parameters,
+                                  const char* strict_function,
+                                  const char* verify_function,
+                                  io::Printer* printer) {
+  switch (GetUtf8CheckMode(field)) {
+    case STRICT: {
+      if (for_parse) {
+        printer->Print("DO_(");
+      }
+      printer->Print(
+          "::google::protobuf::internal::WireFormatLite::$function$(\n",
+          "function", strict_function);
+      printer->Indent();
+      printer->Print(variables, parameters);
+      if (for_parse) {
+        printer->Print("::google::protobuf::internal::WireFormatLite::PARSE,\n");
+      } else {
+        printer->Print("::google::protobuf::internal::WireFormatLite::SERIALIZE,\n");
+      }
+      printer->Print("\"$full_name$\")", "full_name", field->full_name());
+      if (for_parse) {
+        printer->Print(")");
+      }
+      printer->Print(";\n");
+      printer->Outdent();
+      break;
+    }
+    case VERIFY: {
+      printer->Print(
+          "::google::protobuf::internal::WireFormat::$function$(\n",
+          "function", verify_function);
+      printer->Indent();
+      printer->Print(variables, parameters);
+      if (for_parse) {
+        printer->Print("::google::protobuf::internal::WireFormat::PARSE,\n");
+      } else {
+        printer->Print("::google::protobuf::internal::WireFormat::SERIALIZE,\n");
+      }
+      printer->Print("\"$full_name$\");\n", "full_name", field->full_name());
+      printer->Outdent();
+      break;
+    }
+    case NONE:
+      break;
+  }
+}
+
+void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
+                                    bool for_parse,
+                                    const map<string, string>& variables,
+                                    const char* parameters,
+                                    io::Printer* printer) {
+  GenerateUtf8CheckCode(field, for_parse, variables, parameters,
+                        "VerifyUtf8String", "VerifyUTF8StringNamedField",
+                        printer);
+}
+
+void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
+                                  bool for_parse,
+                                  const map<string, string>& variables,
+                                  const char* parameters,
+                                  io::Printer* printer) {
+  GenerateUtf8CheckCode(field, for_parse, variables, parameters,
+                        "VerifyUtf8Cord", "VerifyUTF8CordNamedField",
+                        printer);
+}
+
 }  // namespace cpp
 }  // namespace compiler
 }  // namespace protobuf

+ 14 - 5
src/google/protobuf/compiler/cpp/cpp_helpers.h

@@ -202,11 +202,6 @@ inline bool HasGenericServices(const FileDescriptor* file) {
          file->options().cc_generic_services();
 }
 
-// Should string fields in this file verify that their contents are UTF-8?
-inline bool HasUtf8Verification(const FileDescriptor* file) {
-  return file->options().optimize_for() != FileOptions::LITE_RUNTIME;
-}
-
 // Should we generate a separate, super-optimized code path for serializing to
 // flat arrays?  We don't do this in Lite mode because we'd rather reduce code
 // size.
@@ -270,6 +265,20 @@ inline bool SupportsArenas(const FieldDescriptor* field) {
 bool IsAnyMessage(const FileDescriptor* descriptor);
 bool IsAnyMessage(const Descriptor* descriptor);
 
+void GenerateUtf8CheckCodeForString(
+    const FieldDescriptor* field,
+    bool for_parse,
+    const map<string, string>& variables,
+    const char* parameters,
+    io::Printer* printer);
+
+void GenerateUtf8CheckCodeForCord(
+    const FieldDescriptor* field,
+    bool for_parse,
+    const map<string, string>& variables,
+    const char* parameters,
+    io::Printer* printer);
+
 }  // namespace cpp
 }  // namespace compiler
 }  // namespace protobuf

+ 61 - 2
src/google/protobuf/compiler/cpp/cpp_map_field.cc

@@ -234,6 +234,20 @@ GenerateMergeFromCodedStream(io::Printer* printer) const {
         "}\n");
   }
 
+  const FieldDescriptor* key_field =
+      descriptor_->message_type()->FindFieldByName("key");
+  if (key_field->type() == FieldDescriptor::TYPE_STRING) {
+    GenerateUtf8CheckCodeForString(
+        key_field, true, variables_,
+        "entry->key().data(), entry->key().length(),\n", printer);
+  }
+  if (value_field->type() == FieldDescriptor::TYPE_STRING) {
+    GenerateUtf8CheckCodeForString(
+        value_field, true, variables_,
+        "entry->mutable_value()->data(),\n"
+        "entry->mutable_value()->length(),\n", printer);
+  }
+
   // If entry is allocated by arena, its desctructor should be avoided.
   if (SupportsArenas(descriptor_)) {
     printer->Print(variables_,
@@ -261,7 +275,30 @@ GenerateSerializeWithCachedSizes(io::Printer* printer) const {
   printer->Print(variables_,
       "    entry.reset($name$_.New$wrapper$(it->first, it->second));\n"
       "    ::google::protobuf::internal::WireFormatLite::Write$stream_writer$(\n"
-      "        $number$, *entry, output);\n"
+      "        $number$, *entry, output);\n");
+
+  printer->Indent();
+  printer->Indent();
+
+  const FieldDescriptor* key_field =
+      descriptor_->message_type()->FindFieldByName("key");
+  const FieldDescriptor* value_field =
+      descriptor_->message_type()->FindFieldByName("value");
+  if (key_field->type() == FieldDescriptor::TYPE_STRING) {
+    GenerateUtf8CheckCodeForString(
+        key_field, false, variables_,
+        "it->first.data(), it->first.length(),\n", printer);
+  }
+  if (value_field->type() == FieldDescriptor::TYPE_STRING) {
+    GenerateUtf8CheckCodeForString(
+        value_field, false, variables_,
+        "it->second.data(), it->second.length(),\n", printer);
+  }
+
+  printer->Outdent();
+  printer->Outdent();
+
+  printer->Print(
       "  }\n");
 
   // If entry is allocated by arena, its desctructor should be avoided.
@@ -296,7 +333,29 @@ GenerateSerializeWithCachedSizesToArray(io::Printer* printer) const {
       "    entry.reset($name$_.New$wrapper$(it->first, it->second));\n"
       "    target = ::google::protobuf::internal::WireFormatLite::\n"
       "        Write$declared_type$NoVirtualToArray(\n"
-      "            $number$, *entry, target);\n"
+      "            $number$, *entry, target);\n");
+
+  printer->Indent();
+  printer->Indent();
+
+  const FieldDescriptor* key_field =
+      descriptor_->message_type()->FindFieldByName("key");
+  const FieldDescriptor* value_field =
+      descriptor_->message_type()->FindFieldByName("value");
+  if (key_field->type() == FieldDescriptor::TYPE_STRING) {
+    GenerateUtf8CheckCodeForString(
+        key_field, false, variables_,
+        "it->first.data(), it->first.length(),\n", printer);
+  }
+  if (value_field->type() == FieldDescriptor::TYPE_STRING) {
+    GenerateUtf8CheckCodeForString(
+        value_field, false, variables_,
+        "it->second.data(), it->second.length(),\n", printer);
+  }
+
+  printer->Outdent();
+  printer->Outdent();
+  printer->Print(
       "  }\n");
 
   // If entry is allocated by arena, its desctructor should be avoided.

+ 34 - 50
src/google/protobuf/compiler/cpp/cpp_string_field.cc

@@ -367,25 +367,19 @@ GenerateMergeFromCodedStream(io::Printer* printer) const {
     "DO_(::google::protobuf::internal::WireFormatLite::Read$declared_type$(\n"
     "      input, this->mutable_$name$()));\n");
 
-  if (HasUtf8Verification(descriptor_->file()) &&
-      descriptor_->type() == FieldDescriptor::TYPE_STRING) {
-    printer->Print(variables_,
-      "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n"
-      "  this->$name$().data(), this->$name$().length(),\n"
-      "  ::google::protobuf::internal::WireFormat::PARSE,\n"
-      "  \"$full_name$\");\n");
+  if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
+    GenerateUtf8CheckCodeForString(
+        descriptor_, true, variables_,
+        "this->$name$().data(), this->$name$().length(),\n", printer);
   }
 }
 
 void StringFieldGenerator::
 GenerateSerializeWithCachedSizes(io::Printer* printer) const {
-  if (HasUtf8Verification(descriptor_->file()) &&
-      descriptor_->type() == FieldDescriptor::TYPE_STRING) {
-    printer->Print(variables_,
-      "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n"
-      "  this->$name$().data(), this->$name$().length(),\n"
-      "  ::google::protobuf::internal::WireFormat::SERIALIZE,\n"
-      "  \"$full_name$\");\n");
+  if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
+    GenerateUtf8CheckCodeForString(
+        descriptor_, false, variables_,
+        "this->$name$().data(), this->$name$().length(),\n", printer);
   }
   printer->Print(variables_,
     "::google::protobuf::internal::WireFormatLite::Write$declared_type$MaybeAliased(\n"
@@ -394,13 +388,10 @@ GenerateSerializeWithCachedSizes(io::Printer* printer) const {
 
 void StringFieldGenerator::
 GenerateSerializeWithCachedSizesToArray(io::Printer* printer) const {
-  if (HasUtf8Verification(descriptor_->file()) &&
-      descriptor_->type() == FieldDescriptor::TYPE_STRING) {
-    printer->Print(variables_,
-      "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n"
-      "  this->$name$().data(), this->$name$().length(),\n"
-      "  ::google::protobuf::internal::WireFormat::SERIALIZE,\n"
-      "  \"$full_name$\");\n");
+  if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
+    GenerateUtf8CheckCodeForString(
+        descriptor_, false, variables_,
+        "this->$name$().data(), this->$name$().length(),\n", printer);
   }
   printer->Print(variables_,
     "target =\n"
@@ -665,13 +656,10 @@ GenerateMergeFromCodedStream(io::Printer* printer) const {
       "DO_(::google::protobuf::internal::WireFormatLite::Read$declared_type$(\n"
       "      input, this->mutable_$name$()));\n");
 
-  if (HasUtf8Verification(descriptor_->file()) &&
-      descriptor_->type() == FieldDescriptor::TYPE_STRING) {
-    printer->Print(variables_,
-      "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n"
-      "  this->$name$().data(), this->$name$().length(),\n"
-      "  ::google::protobuf::internal::WireFormat::PARSE,\n"
-      "  \"$full_name$\");\n");
+  if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
+    GenerateUtf8CheckCodeForString(
+        descriptor_, true, variables_,
+        "this->$name$().data(), this->$name$().length(),\n", printer);
   }
 }
 
@@ -817,14 +805,12 @@ GenerateMergeFromCodedStream(io::Printer* printer) const {
   printer->Print(variables_,
     "DO_(::google::protobuf::internal::WireFormatLite::Read$declared_type$(\n"
     "      input, this->add_$name$()));\n");
-  if (HasUtf8Verification(descriptor_->file()) &&
-      descriptor_->type() == FieldDescriptor::TYPE_STRING) {
-    printer->Print(variables_,
-      "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n"
-      "  this->$name$(this->$name$_size() - 1).data(),\n"
-      "  this->$name$(this->$name$_size() - 1).length(),\n"
-      "  ::google::protobuf::internal::WireFormat::PARSE,\n"
-      "  \"$full_name$\");\n");
+  if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
+    GenerateUtf8CheckCodeForString(
+        descriptor_, true, variables_,
+        "this->$name$(this->$name$_size() - 1).data(),\n"
+        "this->$name$(this->$name$_size() - 1).length(),\n",
+        printer);
   }
 }
 
@@ -832,14 +818,13 @@ void RepeatedStringFieldGenerator::
 GenerateSerializeWithCachedSizes(io::Printer* printer) const {
   printer->Print(variables_,
     "for (int i = 0; i < this->$name$_size(); i++) {\n");
-  if (HasUtf8Verification(descriptor_->file()) &&
-      descriptor_->type() == FieldDescriptor::TYPE_STRING) {
-    printer->Print(variables_,
-      "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n"
-      "  this->$name$(i).data(), this->$name$(i).length(),\n"
-      "  ::google::protobuf::internal::WireFormat::SERIALIZE,\n"
-      "  \"$full_name$\");\n");
+  printer->Indent();
+  if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
+    GenerateUtf8CheckCodeForString(
+        descriptor_, false, variables_,
+        "this->$name$(i).data(), this->$name$(i).length(),\n", printer);
   }
+  printer->Outdent();
   printer->Print(variables_,
     "  ::google::protobuf::internal::WireFormatLite::Write$declared_type$(\n"
     "    $number$, this->$name$(i), output);\n"
@@ -850,14 +835,13 @@ void RepeatedStringFieldGenerator::
 GenerateSerializeWithCachedSizesToArray(io::Printer* printer) const {
   printer->Print(variables_,
     "for (int i = 0; i < this->$name$_size(); i++) {\n");
-  if (HasUtf8Verification(descriptor_->file()) &&
-      descriptor_->type() == FieldDescriptor::TYPE_STRING) {
-    printer->Print(variables_,
-      "  ::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n"
-      "    this->$name$(i).data(), this->$name$(i).length(),\n"
-      "    ::google::protobuf::internal::WireFormat::SERIALIZE,\n"
-      "    \"$full_name$\");\n");
+  printer->Indent();
+  if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
+    GenerateUtf8CheckCodeForString(
+        descriptor_, false, variables_,
+        "this->$name$(i).data(), this->$name$(i).length(),\n", printer);
   }
+  printer->Outdent();
   printer->Print(variables_,
     "  target = ::google::protobuf::internal::WireFormatLite::\n"
     "    Write$declared_type$ToArray($number$, this->$name$(i), target);\n"

+ 36 - 0
src/google/protobuf/compiler/parser.cc

@@ -939,6 +939,42 @@ void Parser::GenerateMapEntry(const MapField& map_field,
   } else {
     value_field->set_type_name(map_field.value_type_name);
   }
+  // Propagate the "enforce_utf8" option to key and value fields if they
+  // are strings. This helps simplify the implementation of code generators
+  // and also reflection-based parsing code.
+  //
+  // The following definition:
+  //   message Foo {
+  //     map<string, string> value = 1 [enforce_utf8 = false];
+  //   }
+  // will be interpreted as:
+  //   message Foo {
+  //     message ValueEntry {
+  //       option map_entry = true;
+  //       string key = 1 [enforce_utf8 = false];
+  //       string value = 2 [enforce_utf8 = false];
+  //     }
+  //     repeated ValueEntry value = 1 [enforce_utf8 = false];
+  //  }
+  //
+  // TODO(xiaofeng): Remove this when the "enforce_utf8" option is removed
+  // from protocol compiler.
+  for (int i = 0; i < field->options().uninterpreted_option_size(); ++i) {
+    const UninterpretedOption& option =
+        field->options().uninterpreted_option(i);
+    if (option.name_size() == 1 &&
+        option.name(0).name_part() == "enforce_utf8" &&
+        !option.name(0).is_extension()) {
+      if (key_field->type() == FieldDescriptorProto::TYPE_STRING) {
+        key_field->mutable_options()->add_uninterpreted_option()
+            ->CopyFrom(option);
+      }
+      if (value_field->type() == FieldDescriptorProto::TYPE_STRING) {
+        value_field->mutable_options()->add_uninterpreted_option()
+            ->CopyFrom(option);
+      }
+    }
+  }
 }
 
 bool Parser::ParseFieldOptions(FieldDescriptorProto* field,

+ 83 - 0
src/google/protobuf/lite_arena_unittest.cc

@@ -0,0 +1,83 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <google/protobuf/arena_test_util.h>
+#include <google/protobuf/map_lite_test_util.h>
+#include <google/protobuf/testing/googletest.h>
+#include <gtest/gtest.h>
+
+namespace google {
+namespace protobuf {
+namespace {
+
+TEST(LiteArenaTest, MapNoHeapAllocation) {
+  // Allocate a large initial block to avoid mallocs during hooked test.
+  std::vector<char> arena_block(128 * 1024);
+  google::protobuf::ArenaOptions options;
+  options.initial_block = &arena_block[0];
+  options.initial_block_size = arena_block.size();
+  google::protobuf::Arena arena(options);
+  string data;
+  data.reserve(128 * 1024);
+
+  {
+    // TODO(teboring): Enable no heap check when ArenaStringPtr is used in
+    // Map.
+    // google::protobuf::internal::NoHeapChecker no_heap;
+
+    protobuf_unittest::TestArenaMapLite* from =
+        google::protobuf::Arena::CreateMessage<protobuf_unittest::TestArenaMapLite>(&arena);
+    google::protobuf::MapLiteTestUtil::SetArenaMapFields(from);
+    from->SerializeToString(&data);
+
+    protobuf_unittest::TestArenaMapLite* to =
+        google::protobuf::Arena::CreateMessage<protobuf_unittest::TestArenaMapLite>(&arena);
+    to->ParseFromString(data);
+    google::protobuf::MapLiteTestUtil::ExpectArenaMapFieldsSet(*to);
+  }
+}
+
+TEST(LiteArenaTest, UnknownFieldMemLeak) {
+  google::protobuf::Arena arena;
+  protobuf_unittest::ForeignMessageArenaLite* message =
+      google::protobuf::Arena::CreateMessage<protobuf_unittest::ForeignMessageArenaLite>(
+          &arena);
+  string data = "\012\000";
+  int original_capacity = data.capacity();
+  while (data.capacity() <= original_capacity) {
+    data.append("a");
+  }
+  data[1] = data.size() - 2;
+  message->ParseFromString(data);
+}
+
+}  // namespace
+}  // namespace protobuf
+}  // namespace google

+ 24 - 32
src/google/protobuf/wire_format.cc

@@ -461,6 +461,10 @@ bool WireFormat::ParseAndMergeMessageSetField(uint32 field_number,
   }
 }
 
+static bool StrictUtf8Check(const FieldDescriptor* field) {
+  return field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3;
+}
+
 bool WireFormat::ParseAndMergeField(
     uint32 tag,
     const FieldDescriptor* field,        // May be NULL for unknown
@@ -633,10 +637,19 @@ bool WireFormat::ParseAndMergeField(
 
       // Handle strings separately so that we can optimize the ctype=CORD case.
       case FieldDescriptor::TYPE_STRING: {
+        bool strict_utf8_check = StrictUtf8Check(field);
         string value;
         if (!WireFormatLite::ReadString(input, &value)) return false;
-        VerifyUTF8StringNamedField(value.data(), value.length(), PARSE,
-                                   field->name().c_str());
+        if (strict_utf8_check) {
+          if (!WireFormatLite::VerifyUtf8String(
+                  value.data(), value.length(), WireFormatLite::PARSE,
+                  field->full_name().c_str())) {
+            return false;
+          }
+        } else {
+          VerifyUTF8StringNamedField(value.data(), value.length(), PARSE,
+                                     field->full_name().c_str());
+        }
         if (field->is_repeated()) {
           message_reflection->AddString(message, field, value);
         } else {
@@ -894,13 +907,20 @@ void WireFormat::SerializeFieldWithCachedSizes(
       // Handle strings separately so that we can get string references
       // instead of copying.
       case FieldDescriptor::TYPE_STRING: {
+        bool strict_utf8_check = StrictUtf8Check(field);
         string scratch;
         const string& value = field->is_repeated() ?
           message_reflection->GetRepeatedStringReference(
             message, field, j, &scratch) :
           message_reflection->GetStringReference(message, field, &scratch);
-        VerifyUTF8StringNamedField(value.data(), value.length(), SERIALIZE,
-                                   field->name().c_str());
+        if (strict_utf8_check) {
+          WireFormatLite::VerifyUtf8String(value.data(), value.length(),
+                                           WireFormatLite::SERIALIZE,
+                                           field->full_name().c_str());
+        } else {
+          VerifyUTF8StringNamedField(value.data(), value.length(), SERIALIZE,
+                                     field->full_name().c_str());
+        }
         WireFormatLite::WriteString(field->number(), value, output);
         break;
       }
@@ -1108,34 +1128,6 @@ int WireFormat::MessageSetItemByteSize(
   return our_size;
 }
 
-void WireFormat::VerifyUTF8StringFallback(const char* data,
-                                          int size,
-                                          Operation op,
-                                          const char* field_name) {
-  if (!IsStructurallyValidUTF8(data, size)) {
-    const char* operation_str = NULL;
-    switch (op) {
-      case PARSE:
-        operation_str = "parsing";
-        break;
-      case SERIALIZE:
-        operation_str = "serializing";
-        break;
-      // no default case: have the compiler warn if a case is not covered.
-    }
-    string quoted_field_name = "";
-    if (field_name != NULL) {
-      quoted_field_name = StringPrintf(" '%s'", field_name);
-    }
-    // no space below to avoid double space when the field name is missing.
-    GOOGLE_LOG(ERROR) << "String field" << quoted_field_name << " contains invalid "
-               << "UTF-8 data when " << operation_str << " a protocol "
-               << "buffer. Use the 'bytes' type if you intend to send raw "
-               << "bytes. ";
-  }
-}
-
-
 }  // namespace internal
 }  // namespace protobuf
 }  // namespace google

+ 6 - 13
src/google/protobuf/wire_format.h

@@ -231,8 +231,8 @@ class LIBPROTOBUF_EXPORT WireFormat {
       const Message& message);
 
   enum Operation {
-    PARSE,
-    SERIALIZE,
+    PARSE = 0,
+    SERIALIZE = 1,
   };
 
   // Verifies that a string field is valid UTF8, logging an error if not.
@@ -247,13 +247,6 @@ class LIBPROTOBUF_EXPORT WireFormat {
                                          const char* field_name);
 
  private:
-  // Verifies that a string field is valid UTF8, logging an error if not.
-  static void VerifyUTF8StringFallback(
-      const char* data,
-      int size,
-      Operation op,
-      const char* field_name);
-
   // Skip a MessageSet field.
   static bool SkipMessageSetField(io::CodedInputStream* input,
                                   uint32 field_number,
@@ -265,8 +258,6 @@ class LIBPROTOBUF_EXPORT WireFormat {
                                            Message* message,
                                            io::CodedInputStream* input);
 
-
-
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(WireFormat);
 };
 
@@ -321,7 +312,8 @@ inline int WireFormat::TagSize(int field_number, FieldDescriptor::Type type) {
 inline void WireFormat::VerifyUTF8String(const char* data, int size,
     WireFormat::Operation op) {
 #ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
-  WireFormat::VerifyUTF8StringFallback(data, size, op, NULL);
+  WireFormatLite::VerifyUtf8String(
+      data, size, static_cast<WireFormatLite::Operation>(op), NULL);
 #else
   // Avoid the compiler warning about unsued variables.
   (void)data; (void)size; (void)op;
@@ -332,7 +324,8 @@ inline void WireFormat::VerifyUTF8StringNamedField(
     const char* data, int size, WireFormat::Operation op,
     const char* field_name) {
 #ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
-  WireFormat::VerifyUTF8StringFallback(data, size, op, field_name);
+  WireFormatLite::VerifyUtf8String(
+      data, size, static_cast<WireFormatLite::Operation>(op), field_name);
 #endif
 }
 

+ 31 - 0
src/google/protobuf/wire_format_lite.cc

@@ -39,10 +39,12 @@
 #include <vector>
 #include <google/protobuf/stubs/logging.h>
 #include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/stringprintf.h>
 #include <google/protobuf/io/coded_stream_inl.h>
 #include <google/protobuf/io/zero_copy_stream.h>
 #include <google/protobuf/io/zero_copy_stream_impl_lite.h>
 
+
 namespace google {
 namespace protobuf {
 namespace internal {
@@ -505,6 +507,35 @@ bool WireFormatLite::ReadBytes(io::CodedInputStream* input, string** p) {
   return ReadBytesToString(input, *p);
 }
 
+bool WireFormatLite::VerifyUtf8String(const char* data,
+                                      int size,
+                                      Operation op,
+                                      const char* field_name) {
+  if (!IsStructurallyValidUTF8(data, size)) {
+    const char* operation_str = NULL;
+    switch (op) {
+      case PARSE:
+        operation_str = "parsing";
+        break;
+      case SERIALIZE:
+        operation_str = "serializing";
+        break;
+      // no default case: have the compiler warn if a case is not covered.
+    }
+    string quoted_field_name = "";
+    if (field_name != NULL) {
+      quoted_field_name = StringPrintf(" '%s'", field_name);
+    }
+    // no space below to avoid double space when the field name is missing.
+    GOOGLE_LOG(ERROR) << "String field" << quoted_field_name << " contains invalid "
+               << "UTF-8 data when " << operation_str << " a protocol "
+               << "buffer. Use the 'bytes' type if you intend to send raw "
+               << "bytes. ";
+    return false;
+  }
+  return true;
+}
+
 }  // namespace internal
 }  // namespace protobuf
 }  // namespace google

+ 10 - 0
src/google/protobuf/wire_format_lite.h

@@ -316,6 +316,16 @@ class LIBPROTOBUF_EXPORT WireFormatLite {
   static bool ReadBytes(input, string** p);
 
 
+  enum Operation {
+    PARSE = 0,
+    SERIALIZE = 1,
+  };
+
+  // Returns true if the data is valid UTF-8.
+  static bool VerifyUtf8String(const char* data, int size,
+                               Operation op,
+                               const char* field_name);
+
   static inline bool ReadGroup  (field_number, input, MessageLite* value);
   static inline bool ReadMessage(input, MessageLite* value);
 

+ 4 - 0
update_file_lists.sh

@@ -55,8 +55,10 @@ LITE_PROTOS=$(get_proto_files $MAKEFILE protoc_lite_outputs)
 PROTOS=$(get_proto_files $MAKEFILE protoc_outputs)
 WKT_PROTOS=$(get_variable_value $MAKEFILE nobase_dist_proto_DATA)
 COMMON_TEST_SOURCES=$(get_source_files $MAKEFILE COMMON_TEST_SOURCES)
+COMMON_LITE_TEST_SOURCES=$(get_source_files $MAKEFILE COMMON_LITE_TEST_SOURCES)
 TEST_SOURCES=$(get_source_files $MAKEFILE protobuf_test_SOURCES)
 LITE_TEST_SOURCES=$(get_source_files $MAKEFILE protobuf_lite_test_SOURCES)
+LITE_ARENA_TEST_SOURCES=$(get_source_files $MAKEFILE protobuf_lite_arena_test_SOURCES)
 TEST_PLUGIN_SOURCES=$(get_source_files $MAKEFILE test_plugin_SOURCES)
 
 ################################################################################
@@ -112,8 +114,10 @@ set_cmake_value $CMAKE_DIR/libprotoc.cmake libprotoc_files $CMAKE_PREFIX $LIBPRO
 set_cmake_value $CMAKE_DIR/tests.cmake lite_test_protos "" $LITE_PROTOS
 set_cmake_value $CMAKE_DIR/tests.cmake tests_protos "" $PROTOS
 set_cmake_value $CMAKE_DIR/tests.cmake common_test_files $CMAKE_PREFIX $COMMON_TEST_SOURCES
+set_cmake_value $CMAKE_DIR/tests.cmake common_lite_test_files $CMAKE_PREFIX $COMMON_LITE_TEST_SOURCES
 set_cmake_value $CMAKE_DIR/tests.cmake tests_files $CMAKE_PREFIX $TEST_SOURCES
 set_cmake_value $CMAKE_DIR/tests.cmake lite_test_files $CMAKE_PREFIX $LITE_TEST_SOURCES
+set_cmake_value $CMAKE_DIR/tests.cmake lite_arena_test_files $CMAKE_PREFIX $LITE_ARENA_TEST_SOURCES
 
 # Generate extract_includes.bat
 echo "mkdir include" > $EXTRACT_INCLUDES_BAT