瀏覽代碼

Integrate changes from internal code.

protoc
* Enum values may now have custom options, using syntax similar to field
  options.
* Fixed bug where .proto files which use custom options but don't actually
  define them (i.e. they import another .proto file defining the options)
  had to explicitly import descriptor.proto.
* Adjacent string literals in .proto files will now be concatenated, like in
  C.

C++
* Generated message classes now have a Swap() method which efficiently swaps
  the contents of two objects.
* All message classes now have a SpaceUsed() method which returns an estimate
  of the number of bytes of allocated memory currently owned by the object.
  This is particularly useful when you are reusing a single message object
  to improve performance but want to make sure it doesn't bloat up too large.
* New method Message::SerializeAsString() returns a string containing the
  serialized data.  May be more convenient than calling
  SerializeToString(string*).
* In debug mode, log error messages when string-type fields are found to
  contain bytes that are not valid UTF-8.
* Fixed bug where a message with multiple extension ranges couldn't parse
  extensions.
* Fixed bug where MergeFrom(const Message&) didn't do anything if invoked on
  a message that contained no fields (but possibly contained extensions).
* Fixed ShortDebugString() so that it no longer takes O(n^2) time.
* Fixed crash in TextFormat parsing if the first token in the input caused a
  tokenization error.

Java
* New overload of mergeFrom() which parses a slice of a byte array instead
  of the whole thing.
* New method ByteString.asReadOnlyByteBuffer() returns a read-only
  java.nio.ByteBuffer that shares the ByteString's backing byte array.
* Improved performance of isInitialized() when optimizing for code size.

Python
* Corrected ListFields() signature in Message base class to match what
  subclasses actually implement.
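* Some minor refactoring: the repeated-field container classes moved from
  reflection.py into the new internal/containers.py module.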
kenton@google.com 17 年之前
父節點
當前提交
26bd9eee6e
共有 76 個文件被更改,包括 2461 次插入和 238 次删除
  1. 1 0
      Makefile.am
  2. 18 3
      java/src/main/java/com/google/protobuf/AbstractMessage.java
  3. 10 0
      java/src/main/java/com/google/protobuf/ByteString.java
  4. 13 4
      java/src/main/java/com/google/protobuf/CodedInputStream.java
  5. 31 0
      java/src/main/java/com/google/protobuf/GeneratedMessage.java
  6. 16 0
      java/src/main/java/com/google/protobuf/Message.java
  7. 10 0
      java/src/test/java/com/google/protobuf/CodedInputStreamTest.java
  8. 21 3
      java/src/test/java/com/google/protobuf/GeneratedMessageTest.java
  9. 41 0
      java/src/test/java/com/google/protobuf/WireFormatTest.java
  10. 179 0
      python/google/protobuf/internal/containers.py
  11. 2 0
      python/google/protobuf/internal/decoder_test.py
  12. 2 0
      python/google/protobuf/internal/descriptor_test.py
  13. 2 0
      python/google/protobuf/internal/encoder_test.py
  14. 2 0
      python/google/protobuf/internal/generator_test.py
  15. 2 0
      python/google/protobuf/internal/input_stream_test.py
  16. 2 0
      python/google/protobuf/internal/output_stream_test.py
  17. 2 1
      python/google/protobuf/internal/reflection_test.py
  18. 2 0
      python/google/protobuf/internal/service_reflection_test.py
  19. 2 0
      python/google/protobuf/internal/text_format_test.py
  20. 2 0
      python/google/protobuf/internal/wire_format_test.py
  21. 1 1
      python/google/protobuf/message.py
  22. 4 131
      python/google/protobuf/reflection.py
  23. 1 0
      python/setup.py
  24. 2 0
      src/Makefile.am
  25. 1 0
      src/google/protobuf/compiler/command_line_interface.cc
  26. 10 0
      src/google/protobuf/compiler/cpp/cpp_enum_field.cc
  27. 2 0
      src/google/protobuf/compiler/cpp/cpp_enum_field.h
  28. 7 0
      src/google/protobuf/compiler/cpp/cpp_field.h
  29. 53 19
      src/google/protobuf/compiler/cpp/cpp_message.cc
  30. 1 0
      src/google/protobuf/compiler/cpp/cpp_message.h
  31. 10 0
      src/google/protobuf/compiler/cpp/cpp_message_field.cc
  32. 2 0
      src/google/protobuf/compiler/cpp/cpp_message_field.h
  33. 10 0
      src/google/protobuf/compiler/cpp/cpp_primitive_field.cc
  34. 2 0
      src/google/protobuf/compiler/cpp/cpp_primitive_field.h
  35. 19 9
      src/google/protobuf/compiler/cpp/cpp_string_field.cc
  36. 2 0
      src/google/protobuf/compiler/cpp/cpp_string_field.h
  37. 116 0
      src/google/protobuf/compiler/cpp/cpp_unittest.cc
  38. 17 1
      src/google/protobuf/compiler/parser.cc
  39. 4 0
      src/google/protobuf/compiler/parser.h
  40. 38 0
      src/google/protobuf/compiler/parser_unittest.cc
  41. 24 9
      src/google/protobuf/descriptor.cc
  42. 248 18
      src/google/protobuf/descriptor.pb.cc
  43. 36 0
      src/google/protobuf/descriptor.pb.h
  44. 1 0
      src/google/protobuf/descriptor.proto
  45. 55 0
      src/google/protobuf/descriptor_unittest.cc
  46. 2 1
      src/google/protobuf/dynamic_message.cc
  47. 15 0
      src/google/protobuf/dynamic_message_unittest.cc
  48. 58 0
      src/google/protobuf/extension_set.cc
  49. 6 0
      src/google/protobuf/extension_set.h
  50. 167 0
      src/google/protobuf/extension_set_unittest.cc
  51. 81 1
      src/google/protobuf/generated_message_reflection.cc
  52. 12 1
      src/google/protobuf/generated_message_reflection.h
  53. 4 6
      src/google/protobuf/io/tokenizer.cc
  54. 8 0
      src/google/protobuf/io/tokenizer.h
  55. 9 0
      src/google/protobuf/io/tokenizer_unittest.cc
  56. 3 1
      src/google/protobuf/io/zero_copy_stream_unittest.cc
  57. 22 0
      src/google/protobuf/message.cc
  58. 19 1
      src/google/protobuf/message.h
  59. 2 0
      src/google/protobuf/message_unittest.cc
  60. 52 0
      src/google/protobuf/repeated_field.h
  61. 8 0
      src/google/protobuf/repeated_field_unittest.cc
  62. 6 0
      src/google/protobuf/stubs/common.h
  63. 521 0
      src/google/protobuf/stubs/structurally_valid.cc
  64. 30 0
      src/google/protobuf/stubs/structurally_valid_unittest.cc
  65. 27 14
      src/google/protobuf/text_format.cc
  66. 34 2
      src/google/protobuf/text_format_unittest.cc
  67. 9 0
      src/google/protobuf/unittest.proto
  68. 1 0
      src/google/protobuf/unittest_optimize_for.proto
  69. 34 0
      src/google/protobuf/unknown_field_set.cc
  70. 22 2
      src/google/protobuf/unknown_field_set.h
  71. 90 0
      src/google/protobuf/unknown_field_set_unittest.cc
  72. 11 2
      src/google/protobuf/wire_format.cc
  73. 25 7
      src/google/protobuf/wire_format_inl.h
  74. 149 1
      src/google/protobuf/wire_format_unittest.cc
  75. 4 0
      vsprojects/libprotobuf.vcproj
  76. 4 0
      vsprojects/tests.vcproj

+ 1 - 0
Makefile.am

@@ -70,6 +70,7 @@ EXTRA_DIST =                                                                 \
   java/pom.xml                                                               \
   java/README.txt                                                            \
   python/google/protobuf/internal/generator_test.py                          \
+  python/google/protobuf/internal/containers.py                              \
   python/google/protobuf/internal/decoder.py                                 \
   python/google/protobuf/internal/decoder_test.py                            \
   python/google/protobuf/internal/descriptor_test.py                         \

+ 18 - 3
java/src/main/java/com/google/protobuf/AbstractMessage.java

@@ -77,6 +77,7 @@ public abstract class AbstractMessage implements Message {
     return true;
   }
 
+  @Override
   public final String toString() {
     return TextFormat.printToString(this);
   }
@@ -199,6 +200,7 @@ public abstract class AbstractMessage implements Message {
   public static abstract class Builder<BuilderType extends Builder>
       implements Message.Builder {
     // The compiler produces an error if this is not declared explicitly.
+    @Override
     public abstract BuilderType clone();
 
     public BuilderType clear() {
@@ -307,8 +309,13 @@ public abstract class AbstractMessage implements Message {
 
     public BuilderType mergeFrom(byte[] data)
         throws InvalidProtocolBufferException {
+      return mergeFrom(data, 0, data.length);
+    }
+
+    public BuilderType mergeFrom(byte[] data, int off, int len)
+        throws InvalidProtocolBufferException {
       try {
-        CodedInputStream input = CodedInputStream.newInstance(data);
+        CodedInputStream input = CodedInputStream.newInstance(data, off, len);
         mergeFrom(input);
         input.checkLastTagWas(0);
         return (BuilderType) this;
@@ -322,10 +329,18 @@ public abstract class AbstractMessage implements Message {
     }
 
     public BuilderType mergeFrom(
-        byte[] data, ExtensionRegistry extensionRegistry)
+        byte[] data,
+        ExtensionRegistry extensionRegistry)
+        throws InvalidProtocolBufferException {
+      return mergeFrom(data, 0, data.length, extensionRegistry);
+    }
+
+    public BuilderType mergeFrom(
+        byte[] data, int off, int len,
+        ExtensionRegistry extensionRegistry)
         throws InvalidProtocolBufferException {
       try {
-        CodedInputStream input = CodedInputStream.newInstance(data);
+        CodedInputStream input = CodedInputStream.newInstance(data, off, len);
         mergeFrom(input, extensionRegistry);
         input.checkLastTagWas(0);
         return (BuilderType) this;

+ 10 - 0
java/src/main/java/com/google/protobuf/ByteString.java

@@ -35,6 +35,7 @@ import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.FilterOutputStream;
 import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
 
 /**
  * Immutable array of bytes.
@@ -153,6 +154,15 @@ public final class ByteString {
     return copy;
   }
 
+  /**
+   * Constructs a new read-only {@code java.nio.ByteBuffer} with the
+   * same backing byte array.
+   */
+  public ByteBuffer asReadOnlyByteBuffer() {
+    ByteBuffer byteBuffer = ByteBuffer.wrap(this.bytes);
+    return byteBuffer.asReadOnlyBuffer();
+  }
+
   /**
    * Constructs a new {@code String} by decoding the bytes using the
    * specified charset.

+ 13 - 4
java/src/main/java/com/google/protobuf/CodedInputStream.java

@@ -59,7 +59,14 @@ public final class CodedInputStream {
    * Create a new CodedInputStream wrapping the given byte array.
    */
   public static CodedInputStream newInstance(byte[] buf) {
-    return new CodedInputStream(buf);
+    return newInstance(buf, 0, buf.length);
+  }
+
+  /**
+   * Create a new CodedInputStream wrapping the given byte array slice.
+   */
+  public static CodedInputStream newInstance(byte[] buf, int off, int len) {
+    return new CodedInputStream(buf, off, len);
   }
 
   // -----------------------------------------------------------------
@@ -454,7 +461,7 @@ public final class CodedInputStream {
   private byte[] buffer;
   private int bufferSize;
   private int bufferSizeAfterLimit = 0;
-  private int bufferPos = 0;
+  private int bufferPos;
   private InputStream input;
   private int lastTag = 0;
 
@@ -479,15 +486,17 @@ public final class CodedInputStream {
   private static final int DEFAULT_SIZE_LIMIT = 64 << 20;  // 64MB
   private static final int BUFFER_SIZE = 4096;
 
-  private CodedInputStream(byte[] buffer) {
+  private CodedInputStream(byte[] buffer, int off, int len) {
     this.buffer = buffer;
-    this.bufferSize = buffer.length;
+    this.bufferSize = off + len;
+    this.bufferPos = off;
     this.input = null;
   }
 
   private CodedInputStream(InputStream input) {
     this.buffer = new byte[BUFFER_SIZE];
     this.bufferSize = 0;
+    this.bufferPos = 0;
     this.input = input;
   }
 

+ 31 - 0
java/src/main/java/com/google/protobuf/GeneratedMessage.java

@@ -87,6 +87,33 @@ public abstract class GeneratedMessage extends AbstractMessage {
     }
     return result;
   }
+  
+  public boolean isInitialized() {
+    for (FieldDescriptor field : getDescriptorForType().getFields()) {
+      // Check that all required fields are present.
+      if (field.isRequired()) {
+        if (!hasField(field)) {
+          return false;
+        }
+      }
+      // Check that embedded messages are initialized.
+      if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+        if (field.isRepeated()) {
+          for (Message element : (List<Message>) getField(field)) {
+            if (!element.isInitialized()) {
+              return false;
+            }
+          }
+        } else {
+          if (hasField(field) && !((Message) getField(field)).isInitialized()) {
+            return false;
+          }
+        }
+      }
+    }
+
+    return true;
+  }
 
   public Map<FieldDescriptor, Object> getAllFields() {
     return Collections.unmodifiableMap(getAllFieldsMutable());
@@ -370,6 +397,10 @@ public abstract class GeneratedMessage extends AbstractMessage {
     protected boolean extensionsAreInitialized() {
       return extensions.isInitialized();
     }
+    
+    public boolean isInitialized() {
+      return super.isInitialized() && extensionsAreInitialized();
+    }
 
     /**
      * Used by subclasses to serialize extensions.  Extension ranges may be

+ 16 - 0
java/src/main/java/com/google/protobuf/Message.java

@@ -397,6 +397,13 @@ public interface Message {
      */
     public Builder mergeFrom(byte[] data) throws InvalidProtocolBufferException;
 
+    /**
+     * Parse {@code data} as a message of this type and merge it with the
+     * message being built.  This is just a small wrapper around
+     * {@link #mergeFrom(CodedInputStream)}.
+     */
+    public Builder mergeFrom(byte[] data, int off, int len) throws InvalidProtocolBufferException;
+
     /**
      * Parse {@code data} as a message of this type and merge it with the
      * message being built.  This is just a small wrapper around
@@ -406,6 +413,15 @@ public interface Message {
                       ExtensionRegistry extensionRegistry)
                       throws InvalidProtocolBufferException;
 
+    /**
+     * Parse {@code data} as a message of this type and merge it with the
+     * message being built.  This is just a small wrapper around
+     * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}.
+     */
+    Builder mergeFrom(byte[] data, int off, int len,
+                      ExtensionRegistry extensionRegistry)
+                      throws InvalidProtocolBufferException;
+
     /**
      * Parse a message of this type from {@code input} and merge it with the
      * message being built.  This is just a small wrapper around

+ 10 - 0
java/src/test/java/com/google/protobuf/CodedInputStreamTest.java

@@ -412,4 +412,14 @@ public class CodedInputStreamTest extends TestCase {
     String text = input.readString();
     assertEquals(0xfffd, text.charAt(0));
   }
+
+  public void testReadFromSlice() throws Exception {
+    byte[] bytes = bytes(0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
+    CodedInputStream in = CodedInputStream.newInstance(bytes, 3, 5);
+    for (int i = 3; i < 8; i++) {
+      assertEquals(i, in.readRawByte());
+    }
+    // eof
+    assertEquals(0, in.readTag());
+  }
 }

+ 21 - 3
java/src/test/java/com/google/protobuf/GeneratedMessageTest.java

@@ -30,8 +30,9 @@
 
 package com.google.protobuf;
 
-import protobuf_unittest.UnittestOptimizeFor.TestRequiredOptimizedForSize;
+import protobuf_unittest.UnittestOptimizeFor.TestOptimizedForSize;
 import protobuf_unittest.UnittestOptimizeFor.TestOptionalOptimizedForSize;
+import protobuf_unittest.UnittestOptimizeFor.TestRequiredOptimizedForSize;
 import protobuf_unittest.UnittestProto;
 import protobuf_unittest.UnittestProto.ForeignMessage;
 import protobuf_unittest.UnittestProto.ForeignEnum;
@@ -260,8 +261,10 @@ public class GeneratedMessageTest extends TestCase {
         MultipleFilesTestProto.extensionWithOuter));
   }
 
-  public void testOptionalFieldWithRequiredSubfieldsOptimizedForSize() throws Exception {
-    TestOptionalOptimizedForSize message = TestOptionalOptimizedForSize.getDefaultInstance();
+  public void testOptionalFieldWithRequiredSubfieldsOptimizedForSize()
+    throws Exception {
+    TestOptionalOptimizedForSize message =
+        TestOptionalOptimizedForSize.getDefaultInstance();
     assertTrue(message.isInitialized());
     
     message = TestOptionalOptimizedForSize.newBuilder().setO(
@@ -274,4 +277,19 @@ public class GeneratedMessageTest extends TestCase {
         ).buildPartial();
     assertTrue(message.isInitialized());
   }
+
+  public void testUninitializedExtensionInOptimizedForSize()
+      throws Exception {
+    TestOptimizedForSize.Builder builder = TestOptimizedForSize.newBuilder();
+    builder.setExtension(TestOptimizedForSize.testExtension2,
+        TestRequiredOptimizedForSize.newBuilder().buildPartial());
+    assertFalse(builder.isInitialized());
+    assertFalse(builder.buildPartial().isInitialized());
+
+    builder = TestOptimizedForSize.newBuilder();
+    builder.setExtension(TestOptimizedForSize.testExtension2,
+        TestRequiredOptimizedForSize.newBuilder().setX(10).buildPartial());
+    assertTrue(builder.isInitialized());
+    assertTrue(builder.buildPartial().isInitialized());
+  }
 }

+ 41 - 0
java/src/test/java/com/google/protobuf/WireFormatTest.java

@@ -135,6 +135,47 @@ public class WireFormatTest extends TestCase {
     assertFieldsInOrder(dynamic_data);
   }
 
+  private ExtensionRegistry getTestFieldOrderingsRegistry() {
+    ExtensionRegistry result = ExtensionRegistry.newInstance();
+    result.add(UnittestProto.myExtensionInt);
+    result.add(UnittestProto.myExtensionString);
+    return result;
+  }
+
+  public void testParseMultipleExtensionRanges() throws Exception {
+    // Make sure we can parse a message that contains multiple extension
+    // ranges.
+    TestFieldOrderings source =
+      TestFieldOrderings.newBuilder()
+        .setMyInt(1)
+        .setMyString("foo")
+        .setMyFloat(1.0F)
+        .setExtension(UnittestProto.myExtensionInt, 23)
+        .setExtension(UnittestProto.myExtensionString, "bar")
+        .build();
+    TestFieldOrderings dest =
+      TestFieldOrderings.parseFrom(source.toByteString(),
+                                   getTestFieldOrderingsRegistry());
+    assertEquals(source, dest);
+  }
+
+  public void testParseMultipleExtensionRangesDynamic() throws Exception {
+    // Same as above except with DynamicMessage.
+    Descriptors.Descriptor descriptor = TestFieldOrderings.getDescriptor();
+    DynamicMessage source =
+      DynamicMessage.newBuilder(TestFieldOrderings.getDescriptor())
+        .setField(descriptor.findFieldByName("my_int"), 1L)
+        .setField(descriptor.findFieldByName("my_string"), "foo")
+        .setField(descriptor.findFieldByName("my_float"), 1.0F)
+        .setField(UnittestProto.myExtensionInt.getDescriptor(), 23)
+        .setField(UnittestProto.myExtensionString.getDescriptor(), "bar")
+        .build();
+    DynamicMessage dest =
+      DynamicMessage.parseFrom(descriptor, source.toByteString(),
+                               getTestFieldOrderingsRegistry());
+    assertEquals(source, dest);
+  }
+
   private static final int UNKNOWN_TYPE_ID = 1550055;
   private static final int TYPE_ID_1 =
     TestMessageSetExtension1.getDescriptor().getExtensions().get(0).getNumber();

+ 179 - 0
python/google/protobuf/internal/containers.py

@@ -0,0 +1,179 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2008 Google Inc.  All rights reserved.
+# http://code.google.com/p/protobuf/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Contains container classes to represent different protocol buffer types.
+
+This file defines container classes which represent categories of protocol
+buffer field types which need extra maintenance. Currently these categories
+are:
+  - Repeated scalar fields - These are all repeated fields which aren't
+    composite (e.g. they are of simple types like int32, string, etc).
+  - Repeated composite fields - Repeated fields which are composite. This
+    includes groups and nested messages.
+"""
+
+__author__ = 'petar@google.com (Petar Petrov)'
+
+
+class BaseContainer(object):
+
+  """Base container class."""
+
+  # Minimizes memory usage and disallows assignment to other attributes.
+  __slots__ = ['_message_listener', '_values']
+
+  def __init__(self, message_listener):
+    """
+    Args:
+      message_listener: A MessageListener implementation.
+        Concrete container subclasses call this object's
+        TransitionToNonempty() method when they transition from being empty to
+        being nonempty.
+    """
+    self._message_listener = message_listener
+    self._values = []
+
+  def __getitem__(self, key):
+    """Retrieves item by the specified key."""
+    return self._values[key]
+
+  def __len__(self):
+    """Returns the number of elements in the container."""
+    return len(self._values)
+
+  def __ne__(self, other):
+    """Checks if another instance isn't equal to this one."""
+    # The concrete classes should define __eq__.
+    return not self == other
+
+
+class RepeatedScalarFieldContainer(BaseContainer):
+
+  """Simple, type-checked, list-like container for holding repeated scalars."""
+
+  # Disallows assignment to other attributes.
+  __slots__ = ['_type_checker']
+
+  def __init__(self, message_listener, type_checker):
+    """
+    Args:
+      message_listener: A MessageListener implementation.
+        The RepeatedScalarFieldContainer will call this object's
+        TransitionToNonempty() method when it transitions from being empty to
+        being nonempty.
+      type_checker: A type_checkers.ValueChecker instance to run on elements
+        inserted into this container.
+    """
+    super(RepeatedScalarFieldContainer, self).__init__(message_listener)
+    self._type_checker = type_checker
+
+  def append(self, elem):
+    """Appends a scalar to the list. Similar to list.append()."""
+    self._type_checker.CheckValue(elem)
+    self._values.append(elem)
+    self._message_listener.ByteSizeDirty()
+    if len(self._values) == 1:
+      self._message_listener.TransitionToNonempty()
+
+  def remove(self, elem):
+    """Removes a scalar from the list. Similar to list.remove()."""
+    self._values.remove(elem)
+    self._message_listener.ByteSizeDirty()
+
+  def __setitem__(self, key, value):
+    """Sets the item on the specified position."""
+    # No need to call TransitionToNonempty(), since if we're able to
+    # set the element at this index, we were already nonempty before
+    # this method was called.
+    self._message_listener.ByteSizeDirty()
+    self._type_checker.CheckValue(value)
+    self._values[key] = value
+
+  def __eq__(self, other):
+    """Compares the current instance with another one."""
+    if self is other:
+      return True
+    # Special case for the same type which should be common and fast.
+    if isinstance(other, self.__class__):
+      return other._values == self._values
+    # We are presumably comparing against some other sequence type.
+    return other == self._values
+
+
+class RepeatedCompositeFieldContainer(BaseContainer):
+
+  """Simple, list-like container for holding repeated composite fields."""
+
+  # Disallows assignment to other attributes.
+  __slots__ = ['_message_descriptor']
+
+  def __init__(self, message_listener, message_descriptor):
+    """
+    Note that we pass in a descriptor instead of the generated class directly,
+    since at the time we construct a RepeatedCompositeFieldContainer we
+    haven't yet necessarily initialized the type that will be contained in the
+    container.
+
+    Args:
+      message_listener: A MessageListener implementation.
+        The RepeatedCompositeFieldContainer will call this object's
+        TransitionToNonempty() method when it transitions from being empty to
+        being nonempty.
+      message_descriptor: A Descriptor instance describing the protocol type
+        that should be present in this container.  We'll use the
+        _concrete_class field of this descriptor when the client calls add().
+    """
+    super(RepeatedCompositeFieldContainer, self).__init__(message_listener)
+    self._message_descriptor = message_descriptor
+
+  def add(self):
+    """Adds a new element to the list and returns it."""
+    new_element = self._message_descriptor._concrete_class()
+    new_element._SetListener(self._message_listener)
+    self._values.append(new_element)
+    self._message_listener.ByteSizeDirty()
+    self._message_listener.TransitionToNonempty()
+    return new_element
+
+  def __delitem__(self, key):
+    """Deletes the element on the specified position."""
+    self._message_listener.ByteSizeDirty()
+    del self._values[key]
+
+  def __eq__(self, other):
+    """Compares the current instance with another one."""
+    if self is other:
+      return True
+    if not isinstance(other, self.__class__):
+      raise TypeError('Can only compare repeated composite fields against '
+                      'other repeated composite fields.')
+    return self._values == other._values
+
+  # TODO(robinson): Implement, document, and test slicing support.

+ 2 - 0
python/google/protobuf/internal/decoder_test.py

@@ -1,3 +1,5 @@
+#! /usr/bin/python
+#
 # Protocol Buffers - Google's data interchange format
 # Copyright 2008 Google Inc.  All rights reserved.
 # http://code.google.com/p/protobuf/

+ 2 - 0
python/google/protobuf/internal/descriptor_test.py

@@ -1,3 +1,5 @@
+#! /usr/bin/python
+#
 # Protocol Buffers - Google's data interchange format
 # Copyright 2008 Google Inc.  All rights reserved.
 # http://code.google.com/p/protobuf/

+ 2 - 0
python/google/protobuf/internal/encoder_test.py

@@ -1,3 +1,5 @@
+#! /usr/bin/python
+#
 # Protocol Buffers - Google's data interchange format
 # Copyright 2008 Google Inc.  All rights reserved.
 # http://code.google.com/p/protobuf/

+ 2 - 0
python/google/protobuf/internal/generator_test.py

@@ -1,3 +1,5 @@
+#! /usr/bin/python
+#
 # Protocol Buffers - Google's data interchange format
 # Copyright 2008 Google Inc.  All rights reserved.
 # http://code.google.com/p/protobuf/

+ 2 - 0
python/google/protobuf/internal/input_stream_test.py

@@ -1,3 +1,5 @@
+#! /usr/bin/python
+#
 # Protocol Buffers - Google's data interchange format
 # Copyright 2008 Google Inc.  All rights reserved.
 # http://code.google.com/p/protobuf/

+ 2 - 0
python/google/protobuf/internal/output_stream_test.py

@@ -1,3 +1,5 @@
+#! /usr/bin/python
+#
 # Protocol Buffers - Google's data interchange format
 # Copyright 2008 Google Inc.  All rights reserved.
 # http://code.google.com/p/protobuf/

+ 2 - 1
python/google/protobuf/internal/reflection_test.py

@@ -1,5 +1,6 @@
+#! /usr/bin/python
 # -*- coding: utf-8 -*-
-
+#
 # Protocol Buffers - Google's data interchange format
 # Copyright 2008 Google Inc.  All rights reserved.
 # http://code.google.com/p/protobuf/

+ 2 - 0
python/google/protobuf/internal/service_reflection_test.py

@@ -1,3 +1,5 @@
+#! /usr/bin/python
+#
 # Protocol Buffers - Google's data interchange format
 # Copyright 2008 Google Inc.  All rights reserved.
 # http://code.google.com/p/protobuf/

+ 2 - 0
python/google/protobuf/internal/text_format_test.py

@@ -1,3 +1,5 @@
+#! /usr/bin/python
+#
 # Protocol Buffers - Google's data interchange format
 # Copyright 2008 Google Inc.  All rights reserved.
 # http://code.google.com/p/protobuf/

+ 2 - 0
python/google/protobuf/internal/wire_format_test.py

@@ -1,3 +1,5 @@
+#! /usr/bin/python
+#
 # Protocol Buffers - Google's data interchange format
 # Copyright 2008 Google Inc.  All rights reserved.
 # http://code.google.com/p/protobuf/

+ 1 - 1
python/google/protobuf/message.py

@@ -198,7 +198,7 @@ class Message(object):
   # Typically (in python), an underscore is appended to names that are
   # keywords. So they would become lambda_ or yield_.
   # """
-  def ListFields(self, field_name):
+  def ListFields(self):
     """Returns a list of (FieldDescriptor, value) tuples for all
     fields in the message which are not empty.  A singular field is non-empty
     if HasField() would return true, and a repeated field is non-empty if

+ 4 - 131
python/google/protobuf/reflection.py

@@ -54,6 +54,7 @@ import heapq
 import threading
 import weakref
 # We use "as" to avoid name collisions with variables.
+from google.protobuf.internal import containers
 from google.protobuf.internal import decoder
 from google.protobuf.internal import encoder
 from google.protobuf.internal import message_listener as message_listener_mod
@@ -274,9 +275,10 @@ def _DefaultValueForField(message, field):
     if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
       # We can't look at _concrete_class yet since it might not have
       # been set.  (Depends on order in which we initialize the classes).
-      return _RepeatedCompositeFieldContainer(listener, field.message_type)
+      return containers.RepeatedCompositeFieldContainer(
+          listener, field.message_type)
     else:
-      return _RepeatedScalarFieldContainer(
+      return containers.RepeatedScalarFieldContainer(
           listener, type_checkers.GetTypeChecker(field.cpp_type, field.type))
 
   if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
@@ -1270,135 +1272,6 @@ class _Listener(object):
       pass
 
 
-# TODO(robinson): Move elsewhere?
-# TODO(robinson): Provide a clear() method here in addition to ClearField()?
-class _RepeatedScalarFieldContainer(object):
-
-  """Simple, type-checked, list-like container for holding repeated scalars."""
-
-  # Minimizes memory usage and disallows assignment to other attributes.
-  __slots__ = ['_message_listener', '_type_checker', '_values']
-
-  def __init__(self, message_listener, type_checker):
-    """
-    Args:
-      message_listener: A MessageListener implementation.
-        The _RepeatedScalarFieldContaininer will call this object's
-        TransitionToNonempty() method when it transitions from being empty to
-        being nonempty.
-      type_checker: A _ValueChecker instance to run on elements inserted
-        into this container.
-    """
-    self._message_listener = message_listener
-    self._type_checker = type_checker
-    self._values = []
-
-  def append(self, elem):
-    self._type_checker.CheckValue(elem)
-    self._values.append(elem)
-    self._message_listener.ByteSizeDirty()
-    if len(self._values) == 1:
-      self._message_listener.TransitionToNonempty()
-
-  def remove(self, elem):
-    self._values.remove(elem)
-    self._message_listener.ByteSizeDirty()
-
-  # List-like __getitem__() support also makes us iterable (via "iter(foo)"
-  # or implicitly via "for i in mylist:") for free.
-  def __getitem__(self, key):
-    return self._values[key]
-
-  def __setitem__(self, key, value):
-    # No need to call TransitionToNonempty(), since if we're able to
-    # set the element at this index, we were already nonempty before
-    # this method was called.
-    self._message_listener.ByteSizeDirty()
-    self._type_checker.CheckValue(value)
-    self._values[key] = value
-
-  def __len__(self):
-    return len(self._values)
-
-  def __eq__(self, other):
-    if self is other:
-      return True
-    # Special case for the same type which should be common and fast.
-    if isinstance(other, self.__class__):
-      return other._values == self._values
-    # We are presumably comparing against some other sequence type.
-    return other == self._values
-
-  def __ne__(self, other):
-    # Can't use != here since it would infinitely recurse.
-    return not self == other
-
-
-# TODO(robinson): Move elsewhere?
-# TODO(robinson): Provide a clear() method here in addition to ClearField()?
-# TODO(robinson): Unify common functionality with
-# _RepeatedScalarFieldContaininer?
-class _RepeatedCompositeFieldContainer(object):
-
-  """Simple, list-like container for holding repeated composite fields."""
-
-  # Minimizes memory usage and disallows assignment to other attributes.
-  __slots__ = ['_values', '_message_descriptor', '_message_listener']
-
-  def __init__(self, message_listener, message_descriptor):
-    """Note that we pass in a descriptor instead of the generated directly,
-    since at the time we construct a _RepeatedCompositeFieldContainer we
-    haven't yet necessarily initialized the type that will be contained in the
-    container.
-
-    Args:
-      message_listener: A MessageListener implementation.
-        The _RepeatedCompositeFieldContainer will call this object's
-        TransitionToNonempty() method when it transitions from being empty to
-        being nonempty.
-      message_descriptor: A Descriptor instance describing the protocol type
-        that should be present in this container.  We'll use the
-        _concrete_class field of this descriptor when the client calls add().
-    """
-    self._message_listener = message_listener
-    self._message_descriptor = message_descriptor
-    self._values = []
-
-  def add(self):
-    new_element = self._message_descriptor._concrete_class()
-    new_element._SetListener(self._message_listener)
-    self._values.append(new_element)
-    self._message_listener.ByteSizeDirty()
-    self._message_listener.TransitionToNonempty()
-    return new_element
-
-  def __delitem__(self, key):
-    self._message_listener.ByteSizeDirty()
-    del self._values[key]
-
-  # List-like __getitem__() support also makes us iterable (via "iter(foo)"
-  # or implicitly via "for i in mylist:") for free.
-  def __getitem__(self, key):
-    return self._values[key]
-
-  def __len__(self):
-    return len(self._values)
-
-  def __eq__(self, other):
-    if self is other:
-      return True
-    if not isinstance(other, self.__class__):
-      raise TypeError('Can only compare repeated composite fields against '
-                      'other repeated composite fields.')
-    return self._values == other._values
-
-  def __ne__(self, other):
-    # Can't use != here since it would infinitely recurse.
-    return not self == other
-
-  # TODO(robinson): Implement, document, and test slicing support.
-
-
 # TODO(robinson): Move elsewhere?  This file is getting pretty ridiculous...
 # TODO(robinson): Unify error handling of "unknown extension" crap.
 # TODO(robinson): There's so much similarity between the way that

+ 1 - 0
python/setup.py

@@ -108,6 +108,7 @@ if __name__ == '__main__':
         test_suite = 'setup.MakeTestSuite',
         # Must list modules explicitly so that we don't install tests.
         py_modules = [
+          'google.protobuf.internal.containers',
           'google.protobuf.internal.decoder',
           'google.protobuf.internal.encoder',
           'google.protobuf.internal.input_stream',

+ 2 - 0
src/Makefile.am

@@ -68,6 +68,7 @@ libprotobuf_la_SOURCES =                                       \
   google/protobuf/stubs/substitute.h                           \
   google/protobuf/stubs/strutil.cc                             \
   google/protobuf/stubs/strutil.h                              \
+  google/protobuf/stubs/structurally_valid.cc                  \
   google/protobuf/descriptor.cc                                \
   google/protobuf/descriptor.pb.cc                             \
   google/protobuf/descriptor_database.cc                       \
@@ -209,6 +210,7 @@ protobuf_test_LDADD = $(PTHREAD_LIBS) libprotobuf.la libprotoc.la
 protobuf_test_SOURCES =                                        \
   google/protobuf/stubs/common_unittest.cc                     \
   google/protobuf/stubs/strutil_unittest.cc                    \
+  google/protobuf/stubs/structurally_valid_unittest.cc         \
   google/protobuf/descriptor_database_unittest.cc              \
   google/protobuf/descriptor_unittest.cc                       \
   google/protobuf/dynamic_message_unittest.cc                  \

+ 1 - 0
src/google/protobuf/compiler/command_line_interface.cc

@@ -32,6 +32,7 @@
 //  Based on original Protocol Buffers design by
 //  Sanjay Ghemawat, Jeff Dean, and others.
 
+#include <stdio.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>

+ 10 - 0
src/google/protobuf/compiler/cpp/cpp_enum_field.cc

@@ -110,6 +110,11 @@ GenerateMergingCode(io::Printer* printer) const {
   printer->Print(variables_, "set_$name$(from.$name$());\n");
 }
 
+void EnumFieldGenerator::
+GenerateSwappingCode(io::Printer* printer) const {
+  printer->Print(variables_, "std::swap($name$_, other->$name$_);\n");
+}
+
 void EnumFieldGenerator::
 GenerateInitializer(io::Printer* printer) const {
   printer->Print(variables_, ",\n$name$_($default$)");
@@ -200,6 +205,11 @@ GenerateMergingCode(io::Printer* printer) const {
   printer->Print(variables_, "$name$_.MergeFrom(from.$name$_);\n");
 }
 
+void RepeatedEnumFieldGenerator::
+GenerateSwappingCode(io::Printer* printer) const {
+  printer->Print(variables_, "$name$_.Swap(&other->$name$_);\n");
+}
+
 void RepeatedEnumFieldGenerator::
 GenerateInitializer(io::Printer* printer) const {
   // Not needed for repeated fields.

+ 2 - 0
src/google/protobuf/compiler/cpp/cpp_enum_field.h

@@ -55,6 +55,7 @@ class EnumFieldGenerator : public FieldGenerator {
   void GenerateInlineAccessorDefinitions(io::Printer* printer) const;
   void GenerateClearingCode(io::Printer* printer) const;
   void GenerateMergingCode(io::Printer* printer) const;
+  void GenerateSwappingCode(io::Printer* printer) const;
   void GenerateInitializer(io::Printer* printer) const;
   void GenerateMergeFromCodedStream(io::Printer* printer) const;
   void GenerateSerializeWithCachedSizes(io::Printer* printer) const;
@@ -78,6 +79,7 @@ class RepeatedEnumFieldGenerator : public FieldGenerator {
   void GenerateInlineAccessorDefinitions(io::Printer* printer) const;
   void GenerateClearingCode(io::Printer* printer) const;
   void GenerateMergingCode(io::Printer* printer) const;
+  void GenerateSwappingCode(io::Printer* printer) const;
   void GenerateInitializer(io::Printer* printer) const;
   void GenerateMergeFromCodedStream(io::Printer* printer) const;
   void GenerateSerializeWithCachedSizes(io::Printer* printer) const;

+ 7 - 0
src/google/protobuf/compiler/cpp/cpp_field.h

@@ -87,6 +87,13 @@ class FieldGenerator {
   // GenerateMergeFrom method.
   virtual void GenerateMergingCode(io::Printer* printer) const = 0;
 
+  // Generate lines of code (statements, not declarations) which swap
+  // this field with the corresponding field of another message, which
+  // is pointed to by the generated code variable "other". This is used to
+  // define the Swap method. Details of usage can be found in
+  // cpp_message.cc under the GenerateSwap method.
+  virtual void GenerateSwappingCode(io::Printer* printer) const = 0;
+
   // Generate any initializers needed for the private members declared by
   // GeneratePrivateMembers().  These go into the message class's
   // constructor's initializer list.  For each initializer, this method

+ 53 - 19
src/google/protobuf/compiler/cpp/cpp_message.cc

@@ -416,7 +416,8 @@ GenerateClassDefinition(io::Printer* printer) {
     "}\n"
     "\n"
     "static const ::google::protobuf::Descriptor* descriptor();\n"
-    "static const $classname$& default_instance();"
+    "static const $classname$& default_instance();\n"
+    "void Swap($classname$* other);\n"
     "\n"
     "// implements Message ----------------------------------------------\n"
     "\n"
@@ -617,7 +618,8 @@ GenerateDescriptorInitializer(io::Printer* printer, int index) {
       "    -1,\n");
   }
   printer->Print(vars,
-    "    ::google::protobuf::DescriptorPool::generated_pool());\n");
+    "    ::google::protobuf::DescriptorPool::generated_pool(),\n"
+    "    sizeof($classname$));\n");
 
   // Handle nested types.
   for (int i = 0; i < descriptor_->nested_type_count(); i++) {
@@ -693,6 +695,9 @@ GenerateClassMethods(io::Printer* printer) {
     GenerateCopyFrom(printer);
     printer->Print("\n");
 
+    GenerateSwap(printer);
+    printer->Print("\n");
+
     GenerateIsInitialized(printer);
     printer->Print("\n");
   }
@@ -946,6 +951,37 @@ GenerateClear(io::Printer* printer) {
   printer->Print("}\n");
 }
 
+void MessageGenerator::
+GenerateSwap(io::Printer* printer) {
+  // Generate the Swap member function.
+  printer->Print("void $classname$::Swap($classname$* other) {\n",
+                 "classname", classname_);
+  printer->Indent();
+  printer->Print("if (other != this) {\n");
+  printer->Indent();
+
+  for (int i = 0; i < descriptor_->field_count(); i++) {
+    const FieldDescriptor* field = descriptor_->field(i);
+    field_generators_.get(field).GenerateSwappingCode(printer);
+  }
+
+  for (int i = 0; i < (descriptor_->field_count() + 31) / 32; ++i) {
+    printer->Print("std::swap(_has_bits_[$i$], other->_has_bits_[$i$]);\n",
+                   "i", SimpleItoa(i));
+  }
+
+  printer->Print("_unknown_fields_.Swap(&other->_unknown_fields_);\n");
+  printer->Print("std::swap(_cached_size_, other->_cached_size_);\n");
+  if (descriptor_->extension_range_count() > 0) {
+    printer->Print("_extensions_.Swap(&other->_extensions_);\n");
+  }
+
+  printer->Outdent();
+  printer->Print("}\n");
+  printer->Outdent();
+  printer->Print("}\n");
+}
+
 void MessageGenerator::
 GenerateMergeFrom(io::Printer* printer) {
   // Generate the generalized MergeFrom (aka that which takes in the Message
@@ -956,22 +992,20 @@ GenerateMergeFrom(io::Printer* printer) {
     "classname", classname_);
   printer->Indent();
 
-  if (descriptor_->field_count() > 0) {
-    // Cast the message to the proper type. If we find that the message is
-    // *not* of the proper type, we can still call Merge via the reflection
-    // system, as the GOOGLE_CHECK above ensured that we have the same descriptor
-    // for each message.
-    printer->Print(
-      "const $classname$* source =\n"
-      "  ::google::protobuf::internal::dynamic_cast_if_available<const $classname$*>(\n"
-      "    &from);\n"
-      "if (source == NULL) {\n"
-      "  ::google::protobuf::internal::ReflectionOps::Merge(from, this);\n"
-      "} else {\n"
-      "  MergeFrom(*source);\n"
-      "}\n",
-      "classname", classname_);
-  }
+  // Cast the message to the proper type. If we find that the message is
+  // *not* of the proper type, we can still call Merge via the reflection
+  // system, as the GOOGLE_CHECK above ensured that we have the same descriptor
+  // for each message.
+  printer->Print(
+    "const $classname$* source =\n"
+    "  ::google::protobuf::internal::dynamic_cast_if_available<const $classname$*>(\n"
+    "    &from);\n"
+    "if (source == NULL) {\n"
+    "  ::google::protobuf::internal::ReflectionOps::Merge(from, this);\n"
+    "} else {\n"
+    "  MergeFrom(*source);\n"
+    "}\n",
+    "classname", classname_);
 
   printer->Outdent();
   printer->Print("}\n\n");
@@ -1199,7 +1233,7 @@ GenerateMergeFromCodedStream(io::Printer* printer) {
     for (int i = 0; i < descriptor_->extension_range_count(); i++) {
       const Descriptor::ExtensionRange* range =
         descriptor_->extension_range(i);
-      if (i > 0) printer->Print(" &&\n    ");
+      if (i > 0) printer->Print(" ||\n    ");
 
       uint32 start_tag = WireFormat::MakeTag(
         range->start, static_cast<WireFormat::WireType>(0));

+ 1 - 0
src/google/protobuf/compiler/cpp/cpp_message.h

@@ -115,6 +115,7 @@ class MessageGenerator {
   void GenerateByteSize(io::Printer* printer);
   void GenerateMergeFrom(io::Printer* printer);
   void GenerateCopyFrom(io::Printer* printer);
+  void GenerateSwap(io::Printer* printer);
   void GenerateIsInitialized(io::Printer* printer);
 
   // Helpers for GenerateSerializeWithCachedSizes().

+ 10 - 0
src/google/protobuf/compiler/cpp/cpp_message_field.cc

@@ -110,6 +110,11 @@ GenerateMergingCode(io::Printer* printer) const {
     "mutable_$name$()->$type$::MergeFrom(from.$name$());\n");
 }
 
+void MessageFieldGenerator::
+GenerateSwappingCode(io::Printer* printer) const {
+  printer->Print(variables_, "std::swap($name$_, other->$name$_);\n");
+}
+
 void MessageFieldGenerator::
 GenerateInitializer(io::Printer* printer) const {
   printer->Print(variables_, ",\n$name$_(NULL)");
@@ -201,6 +206,11 @@ GenerateMergingCode(io::Printer* printer) const {
   printer->Print(variables_, "$name$_.MergeFrom(from.$name$_);\n");
 }
 
+void RepeatedMessageFieldGenerator::
+GenerateSwappingCode(io::Printer* printer) const {
+  printer->Print(variables_, "$name$_.Swap(&other->$name$_);\n");
+}
+
 void RepeatedMessageFieldGenerator::
 GenerateInitializer(io::Printer* printer) const {
   // Not needed for repeated fields.

+ 2 - 0
src/google/protobuf/compiler/cpp/cpp_message_field.h

@@ -55,6 +55,7 @@ class MessageFieldGenerator : public FieldGenerator {
   void GenerateInlineAccessorDefinitions(io::Printer* printer) const;
   void GenerateClearingCode(io::Printer* printer) const;
   void GenerateMergingCode(io::Printer* printer) const;
+  void GenerateSwappingCode(io::Printer* printer) const;
   void GenerateInitializer(io::Printer* printer) const;
   void GenerateMergeFromCodedStream(io::Printer* printer) const;
   void GenerateSerializeWithCachedSizes(io::Printer* printer) const;
@@ -78,6 +79,7 @@ class RepeatedMessageFieldGenerator : public FieldGenerator {
   void GenerateInlineAccessorDefinitions(io::Printer* printer) const;
   void GenerateClearingCode(io::Printer* printer) const;
   void GenerateMergingCode(io::Printer* printer) const;
+  void GenerateSwappingCode(io::Printer* printer) const;
   void GenerateInitializer(io::Printer* printer) const;
   void GenerateMergeFromCodedStream(io::Printer* printer) const;
   void GenerateSerializeWithCachedSizes(io::Printer* printer) const;

+ 10 - 0
src/google/protobuf/compiler/cpp/cpp_primitive_field.cc

@@ -174,6 +174,11 @@ GenerateMergingCode(io::Printer* printer) const {
   printer->Print(variables_, "set_$name$(from.$name$());\n");
 }
 
+void PrimitiveFieldGenerator::
+GenerateSwappingCode(io::Printer* printer) const {
+  printer->Print(variables_, "std::swap($name$_, other->$name$_);\n");
+}
+
 void PrimitiveFieldGenerator::
 GenerateInitializer(io::Printer* printer) const {
   printer->Print(variables_, ",\n$name$_($default$)");
@@ -266,6 +271,11 @@ GenerateMergingCode(io::Printer* printer) const {
   printer->Print(variables_, "$name$_.MergeFrom(from.$name$_);\n");
 }
 
+void RepeatedPrimitiveFieldGenerator::
+GenerateSwappingCode(io::Printer* printer) const {
+  printer->Print(variables_, "$name$_.Swap(&other->$name$_);\n");
+}
+
 void RepeatedPrimitiveFieldGenerator::
 GenerateInitializer(io::Printer* printer) const {
   // Not needed for repeated fields.

+ 2 - 0
src/google/protobuf/compiler/cpp/cpp_primitive_field.h

@@ -55,6 +55,7 @@ class PrimitiveFieldGenerator : public FieldGenerator {
   void GenerateInlineAccessorDefinitions(io::Printer* printer) const;
   void GenerateClearingCode(io::Printer* printer) const;
   void GenerateMergingCode(io::Printer* printer) const;
+  void GenerateSwappingCode(io::Printer* printer) const;
   void GenerateInitializer(io::Printer* printer) const;
   void GenerateMergeFromCodedStream(io::Printer* printer) const;
   void GenerateSerializeWithCachedSizes(io::Printer* printer) const;
@@ -78,6 +79,7 @@ class RepeatedPrimitiveFieldGenerator : public FieldGenerator {
   void GenerateInlineAccessorDefinitions(io::Printer* printer) const;
   void GenerateClearingCode(io::Printer* printer) const;
   void GenerateMergingCode(io::Printer* printer) const;
+  void GenerateSwappingCode(io::Printer* printer) const;
   void GenerateInitializer(io::Printer* printer) const;
   void GenerateMergeFromCodedStream(io::Printer* printer) const;
   void GenerateSerializeWithCachedSizes(io::Printer* printer) const;

+ 19 - 9
src/google/protobuf/compiler/cpp/cpp_string_field.cc

@@ -163,12 +163,12 @@ GenerateInlineAccessorDefinitions(io::Printer* printer) const {
     "inline ::std::string* $classname$::mutable_$name$() {\n"
     "  _set_bit($index$);\n"
     "  if ($name$_ == &_default_$name$_) {\n");
-  if (descriptor_->has_default_value()) {
+  if (descriptor_->default_value_string().empty()) {
     printer->Print(variables_,
-      "    $name$_ = new ::std::string(_default_$name$_);\n");
+      "    $name$_ = new ::std::string;\n");
   } else {
     printer->Print(variables_,
-      "    $name$_ = new ::std::string;\n");
+      "    $name$_ = new ::std::string(_default_$name$_);\n");
   }
   printer->Print(variables_,
     "  }\n"
@@ -178,26 +178,26 @@ GenerateInlineAccessorDefinitions(io::Printer* printer) const {
 
 void StringFieldGenerator::
 GenerateNonInlineAccessorDefinitions(io::Printer* printer) const {
-  if (descriptor_->has_default_value()) {
+  if (descriptor_->default_value_string().empty()) {
     printer->Print(variables_,
-      "const ::std::string $classname$::_default_$name$_($default$);");
+      "const ::std::string $classname$::_default_$name$_;");
   } else {
     printer->Print(variables_,
-      "const ::std::string $classname$::_default_$name$_;");
+      "const ::std::string $classname$::_default_$name$_($default$);");
   }
 }
 
 void StringFieldGenerator::
 GenerateClearingCode(io::Printer* printer) const {
-  if (descriptor_->has_default_value()) {
+  if (descriptor_->default_value_string().empty()) {
     printer->Print(variables_,
       "if ($name$_ != &_default_$name$_) {\n"
-      "  $name$_->assign(_default_$name$_);\n"
+      "  $name$_->clear();\n"
       "}\n");
   } else {
     printer->Print(variables_,
       "if ($name$_ != &_default_$name$_) {\n"
-      "  $name$_->clear();\n"
+      "  $name$_->assign(_default_$name$_);\n"
       "}\n");
   }
 }
@@ -207,6 +207,11 @@ GenerateMergingCode(io::Printer* printer) const {
   printer->Print(variables_, "set_$name$(from.$name$());\n");
 }
 
+void StringFieldGenerator::
+GenerateSwappingCode(io::Printer* printer) const {
+  printer->Print(variables_, "std::swap($name$_, other->$name$_);\n");
+}
+
 void StringFieldGenerator::
 GenerateInitializer(io::Printer* printer) const {
   printer->Print(variables_,
@@ -349,6 +354,11 @@ GenerateMergingCode(io::Printer* printer) const {
   printer->Print(variables_, "$name$_.MergeFrom(from.$name$_);\n");
 }
 
+void RepeatedStringFieldGenerator::
+GenerateSwappingCode(io::Printer* printer) const {
+  printer->Print(variables_, "$name$_.Swap(&other->$name$_);\n");
+}
+
 void RepeatedStringFieldGenerator::
 GenerateInitializer(io::Printer* printer) const {
   // Not needed for repeated fields.

+ 2 - 0
src/google/protobuf/compiler/cpp/cpp_string_field.h

@@ -56,6 +56,7 @@ class StringFieldGenerator : public FieldGenerator {
   void GenerateNonInlineAccessorDefinitions(io::Printer* printer) const;
   void GenerateClearingCode(io::Printer* printer) const;
   void GenerateMergingCode(io::Printer* printer) const;
+  void GenerateSwappingCode(io::Printer* printer) const;
   void GenerateInitializer(io::Printer* printer) const;
   void GenerateDestructorCode(io::Printer* printer) const;
   void GenerateMergeFromCodedStream(io::Printer* printer) const;
@@ -80,6 +81,7 @@ class RepeatedStringFieldGenerator : public FieldGenerator {
   void GenerateInlineAccessorDefinitions(io::Printer* printer) const;
   void GenerateClearingCode(io::Printer* printer) const;
   void GenerateMergingCode(io::Printer* printer) const;
+  void GenerateSwappingCode(io::Printer* printer) const;
   void GenerateInitializer(io::Printer* printer) const;
   void GenerateMergeFromCodedStream(io::Printer* printer) const;
   void GenerateSerializeWithCachedSizes(io::Printer* printer) const;

+ 116 - 0
src/google/protobuf/compiler/cpp/cpp_unittest.cc

@@ -236,6 +236,83 @@ TEST(GeneratedMessageTest, CopyFrom) {
   TestUtil::ExpectAllFieldsSet(message2);
 }
 
+TEST(GeneratedMessageTest, SwapWithEmpty) {
+  unittest::TestAllTypes message1, message2;
+  TestUtil::SetAllFields(&message1);
+
+  TestUtil::ExpectAllFieldsSet(message1);
+  TestUtil::ExpectClear(message2);
+  message1.Swap(&message2);
+  TestUtil::ExpectAllFieldsSet(message2);
+  TestUtil::ExpectClear(message1);
+}
+
+TEST(GeneratedMessageTest, SwapWithSelf) {
+  unittest::TestAllTypes message;
+  TestUtil::SetAllFields(&message);
+  TestUtil::ExpectAllFieldsSet(message);
+  message.Swap(&message);
+  TestUtil::ExpectAllFieldsSet(message);
+}
+
+TEST(GeneratedMessageTest, SwapWithOther) {
+  unittest::TestAllTypes message1, message2;
+
+  message1.set_optional_int32(123);
+  message1.set_optional_string("abc");
+  message1.mutable_optional_nested_message()->set_bb(1);
+  message1.set_optional_nested_enum(unittest::TestAllTypes::FOO);
+  message1.add_repeated_int32(1);
+  message1.add_repeated_int32(2);
+  message1.add_repeated_string("a");
+  message1.add_repeated_string("b");
+  message1.add_repeated_nested_message()->set_bb(7);
+  message1.add_repeated_nested_message()->set_bb(8);
+  message1.add_repeated_nested_enum(unittest::TestAllTypes::FOO);
+  message1.add_repeated_nested_enum(unittest::TestAllTypes::BAR);
+
+  message2.set_optional_int32(456);
+  message2.set_optional_string("def");
+  message2.mutable_optional_nested_message()->set_bb(2);
+  message2.set_optional_nested_enum(unittest::TestAllTypes::BAR);
+  message2.add_repeated_int32(3);
+  message2.add_repeated_string("c");
+  message2.add_repeated_nested_message()->set_bb(9);
+  message2.add_repeated_nested_enum(unittest::TestAllTypes::BAZ);
+
+  message1.Swap(&message2);
+
+  EXPECT_EQ(456, message1.optional_int32());
+  EXPECT_EQ("def", message1.optional_string());
+  EXPECT_EQ(2, message1.optional_nested_message().bb());
+  EXPECT_EQ(unittest::TestAllTypes::BAR, message1.optional_nested_enum());
+  ASSERT_EQ(1, message1.repeated_int32_size());
+  EXPECT_EQ(3, message1.repeated_int32(0));
+  ASSERT_EQ(1, message1.repeated_string_size());
+  EXPECT_EQ("c", message1.repeated_string(0));
+  ASSERT_EQ(1, message1.repeated_nested_message_size());
+  EXPECT_EQ(9, message1.repeated_nested_message(0).bb());
+  ASSERT_EQ(1, message1.repeated_nested_enum_size());
+  EXPECT_EQ(unittest::TestAllTypes::BAZ, message1.repeated_nested_enum(0));
+
+  EXPECT_EQ(123, message2.optional_int32());
+  EXPECT_EQ("abc", message2.optional_string());
+  EXPECT_EQ(1, message2.optional_nested_message().bb());
+  EXPECT_EQ(unittest::TestAllTypes::FOO, message2.optional_nested_enum());
+  ASSERT_EQ(2, message2.repeated_int32_size());
+  EXPECT_EQ(1, message2.repeated_int32(0));
+  EXPECT_EQ(2, message2.repeated_int32(1));
+  ASSERT_EQ(2, message2.repeated_string_size());
+  EXPECT_EQ("a", message2.repeated_string(0));
+  EXPECT_EQ("b", message2.repeated_string(1));
+  ASSERT_EQ(2, message2.repeated_nested_message_size());
+  EXPECT_EQ(7, message2.repeated_nested_message(0).bb());
+  EXPECT_EQ(8, message2.repeated_nested_message(1).bb());
+  ASSERT_EQ(2, message2.repeated_nested_enum_size());
+  EXPECT_EQ(unittest::TestAllTypes::FOO, message2.repeated_nested_enum(0));
+  EXPECT_EQ(unittest::TestAllTypes::BAR, message2.repeated_nested_enum(1));
+}
+
 TEST(GeneratedMessageTest, CopyConstructor) {
   unittest::TestAllTypes message1;
   TestUtil::SetAllFields(&message1);
@@ -492,6 +569,45 @@ TEST(GeneratedMessageTest, TestEmbedOptimizedForSize) {
   EXPECT_EQ(2, message2.repeated_message(0).msg().c());
 }
 
+TEST(GeneratedMessageTest, TestSpaceUsed) {
+  unittest::TestAllTypes message1;
+  // sizeof provides a lower bound on SpaceUsed().
+  EXPECT_LE(sizeof(unittest::TestAllTypes), message1.SpaceUsed());
+  const int empty_message_size = message1.SpaceUsed();
+
+  // Setting primitive types shouldn't affect the space used.
+  message1.set_optional_int32(123);
+  message1.set_optional_int64(12345);
+  message1.set_optional_uint32(123);
+  message1.set_optional_uint64(12345);
+  EXPECT_EQ(empty_message_size, message1.SpaceUsed());
+
+  // On some STL implementations, setting the string to a small value should
+  // only increase SpaceUsed() by the size of a string object, though this is
+  // not true everywhere.
+  message1.set_optional_string("abc");
+  EXPECT_LE(empty_message_size + sizeof(string), message1.SpaceUsed());
+
+  // Setting a string to a value larger than the string object itself should
+  // increase SpaceUsed(), because it cannot store the value internally.
+  message1.set_optional_string(string(sizeof(string) + 1, 'x'));
+  int min_expected_increase = message1.optional_string().capacity() +
+      sizeof(string);
+  EXPECT_LE(empty_message_size + min_expected_increase,
+            message1.SpaceUsed());
+
+  int previous_size = message1.SpaceUsed();
+  // Adding an optional message should increase the size by the size of the
+  // nested message type. NestedMessage is simple enough (1 int field) that its
+  // SpaceUsed() is equal to sizeof(NestedMessage).
+  message1.mutable_optional_nested_message();
+  ASSERT_EQ(sizeof(unittest::TestAllTypes::NestedMessage),
+            message1.optional_nested_message().SpaceUsed());
+  EXPECT_EQ(previous_size +
+            sizeof(unittest::TestAllTypes::NestedMessage),
+            message1.SpaceUsed());
+}
+
 // ===================================================================
 
 TEST(GeneratedEnumTest, EnumValuesAsSwitchCases) {

+ 17 - 1
src/google/protobuf/compiler/parser.cc

@@ -215,6 +215,11 @@ bool Parser::ConsumeString(string* output, const char* error) {
   if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
     io::Tokenizer::ParseString(input_->current().text, output);
     input_->Next();
+    // Allow C++-like concatenation of adjacent string literal tokens.
+    while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
+      io::Tokenizer::ParseStringAppend(input_->current().text, output);
+      input_->Next();
+    }
     return true;
   } else {
     AddError(error);
@@ -864,13 +869,24 @@ bool Parser::ParseEnumConstant(EnumValueDescriptorProto* enum_value) {
   if (is_negative) number *= -1;
   enum_value->set_number(number);
 
-  // TODO(kenton):  Options for enum values?
+  DO(ParseEnumConstantOptions(enum_value));
 
   DO(Consume(";"));
 
   return true;
 }
 
+bool Parser::ParseEnumConstantOptions(EnumValueDescriptorProto* value) {
+  if (!TryConsume("[")) return true;
+
+  do {
+    DO(ParseOptionAssignment(value->mutable_options()));
+  } while (TryConsume(","));
+
+  DO(Consume("]"));
+  return true;
+}
+
 // -------------------------------------------------------------------
 // Services
 

+ 4 - 0
src/google/protobuf/compiler/parser.h

@@ -236,6 +236,10 @@ class LIBPROTOBUF_EXPORT Parser {
   // Parse a single enum value within an enum block.
   bool ParseEnumConstant(EnumValueDescriptorProto* enum_value);
 
+  // Parse enum constant options, i.e. the list in square brackets at the end
+  // of the enum constant value definition.
+  bool ParseEnumConstantOptions(EnumValueDescriptorProto* value);
+
   // Parse a single method within a service definition.
   bool ParseServiceMethod(MethodDescriptorProto* method);
 

+ 38 - 0
src/google/protobuf/compiler/parser_unittest.cc

@@ -305,7 +305,9 @@ TEST_F(ParseMessageTest, FieldDefaults) {
     "  required double foo = 1 [default=-11.5];\n"
     "  required double foo = 1 [default= 12  ];\n"
     "  required string foo = 1 [default='13\\001'];\n"
+    "  required string foo = 1 [default='a' \"b\" \n \"c\"];\n"
     "  required bytes  foo = 1 [default='14\\002'];\n"
+    "  required bytes  foo = 1 [default='a' \"b\" \n 'c'];\n"
     "  required bool   foo = 1 [default=true ];\n"
     "  required Foo    foo = 1 [default=FOO  ];\n"
 
@@ -334,7 +336,9 @@ TEST_F(ParseMessageTest, FieldDefaults) {
     "  field { type:TYPE_DOUBLE  default_value:\"-11.5\"     "ETC" }"
     "  field { type:TYPE_DOUBLE  default_value:\"12\"        "ETC" }"
     "  field { type:TYPE_STRING  default_value:\"13\\001\"   "ETC" }"
+    "  field { type:TYPE_STRING  default_value:\"abc\"       "ETC" }"
     "  field { type:TYPE_BYTES   default_value:\"14\\\\002\" "ETC" }"
+    "  field { type:TYPE_BYTES   default_value:\"abc\"       "ETC" }"
     "  field { type:TYPE_BOOL    default_value:\"true\"      "ETC" }"
     "  field { type_name:\"Foo\" default_value:\"FOO\"       "ETC" }"
 
@@ -534,6 +538,40 @@ TEST_F(ParseEnumTest, Values) {
     "}");
 }
 
+TEST_F(ParseEnumTest, ValueOptions) {
+  ExpectParsesTo(
+    "enum TestEnum {\n"
+    "  FOO = 13;\n"
+    "  BAR = -10 [ (something.text) = 'abc' ];\n"
+    "  BAZ = 500 [ (something.text) = 'def', other = 1 ];\n"
+    "}\n",
+
+    "enum_type {"
+    "  name: \"TestEnum\""
+    "  value { name: \"FOO\" number: 13 }"
+    "  value { name: \"BAR\" number: -10 "
+    "    options { "
+    "      uninterpreted_option { "
+    "        name { name_part: \"something.text\" is_extension: true } "
+    "        string_value: \"abc\" "
+    "      } "
+    "    } "
+    "  } "
+    "  value { name: \"BAZ\" number: 500 "
+    "    options { "
+    "      uninterpreted_option { "
+    "        name { name_part: \"something.text\" is_extension: true } "
+    "        string_value: \"def\" "
+    "      } "
+    "      uninterpreted_option { "
+    "        name { name_part: \"other\" is_extension: false } "
+    "        positive_int_value: 1 "
+    "      } "
+    "    } "
+    "  } "
+    "}");
+}
+
 // ===================================================================
 
 typedef ParserTest ParseServiceTest;

+ 24 - 9
src/google/protobuf/descriptor.cc

@@ -1656,6 +1656,10 @@ class DescriptorBuilder {
   //   dependencies.
   Symbol FindSymbol(const string& name);
 
+  // Like FindSymbol() but does not require that the symbol is in one of the
+  // file's declared dependencies.
+  Symbol FindSymbolNotEnforcingDeps(const string& name);
+
   // Like FindSymbol(), but looks up the name relative to some other symbol
   // name.  This first searches siblings of relative_to, then siblings of its
   // parents, etc.  For example, LookupSymbol("foo.bar", "baz.qux.corge") makes
@@ -2016,7 +2020,7 @@ bool DescriptorBuilder::IsInPackage(const FileDescriptor* file,
             file->package()[package_name.size()] == '.');
 }
 
-Symbol DescriptorBuilder::FindSymbol(const string& name) {
+Symbol DescriptorBuilder::FindSymbolNotEnforcingDeps(const string& name) {
   Symbol result;
 
   // We need to search our pool and all its underlays.
@@ -2035,6 +2039,12 @@ Symbol DescriptorBuilder::FindSymbol(const string& name) {
     pool = pool->underlay_;
   }
 
+  return result;
+}
+
+Symbol DescriptorBuilder::FindSymbol(const string& name) {
+  Symbol result = FindSymbolNotEnforcingDeps(name);
+
   if (!pool_->enforce_dependencies_) {
     // Hack for CompilerUpgrader.
     return result;
@@ -3315,7 +3325,8 @@ bool DescriptorBuilder::OptionInterpreter::InterpretSingleOption(
   // Note that we use DescriptorBuilder::FindSymbolNotEnforcingDeps(), not
   // DescriptorPool::FindMessageTypeByName() because we're already holding the
   // pool's mutex, and the latter method locks it again.
-  Symbol symbol = builder_->FindSymbol(options->GetDescriptor()->full_name());
+  Symbol symbol = builder_->FindSymbolNotEnforcingDeps(
+    options->GetDescriptor()->full_name());
   if (!symbol.IsNull() && symbol.type == Symbol::MESSAGE) {
     options_descriptor = symbol.descriptor;
   } else {
@@ -3362,11 +3373,14 @@ bool DescriptorBuilder::OptionInterpreter::InterpretSingleOption(
       debug_msg_name += name_part;
       // Search for the field's descriptor as a regular field in the builder's
       // pool. First we must qualify it by its message name. Note that we use
-      // DescriptorBuilder::FindSymbol(), not DescriptorPool::FindFieldByName()
-      // because we're already holding the pool's mutex, and the latter method
-      // locks it again.
+      // DescriptorBuilder::FindSymbolNotEnforcingDeps(), not
+      // DescriptorPool::FindFieldByName() because we're already holding the
+      // pool's mutex, and the latter method locks it again.  We must not
+      // enforce dependencies here because we did not enforce dependencies
+      // when looking up |descriptor|, and we need the two to match.
       string fully_qualified_name = descriptor->full_name() + "." + name_part;
-      Symbol symbol = builder_->FindSymbol(fully_qualified_name);
+      Symbol symbol =
+        builder_->FindSymbolNotEnforcingDeps(fully_qualified_name);
       if (!symbol.IsNull() && symbol.type == Symbol::FIELD) {
         field = symbol.field_descriptor;
       } else {
@@ -3378,7 +3392,7 @@ bool DescriptorBuilder::OptionInterpreter::InterpretSingleOption(
       }
     }
 
-    if (!field) {
+    if (field == NULL) {
       return AddNameError("Option \"" + debug_msg_name + "\" unknown.");
     } else if (field->containing_type() != descriptor) {
       // This can only happen if, due to some insane misconfiguration of the
@@ -3670,10 +3684,11 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
         fully_qualified_name += value_name;
 
         // Search for the enum value's descriptor in the builder's pool. Note
-        // that we use DescriptorBuilder::LookupSymbol(), not
+        // that we use DescriptorBuilder::FindSymbolNotEnforcingDeps(), not
         // DescriptorPool::FindEnumValueByName() because we're already holding
         // the pool's mutex, and the latter method locks it again.
-        Symbol symbol = builder_->FindSymbol(fully_qualified_name);
+        Symbol symbol =
+          builder_->FindSymbolNotEnforcingDeps(fully_qualified_name);
         if (!symbol.IsNull() && symbol.type == Symbol::ENUM_VALUE) {
           if (symbol.enum_value_descriptor->type() != enum_type) {
             return AddValueError("Enum type \"" + enum_type->full_name() +

+ 248 - 18
src/google/protobuf/descriptor.pb.cc

@@ -87,7 +87,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FileDescriptorSet, _has_bits_[0]),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FileDescriptorSet, _unknown_fields_),
       -1,
-      ::google::protobuf::DescriptorPool::generated_pool());
+      ::google::protobuf::DescriptorPool::generated_pool(),
+      sizeof(FileDescriptorSet));
   ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage(
     FileDescriptorSet_descriptor_, FileDescriptorSet::default_instance_);
   FileDescriptorProto_descriptor_ = file->message_type(1);
@@ -110,7 +111,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FileDescriptorProto, _has_bits_[0]),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FileDescriptorProto, _unknown_fields_),
       -1,
-      ::google::protobuf::DescriptorPool::generated_pool());
+      ::google::protobuf::DescriptorPool::generated_pool(),
+      sizeof(FileDescriptorProto));
   ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage(
     FileDescriptorProto_descriptor_, FileDescriptorProto::default_instance_);
   DescriptorProto_descriptor_ = file->message_type(2);
@@ -132,7 +134,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(DescriptorProto, _has_bits_[0]),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(DescriptorProto, _unknown_fields_),
       -1,
-      ::google::protobuf::DescriptorPool::generated_pool());
+      ::google::protobuf::DescriptorPool::generated_pool(),
+      sizeof(DescriptorProto));
   DescriptorProto_ExtensionRange_descriptor_ = DescriptorProto_descriptor_->nested_type(0);
   DescriptorProto_ExtensionRange::default_instance_ = new DescriptorProto_ExtensionRange();
   static const int DescriptorProto_ExtensionRange_offsets_[2] = {
@@ -147,7 +150,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(DescriptorProto_ExtensionRange, _has_bits_[0]),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(DescriptorProto_ExtensionRange, _unknown_fields_),
       -1,
-      ::google::protobuf::DescriptorPool::generated_pool());
+      ::google::protobuf::DescriptorPool::generated_pool(),
+      sizeof(DescriptorProto_ExtensionRange));
   ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage(
     DescriptorProto_ExtensionRange_descriptor_, DescriptorProto_ExtensionRange::default_instance_);
   ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage(
@@ -172,7 +176,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FieldDescriptorProto, _has_bits_[0]),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FieldDescriptorProto, _unknown_fields_),
       -1,
-      ::google::protobuf::DescriptorPool::generated_pool());
+      ::google::protobuf::DescriptorPool::generated_pool(),
+      sizeof(FieldDescriptorProto));
   FieldDescriptorProto_Type_descriptor_ = FieldDescriptorProto_descriptor_->enum_type(0);
   FieldDescriptorProto_Label_descriptor_ = FieldDescriptorProto_descriptor_->enum_type(1);
   ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage(
@@ -192,7 +197,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(EnumDescriptorProto, _has_bits_[0]),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(EnumDescriptorProto, _unknown_fields_),
       -1,
-      ::google::protobuf::DescriptorPool::generated_pool());
+      ::google::protobuf::DescriptorPool::generated_pool(),
+      sizeof(EnumDescriptorProto));
   ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage(
     EnumDescriptorProto_descriptor_, EnumDescriptorProto::default_instance_);
   EnumValueDescriptorProto_descriptor_ = file->message_type(5);
@@ -210,7 +216,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(EnumValueDescriptorProto, _has_bits_[0]),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(EnumValueDescriptorProto, _unknown_fields_),
       -1,
-      ::google::protobuf::DescriptorPool::generated_pool());
+      ::google::protobuf::DescriptorPool::generated_pool(),
+      sizeof(EnumValueDescriptorProto));
   ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage(
     EnumValueDescriptorProto_descriptor_, EnumValueDescriptorProto::default_instance_);
   ServiceDescriptorProto_descriptor_ = file->message_type(6);
@@ -228,7 +235,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(ServiceDescriptorProto, _has_bits_[0]),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(ServiceDescriptorProto, _unknown_fields_),
       -1,
-      ::google::protobuf::DescriptorPool::generated_pool());
+      ::google::protobuf::DescriptorPool::generated_pool(),
+      sizeof(ServiceDescriptorProto));
   ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage(
     ServiceDescriptorProto_descriptor_, ServiceDescriptorProto::default_instance_);
   MethodDescriptorProto_descriptor_ = file->message_type(7);
@@ -247,7 +255,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(MethodDescriptorProto, _has_bits_[0]),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(MethodDescriptorProto, _unknown_fields_),
       -1,
-      ::google::protobuf::DescriptorPool::generated_pool());
+      ::google::protobuf::DescriptorPool::generated_pool(),
+      sizeof(MethodDescriptorProto));
   ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage(
     MethodDescriptorProto_descriptor_, MethodDescriptorProto::default_instance_);
   FileOptions_descriptor_ = file->message_type(8);
@@ -267,7 +276,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FileOptions, _has_bits_[0]),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FileOptions, _unknown_fields_),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FileOptions, _extensions_),
-      ::google::protobuf::DescriptorPool::generated_pool());
+      ::google::protobuf::DescriptorPool::generated_pool(),
+      sizeof(FileOptions));
   FileOptions_OptimizeMode_descriptor_ = FileOptions_descriptor_->enum_type(0);
   ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage(
     FileOptions_descriptor_, FileOptions::default_instance_);
@@ -285,7 +295,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(MessageOptions, _has_bits_[0]),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(MessageOptions, _unknown_fields_),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(MessageOptions, _extensions_),
-      ::google::protobuf::DescriptorPool::generated_pool());
+      ::google::protobuf::DescriptorPool::generated_pool(),
+      sizeof(MessageOptions));
   ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage(
     MessageOptions_descriptor_, MessageOptions::default_instance_);
   FieldOptions_descriptor_ = file->message_type(10);
@@ -303,7 +314,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FieldOptions, _has_bits_[0]),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FieldOptions, _unknown_fields_),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(FieldOptions, _extensions_),
-      ::google::protobuf::DescriptorPool::generated_pool());
+      ::google::protobuf::DescriptorPool::generated_pool(),
+      sizeof(FieldOptions));
   FieldOptions_CType_descriptor_ = FieldOptions_descriptor_->enum_type(0);
   ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage(
     FieldOptions_descriptor_, FieldOptions::default_instance_);
@@ -320,7 +332,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(EnumOptions, _has_bits_[0]),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(EnumOptions, _unknown_fields_),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(EnumOptions, _extensions_),
-      ::google::protobuf::DescriptorPool::generated_pool());
+      ::google::protobuf::DescriptorPool::generated_pool(),
+      sizeof(EnumOptions));
   ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage(
     EnumOptions_descriptor_, EnumOptions::default_instance_);
   EnumValueOptions_descriptor_ = file->message_type(12);
@@ -336,7 +349,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(EnumValueOptions, _has_bits_[0]),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(EnumValueOptions, _unknown_fields_),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(EnumValueOptions, _extensions_),
-      ::google::protobuf::DescriptorPool::generated_pool());
+      ::google::protobuf::DescriptorPool::generated_pool(),
+      sizeof(EnumValueOptions));
   ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage(
     EnumValueOptions_descriptor_, EnumValueOptions::default_instance_);
   ServiceOptions_descriptor_ = file->message_type(13);
@@ -352,7 +366,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(ServiceOptions, _has_bits_[0]),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(ServiceOptions, _unknown_fields_),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(ServiceOptions, _extensions_),
-      ::google::protobuf::DescriptorPool::generated_pool());
+      ::google::protobuf::DescriptorPool::generated_pool(),
+      sizeof(ServiceOptions));
   ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage(
     ServiceOptions_descriptor_, ServiceOptions::default_instance_);
   MethodOptions_descriptor_ = file->message_type(14);
@@ -368,7 +383,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(MethodOptions, _has_bits_[0]),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(MethodOptions, _unknown_fields_),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(MethodOptions, _extensions_),
-      ::google::protobuf::DescriptorPool::generated_pool());
+      ::google::protobuf::DescriptorPool::generated_pool(),
+      sizeof(MethodOptions));
   ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage(
     MethodOptions_descriptor_, MethodOptions::default_instance_);
   UninterpretedOption_descriptor_ = file->message_type(15);
@@ -389,7 +405,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(UninterpretedOption, _has_bits_[0]),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(UninterpretedOption, _unknown_fields_),
       -1,
-      ::google::protobuf::DescriptorPool::generated_pool());
+      ::google::protobuf::DescriptorPool::generated_pool(),
+      sizeof(UninterpretedOption));
   UninterpretedOption_NamePart_descriptor_ = UninterpretedOption_descriptor_->nested_type(0);
   UninterpretedOption_NamePart::default_instance_ = new UninterpretedOption_NamePart();
   static const int UninterpretedOption_NamePart_offsets_[2] = {
@@ -404,7 +421,8 @@ void protobuf_BuildDesc_google_2fprotobuf_2fdescriptor_2eproto_AssignGlobalDescr
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(UninterpretedOption_NamePart, _has_bits_[0]),
       GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(UninterpretedOption_NamePart, _unknown_fields_),
       -1,
-      ::google::protobuf::DescriptorPool::generated_pool());
+      ::google::protobuf::DescriptorPool::generated_pool(),
+      sizeof(UninterpretedOption_NamePart));
   ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage(
     UninterpretedOption_NamePart_descriptor_, UninterpretedOption_NamePart::default_instance_);
   ::google::protobuf::MessageFactory::InternalRegisterGeneratedMessage(
@@ -683,6 +701,15 @@ void FileDescriptorSet::CopyFrom(const FileDescriptorSet& from) {
   MergeFrom(from);
 }
 
+void FileDescriptorSet::Swap(FileDescriptorSet* other) {  // Exchanges this message's contents with *other, member by member.
+  if (other != this) {  // Swapping an object with itself is skipped entirely.
+    file_.Swap(&other->file_);  // file_ is exchanged through its own Swap().
+    std::swap(_has_bits_[0], other->_has_bits_[0]);  // Only word 0 of _has_bits_ is exchanged. NOTE(review): presumably the array has a single word here -- confirm.
+    _unknown_fields_.Swap(&other->_unknown_fields_);  // Unknown fields are exchanged as well.
+    std::swap(_cached_size_, other->_cached_size_);  // The cached size value follows the contents it was computed for.
+  }
+}
+
 bool FileDescriptorSet::IsInitialized() const {
   
   for (int i = 0; i < file_size(); i++) {
@@ -1081,6 +1108,22 @@ void FileDescriptorProto::CopyFrom(const FileDescriptorProto& from) {
   MergeFrom(from);
 }
 
+void FileDescriptorProto::Swap(FileDescriptorProto* other) {  // Exchanges this message's contents with *other, member by member.
+  if (other != this) {  // Self-swap is a no-op.
+    std::swap(name_, other->name_);  // name_, package_ and options_ are exchanged directly with std::swap.
+    std::swap(package_, other->package_);
+    dependency_.Swap(&other->dependency_);  // These five members are exchanged through their own Swap() methods.
+    message_type_.Swap(&other->message_type_);
+    enum_type_.Swap(&other->enum_type_);
+    service_.Swap(&other->service_);
+    extension_.Swap(&other->extension_);
+    std::swap(options_, other->options_);
+    std::swap(_has_bits_[0], other->_has_bits_[0]);  // Word 0 of _has_bits_ is exchanged alongside the members.
+    _unknown_fields_.Swap(&other->_unknown_fields_);  // Unknown fields are exchanged as well.
+    std::swap(_cached_size_, other->_cached_size_);  // The cached size value follows the contents.
+  }
+}
+
 bool FileDescriptorProto::IsInitialized() const {
   
   for (int i = 0; i < message_type_size(); i++) {
@@ -1298,6 +1341,16 @@ void DescriptorProto_ExtensionRange::CopyFrom(const DescriptorProto_ExtensionRan
   MergeFrom(from);
 }
 
+void DescriptorProto_ExtensionRange::Swap(DescriptorProto_ExtensionRange* other) {  // Exchanges this message's contents with *other.
+  if (other != this) {  // Self-swap is a no-op.
+    std::swap(start_, other->start_);  // Both range bounds are exchanged.
+    std::swap(end_, other->end_);
+    std::swap(_has_bits_[0], other->_has_bits_[0]);  // Word 0 of _has_bits_ is exchanged alongside the members.
+    _unknown_fields_.Swap(&other->_unknown_fields_);  // Unknown fields are exchanged as well.
+    std::swap(_cached_size_, other->_cached_size_);  // The cached size value follows the contents.
+  }
+}
+
 bool DescriptorProto_ExtensionRange::IsInitialized() const {
   
   return true;
@@ -1657,6 +1710,21 @@ void DescriptorProto::CopyFrom(const DescriptorProto& from) {
   MergeFrom(from);
 }
 
+void DescriptorProto::Swap(DescriptorProto* other) {  // Exchanges this message's contents with *other, member by member.
+  if (other != this) {  // Self-swap is a no-op.
+    std::swap(name_, other->name_);  // name_ and options_ are exchanged directly with std::swap.
+    field_.Swap(&other->field_);  // These five members are exchanged through their own Swap() methods.
+    extension_.Swap(&other->extension_);
+    nested_type_.Swap(&other->nested_type_);
+    enum_type_.Swap(&other->enum_type_);
+    extension_range_.Swap(&other->extension_range_);
+    std::swap(options_, other->options_);
+    std::swap(_has_bits_[0], other->_has_bits_[0]);  // Word 0 of _has_bits_ is exchanged alongside the members.
+    _unknown_fields_.Swap(&other->_unknown_fields_);  // Unknown fields are exchanged as well.
+    std::swap(_cached_size_, other->_cached_size_);  // The cached size value follows the contents.
+  }
+}
+
 bool DescriptorProto::IsInitialized() const {
   
   for (int i = 0; i < field_size(); i++) {
@@ -2171,6 +2239,22 @@ void FieldDescriptorProto::CopyFrom(const FieldDescriptorProto& from) {
   MergeFrom(from);
 }
 
+void FieldDescriptorProto::Swap(FieldDescriptorProto* other) {  // Exchanges this message's contents with *other, member by member.
+  if (other != this) {  // Self-swap is a no-op.
+    std::swap(name_, other->name_);  // Every field member of this message is exchanged directly with std::swap.
+    std::swap(number_, other->number_);
+    std::swap(label_, other->label_);
+    std::swap(type_, other->type_);
+    std::swap(type_name_, other->type_name_);
+    std::swap(extendee_, other->extendee_);
+    std::swap(default_value_, other->default_value_);
+    std::swap(options_, other->options_);
+    std::swap(_has_bits_[0], other->_has_bits_[0]);  // Word 0 of _has_bits_ is exchanged alongside the members.
+    _unknown_fields_.Swap(&other->_unknown_fields_);  // Unknown fields are exchanged as well.
+    std::swap(_cached_size_, other->_cached_size_);  // The cached size value follows the contents.
+  }
+}
+
 bool FieldDescriptorProto::IsInitialized() const {
   
   if (has_options()) {
@@ -2413,6 +2497,17 @@ void EnumDescriptorProto::CopyFrom(const EnumDescriptorProto& from) {
   MergeFrom(from);
 }
 
+void EnumDescriptorProto::Swap(EnumDescriptorProto* other) {  // Exchanges this message's contents with *other, member by member.
+  if (other != this) {  // Self-swap is a no-op.
+    std::swap(name_, other->name_);  // name_ and options_ are exchanged directly with std::swap.
+    value_.Swap(&other->value_);  // value_ is exchanged through its own Swap().
+    std::swap(options_, other->options_);
+    std::swap(_has_bits_[0], other->_has_bits_[0]);  // Word 0 of _has_bits_ is exchanged alongside the members.
+    _unknown_fields_.Swap(&other->_unknown_fields_);  // Unknown fields are exchanged as well.
+    std::swap(_cached_size_, other->_cached_size_);  // The cached size value follows the contents.
+  }
+}
+
 bool EnumDescriptorProto::IsInitialized() const {
   
   for (int i = 0; i < value_size(); i++) {
@@ -2661,6 +2756,17 @@ void EnumValueDescriptorProto::CopyFrom(const EnumValueDescriptorProto& from) {
   MergeFrom(from);
 }
 
+void EnumValueDescriptorProto::Swap(EnumValueDescriptorProto* other) {  // Exchanges this message's contents with *other.
+  if (other != this) {  // Self-swap is a no-op.
+    std::swap(name_, other->name_);  // All three field members are exchanged directly with std::swap.
+    std::swap(number_, other->number_);
+    std::swap(options_, other->options_);
+    std::swap(_has_bits_[0], other->_has_bits_[0]);  // Word 0 of _has_bits_ is exchanged alongside the members.
+    _unknown_fields_.Swap(&other->_unknown_fields_);  // Unknown fields are exchanged as well.
+    std::swap(_cached_size_, other->_cached_size_);  // The cached size value follows the contents.
+  }
+}
+
 bool EnumValueDescriptorProto::IsInitialized() const {
   
   if (has_options()) {
@@ -2903,6 +3009,17 @@ void ServiceDescriptorProto::CopyFrom(const ServiceDescriptorProto& from) {
   MergeFrom(from);
 }
 
+void ServiceDescriptorProto::Swap(ServiceDescriptorProto* other) {  // Exchanges this message's contents with *other.
+  if (other != this) {  // Self-swap is a no-op.
+    std::swap(name_, other->name_);  // name_ and options_ are exchanged directly with std::swap.
+    method_.Swap(&other->method_);  // method_ is exchanged through its own Swap().
+    std::swap(options_, other->options_);
+    std::swap(_has_bits_[0], other->_has_bits_[0]);  // Word 0 of _has_bits_ is exchanged alongside the members.
+    _unknown_fields_.Swap(&other->_unknown_fields_);  // Unknown fields are exchanged as well.
+    std::swap(_cached_size_, other->_cached_size_);  // The cached size value follows the contents.
+  }
+}
+
 bool ServiceDescriptorProto::IsInitialized() const {
   
   for (int i = 0; i < method_size(); i++) {
@@ -3192,6 +3309,18 @@ void MethodDescriptorProto::CopyFrom(const MethodDescriptorProto& from) {
   MergeFrom(from);
 }
 
+void MethodDescriptorProto::Swap(MethodDescriptorProto* other) {  // Exchanges this message's contents with *other.
+  if (other != this) {  // Self-swap is a no-op.
+    std::swap(name_, other->name_);  // All four field members are exchanged directly with std::swap.
+    std::swap(input_type_, other->input_type_);
+    std::swap(output_type_, other->output_type_);
+    std::swap(options_, other->options_);
+    std::swap(_has_bits_[0], other->_has_bits_[0]);  // Word 0 of _has_bits_ is exchanged alongside the members.
+    _unknown_fields_.Swap(&other->_unknown_fields_);  // Unknown fields are exchanged as well.
+    std::swap(_cached_size_, other->_cached_size_);  // The cached size value follows the contents.
+  }
+}
+
 bool MethodDescriptorProto::IsInitialized() const {
   
   if (has_options()) {
@@ -3540,6 +3669,20 @@ void FileOptions::CopyFrom(const FileOptions& from) {
   MergeFrom(from);
 }
 
+void FileOptions::Swap(FileOptions* other) {  // Exchanges this message's contents, including extensions, with *other.
+  if (other != this) {  // Self-swap is a no-op.
+    std::swap(java_package_, other->java_package_);  // The four declared option members are exchanged directly with std::swap.
+    std::swap(java_outer_classname_, other->java_outer_classname_);
+    std::swap(java_multiple_files_, other->java_multiple_files_);
+    std::swap(optimize_for_, other->optimize_for_);
+    uninterpreted_option_.Swap(&other->uninterpreted_option_);  // Exchanged through its own Swap().
+    std::swap(_has_bits_[0], other->_has_bits_[0]);  // Word 0 of _has_bits_ is exchanged alongside the members.
+    _unknown_fields_.Swap(&other->_unknown_fields_);  // Unknown fields are exchanged as well.
+    std::swap(_cached_size_, other->_cached_size_);  // The cached size value follows the contents.
+    _extensions_.Swap(&other->_extensions_);  // This message also has an _extensions_ member, which is exchanged last.
+  }
+}
+
 bool FileOptions::IsInitialized() const {
   
   for (int i = 0; i < uninterpreted_option_size(); i++) {
@@ -3759,6 +3902,17 @@ void MessageOptions::CopyFrom(const MessageOptions& from) {
   MergeFrom(from);
 }
 
+void MessageOptions::Swap(MessageOptions* other) {  // Exchanges this message's contents, including extensions, with *other.
+  if (other != this) {  // Self-swap is a no-op.
+    std::swap(message_set_wire_format_, other->message_set_wire_format_);  // Exchanged directly with std::swap.
+    uninterpreted_option_.Swap(&other->uninterpreted_option_);  // Exchanged through its own Swap().
+    std::swap(_has_bits_[0], other->_has_bits_[0]);  // Word 0 of _has_bits_ is exchanged alongside the members.
+    _unknown_fields_.Swap(&other->_unknown_fields_);  // Unknown fields are exchanged as well.
+    std::swap(_cached_size_, other->_cached_size_);  // The cached size value follows the contents.
+    _extensions_.Swap(&other->_extensions_);  // This message also has an _extensions_ member, which is exchanged last.
+  }
+}
+
 bool MessageOptions::IsInitialized() const {
   
   for (int i = 0; i < uninterpreted_option_size(); i++) {
@@ -4040,6 +4194,18 @@ void FieldOptions::CopyFrom(const FieldOptions& from) {
   MergeFrom(from);
 }
 
+void FieldOptions::Swap(FieldOptions* other) {  // Exchanges this message's contents, including extensions, with *other.
+  if (other != this) {  // Self-swap is a no-op.
+    std::swap(ctype_, other->ctype_);  // The two declared option members are exchanged directly with std::swap.
+    std::swap(experimental_map_key_, other->experimental_map_key_);
+    uninterpreted_option_.Swap(&other->uninterpreted_option_);  // Exchanged through its own Swap().
+    std::swap(_has_bits_[0], other->_has_bits_[0]);  // Word 0 of _has_bits_ is exchanged alongside the members.
+    _unknown_fields_.Swap(&other->_unknown_fields_);  // Unknown fields are exchanged as well.
+    std::swap(_cached_size_, other->_cached_size_);  // The cached size value follows the contents.
+    _extensions_.Swap(&other->_extensions_);  // This message also has an _extensions_ member, which is exchanged last.
+  }
+}
+
 bool FieldOptions::IsInitialized() const {
   
   for (int i = 0; i < uninterpreted_option_size(); i++) {
@@ -4223,6 +4389,16 @@ void EnumOptions::CopyFrom(const EnumOptions& from) {
   MergeFrom(from);
 }
 
+void EnumOptions::Swap(EnumOptions* other) {  // Exchanges this message's contents, including extensions, with *other.
+  if (other != this) {  // Self-swap is a no-op.
+    uninterpreted_option_.Swap(&other->uninterpreted_option_);  // The only field member swapped here; exchanged through its own Swap().
+    std::swap(_has_bits_[0], other->_has_bits_[0]);  // Word 0 of _has_bits_ is exchanged alongside the members.
+    _unknown_fields_.Swap(&other->_unknown_fields_);  // Unknown fields are exchanged as well.
+    std::swap(_cached_size_, other->_cached_size_);  // The cached size value follows the contents.
+    _extensions_.Swap(&other->_extensions_);  // This message also has an _extensions_ member, which is exchanged last.
+  }
+}
+
 bool EnumOptions::IsInitialized() const {
   
   for (int i = 0; i < uninterpreted_option_size(); i++) {
@@ -4406,6 +4582,16 @@ void EnumValueOptions::CopyFrom(const EnumValueOptions& from) {
   MergeFrom(from);
 }
 
+void EnumValueOptions::Swap(EnumValueOptions* other) {  // Exchanges this message's contents, including extensions, with *other.
+  if (other != this) {  // Self-swap is a no-op.
+    uninterpreted_option_.Swap(&other->uninterpreted_option_);  // The only field member swapped here; exchanged through its own Swap().
+    std::swap(_has_bits_[0], other->_has_bits_[0]);  // Word 0 of _has_bits_ is exchanged alongside the members.
+    _unknown_fields_.Swap(&other->_unknown_fields_);  // Unknown fields are exchanged as well.
+    std::swap(_cached_size_, other->_cached_size_);  // The cached size value follows the contents.
+    _extensions_.Swap(&other->_extensions_);  // This message also has an _extensions_ member, which is exchanged last.
+  }
+}
+
 bool EnumValueOptions::IsInitialized() const {
   
   for (int i = 0; i < uninterpreted_option_size(); i++) {
@@ -4589,6 +4775,16 @@ void ServiceOptions::CopyFrom(const ServiceOptions& from) {
   MergeFrom(from);
 }
 
+void ServiceOptions::Swap(ServiceOptions* other) {  // Exchanges this message's contents, including extensions, with *other.
+  if (other != this) {  // Self-swap is a no-op.
+    uninterpreted_option_.Swap(&other->uninterpreted_option_);  // The only field member swapped here; exchanged through its own Swap().
+    std::swap(_has_bits_[0], other->_has_bits_[0]);  // Word 0 of _has_bits_ is exchanged alongside the members.
+    _unknown_fields_.Swap(&other->_unknown_fields_);  // Unknown fields are exchanged as well.
+    std::swap(_cached_size_, other->_cached_size_);  // The cached size value follows the contents.
+    _extensions_.Swap(&other->_extensions_);  // This message also has an _extensions_ member, which is exchanged last.
+  }
+}
+
 bool ServiceOptions::IsInitialized() const {
   
   for (int i = 0; i < uninterpreted_option_size(); i++) {
@@ -4772,6 +4968,16 @@ void MethodOptions::CopyFrom(const MethodOptions& from) {
   MergeFrom(from);
 }
 
+void MethodOptions::Swap(MethodOptions* other) {  // Exchanges this message's contents, including extensions, with *other.
+  if (other != this) {  // Self-swap is a no-op.
+    uninterpreted_option_.Swap(&other->uninterpreted_option_);  // The only field member swapped here; exchanged through its own Swap().
+    std::swap(_has_bits_[0], other->_has_bits_[0]);  // Word 0 of _has_bits_ is exchanged alongside the members.
+    _unknown_fields_.Swap(&other->_unknown_fields_);  // Unknown fields are exchanged as well.
+    std::swap(_cached_size_, other->_cached_size_);  // The cached size value follows the contents.
+    _extensions_.Swap(&other->_extensions_);  // This message also has an _extensions_ member, which is exchanged last.
+  }
+}
+
 bool MethodOptions::IsInitialized() const {
   
   for (int i = 0; i < uninterpreted_option_size(); i++) {
@@ -4980,6 +5186,16 @@ void UninterpretedOption_NamePart::CopyFrom(const UninterpretedOption_NamePart&
   MergeFrom(from);
 }
 
+void UninterpretedOption_NamePart::Swap(UninterpretedOption_NamePart* other) {  // Exchanges this message's contents with *other.
+  if (other != this) {  // Self-swap is a no-op.
+    std::swap(name_part_, other->name_part_);  // Both field members are exchanged directly with std::swap.
+    std::swap(is_extension_, other->is_extension_);
+    std::swap(_has_bits_[0], other->_has_bits_[0]);  // Word 0 of _has_bits_ is exchanged alongside the members.
+    _unknown_fields_.Swap(&other->_unknown_fields_);  // Unknown fields are exchanged as well.
+    std::swap(_cached_size_, other->_cached_size_);  // The cached size value follows the contents.
+  }
+}
+
 bool UninterpretedOption_NamePart::IsInitialized() const {
   if ((_has_bits_[0] & 0x00000003) != 0x00000003) return false;
   
@@ -5319,6 +5535,20 @@ void UninterpretedOption::CopyFrom(const UninterpretedOption& from) {
   MergeFrom(from);
 }
 
+void UninterpretedOption::Swap(UninterpretedOption* other) {  // Exchanges this message's contents with *other, member by member.
+  if (other != this) {  // Self-swap is a no-op.
+    name_.Swap(&other->name_);  // name_ is exchanged through its own Swap().
+    std::swap(identifier_value_, other->identifier_value_);  // The five value members are exchanged directly with std::swap.
+    std::swap(positive_int_value_, other->positive_int_value_);
+    std::swap(negative_int_value_, other->negative_int_value_);
+    std::swap(double_value_, other->double_value_);
+    std::swap(string_value_, other->string_value_);
+    std::swap(_has_bits_[0], other->_has_bits_[0]);  // Word 0 of _has_bits_ is exchanged alongside the members.
+    _unknown_fields_.Swap(&other->_unknown_fields_);  // Unknown fields are exchanged as well.
+    std::swap(_cached_size_, other->_cached_size_);  // The cached size value follows the contents.
+  }
+}
+
 bool UninterpretedOption::IsInitialized() const {
   
   for (int i = 0; i < name_size(); i++) {

+ 36 - 0
src/google/protobuf/descriptor.pb.h

@@ -126,6 +126,8 @@ class LIBPROTOBUF_EXPORT FileDescriptorSet : public ::google::protobuf::Message
   
   static const ::google::protobuf::Descriptor* descriptor();
   static const FileDescriptorSet& default_instance();
+  void Swap(FileDescriptorSet* other);
+  
   // implements Message ----------------------------------------------
   
   FileDescriptorSet* New() const;
@@ -209,6 +211,8 @@ class LIBPROTOBUF_EXPORT FileDescriptorProto : public ::google::protobuf::Messag
   
   static const ::google::protobuf::Descriptor* descriptor();
   static const FileDescriptorProto& default_instance();
+  void Swap(FileDescriptorProto* other);
+  
   // implements Message ----------------------------------------------
   
   FileDescriptorProto* New() const;
@@ -363,6 +367,8 @@ class LIBPROTOBUF_EXPORT DescriptorProto_ExtensionRange : public ::google::proto
   
   static const ::google::protobuf::Descriptor* descriptor();
   static const DescriptorProto_ExtensionRange& default_instance();
+  void Swap(DescriptorProto_ExtensionRange* other);
+  
   // implements Message ----------------------------------------------
   
   DescriptorProto_ExtensionRange* New() const;
@@ -450,6 +456,8 @@ class LIBPROTOBUF_EXPORT DescriptorProto : public ::google::protobuf::Message {
   
   static const ::google::protobuf::Descriptor* descriptor();
   static const DescriptorProto& default_instance();
+  void Swap(DescriptorProto* other);
+  
   // implements Message ----------------------------------------------
   
   DescriptorProto* New() const;
@@ -592,6 +600,8 @@ class LIBPROTOBUF_EXPORT FieldDescriptorProto : public ::google::protobuf::Messa
   
   static const ::google::protobuf::Descriptor* descriptor();
   static const FieldDescriptorProto& default_instance();
+  void Swap(FieldDescriptorProto* other);
+  
   // implements Message ----------------------------------------------
   
   FieldDescriptorProto* New() const;
@@ -780,6 +790,8 @@ class LIBPROTOBUF_EXPORT EnumDescriptorProto : public ::google::protobuf::Messag
   
   static const ::google::protobuf::Descriptor* descriptor();
   static const EnumDescriptorProto& default_instance();
+  void Swap(EnumDescriptorProto* other);
+  
   // implements Message ----------------------------------------------
   
   EnumDescriptorProto* New() const;
@@ -880,6 +892,8 @@ class LIBPROTOBUF_EXPORT EnumValueDescriptorProto : public ::google::protobuf::M
   
   static const ::google::protobuf::Descriptor* descriptor();
   static const EnumValueDescriptorProto& default_instance();
+  void Swap(EnumValueDescriptorProto* other);
+  
   // implements Message ----------------------------------------------
   
   EnumValueDescriptorProto* New() const;
@@ -977,6 +991,8 @@ class LIBPROTOBUF_EXPORT ServiceDescriptorProto : public ::google::protobuf::Mes
   
   static const ::google::protobuf::Descriptor* descriptor();
   static const ServiceDescriptorProto& default_instance();
+  void Swap(ServiceDescriptorProto* other);
+  
   // implements Message ----------------------------------------------
   
   ServiceDescriptorProto* New() const;
@@ -1077,6 +1093,8 @@ class LIBPROTOBUF_EXPORT MethodDescriptorProto : public ::google::protobuf::Mess
   
   static const ::google::protobuf::Descriptor* descriptor();
   static const MethodDescriptorProto& default_instance();
+  void Swap(MethodDescriptorProto* other);
+  
   // implements Message ----------------------------------------------
   
   MethodDescriptorProto* New() const;
@@ -1187,6 +1205,8 @@ class LIBPROTOBUF_EXPORT FileOptions : public ::google::protobuf::Message {
   
   static const ::google::protobuf::Descriptor* descriptor();
   static const FileOptions& default_instance();
+  void Swap(FileOptions* other);
+  
   // implements Message ----------------------------------------------
   
   FileOptions* New() const;
@@ -1401,6 +1421,8 @@ class LIBPROTOBUF_EXPORT MessageOptions : public ::google::protobuf::Message {
   
   static const ::google::protobuf::Descriptor* descriptor();
   static const MessageOptions& default_instance();
+  void Swap(MessageOptions* other);
+  
   // implements Message ----------------------------------------------
   
   MessageOptions* New() const;
@@ -1573,6 +1595,8 @@ class LIBPROTOBUF_EXPORT FieldOptions : public ::google::protobuf::Message {
   
   static const ::google::protobuf::Descriptor* descriptor();
   static const FieldOptions& default_instance();
+  void Swap(FieldOptions* other);
+  
   // implements Message ----------------------------------------------
   
   FieldOptions* New() const;
@@ -1770,6 +1794,8 @@ class LIBPROTOBUF_EXPORT EnumOptions : public ::google::protobuf::Message {
   
   static const ::google::protobuf::Descriptor* descriptor();
   static const EnumOptions& default_instance();
+  void Swap(EnumOptions* other);
+  
   // implements Message ----------------------------------------------
   
   EnumOptions* New() const;
@@ -1935,6 +1961,8 @@ class LIBPROTOBUF_EXPORT EnumValueOptions : public ::google::protobuf::Message {
   
   static const ::google::protobuf::Descriptor* descriptor();
   static const EnumValueOptions& default_instance();
+  void Swap(EnumValueOptions* other);
+  
   // implements Message ----------------------------------------------
   
   EnumValueOptions* New() const;
@@ -2100,6 +2128,8 @@ class LIBPROTOBUF_EXPORT ServiceOptions : public ::google::protobuf::Message {
   
   static const ::google::protobuf::Descriptor* descriptor();
   static const ServiceOptions& default_instance();
+  void Swap(ServiceOptions* other);
+  
   // implements Message ----------------------------------------------
   
   ServiceOptions* New() const;
@@ -2265,6 +2295,8 @@ class LIBPROTOBUF_EXPORT MethodOptions : public ::google::protobuf::Message {
   
   static const ::google::protobuf::Descriptor* descriptor();
   static const MethodOptions& default_instance();
+  void Swap(MethodOptions* other);
+  
   // implements Message ----------------------------------------------
   
   MethodOptions* New() const;
@@ -2430,6 +2462,8 @@ class LIBPROTOBUF_EXPORT UninterpretedOption_NamePart : public ::google::protobu
   
   static const ::google::protobuf::Descriptor* descriptor();
   static const UninterpretedOption_NamePart& default_instance();
+  void Swap(UninterpretedOption_NamePart* other);
+  
   // implements Message ----------------------------------------------
   
   UninterpretedOption_NamePart* New() const;
@@ -2520,6 +2554,8 @@ class LIBPROTOBUF_EXPORT UninterpretedOption : public ::google::protobuf::Messag
   
   static const ::google::protobuf::Descriptor* descriptor();
   static const UninterpretedOption& default_instance();
+  void Swap(UninterpretedOption* other);
+  
   // implements Message ----------------------------------------------
   
   UninterpretedOption* New() const;

+ 1 - 0
src/google/protobuf/descriptor.proto

@@ -252,6 +252,7 @@ message FileOptions {
   }
   optional OptimizeMode optimize_for = 9 [default=CODE_SIZE];
 
+
   // The parser stores options it doesn't recognize here. See above.
   repeated UninterpretedOption uninterpreted_option = 999;
 

+ 55 - 0
src/google/protobuf/descriptor_unittest.cc

@@ -1658,6 +1658,61 @@ TEST(CustomOptions, ComplexExtensionOptions) {
   EXPECT_EQ(24, options->GetExtension(protobuf_unittest::complexopt6).xyzzy());
 }
 
+TEST(CustomOptions, OptionsFromOtherFile) {
+  // Test that to use a custom option, we only need to import the file
+  // defining the option; we do not also have to import descriptor.proto.
+  DescriptorPool pool;
+
+  // Build, in a fresh pool, the file that defines FileDescriptorProto.
+  FileDescriptorProto file_proto;
+  FileDescriptorProto::descriptor()->file()->CopyTo(&file_proto);
+  ASSERT_TRUE(pool.BuildFile(file_proto) != NULL);
+
+  // Build the file that defines TestMessageWithCustomOptions; it is the
+  // dependency named by the hand-written file below.
+  protobuf_unittest::TestMessageWithCustomOptions::descriptor()
+    ->file()->CopyTo(&file_proto);
+  ASSERT_TRUE(pool.BuildFile(file_proto) != NULL);
+
+  // Describe a third file whose only dependency is the custom options file
+  // and whose options are all given in uninterpreted form.
+  ASSERT_TRUE(TextFormat::ParseFromString(
+    "name: \"custom_options_import.proto\" "
+    "package: \"protobuf_unittest\" "
+    "dependency: \"google/protobuf/unittest_custom_options.proto\" "
+    "options { "
+    "  uninterpreted_option { "
+    "    name { "
+    "      name_part: \"file_opt1\" "
+    "      is_extension: true "
+    "    } "
+    "    positive_int_value: 1234 "
+    "  } "
+    // Test a non-extension option too.  (At one point this failed due to a
+    // bug.)
+    "  uninterpreted_option { "
+    "    name { "
+    "      name_part: \"java_package\" "
+    "      is_extension: false "
+    "    } "
+    "    string_value: \"foo\" "
+    "  } "
+    // Test that enum-typed options still work too.  (At one point this also
+    // failed due to a bug.)
+    "  uninterpreted_option { "
+    "    name { "
+    "      name_part: \"optimize_for\" "
+    "      is_extension: false "
+    "    } "
+    "    identifier_value: \"SPEED\" "
+    "  } "
+    "}"
+    ,
+    &file_proto));
+
+  // Building the file must succeed, and each of the three options must then
+  // be readable through the normal typed accessors.
+  const FileDescriptor* file = pool.BuildFile(file_proto);
+  ASSERT_TRUE(file != NULL);
+  EXPECT_EQ(1234, file->options().GetExtension(protobuf_unittest::file_opt1));
+  EXPECT_TRUE(file->options().has_java_package());
+  EXPECT_EQ("foo", file->options().java_package());
+  EXPECT_TRUE(file->options().has_optimize_for());
+  EXPECT_EQ(FileOptions::SPEED, file->options().optimize_for());
+}
 
 
 // ===================================================================

+ 2 - 1
src/google/protobuf/dynamic_message.cc

@@ -507,7 +507,8 @@ const Message* DynamicMessageFactory::GetPrototype(const Descriptor* type) {
       type_info->has_bits_offset,
       type_info->unknown_fields_offset,
       type_info->extensions_offset,
-      type_info->pool));
+      type_info->pool,
+      type_info->size));
 
   // Cross link prototypes.
   prototype->CrossLinkPrototypes();

+ 15 - 0
src/google/protobuf/dynamic_message_unittest.cc

@@ -127,5 +127,20 @@ TEST_F(DynamicMessageTest, Extensions) {
   reflection_tester.ExpectAllFieldsSetViaReflection(*message);
 }
 
+TEST_F(DynamicMessageTest, SpaceUsed) {
+  // Smoke test for SpaceUsed() on a dynamic message.  The implementation is
+  // shared with generated messages, so it is enough to check that the
+  // estimate grows once every field has been populated.
+  scoped_ptr<Message> msg(prototype_->New());
+  TestUtil::ReflectionTester tester(descriptor_);
+
+  // Baseline: a freshly created message with nothing set.
+  const int empty_size = msg->SpaceUsed();
+
+  tester.SetAllFieldsViaReflection(msg.get());
+
+  EXPECT_LT(empty_size, msg->SpaceUsed());
+}
+
 }  // namespace protobuf
 }  // namespace google

+ 58 - 0
src/google/protobuf/extension_set.cc

@@ -40,6 +40,7 @@
 #include <google/protobuf/io/coded_stream.h>
 #include <google/protobuf/wire_format.h>
 #include <google/protobuf/repeated_field.h>
+#include <google/protobuf/generated_message_reflection.h>
 
 namespace google {
 namespace protobuf {
@@ -515,6 +516,13 @@ void ExtensionSet::MergeFrom(const ExtensionSet& other) {
   }
 }
 
+// Exchanges the entire state of this set with *other: the bookkeeping
+// pointers (extendee, descriptor pool, message factory) as well as the
+// stored extensions themselves.
+void ExtensionSet::Swap(ExtensionSet* other) {
+  std::swap(message_factory_, other->message_factory_);
+  std::swap(descriptor_pool_, other->descriptor_pool_);
+  std::swap(extendee_, other->extendee_);
+  extensions_.swap(other->extensions_);
+}
+
 bool ExtensionSet::IsInitialized() const {
   // Extensions are never required.  However, we need to check that all
   // embedded messages are initialized.
@@ -575,6 +583,18 @@ int ExtensionSet::ByteSize(const Message& message) const {
   return total_size;
 }
 
+// Estimates the memory held by the stored extensions, not counting
+// sizeof(*this): one map value_type per entry plus whatever each extension
+// reports for its own out-of-line storage.
+int ExtensionSet::SpaceUsedExcludingSelf() const {
+  typedef map<int, Extension> ExtensionMap;
+
+  // Fixed per-entry cost of the (number, Extension) pairs.
+  int bytes = extensions_.size() * sizeof(ExtensionMap::value_type);
+
+  // Variable cost owned by each individual extension.
+  ExtensionMap::const_iterator it = extensions_.begin();
+  const ExtensionMap::const_iterator last = extensions_.end();
+  while (it != last) {
+    bytes += it->second.SpaceUsedExcludingSelf();
+    ++it;
+  }
+  return bytes;
+}
+
 // ===================================================================
 // Methods of ExtensionSet::Extension
 
@@ -712,6 +732,44 @@ void ExtensionSet::Extension::Free() {
   }
 }
 
+// Estimates the memory owned by this extension value beyond the Extension
+// struct itself.
+//   - Repeated values: the size of the pointed-to repeated field object plus
+//     whatever that field reports via its own SpaceUsedExcludingSelf().
+//   - Singular strings: sizeof(string) plus the string's out-of-line storage.
+//   - Singular messages: the sub-message's SpaceUsed().
+//   - Singular primitives: nothing extra.
+int ExtensionSet::Extension::SpaceUsedExcludingSelf() const {
+  int total_size = 0;
+  if (descriptor->is_repeated()) {
+    switch (descriptor->cpp_type()) {
+#define HANDLE_TYPE(UPPERCASE, LOWERCASE)                          \
+      case FieldDescriptor::CPPTYPE_##UPPERCASE:                   \
+        total_size += sizeof(*repeated_##LOWERCASE##_value) +      \
+            repeated_##LOWERCASE##_value->SpaceUsedExcludingSelf();\
+        break
+
+      HANDLE_TYPE(  INT32,   int32);
+      HANDLE_TYPE(  INT64,   int64);
+      HANDLE_TYPE( UINT32,  uint32);
+      HANDLE_TYPE( UINT64,  uint64);
+      HANDLE_TYPE(  FLOAT,   float);
+      HANDLE_TYPE( DOUBLE,  double);
+      HANDLE_TYPE(   BOOL,    bool);
+      HANDLE_TYPE(   ENUM,    enum);
+      HANDLE_TYPE( STRING,  string);
+      HANDLE_TYPE(MESSAGE, message);
+// Keep the helper macro local to this switch so it cannot leak into code
+// that follows in the translation unit.
+#undef HANDLE_TYPE
+    }
+  } else {
+    switch (descriptor->cpp_type()) {
+      case FieldDescriptor::CPPTYPE_STRING:
+        // The Extension only holds a pointer, so the string object itself is
+        // counted here along with any out-of-line character storage.
+        total_size += sizeof(*string_value) +
+                      StringSpaceUsedExcludingSelf(*string_value);
+        break;
+      case FieldDescriptor::CPPTYPE_MESSAGE:
+        total_size += message_value->SpaceUsed();
+        break;
+      default:
+        // No extra storage costs for primitive types.
+        break;
+    }
+  }
+  return total_size;
+}
+
 }  // namespace internal
 }  // namespace protobuf
 }  // namespace google

+ 6 - 0
src/google/protobuf/extension_set.h

@@ -209,6 +209,7 @@ class LIBPROTOBUF_EXPORT ExtensionSet {
 
   void Clear();
   void MergeFrom(const ExtensionSet& other);
+  void Swap(ExtensionSet* other);
   bool IsInitialized() const;
 
   // These parsing and serialization functions all want a pointer to the
@@ -234,6 +235,10 @@ class LIBPROTOBUF_EXPORT ExtensionSet {
   // Returns the total serialized size of all the extensions.
   int ByteSize(const Message& message) const;
 
+  // Returns (an estimate of) the total number of bytes used for storing the
+  // extensions in memory, excluding sizeof(*this).
+  int SpaceUsedExcludingSelf() const;
+
  private:
   // Like FindKnownExtension(), but GOOGLE_CHECK-fail if not found.
   const FieldDescriptor* FindKnownExtensionOrDie(int number) const;
@@ -286,6 +291,7 @@ class LIBPROTOBUF_EXPORT ExtensionSet {
     void Clear();
     int GetSize() const;
     void Free();
+    int SpaceUsedExcludingSelf() const;
   };
 
   // The Extension struct is small enough to be passed by value, so we use it

+ 167 - 0
src/google/protobuf/extension_set_unittest.cc

@@ -136,6 +136,36 @@ TEST(ExtensionSetTest, CopyFrom) {
   TestUtil::ExpectAllExtensionsSet(message2);
 }
 
+TEST(ExtensionSetTest, CopyFromUpcasted) {
+  // CopyFrom() must copy extensions even when the source is only known
+  // through a reference to the Message base class.
+  unittest::TestAllExtensions message1, message2;
+  const Message& upcasted_message = message1;
+
+  TestUtil::SetAllExtensions(&message1);
+  message2.CopyFrom(upcasted_message);
+  TestUtil::ExpectAllExtensionsSet(message2);
+}
+
+TEST(ExtensionSetTest, SwapWithEmpty) {
+  // Swapping a fully populated message with an empty one must move every
+  // extension across and leave the original holder cleared.
+  unittest::TestAllExtensions populated;
+  unittest::TestAllExtensions blank;
+  TestUtil::SetAllExtensions(&populated);
+
+  // Preconditions.
+  TestUtil::ExpectAllExtensionsSet(populated);
+  TestUtil::ExpectExtensionsClear(blank);
+
+  populated.Swap(&blank);
+
+  // The two messages have traded contents.
+  TestUtil::ExpectAllExtensionsSet(blank);
+  TestUtil::ExpectExtensionsClear(populated);
+}
+
+TEST(ExtensionSetTest, SwapWithSelf) {
+  // Swapping a message with itself must leave its extensions untouched.
+  unittest::TestAllExtensions msg;
+  TestUtil::SetAllExtensions(&msg);
+  TestUtil::ExpectAllExtensionsSet(msg);
+
+  msg.Swap(&msg);
+
+  TestUtil::ExpectAllExtensionsSet(msg);
+}
+
 TEST(ExtensionSetTest, Serialization) {
   // Serialize as TestAllExtensions and parse as TestAllTypes to insure wire
   // compatibility of extensions.
@@ -203,6 +233,143 @@ TEST(ExtensionSetTest, MutableString) {
             message.GetExtension(unittest::repeated_string_extension, 0));
 }
 
+TEST(ExtensionSetTest, SpaceUsedExcludingSelf) {
+  // Exercises extension memory accounting through Message::SpaceUsed(): for
+  // each kind of extension, setting it must raise SpaceUsed() by at least
+  // (or, for repeated primitives, exactly) the amount computed below.
+
+  // Scalar primitive extensions should increase the extension set size by a
+  // minimum of the size of the primitive type.
+#define TEST_SCALAR_EXTENSIONS_SPACE_USED(type, value)                        \
+  do {                                                                        \
+    unittest::TestAllExtensions message;                                      \
+    const int base_size = message.SpaceUsed();                                \
+    message.SetExtension(unittest::optional_##type##_extension, value);       \
+    int min_expected_size = base_size +                                       \
+        sizeof(message.GetExtension(unittest::optional_##type##_extension));  \
+    EXPECT_LE(min_expected_size, message.SpaceUsed());                        \
+  } while (0)
+
+  TEST_SCALAR_EXTENSIONS_SPACE_USED(int32   , 101);
+  TEST_SCALAR_EXTENSIONS_SPACE_USED(int64   , 102);
+  TEST_SCALAR_EXTENSIONS_SPACE_USED(uint32  , 103);
+  TEST_SCALAR_EXTENSIONS_SPACE_USED(uint64  , 104);
+  TEST_SCALAR_EXTENSIONS_SPACE_USED(sint32  , 105);
+  TEST_SCALAR_EXTENSIONS_SPACE_USED(sint64  , 106);
+  TEST_SCALAR_EXTENSIONS_SPACE_USED(fixed32 , 107);
+  TEST_SCALAR_EXTENSIONS_SPACE_USED(fixed64 , 108);
+  TEST_SCALAR_EXTENSIONS_SPACE_USED(sfixed32, 109);
+  TEST_SCALAR_EXTENSIONS_SPACE_USED(sfixed64, 110);
+  TEST_SCALAR_EXTENSIONS_SPACE_USED(float   , 111);
+  TEST_SCALAR_EXTENSIONS_SPACE_USED(double  , 112);
+  TEST_SCALAR_EXTENSIONS_SPACE_USED(bool    , true);
+#undef TEST_SCALAR_EXTENSIONS_SPACE_USED
+  // Enums are handled outside the macro because the extension name
+  // (optional_nested_enum_extension) and value are spelled differently.
+  {
+    unittest::TestAllExtensions message;
+    const int base_size = message.SpaceUsed();
+    message.SetExtension(unittest::optional_nested_enum_extension,
+                         unittest::TestAllTypes::FOO);
+    int min_expected_size = base_size +
+        sizeof(message.GetExtension(unittest::optional_nested_enum_extension));
+    EXPECT_LE(min_expected_size, message.SpaceUsed());
+  }
+  {
+    // Strings may cause extra allocations depending on their length; ensure
+    // that gets included as well.
+    unittest::TestAllExtensions message;
+    const int base_size = message.SpaceUsed();
+    const string s("this is a fairly large string that will cause some "
+                   "allocation in order to store it in the extension");
+    message.SetExtension(unittest::optional_string_extension, s);
+    int min_expected_size = base_size + s.length();
+    EXPECT_LE(min_expected_size, message.SpaceUsed());
+  }
+  {
+    // Messages also have additional allocation that need to be counted.
+    unittest::TestAllExtensions message;
+    const int base_size = message.SpaceUsed();
+    unittest::ForeignMessage foreign;
+    foreign.set_c(42);
+    message.MutableExtension(unittest::optional_foreign_message_extension)->
+        CopyFrom(foreign);
+    int min_expected_size = base_size + foreign.SpaceUsed();
+    EXPECT_LE(min_expected_size, message.SpaceUsed());
+  }
+
+  // Repeated primitive extensions will increase space used by at least a
+  // RepeatedField<T>, and will cause additional allocations when the array
+  // gets too big for the initial space.
+  // This macro:
+  //   - Adds a value to the repeated extension, then clears it, establishing
+  //     the base size.
+  //   - Adds a small number of values, testing that it doesn't increase the
+  //     SpaceUsed()
+  //   - Adds a large number of values (requiring allocation in the repeated
+  //     field), and ensures that that allocation is included in SpaceUsed()
+  // NOTE(review): the EXPECT_EQ checks assume two elements fit in the
+  // field's initial storage and that growing to 16 elements allocates
+  // exactly 16 slots -- confirm against RepeatedField if its sizing changes.
+#define TEST_REPEATED_EXTENSIONS_SPACE_USED(type, cpptype, value)              \
+  do {                                                                         \
+    unittest::TestAllExtensions message;                                       \
+    const int base_size = message.SpaceUsed();                                 \
+    int min_expected_size = sizeof(RepeatedField<cpptype>) + base_size;        \
+    message.AddExtension(unittest::repeated_##type##_extension, value);        \
+    message.ClearExtension(unittest::repeated_##type##_extension);             \
+    const int empty_repeated_field_size = message.SpaceUsed();                 \
+    EXPECT_LE(min_expected_size, empty_repeated_field_size) << #type;          \
+    message.AddExtension(unittest::repeated_##type##_extension, value);        \
+    message.AddExtension(unittest::repeated_##type##_extension, value);        \
+    EXPECT_EQ(empty_repeated_field_size, message.SpaceUsed()) << #type;        \
+    message.ClearExtension(unittest::repeated_##type##_extension);             \
+    for (int i = 0; i < 16; ++i) {                                             \
+      message.AddExtension(unittest::repeated_##type##_extension, value);      \
+    }                                                                          \
+    int expected_size = sizeof(cpptype) * 16 + empty_repeated_field_size;      \
+    EXPECT_EQ(expected_size, message.SpaceUsed()) << #type;                    \
+  } while (0)
+
+  TEST_REPEATED_EXTENSIONS_SPACE_USED(int32   , int32 , 101);
+  TEST_REPEATED_EXTENSIONS_SPACE_USED(int64   , int64 , 102);
+  TEST_REPEATED_EXTENSIONS_SPACE_USED(uint32  , uint32, 103);
+  TEST_REPEATED_EXTENSIONS_SPACE_USED(uint64  , uint64, 104);
+  TEST_REPEATED_EXTENSIONS_SPACE_USED(sint32  , int32 , 105);
+  TEST_REPEATED_EXTENSIONS_SPACE_USED(sint64  , int64 , 106);
+  TEST_REPEATED_EXTENSIONS_SPACE_USED(fixed32 , uint32, 107);
+  TEST_REPEATED_EXTENSIONS_SPACE_USED(fixed64 , uint64, 108);
+  TEST_REPEATED_EXTENSIONS_SPACE_USED(sfixed32, int32 , 109);
+  TEST_REPEATED_EXTENSIONS_SPACE_USED(sfixed64, int64 , 110);
+  TEST_REPEATED_EXTENSIONS_SPACE_USED(float   , float , 111);
+  TEST_REPEATED_EXTENSIONS_SPACE_USED(double  , double, 112);
+  TEST_REPEATED_EXTENSIONS_SPACE_USED(bool    , bool  , true);
+  TEST_REPEATED_EXTENSIONS_SPACE_USED(nested_enum, int,
+                                      unittest::TestAllTypes::FOO);
+#undef TEST_REPEATED_EXTENSIONS_SPACE_USED
+  // Repeated strings
+  {
+    unittest::TestAllExtensions message;
+    const int base_size = message.SpaceUsed();
+    int min_expected_size = sizeof(RepeatedPtrField<string>) + base_size;
+    const string value(256, 'x');
+    // Once items are allocated, they may stick around even when cleared so
+    // without the hardcore memory management accessors there isn't a notion of
+    // the empty repeated field memory usage as there is with primitive types.
+    for (int i = 0; i < 16; ++i) {
+      message.AddExtension(unittest::repeated_string_extension, value);
+    }
+    // Each of the 16 elements costs at least a string object plus its
+    // 256 characters.
+    min_expected_size += (sizeof(value) + value.size()) * 16;
+    EXPECT_LE(min_expected_size, message.SpaceUsed());
+  }
+  // Repeated messages
+  {
+    unittest::TestAllExtensions message;
+    const int base_size = message.SpaceUsed();
+    int min_expected_size = sizeof(RepeatedPtrField<unittest::ForeignMessage>) +
+        base_size;
+    unittest::ForeignMessage prototype;
+    prototype.set_c(2);
+    for (int i = 0; i < 16; ++i) {
+      message.AddExtension(unittest::repeated_foreign_message_extension)->
+          CopyFrom(prototype);
+    }
+    // Each of the 16 copies should cost at least as much as the prototype.
+    min_expected_size += 16 * prototype.SpaceUsed();
+    EXPECT_LE(min_expected_size, message.SpaceUsed());
+  }
+}
+
 }  // namespace
 }  // namespace internal
 }  // namespace protobuf

+ 81 - 1
src/google/protobuf/generated_message_reflection.cc

@@ -46,6 +46,18 @@ namespace internal {
 
 namespace { const string kEmptyString; }
 
+// Estimates the memory a string owns outside of the string object itself.
+// Returns 0 when the character buffer lives inside the object (small-string
+// storage), otherwise the string's capacity().
+int StringSpaceUsedExcludingSelf(const string& str) {
+  const void* start = &str;
+  const void* end = &str + 1;
+
+  // [start, end) is the footprint of the string object.  |end| is one past
+  // the object, so a buffer that begins exactly at |end| is NOT inline and
+  // must be excluded from the range check (hence '<', not '<=').
+  if (start <= str.data() && str.data() < end) {
+    // The string's data is stored inside the string object itself.
+    return 0;
+  } else {
+    return static_cast<int>(str.capacity());
+  }
+}
+
 // ===================================================================
 // Helpers for reporting usage errors (e.g. trying to use GetInt32() on
 // a string field).
@@ -147,13 +159,15 @@ GeneratedMessageReflection::GeneratedMessageReflection(
     int has_bits_offset,
     int unknown_fields_offset,
     int extensions_offset,
-    const DescriptorPool* descriptor_pool)
+    const DescriptorPool* descriptor_pool,
+    int object_size)
   : descriptor_       (descriptor),
     default_instance_ (default_instance),
     offsets_          (offsets),
     has_bits_offset_  (has_bits_offset),
     unknown_fields_offset_(unknown_fields_offset),
     extensions_offset_(extensions_offset),
+    object_size_      (object_size),
     descriptor_pool_  ((descriptor_pool == NULL) ?
                          DescriptorPool::generated_pool() :
                          descriptor_pool) {
@@ -173,6 +187,71 @@ UnknownFieldSet* GeneratedMessageReflection::MutableUnknownFields(
   return reinterpret_cast<UnknownFieldSet*>(ptr);
 }
 
+// Estimates the total memory used by |message|: the object size recorded
+// for this type plus everything reachable from it that is stored out of
+// line (unknown fields, extensions, repeated field storage, strings and
+// sub-messages).
+int GeneratedMessageReflection::SpaceUsed(const Message& message) const {
+  // object_size_ covers the in-object representation of every field, so
+  // only storage held outside the object is added below.
+  int bytes = object_size_;
+
+  bytes += GetUnknownFields(message).SpaceUsedExcludingSelf();
+
+  // An offset of -1 means this type has no extension set.
+  if (extensions_offset_ != -1) {
+    bytes += GetExtensionSet(message).SpaceUsedExcludingSelf();
+  }
+
+  for (int index = 0; index < descriptor_->field_count(); ++index) {
+    const FieldDescriptor* field = descriptor_->field(index);
+
+    if (field->is_repeated()) {
+      // The repeated field object is part of the message; ask it only for
+      // what it holds beyond itself.
+      bytes += GetRaw<GenericRepeatedField>(message, field)
+                   .GenericSpaceUsedExcludingSelf();
+      continue;
+    }
+
+    switch (field->cpp_type()) {
+      case FieldDescriptor::CPPTYPE_STRING: {
+        // A string field is just a pointer inside the message.  Until it is
+        // changed it points at the default value held by the prototype,
+        // which must not be charged to this message.
+        const string* value = GetField<const string*>(message, field);
+        const string* shared_default = DefaultRaw<const string*>(field);
+        if (value != shared_default) {
+          // Count the string object as well as its out-of-line data.
+          bytes += sizeof(*value) + StringSpaceUsedExcludingSelf(*value);
+        }
+        break;
+      }
+
+      case FieldDescriptor::CPPTYPE_MESSAGE: {
+        // In the prototype, a singular message field merely points at the
+        // other type's prototype, so it owns no extra memory.
+        if (&message != default_instance_) {
+          const Message* child = GetRaw<const Message*>(message, field);
+          if (child != NULL) {
+            bytes += child->SpaceUsed();
+          }
+        }
+        break;
+      }
+
+      case FieldDescriptor::CPPTYPE_INT32 :
+      case FieldDescriptor::CPPTYPE_INT64 :
+      case FieldDescriptor::CPPTYPE_UINT32:
+      case FieldDescriptor::CPPTYPE_UINT64:
+      case FieldDescriptor::CPPTYPE_DOUBLE:
+      case FieldDescriptor::CPPTYPE_FLOAT :
+      case FieldDescriptor::CPPTYPE_BOOL  :
+      case FieldDescriptor::CPPTYPE_ENUM  :
+        // Stored inline in the object; already included in object_size_.
+        break;
+    }
+  }
+
+  return bytes;
+}
+
 // -------------------------------------------------------------------
 
 bool GeneratedMessageReflection::HasField(const Message& message,
@@ -765,6 +844,7 @@ inline Type* GeneratedMessageReflection::AddField(
   return reinterpret_cast<Type*>(repeated->GenericAdd());
 }
 
+
 }  // namespace internal
 }  // namespace protobuf
 }  // namespace google

+ 12 - 1
src/google/protobuf/generated_message_reflection.h

@@ -116,13 +116,16 @@ class LIBPROTOBUF_EXPORT GeneratedMessageReflection : public Reflection {
   //   pool:          DescriptorPool to search for extension definitions.  Only
   //                  used by FindKnownExtensionByName() and
   //                  FindKnownExtensionByNumber().
+  //   object_size:   The size of a message object of this type, as measured
+  //                  by sizeof().
   GeneratedMessageReflection(const Descriptor* descriptor,
                              const Message* default_instance,
                              const int offsets[],
                              int has_bits_offset,
                              int unknown_fields_offset,
                              int extensions_offset,
-                             const DescriptorPool* pool);
+                             const DescriptorPool* pool,
+                             int object_size);
   ~GeneratedMessageReflection();
 
   // implements Reflection -------------------------------------------
@@ -130,6 +133,8 @@ class LIBPROTOBUF_EXPORT GeneratedMessageReflection : public Reflection {
   const UnknownFieldSet& GetUnknownFields(const Message& message) const;
   UnknownFieldSet* MutableUnknownFields(Message* message) const;
 
+  int SpaceUsed(const Message& message) const;
+
   bool HasField(const Message& message, const FieldDescriptor* field) const;
   int FieldSize(const Message& message, const FieldDescriptor* field) const;
   void ClearField(Message* message, const FieldDescriptor* field) const;
@@ -266,6 +271,7 @@ class LIBPROTOBUF_EXPORT GeneratedMessageReflection : public Reflection {
   int has_bits_offset_;
   int unknown_fields_offset_;
   int extensions_offset_;
+  int object_size_;
 
   const DescriptorPool* descriptor_pool_;
 
@@ -374,6 +380,11 @@ inline To dynamic_cast_if_available(From from) {
 #endif
 }
 
+// Compute the space used by a string, not including sizeof(string) itself.
+// This is slightly complicated because small strings store their data within
+// the string object but large strings do not.
+int StringSpaceUsedExcludingSelf(const string& str);
+
 
 }  // namespace internal
 }  // namespace protobuf

+ 4 - 6
src/google/protobuf/io/tokenizer.cc

@@ -623,19 +623,17 @@ double Tokenizer::ParseFloat(const string& text) {
   return result;
 }
 
-void Tokenizer::ParseString(const string& text, string* output) {
-  output->clear();
-
+void Tokenizer::ParseStringAppend(const string& text, string* output) {
   // Reminder:  text[0] is always the quote character.  (If text is
   //   empty, it's invalid, so we'll just return.)
   if (text.empty()) {
     GOOGLE_LOG(DFATAL)
-      << " ParseString::ParseString() passed text that could not have been"
-         " tokenized as a string: " << CEscape(text);
+      << " Tokenizer::ParseStringAppend() passed text that could not"
+         " have been tokenized as a string: " << CEscape(text);
     return;
   }
 
-  output->reserve(text.size());
+  output->reserve(output->size() + text.size());
 
   // Loop through the string copying characters to "output" and
   // interpreting escape sequences.  Note that any invalid escape

+ 8 - 0
src/google/protobuf/io/tokenizer.h

@@ -139,6 +139,9 @@ class LIBPROTOBUF_EXPORT Tokenizer {
   // result is undefined (possibly an assert failure).
   static void ParseString(const string& text, string* output);
 
+  // Identical to ParseString, but appends to output.
+  static void ParseStringAppend(const string& text, string* output);
+
   // Parses a TYPE_INTEGER token.  Returns false if the result would be
   // greater than max_value.  Otherwise, returns true and sets *output to the
   // result.  If the text is not from a Token of type TYPE_INTEGER originally
@@ -283,6 +286,11 @@ inline const Tokenizer::Token& Tokenizer::current() {
   return current_;
 }
 
+// Replaces the contents of *output with the parsed form of |text|: the
+// output string is cleared first and the actual parsing is delegated to
+// ParseStringAppend().
+inline void Tokenizer::ParseString(const string& text, string* output) {
+  output->clear();
+  ParseStringAppend(text, output);
+}
+
 }  // namespace io
 }  // namespace protobuf
 

+ 9 - 0
src/google/protobuf/io/tokenizer_unittest.cc

@@ -584,6 +584,15 @@ TEST_F(TokenizerTest, ParseString) {
 #endif  // GTEST_HAS_DEATH_TEST
 }
 
+TEST_F(TokenizerTest, ParseStringAppend) {
+  // ParseStringAppend() must keep what is already in the buffer, whereas
+  // ParseString() must discard it.
+  string buffer("stuff+");
+
+  Tokenizer::ParseStringAppend("'hello'", &buffer);
+  EXPECT_EQ("stuff+hello", buffer);
+
+  Tokenizer::ParseString("'hello'", &buffer);
+  EXPECT_EQ("hello", buffer);
+}
+
 // -------------------------------------------------------------------
 
 // Each case parses some input text, ignoring the tokens produced, and

+ 3 - 1
src/google/protobuf/io/zero_copy_stream_unittest.cc

@@ -156,7 +156,9 @@ int IoTest::ReadFromInput(ZeroCopyInputStream* input, void* data, int size) {
 
     if (out_size <= in_size) {
       memcpy(out, in, out_size);
-      input->BackUp(in_size - out_size);
+      if (in_size > out_size) {
+        input->BackUp(in_size - out_size);
+      }
       return size;  // Copied all of it.
     }
 

+ 22 - 0
src/google/protobuf/message.cc

@@ -204,6 +204,10 @@ void Message::SetCachedSize(int size) const {
                 "Must implement one or the other.";
 }
 
+// Default implementation: forwards to the SpaceUsed() method of this
+// message's Reflection object and returns its result.
+int Message::SpaceUsed() const {
+  return GetReflection()->SpaceUsed(*this);
+}
+
 bool Message::SerializeToCodedStream(io::CodedOutputStream* output) const {
   GOOGLE_DCHECK(IsInitialized()) << InitializationErrorMessage("serialize", *this);
   return SerializePartialToCodedStream(output);
@@ -291,6 +295,24 @@ bool Message::SerializePartialToOstream(ostream* output) const {
 }
 
 
+// Serializes this message into a new string and returns it by value.  If
+// AppendToString() reports failure, the empty string is returned instead.
+string Message::SerializeAsString() const {
+  // With the (Named) Return Value Optimization, 'result' does not live on
+  // this function's stack at all; the compiler builds it directly in the
+  // storage the caller provided for the return value.
+  string result;
+  const bool ok = AppendToString(&result);
+  if (!ok) {
+    result.clear();
+  }
+  return result;
+}
+
+// Serializes this message into a new string via AppendPartialToString() and
+// returns it by value.  The empty string is returned if that call fails.
+string Message::SerializePartialAsString() const {
+  string result;
+  const bool ok = AppendPartialToString(&result);
+  if (!ok) {
+    result.clear();
+  }
+  return result;
+}
+
 Reflection::~Reflection() {}
 
 // ===================================================================

+ 19 - 1
src/google/protobuf/message.h

@@ -95,7 +95,7 @@
 //     foo->ParseFromString(data);
 //
 //     // Use the reflection interface to examine the contents.
-//     Reflection* reflection = foo->GetReflection();
+//     const Reflection* reflection = foo->GetReflection();
 //     assert(reflection->GetString(foo, text_field) == "Hello World!");
 //     assert(reflection->CountField(foo, numbers_field) == 3);
 //     assert(reflection->GetInt32(foo, numbers_field, 0) == 1);
@@ -315,6 +315,16 @@ class LIBPROTOBUF_EXPORT Message {
   bool SerializePartialToOstream(ostream* output) const;
 
 
+  // Make a string encoding the message. Is equivalent to calling
+  // SerializeToString() on a string and using that.  Returns the empty
+  // string if SerializeToString() would have returned an error.
+  // Note: If you intend to generate many such strings, you may
+  // reduce heap fragmentation by instead re-using the same string
+  // object with calls to SerializeToString().
+  string SerializeAsString() const;
+  // Like SerializeAsString(), but allows missing required fields.
+  string SerializePartialAsString() const;
+
   // Like SerializeToString(), but appends the data to the string's existing
   // contents.  All required fields must be set.
   bool AppendToString(string* output) const;
@@ -326,6 +336,11 @@ class LIBPROTOBUF_EXPORT Message {
   // this, it MUST override SetCachedSize().
   virtual int ByteSize() const;
 
+  // Computes (an estimate of) the total number of bytes currently used for
+  // storing the message in memory.  The default implementation calls the
+  // Reflection object's SpaceUsed() method.
+  virtual int SpaceUsed() const;
+
   // Serializes the message without recomputing the size.  The message must
   // not have changed since the last call to ByteSize(); if it has, the results
   // are undefined.
@@ -432,6 +447,9 @@ class LIBPROTOBUF_EXPORT Reflection {
   // recognized according to the Message's definition.
   virtual UnknownFieldSet* MutableUnknownFields(Message* message) const = 0;
 
+  // Estimate the amount of memory used by the message object.
+  virtual int SpaceUsed(const Message& message) const = 0;
+
   // Check if the given non-repeated field is set.
   virtual bool HasField(const Message& message,
                         const FieldDescriptor* field) const = 0;

+ 2 - 0
src/google/protobuf/message_unittest.cc

@@ -91,6 +91,8 @@ TEST(MessageTest, SerializeHelpers) {
   string temp = stream.str();
   EXPECT_TRUE(temp == str1);
 
+  EXPECT_TRUE(message.SerializeAsString() == str1);
+
 }
 
 TEST(MessageTest, ParseFromFileDescriptor) {

+ 52 - 0
src/google/protobuf/repeated_field.h

@@ -87,10 +87,14 @@ class LIBPROTOBUF_EXPORT GenericRepeatedField {
   virtual void* GenericAdd() = 0;
   virtual void GenericClear() = 0;
   virtual int GenericSize() const = 0;
+  virtual int GenericSpaceUsedExcludingSelf() const = 0;
 
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(GenericRepeatedField);
 };
 
+// We need this (from generated_message_reflection.cc).
+int StringSpaceUsedExcludingSelf(const string& str);
+
 }  // namespace internal
 
 // RepeatedField is used to represent repeated fields of a primitive type (in
@@ -140,6 +144,10 @@ class RepeatedField : public internal::GenericRepeatedField {
   iterator end();
   const_iterator end() const;
 
+  // Returns the number of bytes used by the repeated field, excluding
+  // sizeof(*this)
+  int SpaceUsedExcludingSelf() const;
+
  private:  // See GenericRepeatedField for why this is private.
   // implements GenericRepeatedField ---------------------------------
   const void* GenericGet(int index) const;
@@ -147,6 +155,7 @@ class RepeatedField : public internal::GenericRepeatedField {
   void* GenericAdd();
   void GenericClear();
   int GenericSize() const;
+  int GenericSpaceUsedExcludingSelf() const;
 
  private:
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(RepeatedField);
@@ -214,6 +223,10 @@ class RepeatedPtrField : public internal::GenericRepeatedField {
   iterator end();
   const_iterator end() const;
 
+  // Returns (an estimate of) the number of bytes used by the repeated field,
+  // excluding sizeof(*this).
+  int SpaceUsedExcludingSelf() const;
+
   // Advanced memory management --------------------------------------
   // When hardcore memory management becomes necessary -- as it often
   // does here at Google -- the following methods may be useful.
@@ -254,8 +267,13 @@ class RepeatedPtrField : public internal::GenericRepeatedField {
   void* GenericAdd();
   void GenericClear();
   int GenericSize() const;
+  int GenericSpaceUsedExcludingSelf() const;
 
  private:
+  // Returns (an estimate of) the number of bytes used by an individual
+  // element.
+  int ElementSpaceUsed(Element* element) const;
+
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(RepeatedPtrField);
 
   static const int kInitialSize = 4;
@@ -398,6 +416,10 @@ RepeatedField<Element>::end() const {
   return elements_ + current_size_;
 }
 
+template <typename Element>
+inline int RepeatedField<Element>::SpaceUsedExcludingSelf() const {  // Bytes counted beyond sizeof(*this).
+  return (elements_ != initial_space_) ? total_size_ * sizeof(elements_[0]) : 0;  // 0 while elements_ still equals initial_space_.
+}
 
 template <typename Element>
 const void* RepeatedField<Element>::GenericGet(int index) const {
@@ -426,6 +448,11 @@ int RepeatedField<Element>::GenericSize() const {
   return size();
 }
 
+template <typename Element>
+int RepeatedField<Element>::GenericSpaceUsedExcludingSelf() const {  // GenericRepeatedField hook.
+  return SpaceUsedExcludingSelf();  // Forwards to the typed method.
+}
+
 template <typename Element>
 inline void RepeatedField<Element>::Reserve(int new_size) {
   if (total_size_ >= new_size) return;
@@ -595,6 +622,26 @@ void RepeatedPtrField<Element>::Swap(RepeatedPtrField* other) {
   }
 }
 
+template <typename Element>
+inline int RepeatedPtrField<Element>::SpaceUsedExcludingSelf() const {  // Estimate of bytes counted beyond sizeof(*this).
+  int allocated_bytes =
+      (elements_ != initial_space_) ? total_size_ * sizeof(elements_[0]) : 0;  // The pointer array; 0 while elements_ equals initial_space_.
+  for (int i = 0; i < allocated_size_; ++i) {  // Bound is allocated_size_ (presumably also covers cleared-but-kept elements; confirm).
+    allocated_bytes += ElementSpaceUsed(elements_[i]);
+  }
+  return allocated_bytes;
+}
+
+template <typename Element>
+inline int RepeatedPtrField<Element>::ElementSpaceUsed(Element* e) const {  // Generic case: ask the element itself.
+  return e->SpaceUsed();
+}
+
+template <>
+inline int RepeatedPtrField<string>::ElementSpaceUsed(string* s) const {  // Specialization for string elements.
+  return sizeof(*s) + internal::StringSpaceUsedExcludingSelf(*s);  // The string object plus whatever the helper reports.
+}
+
 
 template <typename Element>
 inline void RepeatedPtrField<Element>::AddAllocated(Element* value) {
@@ -665,6 +712,11 @@ int RepeatedPtrField<Element>::GenericSize() const {
   return size();
 }
 
+template <typename Element>
+int RepeatedPtrField<Element>::GenericSpaceUsedExcludingSelf() const {  // GenericRepeatedField hook.
+  return SpaceUsedExcludingSelf();  // Forwards to the typed method.
+}
+
 
 template <typename Element>
 inline void RepeatedPtrField<Element>::Reserve(int new_size) {

+ 8 - 0
src/google/protobuf/repeated_field_unittest.cc

@@ -69,6 +69,7 @@ TEST(RepeatedField, Small) {
   EXPECT_EQ(field.size(), 2);
   EXPECT_EQ(field.Get(0), 5);
   EXPECT_EQ(field.Get(1), 23);
+  EXPECT_EQ(field.SpaceUsedExcludingSelf(), 0);
 
   field.RemoveLast();
 
@@ -78,6 +79,7 @@ TEST(RepeatedField, Small) {
   field.Clear();
 
   EXPECT_EQ(field.size(), 0);
+  EXPECT_EQ(field.SpaceUsedExcludingSelf(), 0);
 }
 
 // Test operations on a RepeatedField which is large enough to allocate a
@@ -94,6 +96,9 @@ TEST(RepeatedField, Large) {
   for (int i = 0; i < 16; i++) {
     EXPECT_EQ(field.Get(i), i * i);
   }
+
+  int expected_usage = 16 * sizeof(int);
+  EXPECT_GE(field.SpaceUsedExcludingSelf(), expected_usage);
 }
 
 // Test swapping between various types of RepeatedFields.
@@ -278,6 +283,9 @@ TEST(RepeatedPtrField, Large) {
     EXPECT_EQ(field.Get(i).size(), 1);
     EXPECT_EQ(field.Get(i)[0], 'a' + i);
   }
+
+  int min_expected_usage = 16 * sizeof(string);
+  EXPECT_GE(field.SpaceUsedExcludingSelf(), min_expected_usage);
 }
 
 TEST(RepeatedPtrField, SwapSmallSmall) {

+ 6 - 0
src/google/protobuf/stubs/common.h

@@ -1071,6 +1071,12 @@ template<typename T> struct remove_pointer<T* volatile> { typedef T type; };
 template<typename T> struct remove_pointer<T* const volatile> {
   typedef T type; };
 
+// ===================================================================
+
+// Checks if the buffer contains structurally-valid UTF-8.  Implemented in
+// structurally_valid.cc.
+bool IsStructurallyValidUTF8(const char* buf, int len);
+
 }  // namespace internal
 
 }  // namespace protobuf

+ 521 - 0
src/google/protobuf/stubs/structurally_valid.cc

@@ -0,0 +1,521 @@
+// Copyright 2005-2008 Google Inc. All Rights Reserved.
+// Author: jrm@google.com (Jim Meehan)
+
+#include <google/protobuf/stubs/common.h>
+
+namespace google {
+namespace protobuf {
+namespace internal {
+
+// These four-byte entries compactly encode how many bytes 0..255 to delete
+// in making a string replacement, how many bytes to add 0..255, and the offset
+// 0..64k-1 of the replacement string in remap_string.
+struct RemapEntry {
+  uint8 delete_bytes;   // Number of bytes to delete, 0..255.
+  uint8 add_bytes;      // Number of bytes to add, 0..255.
+  uint16 bytes_offset;  // Offset of the replacement string within remap_string.
+};
+
+// Exit type codes for state tables. All but the first get stuffed into
+// one-byte (uint8) table entries. The first is only generated by executable code.
+// To distinguish from next-state entries, these must be contiguous and
+// all in [kExitIllegalStructure, kExitNone]; the scan tests e >= kExitIllegalStructure.
+typedef enum {
+  kExitDstSpaceFull = 239,
+  kExitIllegalStructure,  // 240
+  kExitOK,                // 241
+  kExitReject,            // 242
+  kExitReplace1,
+  kExitReplace2,
+  kExitReplace3,
+  kExitReplace21,
+  kExitReplace31,
+  kExitReplace32,
+  kExitReplaceOffset1,
+  kExitReplaceOffset2,
+  kExitReplace1S0,
+  kExitSpecial,
+  kExitDoAgain,
+  kExitRejectAlt,
+  kExitNone               // 255
+} ExitReason;
+
+
+// This struct represents one entire state table. The three initialized byte
+// areas are state_table, remap_base, and remap_string. state0 and state0_size
+// give the byte offset and length within state_table of the initial state --
+// table lookups are expected to start and end in this state, but for
+// truncated UTF-8 strings, may end in a different state. These allow a quick
+// test for that condition. entry_shift is 8 for tables subscripted by a full
+// byte value and 6 for space-optimized tables subscripted by only six
+// significant bits in UTF-8 continuation bytes.
+typedef struct {
+  const uint32 state0;       // Byte offset of the initial state within state_table.
+  const uint32 state0_size;  // Length in bytes of the initial state.
+  const uint32 total_size;
+  const int max_expand;
+  const int entry_shift;     // Shift applied to a next-state entry to index state_table.
+  const int bytes_per_entry;
+  const uint32 losub;        // Subtracted per 32-bit word in the scan's fast range check.
+  const uint32 hiadd;        // Added per 32-bit word in the scan's fast range check.
+  const uint8* state_table;  // Next-state numbers and exit codes, one byte per entry.
+  const RemapEntry* remap_base;
+  const uint8* remap_string;
+  const uint8* fast_state;   // Per-byte table; a nonzero entry ends the scan's fast path.
+} UTF8StateMachineObj;
+
+typedef UTF8StateMachineObj UTF8ScanObj;
+
+#define X__ (kExitIllegalStructure)
+#define RJ_ (kExitReject)
+#define S1_ (kExitReplace1)
+#define S2_ (kExitReplace2)
+#define S3_ (kExitReplace3)
+#define S21 (kExitReplace21)
+#define S31 (kExitReplace31)
+#define S32 (kExitReplace32)
+#define T1_ (kExitReplaceOffset1)
+#define T2_ (kExitReplaceOffset2)
+#define S11 (kExitReplace1S0)
+#define SP_ (kExitSpecial)
+#define D__ (kExitDoAgain)
+#define RJA (kExitRejectAlt)
+
+//  Entire table has 9 state blocks of 256 entries each
+static const unsigned int utf8acceptnonsurrogates_STATE0 = 0;     // state[0]
+static const unsigned int utf8acceptnonsurrogates_STATE0_SIZE = 256;  // =[1]
+static const unsigned int utf8acceptnonsurrogates_TOTAL_SIZE = 2304;
+static const unsigned int utf8acceptnonsurrogates_MAX_EXPAND_X4 = 0;
+static const unsigned int utf8acceptnonsurrogates_SHIFT = 8;
+static const unsigned int utf8acceptnonsurrogates_BYTES = 1;
+static const unsigned int utf8acceptnonsurrogates_LOSUB = 0x20202020;
+static const unsigned int utf8acceptnonsurrogates_HIADD = 0x00000000;
+
+static const uint8 utf8acceptnonsurrogates[] = {
+// state[0] 0x000000 Byte 1
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
+  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
+  2,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   7,   3,   3,
+  4,   5,   5,   5,   6, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[1] 0x000080 Byte 2 of 2
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[2] 0x000000 Byte 2 of 3
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
+  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[3] 0x001000 Byte 2 of 3
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
+  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
+  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
+  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[4] 0x000000 Byte 2 of 4
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
+  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
+  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[5] 0x040000 Byte 2 of 4
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
+  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
+  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
+  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[6] 0x100000 Byte 2 of 4
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[7] 0x00d000 Byte 2 of 3
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
+  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
+  8,   8,   8,   8,   8,   8,   8,   8,    8,   8,   8,   8,   8,   8,   8,   8,
+  8,   8,   8,   8,   8,   8,   8,   8,    8,   8,   8,   8,   8,   8,   8,   8,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[8] 0x00d800 Byte 3 of 3
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,  RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
+RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,  RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
+RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,  RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
+RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,  RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+};
+
+// Remap base[0] = (del, add, string_offset)
+static const RemapEntry utf8acceptnonsurrogates_remap_base[] = {
+{0, 0, 0} };
+
+// Remap string[0]
+static const unsigned char utf8acceptnonsurrogates_remap_string[] = {
+0 };
+
+static const unsigned char utf8acceptnonsurrogates_fast[256] = {
+0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
+
+0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
+
+1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
+
+1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
+};
+
+static const UTF8ScanObj utf8acceptnonsurrogates_obj = {
+  utf8acceptnonsurrogates_STATE0,
+  utf8acceptnonsurrogates_STATE0_SIZE,
+  utf8acceptnonsurrogates_TOTAL_SIZE,
+  utf8acceptnonsurrogates_MAX_EXPAND_X4,
+  utf8acceptnonsurrogates_SHIFT,
+  utf8acceptnonsurrogates_BYTES,
+  utf8acceptnonsurrogates_LOSUB,
+  utf8acceptnonsurrogates_HIADD,
+  utf8acceptnonsurrogates,
+  utf8acceptnonsurrogates_remap_base,
+  utf8acceptnonsurrogates_remap_string,
+  utf8acceptnonsurrogates_fast
+};
+
+
+#undef X__
+#undef RJ_
+#undef S1_
+#undef S2_
+#undef S3_
+#undef S21
+#undef S31
+#undef S32
+#undef T1_
+#undef T2_
+#undef S11
+#undef SP_
+#undef D__
+#undef RJA
+
+// Return true if current Tbl pointer is within state0 range
+// Note that unsigned compare checks both ends of range simultaneously
+static inline bool InStateZero(const UTF8ScanObj* st, const uint8* Tbl) {
+  const uint8* Tbl0 = &st->state_table[st->state0];  // Start of the initial state's entries.
+  return (static_cast<uint32>(Tbl - Tbl0) < st->state0_size);  // A negative difference wraps to a large unsigned value and fails the test.
+}
+
+// Scan a UTF-8 string based on state table.
+// Always scan complete UTF-8 characters
+// Set number of bytes scanned. Return reason for exiting
+int UTF8GenericScan(const UTF8ScanObj* st,
+                    const char * str,
+                    int str_length,
+                    int* bytes_consumed) {
+  *bytes_consumed = 0;
+  if (str_length == 0) return kExitOK;
+
+  int eshift = st->entry_shift;
+  const uint8* isrc = reinterpret_cast<const uint8*>(str);
+  const uint8* src = isrc;
+  const uint8* srclimit = isrc + str_length;
+  const uint8* srclimit8 = srclimit - 7;  // NOTE(review): points before isrc when str_length < 7; the fast loop is then skipped -- confirm this is acceptable.
+  const uint8* Tbl_0 = &st->state_table[st->state0];
+
+ DoAgain:
+  // Do state-table scan
+  int e = 0;
+  uint8 c;
+
+  // Do fast for groups of 8 identity bytes.
+  // This covers a lot of 7-bit ASCII ~8x faster than the 1-byte loop,
+  // including slowing slightly on cr/lf/ht
+  //----------------------------
+  const uint8* Tbl2 = &st->fast_state[0];
+  uint32 losub = st->losub;
+  uint32 hiadd = st->hiadd;
+  while (src < srclimit8) {
+    uint32 s0123 = (reinterpret_cast<const uint32 *>(src))[0];  // NOTE(review): assumes unaligned uint32 loads are safe on the target -- confirm.
+    uint32 s4567 = (reinterpret_cast<const uint32 *>(src))[1];
+    src += 8;
+    // This is a fast range check for all bytes in [losub..0x80-hiadd)
+    uint32 temp = (s0123 - losub) | (s0123 + hiadd) |
+                  (s4567 - losub) | (s4567 + hiadd);
+    if ((temp & 0x80808080) != 0) {
+      // We typically end up here on cr/lf/ht; src was incremented
+      int e0123 = (Tbl2[src[-8]] | Tbl2[src[-7]]) |
+                  (Tbl2[src[-6]] | Tbl2[src[-5]]);
+      if (e0123 != 0) {
+        src -= 8;
+        break;
+      }    // Exit on Non-interchange
+      e0123 = (Tbl2[src[-4]] | Tbl2[src[-3]]) |
+              (Tbl2[src[-2]] | Tbl2[src[-1]]);
+      if (e0123 != 0) {
+        src -= 4;
+        break;
+      }    // Exit on Non-interchange
+      // Else OK, go around again
+    }
+  }
+  //----------------------------
+
+  // Byte-at-a-time scan
+  //----------------------------
+  const uint8* Tbl = Tbl_0;
+  while (src < srclimit) {
+    c = *src;
+    e = Tbl[c];
+    src++;
+    if (e >= kExitIllegalStructure) {break;}
+    Tbl = &Tbl_0[e << eshift];
+  }
+  //----------------------------
+
+
+  // Exit possibilities:
+  //  Some exit code, !state0, back up over last char
+  //  Some exit code, state0, back up one byte exactly
+  //  source consumed, !state0, back up over partial char
+  //  source consumed, state0, exit OK
+  // For illegal byte in state0, avoid backing up over PREVIOUS char
+  // For truncated last char, back up to beginning of it
+
+  if (e >= kExitIllegalStructure) {
+    // Back up over exactly one byte of rejected/illegal UTF-8 character
+    src--;
+    // Back up more if needed
+    if (!InStateZero(st, Tbl)) {
+      do {
+        src--;
+      } while ((src > isrc) && ((src[0] & 0xc0) == 0x80));
+    }
+  } else if (!InStateZero(st, Tbl)) {
+    // Back up over truncated UTF-8 character
+    e = kExitIllegalStructure;
+    do {
+      src--;
+    } while ((src > isrc) && ((src[0] & 0xc0) == 0x80));
+  } else {
+    // Normal termination, source fully consumed
+    e = kExitOK;
+  }
+
+  if (e == kExitDoAgain) {
+    // Loop back up to the fast scan
+    goto DoAgain;
+  }
+
+  *bytes_consumed = src - isrc;
+  return e;
+}
+
+int UTF8GenericScanFastAscii(const UTF8ScanObj* st,
+                    const char * str,
+                    int str_length,
+                    int* bytes_consumed) {  // Skips runs of bytes < 0x80 itself, then hands the rest to UTF8GenericScan().
+  *bytes_consumed = 0;
+  if (str_length == 0) return kExitOK;
+
+  const uint8* isrc =  reinterpret_cast<const uint8*>(str);
+  const uint8* src = isrc;
+  const uint8* srclimit = isrc + str_length;
+  const uint8* srclimit8 = srclimit - 7;  // The 8-byte loop below only runs while src < srclimit8.
+  int n;
+  int rest_consumed;
+  int exit_reason;
+  do {
+    while ((src < srclimit8) &&
+           (((reinterpret_cast<const uint32*>(src)[0] |
+              reinterpret_cast<const uint32*>(src)[1]) & 0x80808080) == 0)) {
+      src += 8;  // All eight bytes have the high bit clear.
+    }
+    while ((src < srclimit) && (src[0] < 0x80)) {
+      src++;  // Finish the run one byte at a time.
+    }
+    // Run state table on the rest
+    n = src - isrc;
+    exit_reason = UTF8GenericScan(st, str + n, str_length - n, &rest_consumed);
+    src += rest_consumed;
+  } while ( exit_reason == kExitDoAgain );
+
+  *bytes_consumed = src - isrc;  // Total across the skipped runs and the table-driven scans.
+  return exit_reason;
+}
+
+// Hack:  On some compilers the static tables are initialized at startup.
+//   We can't use them until they are initialized.  However, some Protocol
+//   Buffer parsing happens at static init time and may try to validate
+//   UTF-8 strings.  Since UTF-8 validation is only used for debugging
+//   anyway, we simply always return success if initialization hasn't
+//   occurred yet.
+namespace {
+
+bool module_initialized_ = false;  // Flipped to true by InitDetector's constructor.
+
+struct InitDetector {
+  InitDetector() {
+    module_initialized_ = true;  // Runs when init_detector below is constructed.
+  }
+};
+InitDetector init_detector;
+
+}  // namespace
+
+bool IsStructurallyValidUTF8(const char* buf, int len) {
+  if (!module_initialized_) return true;  // Called before init_detector was constructed: report success without scanning.
+  
+  int bytes_consumed = 0;
+  UTF8GenericScanFastAscii(&utf8acceptnonsurrogates_obj,
+                           buf, len, &bytes_consumed);  // The exit code is ignored; only the byte count is used.
+  return (bytes_consumed == len);  // Valid only if the scan consumed every byte.
+}
+
+}  // namespace internal
+}  // namespace protobuf
+}  // namespace google

+ 30 - 0
src/google/protobuf/stubs/structurally_valid_unittest.cc

@@ -0,0 +1,30 @@
+// Copyright 2008 Google Inc. All Rights Reserved.
+// Author: xpeng@google.com (Peter Peng)
+
+#include <google/protobuf/stubs/common.h>
+#include <gtest/gtest.h>
+
+namespace google {
+namespace protobuf {
+namespace internal {
+namespace {
+
+TEST(StructurallyValidTest, ValidUTF8String) {
+  // On GCC, this string can be written as:
+  //   "abcd 1234 - \u2014\u2013\u2212"
+  // MSVC seems to interpret \u differently, so the UTF-8 bytes are spelled out as octal escapes.
+  string valid_str("abcd 1234 - \342\200\224\342\200\223\342\210\222");
+  EXPECT_TRUE(IsStructurallyValidUTF8(valid_str.data(),
+                                      valid_str.size()));
+}
+
+TEST(StructurallyValidTest, InvalidUTF8String) {
+  string invalid_str("\xA0\xB0");  // Both bytes are in 0x80..0xBF, which the initial state rejects as a first byte.
+  EXPECT_FALSE(IsStructurallyValidUTF8(invalid_str.data(),
+                                       invalid_str.size()));
+}
+
+}  // namespace
+}  // namespace internal
+}  // namespace protobuf
+}  // namespace google

+ 27 - 14
src/google/protobuf/text_format.cc

@@ -34,6 +34,7 @@
 
 #include <float.h>
 #include <math.h>
+#include <stdio.h>
 #include <stack>
 #include <limits>
 
@@ -65,13 +66,23 @@ string Message::ShortDebugString() const {
   //   DebugString() and munging the result.
   string result = DebugString();
 
-  // Replace each contiguous range of whitespace (including newlines) with a
-  // single space.
-  for (int i = 0; i < result.size(); i++) {
-    int pos = i;
-    while (isspace(result[pos])) ++pos;
-    if (pos > i) result.replace(i, pos - i, " ");
+  // Replace each contiguous range of whitespace (including newlines, and
+  // starting with a newline) with a single space.
+  int out = 0;
+  for (int i = 0; i < result.size(); ++i) {
+    if (result[i] != '\n') {
+      result[out++] = result[i];
+    } else {
+      while (i < result.size() && isspace(result[i])) ++i;
+      --i;
+      result[out++] = ' ';
+    }
   }
+  // Remove trailing space, if there is one.
+  if (out > 0 && isspace(result[out - 1])) {
+    --out;
+  }
+  result.resize(out);
 
   return result;
 }
@@ -103,14 +114,16 @@ class TextFormat::Parser::ParserImpl {
     FORBID_SINGULAR_OVERWRITES = 1,  // an error is issued
   };
 
-  ParserImpl(io::ZeroCopyInputStream* input_stream,
+  ParserImpl(const Descriptor* root_message_type,
+             io::ZeroCopyInputStream* input_stream,
              io::ErrorCollector* error_collector,
              SingularOverwritePolicy singular_overwrite_policy)
     : error_collector_(error_collector),
       tokenizer_error_collector_(this),
       tokenizer_(input_stream, &tokenizer_error_collector_),
-      root_message_type_(NULL),
-      singular_overwrite_policy_(singular_overwrite_policy) {
+      root_message_type_(root_message_type),
+      singular_overwrite_policy_(singular_overwrite_policy),
+      had_errors_(false) {
     // For backwards-compatibility with proto1, we need to allow the 'f' suffix
     // for floats.
     tokenizer_.set_allow_f_after_float(true);
@@ -128,12 +141,10 @@ class TextFormat::Parser::ParserImpl {
   // false if an error occurs (an error will also be logged to
   // GOOGLE_LOG(ERROR)).
   bool Parse(Message* output) {
-    root_message_type_ = output->GetDescriptor();
-
     // Consume fields until we cannot do so anymore.
     while(true) {
       if (LookingAtType(io::Tokenizer::TYPE_END)) {
-        return true;
+        return !had_errors_;
       }
 
       DO(ConsumeField(output));
@@ -141,6 +152,7 @@ class TextFormat::Parser::ParserImpl {
   }
 
   void ReportError(int line, int col, const string& message) {
+    had_errors_ = true;
     if (error_collector_ == NULL) {
       if (line >= 0) {
         GOOGLE_LOG(ERROR) << "Error parsing text-format "
@@ -571,6 +583,7 @@ class TextFormat::Parser::ParserImpl {
   io::Tokenizer tokenizer_;
   const Descriptor* root_message_type_;
   SingularOverwritePolicy singular_overwrite_policy_;
+  bool had_errors_;
 };
 
 #undef DO
@@ -699,7 +712,7 @@ TextFormat::Parser::~Parser() {}
 bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input,
                                Message* output) {
   output->Clear();
-  ParserImpl parser(input, error_collector_,
+  ParserImpl parser(output->GetDescriptor(), input, error_collector_,
                     ParserImpl::FORBID_SINGULAR_OVERWRITES);
   return MergeUsingImpl(input, output, &parser);
 }
@@ -712,7 +725,7 @@ bool TextFormat::Parser::ParseFromString(const string& input,
 
 bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input,
                                Message* output) {
-  ParserImpl parser(input, error_collector_,
+  ParserImpl parser(output->GetDescriptor(), input, error_collector_,
                     ParserImpl::ALLOW_SINGULAR_OVERWRITES);
   return MergeUsingImpl(input, output, &parser);
 }

+ 34 - 2
src/google/protobuf/text_format_unittest.cc

@@ -64,12 +64,12 @@ inline bool IsNaN(double value) {
 // A basic string with different escapable characters for testing.
 const string kEscapeTestString =
   "\"A string with ' characters \n and \r newlines and \t tabs and \001 "
-  "slashes \\";
+  "slashes \\ and  multiple   spaces";
 
 // A representation of the above string with all the characters escaped.
 const string kEscapeTestStringEscaped =
   "\"\\\"A string with \\' characters \\n and \\r newlines "
-  "and \\t tabs and \\001 slashes \\\\\"";
+  "and \\t tabs and \\001 slashes \\\\ and  multiple   spaces\"";
 
 class TextFormatTest : public testing::Test {
  public:
@@ -126,6 +126,18 @@ TEST_F(TextFormatExtensionsTest, Extensions) {
   EXPECT_EQ(proto_debug_string_, proto_.DebugString());
 }
 
+TEST_F(TextFormatTest, ShortDebugString) {
+  // Set a scalar, a string, one field of a nested message, and touch a
+  // foreign message without setting anything inside it.
+  proto_.set_optional_int32(1);
+  proto_.set_optional_string("hello");
+  proto_.mutable_optional_nested_message()->set_bb(2);
+  proto_.mutable_optional_foreign_message();
+
+  // The short form is expected on a single line with single-space separators
+  // and no trailing space.
+  const string expected_text =
+      "optional_int32: 1 optional_string: \"hello\" "
+      "optional_nested_message { bb: 2 } "
+      "optional_foreign_message { }";
+  EXPECT_EQ(expected_text, proto_.ShortDebugString());
+}
+
 TEST_F(TextFormatTest, StringEscape) {
   // Set the string value to test.
   proto_.set_optional_string(kEscapeTestString);
@@ -140,6 +152,10 @@ TEST_F(TextFormatTest, StringEscape) {
 
   // Compare.
   EXPECT_EQ(correct_string, debug_string);
+
+  string expected_short_debug_string = "optional_string: "
+      + kEscapeTestStringEscaped;
+  EXPECT_EQ(expected_short_debug_string, proto_.ShortDebugString());
 }
 
 TEST_F(TextFormatTest, PrintUnknownFields) {
@@ -736,6 +752,22 @@ TEST_F(TextFormatParserTest, PrintErrorsToStderr) {
             errors[0]);
 }
 
+// Checks that a tokenizer-level error on the very first token makes the parse
+// return false and is logged exactly once.
+TEST_F(TextFormatParserTest, FailsOnTokenizationError) {
+  vector<string> errors;
+
+  {
+    // Capture ERROR-level log output produced while parsing.
+    ScopedMemoryLog log;
+    unittest::TestAllTypes proto;
+    // "\020" is a single control character; the expected message below shows
+    // it being rejected at position 1:1.
+    EXPECT_FALSE(TextFormat::ParseFromString("\020", &proto));
+    errors = log.GetMessages(ERROR);
+  }
+
+  // Exactly one message is expected, naming the root message type.
+  ASSERT_EQ(1, errors.size());
+  EXPECT_EQ("Error parsing text-format protobuf_unittest.TestAllTypes: "
+            "1:1: Invalid control characters encountered in text.",
+            errors[0]);
+}
+
 
 class TextFormatMessageSetTest : public testing::Test {
  protected:

+ 9 - 0
src/google/protobuf/unittest.proto

@@ -452,6 +452,15 @@ message TestExtremeDefaultValues {
   optional string utf8_string = 6 [default = "\341\210\264"];
 }
 
+// Test String and Bytes: a `string` field is for valid UTF-8 text only; raw
+// bytes belong in a `bytes` field.
+message OneString {
+  optional string data = 1;
+}
+
+// Same single-field shape as OneString, but the field is declared as bytes.
+message OneBytes {
+  optional bytes data = 1;
+}
+
 // Test that RPC services work.
 message FooRequest  {}
 message FooResponse {}

+ 1 - 0
src/google/protobuf/unittest_optimize_for.proto

@@ -48,6 +48,7 @@ message TestOptimizedForSize {
 
   extend TestOptimizedForSize {
     optional int32 test_extension = 1234;
+    optional TestRequiredOptimizedForSize test_extension2 = 1235;
   }
 }
 

+ 34 - 0
src/google/protobuf/unknown_field_set.cc

@@ -130,6 +130,30 @@ UnknownField* UnknownFieldSet::AddField(int number) {
   return field;
 }
 
+int UnknownFieldSet::SpaceUsedExcludingSelf() const {
+  // With no Internal object there is nothing to count.
+  if (internal_ == NULL) return 0;
+
+  // Bookkeeping: the Internal struct itself, the active-field vector's
+  // capacity, and one map entry per stored field.
+  int bytes = sizeof(*internal_);
+  bytes += internal_->active_fields_.capacity() *
+           sizeof(Internal::FieldVector::value_type);
+  bytes += internal_->fields_.size() *
+           sizeof(Internal::FieldMap::value_type);
+
+  // Add what every UnknownField object in the map reports for itself.
+  const Internal::FieldMap& all_fields = internal_->fields_;
+  for (Internal::FieldMap::const_iterator entry = all_fields.begin();
+       entry != all_fields.end(); ++entry) {
+    bytes += entry->second->SpaceUsed();
+  }
+  return bytes;
+}
+
+int UnknownFieldSet::SpaceUsed() const {
+  // SpaceUsedExcludingSelf() plus the size of this object.
+  const int own_size = sizeof(*this);
+  return own_size + SpaceUsedExcludingSelf();
+}
+
 UnknownField::UnknownField(int number)
   : number_(number),
     index_(-1) {
@@ -154,5 +178,15 @@ void UnknownField::MergeFrom(const UnknownField& other) {
   group_           .MergeFrom(other.group_           );
 }
 
+int UnknownField::SpaceUsed() const {
+  // This object's own size plus whatever each of its five value lists
+  // reports through SpaceUsedExcludingSelf().
+  return sizeof(*this) +
+         varint_.SpaceUsedExcludingSelf() +
+         fixed32_.SpaceUsedExcludingSelf() +
+         fixed64_.SpaceUsedExcludingSelf() +
+         length_delimited_.SpaceUsedExcludingSelf() +
+         group_.SpaceUsedExcludingSelf();
+}
+
 }  // namespace protobuf
 }  // namespace google

+ 22 - 2
src/google/protobuf/unknown_field_set.h

@@ -75,6 +75,9 @@ class LIBPROTOBUF_EXPORT UnknownFieldSet {
   // Merge the contents of some other UnknownFieldSet with this one.
   void MergeFrom(const UnknownFieldSet& other);
 
+  // Swaps the contents of some other UnknownFieldSet with this one.
+  inline void Swap(UnknownFieldSet* x);
+
   // Returns the number of fields present in the UnknownFieldSet.
   inline int field_count() const;
   // Get a field in the set, where 0 <= index < field_count().  The fields
@@ -102,6 +105,13 @@ class LIBPROTOBUF_EXPORT UnknownFieldSet {
     return ParseFromArray(data.data(), data.size());
   }
 
+  // Computes (an estimate of) the total number of bytes currently used for
+  // storing the unknown fields in memory. Does NOT include
+  // sizeof(*this) in the calculation.
+  int SpaceUsedExcludingSelf() const;
+  // Version of SpaceUsed() including sizeof(*this).
+  int SpaceUsed() const;
+
  private:
   // "Active" fields are ones which have been added since the last time Clear()
   // was called.  Inactive fields are objects we are keeping around incase
@@ -114,10 +124,12 @@ class LIBPROTOBUF_EXPORT UnknownFieldSet {
     // the same field number they were used for originally because this makes
     // it more likely that the previously-allocated memory will have the right
     // layout.
-    map<int, UnknownField*> fields_;
+    typedef map<int, UnknownField*> FieldMap;
+    FieldMap fields_;
 
     // Contains the fields from fields_ that are currently active.
-    vector<UnknownField*> active_fields_;
+    typedef vector<UnknownField*> FieldVector;
+    FieldVector active_fields_;
   };
 
   // We want an UnknownFieldSet to use no more space than a single pointer
@@ -203,6 +215,10 @@ class LIBPROTOBUF_EXPORT UnknownField {
   inline RepeatedPtrField<string         >* mutable_length_delimited();
   inline RepeatedPtrField<UnknownFieldSet>* mutable_group           ();
 
+  // Returns (an estimate of) the total number of bytes used to represent the
+  // unknown field.
+  int SpaceUsed() const;
+
  private:
   friend class UnknownFieldSet;
   UnknownField(int number);
@@ -226,6 +242,10 @@ inline bool UnknownFieldSet::empty() const {
   return internal_ == NULL || internal_->active_fields_.empty();
 }
 
+// Exchanges the internal_ members of this set and |x|; nothing else is
+// touched here.
+inline void UnknownFieldSet::Swap(UnknownFieldSet* x) {
+  std::swap(internal_, x->internal_);
+}
+
 inline int UnknownFieldSet::field_count() const {
   return (internal_ == NULL) ? 0 : internal_->active_fields_.size();
 }

+ 90 - 0
src/google/protobuf/unknown_field_set_unittest.cc

@@ -222,6 +222,30 @@ TEST_F(UnknownFieldSetTest, CopyFrom) {
   EXPECT_EQ(empty_message_.DebugString(), message.DebugString());
 }
 
+TEST_F(UnknownFieldSetTest, Swap) {
+  // Parse the fixture's "bizarro" data into a second message.
+  unittest::TestEmptyMessage bizarro_message;
+  ASSERT_TRUE(bizarro_message.ParseFromString(GetBizarroData()));
+
+  // Both sides must hold unknown fields and must print differently, or the
+  // swap below would prove nothing.
+  EXPECT_GT(empty_message_.unknown_fields().field_count(), 0);
+  EXPECT_GT(bizarro_message.unknown_fields().field_count(), 0);
+  const string fixture_text_before = empty_message_.DebugString();
+  const string bizarro_text_before = bizarro_message.DebugString();
+  EXPECT_NE(fixture_text_before, bizarro_text_before);
+
+  // After the swap each message should print what the other one did.
+  empty_message_.Swap(&bizarro_message);
+  EXPECT_EQ(fixture_text_before, bizarro_message.DebugString());
+  EXPECT_EQ(bizarro_text_before, empty_message_.DebugString());
+}
+
+TEST_F(UnknownFieldSetTest, SwapWithSelf) {
+  // Record the state before the swap.
+  EXPECT_GT(empty_message_.unknown_fields().field_count(), 0);
+  const string text_before = empty_message_.DebugString();
+
+  // Swapping a message with itself must leave its unknown fields in place.
+  empty_message_.Swap(&empty_message_);
+  EXPECT_GT(empty_message_.unknown_fields().field_count(), 0);
+  EXPECT_EQ(text_before, empty_message_.DebugString());
+}
+
 TEST_F(UnknownFieldSetTest, MergeFrom) {
   unittest::TestEmptyMessage source, destination;
 
@@ -426,6 +450,72 @@ TEST_F(UnknownFieldSetTest, UnknownEnumValue) {
   }
 }
 
+// Checks SpaceUsedExcludingSelf() for a set with no fields, and then for a
+// single field filled with 16 values of each of the varint, fixed32, fixed64
+// and length-delimited kinds.  Groups are not exercised here.
+TEST_F(UnknownFieldSetTest, SpaceUsedExcludingSelf) {
+  {
+    // Make sure an unknown field set has zero space used until a field is
+    // actually added.
+    unittest::TestEmptyMessage empty_message;
+    const int empty_message_size = empty_message.SpaceUsed();
+    UnknownFieldSet* unknown_fields = empty_message.mutable_unknown_fields();
+    // Merely fetching the mutable set must not change the reported size.
+    EXPECT_EQ(empty_message_size, empty_message.SpaceUsed());
+    unknown_fields->AddField(1)->add_varint(0);
+    EXPECT_LT(empty_message_size, empty_message.SpaceUsed());
+  }
+  {
+    // Test varints.
+    UnknownFieldSet unknown_fields;
+    UnknownField* field = unknown_fields.AddField(1);
+    // Size with the field created but holding no values yet.
+    const int base_size = unknown_fields.SpaceUsedExcludingSelf();
+    for (int i = 0; i < 16; ++i) {
+      field->add_varint(i);
+    }
+    // Should just defer computation to the RepeatedField.
+    int expected_size = base_size + field->varint().SpaceUsedExcludingSelf();
+    EXPECT_EQ(expected_size, unknown_fields.SpaceUsedExcludingSelf());
+  }
+  {
+    // Test fixed32s.
+    UnknownFieldSet unknown_fields;
+    UnknownField* field = unknown_fields.AddField(1);
+    const int base_size = unknown_fields.SpaceUsedExcludingSelf();
+    for (int i = 0; i < 16; ++i) {
+      field->add_fixed32(i);
+    }
+    int expected_size = base_size + field->fixed32().SpaceUsedExcludingSelf();
+    EXPECT_EQ(expected_size, unknown_fields.SpaceUsedExcludingSelf());
+  }
+  {
+    // Test fixed64s.
+    UnknownFieldSet unknown_fields;
+    UnknownField* field = unknown_fields.AddField(1);
+    const int base_size = unknown_fields.SpaceUsedExcludingSelf();
+    for (int i = 0; i < 16; ++i) {
+      field->add_fixed64(i);
+    }
+    int expected_size = base_size + field->fixed64().SpaceUsedExcludingSelf();
+    EXPECT_EQ(expected_size, unknown_fields.SpaceUsedExcludingSelf());
+  }
+  {
+    // Test length-delimited types.
+    UnknownFieldSet unknown_fields;
+    UnknownField* field = unknown_fields.AddField(1);
+    const int base_size = unknown_fields.SpaceUsedExcludingSelf();
+    for (int i = 0; i < 16; ++i) {
+      field->add_length_delimited()->assign("my length delimited string");
+    }
+    int expected_size = base_size +
+        field->length_delimited().SpaceUsedExcludingSelf();
+    EXPECT_EQ(expected_size, unknown_fields.SpaceUsedExcludingSelf());
+  }
+}
+
+TEST_F(UnknownFieldSetTest, SpaceUsed) {
+  // SpaceUsed() must equal the object's own size plus the excluding-self
+  // figure.
+  UnknownFieldSet field_set;
+  const int excluding_self = field_set.SpaceUsedExcludingSelf();
+  const int including_self = sizeof(field_set) + excluding_self;
+  EXPECT_EQ(including_self, field_set.SpaceUsed());
+}
+
 }  // namespace
 }  // namespace protobuf
 }  // namespace google

+ 11 - 2
src/google/protobuf/wire_format.cc

@@ -648,8 +648,7 @@ bool WireFormat::SerializeFieldWithCachedSizes(
 
       // Handle strings separately so that we can get string references
       // instead of copying.
-      case FieldDescriptor::TYPE_STRING:
-      case FieldDescriptor::TYPE_BYTES: {
+      case FieldDescriptor::TYPE_STRING: {
           string scratch;
           const string& value = field->is_repeated() ?
             message_reflection->GetRepeatedStringReference(
@@ -658,6 +657,16 @@ bool WireFormat::SerializeFieldWithCachedSizes(
           if (!WriteString(field->number(), value, output)) return false;
         break;
       }
+
+      case FieldDescriptor::TYPE_BYTES: {
+          string scratch;
+          const string& value = field->is_repeated() ?
+            message_reflection->GetRepeatedStringReference(
+              message, field, j, &scratch) :
+            message_reflection->GetStringReference(message, field, &scratch);
+          if (!WriteBytes(field->number(), value, output)) return false;
+        break;
+      }
     }
   }
 

+ 25 - 7
src/google/protobuf/wire_format_inl.h

@@ -36,10 +36,17 @@
 #define GOOGLE_PROTOBUF_WIRE_FORMAT_INL_H__
 
 #include <string>
+#include <google/protobuf/stubs/common.h>
 #include <google/protobuf/wire_format.h>
 #include <google/protobuf/io/coded_stream.h>
 
 
+// Do UTF-8 validation on string type in Debug build only
+#ifndef NDEBUG
+#define GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+#endif
+
+
 namespace google {
 namespace protobuf {
 namespace internal {
@@ -122,12 +129,18 @@ inline bool WireFormat::ReadEnum(io::CodedInputStream* input, int* value) {
 }
 
 inline bool WireFormat::ReadString(io::CodedInputStream* input, string* value) {
-  // WARNING:  In wire_format.cc, both strings and bytes are handled by
-  //   ReadString() to avoid code duplication.  If the implementations become
-  //   different, you will need to update that usage.
+  // String is for UTF-8 text only
   uint32 length;
   if (!input->ReadVarint32(&length)) return false;
-  return input->ReadString(value, length);
+  if (!input->ReadString(value, length)) return false;
+#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+  if (!IsStructurallyValidUTF8(value->data(), length)) {
+    GOOGLE_LOG(ERROR) << "Encountered string containing invalid UTF-8 data while "
+               "parsing protocol buffer. Strings must contain only UTF-8; "
+               "use the 'bytes' type for raw bytes.";
+  }
+#endif  // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+  return true;
 }
 inline bool WireFormat::ReadBytes(io::CodedInputStream* input, string* value) {
   uint32 length;
@@ -270,9 +283,14 @@ inline bool WireFormat::WriteEnum(int field_number, int value,
 
 inline bool WireFormat::WriteString(int field_number, const string& value,
                                     io::CodedOutputStream* output) {
-  // WARNING:  In wire_format.cc, both strings and bytes are handled by
-  //   WriteString() to avoid code duplication.  If the implementations become
-  //   different, you will need to update that usage.
+  // String is for UTF-8 text only
+#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+  if (!IsStructurallyValidUTF8(value.data(), value.size())) {
+    GOOGLE_LOG(ERROR) << "Encountered string containing invalid UTF-8 data while "
+               "serializing protocol buffer. Strings must contain only UTF-8; "
+               "use the 'bytes' type for raw bytes.";
+  }
+#endif  // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
   return WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output) &&
          output->WriteVarint32(value.size()) &&
          output->WriteString(value);

+ 149 - 1
src/google/protobuf/wire_format_unittest.cc

@@ -199,6 +199,30 @@ TEST(WireFormatTest, SerializeFieldsAndExtensions) {
   TestUtil::ExpectAllFieldsAndExtensionsInOrder(generated_data);
 }
 
+TEST(WireFormatTest, ParseMultipleExtensionRanges) {
+  // A message with multiple extension ranges must survive a serialize/parse
+  // round trip through both parsing paths.
+  unittest::TestFieldOrderings original;
+  TestUtil::SetAllFieldsAndExtensions(&original);
+
+  string wire_bytes;
+  original.SerializeToString(&wire_bytes);
+  const string expected_text = original.DebugString();
+
+  // Path 1: the message's own ParseFromString().
+  {
+    unittest::TestFieldOrderings parsed;
+    EXPECT_TRUE(parsed.ParseFromString(wire_bytes));
+    EXPECT_EQ(expected_text, parsed.DebugString());
+  }
+
+  // Path 2: reflection-based parsing through WireFormat.
+  {
+    unittest::TestFieldOrderings parsed;
+    io::ArrayInputStream array_stream(wire_bytes.data(), wire_bytes.size());
+    io::CodedInputStream coded_stream(&array_stream);
+    EXPECT_TRUE(WireFormat::ParseAndMergePartial(&coded_stream, &parsed));
+    EXPECT_EQ(expected_text, parsed.DebugString());
+  }
+}
+
 const int kUnknownTypeId = 1550055;
 
 TEST(WireFormatTest, SerializeMessageSet) {
@@ -421,7 +445,7 @@ class WireFormatInvalidInputTest : public testing::Test {
       io::StringOutputStream raw_output(&result);
       io::CodedOutputStream output(&raw_output);
 
-      EXPECT_TRUE(WireFormat::WriteString(
+      EXPECT_TRUE(WireFormat::WriteBytes(
         field->number(), string(bytes, size), &output));
     }
 
@@ -541,6 +565,130 @@ TEST_F(WireFormatInvalidInputTest, InvalidStringInUnknownGroup) {
   EXPECT_FALSE(WireFormat::SkipMessage(&coded_input, &unknown_fields));
 }
 
+// Test differences between string and bytes.
+// Value of a string type must be valid UTF-8 string.  When UTF-8
+// validation is enabled (GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED):
+// WriteInvalidUTF8String:  see error message.
+// ReadInvalidUTF8String:  see error message.
+// WriteValidUTF8String: fine.
+// ReadValidUTF8String:  fine.
+// WriteAnyBytes: fine.
+// ReadAnyBytes: fine.
+// Text followed by four raw bytes that do not form valid UTF-8.
+const char * kInvalidUTF8String = "Invalid UTF-8: \xA0\xB0\xC0\xD0";
+// Text, two control bytes, then U+8C37 and U+6B4C written out as their UTF-8
+// byte sequences.  Universal character names are deliberately avoided in this
+// narrow literal: the bytes they produce depend on the compiler's execution
+// character set, so the string is not guaranteed to be UTF-8 everywhere.
+const char * kValidUTF8String =
+    "Valid UTF-8: \x01\x02"
+    "\xe8\xb0\xb7\xe6\xad\x8c";
+
+// Stores |value| in the message's data field, then replaces the contents of
+// |wire_buffer| with what AppendToString() produces.  Returns true when at
+// least one byte was written.
+template<typename T>
+bool WriteMessage(const char *value, T *message, string *wire_buffer) {
+  message->set_data(value);
+
+  // Start from an empty buffer so only this message's bytes remain.
+  wire_buffer->clear();
+  message->AppendToString(wire_buffer);
+
+  return !wire_buffer->empty();
+}
+
+// Parses the bytes held in |wire_buffer| into |message| and returns the
+// result of ParseFromArray().
+template<typename T>
+bool ReadMessage(const string &wire_buffer, T *message) {
+  const char* const bytes = wire_buffer.data();
+  const string::size_type byte_count = wire_buffer.size();
+  return message->ParseFromArray(bytes, byte_count);
+}
+
+TEST(Utf8ValidationTest, WriteInvalidUTF8String) {
+  protobuf_unittest::OneString message;
+  string serialized;
+  vector<string> logged;
+  {
+    // Collect ERROR-level log lines emitted while serializing.
+    ScopedMemoryLog capture;
+    WriteMessage(kInvalidUTF8String, &message, &serialized);
+    logged = capture.GetMessages(ERROR);
+  }
+#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+  // With validation compiled in, exactly one complaint is expected.
+  ASSERT_EQ(1, logged.size());
+  EXPECT_EQ("Encountered string containing invalid UTF-8 data while "
+            "serializing protocol buffer. Strings must contain only UTF-8; "
+            "use the 'bytes' type for raw bytes.",
+            logged[0]);
+
+#else
+  // Without validation nothing is logged.
+  ASSERT_EQ(0, logged.size());
+#endif  // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+}
+
+TEST(Utf8ValidationTest, ReadInvalidUTF8String) {
+  // Serialize outside the capture scope so only parse-time logging is seen.
+  protobuf_unittest::OneString written;
+  string serialized;
+  WriteMessage(kInvalidUTF8String, &written, &serialized);
+
+  protobuf_unittest::OneString parsed;
+  vector<string> logged;
+  {
+    ScopedMemoryLog capture;
+    ReadMessage(serialized, &parsed);
+    logged = capture.GetMessages(ERROR);
+  }
+#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+  // With validation compiled in, exactly one complaint is expected.
+  ASSERT_EQ(1, logged.size());
+  EXPECT_EQ("Encountered string containing invalid UTF-8 data while "
+            "parsing protocol buffer. Strings must contain only UTF-8; "
+            "use the 'bytes' type for raw bytes.",
+            logged[0]);
+
+#else
+  // Without validation nothing is logged.
+  ASSERT_EQ(0, logged.size());
+#endif  // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
+}
+
+TEST(Utf8ValidationTest, WriteValidUTF8String) {
+  protobuf_unittest::OneString message;
+  string serialized;
+  vector<string> logged;
+  {
+    ScopedMemoryLog capture;
+    WriteMessage(kValidUTF8String, &message, &serialized);
+    logged = capture.GetMessages(ERROR);
+  }
+  // Serializing valid text must not log any error.
+  ASSERT_EQ(0, logged.size());
+}
+
+TEST(Utf8ValidationTest, ReadValidUTF8String) {
+  // Serialize outside the capture scope so only parse-time logging is seen.
+  protobuf_unittest::OneString written;
+  string serialized;
+  WriteMessage(kValidUTF8String, &written, &serialized);
+
+  protobuf_unittest::OneString parsed;
+  vector<string> logged;
+  {
+    ScopedMemoryLog capture;
+    ReadMessage(serialized, &parsed);
+    logged = capture.GetMessages(ERROR);
+  }
+  // No error may be logged, and the field must round-trip unchanged.
+  ASSERT_EQ(0, logged.size());
+  EXPECT_EQ(written.data(), parsed.data());
+}
+
+// Bytes: anything can pass as bytes, use invalid UTF-8 string to test
+TEST(Utf8ValidationTest, WriteArbitraryBytes) {
+  protobuf_unittest::OneBytes message;
+  string serialized;
+  vector<string> logged;
+  {
+    ScopedMemoryLog capture;
+    WriteMessage(kInvalidUTF8String, &message, &serialized);
+    logged = capture.GetMessages(ERROR);
+  }
+  // A bytes field takes the invalid-UTF-8 payload without any error logged.
+  ASSERT_EQ(0, logged.size());
+}
+
+TEST(Utf8ValidationTest, ReadArbitraryBytes) {
+  // Serialize outside the capture scope so only parse-time logging is seen.
+  protobuf_unittest::OneBytes written;
+  string serialized;
+  WriteMessage(kInvalidUTF8String, &written, &serialized);
+
+  protobuf_unittest::OneBytes parsed;
+  vector<string> logged;
+  {
+    ScopedMemoryLog capture;
+    ReadMessage(serialized, &parsed);
+    logged = capture.GetMessages(ERROR);
+  }
+  // No error may be logged, and the bytes must round-trip unchanged.
+  ASSERT_EQ(0, logged.size());
+  EXPECT_EQ(written.data(), parsed.data());
+}
+
 }  // namespace
 }  // namespace internal
 }  // namespace protobuf

+ 4 - 0
vsprojects/libprotobuf.vcproj

@@ -355,6 +355,10 @@
 				RelativePath="..\src\google\protobuf\stubs\substitute.cc"
 				>
 			</File>
+			<File
+				RelativePath="..\src\google\protobuf\stubs\structurally_valid.cc"
+				>
+			</File>
 			<File
 				RelativePath="..\src\google\protobuf\text_format.cc"
 				>

+ 4 - 0
vsprojects/tests.vcproj

@@ -286,6 +286,10 @@
 				RelativePath="..\src\google\protobuf\stubs\strutil_unittest.cc"
 				>
 			</File>
+			<File
+				RelativePath="..\src\google\protobuf\stubs\structurally_valid_unittest.cc"
+				>
+			</File>
 			<File
 				RelativePath="..\src\google\protobuf\io\coded_stream_unittest.cc"
 				>