Эх сурвалжийг харах

Submit recent changes from internal branch. See CHANGES.txt for more details.

liujisi@google.com 15 жил өмнө
parent
commit
33165fe0d5
100 өөрчлөгдсөн 12107 нэмэгдсэн , 2251 устгасан
  1. 63 0
      CHANGES.txt
  2. 32 0
      Makefile.am
  3. 6 0
      java/pom.xml
  4. 58 2
      java/src/main/java/com/google/protobuf/AbstractMessage.java
  5. 12 0
      java/src/main/java/com/google/protobuf/ByteString.java
  6. 35 15
      java/src/main/java/com/google/protobuf/CodedInputStream.java
  7. 55 3
      java/src/main/java/com/google/protobuf/CodedOutputStream.java
  8. 8 0
      java/src/main/java/com/google/protobuf/Descriptors.java
  9. 161 85
      java/src/main/java/com/google/protobuf/FieldSet.java
  10. 554 123
      java/src/main/java/com/google/protobuf/GeneratedMessage.java
  11. 255 92
      java/src/main/java/com/google/protobuf/GeneratedMessageLite.java
  12. 85 0
      java/src/main/java/com/google/protobuf/Internal.java
  13. 155 0
      java/src/main/java/com/google/protobuf/LazyStringArrayList.java
  14. 72 0
      java/src/main/java/com/google/protobuf/LazyStringList.java
  15. 7 97
      java/src/main/java/com/google/protobuf/Message.java
  16. 18 28
      java/src/main/java/com/google/protobuf/MessageLite.java
  17. 56 0
      java/src/main/java/com/google/protobuf/MessageLiteOrBuilder.java
  18. 110 0
      java/src/main/java/com/google/protobuf/MessageOrBuilder.java
  19. 696 0
      java/src/main/java/com/google/protobuf/RepeatedFieldBuilder.java
  20. 9 1
      java/src/main/java/com/google/protobuf/ServiceException.java
  21. 241 0
      java/src/main/java/com/google/protobuf/SingleFieldBuilder.java
  22. 618 0
      java/src/main/java/com/google/protobuf/SmallSortedMap.java
  23. 313 175
      java/src/main/java/com/google/protobuf/TextFormat.java
  24. 146 0
      java/src/main/java/com/google/protobuf/UnmodifiableLazyStringList.java
  25. 6 6
      java/src/main/java/com/google/protobuf/WireFormat.java
  26. 7 6
      java/src/test/java/com/google/protobuf/AbstractMessageTest.java
  27. 21 0
      java/src/test/java/com/google/protobuf/CodedInputStreamTest.java
  28. 80 0
      java/src/test/java/com/google/protobuf/DeprecatedFieldTest.java
  29. 1 0
      java/src/test/java/com/google/protobuf/DescriptorsTest.java
  30. 48 0
      java/src/test/java/com/google/protobuf/ForceFieldBuildersPreRun.java
  31. 350 26
      java/src/test/java/com/google/protobuf/GeneratedMessageTest.java
  32. 118 0
      java/src/test/java/com/google/protobuf/LazyStringArrayListTest.java
  33. 115 0
      java/src/test/java/com/google/protobuf/LazyStringEndToEndTest.java
  34. 29 0
      java/src/test/java/com/google/protobuf/LiteTest.java
  35. 185 0
      java/src/test/java/com/google/protobuf/NestedBuildersTest.java
  36. 190 0
      java/src/test/java/com/google/protobuf/RepeatedFieldBuilderTest.java
  37. 8 0
      java/src/test/java/com/google/protobuf/ServiceTest.java
  38. 155 0
      java/src/test/java/com/google/protobuf/SingleFieldBuilderTest.java
  39. 378 0
      java/src/test/java/com/google/protobuf/SmallSortedMapTest.java
  40. 49 0
      java/src/test/java/com/google/protobuf/TestBadIdentifiers.java
  41. 51 20
      java/src/test/java/com/google/protobuf/TestUtil.java
  42. 124 34
      java/src/test/java/com/google/protobuf/TextFormatTest.java
  43. 152 0
      java/src/test/java/com/google/protobuf/UnmodifiableLazyStringListTest.java
  44. 4 0
      java/src/test/java/com/google/protobuf/multiple_files_test.proto
  45. 53 0
      java/src/test/java/com/google/protobuf/nested_builders_test.proto
  46. 45 0
      java/src/test/java/com/google/protobuf/nested_extension.proto
  47. 48 0
      java/src/test/java/com/google/protobuf/nested_extension_lite.proto
  48. 48 0
      java/src/test/java/com/google/protobuf/non_nested_extension.proto
  49. 50 0
      java/src/test/java/com/google/protobuf/non_nested_extension_lite.proto
  50. 66 0
      java/src/test/java/com/google/protobuf/test_bad_identifiers.proto
  51. 18 10
      python/google/protobuf/descriptor.py
  52. 64 0
      python/google/protobuf/internal/api_implementation.py
  53. 21 6
      python/google/protobuf/internal/containers.py
  54. 616 0
      python/google/protobuf/internal/cpp_message.py
  55. 75 2
      python/google/protobuf/internal/decoder.py
  56. 85 2
      python/google/protobuf/internal/encoder.py
  57. 27 4
      python/google/protobuf/internal/generator_test.py
  58. 254 0
      python/google/protobuf/internal/message_test.py
  59. 1098 0
      python/google/protobuf/internal/python_message.py
  60. 186 40
      python/google/protobuf/internal/reflection_test.py
  61. 145 13
      python/google/protobuf/internal/text_format_test.py
  62. 11 0
      python/google/protobuf/message.py
  63. 1658 0
      python/google/protobuf/pyext/python-proto2.cc
  64. 334 0
      python/google/protobuf/pyext/python_descriptor.cc
  65. 87 0
      python/google/protobuf/pyext/python_descriptor.h
  66. 63 0
      python/google/protobuf/pyext/python_protobuf.cc
  67. 57 0
      python/google/protobuf/pyext/python_protobuf.h
  68. 17 1035
      python/google/protobuf/reflection.py
  69. 55 37
      python/google/protobuf/text_format.py
  70. 20 3
      python/setup.py
  71. 2 0
      src/Makefile.am
  72. 8 3
      src/google/protobuf/compiler/code_generator.cc
  73. 17 7
      src/google/protobuf/compiler/code_generator.h
  74. 45 37
      src/google/protobuf/compiler/command_line_interface.cc
  75. 4 4
      src/google/protobuf/compiler/command_line_interface.h
  76. 58 7
      src/google/protobuf/compiler/command_line_interface_unittest.cc
  77. 16 16
      src/google/protobuf/compiler/cpp/cpp_bootstrap_unittest.cc
  78. 1 1
      src/google/protobuf/compiler/cpp/cpp_enum_field.cc
  79. 1 0
      src/google/protobuf/compiler/cpp/cpp_file.cc
  80. 3 3
      src/google/protobuf/compiler/cpp/cpp_generator.cc
  81. 1 1
      src/google/protobuf/compiler/cpp/cpp_generator.h
  82. 214 51
      src/google/protobuf/compiler/cpp/cpp_message.cc
  83. 9 2
      src/google/protobuf/compiler/cpp/cpp_message_field.cc
  84. 11 11
      src/google/protobuf/compiler/cpp/cpp_plugin_unittest.cc
  85. 2 2
      src/google/protobuf/compiler/cpp/cpp_primitive_field.cc
  86. 35 23
      src/google/protobuf/compiler/cpp/cpp_string_field.cc
  87. 4 0
      src/google/protobuf/compiler/cpp/cpp_test_bad_identifiers.proto
  88. 45 1
      src/google/protobuf/compiler/cpp/cpp_unittest.cc
  89. 9 11
      src/google/protobuf/compiler/java/java_enum.cc
  90. 229 65
      src/google/protobuf/compiler/java/java_enum_field.cc
  91. 22 2
      src/google/protobuf/compiler/java/java_enum_field.h
  92. 95 80
      src/google/protobuf/compiler/java/java_extension.cc
  93. 1 1
      src/google/protobuf/compiler/java/java_extension.h
  94. 35 10
      src/google/protobuf/compiler/java/java_field.cc
  95. 11 1
      src/google/protobuf/compiler/java/java_field.h
  96. 30 30
      src/google/protobuf/compiler/java/java_file.cc
  97. 4 2
      src/google/protobuf/compiler/java/java_file.h
  98. 17 11
      src/google/protobuf/compiler/java/java_generator.cc
  99. 1 1
      src/google/protobuf/compiler/java/java_generator.h
  100. 140 3
      src/google/protobuf/compiler/java/java_helpers.cc

+ 63 - 0
CHANGES.txt

@@ -1,3 +1,66 @@
+2010-11-01 version 2.4.0:
+
+  General
+  * The RPC (cc|java|py)_generic_services default value is now false instead of
+    true.
+  * Custom options can have aggregate types. For example,
+      message MyOption {
+        optional string comment = 1;
+        optional string author = 2;
+      }
+      extend google.protobuf.FieldOptions {
+        optional MyOption myoption = 12345;
+      }
+    This option can now be set as follows:
+      message SomeType {
+        optional int32 field = 1 [(myoption) = { comment:'x' author:'y' }];
+      }
+
+  C++
+  * Various speed and code size optimizations.
+  * Added a release_foo() method on string and message fields.
+  * Fixed gzip_output_stream sub-stream handling.
+
+  Java
+  * Builders now maintain sub-builders for sub-messages. Use getFooBuilder() to
+    get the builder for the sub-message "foo". This allows you to repeatedly
+    modify deeply-nested sub-messages without rebuilding them.
+  * Builder.build() no longer invalidates the Builder for generated messages
+    (You may continue to modify it and then build another message).
+  * Code generator will generate efficient equals() and hashCode()
+    implementations if new option java_generate_equals_and_hash is enabled.
+    (Otherwise, reflection-based implementations are used.)
+  * Generated messages now implement Serializable.
+  * Fields with [deprecated=true] will be marked with @Deprecated in Java.
+  * Added lazy conversion of UTF-8 encoded strings to String objects to improve
+    performance.
+  * Various optimizations.
+  * Enum value can be accessed directly, instead of calling getNumber() on the
+    enum member.
+  * For each enum value, an integer constant is also generated with the suffix
+    _VALUE.
+
+  Python
+  * Added an experimental  C++ implementation for Python messages via a Python
+    extension. Implementation type is controlled by an environment variable
+    PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION (valid values: "cpp" and "python")
+    The default value is currently "python" but will be changed to "cpp" in
+    future release.
+  * Improved performance on message instantiation significantly.
+    Most of the work on message instantiation is done just once per message
+    class, instead of once per message instance.
+  * Improved performance on text message parsing.
+  * Allow add() to forward keyword arguments to the concrete class.
+      E.g. instead of
+        item = repeated_field.add()
+        item.foo = bar
+        item.baz = quux
+      You can do:
+        repeated_field.add(foo=bar, baz=quux)
+  * Added a sort() interface to the BaseContainer.
+  * Added an extend() method to repeated composite fields.
+  * Added UTF8 debug string support.
+
 2010-01-08 version 2.3.0:
 
   General

+ 32 - 0
Makefile.am

@@ -83,33 +83,57 @@ EXTRA_DIST =                                                                 \
   java/src/main/java/com/google/protobuf/GeneratedMessageLite.java           \
   java/src/main/java/com/google/protobuf/Internal.java                       \
   java/src/main/java/com/google/protobuf/InvalidProtocolBufferException.java \
+  java/src/main/java/com/google/protobuf/LazyStringArrayList.java            \
+  java/src/main/java/com/google/protobuf/LazyStringList.java                 \
   java/src/main/java/com/google/protobuf/Message.java                        \
   java/src/main/java/com/google/protobuf/MessageLite.java                    \
+  java/src/main/java/com/google/protobuf/MessageLiteOrBuilder.java           \
+  java/src/main/java/com/google/protobuf/MessageOrBuilder.java               \
   java/src/main/java/com/google/protobuf/ProtocolMessageEnum.java            \
+  java/src/main/java/com/google/protobuf/RepeatedFieldBuilder.java           \
   java/src/main/java/com/google/protobuf/RpcCallback.java                    \
   java/src/main/java/com/google/protobuf/RpcChannel.java                     \
   java/src/main/java/com/google/protobuf/RpcController.java                  \
   java/src/main/java/com/google/protobuf/RpcUtil.java                        \
   java/src/main/java/com/google/protobuf/Service.java                        \
   java/src/main/java/com/google/protobuf/ServiceException.java               \
+  java/src/main/java/com/google/protobuf/SingleFieldBuilder.java             \
+  java/src/main/java/com/google/protobuf/SmallSortedMap.java                 \
   java/src/main/java/com/google/protobuf/TextFormat.java                     \
   java/src/main/java/com/google/protobuf/UninitializedMessageException.java  \
   java/src/main/java/com/google/protobuf/UnknownFieldSet.java                \
+  java/src/main/java/com/google/protobuf/UnmodifiableLazyStringList.java     \
   java/src/main/java/com/google/protobuf/WireFormat.java                     \
   java/src/test/java/com/google/protobuf/AbstractMessageTest.java            \
   java/src/test/java/com/google/protobuf/CodedInputStreamTest.java           \
   java/src/test/java/com/google/protobuf/CodedOutputStreamTest.java          \
+  java/src/test/java/com/google/protobuf/DeprecatedFieldTest.java            \
   java/src/test/java/com/google/protobuf/DescriptorsTest.java                \
   java/src/test/java/com/google/protobuf/DynamicMessageTest.java             \
+  java/src/test/java/com/google/protobuf/ForceFieldBuildersPreRun.java       \
   java/src/test/java/com/google/protobuf/GeneratedMessageTest.java           \
+  java/src/test/java/com/google/protobuf/LazyStringArrayListTest.java        \
+  java/src/test/java/com/google/protobuf/LazyStringEndToEndTest.java         \
   java/src/test/java/com/google/protobuf/LiteTest.java                       \
   java/src/test/java/com/google/protobuf/MessageTest.java                    \
+  java/src/test/java/com/google/protobuf/NestedBuildersTest.java             \
+  java/src/test/java/com/google/protobuf/RepeatedFieldBuilderTest.java       \
   java/src/test/java/com/google/protobuf/ServiceTest.java                    \
+  java/src/test/java/com/google/protobuf/SingleFieldBuilderTest.java         \
+  java/src/test/java/com/google/protobuf/SmallSortedMapTest.java             \
+  java/src/test/java/com/google/protobuf/TestBadIdentifiers.java             \
   java/src/test/java/com/google/protobuf/TestUtil.java                       \
   java/src/test/java/com/google/protobuf/TextFormatTest.java                 \
   java/src/test/java/com/google/protobuf/UnknownFieldSetTest.java            \
+  java/src/test/java/com/google/protobuf/UnmodifiableLazyStringListTest.java \
   java/src/test/java/com/google/protobuf/WireFormatTest.java                 \
   java/src/test/java/com/google/protobuf/multiple_files_test.proto           \
+  java/src/test/java/com/google/protobuf/nested_builders_test.proto          \
+  java/src/test/java/com/google/protobuf/nested_extension.proto              \
+  java/src/test/java/com/google/protobuf/nested_extension_lite.proto         \
+  java/src/test/java/com/google/protobuf/non_nested_extension.proto          \
+  java/src/test/java/com/google/protobuf/non_nested_extension_lite.proto     \
+  java/src/test/java/com/google/protobuf/test_bad_identifiers.proto          \
   java/pom.xml                                                               \
   java/README.txt                                                            \
   python/google/protobuf/internal/generator_test.py                          \
@@ -121,6 +145,9 @@ EXTRA_DIST =                                                                 \
   python/google/protobuf/internal/message_test.py                            \
   python/google/protobuf/internal/more_extensions.proto                      \
   python/google/protobuf/internal/more_messages.proto                        \
+  python/google/protobuf/internal/python_message.py                          \
+  python/google/protobuf/internal/cpp_message.py                             \
+  python/google/protobuf/internal/api_implementation.py                      \
   python/google/protobuf/internal/reflection_test.py                         \
   python/google/protobuf/internal/service_reflection_test.py                 \
   python/google/protobuf/internal/test_util.py                               \
@@ -129,6 +156,11 @@ EXTRA_DIST =                                                                 \
   python/google/protobuf/internal/wire_format.py                             \
   python/google/protobuf/internal/wire_format_test.py                        \
   python/google/protobuf/internal/__init__.py                                \
+  python/google/protobuf/pyext/python-proto2.cc                              \
+  python/google/protobuf/pyext/python_descriptor.cc                          \
+  python/google/protobuf/pyext/python_descriptor.h                           \
+  python/google/protobuf/pyext/python_protobuf.cc                            \
+  python/google/protobuf/pyext/python_protobuf.h                             \
   python/google/protobuf/descriptor.py                                       \
   python/google/protobuf/message.py                                          \
   python/google/protobuf/reflection.py                                       \

+ 6 - 0
java/pom.xml

@@ -105,6 +105,12 @@
                   <arg value="../src/google/protobuf/unittest_mset.proto" />
                   <arg
                     value="src/test/java/com/google/protobuf/multiple_files_test.proto" />
+                  <arg value="src/test/java/com/google/protobuf/nested_builders_test.proto" />
+                  <arg value="src/test/java/com/google/protobuf/nested_extension.proto" />
+                  <arg value="src/test/java/com/google/protobuf/nested_extension_lite.proto" />
+                  <arg value="src/test/java/com/google/protobuf/non_nested_extension.proto" />
+                  <arg value="src/test/java/com/google/protobuf/non_nested_extension_lite.proto" />
+                  <arg value="src/test/java/com/google/protobuf/test_bad_identifiers.proto" />
                   <arg
                     value="../src/google/protobuf/unittest_optimize_for.proto" />
                   <arg

+ 58 - 2
java/src/main/java/com/google/protobuf/AbstractMessage.java

@@ -32,9 +32,10 @@ package com.google.protobuf;
 
 import com.google.protobuf.Descriptors.Descriptor;
 import com.google.protobuf.Descriptors.FieldDescriptor;
+import com.google.protobuf.Internal.EnumLite;
 
-import java.io.InputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
@@ -167,11 +168,66 @@ public abstract class AbstractMessage extends AbstractMessageLite
   public int hashCode() {
     int hash = 41;
     hash = (19 * hash) + getDescriptorForType().hashCode();
-    hash = (53 * hash) + getAllFields().hashCode();
+    hash = hashFields(hash, getAllFields());
     hash = (29 * hash) + getUnknownFields().hashCode();
     return hash;
   }
 
+  /** Get a hash code for given fields and values, using the given seed. */
+  @SuppressWarnings("unchecked")
+  protected int hashFields(int hash, Map<FieldDescriptor, Object> map) {
+    for (Map.Entry<FieldDescriptor, Object> entry : map.entrySet()) {
+      FieldDescriptor field = entry.getKey();
+      Object value = entry.getValue();
+      hash = (37 * hash) + field.getNumber();
+      if (field.getType() != FieldDescriptor.Type.ENUM){
+        hash = (53 * hash) + value.hashCode();
+      } else if (field.isRepeated()) {
+        List<? extends EnumLite> list = (List<? extends EnumLite>) value;
+        hash = (53 * hash) + hashEnumList(list);
+      } else {
+        hash = (53 * hash) + hashEnum((EnumLite) value);
+      }
+    }
+    return hash;
+  }
+
+  /**
+   * Helper method for implementing {@link Message#hashCode()}.
+   * @see Boolean#hashCode()
+   */
+  protected static int hashLong(long n) {
+    return (int) (n ^ (n >>> 32));
+  }
+
+  /**
+   * Helper method for implementing {@link Message#hashCode()}.
+   * @see Boolean#hashCode()
+   */
+  protected static int hashBoolean(boolean b) {
+    return b ? 1231 : 1237;
+  }
+
+  /**
+   * Helper method for implementing {@link Message#hashCode()}.
+   * <p>
+   * This is needed because {@link java.lang.Enum#hashCode()} is final, but we
+   * need to use the field number as the hash code to ensure compatibility
+   * between statically and dynamically generated enum objects.
+   */
+  protected static int hashEnum(EnumLite e) {
+    return e.getNumber();
+  }
+
+  /** Helper method for implementing {@link Message#hashCode()}. */
+  protected static int hashEnumList(List<? extends EnumLite> list) {
+    int hash = 1;
+    for (EnumLite e : list) {
+      hash = 31 * hash + hashEnum(e);
+    }
+    return hash;
+  }
+
   // =================================================================
 
   /**

+ 12 - 0
java/src/main/java/com/google/protobuf/ByteString.java

@@ -193,6 +193,18 @@ public final class ByteString {
     System.arraycopy(bytes, sourceOffset, target, targetOffset, size);
   }
 
+  /**
+   * Copies bytes into a ByteBuffer.
+   *
+   * @param target ByteBuffer to copy into.
+   * @throws ReadOnlyBufferException if the {@code target} is read-only
+   * @throws BufferOverflowException if the {@code target}'s remaining()
+   *         space is not large enough to hold the data.
+   */
+  public void copyTo(ByteBuffer target) {
+    target.put(bytes, 0, bytes.length);
+  }
+
   /**
    * Copies bytes to a {@code byte[]}.
    */

+ 35 - 15
java/src/main/java/com/google/protobuf/CodedInputStream.java

@@ -67,7 +67,25 @@ public final class CodedInputStream {
    */
   public static CodedInputStream newInstance(final byte[] buf, final int off,
                                              final int len) {
-    return new CodedInputStream(buf, off, len);
+    CodedInputStream result = new CodedInputStream(buf, off, len);
+    try {
+      // Some uses of CodedInputStream can be more efficient if they know
+      // exactly how many bytes are available.  By pushing the end point of the
+      // buffer as a limit, we allow them to get this information via
+      // getBytesUntilLimit().  Pushing a limit that we know is at the end of
+      // the stream can never hurt, since we can never past that point anyway.
+      result.pushLimit(len);
+    } catch (InvalidProtocolBufferException ex) {
+      // The only reason pushLimit() might throw an exception here is if len
+      // is negative. Normally pushLimit()'s parameter comes directly off the
+      // wire, so it's important to catch exceptions in case of corrupt or
+      // malicious data. However, in this case, we expect that len is not a
+      // user-supplied value, so we can assume that it being negative indicates
+      // a programming error. Therefore, throwing an unchecked exception is
+      // appropriate.
+      throw new IllegalArgumentException(ex);
+    }
+    return result;
   }
 
   // -----------------------------------------------------------------
@@ -263,7 +281,9 @@ public final class CodedInputStream {
   /** Read a {@code bytes} field value from the stream. */
   public ByteString readBytes() throws IOException {
     final int size = readRawVarint32();
-    if (size <= (bufferSize - bufferPos) && size > 0) {
+    if (size == 0) {
+      return ByteString.EMPTY;
+    } else if (size <= (bufferSize - bufferPos) && size > 0) {
       // Fast path:  We already have the bytes in a contiguous buffer, so
       //   just copy directly from it.
       final ByteString result = ByteString.copyFrom(buffer, bufferPos, size);
@@ -368,8 +388,8 @@ public final class CodedInputStream {
    * has already read one byte.  This allows the caller to determine if EOF
    * has been reached before attempting to read.
    */
-  static int readRawVarint32(final int firstByte,
-                             final InputStream input) throws IOException {
+  public static int readRawVarint32(
+      final int firstByte, final InputStream input) throws IOException {
     if ((firstByte & 0x80) == 0) {
       return firstByte;
     }
@@ -847,19 +867,19 @@ public final class CodedInputStream {
     } else {
       // Skipping more bytes than are in the buffer.  First skip what we have.
       int pos = bufferSize - bufferPos;
-      totalBytesRetired += pos;
-      bufferPos = 0;
-      bufferSize = 0;
+      bufferPos = bufferSize;
 
-      // Then skip directly from the InputStream for the rest.
-      while (pos < size) {
-        final int n = (input == null) ? -1 : (int) input.skip(size - pos);
-        if (n <= 0) {
-          throw InvalidProtocolBufferException.truncatedMessage();
-        }
-        pos += n;
-        totalBytesRetired += n;
+      // Keep refilling the buffer until we get to the point we wanted to skip
+      // to.  This has the side effect of ensuring the limits are updated
+      // correctly.
+      refillBuffer(true);
+      while (size - pos > bufferSize) {
+        pos += bufferSize;
+        bufferPos = bufferSize;
+        refillBuffer(true);
       }
+
+      bufferPos = size - pos; 
     }
   }
 }

+ 55 - 3
java/src/main/java/com/google/protobuf/CodedOutputStream.java

@@ -33,6 +33,7 @@ package com.google.protobuf;
 import java.io.OutputStream;
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
+import java.io.InputStream;
 
 /**
  * Encodes and writes protocol message fields.
@@ -381,9 +382,8 @@ public final class CodedOutputStream {
 
   /** Write a {@code bytes} field to the stream. */
   public void writeBytesNoTag(final ByteString value) throws IOException {
-    final byte[] bytes = value.toByteArray();
-    writeRawVarint32(bytes.length);
-    writeRawBytes(bytes);
+    writeRawVarint32(value.size());
+    writeRawBytes(value);
   }
 
   /** Write a {@code uint32} field to the stream. */
@@ -870,6 +870,11 @@ public final class CodedOutputStream {
     writeRawByte((byte) value);
   }
 
+  /** Write a byte string. */
+  public void writeRawBytes(final ByteString value) throws IOException {
+    writeRawBytes(value, 0, value.size());
+  }
+
   /** Write an array of bytes. */
   public void writeRawBytes(final byte[] value) throws IOException {
     writeRawBytes(value, 0, value.length);
@@ -906,6 +911,53 @@ public final class CodedOutputStream {
     }
   }
 
+  /** Write part of a byte string. */
+  public void writeRawBytes(final ByteString value, int offset, int length)
+                            throws IOException {
+    if (limit - position >= length) {
+      // We have room in the current buffer.
+      value.copyTo(buffer, offset, position, length);
+      position += length;
+    } else {
+      // Write extends past current buffer.  Fill the rest of this buffer and
+      // flush.
+      final int bytesWritten = limit - position;
+      value.copyTo(buffer, offset, position, bytesWritten);
+      offset += bytesWritten;
+      length -= bytesWritten;
+      position = limit;
+      refreshBuffer();
+
+      // Now deal with the rest.
+      // Since we have an output stream, this is our buffer
+      // and buffer offset == 0
+      if (length <= limit) {
+        // Fits in new buffer.
+        value.copyTo(buffer, offset, 0, length);
+        position = length;
+      } else {
+        // Write is very big, but we can't do it all at once without allocating
+        // an a copy of the byte array since ByteString does not give us access
+        // to the underlying bytes. Use the InputStream interface on the
+        // ByteString and our buffer to copy between the two.
+        InputStream inputStreamFrom = value.newInput();
+        if (offset != inputStreamFrom.skip(offset)) {
+          throw new IllegalStateException("Skip failed? Should never happen.");
+        }
+        // Use the buffer as the temporary buffer to avoid allocating memory.
+        while (length > 0) {
+          int bytesToRead = Math.min(length, limit);
+          int bytesRead = inputStreamFrom.read(buffer, 0, bytesToRead);
+          if (bytesRead != bytesToRead) {
+            throw new IllegalStateException("Read failed? Should never happen");
+          }
+          output.write(buffer, 0, bytesRead);
+          length -= bytesRead;
+        }
+      }
+    }
+  }
+
   /** Encode and write a tag. */
   public void writeTag(final int fieldNumber, final int wireType)
                        throws IOException {

+ 8 - 0
java/src/main/java/com/google/protobuf/Descriptors.java

@@ -46,6 +46,11 @@ import java.io.UnsupportedEncodingException;
  * its fields and other information about a type.  You can get a message
  * type's descriptor by calling {@code MessageType.getDescriptor()}, or
  * (given a message object of the type) {@code message.getDescriptorForType()}.
+ * Furthermore, each message is associated with a {@link FileDescriptor} for
+ * a relevant {@code .proto} file. You can obtain it by calling
+ * {@code Descriptor.getFile()}. A {@link FileDescriptor} contains descriptors
+ * for all the messages defined in that file, and file descriptors for all the
+ * imported {@code .proto} files.
  *
  * Descriptors are built from DescriptorProtos, as defined in
  * {@code google/protobuf/descriptor.proto}.
@@ -55,6 +60,9 @@ import java.io.UnsupportedEncodingException;
 public final class Descriptors {
   /**
    * Describes a {@code .proto} file, including everything defined within.
+   * That includes, in particular, descriptors for all the messages and
+   * file descriptors for all other imported {@code .proto} files
+   * (dependencies).
    */
   public static final class FileDescriptor {
     /** Convert the descriptor to its protocol message representation. */

+ 161 - 85
java/src/main/java/com/google/protobuf/FieldSet.java

@@ -33,7 +33,6 @@ package com.google.protobuf;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Iterator;
-import java.util.TreeMap;
 import java.util.List;
 import java.util.Map;
 import java.io.IOException;
@@ -67,16 +66,12 @@ final class FieldSet<FieldDescriptorType extends
         MessageLite.Builder to, MessageLite from);
   }
 
-  private Map<FieldDescriptorType, Object> fields;
+  private final SmallSortedMap<FieldDescriptorType, Object> fields;
+  private boolean isImmutable;
 
   /** Construct a new FieldSet. */
   private FieldSet() {
-    // Use a TreeMap because fields need to be in canonical order when
-    // serializing.
-    // TODO(kenton):  Maybe use some sort of sparse array instead?  It would
-    //   even make sense to store the first 16 or so tags in a flat array
-    //   to make DynamicMessage faster.
-    fields = new TreeMap<FieldDescriptorType, Object>();
+    this.fields = SmallSortedMap.newFieldMap(16);
   }
 
   /**
@@ -84,7 +79,8 @@ final class FieldSet<FieldDescriptorType extends
    * DEFAULT_INSTANCE.
    */
   private FieldSet(final boolean dummy) {
-    this.fields = Collections.emptyMap();
+    this.fields = SmallSortedMap.newFieldMap(0);
+    makeImmutable();
   }
 
   /** Construct a new FieldSet. */
@@ -105,14 +101,45 @@ final class FieldSet<FieldDescriptorType extends
   /** Make this FieldSet immutable from this point forward. */
   @SuppressWarnings("unchecked")
   public void makeImmutable() {
-    for (final Map.Entry<FieldDescriptorType, Object> entry:
-         fields.entrySet()) {
-      if (entry.getKey().isRepeated()) {
-        final List value = (List)entry.getValue();
-        fields.put(entry.getKey(), Collections.unmodifiableList(value));
-      }
+    if (isImmutable) {
+      return;
     }
-    fields = Collections.unmodifiableMap(fields);
+    fields.makeImmutable();
+    isImmutable = true;
+  }
+
+  /**
+   * Retuns whether the FieldSet is immutable. This is true if it is the
+   * {@link #emptySet} or if {@link #makeImmutable} were called.
+   *
+   * @return whether the FieldSet is immutable.
+   */
+  public boolean isImmutable() {
+    return isImmutable;
+  }
+
+  /**
+   * Clones the FieldSet. The returned FieldSet will be mutable even if the
+   * original FieldSet was immutable.
+   *
+   * @return the newly cloned FieldSet
+   */
+  @Override
+  public FieldSet<FieldDescriptorType> clone() {
+    // We can't just call fields.clone because List objects in the map
+    // should not be shared.
+    FieldSet<FieldDescriptorType> clone = FieldSet.newFieldSet();
+    for (int i = 0; i < fields.getNumArrayEntries(); i++) {
+      Map.Entry<FieldDescriptorType, Object> entry = fields.getArrayEntryAt(i);
+      FieldDescriptorType descriptor = entry.getKey();
+      clone.setField(descriptor, entry.getValue());
+    }
+    for (Map.Entry<FieldDescriptorType, Object> entry :
+             fields.getOverflowEntries()) {
+      FieldDescriptorType descriptor = entry.getKey();
+      clone.setField(descriptor, entry.getValue());
+    }
+    return clone;
   }
 
   // =================================================================
@@ -126,12 +153,13 @@ final class FieldSet<FieldDescriptorType extends
    * Get a simple map containing all the fields.
    */
   public Map<FieldDescriptorType, Object> getAllFields() {
-    return Collections.unmodifiableMap(fields);
+    return fields.isImmutable() ? fields : Collections.unmodifiableMap(fields);
   }
 
   /**
-   * Get an iterator to the field map.  This iterator should not be leaked
-   * out of the protobuf library as it is not protected from mutation.
+   * Get an iterator to the field map. This iterator should not be leaked out
+   * of the protobuf library as it is not protected from mutation when
+   * fields is not immutable.
    */
   public Iterator<Map.Entry<FieldDescriptorType, Object>> iterator() {
     return fields.entrySet().iterator();
@@ -336,27 +364,39 @@ final class FieldSet<FieldDescriptorType extends
    * aren't actually present in the set, it is up to the caller to check
    * that all required fields are present.
    */
-  @SuppressWarnings("unchecked")
   public boolean isInitialized() {
-    for (final Map.Entry<FieldDescriptorType, Object> entry:
-         fields.entrySet()) {
-      final FieldDescriptorType descriptor = entry.getKey();
-      if (descriptor.getLiteJavaType() == WireFormat.JavaType.MESSAGE) {
-        if (descriptor.isRepeated()) {
-          for (final MessageLite element:
-               (List<MessageLite>) entry.getValue()) {
-            if (!element.isInitialized()) {
-              return false;
-            }
-          }
-        } else {
-          if (!((MessageLite) entry.getValue()).isInitialized()) {
+    for (int i = 0; i < fields.getNumArrayEntries(); i++) {
+      if (!isInitialized(fields.getArrayEntryAt(i))) {
+        return false;
+      }
+    }
+    for (final Map.Entry<FieldDescriptorType, Object> entry :
+             fields.getOverflowEntries()) {
+      if (!isInitialized(entry)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  @SuppressWarnings("unchecked")
+  private boolean isInitialized(
+      final Map.Entry<FieldDescriptorType, Object> entry) {
+    final FieldDescriptorType descriptor = entry.getKey();
+    if (descriptor.getLiteJavaType() == WireFormat.JavaType.MESSAGE) {
+      if (descriptor.isRepeated()) {
+        for (final MessageLite element:
+                 (List<MessageLite>) entry.getValue()) {
+          if (!element.isInitialized()) {
             return false;
           }
         }
+      } else {
+        if (!((MessageLite) entry.getValue()).isInitialized()) {
+          return false;
+        }
       }
     }
-
     return true;
   }
 
@@ -378,39 +418,48 @@ final class FieldSet<FieldDescriptorType extends
   /**
    * Like {@link #mergeFrom(Message)}, but merges from another {@link FieldSet}.
    */
-  @SuppressWarnings("unchecked")
   public void mergeFrom(final FieldSet<FieldDescriptorType> other) {
-    for (final Map.Entry<FieldDescriptorType, Object> entry:
-         other.fields.entrySet()) {
-      final FieldDescriptorType descriptor = entry.getKey();
-      final Object otherValue = entry.getValue();
+    for (int i = 0; i < other.fields.getNumArrayEntries(); i++) {
+      mergeFromField(other.fields.getArrayEntryAt(i));
+    }
+    for (final Map.Entry<FieldDescriptorType, Object> entry :
+             other.fields.getOverflowEntries()) {
+      mergeFromField(entry);
+    }
+  }
 
-      if (descriptor.isRepeated()) {
-        Object value = fields.get(descriptor);
-        if (value == null) {
-          // Our list is empty, but we still need to make a defensive copy of
-          // the other list since we don't know if the other FieldSet is still
-          // mutable.
-          fields.put(descriptor, new ArrayList((List) otherValue));
-        } else {
-          // Concatenate the lists.
-          ((List) value).addAll((List) otherValue);
-        }
-      } else if (descriptor.getLiteJavaType() == WireFormat.JavaType.MESSAGE) {
-        Object value = fields.get(descriptor);
-        if (value == null) {
-          fields.put(descriptor, otherValue);
-        } else {
-          // Merge the messages.
-          fields.put(descriptor,
-              descriptor.internalMergeFrom(
-                ((MessageLite) value).toBuilder(), (MessageLite) otherValue)
-              .build());
-        }
+  @SuppressWarnings("unchecked")
+  private void mergeFromField(
+      final Map.Entry<FieldDescriptorType, Object> entry) {
+    final FieldDescriptorType descriptor = entry.getKey();
+    final Object otherValue = entry.getValue();
 
+    if (descriptor.isRepeated()) {
+      Object value = fields.get(descriptor);
+      if (value == null) {
+        // Our list is empty, but we still need to make a defensive copy of
+        // the other list since we don't know if the other FieldSet is still
+        // mutable.
+        fields.put(descriptor, new ArrayList((List) otherValue));
       } else {
+        // Concatenate the lists.
+        ((List) value).addAll((List) otherValue);
+      }
+    } else if (descriptor.getLiteJavaType() == WireFormat.JavaType.MESSAGE) {
+      Object value = fields.get(descriptor);
+      if (value == null) {
         fields.put(descriptor, otherValue);
+      } else {
+        // Merge the messages.
+        fields.put(
+            descriptor,
+            descriptor.internalMergeFrom(
+                ((MessageLite) value).toBuilder(), (MessageLite) otherValue)
+            .build());
       }
+
+    } else {
+      fields.put(descriptor, otherValue);
     }
   }
 
@@ -468,8 +517,13 @@ final class FieldSet<FieldDescriptorType extends
   /** See {@link Message#writeTo(CodedOutputStream)}. */
   public void writeTo(final CodedOutputStream output)
                       throws IOException {
-    for (final Map.Entry<FieldDescriptorType, Object> entry:
-         fields.entrySet()) {
+    for (int i = 0; i < fields.getNumArrayEntries(); i++) {
+      final Map.Entry<FieldDescriptorType, Object> entry =
+          fields.getArrayEntryAt(i);
+      writeField(entry.getKey(), entry.getValue(), output);
+    }
+    for (final Map.Entry<FieldDescriptorType, Object> entry :
+         fields.getOverflowEntries()) {
       writeField(entry.getKey(), entry.getValue(), output);
     }
   }
@@ -479,16 +533,25 @@ final class FieldSet<FieldDescriptorType extends
    */
   public void writeMessageSetTo(final CodedOutputStream output)
                                 throws IOException {
-    for (final Map.Entry<FieldDescriptorType, Object> entry:
-         fields.entrySet()) {
-      final FieldDescriptorType descriptor = entry.getKey();
-      if (descriptor.getLiteJavaType() == WireFormat.JavaType.MESSAGE &&
-          !descriptor.isRepeated() && !descriptor.isPacked()) {
-        output.writeMessageSetExtension(entry.getKey().getNumber(),
-                                        (MessageLite) entry.getValue());
-      } else {
-        writeField(descriptor, entry.getValue(), output);
-      }
+    for (int i = 0; i < fields.getNumArrayEntries(); i++) {
+      writeMessageSetTo(fields.getArrayEntryAt(i), output);
+    }
+    for (final Map.Entry<FieldDescriptorType, Object> entry :
+             fields.getOverflowEntries()) {
+      writeMessageSetTo(entry, output);
+    }
+  }
+
+  private void writeMessageSetTo(
+      final Map.Entry<FieldDescriptorType, Object> entry,
+      final CodedOutputStream output) throws IOException {
+    final FieldDescriptorType descriptor = entry.getKey();
+    if (descriptor.getLiteJavaType() == WireFormat.JavaType.MESSAGE &&
+        !descriptor.isRepeated() && !descriptor.isPacked()) {
+      output.writeMessageSetExtension(entry.getKey().getNumber(),
+                                      (MessageLite) entry.getValue());
+    } else {
+      writeField(descriptor, entry.getValue(), output);
     }
   }
 
@@ -593,8 +656,13 @@ final class FieldSet<FieldDescriptorType extends
    */
   public int getSerializedSize() {
     int size = 0;
-    for (final Map.Entry<FieldDescriptorType, Object> entry:
-         fields.entrySet()) {
+    for (int i = 0; i < fields.getNumArrayEntries(); i++) {
+      final Map.Entry<FieldDescriptorType, Object> entry =
+          fields.getArrayEntryAt(i);
+      size += computeFieldSize(entry.getKey(), entry.getValue());
+    }
+    for (final Map.Entry<FieldDescriptorType, Object> entry :
+         fields.getOverflowEntries()) {
       size += computeFieldSize(entry.getKey(), entry.getValue());
     }
     return size;
@@ -605,20 +673,28 @@ final class FieldSet<FieldDescriptorType extends
    */
   public int getMessageSetSerializedSize() {
     int size = 0;
-    for (final Map.Entry<FieldDescriptorType, Object> entry:
-         fields.entrySet()) {
-      final FieldDescriptorType descriptor = entry.getKey();
-      if (descriptor.getLiteJavaType() == WireFormat.JavaType.MESSAGE &&
-          !descriptor.isRepeated() && !descriptor.isPacked()) {
-        size += CodedOutputStream.computeMessageSetExtensionSize(
-                  entry.getKey().getNumber(), (MessageLite) entry.getValue());
-      } else {
-        size += computeFieldSize(descriptor, entry.getValue());
-      }
+    for (int i = 0; i < fields.getNumArrayEntries(); i++) {
+      size += getMessageSetSerializedSize(fields.getArrayEntryAt(i));
+    }
+    for (final Map.Entry<FieldDescriptorType, Object> entry :
+             fields.getOverflowEntries()) {
+      size += getMessageSetSerializedSize(entry);
     }
     return size;
   }
 
+  private int getMessageSetSerializedSize(
+      final Map.Entry<FieldDescriptorType, Object> entry) {
+    final FieldDescriptorType descriptor = entry.getKey();
+    if (descriptor.getLiteJavaType() == WireFormat.JavaType.MESSAGE &&
+        !descriptor.isRepeated() && !descriptor.isPacked()) {
+      return CodedOutputStream.computeMessageSetExtensionSize(
+          entry.getKey().getNumber(), (MessageLite) entry.getValue());
+    } else {
+      return computeFieldSize(descriptor, entry.getValue());
+    }
+  }
+
   /**
    * Compute the number of bytes that would be needed to encode a
    * single tag/value pair of arbitrary type.

Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 554 - 123
java/src/main/java/com/google/protobuf/GeneratedMessage.java


+ 255 - 92
java/src/main/java/com/google/protobuf/GeneratedMessageLite.java

@@ -31,6 +31,10 @@
 package com.google.protobuf;
 
 import java.io.IOException;
+import java.io.ObjectStreamException;
+import java.io.Serializable;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
 import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
@@ -41,8 +45,14 @@ import java.util.Map;
  *
  * @author kenton@google.com Kenton Varda
  */
-public abstract class GeneratedMessageLite extends AbstractMessageLite {
-  protected GeneratedMessageLite() {}
+public abstract class GeneratedMessageLite extends AbstractMessageLite
+    implements Serializable {
+
+  protected GeneratedMessageLite() {
+  }
+
+  protected GeneratedMessageLite(Builder builder) {
+  }
 
   @SuppressWarnings("unchecked")
   public abstract static class Builder<MessageType extends GeneratedMessageLite,
@@ -50,6 +60,11 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite {
       extends AbstractMessageLite.Builder<BuilderType> {
     protected Builder() {}
 
+    //@Override (Java 1.6 override semantics, but we must support 1.5)
+    public BuilderType clear() {
+      return (BuilderType) this;
+    }
+
     // This is implemented here only to work around an apparent bug in the
     // Java compiler and/or build system.  See bug #1898463.  The mere presence
     // of this dummy clone() implementation makes it go away.
@@ -65,12 +80,6 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite {
     // Defined here for return type covariance.
     public abstract MessageType getDefaultInstanceForType();
 
-    /**
-     * Get the message being built.  We don't just pass this to the
-     * constructor because it becomes null when build() is called.
-     */
-    protected abstract MessageType internalGetResult();
-
     /**
      * Called by subclasses to parse an unknown field.
      * @return {@code true} unless the tag is an end-group tag.
@@ -86,15 +95,46 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite {
   // =================================================================
   // Extensions-related stuff
 
+  /**
+   * Lite equivalent of {@link com.google.protobuf.GeneratedMessage.ExtendableMessageOrBuilder}.
+   */
+  public interface ExtendableMessageOrBuilder<
+      MessageType extends ExtendableMessage> extends MessageLiteOrBuilder {
+
+    /** Check if a singular extension is present. */
+    <Type> boolean hasExtension(
+        GeneratedExtension<MessageType, Type> extension);
+
+    /** Get the number of elements in a repeated extension. */
+    <Type> int getExtensionCount(
+        GeneratedExtension<MessageType, List<Type>> extension);
+
+    /** Get the value of an extension. */
+    <Type> Type getExtension(GeneratedExtension<MessageType, Type> extension);
+
+    /** Get one element of a repeated extension. */
+    <Type> Type getExtension(
+        GeneratedExtension<MessageType, List<Type>> extension,
+        int index);
+  }
+
   /**
    * Lite equivalent of {@link GeneratedMessage.ExtendableMessage}.
    */
   public abstract static class ExtendableMessage<
         MessageType extends ExtendableMessage<MessageType>>
-      extends GeneratedMessageLite {
-    protected ExtendableMessage() {}
-    private final FieldSet<ExtensionDescriptor> extensions =
-        FieldSet.newFieldSet();
+      extends GeneratedMessageLite
+      implements ExtendableMessageOrBuilder<MessageType> {
+
+    private final FieldSet<ExtensionDescriptor> extensions;
+
+    protected ExtendableMessage() {
+      this.extensions = FieldSet.newFieldSet();
+    }
+
+    protected ExtendableMessage(ExtendableBuilder<MessageType, ?> builder) {
+      this.extensions = builder.buildExtensions();
+    }
 
     private void verifyExtensionContainingType(
         final GeneratedExtension<MessageType, ?> extension) {
@@ -108,13 +148,15 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite {
     }
 
     /** Check if a singular extension is present. */
-    public final boolean hasExtension(
-        final GeneratedExtension<MessageType, ?> extension) {
+    //@Override (Java 1.6 override semantics, but we must support 1.5)
+    public final <Type> boolean hasExtension(
+        final GeneratedExtension<MessageType, Type> extension) {
       verifyExtensionContainingType(extension);
       return extensions.hasField(extension.descriptor);
     }
 
     /** Get the number of elements in a repeated extension. */
+    //@Override (Java 1.6 override semantics, but we must support 1.5)
     public final <Type> int getExtensionCount(
         final GeneratedExtension<MessageType, List<Type>> extension) {
       verifyExtensionContainingType(extension);
@@ -122,6 +164,7 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite {
     }
 
     /** Get the value of an extension. */
+    //@Override (Java 1.6 override semantics, but we must support 1.5)
     @SuppressWarnings("unchecked")
     public final <Type> Type getExtension(
         final GeneratedExtension<MessageType, Type> extension) {
@@ -135,6 +178,7 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite {
     }
 
     /** Get one element of a repeated extension. */
+    //@Override (Java 1.6 override semantics, but we must support 1.5)
     @SuppressWarnings("unchecked")
     public final <Type> Type getExtension(
         final GeneratedExtension<MessageType, List<Type>> extension,
@@ -214,53 +258,104 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite {
   public abstract static class ExtendableBuilder<
         MessageType extends ExtendableMessage<MessageType>,
         BuilderType extends ExtendableBuilder<MessageType, BuilderType>>
-      extends Builder<MessageType, BuilderType> {
+      extends Builder<MessageType, BuilderType>
+      implements ExtendableMessageOrBuilder<MessageType> {
     protected ExtendableBuilder() {}
 
-    // This is implemented here only to work around an apparent bug in the
-    // Java compiler and/or build system.  See bug #1898463.  The mere presence
-    // of this dummy clone() implementation makes it go away.
+    private FieldSet<ExtensionDescriptor> extensions = FieldSet.emptySet();
+    private boolean extensionsIsMutable;
+
     @Override
-    public BuilderType clone() {
-      throw new UnsupportedOperationException(
-          "This is supposed to be overridden by subclasses.");
+    public BuilderType clear() {
+      extensions.clear();
+      extensionsIsMutable = false;
+      return super.clear();
     }
 
-    @Override
-    protected abstract MessageType internalGetResult();
+    private void ensureExtensionsIsMutable() {
+      if (!extensionsIsMutable) {
+        extensions = extensions.clone();
+        extensionsIsMutable = true;
+      }
+    }
 
-    /** Check if a singular extension is present. */
-    public final boolean hasExtension(
+    /**
+     * Called by the build code path to create a copy of the extensions for
+     * building the message.
+     */
+    private FieldSet<ExtensionDescriptor> buildExtensions() {
+      extensions.makeImmutable();
+      extensionsIsMutable = false;
+      return extensions;
+    }
+
+    private void verifyExtensionContainingType(
         final GeneratedExtension<MessageType, ?> extension) {
-      return internalGetResult().hasExtension(extension);
+      if (extension.getContainingTypeDefaultInstance() !=
+          getDefaultInstanceForType()) {
+        // This can only happen if someone uses unchecked operations.
+        throw new IllegalArgumentException(
+          "This extension is for a different message type.  Please make " +
+          "sure that you are not suppressing any generics type warnings.");
+      }
+    }
+
+    /** Check if a singular extension is present. */
+    //@Override (Java 1.6 override semantics, but we must support 1.5)
+    public final <Type> boolean hasExtension(
+        final GeneratedExtension<MessageType, Type> extension) {
+      verifyExtensionContainingType(extension);
+      return extensions.hasField(extension.descriptor);
     }
 
     /** Get the number of elements in a repeated extension. */
+    //@Override (Java 1.6 override semantics, but we must support 1.5)
     public final <Type> int getExtensionCount(
         final GeneratedExtension<MessageType, List<Type>> extension) {
-      return internalGetResult().getExtensionCount(extension);
+      verifyExtensionContainingType(extension);
+      return extensions.getRepeatedFieldCount(extension.descriptor);
     }
 
     /** Get the value of an extension. */
+    //@Override (Java 1.6 override semantics, but we must support 1.5)
+    @SuppressWarnings("unchecked")
     public final <Type> Type getExtension(
         final GeneratedExtension<MessageType, Type> extension) {
-      return internalGetResult().getExtension(extension);
+      verifyExtensionContainingType(extension);
+      final Object value = extensions.getField(extension.descriptor);
+      if (value == null) {
+        return extension.defaultValue;
+      } else {
+        return (Type) value;
+      }
     }
 
     /** Get one element of a repeated extension. */
+    @SuppressWarnings("unchecked")
+    //@Override (Java 1.6 override semantics, but we must support 1.5)
     public final <Type> Type getExtension(
         final GeneratedExtension<MessageType, List<Type>> extension,
         final int index) {
-      return internalGetResult().getExtension(extension, index);
+      verifyExtensionContainingType(extension);
+      return (Type) extensions.getRepeatedField(extension.descriptor, index);
+    }
+
+    // This is implemented here only to work around an apparent bug in the
+    // Java compiler and/or build system.  See bug #1898463.  The mere presence
+    // of this dummy clone() implementation makes it go away.
+    @Override
+    public BuilderType clone() {
+      throw new UnsupportedOperationException(
+          "This is supposed to be overridden by subclasses.");
     }
 
     /** Set the value of an extension. */
     public final <Type> BuilderType setExtension(
         final GeneratedExtension<MessageType, Type> extension,
         final Type value) {
-      final ExtendableMessage<MessageType> message = internalGetResult();
-      message.verifyExtensionContainingType(extension);
-      message.extensions.setField(extension.descriptor, value);
+      verifyExtensionContainingType(extension);
+      ensureExtensionsIsMutable();
+      extensions.setField(extension.descriptor, value);
       return (BuilderType) this;
     }
 
@@ -268,9 +363,9 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite {
     public final <Type> BuilderType setExtension(
         final GeneratedExtension<MessageType, List<Type>> extension,
         final int index, final Type value) {
-      final ExtendableMessage<MessageType> message = internalGetResult();
-      message.verifyExtensionContainingType(extension);
-      message.extensions.setRepeatedField(extension.descriptor, index, value);
+      verifyExtensionContainingType(extension);
+      ensureExtensionsIsMutable();
+      extensions.setRepeatedField(extension.descriptor, index, value);
       return (BuilderType) this;
     }
 
@@ -278,21 +373,26 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite {
     public final <Type> BuilderType addExtension(
         final GeneratedExtension<MessageType, List<Type>> extension,
         final Type value) {
-      final ExtendableMessage<MessageType> message = internalGetResult();
-      message.verifyExtensionContainingType(extension);
-      message.extensions.addRepeatedField(extension.descriptor, value);
+      verifyExtensionContainingType(extension);
+      ensureExtensionsIsMutable();
+      extensions.addRepeatedField(extension.descriptor, value);
       return (BuilderType) this;
     }
 
     /** Clear an extension. */
     public final <Type> BuilderType clearExtension(
         final GeneratedExtension<MessageType, ?> extension) {
-      final ExtendableMessage<MessageType> message = internalGetResult();
-      message.verifyExtensionContainingType(extension);
-      message.extensions.clearField(extension.descriptor);
+      verifyExtensionContainingType(extension);
+      ensureExtensionsIsMutable();
+      extensions.clearField(extension.descriptor);
       return (BuilderType) this;
     }
 
+    /** Called by subclasses to check if all extensions are initialized. */
+    protected boolean extensionsAreInitialized() {
+      return extensions.isInitialized();
+    }
+
     /**
      * Called by subclasses to parse an unknown field or an extension.
      * @return {@code true} unless the tag is an end-group tag.
@@ -302,9 +402,6 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite {
         final CodedInputStream input,
         final ExtensionRegistryLite extensionRegistry,
         final int tag) throws IOException {
-      final FieldSet<ExtensionDescriptor> extensions =
-          ((ExtendableMessage) internalGetResult()).extensions;
-
       final int wireType = WireFormat.getTagWireType(tag);
       final int fieldNumber = WireFormat.getTagFieldNumber(tag);
 
@@ -347,6 +444,7 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite {
               // enum, drop it (don't even add it to unknownFields).
               return true;
             }
+            ensureExtensionsIsMutable();
             extensions.addRepeatedField(extension.descriptor, value);
           }
         } else {
@@ -354,6 +452,7 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite {
             final Object value =
               FieldSet.readPrimitiveField(input,
                                           extension.descriptor.getLiteType());
+            ensureExtensionsIsMutable();
             extensions.addRepeatedField(extension.descriptor, value);
           }
         }
@@ -400,8 +499,10 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite {
         }
 
         if (extension.descriptor.isRepeated()) {
+          ensureExtensionsIsMutable();
           extensions.addRepeatedField(extension.descriptor, value);
         } else {
+          ensureExtensionsIsMutable();
           extensions.setField(extension.descriptor, value);
         }
       }
@@ -410,8 +511,8 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite {
     }
 
     protected final void mergeExtensionFields(final MessageType other) {
-      ((ExtendableMessage) internalGetResult()).extensions.mergeFrom(
-          ((ExtendableMessage) other).extensions);
+      ensureExtensionsIsMutable();
+      extensions.mergeFrom(((ExtendableMessage) other).extensions);
     }
   }
 
@@ -420,8 +521,40 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite {
   /** For use by generated code only. */
   public static <ContainingType extends MessageLite, Type>
       GeneratedExtension<ContainingType, Type>
-      newGeneratedExtension() {
-    return new GeneratedExtension<ContainingType, Type>();
+          newSingularGeneratedExtension(
+              final ContainingType containingTypeDefaultInstance,
+              final Type defaultValue,
+              final MessageLite messageDefaultInstance,
+              final Internal.EnumLiteMap<?> enumTypeMap,
+              final int number,
+              final WireFormat.FieldType type) {
+    return new GeneratedExtension<ContainingType, Type>(
+        containingTypeDefaultInstance,
+        defaultValue,
+        messageDefaultInstance,
+        new ExtensionDescriptor(enumTypeMap, number, type,
+                                false /* isRepeated */,
+                                false /* isPacked */));
+  }
+
+  /** For use by generated code only. */
+  public static <ContainingType extends MessageLite, Type>
+      GeneratedExtension<ContainingType, Type>
+          newRepeatedGeneratedExtension(
+              final ContainingType containingTypeDefaultInstance,
+              final MessageLite messageDefaultInstance,
+              final Internal.EnumLiteMap<?> enumTypeMap,
+              final int number,
+              final WireFormat.FieldType type,
+              final boolean isPacked) {
+    @SuppressWarnings("unchecked")  // Subclasses ensure Type is a List
+    Type emptyList = (Type) Collections.emptyList();
+    return new GeneratedExtension<ContainingType, Type>(
+        containingTypeDefaultInstance,
+        emptyList,
+        messageDefaultInstance,
+        new ExtensionDescriptor(
+            enumTypeMap, number, type, true /* isRepeated */, isPacked));
   }
 
   private static final class ExtensionDescriptor
@@ -489,60 +622,33 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite {
    */
   public static final class GeneratedExtension<
       ContainingType extends MessageLite, Type> {
-    // We can't always initialize a GeneratedExtension when we first construct
-    // it due to initialization order difficulties (namely, the default
-    // instances may not have been constructed yet).  So, we construct an
-    // uninitialized GeneratedExtension once, then call internalInit() on it
-    // later.  Generated code will always call internalInit() on all extensions
-    // as part of the static initialization code, and internalInit() throws an
-    // exception if called more than once, so this method is useless to users.
-    private GeneratedExtension() {}
-
-    private void internalInit(
+
+    private GeneratedExtension(
         final ContainingType containingTypeDefaultInstance,
         final Type defaultValue,
         final MessageLite messageDefaultInstance,
         final ExtensionDescriptor descriptor) {
+      // Defensive checks to verify the correct initialization order of
+      // GeneratedExtensions and their related GeneratedMessages.
+      if (containingTypeDefaultInstance == null) {
+        throw new IllegalArgumentException(
+            "Null containingTypeDefaultInstance");
+      }
+      if (descriptor.getLiteType() == WireFormat.FieldType.MESSAGE &&
+          messageDefaultInstance == null) {
+        throw new IllegalArgumentException(
+            "Null messageDefaultInstance");
+      }
       this.containingTypeDefaultInstance = containingTypeDefaultInstance;
       this.defaultValue = defaultValue;
       this.messageDefaultInstance = messageDefaultInstance;
       this.descriptor = descriptor;
     }
 
-    /** For use by generated code only. */
-    public void internalInitSingular(
-        final ContainingType containingTypeDefaultInstance,
-        final Type defaultValue,
-        final MessageLite messageDefaultInstance,
-        final Internal.EnumLiteMap<?> enumTypeMap,
-        final int number,
-        final WireFormat.FieldType type) {
-      internalInit(
-        containingTypeDefaultInstance, defaultValue, messageDefaultInstance,
-        new ExtensionDescriptor(enumTypeMap, number, type,
-          false /* isRepeated */, false /* isPacked */));
-    }
-
-    /** For use by generated code only. */
-    @SuppressWarnings("unchecked")
-    public void internalInitRepeated(
-        final ContainingType containingTypeDefaultInstance,
-        final MessageLite messageDefaultInstance,
-        final Internal.EnumLiteMap<?> enumTypeMap,
-        final int number,
-        final WireFormat.FieldType type,
-        final boolean isPacked) {
-      internalInit(
-        containingTypeDefaultInstance, (Type) Collections.emptyList(),
-        messageDefaultInstance,
-        new ExtensionDescriptor(
-          enumTypeMap, number, type, true /* isRepeated */, isPacked));
-    }
-
-    private ContainingType containingTypeDefaultInstance;
-    private Type defaultValue;
-    private MessageLite messageDefaultInstance;
-    private ExtensionDescriptor descriptor;
+    private final ContainingType containingTypeDefaultInstance;
+    private final Type defaultValue;
+    private final MessageLite messageDefaultInstance;
+    private final ExtensionDescriptor descriptor;
 
     /**
      * Default instance of the type being extended, used to identify that type.
@@ -564,4 +670,61 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite {
       return messageDefaultInstance;
     }
   }
+
+  /**
+   * A serialized (serializable) form of the generated message.  Stores the
+   * message as a class name and a byte array.
+   */
+  static final class SerializedForm implements Serializable {
+    private static final long serialVersionUID = 0L;
+
+    private String messageClassName;
+    private byte[] asBytes;
+
+    /**
+     * Creates the serialized form by calling {@link com.google.protobuf.MessageLite#toByteArray}.
+     * @param regularForm the message to serialize
+     */
+    SerializedForm(MessageLite regularForm) {
+      messageClassName = regularForm.getClass().getName();
+      asBytes = regularForm.toByteArray();
+    }
+
+    /**
+     * When read from an ObjectInputStream, this method converts this object
+     * back to the regular form.  Part of Java's serialization magic.
+     * @return a GeneratedMessage of the type that was serialized
+     */
+    @SuppressWarnings("unchecked")
+    protected Object readResolve() throws ObjectStreamException {
+      try {
+        Class messageClass = Class.forName(messageClassName);
+        Method newBuilder = messageClass.getMethod("newBuilder");
+        MessageLite.Builder builder =
+            (MessageLite.Builder) newBuilder.invoke(null);
+        builder.mergeFrom(asBytes);
+        return builder.buildPartial();
+      } catch (ClassNotFoundException e) {
+        throw new RuntimeException("Unable to find proto buffer class", e);
+      } catch (NoSuchMethodException e) {
+        throw new RuntimeException("Unable to find newBuilder method", e);
+      } catch (IllegalAccessException e) {
+        throw new RuntimeException("Unable to call newBuilder method", e);
+      } catch (InvocationTargetException e) {
+        throw new RuntimeException("Error calling newBuilder", e.getCause());
+      } catch (InvalidProtocolBufferException e) {
+        throw new RuntimeException("Unable to understand proto buffer", e);
+      }
+    }
+  }
+
+  /**
+   * Replaces this object in the output stream with a serialized form.
+   * Part of Java's serialization magic.  Generated sub-classes must override
+   * this method by calling <code>return super.writeReplace();</code>
+   * @return a SerializedForm of this message
+   */
+  protected Object writeReplace() throws ObjectStreamException {
+    return new SerializedForm(this);
+  }
 }

+ 85 - 0
java/src/main/java/com/google/protobuf/Internal.java

@@ -99,6 +99,91 @@ public class Internal {
     }
   }
 
+  /**
+   * Helper called by generated code to determine if a byte array is a valid
+   * UTF-8 encoded string such that the original bytes can be converted to
+   * a String object and then back to a byte array round tripping the bytes
+   * without loss.
+   * <p>
+   * This is inspired by UTF_8.java in sun.nio.cs.
+   *
+   * @param byteString the string to check
+   * @return whether the byte array is round trippable
+   */
+  public static boolean isValidUtf8(ByteString byteString) {
+    int index = 0;
+    int size = byteString.size();
+    // To avoid the masking, we could change this to use bytes;
+    // Then X > 0xC2 gets turned into X < -0xC2; X < 0x80
+    // gets turned into X >= 0, etc.
+
+    while (index < size) {
+      int byte1 = byteString.byteAt(index++) & 0xFF;
+      if (byte1 < 0x80) {
+        // fast loop for single bytes
+        continue;
+
+        // we know from this point on that we have 2-4 byte forms
+      } else if (byte1 < 0xC2 || byte1 > 0xF4) {
+        // catch illegal first bytes: < C2 or > F4
+        return false;
+      }
+      if (index >= size) {
+        // fail if we run out of bytes
+        return false;
+      }
+      int byte2 = byteString.byteAt(index++) & 0xFF;
+      if (byte2 < 0x80 || byte2 > 0xBF) {
+        // general trail-byte test
+        return false;
+      }
+      if (byte1 <= 0xDF) {
+        // two-byte form; general trail-byte test is sufficient
+        continue;
+      }
+
+      // we know from this point on that we have 3 or 4 byte forms
+      if (index >= size) {
+        // fail if we run out of bytes
+        return false;
+      }
+      int byte3 = byteString.byteAt(index++) & 0xFF;
+      if (byte3 < 0x80 || byte3 > 0xBF) {
+        // general trail-byte test
+        return false;
+      }
+      if (byte1 <= 0xEF) {
+        // three-byte form. Vastly more frequent than four-byte forms
+        // The following has an extra test, but not worth restructuring
+        if (byte1 == 0xE0 && byte2 < 0xA0 ||
+            byte1 == 0xED && byte2 > 0x9F) {
+          // check special cases of byte2
+          return false;
+        }
+
+      } else {
+        // four-byte form
+
+        if (index >= size) {
+          // fail if we run out of bytes
+          return false;
+        }
+        int byte4 = byteString.byteAt(index++) & 0xFF;
+        if (byte4 < 0x80 || byte4 > 0xBF) {
+          // general trail-byte test
+          return false;
+        }
+        // The following has an extra test, but not worth restructuring
+        if (byte1 == 0xF0 && byte2 < 0x90 ||
+            byte1 == 0xF4 && byte2 > 0x8F) {
+          // check special cases of byte2
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
   /**
    * Interface for an enum value or value descriptor, to be used in FieldSet.
    * The lite library stores enum values directly in FieldSets but the full

+ 155 - 0
java/src/main/java/com/google/protobuf/LazyStringArrayList.java

@@ -0,0 +1,155 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.util.List;
+import java.util.AbstractList;
+import java.util.ArrayList;
+import java.util.RandomAccess;
+import java.util.Collection;
+
+/**
+ * An implementation of {@link LazyStringList} that wraps an ArrayList. Each
+ * element is either a ByteString or a String. It caches the last one requested
+ * which is most likely the one needed next. This minimizes memory usage while
+ * satisfying the most common use cases.
+ * <p>
+ * <strong>Note that this implementation is not synchronized.</strong>
+ * If multiple threads access an <tt>ArrayList</tt> instance concurrently,
+ * and at least one of the threads modifies the list structurally, it
+ * <i>must</i> be synchronized externally.  (A structural modification is
+ * any operation that adds or deletes one or more elements, or explicitly
+ * resizes the backing array; merely setting the value of an element is not
+ * a structural modification.)  This is typically accomplished by
+ * synchronizing on some object that naturally encapsulates the list.
+ * <p>
+ * If the implementation is accessed via concurrent reads, this is thread safe.
+ * Conversions are done in a thread safe manner. It's possible that the
+ * conversion may happen more than once if two threads attempt to access the
+ * same element and the modifications were not visible to each other, but this
+ * will not result in any corruption of the list or change in behavior other
+ * than performance.
+ *
+ * @author jonp@google.com (Jon Perlow)
+ */
+public class LazyStringArrayList extends AbstractList<String>
+    implements LazyStringList, RandomAccess {
+
+  public final static LazyStringList EMPTY = new UnmodifiableLazyStringList(
+      new LazyStringArrayList());
+
+  private final List<Object> list;
+
+  public LazyStringArrayList() {
+    list = new ArrayList<Object>();
+  }
+
+  public LazyStringArrayList(List<String> from) {
+    list = new ArrayList<Object>(from);
+  }
+
+  @Override
+  public String get(int index) {
+    Object o = list.get(index);
+    if (o instanceof String) {
+      return (String) o;
+    } else {
+      ByteString bs = (ByteString) o;
+      String s = bs.toStringUtf8();
+      if (Internal.isValidUtf8(bs)) {
+        list.set(index, s);
+      }
+      return s;
+    }
+  }
+
+  @Override
+  public int size() {
+    return list.size();
+  }
+
+  @Override
+  public String set(int index, String s) {
+    Object o = list.set(index, s);
+    return asString(o);
+  }
+
+  @Override
+  public void add(int index, String element) {
+    list.add(index, element);
+    modCount++;
+  }
+
+  @Override
+  public boolean addAll(int index, Collection<? extends String> c) {
+    boolean ret = list.addAll(index, c);
+    modCount++;
+    return ret;
+  }
+
+  @Override
+  public String remove(int index) {
+    Object o = list.remove(index);
+    modCount++;
+    return asString(o);
+  }
+
+  public void clear() {
+    list.clear();
+    modCount++;
+  }
+
+  // @Override
+  public void add(ByteString element) {
+    list.add(element);
+    modCount++;
+  }
+
+  // @Override
+  public ByteString getByteString(int index) {
+    Object o = list.get(index);
+    if (o instanceof String) {
+      ByteString b = ByteString.copyFromUtf8((String) o);
+      list.set(index, b);
+      return b;
+    } else {
+      return (ByteString) o;
+    }
+  }
+
+  private String asString(Object o) {
+    if (o instanceof String) {
+      return (String) o;
+    } else {
+      return ((ByteString) o).toStringUtf8();
+    }
+  }
+}

+ 72 - 0
java/src/main/java/com/google/protobuf/LazyStringList.java

@@ -0,0 +1,72 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.util.List;
+
+/**
+ * An interface extending List&lt;String&gt; that also provides access to the
+ * items of the list as UTF8-encoded ByteString objects. This is used by the
+ * protocol buffer implementation to support lazily converting bytes parsed
+ * over the wire to String objects until needed and also increases the
+ * efficiency of serialization if the String was never requested as the
+ * ByteString is already cached.
+ * <p>
+ * This only adds additional methods that are required for the use in the
+ * protocol buffer code in order to be able successfuly round trip byte arrays
+ * through parsing and serialization without conversion to strings. It's not
+ * attempting to support the functionality of say List&ltByteString&gt, hence
+ * why only these two very specific methods are added.
+ *
+ * @author jonp@google.com (Jon Perlow)
+ */
+public interface LazyStringList extends List<String> {
+
+  /**
+   * Returns the element at the specified position in this list as a ByteString.
+   *
+   * @param index index of the element to return
+   * @return the element at the specified position in this list
+   * @throws IndexOutOfBoundsException if the index is out of range
+   *         (<tt>index &lt; 0 || index &gt;= size()</tt>)
+   */
+  ByteString getByteString(int index);
+
+  /**
+   * Appends the specified element to the end of this list (optional
+   * operation).
+   *
+   * @param element element to be appended to this list
+   * @throws UnsupportedOperationException if the <tt>add</tt> operation
+   *         is not supported by this list
+   */
+  void add(ByteString element);
+}

+ 7 - 97
java/src/main/java/com/google/protobuf/Message.java

@@ -48,69 +48,7 @@ import java.util.Map;
  *
  * @author kenton@google.com Kenton Varda
  */
-public interface Message extends MessageLite {
-  /**
-   * Get the message's type's descriptor.  This differs from the
-   * {@code getDescriptor()} method of generated message classes in that
-   * this method is an abstract method of the {@code Message} interface
-   * whereas {@code getDescriptor()} is a static method of a specific class.
-   * They return the same thing.
-   */
-  Descriptors.Descriptor getDescriptorForType();
-
-  // (From MessageLite, re-declared here only for return type covariance.)
-  Message getDefaultInstanceForType();
-
-  /**
-   * Returns a collection of all the fields in this message which are set
-   * and their corresponding values.  A singular ("required" or "optional")
-   * field is set iff hasField() returns true for that field.  A "repeated"
-   * field is set iff getRepeatedFieldSize() is greater than zero.  The
-   * values are exactly what would be returned by calling
-   * {@link #getField(Descriptors.FieldDescriptor)} for each field.  The map
-   * is guaranteed to be a sorted map, so iterating over it will return fields
-   * in order by field number.
-   */
-  Map<Descriptors.FieldDescriptor, Object> getAllFields();
-
-  /**
-   * Returns true if the given field is set.  This is exactly equivalent to
-   * calling the generated "has" accessor method corresponding to the field.
-   * @throws IllegalArgumentException The field is a repeated field, or
-   *           {@code field.getContainingType() != getDescriptorForType()}.
-   */
-  boolean hasField(Descriptors.FieldDescriptor field);
-
-  /**
-   * Obtains the value of the given field, or the default value if it is
-   * not set.  For primitive fields, the boxed primitive value is returned.
-   * For enum fields, the EnumValueDescriptor for the value is returend. For
-   * embedded message fields, the sub-message is returned.  For repeated
-   * fields, a java.util.List is returned.
-   */
-  Object getField(Descriptors.FieldDescriptor field);
-
-  /**
-   * Gets the number of elements of a repeated field.  This is exactly
-   * equivalent to calling the generated "Count" accessor method corresponding
-   * to the field.
-   * @throws IllegalArgumentException The field is not a repeated field, or
-   *           {@code field.getContainingType() != getDescriptorForType()}.
-   */
-  int getRepeatedFieldCount(Descriptors.FieldDescriptor field);
-
-  /**
-   * Gets an element of a repeated field.  For primitive fields, the boxed
-   * primitive value is returned.  For enum fields, the EnumValueDescriptor
-   * for the value is returend. For embedded message fields, the sub-message
-   * is returned.
-   * @throws IllegalArgumentException The field is not a repeated field, or
-   *           {@code field.getContainingType() != getDescriptorForType()}.
-   */
-  Object getRepeatedField(Descriptors.FieldDescriptor field, int index);
-
-  /** Get the {@link UnknownFieldSet} for this message. */
-  UnknownFieldSet getUnknownFields();
+public interface Message extends MessageLite, MessageOrBuilder {
 
   // -----------------------------------------------------------------
   // Comparison and hashing
@@ -119,7 +57,8 @@ public interface Message extends MessageLite {
    * Compares the specified object with this message for equality.  Returns
    * <tt>true</tt> if the given object is a message of the same type (as
    * defined by {@code getDescriptorForType()}) and has identical values for
-   * all of its fields.
+   * all of its fields.  Subclasses must implement this; inheriting
+   * {@code Object.equals()} is incorrect.
    *
    * @param other object to be compared for equality with this message
    * @return <tt>true</tt> if the specified object is equal to this message
@@ -129,8 +68,9 @@ public interface Message extends MessageLite {
 
   /**
    * Returns the hash code value for this message.  The hash code of a message
-   * is defined to be <tt>getDescriptor().hashCode() ^ map.hashCode()</tt>,
-   * where <tt>map</tt> is a map of field numbers to field values.
+   * should mix the message's type (object identity of the decsriptor) with its
+   * contents (known and unknown field values).  Subclasses must implement this;
+   * inheriting {@code Object.hashCode()} is incorrect.
    *
    * @return the hash code value for this message
    * @see Map#hashCode()
@@ -158,7 +98,7 @@ public interface Message extends MessageLite {
   /**
    * Abstract interface implemented by Protocol Message builders.
    */
-  interface Builder extends MessageLite.Builder {
+  interface Builder extends MessageLite.Builder, MessageOrBuilder {
     // (From MessageLite.Builder, re-declared here only for return type
     // covariance.)
     Builder clear();
@@ -197,17 +137,6 @@ public interface Message extends MessageLite {
      */
     Descriptors.Descriptor getDescriptorForType();
 
-    // (From MessageLite.Builder, re-declared here only for return type
-    // covariance.)
-    Message getDefaultInstanceForType();
-
-    /**
-     * Like {@link Message#getAllFields()}.  The returned map may or may not
-     * reflect future changes to the builder.  Either way, the returned map is
-     * itself unmodifiable.
-     */
-    Map<Descriptors.FieldDescriptor, Object> getAllFields();
-
     /**
      * Create a Builder for messages of the appropriate type for the given
      * field.  Messages built with this can then be passed to setField(),
@@ -215,12 +144,6 @@ public interface Message extends MessageLite {
      */
     Builder newBuilderForField(Descriptors.FieldDescriptor field);
 
-    /** Like {@link Message#hasField(Descriptors.FieldDescriptor)} */
-    boolean hasField(Descriptors.FieldDescriptor field);
-
-    /** Like {@link Message#getField(Descriptors.FieldDescriptor)} */
-    Object getField(Descriptors.FieldDescriptor field);
-
     /**
      * Sets a field to the given value.  The value must be of the correct type
      * for this field, i.e. the same type that
@@ -234,16 +157,6 @@ public interface Message extends MessageLite {
      */
     Builder clearField(Descriptors.FieldDescriptor field);
 
-    /**
-     * Like {@link Message#getRepeatedFieldCount(Descriptors.FieldDescriptor)}
-     */
-    int getRepeatedFieldCount(Descriptors.FieldDescriptor field);
-
-    /**
-     * Like {@link Message#getRepeatedField(Descriptors.FieldDescriptor,int)}
-     */
-    Object getRepeatedField(Descriptors.FieldDescriptor field, int index);
-
     /**
      * Sets an element of a repeated field to the given value.  The value must
      * be of the correct type for this field, i.e. the same type that
@@ -262,9 +175,6 @@ public interface Message extends MessageLite {
      */
     Builder addRepeatedField(Descriptors.FieldDescriptor field, Object value);
 
-    /** Get the {@link UnknownFieldSet} for this message. */
-    UnknownFieldSet getUnknownFields();
-
     /** Set the {@link UnknownFieldSet} for this message. */
     Builder setUnknownFields(UnknownFieldSet unknownFields);
 

+ 18 - 28
java/src/main/java/com/google/protobuf/MessageLite.java

@@ -64,22 +64,8 @@ import java.io.OutputStream;
  *
  * @author kenton@google.com Kenton Varda
  */
-public interface MessageLite {
-  /**
-   * Get an instance of the type with all fields set to their default values.
-   * This may or may not be a singleton.  This differs from the
-   * {@code getDefaultInstance()} method of generated message classes in that
-   * this method is an abstract method of the {@code MessageLite} interface
-   * whereas {@code getDefaultInstance()} is a static method of a specific
-   * class.  They return the same thing.
-   */
-  MessageLite getDefaultInstanceForType();
+public interface MessageLite extends MessageLiteOrBuilder {
 
-  /**
-   * Returns true if all required fields in the message and all embedded
-   * messages are set, false otherwise.
-   */
-  boolean isInitialized();
 
   /**
    * Serializes the message and writes it to {@code output}.  This does not
@@ -153,7 +139,7 @@ public interface MessageLite {
   /**
    * Abstract interface implemented by Protocol Message builders.
    */
-  interface Builder extends Cloneable {
+  interface Builder extends MessageLiteOrBuilder, Cloneable {
     /** Resets all fields to their default values. */
     Builder clear();
 
@@ -186,12 +172,6 @@ public interface MessageLite {
      */
     Builder clone();
 
-    /**
-     * Returns true if all required fields in the message and all embedded
-     * messages are set, false otherwise.
-     */
-    boolean isInitialized();
-
     /**
      * Parses a message of this type from the input and merges it with this
      * message, as if using {@link Builder#mergeFrom(MessageLite)}.
@@ -230,12 +210,6 @@ public interface MessageLite {
                       ExtensionRegistryLite extensionRegistry)
                       throws IOException;
 
-    /**
-     * Get the message's type's default instance.
-     * See {@link MessageLite#getDefaultInstanceForType()}.
-     */
-    MessageLite getDefaultInstanceForType();
-
     // ---------------------------------------------------------------
     // Convenience methods.
 
@@ -243,6 +217,8 @@ public interface MessageLite {
      * Parse {@code data} as a message of this type and merge it with the
      * message being built.  This is just a small wrapper around
      * {@link #mergeFrom(CodedInputStream)}.
+     *
+     * @return this
      */
     Builder mergeFrom(ByteString data) throws InvalidProtocolBufferException;
 
@@ -250,6 +226,8 @@ public interface MessageLite {
      * Parse {@code data} as a message of this type and merge it with the
      * message being built.  This is just a small wrapper around
      * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}.
+     *
+     * @return this
      */
     Builder mergeFrom(ByteString data,
                       ExtensionRegistryLite extensionRegistry)
@@ -259,6 +237,8 @@ public interface MessageLite {
      * Parse {@code data} as a message of this type and merge it with the
      * message being built.  This is just a small wrapper around
      * {@link #mergeFrom(CodedInputStream)}.
+     *
+     * @return this
      */
     Builder mergeFrom(byte[] data) throws InvalidProtocolBufferException;
 
@@ -266,6 +246,8 @@ public interface MessageLite {
      * Parse {@code data} as a message of this type and merge it with the
      * message being built.  This is just a small wrapper around
      * {@link #mergeFrom(CodedInputStream)}.
+     *
+     * @return this
      */
     Builder mergeFrom(byte[] data, int off, int len)
                       throws InvalidProtocolBufferException;
@@ -274,6 +256,8 @@ public interface MessageLite {
      * Parse {@code data} as a message of this type and merge it with the
      * message being built.  This is just a small wrapper around
      * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}.
+     *
+     * @return this
      */
     Builder mergeFrom(byte[] data,
                       ExtensionRegistryLite extensionRegistry)
@@ -283,6 +267,8 @@ public interface MessageLite {
      * Parse {@code data} as a message of this type and merge it with the
      * message being built.  This is just a small wrapper around
      * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}.
+     *
+     * @return this
      */
     Builder mergeFrom(byte[] data, int off, int len,
                       ExtensionRegistryLite extensionRegistry)
@@ -299,6 +285,8 @@ public interface MessageLite {
      * and {@link #mergeDelimitedFrom(InputStream)} to read it.
      * <p>
      * Despite usually reading the entire input, this does not close the stream.
+     *
+     * @return this
      */
     Builder mergeFrom(InputStream input) throws IOException;
 
@@ -306,6 +294,8 @@ public interface MessageLite {
      * Parse a message of this type from {@code input} and merge it with the
      * message being built.  This is just a small wrapper around
      * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}.
+     *
+     * @return this
      */
     Builder mergeFrom(InputStream input,
                       ExtensionRegistryLite extensionRegistry)

+ 56 - 0
java/src/main/java/com/google/protobuf/MessageLiteOrBuilder.java

@@ -0,0 +1,56 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+/**
+ * Base interface for methods common to {@link MessageLite}
+ * and {@link MessageLite.Builder} to provide type equivalency.
+ *
+ * @author jonp@google.com (Jon Perlow)
+ */
+public interface MessageLiteOrBuilder {
+  /**
+   * Get an instance of the type with all fields set to their default values.
+   * This may or may not be a singleton.  This differs from the
+   * {@code getDefaultInstance()} method of generated message classes in that
+   * this method is an abstract method of the {@code MessageLite} interface
+   * whereas {@code getDefaultInstance()} is a static method of a specific
+   * class.  They return the same thing.
+   */
+  MessageLite getDefaultInstanceForType();
+
+  /**
+   * Returns true if all required fields in the message and all embedded
+   * messages are set, false otherwise.
+   */
+  boolean isInitialized();
+
+}

+ 110 - 0
java/src/main/java/com/google/protobuf/MessageOrBuilder.java

@@ -0,0 +1,110 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.util.Map;
+
+/**
+ * Base interface for methods common to {@link Message} and
+ * {@link Message.Builder} to provide type equivalency.
+ *
+ * @author jonp@google.com (Jon Perlow)
+ */
+public interface MessageOrBuilder extends MessageLiteOrBuilder {
+
+  // (From MessageLite, re-declared here only for return type covariance.)
+  //@Override (Java 1.6 override semantics, but we must support 1.5)
+  Message getDefaultInstanceForType();
+
+  /**
+   * Get the message's type's descriptor.  This differs from the
+   * {@code getDescriptor()} method of generated message classes in that
+   * this method is an abstract method of the {@code Message} interface
+   * whereas {@code getDescriptor()} is a static method of a specific class.
+   * They return the same thing.
+   */
+  Descriptors.Descriptor getDescriptorForType();
+
+  /**
+   * Returns a collection of all the fields in this message which are set
+   * and their corresponding values.  A singular ("required" or "optional")
+   * field is set iff hasField() returns true for that field.  A "repeated"
+   * field is set iff getRepeatedFieldSize() is greater than zero.  The
+   * values are exactly what would be returned by calling
+   * {@link #getField(Descriptors.FieldDescriptor)} for each field.  The map
+   * is guaranteed to be a sorted map, so iterating over it will return fields
+   * in order by field number.
+   * <br>
+   * If this is for a builder, the returned map may or may not reflect future
+   * changes to the builder.  Either way, the returned map is itself
+   * unmodifiable.
+   */
+  Map<Descriptors.FieldDescriptor, Object> getAllFields();
+
+  /**
+   * Returns true if the given field is set.  This is exactly equivalent to
+   * calling the generated "has" accessor method corresponding to the field.
+   * @throws IllegalArgumentException The field is a repeated field, or
+   *           {@code field.getContainingType() != getDescriptorForType()}.
+   */
+  boolean hasField(Descriptors.FieldDescriptor field);
+
+  /**
+   * Obtains the value of the given field, or the default value if it is
+   * not set.  For primitive fields, the boxed primitive value is returned.
+   * For enum fields, the EnumValueDescriptor for the value is returend. For
+   * embedded message fields, the sub-message is returned.  For repeated
+   * fields, a java.util.List is returned.
+   */
+  Object getField(Descriptors.FieldDescriptor field);
+
+  /**
+   * Gets the number of elements of a repeated field.  This is exactly
+   * equivalent to calling the generated "Count" accessor method corresponding
+   * to the field.
+   * @throws IllegalArgumentException The field is not a repeated field, or
+   *           {@code field.getContainingType() != getDescriptorForType()}.
+   */
+  int getRepeatedFieldCount(Descriptors.FieldDescriptor field);
+
+  /**
+   * Gets an element of a repeated field.  For primitive fields, the boxed
+   * primitive value is returned.  For enum fields, the EnumValueDescriptor
+   * for the value is returend. For embedded message fields, the sub-message
+   * is returned.
+   * @throws IllegalArgumentException The field is not a repeated field, or
+   *           {@code field.getContainingType() != getDescriptorForType()}.
+   */
+  Object getRepeatedField(Descriptors.FieldDescriptor field, int index);
+
+  /** Get the {@link UnknownFieldSet} for this message. */
+  UnknownFieldSet getUnknownFields();
+}

+ 696 - 0
java/src/main/java/com/google/protobuf/RepeatedFieldBuilder.java

@@ -0,0 +1,696 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.util.AbstractList;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * <code>RepeatedFieldBuilder</code> implements a structure that a protocol
+ * message uses to hold a repeated field of other protocol messages. It supports
+ * the classical use case of adding immutable {@link Message}'s to the
+ * repeated field and is highly optimized around this (no extra memory
+ * allocations and sharing of immutable arrays).
+ * <br>
+ * It also supports the additional use case of adding a {@link Message.Builder}
+ * to the repeated field and deferring conversion of that <code>Builder</code>
+ * to an immutable <code>Message</code>. In this way, it's possible to maintain
+ * a tree of <code>Builder</code>'s that acts as a fully read/write data
+ * structure.
+ * <br>
+ * Logically, one can think of a tree of builders as converting the entire tree
+ * to messages when build is called on the root or when any method is called
+ * that desires a Message instead of a Builder. In terms of the implementation,
+ * the <code>SingleFieldBuilder</code> and <code>RepeatedFieldBuilder</code>
+ * classes cache messages that were created so that messages only need to be
+ * created when some change occured in its builder or a builder for one of its
+ * descendants.
+ *
+ * @param <MType> the type of message for the field
+ * @param <BType> the type of builder for the field
+ * @param <IType> the common interface for the message and the builder
+ *
+ * @author jonp@google.com (Jon Perlow)
+ */
+public class RepeatedFieldBuilder
+    <MType extends GeneratedMessage,
+     BType extends GeneratedMessage.Builder,
+     IType extends MessageOrBuilder>
+    implements GeneratedMessage.BuilderParent {
+
+  // Parent to send changes to.
+  private GeneratedMessage.BuilderParent parent;
+
+  // List of messages. Never null. It may be immutable, in which case
+  // isMessagesListImmutable will be true. See note below.
+  private List<MType> messages;
+
+  // Whether messages is an mutable array that can be modified.
+  private boolean isMessagesListMutable;
+
+  // List of builders. May be null, in which case, no nested builders were
+  // created. If not null, entries represent the builder for that index.
+  private List<SingleFieldBuilder<MType, BType, IType>> builders;
+
+  // Here are the invariants for messages and builders:
+  // 1. messages is never null and its count corresponds to the number of items
+  //    in the repeated field.
+  // 2. If builders is non-null, messages and builders MUST always
+  //    contain the same number of items.
+  // 3. Entries in either array can be null, but for any index, there MUST be
+  //    either a Message in messages or a builder in builders.
+  // 4. If the builder at an index is non-null, the builder is
+  //    authoritative. This is the case where a Builder was set on the index.
+  //    Any message in the messages array MUST be ignored.
+  // t. If the builder at an index is null, the message in the messages
+  //    list is authoritative. This is the case where a Message (not a Builder)
+  //    was set directly for an index.
+
+  // Indicates that we've built a message and so we are now obligated
+  // to dispatch dirty invalidations. See GeneratedMessage.BuilderListener.
+  private boolean isClean;
+
+  // A view of this builder that exposes a List interface of messages. This is
+  // initialized on demand. This is fully backed by this object and all changes
+  // are reflected in it. Access to any item converts it to a message if it
+  // was a builder.
+  private MessageExternalList<MType, BType, IType> externalMessageList;
+
+  // A view of this builder that exposes a List interface of builders. This is
+  // initialized on demand. This is fully backed by this object and all changes
+  // are reflected in it. Access to any item converts it to a builder if it
+  // was a message.
+  private BuilderExternalList<MType, BType, IType> externalBuilderList;
+
+  // A view of this builder that exposes a List interface of the interface
+  // implemented by messages and builders. This is initialized on demand. This
+  // is fully backed by this object and all changes are reflected in it.
+  // Access to any item returns either a builder or message depending on
+  // what is most efficient.
+  private MessageOrBuilderExternalList<MType, BType, IType>
+      externalMessageOrBuilderList;
+
+  /**
+   * Constructs a new builder with an empty list of messages.
+   *
+   * @param messages the current list of messages
+   * @param isMessagesListMutable Whether the messages list is mutable
+   * @param parent a listener to notify of changes
+   * @param isClean whether the builder is initially marked clean
+   */
+  public RepeatedFieldBuilder(
+      List<MType> messages,
+      boolean isMessagesListMutable,
+      GeneratedMessage.BuilderParent parent,
+      boolean isClean) {
+    this.messages = messages;
+    this.isMessagesListMutable = isMessagesListMutable;
+    this.parent = parent;
+    this.isClean = isClean;
+  }
+
+  public void dispose() {
+    // Null out parent so we stop sending it invalidations.
+    parent = null;
+  }
+
+  /**
+   * Ensures that the list of messages is mutable so it can be updated. If it's
+   * immutable, a copy is made.
+   */
+  private void ensureMutableMessageList() {
+    if (!isMessagesListMutable) {
+      messages = new ArrayList<MType>(messages);
+      isMessagesListMutable = true;
+    }
+  }
+
+  /**
+   * Ensures that the list of builders is not null. If it's null, the list is
+   * created and initialized to be the same size as the messages list with
+   * null entries.
+   */
+  private void ensureBuilders() {
+    if (this.builders == null) {
+      this.builders =
+          new ArrayList<SingleFieldBuilder<MType, BType, IType>>(
+              messages.size());
+      for (int i = 0; i < messages.size(); i++) {
+        builders.add(null);
+      }
+    }
+  }
+
+  /**
+   * Gets the count of items in the list.
+   *
+   * @return the count of items in the list.
+   */
+  public int getCount() {
+    return messages.size();
+  }
+
+  /**
+   * Gets whether the list is empty.
+   *
+   * @return whether the list is empty
+   */
+  public boolean isEmpty() {
+    return messages.isEmpty();
+  }
+
+  /**
+   * Get the message at the specified index. If the message is currently stored
+   * as a <code>Builder</code>, it is converted to a <code>Message</code> by
+   * calling {@link Message.Builder#buildPartial} on it.
+   *
+   * @param index the index of the message to get
+   * @return the message for the specified index
+   */
+  public MType getMessage(int index) {
+    return getMessage(index, false);
+  }
+
+  /**
+   * Get the message at the specified index. If the message is currently stored
+   * as a <code>Builder</code>, it is converted to a <code>Message</code> by
+   * calling {@link Message.Builder#buildPartial} on it.
+   *
+   * @param index the index of the message to get
+   * @param forBuild this is being called for build so we want to make sure
+   *     we SingleFieldBuilder.build to send dirty invalidations
+   * @return the message for the specified index
+   */
+  private MType getMessage(int index, boolean forBuild) {
+    if (this.builders == null) {
+      // We don't have any builders -- return the current Message.
+      // This is the case where no builder was created, so we MUST have a
+      // Message.
+      return messages.get(index);
+    }
+
+    SingleFieldBuilder<MType, BType, IType> builder = builders.get(index);
+    if (builder == null) {
+      // We don't have a builder -- return the current message.
+      // This is the case where no builder was created for the entry at index,
+      // so we MUST have a message.
+      return messages.get(index);
+
+    } else {
+      return forBuild ? builder.build() : builder.getMessage();
+    }
+  }
+
+  /**
+   * Gets a builder for the specified index. If no builder has been created for
+   * that index, a builder is created on demand by calling
+   * {@link Message#toBuilder}.
+   *
+   * @param index the index of the message to get
+   * @return The builder for that index
+   */
+  public BType getBuilder(int index) {
+    ensureBuilders();
+    SingleFieldBuilder<MType, BType, IType> builder = builders.get(index);
+    if (builder == null) {
+      MType message = messages.get(index);
+      builder = new SingleFieldBuilder<MType, BType, IType>(
+          message, this, isClean);
+      builders.set(index, builder);
+    }
+    return builder.getBuilder();
+  }
+
+  /**
+   * Gets the base class interface for the specified index. This may either be
+   * a builder or a message. It will return whatever is more efficient.
+   *
+   * @param index the index of the message to get
+   * @return the message or builder for the index as the base class interface
+   */
+  @SuppressWarnings("unchecked")
+  public IType getMessageOrBuilder(int index) {
+    if (this.builders == null) {
+      // We don't have any builders -- return the current Message.
+      // This is the case where no builder was created, so we MUST have a
+      // Message.
+      return (IType) messages.get(index);
+    }
+
+    SingleFieldBuilder<MType, BType, IType> builder = builders.get(index);
+    if (builder == null) {
+      // We don't have a builder -- return the current message.
+      // This is the case where no builder was created for the entry at index,
+      // so we MUST have a message.
+      return (IType) messages.get(index);
+
+    } else {
+      return builder.getMessageOrBuilder();
+    }
+  }
+
+  /**
+   * Sets a  message at the specified index replacing the existing item at
+   * that index.
+   *
+   * @param index the index to set.
+   * @param message the message to set
+   * @return the builder
+   */
+  public RepeatedFieldBuilder<MType, BType, IType> setMessage(
+      int index, MType message) {
+    if (message == null) {
+      throw new NullPointerException();
+    }
+    ensureMutableMessageList();
+    messages.set(index, message);
+    if (builders != null) {
+      SingleFieldBuilder<MType, BType, IType> entry =
+          builders.set(index, null);
+      if (entry != null) {
+        entry.dispose();
+      }
+    }
+    onChanged();
+    incrementModCounts();
+    return this;
+  }
+
+  /**
+   * Appends the specified element to the end of this list.
+   *
+   * @param message the message to add
+   * @return the builder
+   */
+  public RepeatedFieldBuilder<MType, BType, IType> addMessage(
+      MType message) {
+    if (message == null) {
+      throw new NullPointerException();
+    }
+    ensureMutableMessageList();
+    messages.add(message);
+    if (builders != null) {
+      builders.add(null);
+    }
+    onChanged();
+    incrementModCounts();
+    return this;
+  }
+
+  /**
+   * Inserts the specified message at the specified position in this list.
+   * Shifts the element currently at that position (if any) and any subsequent
+   * elements to the right (adds one to their indices).
+   *
+   * @param index the index at which to insert the message
+   * @param message the message to add
+   * @return the builder
+   */
+  public RepeatedFieldBuilder<MType, BType, IType> addMessage(
+      int index, MType message) {
+    if (message == null) {
+      throw new NullPointerException();
+    }
+    ensureMutableMessageList();
+    messages.add(index, message);
+    if (builders != null) {
+      builders.add(index, null);
+    }
+    onChanged();
+    incrementModCounts();
+    return this;
+  }
+
+  /**
+   * Appends all of the messages in the specified collection to the end of
+   * this list, in the order that they are returned by the specified
+   * collection's iterator.
+   *
+   * @param values the messages to add
+   * @return the builder
+   */
+  public RepeatedFieldBuilder<MType, BType, IType> addAllMessages(
+      Iterable<? extends MType> values) {
+    for (final MType value : values) {
+      if (value == null) {
+        throw new NullPointerException();
+      }
+    }
+    if (values instanceof Collection) {
+      @SuppressWarnings("unchecked") final
+      Collection<MType> collection = (Collection<MType>) values;
+      if (collection.size() == 0) {
+        return this;
+      }
+      ensureMutableMessageList();
+      for (MType value : values) {
+        addMessage(value);
+      }
+    } else {
+      ensureMutableMessageList();
+      for (MType value : values) {
+        addMessage(value);
+      }
+    }
+    onChanged();
+    incrementModCounts();
+    return this;
+  }
+
+  /**
+   * Appends a new builder to the end of this list and returns the builder.
+   *
+   * @param message the message to add which is the basis of the builder
+   * @return the new builder
+   */
+  public BType addBuilder(MType message) {
+    ensureMutableMessageList();
+    ensureBuilders();
+    SingleFieldBuilder<MType, BType, IType> builder =
+        new SingleFieldBuilder<MType, BType, IType>(
+            message, this, isClean);
+    messages.add(null);
+    builders.add(builder);
+    onChanged();
+    incrementModCounts();
+    return builder.getBuilder();
+  }
+
+  /**
+   * Inserts a new builder at the specified position in this list.
+   * Shifts the element currently at that position (if any) and any subsequent
+   * elements to the right (adds one to their indices).
+   *
+   * @param index the index at which to insert the builder
+   * @param message the message to add which is the basis of the builder
+   * @return the builder
+   */
+  public BType addBuilder(int index, MType message) {
+    ensureMutableMessageList();
+    ensureBuilders();
+    SingleFieldBuilder<MType, BType, IType> builder =
+        new SingleFieldBuilder<MType, BType, IType>(
+            message, this, isClean);
+    messages.add(index, null);
+    builders.add(index, builder);
+    onChanged();
+    incrementModCounts();
+    return builder.getBuilder();
+  }
+
+  /**
+   * Removes the element at the specified position in this list. Shifts any
+   * subsequent elements to the left (subtracts one from their indices).
+   * Returns the element that was removed from the list.
+   *
+   * @param index the index at which to remove the message
+   */
+  public void remove(int index) {
+    ensureMutableMessageList();
+    messages.remove(index);
+    if (builders != null) {
+      SingleFieldBuilder<MType, BType, IType> entry =
+          builders.remove(index);
+      if (entry != null) {
+        entry.dispose();
+      }
+    }
+    onChanged();
+    incrementModCounts();
+  }
+
+  /**
+   * Removes all of the elements from this list.
+   * The list will be empty after this call returns.
+   */
+  public void clear() {
+    messages = Collections.emptyList();
+    isMessagesListMutable = false;
+    if (builders != null) {
+      for (SingleFieldBuilder<MType, BType, IType> entry :
+          builders) {
+        if (entry != null) {
+          entry.dispose();
+        }
+      }
+      builders = null;
+    }
+    onChanged();
+    incrementModCounts();
+  }
+
+  /**
+   * Builds the list of messages from the builder and returns them.
+   *
+   * @return an immutable list of messages
+   */
+  public List<MType> build() {
+    // Now that build has been called, we are required to dispatch
+    // invalidations.
+    isClean = true;
+
+    if (!isMessagesListMutable && builders == null) {
+      // We still have an immutable list and we never created a builder.
+      return messages;
+    }
+
+    boolean allMessagesInSync = true;
+    if (!isMessagesListMutable) {
+      // We still have an immutable list. Let's see if any of them are out
+      // of sync with their builders.
+      for (int i = 0; i < messages.size(); i++) {
+        Message message = messages.get(i);
+        SingleFieldBuilder<MType, BType, IType> builder = builders.get(i);
+        if (builder != null) {
+          if (builder.build() != message) {
+            allMessagesInSync = false;
+            break;
+          }
+        }
+      }
+      if (allMessagesInSync) {
+        // Immutable list is still in sync.
+        return messages;
+      }
+    }
+
+    // Need to make sure messages is up to date
+    ensureMutableMessageList();
+    for (int i = 0; i < messages.size(); i++) {
+      messages.set(i, getMessage(i, true));
+    }
+
+    // We're going to return our list as immutable so we mark that we can
+    // no longer update it.
+    messages = Collections.unmodifiableList(messages);
+    isMessagesListMutable = false;
+    return messages;
+  }
+
+  /**
+   * Gets a view of the builder as a list of messages. The returned list is live
+   * and will reflect any changes to the underlying builder.
+   *
+   * @return the messages in the list
+   */
+  public List<MType> getMessageList() {
+    if (externalMessageList == null) {
+      externalMessageList =
+          new MessageExternalList<MType, BType, IType>(this);
+    }
+    return externalMessageList;
+  }
+
+  /**
+   * Gets a view of the builder as a list of builders. This returned list is
+   * live and will reflect any changes to the underlying builder.
+   *
+   * @return the builders in the list
+   */
+  public List<BType> getBuilderList() {
+    if (externalBuilderList == null) {
+      externalBuilderList =
+          new BuilderExternalList<MType, BType, IType>(this);
+    }
+    return externalBuilderList;
+  }
+
+  /**
+   * Gets a view of the builder as a list of MessageOrBuilders. This returned
+   * list is live and will reflect any changes to the underlying builder.
+   *
+   * @return the builders in the list
+   */
+  public List<IType> getMessageOrBuilderList() {
+    if (externalMessageOrBuilderList == null) {
+      externalMessageOrBuilderList =
+          new MessageOrBuilderExternalList<MType, BType, IType>(this);
+    }
+    return externalMessageOrBuilderList;
+  }
+
+  /**
+   * Called when a the builder or one of its nested children has changed
+   * and any parent should be notified of its invalidation.
+   */
+  private void onChanged() {
+    if (isClean && parent != null) {
+      parent.markDirty();
+
+      // Don't keep dispatching invalidations until build is called again.
+      isClean = false;
+    }
+  }
+
+  @Override
+  public void markDirty() {
+    onChanged();
+  }
+
+  /**
+   * Increments the mod counts so that an ConcurrentModificationException can
+   * be thrown if calling code tries to modify the builder while its iterating
+   * the list.
+   */
+  private void incrementModCounts() {
+    if (externalMessageList != null) {
+      externalMessageList.incrementModCount();
+    }
+    if (externalBuilderList != null) {
+      externalBuilderList.incrementModCount();
+    }
+    if (externalMessageOrBuilderList != null) {
+      externalMessageOrBuilderList.incrementModCount();
+    }
+  }
+
+  /**
+   * Provides a live view of the builder as a list of messages.
+   *
+   * @param <MType> the type of message for the field
+   * @param <BType> the type of builder for the field
+   * @param <IType> the common interface for the message and the builder
+   */
+  private static class MessageExternalList<
+      MType extends GeneratedMessage,
+      BType extends GeneratedMessage.Builder,
+      IType extends MessageOrBuilder>
+      extends AbstractList<MType> implements List<MType> {
+
+    RepeatedFieldBuilder<MType, BType, IType> builder;
+
+    MessageExternalList(
+        RepeatedFieldBuilder<MType, BType, IType> builder) {
+      this.builder = builder;
+    }
+
+    public int size() {
+      return this.builder.getCount();
+    }
+
+    public MType get(int index) {
+      return builder.getMessage(index);
+    }
+
+    void incrementModCount() {
+      modCount++;
+    }
+  }
+
+  /**
+   * Provides a live view of the builder as a list of builders.
+   *
+   * @param <MType> the type of message for the field
+   * @param <BType> the type of builder for the field
+   * @param <IType> the common interface for the message and the builder
+   */
+  private static class BuilderExternalList<
+      MType extends GeneratedMessage,
+      BType extends GeneratedMessage.Builder,
+      IType extends MessageOrBuilder>
+      extends AbstractList<BType> implements List<BType> {
+
+    RepeatedFieldBuilder<MType, BType, IType> builder;
+
+    BuilderExternalList(
+        RepeatedFieldBuilder<MType, BType, IType> builder) {
+      this.builder = builder;
+    }
+
+    public int size() {
+      return this.builder.getCount();
+    }
+
+    public BType get(int index) {
+      return builder.getBuilder(index);
+    }
+
+    void incrementModCount() {
+      modCount++;
+    }
+  }
+
+  /**
+   * Provides a live view of the builder as a list of builders.
+   *
+   * @param <MType> the type of message for the field
+   * @param <BType> the type of builder for the field
+   * @param <IType> the common interface for the message and the builder
+   */
+  private static class MessageOrBuilderExternalList<
+      MType extends GeneratedMessage,
+      BType extends GeneratedMessage.Builder,
+      IType extends MessageOrBuilder>
+      extends AbstractList<IType> implements List<IType> {
+
+    RepeatedFieldBuilder<MType, BType, IType> builder;
+
+    MessageOrBuilderExternalList(
+        RepeatedFieldBuilder<MType, BType, IType> builder) {
+      this.builder = builder;
+    }
+
+    public int size() {
+      return this.builder.getCount();
+    }
+
+    public IType get(int index) {
+      return builder.getMessageOrBuilder(index);
+    }
+
+    void incrementModCount() {
+      modCount++;
+    }
+  }
+}

+ 9 - 1
java/src/main/java/com/google/protobuf/ServiceException.java

@@ -35,10 +35,18 @@ package com.google.protobuf;
  *
  * @author cpovirk@google.com (Chris Povirk)
  */
-public final class ServiceException extends Exception {
+public class ServiceException extends Exception {
   private static final long serialVersionUID = -1219262335729891920L;
 
   public ServiceException(final String message) {
     super(message);
   }
+
+  public ServiceException(final Throwable cause) {
+    super(cause);
+  }
+
+  public ServiceException(final String message, final Throwable cause) {
+    super(message, cause);
+  }
 }

+ 241 - 0
java/src/main/java/com/google/protobuf/SingleFieldBuilder.java

@@ -0,0 +1,241 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+/**
+ * <code>SingleFieldBuilder</code> implements a structure that a protocol
+ * message uses to hold a single field of another protocol message. It supports
+ * the classical use case of setting an immutable {@link Message} as the value
+ * of the field and is highly optimized around this.
+ * <br>
+ * It also supports the additional use case of setting a {@link Message.Builder}
+ * as the field and deferring conversion of that <code>Builder</code>
+ * to an immutable <code>Message</code>. In this way, it's possible to maintain
+ * a tree of <code>Builder</code>'s that acts as a fully read/write data
+ * structure.
+ * <br>
+ * Logically, one can think of a tree of builders as converting the entire tree
+ * to messages when build is called on the root or when any method is called
+ * that desires a Message instead of a Builder. In terms of the implementation,
+ * the <code>SingleFieldBuilder</code> and <code>RepeatedFieldBuilder</code>
+ * classes cache messages that were created so that messages only need to be
+ * created when some change occured in its builder or a builder for one of its
+ * descendants.
+ *
+ * @param <MType> the type of message for the field
+ * @param <BType> the type of builder for the field
+ * @param <IType> the common interface for the message and the builder
+ *
+ * @author jonp@google.com (Jon Perlow)
+ */
+public class SingleFieldBuilder
+    <MType extends GeneratedMessage,
+     BType extends GeneratedMessage.Builder,
+     IType extends MessageOrBuilder>
+    implements GeneratedMessage.BuilderParent {
+
+  // Parent to send changes to.
+  private GeneratedMessage.BuilderParent parent;
+
+  // Invariant: one of builder or message fields must be non-null.
+
+  // If set, this is the case where we are backed by a builder. In this case,
+  // message field represents a cached message for the builder (or null if
+  // there is no cached message).
+  private BType builder;
+
+  // If builder is non-null, this represents a cached message from the builder.
+  // If builder is null, this is the authoritative message for the field.
+  private MType message;
+
+  // Indicates that we've built a message and so we are now obligated
+  // to dispatch dirty invalidations. See GeneratedMessage.BuilderListener.
+  private boolean isClean;
+
+  public SingleFieldBuilder(
+      MType message,
+      GeneratedMessage.BuilderParent parent,
+      boolean isClean) {
+    if (message == null) {
+      throw new NullPointerException();
+    }
+    this.message = message;
+    this.parent = parent;
+    this.isClean = isClean;
+  }
+
+  public void dispose() {
+    // Null out parent so we stop sending it invalidations.
+    parent = null;
+  }
+
+  /**
+   * Get the message for the field. If the message is currently stored
+   * as a <code>Builder</code>, it is converted to a <code>Message</code> by
+   * calling {@link Message.Builder#buildPartial} on it. If no message has
+   * been set, returns the default instance of the message.
+   *
+   * @return the message for the field
+   */
+  @SuppressWarnings("unchecked")
+  public MType getMessage() {
+    if (message == null) {
+      // If message is null, the invariant is that we must be have a builder.
+      message = (MType) builder.buildPartial();
+    }
+    return message;
+  }
+
+  /**
+   * Builds the message and returns it.
+   *
+   * @return the message
+   */
+  public MType build() {
+    // Now that build has been called, we are required to dispatch
+    // invalidations.
+    isClean = true;
+    return getMessage();
+  }
+
+  /**
+   * Gets a builder for the field. If no builder has been created yet, a
+   * builder is created on demand by calling {@link Message#toBuilder}.
+   *
+   * @return The builder for the field
+   */
+  @SuppressWarnings("unchecked")
+  public BType getBuilder() {
+    if (builder == null) {
+      // builder.mergeFrom() on a fresh builder
+      // does not create any sub-objects with independent clean/dirty states,
+      // therefore setting the builder itself to clean without actually calling
+      // build() cannot break any invariants.
+      builder = (BType) message.newBuilderForType(this);
+      builder.mergeFrom(message); // no-op if message is the default message
+      builder.markClean();
+    }
+    return builder;
+  }
+
+  /**
+   * Gets the base class interface for the field. This may either be a builder
+   * or a message. It will return whatever is more efficient.
+   *
+   * @return the message or builder for the field as the base class interface
+   */
+  @SuppressWarnings("unchecked")
+  public IType getMessageOrBuilder() {
+    if (builder != null) {
+      return  (IType) builder;
+    } else {
+      return (IType) message;
+    }
+  }
+
+  /**
+   * Sets a  message for the field replacing any existing value.
+   *
+   * @param message the message to set
+   * @return the builder
+   */
+  public SingleFieldBuilder<MType, BType, IType> setMessage(
+      MType message) {
+    if (message == null) {
+      throw new NullPointerException();
+    }
+    this.message = message;
+    if (builder != null) {
+      builder.dispose();
+      builder = null;
+    }
+    onChanged();
+    return this;
+  }
+
+  /**
+   * Merges the field from another field.
+   *
+   * @param value the value to merge from
+   * @return the builder
+   */
+  public SingleFieldBuilder<MType, BType, IType> mergeFrom(
+      MType value) {
+    if (builder == null && message == message.getDefaultInstanceForType()) {
+      message = value;
+    } else {
+      getBuilder().mergeFrom(value);
+    }
+    onChanged();
+    return this;
+  }
+
+  /**
+   * Clears the value of the field.
+   *
+   * @return the builder
+   */
+  @SuppressWarnings("unchecked")
+  public SingleFieldBuilder<MType, BType, IType> clear() {
+    message = (MType) (message != null ?
+        message.getDefaultInstanceForType() :
+        builder.getDefaultInstanceForType());
+    if (builder != null) {
+      builder.dispose();
+      builder = null;
+    }
+    onChanged();
+    return this;
+  }
+
+  /**
+   * Called when a the builder or one of its nested children has changed
+   * and any parent should be notified of its invalidation.
+   */
+  private void onChanged() {
+    // If builder is null, this is the case where onChanged is being called
+    // from setMessage or clear.
+    if (builder != null) {
+      message = null;
+    }
+    if (isClean && parent != null) {
+      parent.markDirty();
+
+      // Don't keep dispatching invalidations until build is called again.
+      isClean = false;
+    }
+  }
+
+  @Override
+  public void markDirty() {
+    onChanged();
+  }
+}

+ 618 - 0
java/src/main/java/com/google/protobuf/SmallSortedMap.java

@@ -0,0 +1,618 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.util.AbstractMap;
+import java.util.AbstractSet;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.TreeMap;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Set;
+import java.util.SortedMap;
+
+/**
+ * A custom map implementation from FieldDescriptor to Object optimized to
+ * minimize the number of memory allocations for instances with a small number
+ * of mappings. The implementation stores the first {@code k} mappings in an
+ * array for a configurable value of {@code k}, allowing direct access to the
+ * corresponding {@code Entry}s without the need to create an Iterator. The
+ * remaining entries are stored in an overflow map. Iteration over the entries
+ * in the map should be done as follows:
+ *
+ * <pre>
+ * for (int i = 0; i &lt; fieldMap.getNumArrayEntries(); i++) {
+ *   process(fieldMap.getArrayEntryAt(i));
+ * }
+ * for (Map.Entry&lt;K, V&gt; entry : fieldMap.getOverflowEntries()) {
+ *   process(entry);
+ * }
+ * </pre>
+ *
+ * The resulting iteration is in order of ascending field tag number. The
+ * object returned by {@link #entrySet()} adheres to the same contract but is
+ * less efficient as it necessarily involves creating an object for iteration.
+ * <p>
+ * The tradeoff for this memory efficiency is that the worst case running time
+ * of the {@code put()} operation is {@code O(k + lg n)}, which happens when
+ * entries are added in descending order. {@code k} should be chosen such that
+ * it covers enough common cases without adversely affecting larger maps. In
+ * practice, the worst case scenario does not happen for extensions because
+ * extension fields are serialized and deserialized in order of ascending tag
+ * number, but the worst case scenario can happen for DynamicMessages.
+ * <p>
+ * The running time for all other operations is similar to that of
+ * {@code TreeMap}.
+ * <p>
+ * Instances are not thread-safe until {@link #makeImmutable()} is called,
+ * after which any modifying operation will result in an
+ * {@link UnsupportedOperationException}.
+ *
+ * @author darick@google.com Darick Tong
+ */
+// This class is final for all intents and purposes because the constructor is
+// private. However, the FieldDescriptor-specific logic is encapsulated in
+// a subclass to aid testability of the core logic.
+class SmallSortedMap<K extends Comparable<K>, V> extends AbstractMap<K, V> {
+
+  /**
+   * Creates a new instance for mapping FieldDescriptors to their values.
+   * The {@link #makeImmutable()} implementation will convert the List values
+   * of any repeated fields to unmodifiable lists.
+   *
+   * @param arraySize The size of the entry array containing the
+   *        lexicographically smallest mappings.
+   */
+  static <FieldDescriptorType extends
+      FieldSet.FieldDescriptorLite<FieldDescriptorType>>
+      SmallSortedMap<FieldDescriptorType, Object> newFieldMap(int arraySize) {
+    return new SmallSortedMap<FieldDescriptorType, Object>(arraySize) {
+      @Override
+      @SuppressWarnings("unchecked")
+      public void makeImmutable() {
+        if (!isImmutable()) {
+          for (int i = 0; i < getNumArrayEntries(); i++) {
+            final Map.Entry<FieldDescriptorType, Object> entry =
+                getArrayEntryAt(i);
+            if (entry.getKey().isRepeated()) {
+              final List value = (List) entry.getValue();
+              entry.setValue(Collections.unmodifiableList(value));
+            }
+          }
+          for (Map.Entry<FieldDescriptorType, Object> entry :
+                   getOverflowEntries()) {
+            if (entry.getKey().isRepeated()) {
+              final List value = (List) entry.getValue();
+              entry.setValue(Collections.unmodifiableList(value));
+            }
+          }
+        }
+        super.makeImmutable();
+      }
+    };
+  }
+
+  /**
+   * Creates a new instance for testing.
+   *
+   * @param arraySize The size of the entry array containing the
+   *        lexicographically smallest mappings.
+   */
+  static <K extends Comparable<K>, V> SmallSortedMap<K, V> newInstanceForTest(
+      int arraySize) {
+    return new SmallSortedMap<K, V>(arraySize);
+  }
+
+  private final int maxArraySize;
+  // The "entry array" is actually a List because generic arrays are not
+  // allowed. ArrayList also nicely handles the entry shifting on inserts and
+  // removes.
+  private List<Entry> entryList;
+  private Map<K, V> overflowEntries;
+  private boolean isImmutable;
+  // The EntrySet is a stateless view of the Map. It's initialized the first
+  // time it is requested and reused henceforth.
+  private volatile EntrySet lazyEntrySet;
+
+  /**
+   * @code arraySize Size of the array in which the lexicographically smallest
+   *       mappings are stored. (i.e. the {@code k} referred to in the class
+   *       documentation).
+   */
+  private SmallSortedMap(int arraySize) {
+    this.maxArraySize = arraySize;
+    this.entryList = Collections.emptyList();
+    this.overflowEntries = Collections.emptyMap();
+  }
+
+  /** Make this map immutable from this point forward. */
+  public void makeImmutable() {
+    if (!isImmutable) {
+      // Note: There's no need to wrap the entryList in an unmodifiableList
+      // because none of the list's accessors are exposed. The iterator() of
+      // overflowEntries, on the other hand, is exposed so it must be made
+      // unmodifiable.
+      overflowEntries = overflowEntries.isEmpty() ?
+          Collections.<K, V>emptyMap() :
+          Collections.unmodifiableMap(overflowEntries);
+      isImmutable = true;
+    }
+  }
+
+  /** @return Whether {@link #makeImmutable()} has been called. */
+  public boolean isImmutable() {
+    return isImmutable;
+  }
+
+  /** @return The number of entries in the entry array. */
+  public int getNumArrayEntries() {
+    return entryList.size();
+  }
+
+  /** @return The array entry at the given {@code index}. */
+  public Map.Entry<K, V> getArrayEntryAt(int index) {
+    return entryList.get(index);
+  }
+
+  /** @return There number of overflow entries. */
+  public int getNumOverflowEntries() {
+    return overflowEntries.size();
+  }
+
+  /** @return An iterable over the overflow entries. */
+  public Iterable<Map.Entry<K, V>> getOverflowEntries() {
+    return overflowEntries.isEmpty() ?
+        EmptySet.<Map.Entry<K, V>>iterable() :
+        overflowEntries.entrySet();
+  }
+
+  @Override
+  public int size() {
+    return entryList.size() + overflowEntries.size();
+  }
+
+  /**
+   * The implementation throws a {@code ClassCastException} if o is not an
+   * object of type {@code K}.
+   *
+   * {@inheritDoc}
+   */
+  @Override
+  public boolean containsKey(Object o) {
+    @SuppressWarnings("unchecked")
+    final K key = (K) o;
+    return binarySearchInArray(key) >= 0 || overflowEntries.containsKey(key);
+  }
+
+  /**
+   * The implementation throws a {@code ClassCastException} if o is not an
+   * object of type {@code K}.
+   *
+   * {@inheritDoc}
+   */
+  @Override
+  public V get(Object o) {
+    @SuppressWarnings("unchecked")
+    final K key = (K) o;
+    final int index = binarySearchInArray(key);
+    if (index >= 0) {
+      return entryList.get(index).getValue();
+    }
+    return overflowEntries.get(key);
+  }
+
+  @Override
+  public V put(K key, V value) {
+    checkMutable();
+    final int index = binarySearchInArray(key);
+    if (index >= 0) {
+      // Replace existing array entry.
+      return entryList.get(index).setValue(value);
+    }
+    ensureEntryArrayMutable();
+    final int insertionPoint = -(index + 1);
+    if (insertionPoint >= maxArraySize) {
+      // Put directly in overflow.
+      return getOverflowEntriesMutable().put(key, value);
+    }
+    // Insert new Entry in array.
+    if (entryList.size() == maxArraySize) {
+      // Shift the last array entry into overflow.
+      final Entry lastEntryInArray = entryList.remove(maxArraySize - 1);
+      getOverflowEntriesMutable().put(lastEntryInArray.getKey(),
+                                      lastEntryInArray.getValue());
+    }
+    entryList.add(insertionPoint, new Entry(key, value));
+    return null;
+  }
+
+  @Override
+  public void clear() {
+    checkMutable();
+    if (!entryList.isEmpty()) {
+      entryList.clear();
+    }
+    if (!overflowEntries.isEmpty()) {
+      overflowEntries.clear();
+    }
+  }
+
+  /**
+   * The implementation throws a {@code ClassCastException} if o is not an
+   * object of type {@code K}.
+   *
+   * {@inheritDoc}
+   */
+  @Override
+  public V remove(Object o) {
+    checkMutable();
+    @SuppressWarnings("unchecked")
+    final K key = (K) o;
+    final int index = binarySearchInArray(key);
+    if (index >= 0) {
+      return removeArrayEntryAt(index);
+    }
+    // overflowEntries might be Collections.unmodifiableMap(), so only
+    // call remove() if it is non-empty.
+    if (overflowEntries.isEmpty()) {
+      return null;
+    } else {
+      return overflowEntries.remove(key);
+    }
+  }
+
+  private V removeArrayEntryAt(int index) {
+    checkMutable();
+    final V removed = entryList.remove(index).getValue();
+    if (!overflowEntries.isEmpty()) {
+      // Shift the first entry in the overflow to be the last entry in the
+      // array.
+      final Iterator<Map.Entry<K, V>> iterator =
+          getOverflowEntriesMutable().entrySet().iterator();
+      entryList.add(new Entry(iterator.next()));
+      iterator.remove();
+    }
+    return removed;
+  }
+
+  /**
+   * @param key The key to find in the entry array.
+   * @return The returned integer position follows the same semantics as the
+   *     value returned by {@link java.util.Arrays#binarySearch()}.
+   */
+  private int binarySearchInArray(K key) {
+    int left = 0;
+    int right = entryList.size() - 1;
+
+    // Optimization: For the common case in which entries are added in
+    // ascending tag order, check the largest element in the array before
+    // doing a full binary search.
+    if (right >= 0) {
+      int cmp = key.compareTo(entryList.get(right).getKey());
+      if (cmp > 0) {
+        return -(right + 2);  // Insert point is after "right".
+      } else if (cmp == 0) {
+        return right;
+      }
+    }
+
+    while (left <= right) {
+      int mid = (left + right) / 2;
+      int cmp = key.compareTo(entryList.get(mid).getKey());
+      if (cmp < 0) {
+        right = mid - 1;
+      } else if (cmp > 0) {
+        left = mid + 1;
+      } else {
+        return mid;
+      }
+    }
+    return -(left + 1);
+  }
+
+  /**
+   * Similar to the AbstractMap implementation of {@code keySet()} and
+   * {@code values()}, the entry set is created the first time this method is
+   * called, and returned in response to all subsequent calls.
+   *
+   * {@inheritDoc}
+   */
+  @Override
+  public Set<Map.Entry<K, V>> entrySet() {
+    if (lazyEntrySet == null) {
+      lazyEntrySet = new EntrySet();
+    }
+    return lazyEntrySet;
+  }
+
+  /**
+   * @throws UnsupportedOperationException if {@link #makeImmutable()} has
+   *         has been called.
+   */
+  private void checkMutable() {
+    if (isImmutable) {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  /**
+   * @return a {@link SortedMap} to which overflow entries mappings can be
+   *         added or removed.
+   * @throws UnsupportedOperationException if {@link #makeImmutable()} has been
+   *         called.
+   */
+  @SuppressWarnings("unchecked")
+  private SortedMap<K, V> getOverflowEntriesMutable() {
+    checkMutable();
+    if (overflowEntries.isEmpty() && !(overflowEntries instanceof TreeMap)) {
+      overflowEntries = new TreeMap<K, V>();
+    }
+    return (SortedMap<K, V>) overflowEntries;
+  }
+
+  /**
+   * Lazily creates the entry list. Any code that adds to the list must first
+   * call this method.
+   */
+  private void ensureEntryArrayMutable() {
+    checkMutable();
+    if (entryList.isEmpty() && !(entryList instanceof ArrayList)) {
+      entryList = new ArrayList<Entry>(maxArraySize);
+    }
+  }
+
+  /**
+   * Entry implementation that implements Comparable in order to support
+   * binary search witin the entry array. Also checks mutability in
+   * {@link #setValue()}.
+   */
+  private class Entry implements Map.Entry<K, V>, Comparable<Entry> {
+
+    private final K key;
+    private V value;
+
+    Entry(Map.Entry<K, V> copy) {
+      this(copy.getKey(), copy.getValue());
+    }
+
+    Entry(K key, V value) {
+      this.key = key;
+      this.value = value;
+    }
+
+    @Override
+    public K getKey() {
+      return key;
+    }
+
+    @Override
+    public V getValue() {
+      return value;
+    }
+
+    @Override
+    public int compareTo(Entry other) {
+      return getKey().compareTo(other.getKey());
+    }
+
+    @Override
+    public V setValue(V newValue) {
+      checkMutable();
+      final V oldValue = this.value;
+      this.value = newValue;
+      return oldValue;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (o == this) {
+        return true;
+      }
+      if (!(o instanceof Map.Entry)) {
+        return false;
+      }
+      @SuppressWarnings("unchecked")
+      Map.Entry<?, ?> other = (Map.Entry<?, ?>) o;
+      return equals(key, other.getKey()) && equals(value, other.getValue());
+    }
+
+    @Override
+    public int hashCode() {
+      return (key == null ? 0 : key.hashCode()) ^
+          (value == null ? 0 : value.hashCode());
+    }
+
+    @Override
+    public String toString() {
+      return key + "=" + value;
+    }
+
+    /** equals() that handles null values. */
+    private boolean equals(Object o1, Object o2) {
+      return o1 == null ? o2 == null : o1.equals(o2);
+    }
+  }
+
+  /**
+   * Stateless view of the entries in the field map.
+   */
+  private class EntrySet extends AbstractSet<Map.Entry<K, V>> {
+
+    @Override
+    public Iterator<Map.Entry<K, V>> iterator() {
+      return new EntryIterator();
+    }
+
+    @Override
+    public int size() {
+      return SmallSortedMap.this.size();
+    }
+
+    /**
+     * Throws a {@link ClassCastException} if o is not of the expected type.
+     *
+     * {@inheritDoc}
+     */
+    @Override
+    public boolean contains(Object o) {
+      @SuppressWarnings("unchecked")
+      final Map.Entry<K, V> entry = (Map.Entry<K, V>) o;
+      final V existing = get(entry.getKey());
+      final V value = entry.getValue();
+      return existing == value ||
+          (existing != null && existing.equals(value));
+    }
+
+    @Override
+    public boolean add(Map.Entry<K, V> entry) {
+      if (!contains(entry)) {
+        put(entry.getKey(), entry.getValue());
+        return true;
+      }
+      return false;
+    }
+
+    /**
+     * Throws a {@link ClassCastException} if o is not of the expected type.
+     *
+     * {@inheritDoc}
+     */
+    @Override
+    public boolean remove(Object o) {
+      @SuppressWarnings("unchecked")
+      final Map.Entry<K, V> entry = (Map.Entry<K, V>) o;
+      if (contains(entry)) {
+        SmallSortedMap.this.remove(entry.getKey());
+        return true;
+      }
+      return false;
+    }
+
+    @Override
+    public void clear() {
+      SmallSortedMap.this.clear();
+    }
+  }
+
+  /**
+   * Iterator implementation that switches from the entry array to the overflow
+   * entries appropriately.
+   */
+  private class EntryIterator implements Iterator<Map.Entry<K, V>> {
+
+    private int pos = -1;
+    private boolean nextCalledBeforeRemove;
+    private Iterator<Map.Entry<K, V>> lazyOverflowIterator;
+
+    @Override
+    public boolean hasNext() {
+      return (pos + 1) < entryList.size() ||
+          getOverflowIterator().hasNext();
+    }
+
+    @Override
+    public Map.Entry<K, V> next() {
+      nextCalledBeforeRemove = true;
+      // Always increment pos so that we know whether the last returned value
+      // was from the array or from overflow.
+      if (++pos < entryList.size()) {
+        return entryList.get(pos);
+      }
+      return getOverflowIterator().next();
+    }
+
+    @Override
+    public void remove() {
+      if (!nextCalledBeforeRemove) {
+        throw new IllegalStateException("remove() was called before next()");
+      }
+      nextCalledBeforeRemove = false;
+      checkMutable();
+
+      if (pos < entryList.size()) {
+        removeArrayEntryAt(pos--);
+      } else {
+        getOverflowIterator().remove();
+      }
+    }
+
+    /**
+     * It is important to create the overflow iterator only after the array
+     * entries have been iterated over because the overflow entry set changes
+     * when the client calls remove() on the array entries, which invalidates
+     * any existing iterators.
+     */
+    private Iterator<Map.Entry<K, V>> getOverflowIterator() {
+      if (lazyOverflowIterator == null) {
+        lazyOverflowIterator = overflowEntries.entrySet().iterator();
+      }
+      return lazyOverflowIterator;
+    }
+  }
+
+  /**
+   * Helper class that holds immutable instances of an Iterable/Iterator that
+   * we return when the overflow entries is empty. This eliminates the creation
+   * of an Iterator object when there is nothing to iterate over.
+   */
+  private static class EmptySet {
+
+    private static final Iterator<Object> ITERATOR = new Iterator<Object>() {
+      @Override
+      public boolean hasNext() {
+        return false;
+      }
+      @Override
+      public Object next() {
+        throw new NoSuchElementException();
+      }
+      @Override
+      public void remove() {
+        throw new UnsupportedOperationException();
+      }
+    };
+
+    private static final Iterable<Object> ITERABLE = new Iterable<Object>() {
+      @Override
+      public Iterator<Object> iterator() {
+        return ITERATOR;
+      }
+    };
+
+    @SuppressWarnings("unchecked")
+    static <T> Iterable<T> iterable() {
+      return (Iterable<T>) ITERABLE;
+    }
+  }
+}

+ 313 - 175
java/src/main/java/com/google/protobuf/TextFormat.java

@@ -46,15 +46,17 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 /**
- * Provide ascii text parsing and formatting support for proto2 instances.
+ * Provide text parsing and formatting support for proto2 instances.
  * The implementation largely follows google/protobuf/text_format.cc.
  *
  * @author wenboz@google.com Wenbo Zhu
  * @author kenton@google.com Kenton Varda
  */
 public final class TextFormat {
-  private TextFormat() {
-  }
+  private TextFormat() {}
+
+  private static final Printer DEFAULT_PRINTER = new Printer(false);
+  private static final Printer SINGLE_LINE_PRINTER = new Printer(true);
 
   /**
    * Outputs a textual representation of the Protocol Message supplied into
@@ -63,16 +65,44 @@ public final class TextFormat {
    */
   public static void print(final Message message, final Appendable output)
                            throws IOException {
-    final TextGenerator generator = new TextGenerator(output);
-    print(message, generator);
+    DEFAULT_PRINTER.print(message, new TextGenerator(output));
   }
 
   /** Outputs a textual representation of {@code fields} to {@code output}. */
   public static void print(final UnknownFieldSet fields,
                            final Appendable output)
                            throws IOException {
-    final TextGenerator generator = new TextGenerator(output);
-    printUnknownFields(fields, generator);
+    DEFAULT_PRINTER.printUnknownFields(fields, new TextGenerator(output));
+  }
+
+  /**
+   * Generates a human readable form of this message, useful for debugging and
+   * other purposes, with no newline characters.
+   */
+  public static String shortDebugString(final Message message) {
+    try {
+      final StringBuilder sb = new StringBuilder();
+      SINGLE_LINE_PRINTER.print(message, new TextGenerator(sb));
+      // Single line mode currently might have an extra space at the end.
+      return sb.toString().trim();
+    } catch (IOException e) {
+      throw new IllegalStateException(e);
+    }
+  }
+
+  /**
+   * Generates a human readable form of the unknown fields, useful for debugging
+   * and other purposes, with no newline characters.
+   */
+  public static String shortDebugString(final UnknownFieldSet fields) {
+    try {
+      final StringBuilder sb = new StringBuilder();
+      SINGLE_LINE_PRINTER.printUnknownFields(fields, new TextGenerator(sb));
+      // Single line mode currently might have an extra space at the end.
+      return sb.toString().trim();
+    } catch (IOException e) {
+      throw new IllegalStateException(e);
+    }
   }
 
   /**
@@ -85,9 +115,7 @@ public final class TextFormat {
       print(message, text);
       return text.toString();
     } catch (IOException e) {
-      throw new RuntimeException(
-        "Writing to a StringBuilder threw an IOException (should never " +
-        "happen).", e);
+      throw new IllegalStateException(e);
     }
   }
 
@@ -101,28 +129,15 @@ public final class TextFormat {
       print(fields, text);
       return text.toString();
     } catch (IOException e) {
-      throw new RuntimeException(
-        "Writing to a StringBuilder threw an IOException (should never " +
-        "happen).", e);
+      throw new IllegalStateException(e);
     }
   }
 
-  private static void print(final Message message,
-                            final TextGenerator generator)
-      throws IOException {
-    for (final Map.Entry<FieldDescriptor, Object> field :
-         message.getAllFields().entrySet()) {
-      printField(field.getKey(), field.getValue(), generator);
-    }
-    printUnknownFields(message.getUnknownFields(), generator);
-  }
-
   public static void printField(final FieldDescriptor field,
                                 final Object value,
                                 final Appendable output)
                                 throws IOException {
-    final TextGenerator generator = new TextGenerator(output);
-    printField(field, value, generator);
+    DEFAULT_PRINTER.printField(field, value, new TextGenerator(output));
   }
 
   public static String printFieldToString(final FieldDescriptor field,
@@ -132,157 +147,263 @@ public final class TextFormat {
       printField(field, value, text);
       return text.toString();
     } catch (IOException e) {
-      throw new RuntimeException(
-        "Writing to a StringBuilder threw an IOException (should never " +
-        "happen).", e);
+      throw new IllegalStateException(e);
     }
   }
 
-  private static void printField(final FieldDescriptor field,
-                                final Object value,
-                                final TextGenerator generator)
-                                throws IOException {
-    if (field.isRepeated()) {
-      // Repeated field.  Print each element.
-      for (final Object element : (List<?>) value) {
-        printSingleField(field, element, generator);
-      }
-    } else {
-      printSingleField(field, value, generator);
+  /**
+   * Outputs a textual representation of the value of given field value.
+   *
+   * @param field the descriptor of the field
+   * @param value the value of the field
+   * @param output the output to which to append the formatted value
+   * @throws ClassCastException if the value is not appropriate for the
+   *     given field descriptor
+   * @throws IOException if there is an exception writing to the output
+   */
+  public static void printFieldValue(final FieldDescriptor field,
+                                     final Object value,
+                                     final Appendable output)
+                                     throws IOException {
+    DEFAULT_PRINTER.printFieldValue(field, value, new TextGenerator(output));
+  }
+
+  /**
+   * Outputs a textual representation of the value of an unknown field.
+   *
+   * @param tag the field's tag number
+   * @param value the value of the field
+   * @param output the output to which to append the formatted value
+   * @throws ClassCastException if the value is not appropriate for the
+   *     given field descriptor
+   * @throws IOException if there is an exception writing to the output
+   */
+  public static void printUnknownFieldValue(final int tag,
+                                            final Object value,
+                                            final Appendable output)
+                                            throws IOException {
+    printUnknownFieldValue(tag, value, new TextGenerator(output));
+  }
+
+  private static void printUnknownFieldValue(final int tag,
+                                             final Object value,
+                                             final TextGenerator generator)
+                                             throws IOException {
+    switch (WireFormat.getTagWireType(tag)) {
+      case WireFormat.WIRETYPE_VARINT:
+        generator.print(unsignedToString((Long) value));
+        break;
+      case WireFormat.WIRETYPE_FIXED32:
+        generator.print(
+            String.format((Locale) null, "0x%08x", (Integer) value));
+        break;
+      case WireFormat.WIRETYPE_FIXED64:
+        generator.print(String.format((Locale) null, "0x%016x", (Long) value));
+        break;
+      case WireFormat.WIRETYPE_LENGTH_DELIMITED:
+        generator.print("\"");
+        generator.print(escapeBytes((ByteString) value));
+        generator.print("\"");
+        break;
+      case WireFormat.WIRETYPE_START_GROUP:
+        DEFAULT_PRINTER.printUnknownFields((UnknownFieldSet) value, generator);
+        break;
+      default:
+        throw new IllegalArgumentException("Bad tag: " + tag);
     }
   }
 
-  private static void printSingleField(final FieldDescriptor field,
-                                       final Object value,
-                                       final TextGenerator generator)
-                                       throws IOException {
-    if (field.isExtension()) {
-      generator.print("[");
-      // We special-case MessageSet elements for compatibility with proto1.
-      if (field.getContainingType().getOptions().getMessageSetWireFormat()
-          && (field.getType() == FieldDescriptor.Type.MESSAGE)
-          && (field.isOptional())
-          // object equality
-          && (field.getExtensionScope() == field.getMessageType())) {
-        generator.print(field.getMessageType().getFullName());
-      } else {
-        generator.print(field.getFullName());
+  /** Helper class for converting protobufs to text. */
+  private static final class Printer {
+    /** Whether to omit newlines from the output. */
+    final boolean singleLineMode;
+
+    private Printer(final boolean singleLineMode) {
+      this.singleLineMode = singleLineMode;
+    }
+
+    private void print(final Message message, final TextGenerator generator)
+        throws IOException {
+      for (Map.Entry<FieldDescriptor, Object> field
+          : message.getAllFields().entrySet()) {
+        printField(field.getKey(), field.getValue(), generator);
       }
-      generator.print("]");
-    } else {
-      if (field.getType() == FieldDescriptor.Type.GROUP) {
-        // Groups must be serialized with their original capitalization.
-        generator.print(field.getMessageType().getName());
+      printUnknownFields(message.getUnknownFields(), generator);
+    }
+
+    private void printField(final FieldDescriptor field, final Object value,
+        final TextGenerator generator) throws IOException {
+      if (field.isRepeated()) {
+        // Repeated field.  Print each element.
+        for (Object element : (List<?>) value) {
+          printSingleField(field, element, generator);
+        }
       } else {
-        generator.print(field.getName());
+        printSingleField(field, value, generator);
       }
     }
 
-    if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
-      generator.print(" {\n");
-      generator.indent();
-    } else {
-      generator.print(": ");
-    }
+    private void printSingleField(final FieldDescriptor field,
+                                  final Object value,
+                                  final TextGenerator generator)
+                                  throws IOException {
+      if (field.isExtension()) {
+        generator.print("[");
+        // We special-case MessageSet elements for compatibility with proto1.
+        if (field.getContainingType().getOptions().getMessageSetWireFormat()
+            && (field.getType() == FieldDescriptor.Type.MESSAGE)
+            && (field.isOptional())
+            // object equality
+            && (field.getExtensionScope() == field.getMessageType())) {
+          generator.print(field.getMessageType().getFullName());
+        } else {
+          generator.print(field.getFullName());
+        }
+        generator.print("]");
+      } else {
+        if (field.getType() == FieldDescriptor.Type.GROUP) {
+          // Groups must be serialized with their original capitalization.
+          generator.print(field.getMessageType().getName());
+        } else {
+          generator.print(field.getName());
+        }
+      }
 
-    printFieldValue(field, value, generator);
+      if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+        if (singleLineMode) {
+          generator.print(" { ");
+        } else {
+          generator.print(" {\n");
+          generator.indent();
+        }
+      } else {
+        generator.print(": ");
+      }
 
-    if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
-      generator.outdent();
-      generator.print("}");
+      printFieldValue(field, value, generator);
+
+      if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+        if (singleLineMode) {
+          generator.print("} ");
+        } else {
+          generator.outdent();
+          generator.print("}\n");
+        }
+      } else {
+        if (singleLineMode) {
+          generator.print(" ");
+        } else {
+          generator.print("\n");
+        }
+      }
     }
-    generator.print("\n");
-  }
 
-  private static void printFieldValue(final FieldDescriptor field,
-                                      final Object value,
-                                      final TextGenerator generator)
-                                      throws IOException {
-    switch (field.getType()) {
-      case INT32:
-      case INT64:
-      case SINT32:
-      case SINT64:
-      case SFIXED32:
-      case SFIXED64:
-      case FLOAT:
-      case DOUBLE:
-      case BOOL:
-        // Good old toString() does what we want for these types.
-        generator.print(value.toString());
-        break;
+    private void printFieldValue(final FieldDescriptor field,
+                                 final Object value,
+                                 final TextGenerator generator)
+                                 throws IOException {
+      switch (field.getType()) {
+        case INT32:
+        case SINT32:
+        case SFIXED32:
+          generator.print(((Integer) value).toString());
+          break;
 
-      case UINT32:
-      case FIXED32:
-        generator.print(unsignedToString((Integer) value));
-        break;
+        case INT64:
+        case SINT64:
+        case SFIXED64:
+          generator.print(((Long) value).toString());
+          break;
 
-      case UINT64:
-      case FIXED64:
-        generator.print(unsignedToString((Long) value));
-        break;
+        case BOOL:
+          generator.print(((Boolean) value).toString());
+          break;
 
-      case STRING:
-        generator.print("\"");
-        generator.print(escapeText((String) value));
-        generator.print("\"");
-        break;
+        case FLOAT:
+          generator.print(((Float) value).toString());
+          break;
 
-      case BYTES:
-        generator.print("\"");
-        generator.print(escapeBytes((ByteString) value));
-        generator.print("\"");
-        break;
+        case DOUBLE:
+          generator.print(((Double) value).toString());
+          break;
 
-      case ENUM:
-        generator.print(((EnumValueDescriptor) value).getName());
-        break;
+        case UINT32:
+        case FIXED32:
+          generator.print(unsignedToString((Integer) value));
+          break;
 
-      case MESSAGE:
-      case GROUP:
-        print((Message) value, generator);
-        break;
-    }
-  }
+        case UINT64:
+        case FIXED64:
+          generator.print(unsignedToString((Long) value));
+          break;
 
-  private static void printUnknownFields(final UnknownFieldSet unknownFields,
-                                         final TextGenerator generator)
-                                         throws IOException {
-    for (final Map.Entry<Integer, UnknownFieldSet.Field> entry :
-         unknownFields.asMap().entrySet()) {
-      final UnknownFieldSet.Field field = entry.getValue();
+        case STRING:
+          generator.print("\"");
+          generator.print(escapeText((String) value));
+          generator.print("\"");
+          break;
 
-      for (final long value : field.getVarintList()) {
-        generator.print(entry.getKey().toString());
-        generator.print(": ");
-        generator.print(unsignedToString(value));
-        generator.print("\n");
+        case BYTES:
+          generator.print("\"");
+          generator.print(escapeBytes((ByteString) value));
+          generator.print("\"");
+          break;
+
+        case ENUM:
+          generator.print(((EnumValueDescriptor) value).getName());
+          break;
+
+        case MESSAGE:
+        case GROUP:
+          print((Message) value, generator);
+          break;
       }
-      for (final int value : field.getFixed32List()) {
-        generator.print(entry.getKey().toString());
-        generator.print(": ");
-        generator.print(String.format((Locale) null, "0x%08x", value));
-        generator.print("\n");
+    }
+
+    private void printUnknownFields(final UnknownFieldSet unknownFields,
+                                    final TextGenerator generator)
+                                    throws IOException {
+      for (Map.Entry<Integer, UnknownFieldSet.Field> entry :
+               unknownFields.asMap().entrySet()) {
+        final int number = entry.getKey();
+        final UnknownFieldSet.Field field = entry.getValue();
+        printUnknownField(number, WireFormat.WIRETYPE_VARINT,
+            field.getVarintList(), generator);
+        printUnknownField(number, WireFormat.WIRETYPE_FIXED32,
+            field.getFixed32List(), generator);
+        printUnknownField(number, WireFormat.WIRETYPE_FIXED64,
+            field.getFixed64List(), generator);
+        printUnknownField(number, WireFormat.WIRETYPE_LENGTH_DELIMITED,
+            field.getLengthDelimitedList(), generator);
+        for (final UnknownFieldSet value : field.getGroupList()) {
+          generator.print(entry.getKey().toString());
+          if (singleLineMode) {
+            generator.print(" { ");
+          } else {
+            generator.print(" {\n");
+            generator.indent();
+          }
+          printUnknownFields(value, generator);
+          if (singleLineMode) {
+            generator.print("} ");
+          } else {
+            generator.outdent();
+            generator.print("}\n");
+          }
+        }
       }
-      for (final long value : field.getFixed64List()) {
-        generator.print(entry.getKey().toString());
+    }
+
+    private void printUnknownField(final int number,
+                                   final int wireType,
+                                   final List<?> values,
+                                   final TextGenerator generator)
+                                   throws IOException {
+      for (final Object value : values) {
+        generator.print(String.valueOf(number));
         generator.print(": ");
-        generator.print(String.format((Locale) null, "0x%016x", value));
-        generator.print("\n");
-      }
-      for (final ByteString value : field.getLengthDelimitedList()) {
-        generator.print(entry.getKey().toString());
-        generator.print(": \"");
-        generator.print(escapeBytes(value));
-        generator.print("\"\n");
-      }
-      for (final UnknownFieldSet value : field.getGroupList()) {
-        generator.print(entry.getKey().toString());
-        generator.print(" {\n");
-        generator.indent();
-        printUnknownFields(value, generator);
-        generator.outdent();
-        generator.print("}\n");
+        printUnknownFieldValue(wireType, value, generator);
+        generator.print(singleLineMode ? " " : "\n");
       }
     }
   }
@@ -312,9 +433,9 @@ public final class TextFormat {
    * An inner class for writing text to the output stream.
    */
   private static final class TextGenerator {
-    private Appendable output;
-    private boolean atStartOfLine = true;
+    private final Appendable output;
     private final StringBuilder indent = new StringBuilder();
+    private boolean atStartOfLine = true;
 
     private TextGenerator(final Appendable output) {
       this.output = output;
@@ -670,10 +791,14 @@ public final class TextFormat {
      * Otherwise, throw a {@link ParseException}.
      */
     public boolean consumeBoolean() throws ParseException {
-      if (currentToken.equals("true")) {
+      if (currentToken.equals("true") ||
+          currentToken.equals("t") ||
+          currentToken.equals("1")) {
         nextToken();
         return true;
-      } else if (currentToken.equals("false")) {
+      } else if (currentToken.equals("false") ||
+                 currentToken.equals("f") ||
+                 currentToken.equals("0")) {
         nextToken();
         return false;
       } else {
@@ -1063,6 +1188,9 @@ public final class TextFormat {
         case '\'': builder.append("\\\'"); break;
         case '"' : builder.append("\\\""); break;
         default:
+          // Note:  Bytes with the high-order bit set should be escaped.  Since
+          //   bytes are signed, such bytes will compare less than 0x20, hence
+          //   the following line is correct.
           if (b >= 0x20) {
             builder.append((char) b);
           } else {
@@ -1082,27 +1210,37 @@ public final class TextFormat {
    * {@link #escapeBytes(ByteString)}.  Two-digit hex escapes (starting with
    * "\x") are also recognized.
    */
-  static ByteString unescapeBytes(final CharSequence input)
+  static ByteString unescapeBytes(final CharSequence charString)
       throws InvalidEscapeSequenceException {
-    final byte[] result = new byte[input.length()];
+    // First convert the Java characater sequence to UTF-8 bytes.
+    ByteString input = ByteString.copyFromUtf8(charString.toString());
+    // Then unescape certain byte sequences introduced by ASCII '\\'.  The valid
+    // escapes can all be expressed with ASCII characters, so it is safe to
+    // operate on bytes here.
+    //
+    // Unescaping the input byte array will result in a byte sequence that's no
+    // longer than the input.  That's because each escape sequence is between
+    // two and four bytes long and stands for a single byte.
+    final byte[] result = new byte[input.size()];
     int pos = 0;
-    for (int i = 0; i < input.length(); i++) {
-      char c = input.charAt(i);
+    for (int i = 0; i < input.size(); i++) {
+      byte c = input.byteAt(i);
       if (c == '\\') {
-        if (i + 1 < input.length()) {
+        if (i + 1 < input.size()) {
           ++i;
-          c = input.charAt(i);
+          c = input.byteAt(i);
           if (isOctal(c)) {
             // Octal escape.
             int code = digitValue(c);
-            if (i + 1 < input.length() && isOctal(input.charAt(i + 1))) {
+            if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
               ++i;
-              code = code * 8 + digitValue(input.charAt(i));
+              code = code * 8 + digitValue(input.byteAt(i));
             }
-            if (i + 1 < input.length() && isOctal(input.charAt(i + 1))) {
+            if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
               ++i;
-              code = code * 8 + digitValue(input.charAt(i));
+              code = code * 8 + digitValue(input.byteAt(i));
             }
+            // TODO: Check that 0 <= code && code <= 0xFF.
             result[pos++] = (byte)code;
           } else {
             switch (c) {
@@ -1120,31 +1258,31 @@ public final class TextFormat {
               case 'x':
                 // hex escape
                 int code = 0;
-                if (i + 1 < input.length() && isHex(input.charAt(i + 1))) {
+                if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
                   ++i;
-                  code = digitValue(input.charAt(i));
+                  code = digitValue(input.byteAt(i));
                 } else {
                   throw new InvalidEscapeSequenceException(
-                    "Invalid escape sequence: '\\x' with no digits");
+                      "Invalid escape sequence: '\\x' with no digits");
                 }
-                if (i + 1 < input.length() && isHex(input.charAt(i + 1))) {
+                if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
                   ++i;
-                  code = code * 16 + digitValue(input.charAt(i));
+                  code = code * 16 + digitValue(input.byteAt(i));
                 }
                 result[pos++] = (byte)code;
                 break;
 
               default:
                 throw new InvalidEscapeSequenceException(
-                  "Invalid escape sequence: '\\" + c + '\'');
+                    "Invalid escape sequence: '\\" + (char)c + '\'');
             }
           }
         } else {
           throw new InvalidEscapeSequenceException(
-            "Invalid escape sequence: '\\' at end of string.");
+              "Invalid escape sequence: '\\' at end of string.");
         }
       } else {
-        result[pos++] = (byte)c;
+        result[pos++] = c;
       }
     }
 
@@ -1182,12 +1320,12 @@ public final class TextFormat {
   }
 
   /** Is this an octal digit? */
-  private static boolean isOctal(final char c) {
+  private static boolean isOctal(final byte c) {
     return '0' <= c && c <= '7';
   }
 
   /** Is this a hex digit? */
-  private static boolean isHex(final char c) {
+  private static boolean isHex(final byte c) {
     return ('0' <= c && c <= '9') ||
            ('a' <= c && c <= 'f') ||
            ('A' <= c && c <= 'F');
@@ -1198,7 +1336,7 @@ public final class TextFormat {
    * numeric value.  This is like {@code Character.digit()} but we don't accept
    * non-ASCII digits.
    */
-  private static int digitValue(final char c) {
+  private static int digitValue(final byte c) {
     if ('0' <= c && c <= '9') {
       return c - '0';
     } else if ('a' <= c && c <= 'z') {

+ 146 - 0
java/src/main/java/com/google/protobuf/UnmodifiableLazyStringList.java

@@ -0,0 +1,146 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.util.AbstractList;
+import java.util.RandomAccess;
+import java.util.ListIterator;
+import java.util.Iterator;
+
+/**
+ * An implementation of {@link LazyStringList} that wraps another
+ * {@link LazyStringList} such that it cannot be modified via the wrapper.
+ *
+ * @author jonp@google.com (Jon Perlow)
+ */
+public class UnmodifiableLazyStringList extends AbstractList<String>
+    implements LazyStringList, RandomAccess {
+
+  private final LazyStringList list;
+
+  public UnmodifiableLazyStringList(LazyStringList list) {
+    this.list = list;
+  }
+
+  @Override
+  public String get(int index) {
+    return list.get(index);
+  }
+
+  @Override
+  public int size() {
+    return list.size();
+  }
+
+  @Override
+  public ByteString getByteString(int index) {
+    return list.getByteString(index);
+  }
+
+  @Override
+  public void add(ByteString element) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public ListIterator<String> listIterator(final int index) {
+    return new ListIterator<String>() {
+      ListIterator<String> iter = list.listIterator(index);
+
+      @Override
+      public boolean hasNext() {
+        return iter.hasNext();
+      }
+
+      @Override
+      public String next() {
+        return iter.next();
+      }
+
+      @Override
+      public boolean hasPrevious() {
+        return iter.hasPrevious();
+      }
+
+      @Override
+      public String previous() {
+        return iter.previous();
+      }
+
+      @Override
+      public int nextIndex() {
+        return iter.nextIndex();
+      }
+
+      @Override
+      public int previousIndex() {
+        return iter.previousIndex();
+      }
+
+      @Override
+      public void remove() {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public void set(String o) {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public void add(String o) {
+        throw new UnsupportedOperationException();
+      }
+    };
+  }
+
+  @Override
+  public Iterator<String> iterator() {
+    return new Iterator<String>() {
+      Iterator<String> iter = list.iterator();
+
+      @Override
+      public boolean hasNext() {
+        return iter.hasNext();
+      }
+
+      @Override
+      public String next() {
+        return iter.next();
+      }
+
+      @Override
+      public void remove() {
+        throw new UnsupportedOperationException();
+      }
+    };
+  }
+}

+ 6 - 6
java/src/main/java/com/google/protobuf/WireFormat.java

@@ -45,12 +45,12 @@ public final class WireFormat {
   // Do not allow instantiation.
   private WireFormat() {}
 
-  static final int WIRETYPE_VARINT           = 0;
-  static final int WIRETYPE_FIXED64          = 1;
-  static final int WIRETYPE_LENGTH_DELIMITED = 2;
-  static final int WIRETYPE_START_GROUP      = 3;
-  static final int WIRETYPE_END_GROUP        = 4;
-  static final int WIRETYPE_FIXED32          = 5;
+  public static final int WIRETYPE_VARINT           = 0;
+  public static final int WIRETYPE_FIXED64          = 1;
+  public static final int WIRETYPE_LENGTH_DELIMITED = 2;
+  public static final int WIRETYPE_START_GROUP      = 3;
+  public static final int WIRETYPE_END_GROUP        = 4;
+  public static final int WIRETYPE_FIXED32          = 5;
 
   static final int TAG_TYPE_BITS = 3;
   static final int TAG_TYPE_MASK = (1 << TAG_TYPE_BITS) - 1;

+ 7 - 6
java/src/test/java/com/google/protobuf/AbstractMessageTest.java

@@ -366,7 +366,7 @@ public class AbstractMessageTest extends TestCase {
 
   // -----------------------------------------------------------------
   // Tests for equals and hashCode
-  
+
   public void testEqualsAndHashCode() throws Exception {
     TestAllTypes a = TestUtil.getAllSet();
     TestAllTypes b = TestAllTypes.newBuilder().build();
@@ -382,7 +382,7 @@ public class AbstractMessageTest extends TestCase {
     checkEqualsIsConsistent(d);
     checkEqualsIsConsistent(e);
     checkEqualsIsConsistent(f);
-    
+
     checkNotEqual(a, b);
     checkNotEqual(a, c);
     checkNotEqual(a, d);
@@ -413,19 +413,20 @@ public class AbstractMessageTest extends TestCase {
     checkEqualsIsConsistent(eUnknownFields);
     checkEqualsIsConsistent(fUnknownFields);
 
-    // Subseqent reconstitutions should be identical
+    // Subsequent reconstitutions should be identical
     UnittestProto.TestEmptyMessage eUnknownFields2 =
         UnittestProto.TestEmptyMessage.parseFrom(e.toByteArray());
     checkEqualsIsConsistent(eUnknownFields, eUnknownFields2);
   }
-  
+
+
   /**
    * Asserts that the given proto has symetric equals and hashCode methods.
    */
   private void checkEqualsIsConsistent(Message message) {
     // Object should be equal to itself.
     assertEquals(message, message);
-    
+
     // Object should be equal to a dynamic copy of itself.
     DynamicMessage dynamic = DynamicMessage.newBuilder(message).build();
     checkEqualsIsConsistent(message, dynamic);
@@ -442,7 +443,7 @@ public class AbstractMessageTest extends TestCase {
 
   /**
    * Asserts that the given protos are not equal and have different hash codes.
-   * 
+   *
    * @warning It's valid for non-equal objects to have the same hash code, so
    *   this test is stricter than it needs to be. However, this should happen
    *   relatively rarely.

+ 21 - 0
java/src/test/java/com/google/protobuf/CodedInputStreamTest.java

@@ -325,6 +325,27 @@ public class CodedInputStreamTest extends TestCase {
     assertEquals(2, input.readRawByte());
   }
 
+  /**
+   * Test that a bug in skipRawBytes() has been fixed:  if the skip skips
+   * past the end of a buffer with a limit that has been set past the end of
+   * that buffer, this should not break things.
+   */
+  public void testSkipRawBytesPastEndOfBufferWithLimit() throws Exception {
+    byte[] rawBytes = new byte[] { 1, 2, 3, 4, 5 };
+    CodedInputStream input = CodedInputStream.newInstance(
+        new SmallBlockInputStream(rawBytes, 3));
+
+    int limit = input.pushLimit(4);
+    // In order to expose the bug we need to read at least one byte to prime the
+    // buffer inside the CodedInputStream.
+    assertEquals(1, input.readRawByte());
+    // Skip to the end of the limit.
+    input.skipRawBytes(3);
+    assertTrue(input.isAtEnd());
+    input.popLimit(limit);
+    assertEquals(5, input.readRawByte());
+  }
+
   public void testReadHugeBlob() throws Exception {
     // Allocate and initialize a 1MB blob.
     byte[] blob = new byte[1 << 20];

+ 80 - 0
java/src/test/java/com/google/protobuf/DeprecatedFieldTest.java

@@ -0,0 +1,80 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import protobuf_unittest.UnittestProto.TestDeprecatedFields;
+
+import junit.framework.TestCase;
+
+import java.lang.reflect.AnnotatedElement;
+import java.lang.reflect.Method;
+/**
+ * Test field deprecation
+ * 
+ * @author birdo@google.com (Roberto Scaramuzzi)
+ */
+public class DeprecatedFieldTest extends TestCase {
+  private String[] deprecatedGetterNames = {
+      "hasDeprecatedInt32",
+      "getDeprecatedInt32"};
+  
+  private String[] deprecatedBuilderGetterNames = {
+      "hasDeprecatedInt32",
+      "getDeprecatedInt32",
+      "clearDeprecatedInt32"};
+  
+  private String[] deprecatedBuilderSetterNames = {
+      "setDeprecatedInt32"}; 
+  
+  public void testDeprecatedField() throws Exception {
+    Class<?> deprecatedFields = TestDeprecatedFields.class;
+    Class<?> deprecatedFieldsBuilder = TestDeprecatedFields.Builder.class;
+    for (String name : deprecatedGetterNames) {
+      Method method = deprecatedFields.getMethod(name);
+      assertTrue("Method " + name + " should be deprecated",
+          isDeprecated(method));
+    }
+    for (String name : deprecatedBuilderGetterNames) {
+      Method method = deprecatedFieldsBuilder.getMethod(name);
+      assertTrue("Method " + name + " should be deprecated",
+          isDeprecated(method));
+    }
+    for (String name : deprecatedBuilderSetterNames) {
+      Method method = deprecatedFieldsBuilder.getMethod(name, int.class);
+      assertTrue("Method " + name + " should be deprecated",
+          isDeprecated(method));
+    }
+  }
+  
+  private boolean isDeprecated(AnnotatedElement annotated) {
+    return annotated.isAnnotationPresent(Deprecated.class);
+  }
+}

+ 1 - 0
java/src/test/java/com/google/protobuf/DescriptorsTest.java

@@ -44,6 +44,7 @@ import com.google.protobuf.Descriptors.MethodDescriptor;
 
 import com.google.protobuf.test.UnittestImport;
 import com.google.protobuf.test.UnittestImport.ImportEnum;
+import com.google.protobuf.test.UnittestImport.ImportMessage;
 import protobuf_unittest.UnittestProto;
 import protobuf_unittest.UnittestProto.ForeignEnum;
 import protobuf_unittest.UnittestProto.ForeignMessage;

+ 48 - 0
java/src/test/java/com/google/protobuf/ForceFieldBuildersPreRun.java

@@ -0,0 +1,48 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+/**
+ * A prerun for a test suite that allows running the full protocol buffer
+ * tests in a mode that disables the optimization for not using
+ * {@link RepeatedFieldBuilder} and {@link SingleFieldBuilder} until they are
+ * requested. This allows us to run all the tests through both code paths
+ * and ensures that both code paths produce identical results.
+ *
+ * @author jonp@google.com (Jon Perlow)
+ */
+public class ForceFieldBuildersPreRun implements Runnable {
+
+  @Override
+  public void run() {
+    GeneratedMessage.enableAlwaysUseFieldBuildersForTesting();
+  }
+}

+ 350 - 26
java/src/test/java/com/google/protobuf/GeneratedMessageTest.java

@@ -30,26 +30,43 @@
 
 package com.google.protobuf;
 
+import com.google.protobuf.UnittestLite.TestAllExtensionsLite;
+import com.google.protobuf.test.UnittestImport;
+import protobuf_unittest.EnumWithNoOuter;
+import protobuf_unittest.MessageWithNoOuter;
+import protobuf_unittest.MultipleFilesTestProto;
+import protobuf_unittest.NestedExtension.MyNestedExtension;
+import protobuf_unittest.NestedExtensionLite.MyNestedExtensionLite;
+import protobuf_unittest.NonNestedExtension;
+import protobuf_unittest.NonNestedExtension.MessageToBeExtended;
+import protobuf_unittest.NonNestedExtension.MyNonNestedExtension;
+import protobuf_unittest.NonNestedExtensionLite;
+import protobuf_unittest.NonNestedExtensionLite.MessageLiteToBeExtended;
+import protobuf_unittest.NonNestedExtensionLite.MyNonNestedExtensionLite;
+import protobuf_unittest.ServiceWithNoOuter;
 import protobuf_unittest.UnittestOptimizeFor.TestOptimizedForSize;
 import protobuf_unittest.UnittestOptimizeFor.TestOptionalOptimizedForSize;
 import protobuf_unittest.UnittestOptimizeFor.TestRequiredOptimizedForSize;
 import protobuf_unittest.UnittestProto;
-import protobuf_unittest.UnittestProto.ForeignMessage;
 import protobuf_unittest.UnittestProto.ForeignEnum;
-import protobuf_unittest.UnittestProto.TestAllTypes;
+import protobuf_unittest.UnittestProto.ForeignMessage;
+import protobuf_unittest.UnittestProto.ForeignMessageOrBuilder;
 import protobuf_unittest.UnittestProto.TestAllExtensions;
+import protobuf_unittest.UnittestProto.TestAllTypes;
+import protobuf_unittest.UnittestProto.TestAllTypesOrBuilder;
 import protobuf_unittest.UnittestProto.TestExtremeDefaultValues;
 import protobuf_unittest.UnittestProto.TestPackedTypes;
 import protobuf_unittest.UnittestProto.TestUnpackedTypes;
-import protobuf_unittest.MultipleFilesTestProto;
-import protobuf_unittest.MessageWithNoOuter;
-import protobuf_unittest.EnumWithNoOuter;
-import protobuf_unittest.ServiceWithNoOuter;
-import com.google.protobuf.UnittestLite;
-import com.google.protobuf.UnittestLite.TestAllExtensionsLite;
 
 import junit.framework.TestCase;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
 import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
 
 /**
  * Unit test for generated messages and generated code.  See also
@@ -68,32 +85,111 @@ public class GeneratedMessageTest extends TestCase {
                TestAllTypes.newBuilder().getDefaultInstanceForType());
   }
 
-  public void testAccessors() throws Exception {
+  public void testMessageOrBuilder() throws Exception {
     TestAllTypes.Builder builder = TestAllTypes.newBuilder();
     TestUtil.setAllFields(builder);
     TestAllTypes message = builder.build();
     TestUtil.assertAllFieldsSet(message);
   }
 
-  public void testDoubleBuildError() throws Exception {
+  public void testUsingBuilderMultipleTimes() throws Exception {
     TestAllTypes.Builder builder = TestAllTypes.newBuilder();
-    builder.build();
-    try {
-      builder.build();
-      fail("Should have thrown exception.");
-    } catch (IllegalStateException e) {
-      // Success.
-    }
+    // primitive field scalar and repeated
+    builder.setOptionalSfixed64(100);
+    builder.addRepeatedInt32(100);
+    // enum field scalar and repeated
+    builder.setOptionalImportEnum(UnittestImport.ImportEnum.IMPORT_BAR);
+    builder.addRepeatedImportEnum(UnittestImport.ImportEnum.IMPORT_BAR);
+    // proto field scalar and repeated
+    builder.setOptionalForeignMessage(ForeignMessage.newBuilder().setC(1));
+    builder.addRepeatedForeignMessage(ForeignMessage.newBuilder().setC(1));
+
+    TestAllTypes value1 = builder.build();
+
+    assertEquals(100, value1.getOptionalSfixed64());
+    assertEquals(100, value1.getRepeatedInt32(0));
+    assertEquals(UnittestImport.ImportEnum.IMPORT_BAR,
+        value1.getOptionalImportEnum());
+    assertEquals(UnittestImport.ImportEnum.IMPORT_BAR,
+        value1.getRepeatedImportEnum(0));
+    assertEquals(1, value1.getOptionalForeignMessage().getC());
+    assertEquals(1, value1.getRepeatedForeignMessage(0).getC());
+
+    // Make sure that builder didn't update previously created values
+    builder.setOptionalSfixed64(200);
+    builder.setRepeatedInt32(0, 200);
+    builder.setOptionalImportEnum(UnittestImport.ImportEnum.IMPORT_FOO);
+    builder.setRepeatedImportEnum(0, UnittestImport.ImportEnum.IMPORT_FOO);
+    builder.setOptionalForeignMessage(ForeignMessage.newBuilder().setC(2));
+    builder.setRepeatedForeignMessage(0, ForeignMessage.newBuilder().setC(2));
+
+    TestAllTypes value2 = builder.build();
+
+    // Make sure value1 didn't change.
+    assertEquals(100, value1.getOptionalSfixed64());
+    assertEquals(100, value1.getRepeatedInt32(0));
+    assertEquals(UnittestImport.ImportEnum.IMPORT_BAR,
+        value1.getOptionalImportEnum());
+    assertEquals(UnittestImport.ImportEnum.IMPORT_BAR,
+        value1.getRepeatedImportEnum(0));
+    assertEquals(1, value1.getOptionalForeignMessage().getC());
+    assertEquals(1, value1.getRepeatedForeignMessage(0).getC());
+
+    // Make sure value2 is correct
+    assertEquals(200, value2.getOptionalSfixed64());
+    assertEquals(200, value2.getRepeatedInt32(0));
+    assertEquals(UnittestImport.ImportEnum.IMPORT_FOO,
+        value2.getOptionalImportEnum());
+    assertEquals(UnittestImport.ImportEnum.IMPORT_FOO,
+        value2.getRepeatedImportEnum(0));
+    assertEquals(2, value2.getOptionalForeignMessage().getC());
+    assertEquals(2, value2.getRepeatedForeignMessage(0).getC());
+  }
+
+  public void testProtosShareRepeatedArraysIfDidntChange() throws Exception {
+    TestAllTypes.Builder builder = TestAllTypes.newBuilder();
+    builder.addRepeatedInt32(100);
+    builder.addRepeatedImportEnum(UnittestImport.ImportEnum.IMPORT_BAR);
+    builder.addRepeatedForeignMessage(ForeignMessage.getDefaultInstance());
+
+    TestAllTypes value1 = builder.build();
+    TestAllTypes value2 = value1.toBuilder().build();
+
+    assertSame(value1.getRepeatedInt32List(), value2.getRepeatedInt32List());
+    assertSame(value1.getRepeatedImportEnumList(),
+        value2.getRepeatedImportEnumList());
+    assertSame(value1.getRepeatedForeignMessageList(),
+        value2.getRepeatedForeignMessageList());
   }
 
-  public void testClearAfterBuildError() throws Exception {
+  public void testRepeatedArraysAreImmutable() throws Exception {
     TestAllTypes.Builder builder = TestAllTypes.newBuilder();
-    builder.build();
-    try {
-      builder.clear();
-      fail("Should have thrown exception.");
-    } catch (IllegalStateException e) {
-      // Success.
+    builder.addRepeatedInt32(100);
+    builder.addRepeatedImportEnum(UnittestImport.ImportEnum.IMPORT_BAR);
+    builder.addRepeatedForeignMessage(ForeignMessage.getDefaultInstance());
+    assertIsUnmodifiable(builder.getRepeatedInt32List());
+    assertIsUnmodifiable(builder.getRepeatedImportEnumList());
+    assertIsUnmodifiable(builder.getRepeatedForeignMessageList());
+    assertIsUnmodifiable(builder.getRepeatedFloatList());
+
+
+    TestAllTypes value = builder.build();
+    assertIsUnmodifiable(value.getRepeatedInt32List());
+    assertIsUnmodifiable(value.getRepeatedImportEnumList());
+    assertIsUnmodifiable(value.getRepeatedForeignMessageList());
+    assertIsUnmodifiable(value.getRepeatedFloatList());
+  }
+
+  private void assertIsUnmodifiable(List<?> list) {
+    if (list == Collections.emptyList()) {
+      // OKAY -- Need to check this b/c EmptyList allows you to call clear.
+    } else {
+      try {
+        list.clear();
+        fail("List wasn't immutable");
+      } catch (UnsupportedOperationException e) {
+        // good
+      }
     }
   }
 
@@ -316,9 +412,19 @@ public class GeneratedMessageTest extends TestCase {
     assertTrue(Float.isNaN(message.getNanFloat()));
   }
 
+  public void testClear() throws Exception {
+    TestAllTypes.Builder builder = TestAllTypes.newBuilder();
+    TestUtil.assertClear(builder);
+    TestUtil.setAllFields(builder);
+    builder.clear();
+    TestUtil.assertClear(builder);
+  }
+
   public void testReflectionGetters() throws Exception {
     TestAllTypes.Builder builder = TestAllTypes.newBuilder();
     TestUtil.setAllFields(builder);
+    reflectionTester.assertAllFieldsSetViaReflection(builder);
+
     TestAllTypes message = builder.build();
     reflectionTester.assertAllFieldsSetViaReflection(message);
   }
@@ -326,6 +432,8 @@ public class GeneratedMessageTest extends TestCase {
   public void testReflectionSetters() throws Exception {
     TestAllTypes.Builder builder = TestAllTypes.newBuilder();
     reflectionTester.setAllFieldsViaReflection(builder);
+    TestUtil.assertAllFieldsSet(builder);
+
     TestAllTypes message = builder.build();
     TestUtil.assertAllFieldsSet(message);
   }
@@ -339,6 +447,8 @@ public class GeneratedMessageTest extends TestCase {
     TestAllTypes.Builder builder = TestAllTypes.newBuilder();
     reflectionTester.setAllFieldsViaReflection(builder);
     reflectionTester.modifyRepeatedFieldsViaReflection(builder);
+    TestUtil.assertRepeatedFieldsModified(builder);
+
     TestAllTypes message = builder.build();
     TestUtil.assertRepeatedFieldsModified(message);
   }
@@ -391,7 +501,7 @@ public class GeneratedMessageTest extends TestCase {
     new TestUtil.ReflectionTester(TestAllExtensions.getDescriptor(),
                                   TestUtil.getExtensionRegistry());
 
-  public void testExtensionAccessors() throws Exception {
+  public void testExtensionMessageOrBuilder() throws Exception {
     TestAllExtensions.Builder builder = TestAllExtensions.newBuilder();
     TestUtil.setAllExtensions(builder);
     TestAllExtensions message = builder.build();
@@ -414,6 +524,8 @@ public class GeneratedMessageTest extends TestCase {
   public void testExtensionReflectionGetters() throws Exception {
     TestAllExtensions.Builder builder = TestAllExtensions.newBuilder();
     TestUtil.setAllExtensions(builder);
+    extensionsReflectionTester.assertAllFieldsSetViaReflection(builder);
+
     TestAllExtensions message = builder.build();
     extensionsReflectionTester.assertAllFieldsSetViaReflection(message);
   }
@@ -421,6 +533,8 @@ public class GeneratedMessageTest extends TestCase {
   public void testExtensionReflectionSetters() throws Exception {
     TestAllExtensions.Builder builder = TestAllExtensions.newBuilder();
     extensionsReflectionTester.setAllFieldsViaReflection(builder);
+    TestUtil.assertAllExtensionsSet(builder);
+
     TestAllExtensions message = builder.build();
     TestUtil.assertAllExtensionsSet(message);
   }
@@ -434,6 +548,8 @@ public class GeneratedMessageTest extends TestCase {
     TestAllExtensions.Builder builder = TestAllExtensions.newBuilder();
     extensionsReflectionTester.setAllFieldsViaReflection(builder);
     extensionsReflectionTester.modifyRepeatedFieldsViaReflection(builder);
+    TestUtil.assertRepeatedExtensionsModified(builder);
+
     TestAllExtensions message = builder.build();
     TestUtil.assertRepeatedExtensionsModified(message);
   }
@@ -491,9 +607,11 @@ public class GeneratedMessageTest extends TestCase {
   // lite fields directly since they are implemented exactly the same as
   // regular fields.
 
-  public void testLiteExtensionAccessors() throws Exception {
+  public void testLiteExtensionMessageOrBuilder() throws Exception {
     TestAllExtensionsLite.Builder builder = TestAllExtensionsLite.newBuilder();
     TestUtil.setAllExtensions(builder);
+    TestUtil.assertAllExtensionsSet(builder);
+
     TestAllExtensionsLite message = builder.build();
     TestUtil.assertAllExtensionsSet(message);
   }
@@ -502,6 +620,8 @@ public class GeneratedMessageTest extends TestCase {
     TestAllExtensionsLite.Builder builder = TestAllExtensionsLite.newBuilder();
     TestUtil.setAllExtensions(builder);
     TestUtil.modifyRepeatedExtensions(builder);
+    TestUtil.assertRepeatedExtensionsModified(builder);
+
     TestAllExtensionsLite message = builder.build();
     TestUtil.assertRepeatedExtensionsModified(message);
   }
@@ -609,6 +729,7 @@ public class GeneratedMessageTest extends TestCase {
     TestAllTypes.Builder builder = TestAllTypes.newBuilder();
     TestUtil.setAllFields(builder);
     TestAllTypes message = builder.build();
+    TestUtil.assertAllFieldsSet(message);
     TestUtil.assertAllFieldsSet(message.toBuilder().build());
   }
 
@@ -646,4 +767,207 @@ public class GeneratedMessageTest extends TestCase {
     assertTrue(message.getA() != null);
     assertTrue(message.getA() == message);
   }
+
+  public void testSerialize() throws Exception {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    TestAllTypes.Builder builder = TestAllTypes.newBuilder();
+    TestUtil.setAllFields(builder);
+    TestAllTypes expected = builder.build();
+    ObjectOutputStream out = new ObjectOutputStream(baos);
+    out.writeObject(expected);
+    out.close();
+    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
+    ObjectInputStream in = new ObjectInputStream(bais);
+    TestAllTypes actual = (TestAllTypes) in.readObject();
+    assertEquals(expected, actual);
+  }
+
+  public void testSerializePartial() throws Exception {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    TestAllTypes.Builder builder = TestAllTypes.newBuilder();
+    TestAllTypes expected = builder.buildPartial();
+    ObjectOutputStream out = new ObjectOutputStream(baos);
+    out.writeObject(expected);
+    out.close();
+    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
+    ObjectInputStream in = new ObjectInputStream(bais);
+    TestAllTypes actual = (TestAllTypes) in.readObject();
+    assertEquals(expected, actual);
+  }
+
+  public void testEnumValues() {
+     assertEquals(
+         TestAllTypes.NestedEnum.BAR.getNumber(),
+         TestAllTypes.NestedEnum.BAR_VALUE);
+    assertEquals(
+        TestAllTypes.NestedEnum.BAZ.getNumber(),
+        TestAllTypes.NestedEnum.BAZ_VALUE);
+    assertEquals(
+        TestAllTypes.NestedEnum.FOO.getNumber(),
+        TestAllTypes.NestedEnum.FOO_VALUE);
+  }
+
+  public void testNonNestedExtensionInitialization() {
+    assertTrue(NonNestedExtension.nonNestedExtension
+               .getMessageDefaultInstance() instanceof MyNonNestedExtension);
+    assertEquals("nonNestedExtension",
+                 NonNestedExtension.nonNestedExtension.getDescriptor().getName());
+  }
+
+  public void testNestedExtensionInitialization() {
+    assertTrue(MyNestedExtension.recursiveExtension.getMessageDefaultInstance()
+               instanceof MessageToBeExtended);
+    assertEquals("recursiveExtension",
+                 MyNestedExtension.recursiveExtension.getDescriptor().getName());
+  }
+
+  public void testNonNestedExtensionLiteInitialization() {
+    assertTrue(NonNestedExtensionLite.nonNestedExtensionLite
+               .getMessageDefaultInstance() instanceof MyNonNestedExtensionLite);
+  }
+
+  public void testNestedExtensionLiteInitialization() {
+    assertTrue(MyNestedExtensionLite.recursiveExtensionLite
+               .getMessageDefaultInstance() instanceof MessageLiteToBeExtended);
+  }
+
+  public void testInvalidations() throws Exception {
+    GeneratedMessage.enableAlwaysUseFieldBuildersForTesting();
+    TestAllTypes.NestedMessage nestedMessage1 =
+        TestAllTypes.NestedMessage.newBuilder().build();
+    TestAllTypes.NestedMessage nestedMessage2 =
+        TestAllTypes.NestedMessage.newBuilder().build();
+
+    // Set all three flavors (enum, primitive, message and singular/repeated)
+    // and verify no invalidations fired
+    TestUtil.MockBuilderParent mockParent = new TestUtil.MockBuilderParent();
+
+    TestAllTypes.Builder builder = (TestAllTypes.Builder)
+        ((GeneratedMessage) TestAllTypes.getDefaultInstance()).
+            newBuilderForType(mockParent);
+    builder.setOptionalInt32(1);
+    builder.setOptionalNestedEnum(TestAllTypes.NestedEnum.BAR);
+    builder.setOptionalNestedMessage(nestedMessage1);
+    builder.addRepeatedInt32(1);
+    builder.addRepeatedNestedEnum(TestAllTypes.NestedEnum.BAR);
+    builder.addRepeatedNestedMessage(nestedMessage1);
+    assertEquals(0, mockParent.getInvalidationCount());
+
+    // Now tell it we want changes and make sure it's only fired once
+    // And do this for each flavor
+
+    // primitive single
+    builder.buildPartial();
+    builder.setOptionalInt32(2);
+    builder.setOptionalInt32(3);
+    assertEquals(1, mockParent.getInvalidationCount());
+
+    // enum single
+    builder.buildPartial();
+    builder.setOptionalNestedEnum(TestAllTypes.NestedEnum.BAZ);
+    builder.setOptionalNestedEnum(TestAllTypes.NestedEnum.BAR);
+    assertEquals(2, mockParent.getInvalidationCount());
+
+    // message single
+    builder.buildPartial();
+    builder.setOptionalNestedMessage(nestedMessage2);
+    builder.setOptionalNestedMessage(nestedMessage1);
+    assertEquals(3, mockParent.getInvalidationCount());
+
+    // primitive repated
+    builder.buildPartial();
+    builder.addRepeatedInt32(2);
+    builder.addRepeatedInt32(3);
+    assertEquals(4, mockParent.getInvalidationCount());
+
+    // enum repeated
+    builder.buildPartial();
+    builder.addRepeatedNestedEnum(TestAllTypes.NestedEnum.BAZ);
+    builder.addRepeatedNestedEnum(TestAllTypes.NestedEnum.BAZ);
+    assertEquals(5, mockParent.getInvalidationCount());
+
+    // message repeated
+    builder.buildPartial();
+    builder.addRepeatedNestedMessage(nestedMessage2);
+    builder.addRepeatedNestedMessage(nestedMessage1);
+    assertEquals(6, mockParent.getInvalidationCount());
+
+  }
+
+  public void testInvalidations_Extensions() throws Exception {
+    TestUtil.MockBuilderParent mockParent = new TestUtil.MockBuilderParent();
+
+    TestAllExtensions.Builder builder = (TestAllExtensions.Builder)
+        ((GeneratedMessage) TestAllExtensions.getDefaultInstance()).
+            newBuilderForType(mockParent);
+
+    builder.addExtension(UnittestProto.repeatedInt32Extension, 1);
+    builder.setExtension(UnittestProto.repeatedInt32Extension, 0, 2);
+    builder.clearExtension(UnittestProto.repeatedInt32Extension);
+    assertEquals(0, mockParent.getInvalidationCount());
+
+    // Now tell it we want changes and make sure it's only fired once
+    builder.buildPartial();
+    builder.addExtension(UnittestProto.repeatedInt32Extension, 2);
+    builder.addExtension(UnittestProto.repeatedInt32Extension, 3);
+    assertEquals(1, mockParent.getInvalidationCount());
+
+    builder.buildPartial();
+    builder.setExtension(UnittestProto.repeatedInt32Extension, 0, 4);
+    builder.setExtension(UnittestProto.repeatedInt32Extension, 1, 5);
+    assertEquals(2, mockParent.getInvalidationCount());
+
+    builder.buildPartial();
+    builder.clearExtension(UnittestProto.repeatedInt32Extension);
+    builder.clearExtension(UnittestProto.repeatedInt32Extension);
+    assertEquals(3, mockParent.getInvalidationCount());
+  }
+
+  public void testBaseMessageOrBuilder() {
+    // Mostly just makes sure the base interface exists and has some methods.
+    TestAllTypes.Builder builder = TestAllTypes.newBuilder();
+    TestAllTypes message = builder.buildPartial();
+    TestAllTypesOrBuilder builderAsInterface = (TestAllTypesOrBuilder) builder;
+    TestAllTypesOrBuilder messageAsInterface = (TestAllTypesOrBuilder) message;
+
+    assertEquals(
+        messageAsInterface.getDefaultBool(),
+        messageAsInterface.getDefaultBool());
+    assertEquals(
+        messageAsInterface.getOptionalDouble(),
+        messageAsInterface.getOptionalDouble());
+  }
+
+  public void testMessageOrBuilderGetters() {
+    TestAllTypes.Builder builder = TestAllTypes.newBuilder();
+
+    // single fields
+    assertSame(ForeignMessage.getDefaultInstance(),
+        builder.getOptionalForeignMessageOrBuilder());
+    ForeignMessage.Builder subBuilder =
+        builder.getOptionalForeignMessageBuilder();
+    assertSame(subBuilder, builder.getOptionalForeignMessageOrBuilder());
+
+    // repeated fields
+    ForeignMessage m0 = ForeignMessage.newBuilder().buildPartial();
+    ForeignMessage m1 = ForeignMessage.newBuilder().buildPartial();
+    ForeignMessage m2 = ForeignMessage.newBuilder().buildPartial();
+    builder.addRepeatedForeignMessage(m0);
+    builder.addRepeatedForeignMessage(m1);
+    builder.addRepeatedForeignMessage(m2);
+    assertSame(m0, builder.getRepeatedForeignMessageOrBuilder(0));
+    assertSame(m1, builder.getRepeatedForeignMessageOrBuilder(1));
+    assertSame(m2, builder.getRepeatedForeignMessageOrBuilder(2));
+    ForeignMessage.Builder b0 = builder.getRepeatedForeignMessageBuilder(0);
+    ForeignMessage.Builder b1 = builder.getRepeatedForeignMessageBuilder(1);
+    assertSame(b0, builder.getRepeatedForeignMessageOrBuilder(0));
+    assertSame(b1, builder.getRepeatedForeignMessageOrBuilder(1));
+    assertSame(m2, builder.getRepeatedForeignMessageOrBuilder(2));
+
+    List<? extends ForeignMessageOrBuilder> messageOrBuilderList =
+        builder.getRepeatedForeignMessageOrBuilderList();
+    assertSame(b0, messageOrBuilderList.get(0));
+    assertSame(b1, messageOrBuilderList.get(1));
+    assertSame(m2, messageOrBuilderList.get(2));
+  }
 }

+ 118 - 0
java/src/test/java/com/google/protobuf/LazyStringArrayListTest.java

@@ -0,0 +1,118 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests for {@link LazyStringArrayList}.
+ *
+ * @author jonp@google.com (Jon Perlow)
+ */
+public class LazyStringArrayListTest extends TestCase {
+
+  private static String STRING_A = "A";
+  private static String STRING_B = "B";
+  private static String STRING_C = "C";
+
+  private static ByteString BYTE_STRING_A = ByteString.copyFromUtf8("A");
+  private static ByteString BYTE_STRING_B = ByteString.copyFromUtf8("B");
+  private static ByteString BYTE_STRING_C = ByteString.copyFromUtf8("C");
+
+  public void testJustStrings() {
+    LazyStringArrayList list = new LazyStringArrayList();
+    list.add(STRING_A);
+    list.add(STRING_B);
+    list.add(STRING_C);
+
+    assertEquals(3, list.size());
+    assertSame(STRING_A, list.get(0));
+    assertSame(STRING_B, list.get(1));
+    assertSame(STRING_C, list.get(2));
+
+    list.set(1, STRING_C);
+    assertSame(STRING_C, list.get(1));
+
+    list.remove(1);
+    assertSame(STRING_A, list.get(0));
+    assertSame(STRING_C, list.get(1));
+  }
+
+  public void testJustByteString() {
+    LazyStringArrayList list = new LazyStringArrayList();
+    list.add(BYTE_STRING_A);
+    list.add(BYTE_STRING_B);
+    list.add(BYTE_STRING_C);
+
+    assertEquals(3, list.size());
+    assertSame(BYTE_STRING_A, list.getByteString(0));
+    assertSame(BYTE_STRING_B, list.getByteString(1));
+    assertSame(BYTE_STRING_C, list.getByteString(2));
+
+    list.remove(1);
+    assertSame(BYTE_STRING_A, list.getByteString(0));
+    assertSame(BYTE_STRING_C, list.getByteString(1));
+  }
+
+  public void testConversionBackAndForth() {
+    LazyStringArrayList list = new LazyStringArrayList();
+    list.add(STRING_A);
+    list.add(BYTE_STRING_B);
+    list.add(BYTE_STRING_C);
+
+    // String a should be the same because it was originally a string
+    assertSame(STRING_A, list.get(0));
+
+    // String b and c should be different because the string has to be computed
+    // from the ByteString
+    String bPrime = list.get(1);
+    assertNotSame(STRING_B, bPrime);
+    assertEquals(STRING_B, bPrime);
+    String cPrime = list.get(2);
+    assertNotSame(STRING_C, cPrime);
+    assertEquals(STRING_C, cPrime);
+
+    // String c and c should stay the same once cached.
+    assertSame(bPrime, list.get(1));
+    assertSame(cPrime, list.get(2));
+
+    // ByteString needs to be computed from string for both a and b
+    ByteString aPrimeByteString = list.getByteString(0);
+    assertEquals(BYTE_STRING_A, aPrimeByteString);
+    ByteString bPrimeByteString = list.getByteString(1);
+    assertNotSame(BYTE_STRING_B, bPrimeByteString);
+    assertEquals(BYTE_STRING_B, list.getByteString(1));
+
+    // Once cached, ByteString should stay cached.
+    assertSame(aPrimeByteString, list.getByteString(0));
+    assertSame(bPrimeByteString, list.getByteString(1));
+  }
+}

+ 115 - 0
java/src/test/java/com/google/protobuf/LazyStringEndToEndTest.java

@@ -0,0 +1,115 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+
+import protobuf_unittest.UnittestProto;
+
+import junit.framework.TestCase;
+
+import java.io.IOException;
+
+/**
+ * Tests to make sure the lazy conversion of UTF8-encoded byte arrays to
+ * strings works correctly.
+ *
+ * @author jonp@google.com (Jon Perlow)
+ */
+public class LazyStringEndToEndTest extends TestCase {
+
+  private static ByteString TEST_ALL_TYPES_SERIALIZED_WITH_ILLEGAL_UTF8 =
+      ByteString.copyFrom(new byte[] {
+          114, 4, -1, 0, -1, 0, -30, 2, 4, -1,
+          0, -1, 0, -30, 2, 4, -1, 0, -1, 0, });
+
+  /**
+   * Tests that an invalid UTF8 string will roundtrip through a parse
+   * and serialization.
+   */
+  public void testParseAndSerialize() throws InvalidProtocolBufferException {
+    UnittestProto.TestAllTypes tV2 = UnittestProto.TestAllTypes.parseFrom(
+        TEST_ALL_TYPES_SERIALIZED_WITH_ILLEGAL_UTF8);
+    ByteString bytes = tV2.toByteString();
+    assertEquals(TEST_ALL_TYPES_SERIALIZED_WITH_ILLEGAL_UTF8, bytes);
+
+    tV2.getOptionalString();
+    bytes = tV2.toByteString();
+    assertEquals(TEST_ALL_TYPES_SERIALIZED_WITH_ILLEGAL_UTF8, bytes);
+  }
+
+  public void testParseAndWrite() throws IOException {
+    UnittestProto.TestAllTypes tV2 = UnittestProto.TestAllTypes.parseFrom(
+        TEST_ALL_TYPES_SERIALIZED_WITH_ILLEGAL_UTF8);
+    byte[] sink = new byte[TEST_ALL_TYPES_SERIALIZED_WITH_ILLEGAL_UTF8.size()];
+    CodedOutputStream outputStream = CodedOutputStream.newInstance(sink);
+    tV2.writeTo(outputStream);
+    outputStream.flush();
+    assertEquals(
+        TEST_ALL_TYPES_SERIALIZED_WITH_ILLEGAL_UTF8,
+        ByteString.copyFrom(sink));
+  }
+
+  public void testCaching() {
+    String a = "a";
+    String b = "b";
+    String c = "c";
+    UnittestProto.TestAllTypes proto = UnittestProto.TestAllTypes.newBuilder()
+        .setOptionalString(a)
+        .addRepeatedString(b)
+        .addRepeatedString(c)
+        .build();
+
+    // String should be the one we passed it.
+    assertSame(a, proto.getOptionalString());
+    assertSame(b, proto.getRepeatedString(0));
+    assertSame(c, proto.getRepeatedString(1));
+
+
+    // There's no way to directly observe that the ByteString is cached
+    // correctly on serialization, but we can observe that it had to recompute
+    // the string after serialization.
+    proto.toByteString();
+    String aPrime = proto.getOptionalString();
+    assertNotSame(a, aPrime);
+    assertEquals(a, aPrime);
+    String bPrime = proto.getRepeatedString(0);
+    assertNotSame(b, bPrime);
+    assertEquals(b, bPrime);
+    String cPrime = proto.getRepeatedString(1);
+    assertNotSame(c, cPrime);
+    assertEquals(c, cPrime);
+
+    // And now the string should stay cached.
+    assertSame(aPrime, proto.getOptionalString());
+    assertSame(bPrime, proto.getRepeatedString(0));
+    assertSame(cPrime, proto.getRepeatedString(1));
+  }
+}

+ 29 - 0
java/src/test/java/com/google/protobuf/LiteTest.java

@@ -37,6 +37,11 @@ import com.google.protobuf.UnittestLite.TestNestedExtensionLite;
 
 import junit.framework.TestCase;
 
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+
 /**
  * Test lite runtime.
  *
@@ -113,4 +118,28 @@ public class LiteTest extends TestCase {
     assertEquals(7, message2.getExtension(
         UnittestLite.optionalNestedMessageExtensionLite).getBb());
   }
+
+  public void testSerialize() throws Exception {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    TestAllTypesLite expected =
+      TestAllTypesLite.newBuilder()
+                      .setOptionalInt32(123)
+                      .addRepeatedString("hello")
+                      .setOptionalNestedMessage(
+                          TestAllTypesLite.NestedMessage.newBuilder().setBb(7))
+                      .build();
+    ObjectOutputStream out = new ObjectOutputStream(baos);
+    out.writeObject(expected);
+    out.close();
+    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
+    ObjectInputStream in = new ObjectInputStream(bais);
+    TestAllTypesLite actual = (TestAllTypesLite) in.readObject();
+    assertEquals(expected.getOptionalInt32(), actual.getOptionalInt32());
+    assertEquals(expected.getRepeatedStringCount(),
+        actual.getRepeatedStringCount());
+    assertEquals(expected.getRepeatedString(0),
+        actual.getRepeatedString(0));
+    assertEquals(expected.getOptionalNestedMessage().getBb(),
+        actual.getOptionalNestedMessage().getBb());
+  }
 }

+ 185 - 0
java/src/test/java/com/google/protobuf/NestedBuildersTest.java

@@ -0,0 +1,185 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import protobuf_unittest.Vehicle;
+import protobuf_unittest.Wheel;
+
+import junit.framework.TestCase;
+
+import java.util.List;
+import java.util.ArrayList;
+
+/**
+ * Test cases that exercise end-to-end use cases involving
+ * {@link SingleFieldBuilder} and {@link RepeatedFieldBuilder}.
+ *
+ * @author jonp@google.com (Jon Perlow)
+ */
+public class NestedBuildersTest extends TestCase {
+
+  public void testMessagesAndBuilders() {
+    Vehicle.Builder vehicleBuilder = Vehicle.newBuilder();
+    vehicleBuilder.addWheelBuilder()
+        .setRadius(4)
+        .setWidth(1);
+    vehicleBuilder.addWheelBuilder()
+        .setRadius(4)
+        .setWidth(2);
+    vehicleBuilder.addWheelBuilder()
+        .setRadius(4)
+        .setWidth(3);
+    vehicleBuilder.addWheelBuilder()
+        .setRadius(4)
+        .setWidth(4);
+    vehicleBuilder.getEngineBuilder()
+        .setLiters(10);
+
+    Vehicle vehicle = vehicleBuilder.build();
+    assertEquals(4, vehicle.getWheelCount());
+    for (int i = 0; i < 4; i++) {
+      Wheel wheel = vehicle.getWheel(i);
+      assertEquals(4, wheel.getRadius());
+      assertEquals(i + 1, wheel.getWidth());
+    }
+    assertEquals(10, vehicle.getEngine().getLiters());
+
+    for (int i = 0; i < 4; i++) {
+      vehicleBuilder.getWheelBuilder(i)
+          .setRadius(5)
+          .setWidth(i + 10);
+    }
+    vehicleBuilder.getEngineBuilder().setLiters(20);
+
+    vehicle = vehicleBuilder.build();
+    for (int i = 0; i < 4; i++) {
+      Wheel wheel = vehicle.getWheel(i);
+      assertEquals(5, wheel.getRadius());
+      assertEquals(i + 10, wheel.getWidth());
+    }
+    assertEquals(20, vehicle.getEngine().getLiters());
+    assertTrue(vehicle.hasEngine());
+  }
+
+  public void testMessagesAreCached() {
+    Vehicle.Builder vehicleBuilder = Vehicle.newBuilder();
+    vehicleBuilder.addWheelBuilder()
+        .setRadius(1)
+        .setWidth(2);
+    vehicleBuilder.addWheelBuilder()
+        .setRadius(3)
+        .setWidth(4);
+    vehicleBuilder.addWheelBuilder()
+        .setRadius(5)
+        .setWidth(6);
+    vehicleBuilder.addWheelBuilder()
+        .setRadius(7)
+        .setWidth(8);
+
+    // Make sure messages are cached.
+    List<Wheel> wheels = new ArrayList<Wheel>(vehicleBuilder.getWheelList());
+    for (int i = 0; i < wheels.size(); i++) {
+      assertSame(wheels.get(i), vehicleBuilder.getWheel(i));
+    }
+
+    // Now get builders and check they didn't change.
+    for (int i = 0; i < wheels.size(); i++) {
+      vehicleBuilder.getWheel(i);
+    }
+    for (int i = 0; i < wheels.size(); i++) {
+      assertSame(wheels.get(i), vehicleBuilder.getWheel(i));
+    }
+
+    // Change just one
+    vehicleBuilder.getWheelBuilder(3)
+        .setRadius(20).setWidth(20);
+
+    // Now get wheels and check that only that one changed
+    for (int i = 0; i < wheels.size(); i++) {
+      if (i < 3) {
+        assertSame(wheels.get(i), vehicleBuilder.getWheel(i));
+      } else {
+        assertNotSame(wheels.get(i), vehicleBuilder.getWheel(i));
+      }
+    }
+  }
+
+  public void testRemove_WithNestedBuilders() {
+    Vehicle.Builder vehicleBuilder = Vehicle.newBuilder();
+    vehicleBuilder.addWheelBuilder()
+        .setRadius(1)
+        .setWidth(1);
+    vehicleBuilder.addWheelBuilder()
+        .setRadius(2)
+        .setWidth(2);
+    vehicleBuilder.removeWheel(0);
+
+    assertEquals(1, vehicleBuilder.getWheelCount());
+    assertEquals(2, vehicleBuilder.getWheel(0).getRadius());
+  }
+
+  public void testRemove_WithNestedMessages() {
+    Vehicle.Builder vehicleBuilder = Vehicle.newBuilder();
+    vehicleBuilder.addWheel(Wheel.newBuilder()
+        .setRadius(1)
+        .setWidth(1));
+    vehicleBuilder.addWheel(Wheel.newBuilder()
+        .setRadius(2)
+        .setWidth(2));
+    vehicleBuilder.removeWheel(0);
+
+    assertEquals(1, vehicleBuilder.getWheelCount());
+    assertEquals(2, vehicleBuilder.getWheel(0).getRadius());
+  }
+
+  public void testMerge() {
+    Vehicle vehicle1 = Vehicle.newBuilder()
+        .addWheel(Wheel.newBuilder().setRadius(1).build())
+        .addWheel(Wheel.newBuilder().setRadius(2).build())
+        .build();
+
+    Vehicle vehicle2 = Vehicle.newBuilder()
+        .mergeFrom(vehicle1)
+        .build();
+    // List should be the same -- no allocation
+    assertSame(vehicle1.getWheelList(), vehicle2.getWheelList());
+
+    Vehicle vehicle3 = vehicle1.toBuilder().build();
+    assertSame(vehicle1.getWheelList(), vehicle3.getWheelList());
+  }
+
+  public void testGettingBuilderMarksFieldAsHaving() {
+    Vehicle.Builder vehicleBuilder = Vehicle.newBuilder();
+    vehicleBuilder.getEngineBuilder();
+    Vehicle vehicle = vehicleBuilder.buildPartial();
+    assertTrue(vehicle.hasEngine());
+  }
+}

+ 190 - 0
java/src/test/java/com/google/protobuf/RepeatedFieldBuilderTest.java

@@ -0,0 +1,190 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import protobuf_unittest.UnittestProto.TestAllTypes;
+import protobuf_unittest.UnittestProto.TestAllTypesOrBuilder;
+
+import junit.framework.TestCase;
+
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Tests for {@link RepeatedFieldBuilder}. This tests basic functionality.
+ * More extensive testing is provided via other tests that exercise the
+ * builder.
+ *
+ * @author jonp@google.com (Jon Perlow)
+ */
+public class RepeatedFieldBuilderTest extends TestCase {
+
+  public void testBasicUse() {
+    TestUtil.MockBuilderParent mockParent = new TestUtil.MockBuilderParent();
+    RepeatedFieldBuilder<TestAllTypes, TestAllTypes.Builder,
+        TestAllTypesOrBuilder> builder = newRepeatedFieldBuilder(mockParent);
+    builder.addMessage(TestAllTypes.newBuilder().setOptionalInt32(0).build());
+    builder.addMessage(TestAllTypes.newBuilder().setOptionalInt32(1).build());
+    assertEquals(0, builder.getMessage(0).getOptionalInt32());
+    assertEquals(1, builder.getMessage(1).getOptionalInt32());
+
+    List<TestAllTypes> list = builder.build();
+    assertEquals(2, list.size());
+    assertEquals(0, list.get(0).getOptionalInt32());
+    assertEquals(1, list.get(1).getOptionalInt32());
+    assertIsUnmodifiable(list);
+
+    // Make sure it doesn't change.
+    List<TestAllTypes> list2 = builder.build();
+    assertSame(list, list2);
+    assertEquals(0, mockParent.getInvalidationCount());
+  }
+
+  public void testGoingBackAndForth() {
+    TestUtil.MockBuilderParent mockParent = new TestUtil.MockBuilderParent();
+    RepeatedFieldBuilder<TestAllTypes, TestAllTypes.Builder,
+        TestAllTypesOrBuilder> builder = newRepeatedFieldBuilder(mockParent);
+    builder.addMessage(TestAllTypes.newBuilder().setOptionalInt32(0).build());
+    builder.addMessage(TestAllTypes.newBuilder().setOptionalInt32(1).build());
+    assertEquals(0, builder.getMessage(0).getOptionalInt32());
+    assertEquals(1, builder.getMessage(1).getOptionalInt32());
+
+    // Convert to list
+    List<TestAllTypes> list = builder.build();
+    assertEquals(2, list.size());
+    assertEquals(0, list.get(0).getOptionalInt32());
+    assertEquals(1, list.get(1).getOptionalInt32());
+    assertIsUnmodifiable(list);
+
+    // Update 0th item
+    assertEquals(0, mockParent.getInvalidationCount());
+    builder.getBuilder(0).setOptionalString("foo");
+    assertEquals(1, mockParent.getInvalidationCount());
+    list = builder.build();
+    assertEquals(2, list.size());
+    assertEquals(0, list.get(0).getOptionalInt32());
+      assertEquals("foo", list.get(0).getOptionalString());
+    assertEquals(1, list.get(1).getOptionalInt32());
+    assertIsUnmodifiable(list);
+    assertEquals(1, mockParent.getInvalidationCount());
+  }
+
+  public void testVariousMethods() {
+    TestUtil.MockBuilderParent mockParent = new TestUtil.MockBuilderParent();
+    RepeatedFieldBuilder<TestAllTypes, TestAllTypes.Builder,
+        TestAllTypesOrBuilder> builder = newRepeatedFieldBuilder(mockParent);
+    builder.addMessage(TestAllTypes.newBuilder().setOptionalInt32(1).build());
+    builder.addMessage(TestAllTypes.newBuilder().setOptionalInt32(2).build());
+    builder.addBuilder(0, TestAllTypes.getDefaultInstance())
+        .setOptionalInt32(0);
+    builder.addBuilder(TestAllTypes.getDefaultInstance()).setOptionalInt32(3);
+
+    assertEquals(0, builder.getMessage(0).getOptionalInt32());
+    assertEquals(1, builder.getMessage(1).getOptionalInt32());
+    assertEquals(2, builder.getMessage(2).getOptionalInt32());
+    assertEquals(3, builder.getMessage(3).getOptionalInt32());
+
+    assertEquals(0, mockParent.getInvalidationCount());
+    List<TestAllTypes> messages = builder.build();
+    assertEquals(4, messages.size());
+    assertSame(messages, builder.build()); // expect same list
+
+    // Remove a message.
+    builder.remove(2);
+    assertEquals(1, mockParent.getInvalidationCount());
+    assertEquals(3, builder.getCount());
+    assertEquals(0, builder.getMessage(0).getOptionalInt32());
+    assertEquals(1, builder.getMessage(1).getOptionalInt32());
+    assertEquals(3, builder.getMessage(2).getOptionalInt32());
+
+    // Remove a builder.
+    builder.remove(0);
+    assertEquals(1, mockParent.getInvalidationCount());
+    assertEquals(2, builder.getCount());
+    assertEquals(1, builder.getMessage(0).getOptionalInt32());
+    assertEquals(3, builder.getMessage(1).getOptionalInt32());
+
+    // Test clear.
+    builder.clear();
+    assertEquals(1, mockParent.getInvalidationCount());
+    assertEquals(0, builder.getCount());
+    assertTrue(builder.isEmpty());
+  }
+
+  public void testLists() {
+    TestUtil.MockBuilderParent mockParent = new TestUtil.MockBuilderParent();
+    RepeatedFieldBuilder<TestAllTypes, TestAllTypes.Builder,
+        TestAllTypesOrBuilder> builder = newRepeatedFieldBuilder(mockParent);
+    builder.addMessage(TestAllTypes.newBuilder().setOptionalInt32(1).build());
+    builder.addMessage(0,
+        TestAllTypes.newBuilder().setOptionalInt32(0).build());
+    assertEquals(0, builder.getMessage(0).getOptionalInt32());
+    assertEquals(1, builder.getMessage(1).getOptionalInt32());
+
+    // Use list of builders.
+    List<TestAllTypes.Builder> builders = builder.getBuilderList();
+    assertEquals(0, builders.get(0).getOptionalInt32());
+    assertEquals(1, builders.get(1).getOptionalInt32());
+    builders.get(0).setOptionalInt32(10);
+    builders.get(1).setOptionalInt32(11);
+
+    // Use list of protos
+    List<TestAllTypes> protos = builder.getMessageList();
+    assertEquals(10, protos.get(0).getOptionalInt32());
+    assertEquals(11, protos.get(1).getOptionalInt32());
+
+    // Add an item to the builders and verify it's updated in both
+    builder.addMessage(TestAllTypes.newBuilder().setOptionalInt32(12).build());
+    assertEquals(3, builders.size());
+    assertEquals(3, protos.size());
+  }
+
+  private void assertIsUnmodifiable(List<?> list) {
+    if (list == Collections.emptyList()) {
+      // OKAY -- Need to check this b/c EmptyList allows you to call clear.
+    } else {
+      try {
+        list.clear();
+        fail("List wasn't immutable");
+      } catch (UnsupportedOperationException e) {
+        // good
+      }
+    }
+  }
+
+  private RepeatedFieldBuilder<TestAllTypes, TestAllTypes.Builder,
+      TestAllTypesOrBuilder>
+      newRepeatedFieldBuilder(GeneratedMessage.BuilderParent parent) {
+    return new RepeatedFieldBuilder<TestAllTypes, TestAllTypes.Builder,
+        TestAllTypesOrBuilder>(Collections.<TestAllTypes>emptyList(), false,
+        parent, false);
+  }
+}

+ 8 - 0
java/src/test/java/com/google/protobuf/ServiceTest.java

@@ -244,6 +244,14 @@ public class ServiceTest extends TestCase {
       //   make assumptions, so I'm just going to accept any character as the
       //   separator.
       assertTrue(fullName.startsWith(outerName));
+
+      if (!Service.class.isAssignableFrom(innerClass) &&
+          !Message.class.isAssignableFrom(innerClass) &&
+          !ProtocolMessageEnum.class.isAssignableFrom(innerClass)) {
+        // Ignore any classes not generated by the base code generator.
+        continue;
+      }
+
       innerClassNames.add(fullName.substring(outerName.length() + 1));
     }
 

+ 155 - 0
java/src/test/java/com/google/protobuf/SingleFieldBuilderTest.java

@@ -0,0 +1,155 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import protobuf_unittest.UnittestProto.TestAllTypes;
+import protobuf_unittest.UnittestProto.TestAllTypesOrBuilder;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests for {@link SingleFieldBuilder}. This tests basic functionality.
+ * More extensive testing is provided via other tests that exercise the
+ * builder.
+ *
+ * @author jonp@google.com (Jon Perlow)
+ */
+public class SingleFieldBuilderTest extends TestCase {
+
+  public void testBasicUseAndInvalidations() {
+    TestUtil.MockBuilderParent mockParent = new TestUtil.MockBuilderParent();
+    SingleFieldBuilder<TestAllTypes, TestAllTypes.Builder,
+        TestAllTypesOrBuilder> builder =
+        new SingleFieldBuilder<TestAllTypes, TestAllTypes.Builder,
+            TestAllTypesOrBuilder>(
+            TestAllTypes.getDefaultInstance(),
+            mockParent,
+            false);
+    assertSame(TestAllTypes.getDefaultInstance(), builder.getMessage());
+    assertEquals(TestAllTypes.getDefaultInstance(),
+        builder.getBuilder().buildPartial());
+    assertEquals(0, mockParent.getInvalidationCount());
+
+    builder.getBuilder().setOptionalInt32(10);
+    assertEquals(0, mockParent.getInvalidationCount());
+    TestAllTypes message = builder.build();
+    assertEquals(10, message.getOptionalInt32());
+
+    // Test that we receive invalidations now that build has been called.
+    assertEquals(0, mockParent.getInvalidationCount());
+    builder.getBuilder().setOptionalInt32(20);
+    assertEquals(1, mockParent.getInvalidationCount());
+
+    // Test that we don't keep getting invalidations on every change
+    builder.getBuilder().setOptionalInt32(30);
+    assertEquals(1, mockParent.getInvalidationCount());
+
+  }
+
+  public void testSetMessage() {
+    TestUtil.MockBuilderParent mockParent = new TestUtil.MockBuilderParent();
+    SingleFieldBuilder<TestAllTypes, TestAllTypes.Builder,
+        TestAllTypesOrBuilder> builder =
+        new SingleFieldBuilder<TestAllTypes, TestAllTypes.Builder,
+            TestAllTypesOrBuilder>(
+            TestAllTypes.getDefaultInstance(),
+            mockParent,
+            false);
+    builder.setMessage(TestAllTypes.newBuilder().setOptionalInt32(0).build());
+    assertEquals(0, builder.getMessage().getOptionalInt32());
+
+    // Update message using the builder
+    builder.getBuilder().setOptionalInt32(1);
+    assertEquals(0, mockParent.getInvalidationCount());
+    assertEquals(1, builder.getBuilder().getOptionalInt32());
+    assertEquals(1, builder.getMessage().getOptionalInt32());
+    builder.build();
+    builder.getBuilder().setOptionalInt32(2);
+    assertEquals(2, builder.getBuilder().getOptionalInt32());
+    assertEquals(2, builder.getMessage().getOptionalInt32());
+
+    // Make sure message stays cached
+    assertSame(builder.getMessage(), builder.getMessage());
+  }
+
+  public void testClear() {
+    TestUtil.MockBuilderParent mockParent = new TestUtil.MockBuilderParent();
+    SingleFieldBuilder<TestAllTypes, TestAllTypes.Builder,
+        TestAllTypesOrBuilder> builder =
+        new SingleFieldBuilder<TestAllTypes, TestAllTypes.Builder,
+            TestAllTypesOrBuilder>(
+            TestAllTypes.getDefaultInstance(),
+            mockParent,
+            false);
+    builder.setMessage(TestAllTypes.newBuilder().setOptionalInt32(0).build());
+    assertNotSame(TestAllTypes.getDefaultInstance(), builder.getMessage());
+    builder.clear();
+    assertSame(TestAllTypes.getDefaultInstance(), builder.getMessage());
+
+    builder.getBuilder().setOptionalInt32(1);
+    assertNotSame(TestAllTypes.getDefaultInstance(), builder.getMessage());
+    builder.clear();
+    assertSame(TestAllTypes.getDefaultInstance(), builder.getMessage());
+  }
+
+  public void testMerge() {
+    TestUtil.MockBuilderParent mockParent = new TestUtil.MockBuilderParent();
+    SingleFieldBuilder<TestAllTypes, TestAllTypes.Builder,
+        TestAllTypesOrBuilder> builder =
+        new SingleFieldBuilder<TestAllTypes, TestAllTypes.Builder,
+            TestAllTypesOrBuilder>(
+            TestAllTypes.getDefaultInstance(),
+            mockParent,
+            false);
+
+    // Merge into default field.
+    builder.mergeFrom(TestAllTypes.getDefaultInstance());
+    assertSame(TestAllTypes.getDefaultInstance(), builder.getMessage());
+
+    // Merge into non-default field on existing builder.
+    builder.getBuilder().setOptionalInt32(2);
+    builder.mergeFrom(TestAllTypes.newBuilder()
+        .setOptionalDouble(4.0)
+        .buildPartial());
+    assertEquals(2, builder.getMessage().getOptionalInt32());
+    assertEquals(4.0, builder.getMessage().getOptionalDouble());
+
+    // Merge into non-default field on existing message
+    builder.setMessage(TestAllTypes.newBuilder()
+        .setOptionalInt32(10)
+        .buildPartial());
+    builder.mergeFrom(TestAllTypes.newBuilder()
+        .setOptionalDouble(5.0)
+        .buildPartial());
+    assertEquals(10, builder.getMessage().getOptionalInt32());
+    assertEquals(5.0, builder.getMessage().getOptionalDouble());
+  }
+}

+ 378 - 0
java/src/test/java/com/google/protobuf/SmallSortedMapTest.java

@@ -0,0 +1,378 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import junit.framework.TestCase;
+
+import java.util.AbstractMap.SimpleEntry;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+
+/**
+ * @author darick@google.com Darick Tong
+ */
+public class SmallSortedMapTest extends TestCase {
+
+  public void testPutAndGetArrayEntriesOnly() {
+    runPutAndGetTest(3);
+  }
+
+  public void testPutAndGetOverflowEntries() {
+    runPutAndGetTest(6);
+  }
+
+  private void runPutAndGetTest(int numElements) {
+    // Test with even and odd arraySize
+    SmallSortedMap<Integer, Integer> map1 =
+        SmallSortedMap.newInstanceForTest(3);
+    SmallSortedMap<Integer, Integer> map2 =
+        SmallSortedMap.newInstanceForTest(4);
+    SmallSortedMap<Integer, Integer> map3 =
+        SmallSortedMap.newInstanceForTest(3);
+    SmallSortedMap<Integer, Integer> map4 =
+        SmallSortedMap.newInstanceForTest(4);
+
+    // Test with puts in ascending order.
+    for (int i = 0; i < numElements; i++) {
+      assertNull(map1.put(i, i + 1));
+      assertNull(map2.put(i, i + 1));
+    }
+    // Test with puts in descending order.
+    for (int i = numElements - 1; i >= 0; i--) {
+      assertNull(map3.put(i, i + 1));
+      assertNull(map4.put(i, i + 1));
+    }
+
+    assertEquals(Math.min(3, numElements), map1.getNumArrayEntries());
+    assertEquals(Math.min(4, numElements), map2.getNumArrayEntries());
+    assertEquals(Math.min(3, numElements), map3.getNumArrayEntries());
+    assertEquals(Math.min(4, numElements), map4.getNumArrayEntries());
+
+    List<SmallSortedMap<Integer, Integer>> allMaps =
+        new ArrayList<SmallSortedMap<Integer, Integer>>();
+    allMaps.add(map1);
+    allMaps.add(map2);
+    allMaps.add(map3);
+    allMaps.add(map4);
+
+    for (SmallSortedMap<Integer, Integer> map : allMaps) {
+      assertEquals(numElements, map.size());
+      for (int i = 0; i < numElements; i++) {
+        assertEquals(new Integer(i + 1), map.get(i));
+      }
+    }
+
+    assertEquals(map1, map2);
+    assertEquals(map2, map3);
+    assertEquals(map3, map4);
+  }
+
+  public void testReplacingPut() {
+    SmallSortedMap<Integer, Integer> map = SmallSortedMap.newInstanceForTest(3);
+    for (int i = 0; i < 6; i++) {
+      assertNull(map.put(i, i + 1));
+      assertNull(map.remove(i + 1));
+    }
+    for (int i = 0; i < 6; i++) {
+      assertEquals(new Integer(i + 1), map.put(i, i + 2));
+    }
+  }
+
+  public void testRemove() {
+    SmallSortedMap<Integer, Integer> map = SmallSortedMap.newInstanceForTest(3);
+    for (int i = 0; i < 6; i++) {
+      assertNull(map.put(i, i + 1));
+      assertNull(map.remove(i + 1));
+    }
+
+    assertEquals(3, map.getNumArrayEntries());
+    assertEquals(3, map.getNumOverflowEntries());
+    assertEquals(6,  map.size());
+    assertEquals(makeSortedKeySet(0, 1, 2, 3, 4, 5), map.keySet());
+
+    assertEquals(new Integer(2), map.remove(1));
+    assertEquals(3, map.getNumArrayEntries());
+    assertEquals(2, map.getNumOverflowEntries());
+    assertEquals(5,  map.size());
+    assertEquals(makeSortedKeySet(0, 2, 3, 4, 5), map.keySet());
+
+    assertEquals(new Integer(5), map.remove(4));
+    assertEquals(3, map.getNumArrayEntries());
+    assertEquals(1, map.getNumOverflowEntries());
+    assertEquals(4,  map.size());
+    assertEquals(makeSortedKeySet(0, 2, 3, 5), map.keySet());
+
+    assertEquals(new Integer(4), map.remove(3));
+    assertEquals(3, map.getNumArrayEntries());
+    assertEquals(0, map.getNumOverflowEntries());
+    assertEquals(3, map.size());
+    assertEquals(makeSortedKeySet(0, 2, 5), map.keySet());
+
+    assertNull(map.remove(3));
+    assertEquals(3, map.getNumArrayEntries());
+    assertEquals(0, map.getNumOverflowEntries());
+    assertEquals(3, map.size());
+
+    assertEquals(new Integer(1), map.remove(0));
+    assertEquals(2, map.getNumArrayEntries());
+    assertEquals(0, map.getNumOverflowEntries());
+    assertEquals(2, map.size());
+  }
+
+  public void testClear() {
+    SmallSortedMap<Integer, Integer> map = SmallSortedMap.newInstanceForTest(3);
+    for (int i = 0; i < 6; i++) {
+      assertNull(map.put(i, i + 1));
+    }
+    map.clear();
+    assertEquals(0, map.getNumArrayEntries());
+    assertEquals(0, map.getNumOverflowEntries());
+    assertEquals(0, map.size());
+  }
+
+  public void testGetArrayEntryAndOverflowEntries() {
+    SmallSortedMap<Integer, Integer> map = SmallSortedMap.newInstanceForTest(3);
+    for (int i = 0; i < 6; i++) {
+      assertNull(map.put(i, i + 1));
+    }
+    assertEquals(3, map.getNumArrayEntries());
+    for (int i = 0; i < 3; i++) {
+      Map.Entry<Integer, Integer> entry = map.getArrayEntryAt(i);
+      assertEquals(new Integer(i), entry.getKey());
+      assertEquals(new Integer(i + 1), entry.getValue());
+    }
+    Iterator<Map.Entry<Integer, Integer>> it =
+        map.getOverflowEntries().iterator();
+    for (int i = 3; i < 6; i++) {
+      assertTrue(it.hasNext());
+      Map.Entry<Integer, Integer> entry = it.next();
+      assertEquals(new Integer(i), entry.getKey());
+      assertEquals(new Integer(i + 1), entry.getValue());
+    }
+    assertFalse(it.hasNext());
+  }
+
+  public void testEntrySetContains() {
+    SmallSortedMap<Integer, Integer> map = SmallSortedMap.newInstanceForTest(3);
+    for (int i = 0; i < 6; i++) {
+      assertNull(map.put(i, i + 1));
+    }
+    Set<Map.Entry<Integer, Integer>> entrySet = map.entrySet();
+    for (int i = 0; i < 6; i++) {
+      assertTrue(
+          entrySet.contains(new SimpleEntry<Integer, Integer>(i, i + 1)));
+      assertFalse(
+          entrySet.contains(new SimpleEntry<Integer, Integer>(i, i)));
+    }
+  }
+
+  public void testEntrySetAdd() {
+    SmallSortedMap<Integer, Integer> map = SmallSortedMap.newInstanceForTest(3);
+    Set<Map.Entry<Integer, Integer>> entrySet = map.entrySet();
+    for (int i = 0; i < 6; i++) {
+      Map.Entry<Integer, Integer> entry =
+          new SimpleEntry<Integer, Integer>(i, i + 1);
+      assertTrue(entrySet.add(entry));
+      assertFalse(entrySet.add(entry));
+    }
+    for (int i = 0; i < 6; i++) {
+      assertEquals(new Integer(i + 1), map.get(i));
+    }
+    assertEquals(3, map.getNumArrayEntries());
+    assertEquals(3, map.getNumOverflowEntries());
+    assertEquals(6, map.size());
+  }
+
+  public void testEntrySetRemove() {
+    SmallSortedMap<Integer, Integer> map = SmallSortedMap.newInstanceForTest(3);
+    Set<Map.Entry<Integer, Integer>> entrySet = map.entrySet();
+    for (int i = 0; i < 6; i++) {
+      assertNull(map.put(i, i + 1));
+    }
+    for (int i = 0; i < 6; i++) {
+      Map.Entry<Integer, Integer> entry =
+          new SimpleEntry<Integer, Integer>(i, i + 1);
+      assertTrue(entrySet.remove(entry));
+      assertFalse(entrySet.remove(entry));
+    }
+    assertTrue(map.isEmpty());
+    assertEquals(0, map.getNumArrayEntries());
+    assertEquals(0, map.getNumOverflowEntries());
+    assertEquals(0, map.size());
+  }
+
+  public void testEntrySetClear() {
+    SmallSortedMap<Integer, Integer> map = SmallSortedMap.newInstanceForTest(3);
+    for (int i = 0; i < 6; i++) {
+      assertNull(map.put(i, i + 1));
+    }
+    map.entrySet().clear();
+    assertTrue(map.isEmpty());
+    assertEquals(0, map.getNumArrayEntries());
+    assertEquals(0, map.getNumOverflowEntries());
+    assertEquals(0, map.size());
+  }
+
+  public void testEntrySetIteratorNext() {
+    SmallSortedMap<Integer, Integer> map = SmallSortedMap.newInstanceForTest(3);
+    for (int i = 0; i < 6; i++) {
+      assertNull(map.put(i, i + 1));
+    }
+    Iterator<Map.Entry<Integer, Integer>> it = map.entrySet().iterator();
+    for (int i = 0; i < 6; i++) {
+      assertTrue(it.hasNext());
+      Map.Entry<Integer, Integer> entry = it.next();
+      assertEquals(new Integer(i), entry.getKey());
+      assertEquals(new Integer(i + 1), entry.getValue());
+    }
+    assertFalse(it.hasNext());
+  }
+
+  public void testEntrySetIteratorRemove() {
+    SmallSortedMap<Integer, Integer> map = SmallSortedMap.newInstanceForTest(3);
+    for (int i = 0; i < 6; i++) {
+      assertNull(map.put(i, i + 1));
+    }
+    Iterator<Map.Entry<Integer, Integer>> it = map.entrySet().iterator();
+    for (int i = 0; i < 6; i++) {
+      assertTrue(map.containsKey(i));
+      it.next();
+      it.remove();
+      assertFalse(map.containsKey(i));
+      assertEquals(6 - i - 1, map.size());
+    }
+  }
+
+  public void testMapEntryModification() {
+    SmallSortedMap<Integer, Integer> map = SmallSortedMap.newInstanceForTest(3);
+    for (int i = 0; i < 6; i++) {
+      assertNull(map.put(i, i + 1));
+    }
+    Iterator<Map.Entry<Integer, Integer>> it = map.entrySet().iterator();
+    for (int i = 0; i < 6; i++) {
+      Map.Entry<Integer, Integer> entry = it.next();
+      entry.setValue(i + 23);
+    }
+    for (int i = 0; i < 6; i++) {
+      assertEquals(new Integer(i + 23), map.get(i));
+    }
+  }
+
+  public void testMakeImmutable() {
+    SmallSortedMap<Integer, Integer> map = SmallSortedMap.newInstanceForTest(3);
+    for (int i = 0; i < 6; i++) {
+      assertNull(map.put(i, i + 1));
+    }
+    map.makeImmutable();
+    assertEquals(new Integer(1), map.get(0));
+    assertEquals(6, map.size());
+
+    try {
+      map.put(23, 23);
+      fail("Expected UnsupportedOperationException");
+    } catch (UnsupportedOperationException expected) {
+    }
+
+    Map<Integer, Integer> other = new HashMap<Integer, Integer>();
+    other.put(23, 23);
+    try {
+      map.putAll(other);
+      fail("Expected UnsupportedOperationException");
+    } catch (UnsupportedOperationException expected) {
+    }
+
+    try {
+      map.remove(0);
+      fail("Expected UnsupportedOperationException");
+    } catch (UnsupportedOperationException expected) {
+    }
+
+    try {
+      map.clear();
+      fail("Expected UnsupportedOperationException");
+    } catch (UnsupportedOperationException expected) {
+    }
+
+    Set<Map.Entry<Integer, Integer>> entrySet = map.entrySet();
+    try {
+      entrySet.clear();
+      fail("Expected UnsupportedOperationException");
+    } catch (UnsupportedOperationException expected) {
+    }
+
+    Iterator<Map.Entry<Integer, Integer>> it = entrySet.iterator();
+    while (it.hasNext()) {
+      Map.Entry<Integer, Integer> entry = it.next();
+      try {
+        entry.setValue(0);
+        fail("Expected UnsupportedOperationException");
+      } catch (UnsupportedOperationException expected) {
+      }
+      try {
+        it.remove();
+        fail("Expected UnsupportedOperationException");
+      } catch (UnsupportedOperationException expected) {
+      }
+    }
+
+    Set<Integer> keySet = map.keySet();
+    try {
+      keySet.clear();
+      fail("Expected UnsupportedOperationException");
+    } catch (UnsupportedOperationException expected) {
+    }
+
+    Iterator<Integer> keys = keySet.iterator();
+    while (keys.hasNext()) {
+      Integer key = keys.next();
+      try {
+        keySet.remove(key);
+        fail("Expected UnsupportedOperationException");
+      } catch (UnsupportedOperationException expected) {
+      }
+      try {
+        keys.remove();
+        fail("Expected UnsupportedOperationException");
+      } catch (UnsupportedOperationException expected) {
+      }
+    }
+  }
+
+  private Set<Integer> makeSortedKeySet(Integer... keys) {
+    return new TreeSet<Integer>(Arrays.<Integer>asList(keys));
+  }
+}

+ 49 - 0
java/src/test/java/com/google/protobuf/TestBadIdentifiers.java

@@ -0,0 +1,49 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests that proto2 api generation doesn't cause compile errors when
+ * compiling protocol buffers that have names that would otherwise conflict
+ * if not fully qualified (like @Deprecated and @Override). 
+ *
+ * @author jonp@google.com (Jon Perlow)
+ */
+public class TestBadIdentifiers extends TestCase {
+
+  public void testCompilation() {
+    // If this compiles, it means the generation was correct.
+    TestBadIdentifiersProto.Deprecated.newBuilder();
+    TestBadIdentifiersProto.Override.newBuilder();
+  } 
+}

+ 51 - 20
java/src/test/java/com/google/protobuf/TestUtil.java

@@ -214,7 +214,9 @@ import static com.google.protobuf.UnittestLite.packedBoolExtensionLite;
 import static com.google.protobuf.UnittestLite.packedEnumExtensionLite;
 
 import protobuf_unittest.UnittestProto.TestAllExtensions;
+import protobuf_unittest.UnittestProto.TestAllExtensionsOrBuilder;
 import protobuf_unittest.UnittestProto.TestAllTypes;
+import protobuf_unittest.UnittestProto.TestAllTypesOrBuilder;
 import protobuf_unittest.UnittestProto.TestPackedExtensions;
 import protobuf_unittest.UnittestProto.TestPackedTypes;
 import protobuf_unittest.UnittestProto.TestUnpackedTypes;
@@ -225,6 +227,7 @@ import com.google.protobuf.test.UnittestImport.ImportEnum;
 
 import com.google.protobuf.UnittestLite.TestAllTypesLite;
 import com.google.protobuf.UnittestLite.TestAllExtensionsLite;
+import com.google.protobuf.UnittestLite.TestAllExtensionsLiteOrBuilder;
 import com.google.protobuf.UnittestLite.TestPackedExtensionsLite;
 import com.google.protobuf.UnittestLite.ForeignMessageLite;
 import com.google.protobuf.UnittestLite.ForeignEnumLite;
@@ -244,9 +247,12 @@ import java.io.RandomAccessFile;
  * set all fields of a message, serialize it, parse it, and check that all
  * fields are set.
  *
+ * <p>This code is not to be used outside of {@code com.google.protobuf} and
+ * subpackages.
+ *
  * @author kenton@google.com Kenton Varda
  */
-class TestUtil {
+public final class TestUtil {
   private TestUtil() {}
 
   /** Helper to convert a String to ByteString. */
@@ -485,7 +491,7 @@ class TestUtil {
    * Assert (using {@code junit.framework.Assert}} that all fields of
    * {@code message} are set to the values assigned by {@code setAllFields}.
    */
-  public static void assertAllFieldsSet(TestAllTypes message) {
+  public static void assertAllFieldsSet(TestAllTypesOrBuilder message) {
     Assert.assertTrue(message.hasOptionalInt32   ());
     Assert.assertTrue(message.hasOptionalInt64   ());
     Assert.assertTrue(message.hasOptionalUint32  ());
@@ -682,13 +688,12 @@ class TestUtil {
   }
 
   // -------------------------------------------------------------------
-
   /**
    * Assert (using {@code junit.framework.Assert}} that all fields of
    * {@code message} are cleared, and that getting the fields returns their
    * default values.
    */
-  public static void assertClear(TestAllTypes message) {
+  public static void assertClear(TestAllTypesOrBuilder message) {
     // hasBlah() should initially be false for all optional fields.
     Assert.assertFalse(message.hasOptionalInt32   ());
     Assert.assertFalse(message.hasOptionalInt64   ());
@@ -838,7 +843,8 @@ class TestUtil {
    * {@code message} are set to the values assigned by {@code setAllFields}
    * followed by {@code modifyRepeatedFields}.
    */
-  public static void assertRepeatedFieldsModified(TestAllTypes message) {
+  public static void assertRepeatedFieldsModified(
+      TestAllTypesOrBuilder message) {
     // ModifyRepeatedFields only sets the second repeated element of each
     // field.  In addition to verifying this, we also verify that the first
     // element and size were *not* modified.
@@ -1352,7 +1358,8 @@ class TestUtil {
    * Assert (using {@code junit.framework.Assert}} that all extensions of
    * {@code message} are set to the values assigned by {@code setAllExtensions}.
    */
-  public static void assertAllExtensionsSet(TestAllExtensions message) {
+  public static void assertAllExtensionsSet(
+      TestAllExtensionsOrBuilder message) {
     Assert.assertTrue(message.hasExtension(optionalInt32Extension   ));
     Assert.assertTrue(message.hasExtension(optionalInt64Extension   ));
     Assert.assertTrue(message.hasExtension(optionalUint32Extension  ));
@@ -1567,7 +1574,7 @@ class TestUtil {
    * {@code message} are cleared, and that getting the extensions returns their
    * default values.
    */
-  public static void assertExtensionsClear(TestAllExtensions message) {
+  public static void assertExtensionsClear(TestAllExtensionsOrBuilder message) {
     // hasBlah() should initially be false for all optional fields.
     Assert.assertFalse(message.hasExtension(optionalInt32Extension   ));
     Assert.assertFalse(message.hasExtension(optionalInt64Extension   ));
@@ -1752,7 +1759,7 @@ class TestUtil {
    * followed by {@code modifyRepeatedExtensions}.
    */
   public static void assertRepeatedExtensionsModified(
-      TestAllExtensions message) {
+      TestAllExtensionsOrBuilder message) {
     // ModifyRepeatedFields only sets the second repeated element of each
     // field.  In addition to verifying this, we also verify that the first
     // element and size were *not* modified.
@@ -2106,7 +2113,8 @@ class TestUtil {
    * Assert (using {@code junit.framework.Assert}} that all extensions of
    * {@code message} are set to the values assigned by {@code setAllExtensions}.
    */
-  public static void assertAllExtensionsSet(TestAllExtensionsLite message) {
+  public static void assertAllExtensionsSet(
+      TestAllExtensionsLiteOrBuilder message) {
     Assert.assertTrue(message.hasExtension(optionalInt32ExtensionLite   ));
     Assert.assertTrue(message.hasExtension(optionalInt64ExtensionLite   ));
     Assert.assertTrue(message.hasExtension(optionalUint32ExtensionLite  ));
@@ -2321,7 +2329,8 @@ class TestUtil {
    * {@code message} are cleared, and that getting the extensions returns their
    * default values.
    */
-  public static void assertExtensionsClear(TestAllExtensionsLite message) {
+  public static void assertExtensionsClear(
+      TestAllExtensionsLiteOrBuilder message) {
     // hasBlah() should initially be false for all optional fields.
     Assert.assertFalse(message.hasExtension(optionalInt32ExtensionLite   ));
     Assert.assertFalse(message.hasExtension(optionalInt64ExtensionLite   ));
@@ -2478,7 +2487,7 @@ class TestUtil {
    * followed by {@code modifyRepeatedExtensions}.
    */
   public static void assertRepeatedExtensionsModified(
-      TestAllExtensionsLite message) {
+      TestAllExtensionsLiteOrBuilder message) {
     // ModifyRepeatedFields only sets the second repeated element of each
     // field.  In addition to verifying this, we also verify that the first
     // element and size were *not* modified.
@@ -3015,7 +3024,7 @@ class TestUtil {
      * {@code message} are set to the values assigned by {@code setAllFields},
      * using the {@link Message} reflection interface.
      */
-    public void assertAllFieldsSetViaReflection(Message message) {
+    public void assertAllFieldsSetViaReflection(MessageOrBuilder message) {
       Assert.assertTrue(message.hasField(f("optional_int32"   )));
       Assert.assertTrue(message.hasField(f("optional_int64"   )));
       Assert.assertTrue(message.hasField(f("optional_uint32"  )));
@@ -3248,7 +3257,7 @@ class TestUtil {
      * {@code message} are cleared, and that getting the fields returns their
      * default values, using the {@link Message} reflection interface.
      */
-    public void assertClearViaReflection(Message message) {
+    public void assertClearViaReflection(MessageOrBuilder message) {
       // has_blah() should initially be false for all optional fields.
       Assert.assertFalse(message.hasField(f("optional_int32"   )));
       Assert.assertFalse(message.hasField(f("optional_int64"   )));
@@ -3405,9 +3414,11 @@ class TestUtil {
       Assert.assertEquals("123", message.getField(f("default_cord")));
     }
 
+
     // ---------------------------------------------------------------
 
-    public void assertRepeatedFieldsModifiedViaReflection(Message message) {
+    public void assertRepeatedFieldsModifiedViaReflection(
+        MessageOrBuilder message) {
       // ModifyRepeatedFields only sets the second repeated element of each
       // field.  In addition to verifying this, we also verify that the first
       // element and size were *not* modified.
@@ -3543,7 +3554,7 @@ class TestUtil {
       message.addRepeatedField(f("packed_enum" ),  foreignBaz);
     }
 
-    public void assertPackedFieldsSetViaReflection(Message message) {
+    public void assertPackedFieldsSetViaReflection(MessageOrBuilder message) {
       Assert.assertEquals(2, message.getRepeatedFieldCount(f("packed_int32"   )));
       Assert.assertEquals(2, message.getRepeatedFieldCount(f("packed_int64"   )));
       Assert.assertEquals(2, message.getRepeatedFieldCount(f("packed_uint32"  )));
@@ -3699,7 +3710,7 @@ class TestUtil {
 
   /**
    * @param filePath The path relative to
-   * {@link com.google.testing.util.TestUtil#getDefaultSrcDir}.
+   * {@link #getTestDataDir}.
    */
   public static String readTextFromFile(String filePath) {
     return readBytesFromFile(filePath).toStringUtf8();
@@ -3728,8 +3739,8 @@ class TestUtil {
   }
 
   /**
-   * @param filePath The path relative to
-   * {@link com.google.testing.util.TestUtil#getDefaultSrcDir}.
+   * @param filename The path relative to
+   * {@link #getTestDataDir}.
    */
   public static ByteString readBytesFromFile(String filename) {
     File fullPath = new File(getTestDataDir(), filename);
@@ -3749,7 +3760,7 @@ class TestUtil {
   /**
    * Get the bytes of the "golden message".  This is a serialized TestAllTypes
    * with all fields set as they would be by
-   * {@link setAllFields(TestAllTypes.Builder)}, but it is loaded from a file
+   * {@link #setAllFields(TestAllTypes.Builder)}, but it is loaded from a file
    * on disk rather than generated dynamically.  The file is actually generated
    * by C++ code, so testing against it verifies compatibility with C++.
    */
@@ -3764,7 +3775,7 @@ class TestUtil {
   /**
    * Get the bytes of the "golden packed fields message".  This is a serialized
    * TestPackedTypes with all fields set as they would be by
-   * {@link setPackedFields(TestPackedTypes.Builder)}, but it is loaded from a
+   * {@link #setPackedFields(TestPackedTypes.Builder)}, but it is loaded from a
    * file on disk rather than generated dynamically.  The file is actually
    * generated by C++ code, so testing against it verifies compatibility with
    * C++.
@@ -3777,4 +3788,24 @@ class TestUtil {
     return goldenPackedFieldsMessage;
   }
   private static ByteString goldenPackedFieldsMessage = null;
+
+  /**
+   * Mock implementation of {@link GeneratedMessage.BuilderParent} for testing.
+   *
+   * @author jonp@google.com (Jon Perlow)
+   */
+  public static class MockBuilderParent
+      implements GeneratedMessage.BuilderParent {
+
+    private int invalidations;
+
+    @Override
+    public void markDirty() {
+      invalidations++;
+    }
+
+    public int getInvalidationCount() {
+      return invalidations;
+    }
+  }
 }

+ 124 - 34
java/src/test/java/com/google/protobuf/TextFormatTest.java

@@ -31,14 +31,14 @@
 package com.google.protobuf;
 
 import com.google.protobuf.Descriptors.FieldDescriptor;
-import protobuf_unittest.UnittestProto.OneString;
-import protobuf_unittest.UnittestProto.TestAllTypes;
-import protobuf_unittest.UnittestProto.TestAllExtensions;
-import protobuf_unittest.UnittestProto.TestEmptyMessage;
-import protobuf_unittest.UnittestProto.TestAllTypes.NestedMessage;
 import protobuf_unittest.UnittestMset.TestMessageSet;
 import protobuf_unittest.UnittestMset.TestMessageSetExtension1;
 import protobuf_unittest.UnittestMset.TestMessageSetExtension2;
+import protobuf_unittest.UnittestProto.OneString;
+import protobuf_unittest.UnittestProto.TestAllExtensions;
+import protobuf_unittest.UnittestProto.TestAllTypes;
+import protobuf_unittest.UnittestProto.TestAllTypes.NestedMessage;
+import protobuf_unittest.UnittestProto.TestEmptyMessage;
 
 import junit.framework.TestCase;
 
@@ -133,40 +133,44 @@ public class TextFormatTest extends TestCase {
     assertEquals(allExtensionsSetText, javaText);
   }
 
+  // Creates an example unknown field set.
+  private UnknownFieldSet makeUnknownFieldSet() {
+    return UnknownFieldSet.newBuilder()
+        .addField(5,
+            UnknownFieldSet.Field.newBuilder()
+            .addVarint(1)
+            .addFixed32(2)
+            .addFixed64(3)
+            .addLengthDelimited(ByteString.copyFromUtf8("4"))
+            .addGroup(
+                UnknownFieldSet.newBuilder()
+                .addField(10,
+                    UnknownFieldSet.Field.newBuilder()
+                    .addVarint(5)
+                    .build())
+                .build())
+            .build())
+        .addField(8,
+            UnknownFieldSet.Field.newBuilder()
+            .addVarint(1)
+            .addVarint(2)
+            .addVarint(3)
+            .build())
+        .addField(15,
+            UnknownFieldSet.Field.newBuilder()
+            .addVarint(0xABCDEF1234567890L)
+            .addFixed32(0xABCD1234)
+            .addFixed64(0xABCDEF1234567890L)
+            .build())
+        .build();
+  }
+
   public void testPrintUnknownFields() throws Exception {
     // Test printing of unknown fields in a message.
 
     TestEmptyMessage message =
       TestEmptyMessage.newBuilder()
-        .setUnknownFields(
-          UnknownFieldSet.newBuilder()
-            .addField(5,
-              UnknownFieldSet.Field.newBuilder()
-                .addVarint(1)
-                .addFixed32(2)
-                .addFixed64(3)
-                .addLengthDelimited(ByteString.copyFromUtf8("4"))
-                .addGroup(
-                  UnknownFieldSet.newBuilder()
-                    .addField(10,
-                      UnknownFieldSet.Field.newBuilder()
-                        .addVarint(5)
-                        .build())
-                    .build())
-                .build())
-            .addField(8,
-              UnknownFieldSet.Field.newBuilder()
-                .addVarint(1)
-                .addVarint(2)
-                .addVarint(3)
-                .build())
-            .addField(15,
-              UnknownFieldSet.Field.newBuilder()
-                .addVarint(0xABCDEF1234567890L)
-                .addFixed32(0xABCD1234)
-                .addFixed64(0xABCDEF1234567890L)
-                .build())
-            .build())
+        .setUnknownFields(makeUnknownFieldSet())
         .build();
 
     assertEquals(
@@ -408,6 +412,9 @@ public class TextFormatTest extends TestCase {
     assertParseError(
       "1:16: Expected \"true\" or \"false\".",
       "optional_bool: maybe");
+    assertParseError(
+      "1:16: Expected \"true\" or \"false\".",
+      "optional_bool: 2");
     assertParseError(
       "1:18: Expected string.",
       "optional_string: 123");
@@ -485,6 +492,11 @@ public class TextFormatTest extends TestCase {
     assertEquals(bytes(0xe1, 0x88, 0xb4),
                  TextFormat.unescapeBytes("\\xe1\\x88\\xb4"));
 
+    // Handling of strings with unescaped Unicode characters > 255.
+    final String zh = "\u9999\u6e2f\u4e0a\u6d77\ud84f\udf80\u8c50\u9280\u884c";
+    ByteString zhByteString = ByteString.copyFromUtf8(zh);
+    assertEquals(zhByteString, TextFormat.unescapeBytes(zh));
+
     // Errors.
     try {
       TextFormat.unescapeText("\\x");
@@ -626,6 +638,13 @@ public class TextFormatTest extends TestCase {
     }
   }
 
+  public void testParseString() throws Exception {
+    final String zh = "\u9999\u6e2f\u4e0a\u6d77\ud84f\udf80\u8c50\u9280\u884c";
+    TestAllTypes.Builder builder = TestAllTypes.newBuilder();
+    TextFormat.merge("optional_string: \"" + zh + "\"", builder);
+    assertEquals(zh, builder.getOptionalString());
+  }
+
   public void testParseLongString() throws Exception {
     String longText =
       "123456789012345678901234567890123456789012345678901234567890" +
@@ -654,9 +673,80 @@ public class TextFormatTest extends TestCase {
     assertEquals(longText, builder.getOptionalString());
   }
 
+  public void testParseBoolean() throws Exception {
+    String goodText =
+        "repeated_bool: t  repeated_bool : 0\n" +
+        "repeated_bool :f repeated_bool:1";
+    String goodTextCanonical =
+        "repeated_bool: true\n" +
+        "repeated_bool: false\n" +
+        "repeated_bool: false\n" +
+        "repeated_bool: true\n";
+    TestAllTypes.Builder builder = TestAllTypes.newBuilder();
+    TextFormat.merge(goodText, builder);
+    assertEquals(goodTextCanonical, builder.build().toString());
+
+    try {
+      TestAllTypes.Builder badBuilder = TestAllTypes.newBuilder();
+      TextFormat.merge("optional_bool:2", badBuilder);
+      fail("Should have thrown an exception.");
+    } catch (TextFormat.ParseException e) {
+      // success
+    }
+    try {
+      TestAllTypes.Builder badBuilder = TestAllTypes.newBuilder();
+      TextFormat.merge("optional_bool: foo", badBuilder);
+      fail("Should have thrown an exception.");
+    } catch (TextFormat.ParseException e) {
+      // success
+    }
+  }
+
   public void testParseAdjacentStringLiterals() throws Exception {
     TestAllTypes.Builder builder = TestAllTypes.newBuilder();
     TextFormat.merge("optional_string: \"foo\" 'corge' \"grault\"", builder);
     assertEquals("foocorgegrault", builder.getOptionalString());
   }
+
+  public void testPrintFieldValue() throws Exception {
+    assertPrintFieldValue("\"Hello\"", "Hello", "repeated_string");
+    assertPrintFieldValue("123.0",  123f, "repeated_float");
+    assertPrintFieldValue("123.0",  123d, "repeated_double");
+    assertPrintFieldValue("123",  123, "repeated_int32");
+    assertPrintFieldValue("123",  123L, "repeated_int64");
+    assertPrintFieldValue("true",  true, "repeated_bool");
+    assertPrintFieldValue("4294967295", 0xFFFFFFFF, "repeated_uint32");
+    assertPrintFieldValue("18446744073709551615",  0xFFFFFFFFFFFFFFFFL,
+        "repeated_uint64");
+    assertPrintFieldValue("\"\\001\\002\\003\"",
+        ByteString.copyFrom(new byte[] {1, 2, 3}), "repeated_bytes");
+  }
+
+  private void assertPrintFieldValue(String expect, Object value,
+      String fieldName) throws Exception {
+    TestAllTypes.Builder builder = TestAllTypes.newBuilder();
+    StringBuilder sb = new StringBuilder();
+    TextFormat.printFieldValue(
+        TestAllTypes.getDescriptor().findFieldByName(fieldName),
+        value, sb);
+    assertEquals(expect, sb.toString());
+  }
+
+  public void testShortDebugString() {
+    assertEquals("optional_nested_message { bb: 42 } repeated_int32: 1"
+        + " repeated_uint32: 2",
+        TextFormat.shortDebugString(TestAllTypes.newBuilder()
+            .addRepeatedInt32(1)
+            .addRepeatedUint32(2)
+            .setOptionalNestedMessage(
+                NestedMessage.newBuilder().setBb(42).build())
+            .build()));
+  }
+
+  public void testShortDebugString_unknown() {
+    assertEquals("5: 1 5: 0x00000002 5: 0x0000000000000003 5: \"4\" 5 { 10: 5 }"
+        + " 8: 1 8: 2 8: 3 15: 12379813812177893520 15: 0xabcd1234 15:"
+        + " 0xabcdef1234567890",
+        TextFormat.shortDebugString(makeUnknownFieldSet()));
+  }
 }

+ 152 - 0
java/src/test/java/com/google/protobuf/UnmodifiableLazyStringListTest.java

@@ -0,0 +1,152 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import junit.framework.TestCase;
+
+import java.util.Iterator;
+import java.util.ListIterator;
+
+/**
+ * Tests for {@link UnmodifiableLazyStringList}.
+ *
+ * @author jonp@google.com (Jon Perlow)
+ */
+public class UnmodifiableLazyStringListTest extends TestCase {
+
+  private static String STRING_A = "A";
+  private static String STRING_B = "B";
+  private static String STRING_C = "C";
+
+  private static ByteString BYTE_STRING_A = ByteString.copyFromUtf8("A");
+  private static ByteString BYTE_STRING_B = ByteString.copyFromUtf8("B");
+  private static ByteString BYTE_STRING_C = ByteString.copyFromUtf8("C");
+
+  public void testReadOnlyMethods() {
+    LazyStringArrayList rawList = createSampleList();
+    UnmodifiableLazyStringList list = new UnmodifiableLazyStringList(rawList);
+    assertEquals(3, list.size());
+    assertSame(STRING_A, list.get(0));
+    assertSame(STRING_B, list.get(1));
+    assertSame(STRING_C, list.get(2));
+    assertEquals(BYTE_STRING_A, list.getByteString(0));
+    assertEquals(BYTE_STRING_B, list.getByteString(1));
+    assertEquals(BYTE_STRING_C, list.getByteString(2));
+  }
+
+  public void testModifyMethods() {
+    LazyStringArrayList rawList = createSampleList();
+    UnmodifiableLazyStringList list = new UnmodifiableLazyStringList(rawList);
+
+    try {
+      list.remove(0);
+      fail();
+    } catch (UnsupportedOperationException e) {
+      // expected
+    }
+    assertEquals(3, list.size());
+
+    try {
+      list.add(STRING_B);
+      fail();
+    } catch (UnsupportedOperationException e) {
+      // expected
+    }
+    assertEquals(3, list.size());
+
+    try {
+      list.set(1, STRING_B);
+      fail();
+    } catch (UnsupportedOperationException e) {
+      // expected
+    }
+  }
+
+  public void testIterator() {
+    LazyStringArrayList rawList = createSampleList();
+    UnmodifiableLazyStringList list = new UnmodifiableLazyStringList(rawList);
+
+    Iterator<String> iter = list.iterator();
+    int count = 0;
+    while (iter.hasNext()) {
+      iter.next();
+      count++;
+      try {
+        iter.remove();
+        fail();
+      } catch (UnsupportedOperationException e) {
+        // expected
+      }
+    }
+    assertEquals(3, count);
+
+  }
+
+  public void testListIterator() {
+    LazyStringArrayList rawList = createSampleList();
+    UnmodifiableLazyStringList list = new UnmodifiableLazyStringList(rawList);
+
+    ListIterator<String> iter = list.listIterator();
+    int count = 0;
+    while (iter.hasNext()) {
+      iter.next();
+      count++;
+      try {
+        iter.remove();
+        fail();
+      } catch (UnsupportedOperationException e) {
+        // expected
+      }
+      try {
+        iter.set("bar");
+        fail();
+      } catch (UnsupportedOperationException e) {
+        // expected
+      }
+      try {
+        iter.add("bar");
+        fail();
+      } catch (UnsupportedOperationException e) {
+        // expected
+      }
+    }
+    assertEquals(3, count);
+
+  }
+
+  private LazyStringArrayList createSampleList() {
+    LazyStringArrayList rawList = new LazyStringArrayList();
+    rawList.add(STRING_A);
+    rawList.add(STRING_B);
+    rawList.add(STRING_C);
+    return rawList;
+  }
+}

+ 4 - 0
java/src/test/java/com/google/protobuf/multiple_files_test.proto

@@ -33,6 +33,10 @@
 // A proto file which tests the java_multiple_files option.
 
 
+// Some generic_services option(s) added automatically.
+// See:  http://go/proto2-generic-services-default
+option java_generic_services = true;   // auto-added
+
 import "google/protobuf/unittest.proto";
 
 package protobuf_unittest;

+ 53 - 0
java/src/test/java/com/google/protobuf/nested_builders_test.proto

@@ -0,0 +1,53 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: jonp@google.com (Jon Perlow)
+//
+
+package protobuf_unittest;
+
+option java_multiple_files = true;
+option java_outer_classname = "NestedBuilders";
+
+
+message Vehicle {
+  optional Engine engine = 1;
+  repeated Wheel wheel = 2;
+}
+
+message Engine {
+  optional int32 cylinder = 1;
+  optional int32 liters = 2;
+}
+
+message Wheel {
+  optional int32 radius = 1;
+  optional int32 width = 2;
+}

+ 45 - 0
java/src/test/java/com/google/protobuf/nested_extension.proto

@@ -0,0 +1,45 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: Darick Tong (darick@google.com)
+//
+// A proto file with nested extensions. Note that this must be defined in
+// a separate file to properly test the initialization of the outer class.
+
+
+import "com/google/protobuf/non_nested_extension.proto";
+
+package protobuf_unittest;
+
+message MyNestedExtension {
+  extend MessageToBeExtended {
+    optional MessageToBeExtended recursiveExtension = 2;
+  }
+}

+ 48 - 0
java/src/test/java/com/google/protobuf/nested_extension_lite.proto

@@ -0,0 +1,48 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: Darick Tong (darick@google.com)
+//
+// A proto file with nested extensions for a MessageLite messages. Note that
+// this must be defined in a separate file to properly test the initialization
+// of the outer class.
+
+
+package protobuf_unittest;
+
+option optimize_for = LITE_RUNTIME;
+
+import "com/google/protobuf/non_nested_extension_lite.proto";
+
+message MyNestedExtensionLite {
+  extend MessageLiteToBeExtended {
+    optional MessageLiteToBeExtended recursiveExtensionLite = 3;
+  }
+}

+ 48 - 0
java/src/test/java/com/google/protobuf/non_nested_extension.proto

@@ -0,0 +1,48 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: Darick Tong (darick@google.com)
+//
+// A proto file with extensions.
+
+
+package protobuf_unittest;
+
+message MessageToBeExtended {
+  extensions 1 to max;
+}
+
+message MyNonNestedExtension {
+}
+
+extend MessageToBeExtended {
+  optional MyNonNestedExtension nonNestedExtension = 1;
+}
+

+ 50 - 0
java/src/test/java/com/google/protobuf/non_nested_extension_lite.proto

@@ -0,0 +1,50 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: Darick Tong (darick@google.com)
+//
+// A proto file with extensions for a MessageLite messages.
+
+
+package protobuf_unittest;
+
+option optimize_for = LITE_RUNTIME;
+
+message MessageLiteToBeExtended {
+  extensions 1 to max;
+}
+
+message MyNonNestedExtensionLite {
+}
+
+extend MessageLiteToBeExtended {
+  optional MyNonNestedExtensionLite nonNestedExtensionLite = 1;
+}
+

+ 66 - 0
java/src/test/java/com/google/protobuf/test_bad_identifiers.proto

@@ -0,0 +1,66 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: jonp@google.com (Jon Perlow)
+
+// This file tests that various identifiers work as field and type names even
+// though the same identifiers are used internally by the java code generator.
+
+
+// Some generic_services option(s) added automatically.
+// See:  http://go/proto2-generic-services-default
+option java_generic_services = true;   // auto-added
+
+package io_protocol_tests;
+
+option java_package = "com.google.protobuf";
+option java_outer_classname = "TestBadIdentifiersProto";
+
+message TestMessage {
+}
+
+message Deprecated {
+  enum TestEnum {
+    FOO = 1;
+  }
+
+  optional int32 field1 = 1 [deprecated=true];
+  optional TestEnum field2 = 2 [deprecated=true];
+  optional TestMessage field3 = 3 [deprecated=true];
+}
+
+message Override {
+ optional int32 override = 1;
+}
+
+service TestConflictingMethodNames {
+  rpc Override(TestMessage) returns (TestMessage);
+}
+

+ 18 - 10
python/google/protobuf/descriptor.py

@@ -28,16 +28,6 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-# TODO(robinson): We probably need to provide deep-copy methods for
-# descriptor types.  When a FieldDescriptor is passed into
-# Descriptor.__init__(), we should make a deep copy and then set
-# containing_type on it.  Alternatively, we could just get
-# rid of containing_type (iit's not needed for reflection.py, at least).
-#
-# TODO(robinson): Print method?
-#
-# TODO(robinson): Useful __repr__?
-
 """Descriptors essentially contain exactly the information found in a .proto
 file, in types that make this information accessible in Python.
 """
@@ -45,6 +35,13 @@ file, in types that make this information accessible in Python.
 __author__ = 'robinson@google.com (Will Robinson)'
 
 
+from google.protobuf.internal import api_implementation
+
+
+if api_implementation.Type() == 'cpp':
+  from google.protobuf.internal import cpp_message
+
+
 class Error(Exception):
   """Base error for this module."""
 
@@ -396,6 +393,13 @@ class FieldDescriptor(DescriptorBase):
     self.enum_type = enum_type
     self.is_extension = is_extension
     self.extension_scope = extension_scope
+    if api_implementation.Type() == 'cpp':
+      if is_extension:
+        self._cdescriptor = cpp_message.GetExtensionDescriptor(full_name)
+      else:
+        self._cdescriptor = cpp_message.GetFieldDescriptor(full_name)
+    else:
+      self._cdescriptor = None
 
 
 class EnumDescriptor(_NestedDescriptorBase):
@@ -567,9 +571,13 @@ class FileDescriptor(DescriptorBase):
     """Constructor."""
     super(FileDescriptor, self).__init__(options, 'FileOptions')
 
+    self.message_types_by_name = {}
     self.name = name
     self.package = package
     self.serialized_pb = serialized_pb
+    if (api_implementation.Type() == 'cpp' and
+        self.serialized_pb is not None):
+      cpp_message.BuildFile(self.serialized_pb)
 
   def CopyToProto(self, proto):
     """Copies this to a descriptor_pb2.FileDescriptorProto.

+ 64 - 0
python/google/protobuf/internal/api_implementation.py

@@ -0,0 +1,64 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2008 Google Inc.  All rights reserved.
+# http://code.google.com/p/protobuf/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This module is the central entity that determines which implementation of the
+API is used.
+"""
+
+__author__ = 'petar@google.com (Petar Petrov)'
+
+import os
+# This environment variable can be used to switch to a certain implementation
+# of the Python API. Right now only 'python' and 'cpp' are valid values. Any
+# other value will be ignored.
+_implementation_type = os.getenv('PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION',
+                                 'python')
+
+
+if _implementation_type != 'python':
+  # For now, by default use the pure-Python implementation.
+  # The code below checks if the C extension is available and
+  # uses it if it is available.
+  _implementation_type = 'cpp'
+  ## Determine automatically which implementation to use.
+  #try:
+  #  from google.protobuf.internal import cpp_message
+  #  _implementation_type = 'cpp'
+  #except ImportError, e:
+  #  _implementation_type = 'python'
+
+
+# Usage of this function is discouraged. Clients shouldn't care which
+# implementation of the API is in use. Note that there is no guarantee
+# that differences between APIs will be maintained.
+# Please don't use this function if possible.
+def Type():
+  return _implementation_type

+ 21 - 6
python/google/protobuf/internal/containers.py

@@ -72,9 +72,15 @@ class BaseContainer(object):
     # The concrete classes should define __eq__.
     return not self == other
 
+  def __hash__(self):
+    raise TypeError('unhashable object')
+
   def __repr__(self):
     return repr(self._values)
 
+  def sort(self, sort_function=cmp):
+    self._values.sort(sort_function)
+
 
 class RepeatedScalarFieldContainer(BaseContainer):
 
@@ -198,28 +204,37 @@ class RepeatedCompositeFieldContainer(BaseContainer):
     super(RepeatedCompositeFieldContainer, self).__init__(message_listener)
     self._message_descriptor = message_descriptor
 
-  def add(self):
-    new_element = self._message_descriptor._concrete_class()
+  def add(self, **kwargs):
+    """Adds a new element at the end of the list and returns it. Keyword
+    arguments may be used to initialize the element.
+    """
+    new_element = self._message_descriptor._concrete_class(**kwargs)
     new_element._SetListener(self._message_listener)
     self._values.append(new_element)
     if not self._message_listener.dirty:
       self._message_listener.Modified()
     return new_element
 
-  def MergeFrom(self, other):
-    """Appends the contents of another repeated field of the same type to this
-    one, copying each individual message.
+  def extend(self, elem_seq):
+    """Extends by appending the given sequence of elements of the same type
+    as this one, copying each individual message.
     """
     message_class = self._message_descriptor._concrete_class
     listener = self._message_listener
     values = self._values
-    for message in other._values:
+    for message in elem_seq:
       new_element = message_class()
       new_element._SetListener(listener)
       new_element.MergeFrom(message)
       values.append(new_element)
     listener.Modified()
 
+  def MergeFrom(self, other):
+    """Appends the contents of another repeated field of the same type to this
+    one, copying each individual message.
+    """
+    self.extend(other._values)
+
   def __getslice__(self, start, stop):
     """Retrieves the subset of items from between the specified indices."""
     return self._values[start:stop]

+ 616 - 0
python/google/protobuf/internal/cpp_message.py

@@ -0,0 +1,616 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2008 Google Inc.  All rights reserved.
+# http://code.google.com/p/protobuf/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Contains helper functions used to create protocol message classes from
+Descriptor objects at runtime backed by the protocol buffer C++ API.
+"""
+
+__author__ = 'petar@google.com (Petar Petrov)'
+
+import operator
+from google.protobuf.internal import _net_proto2___python
+from google.protobuf import message
+
+
+_LABEL_REPEATED = _net_proto2___python.LABEL_REPEATED
+_LABEL_OPTIONAL = _net_proto2___python.LABEL_OPTIONAL
+_CPPTYPE_MESSAGE = _net_proto2___python.CPPTYPE_MESSAGE
+_TYPE_MESSAGE = _net_proto2___python.TYPE_MESSAGE
+
+
+def GetDescriptorPool():
+  """Creates a new DescriptorPool C++ object."""
+  return _net_proto2___python.NewCDescriptorPool()
+
+
+_pool = GetDescriptorPool()
+
+
+def GetFieldDescriptor(full_field_name):
+  """Searches for a field descriptor given a full field name."""
+  return _pool.FindFieldByName(full_field_name)
+
+
+def BuildFile(content):
+  """Registers a new proto file in the underlying C++ descriptor pool."""
+  _net_proto2___python.BuildFile(content)
+
+
+def GetExtensionDescriptor(full_extension_name):
+  """Searches for extension descriptor given a full field name."""
+  return _pool.FindExtensionByName(full_extension_name)
+
+
+def NewCMessage(full_message_name):
+  """Creates a new C++ protocol message by its name."""
+  return _net_proto2___python.NewCMessage(full_message_name)
+
+
+def ScalarProperty(cdescriptor):
+  """Returns a scalar property for the given descriptor."""
+
+  def Getter(self):
+    return self._cmsg.GetScalar(cdescriptor)
+
+  def Setter(self, value):
+    self._cmsg.SetScalar(cdescriptor, value)
+
+  return property(Getter, Setter)
+
+
+def CompositeProperty(cdescriptor, message_type):
+  """Returns a Python property the given composite field."""
+
+  def Getter(self):
+    sub_message = self._composite_fields.get(cdescriptor.name, None)
+    if sub_message is None:
+      cmessage = self._cmsg.NewSubMessage(cdescriptor)
+      sub_message = message_type._concrete_class(__cmessage=cmessage)
+      self._composite_fields[cdescriptor.name] = sub_message
+    return sub_message
+
+  return property(Getter)
+
+
+class RepeatedScalarContainer(object):
+  """Container for repeated scalar fields."""
+
+  __slots__ = ['_message', '_cfield_descriptor', '_cmsg']
+
+  def __init__(self, msg, cfield_descriptor):
+    self._message = msg
+    self._cmsg = msg._cmsg
+    self._cfield_descriptor = cfield_descriptor
+
+  def append(self, value):
+    self._cmsg.AddRepeatedScalar(
+        self._cfield_descriptor, value)
+
+  def extend(self, sequence):
+    for element in sequence:
+      self.append(element)
+
+  def insert(self, key, value):
+    values = self[slice(None, None, None)]
+    values.insert(key, value)
+    self._cmsg.AssignRepeatedScalar(self._cfield_descriptor, values)
+
+  def remove(self, value):
+    values = self[slice(None, None, None)]
+    values.remove(value)
+    self._cmsg.AssignRepeatedScalar(self._cfield_descriptor, values)
+
+  def __setitem__(self, key, value):
+    values = self[slice(None, None, None)]
+    values[key] = value
+    self._cmsg.AssignRepeatedScalar(self._cfield_descriptor, values)
+
+  def __getitem__(self, key):
+    return self._cmsg.GetRepeatedScalar(self._cfield_descriptor, key)
+
+  def __delitem__(self, key):
+    self._cmsg.DeleteRepeatedField(self._cfield_descriptor, key)
+
+  def __len__(self):
+    return len(self[slice(None, None, None)])
+
+  def __eq__(self, other):
+    if self is other:
+      return True
+    if not operator.isSequenceType(other):
+      raise TypeError(
+          'Can only compare repeated scalar fields against sequences.')
+    # We are presumably comparing against some other sequence type.
+    return other == self[slice(None, None, None)]
+
+  def __ne__(self, other):
+    return not self == other
+
+  def __hash__(self):
+    raise TypeError('unhashable object')
+
+  def sort(self, sort_function=cmp):
+    values = self[slice(None, None, None)]
+    values.sort(sort_function)
+    self._cmsg.AssignRepeatedScalar(self._cfield_descriptor, values)
+
+
+def RepeatedScalarProperty(cdescriptor):
+  """Returns a Python property the given repeated scalar field."""
+
+  def Getter(self):
+    container = self._composite_fields.get(cdescriptor.name, None)
+    if container is None:
+      container = RepeatedScalarContainer(self, cdescriptor)
+      self._composite_fields[cdescriptor.name] = container
+    return container
+
+  def Setter(self, new_value):
+    raise AttributeError('Assignment not allowed to repeated field '
+                         '"%s" in protocol message object.' % cdescriptor.name)
+
+  doc = 'Magic attribute generated for "%s" proto field.' % cdescriptor.name
+  return property(Getter, Setter, doc=doc)
+
+
+class RepeatedCompositeContainer(object):
+  """Container for repeated composite fields."""
+
+  __slots__ = ['_message', '_subclass', '_cfield_descriptor', '_cmsg']
+
+  def __init__(self, msg, cfield_descriptor, subclass):
+    self._message = msg
+    self._cmsg = msg._cmsg
+    self._subclass = subclass
+    self._cfield_descriptor = cfield_descriptor
+
+  def add(self, **kwargs):
+    cmessage = self._cmsg.AddMessage(self._cfield_descriptor)
+    return self._subclass(__cmessage=cmessage, __owner=self._message, **kwargs)
+
+  def extend(self, elem_seq):
+    """Extends by appending the given sequence of elements of the same type
+    as this one, copying each individual message.
+    """
+    for message in elem_seq:
+      self.add().MergeFrom(message)
+
+  def MergeFrom(self, other):
+    for message in other[:]:
+      self.add().MergeFrom(message)
+
+  def __getitem__(self, key):
+    cmessages = self._cmsg.GetRepeatedMessage(
+        self._cfield_descriptor, key)
+    subclass = self._subclass
+    if not isinstance(cmessages, list):
+      return subclass(__cmessage=cmessages, __owner=self._message)
+
+    return [subclass(__cmessage=m, __owner=self._message) for m in cmessages]
+
+  def __delitem__(self, key):
+    self._cmsg.DeleteRepeatedField(
+        self._cfield_descriptor, key)
+
+  def __len__(self):
+    return self._cmsg.FieldLength(self._cfield_descriptor)
+
+  def __eq__(self, other):
+    """Compares the current instance with another one."""
+    if self is other:
+      return True
+    if not isinstance(other, self.__class__):
+      raise TypeError('Can only compare repeated composite fields against '
+                      'other repeated composite fields.')
+    messages = self[slice(None, None, None)]
+    other_messages = other[slice(None, None, None)]
+    return messages == other_messages
+
+  def __hash__(self):
+    raise TypeError('unhashable object')
+
+  def sort(self, sort_function=cmp):
+    messages = []
+    for index in range(len(self)):
+      # messages[i][0] is where the i-th element of the new array has to come
+      # from.
+      # messages[i][1] is where the i-th element of the old array has to go.
+      messages.append([index, 0, self[index]])
+    messages.sort(lambda x,y: sort_function(x[2], y[2]))
+
+    # Remember which position each elements has to move to.
+    for i in range(len(messages)):
+      messages[messages[i][0]][1] = i
+
+    # Apply the transposition.
+    for i in range(len(messages)):
+      from_position = messages[i][0]
+      if i == from_position:
+        continue
+      self._cmsg.SwapRepeatedFieldElements(
+          self._cfield_descriptor, i, from_position)
+      messages[messages[i][1]][0] = from_position
+
+
+def RepeatedCompositeProperty(cdescriptor, message_type):
+  """Returns a Python property for the given repeated composite field."""
+
+  def Getter(self):
+    container = self._composite_fields.get(cdescriptor.name, None)
+    if container is None:
+      container = RepeatedCompositeContainer(
+          self, cdescriptor, message_type._concrete_class)
+      self._composite_fields[cdescriptor.name] = container
+    return container
+
+  def Setter(self, new_value):
+    raise AttributeError('Assignment not allowed to repeated field '
+                         '"%s" in protocol message object.' % cdescriptor.name)
+
+  doc = 'Magic attribute generated for "%s" proto field.' % cdescriptor.name
+  return property(Getter, Setter, doc=doc)
+
+
+class ExtensionDict(object):
+  """Extension dictionary added to each protocol message."""
+
+  def __init__(self, msg):
+    self._message = msg
+    self._cmsg = msg._cmsg
+    self._values = {}
+
+  def __setitem__(self, extension, value):
+    from google.protobuf import descriptor
+    if not isinstance(extension, descriptor.FieldDescriptor):
+      raise KeyError('Bad extension %r.' % (extension,))
+    cdescriptor = extension._cdescriptor
+    if (cdescriptor.label != _LABEL_OPTIONAL or
+        cdescriptor.cpp_type == _CPPTYPE_MESSAGE):
+      raise TypeError('Extension %r is repeated and/or a composite type.' % (
+          extension.full_name,))
+    self._cmsg.SetScalar(cdescriptor, value)
+    self._values[extension] = value
+
+  def __getitem__(self, extension):
+    from google.protobuf import descriptor
+    if not isinstance(extension, descriptor.FieldDescriptor):
+      raise KeyError('Bad extension %r.' % (extension,))
+
+    cdescriptor = extension._cdescriptor
+    if (cdescriptor.label != _LABEL_REPEATED and
+        cdescriptor.cpp_type != _CPPTYPE_MESSAGE):
+      return self._cmsg.GetScalar(cdescriptor)
+
+    ext = self._values.get(extension, None)
+    if ext is not None:
+      return ext
+
+    ext = self._CreateNewHandle(extension)
+    self._values[extension] = ext
+    return ext
+
+  def ClearExtension(self, extension):
+    from google.protobuf import descriptor
+    if not isinstance(extension, descriptor.FieldDescriptor):
+      raise KeyError('Bad extension %r.' % (extension,))
+    self._cmsg.ClearFieldByDescriptor(extension._cdescriptor)
+    if extension in self._values:
+      del self._values[extension]
+
+  def HasExtension(self, extension):
+    from google.protobuf import descriptor
+    if not isinstance(extension, descriptor.FieldDescriptor):
+      raise KeyError('Bad extension %r.' % (extension,))
+    return self._cmsg.HasFieldByDescriptor(extension._cdescriptor)
+
+  def _FindExtensionByName(self, name):
+    """Tries to find a known extension with the specified name.
+
+    Args:
+      name: Extension full name.
+
+    Returns:
+      Extension field descriptor.
+    """
+    return self._message._extensions_by_name.get(name, None)
+
+  def _CreateNewHandle(self, extension):
+    cdescriptor = extension._cdescriptor
+    if (cdescriptor.label != _LABEL_REPEATED and
+        cdescriptor.cpp_type == _CPPTYPE_MESSAGE):
+      cmessage = self._cmsg.NewSubMessage(cdescriptor)
+      return extension.message_type._concrete_class(__cmessage=cmessage)
+
+    if cdescriptor.label == _LABEL_REPEATED:
+      if cdescriptor.cpp_type == _CPPTYPE_MESSAGE:
+        return RepeatedCompositeContainer(
+            self._message, cdescriptor, extension.message_type._concrete_class)
+      else:
+        return RepeatedScalarContainer(self._message, cdescriptor)
+    # This shouldn't happen!
+    assert False
+    return None
+
+
+def NewMessage(message_descriptor, dictionary):
+  """Creates a new protocol message *class*."""
+  _AddClassAttributesForNestedExtensions(message_descriptor, dictionary)
+  _AddEnumValues(message_descriptor, dictionary)
+  _AddDescriptors(message_descriptor, dictionary)
+
+
+def InitMessage(message_descriptor, cls):
+  """Constructs a new message instance (called before instance's __init__)."""
+  cls._extensions_by_name = {}
+  _AddInitMethod(message_descriptor, cls)
+  _AddMessageMethods(message_descriptor, cls)
+  _AddPropertiesForExtensions(message_descriptor, cls)
+
+
+def _AddDescriptors(message_descriptor, dictionary):
+  """Sets up a new protocol message class dictionary.
+
+  Args:
+    message_descriptor: A Descriptor instance describing this message type.
+    dictionary: Class dictionary to which we'll add a '__slots__' entry.
+  """
+  dictionary['__descriptors'] = {}
+  for field in message_descriptor.fields:
+    dictionary['__descriptors'][field.name] = GetFieldDescriptor(
+        field.full_name)
+
+  dictionary['__slots__'] = list(dictionary['__descriptors'].iterkeys()) + [
+      '_cmsg', '_owner', '_composite_fields', 'Extensions']
+
+
+def _AddEnumValues(message_descriptor, dictionary):
+  """Sets class-level attributes for all enum fields defined in this message.
+
+  Args:
+    message_descriptor: Descriptor object for this message type.
+    dictionary: Class dictionary that should be populated.
+  """
+  for enum_type in message_descriptor.enum_types:
+    for enum_value in enum_type.values:
+      dictionary[enum_value.name] = enum_value.number
+
+
+def _AddClassAttributesForNestedExtensions(message_descriptor, dictionary):
+  """Adds class attributes for the nested extensions."""
+  extension_dict = message_descriptor.extensions_by_name
+  for extension_name, extension_field in extension_dict.iteritems():
+    assert extension_name not in dictionary
+    dictionary[extension_name] = extension_field
+
+
+def _AddInitMethod(message_descriptor, cls):
+  """Adds an __init__ method to cls."""
+
+  # Create and attach message field properties to the message class.
+  # This can be done just once per message class, since property setters and
+  # getters are passed the message instance.
+  # This makes message instantiation extremely fast, and at the same time it
+  # doesn't require the creation of property objects for each message instance,
+  # which saves a lot of memory.
+  for field in message_descriptor.fields:
+    field_cdescriptor = cls.__descriptors[field.name]
+    if field.label == _LABEL_REPEATED:
+      if field.cpp_type == _CPPTYPE_MESSAGE:
+        value = RepeatedCompositeProperty(field_cdescriptor, field.message_type)
+      else:
+        value = RepeatedScalarProperty(field_cdescriptor)
+    elif field.cpp_type == _CPPTYPE_MESSAGE:
+      value = CompositeProperty(field_cdescriptor, field.message_type)
+    else:
+      value = ScalarProperty(field_cdescriptor)
+    setattr(cls, field.name, value)
+
+    # Attach a constant with the field number.
+    constant_name = field.name.upper() + '_FIELD_NUMBER'
+    setattr(cls, constant_name, field.number)
+
+  def Init(self, **kwargs):
+    """Message constructor."""
+    cmessage = kwargs.pop('__cmessage', None)
+    if cmessage is None:
+      self._cmsg = NewCMessage(message_descriptor.full_name)
+    else:
+      self._cmsg = cmessage
+
+    # Keep a reference to the owner, as the owner keeps a reference to the
+    # underlying protocol buffer message.
+    owner = kwargs.pop('__owner', None)
+    if owner is not None:
+      self._owner = owner
+
+    self.Extensions = ExtensionDict(self)
+    self._composite_fields = {}
+
+    for field_name, field_value in kwargs.iteritems():
+      field_cdescriptor = self.__descriptors.get(field_name, None)
+      if field_cdescriptor is None:
+        raise ValueError('Protocol message has no "%s" field.' % field_name)
+      if field_cdescriptor.label == _LABEL_REPEATED:
+        if field_cdescriptor.cpp_type == _CPPTYPE_MESSAGE:
+          for val in field_value:
+            getattr(self, field_name).add().MergeFrom(val)
+        else:
+          getattr(self, field_name).extend(field_value)
+      elif field_cdescriptor.cpp_type == _CPPTYPE_MESSAGE:
+        getattr(self, field_name).MergeFrom(field_value)
+      else:
+        setattr(self, field_name, field_value)
+
+  Init.__module__ = None
+  Init.__doc__ = None
+  cls.__init__ = Init
+
+
+def _IsMessageSetExtension(field):
+  """Checks if a field is a message set extension."""
+  return (field.is_extension and
+          field.containing_type.has_options and
+          field.containing_type.GetOptions().message_set_wire_format and
+          field.type == _TYPE_MESSAGE and
+          field.message_type == field.extension_scope and
+          field.label == _LABEL_OPTIONAL)
+
+
+def _AddMessageMethods(message_descriptor, cls):
+  """Adds the methods to a protocol message class."""
+  if message_descriptor.is_extendable:
+
+    def ClearExtension(self, extension):
+      self.Extensions.ClearExtension(extension)
+
+    def HasExtension(self, extension):
+      return self.Extensions.HasExtension(extension)
+
+  def HasField(self, field_name):
+    return self._cmsg.HasField(field_name)
+
+  def ClearField(self, field_name):
+    if field_name in self._composite_fields:
+      del self._composite_fields[field_name]
+    self._cmsg.ClearField(field_name)
+
+  def Clear(self):
+    return self._cmsg.Clear()
+
+  def IsInitialized(self, errors=None):
+    if self._cmsg.IsInitialized():
+      return True
+    if errors is not None:
+      errors.extend(self.FindInitializationErrors());
+    return False
+
+  def SerializeToString(self):
+    if not self.IsInitialized():
+      raise message.EncodeError(
+          'Message is missing required fields: ' +
+          ','.join(self.FindInitializationErrors()))
+    return self._cmsg.SerializeToString()
+
+  def SerializePartialToString(self):
+    return self._cmsg.SerializePartialToString()
+
+  def ParseFromString(self, serialized):
+    self.Clear()
+    self.MergeFromString(serialized)
+
+  def MergeFromString(self, serialized):
+    byte_size = self._cmsg.MergeFromString(serialized)
+    if byte_size < 0:
+      raise message.DecodeError('Unable to merge from string.')
+    return byte_size
+
+  def MergeFrom(self, msg):
+    if not isinstance(msg, cls):
+      raise TypeError(
+          "Parameter to MergeFrom() must be instance of same class.")
+    self._cmsg.MergeFrom(msg._cmsg)
+
+  def CopyFrom(self, msg):
+    self._cmsg.CopyFrom(msg._cmsg)
+
+  def ByteSize(self):
+    return self._cmsg.ByteSize()
+
+  def SetInParent(self):
+    return self._cmsg.SetInParent()
+
+  def ListFields(self):
+    all_fields = []
+    field_list = self._cmsg.ListFields()
+    fields_by_name = cls.DESCRIPTOR.fields_by_name
+    for is_extension, field_name in field_list:
+      if is_extension:
+        extension = cls._extensions_by_name[field_name]
+        all_fields.append((extension, self.Extensions[extension]))
+      else:
+        field_descriptor = fields_by_name[field_name]
+        all_fields.append(
+            (field_descriptor, getattr(self, field_name)))
+    all_fields.sort(key=lambda item: item[0].number)
+    return all_fields
+
+  def FindInitializationErrors(self):
+    return self._cmsg.FindInitializationErrors()
+
+  def __str__(self):
+    return self._cmsg.DebugString()
+
+  def __eq__(self, other):
+    if self is other:
+      return True
+    if not isinstance(other, self.__class__):
+      return False
+    return self.ListFields() == other.ListFields()
+
+  def __ne__(self, other):
+    return not self == other
+
+  def __hash__(self):
+    raise TypeError('unhashable object')
+
+  def __unicode__(self):
+    return text_format.MessageToString(self, as_utf8=True).decode('utf-8')
+
+  # Attach the local methods to the message class.
+  for key, value in locals().copy().iteritems():
+    if key not in ('key', 'value', '__builtins__', '__name__', '__doc__'):
+      setattr(cls, key, value)
+
+  # Static methods:
+
+  def RegisterExtension(extension_handle):
+    extension_handle.containing_type = cls.DESCRIPTOR
+    cls._extensions_by_name[extension_handle.full_name] = extension_handle
+
+    if _IsMessageSetExtension(extension_handle):
+      # MessageSet extension.  Also register under type name.
+      cls._extensions_by_name[
+          extension_handle.message_type.full_name] = extension_handle
+  cls.RegisterExtension = staticmethod(RegisterExtension)
+
+  def FromString(string):
+    msg = cls()
+    msg.MergeFromString(string)
+    return msg
+  cls.FromString = staticmethod(FromString)
+
+
+
+def _AddPropertiesForExtensions(message_descriptor, cls):
+  """Adds properties for all fields in this protocol message type."""
+  extension_dict = message_descriptor.extensions_by_name
+  for extension_name, extension_field in extension_dict.iteritems():
+    constant_name = extension_name.upper() + '_FIELD_NUMBER'
+    setattr(cls, constant_name, extension_field.number)

+ 75 - 2
python/google/protobuf/internal/decoder.py

@@ -86,6 +86,13 @@ from google.protobuf.internal import wire_format
 from google.protobuf import message
 
 
+# This will overflow and thus become IEEE-754 "infinity".  We would use
+# "float('inf')" but it doesn't work on Windows pre-Python-2.6.
+_POS_INF = 1e10000
+_NEG_INF = -_POS_INF
+_NAN = _POS_INF * 0
+
+
 # This is not for optimization, but rather to avoid conflicts with local
 # variables named "message".
 _DecodeError = message.DecodeError
@@ -269,6 +276,72 @@ def _StructPackDecoder(wire_type, format):
   return _SimpleDecoder(wire_type, InnerDecode)
 
 
+def _FloatDecoder():
+  """Returns a decoder for a float field.
+
+  This code works around a bug in struct.unpack for non-finite 32-bit
+  floating-point values.
+  """
+
+  local_unpack = struct.unpack
+
+  def InnerDecode(buffer, pos):
+    # We expect a 32-bit value in little-endian byte order.  Bit 1 is the sign
+    # bit, bits 2-9 represent the exponent, and bits 10-32 are the significand.
+    new_pos = pos + 4
+    float_bytes = buffer[pos:new_pos]
+
+    # If this value has all its exponent bits set, then it's non-finite.
+    # In Python 2.4, struct.unpack will convert it to a finite 64-bit value.
+    # To avoid that, we parse it specially.
+    if ((float_bytes[3] in '\x7F\xFF')
+        and (float_bytes[2] >= '\x80')):
+      # If at least one significand bit is set...
+      if float_bytes[0:3] != '\x00\x00\x80':
+        return (_NAN, new_pos)
+      # If sign bit is set...
+      if float_bytes[3] == '\xFF':
+        return (_NEG_INF, new_pos)
+      return (_POS_INF, new_pos)
+
+    # Note that we expect someone up-stack to catch struct.error and convert
+    # it to _DecodeError -- this way we don't have to set up exception-
+    # handling blocks every time we parse one value.
+    result = local_unpack('<f', float_bytes)[0]
+    return (result, new_pos)
+  return _SimpleDecoder(wire_format.WIRETYPE_FIXED32, InnerDecode)
+
+
+def _DoubleDecoder():
+  """Returns a decoder for a double field.
+
+  This code works around a bug in struct.unpack for not-a-number.
+  """
+
+  local_unpack = struct.unpack
+
+  def InnerDecode(buffer, pos):
+    # We expect a 64-bit value in little-endian byte order.  Bit 1 is the sign
+    # bit, bits 2-12 represent the exponent, and bits 13-64 are the significand.
+    new_pos = pos + 8
+    double_bytes = buffer[pos:new_pos]
+
+    # If this value has all its exponent bits set and at least one significand
+    # bit set, it's not a number.  In Python 2.4, struct.unpack will treat it
+    # as inf or -inf.  To avoid that, we treat it specially.
+    if ((double_bytes[7] in '\x7F\xFF')
+        and (double_bytes[6] >= '\xF0')
+        and (double_bytes[0:7] != '\x00\x00\x00\x00\x00\x00\xF0')):
+      return (_NAN, new_pos)
+
+    # Note that we expect someone up-stack to catch struct.error and convert
+    # it to _DecodeError -- this way we don't have to set up exception-
+    # handling blocks every time we parse one value.
+    result = local_unpack('<d', double_bytes)[0]
+    return (result, new_pos)
+  return _SimpleDecoder(wire_format.WIRETYPE_FIXED64, InnerDecode)
+
+
 # --------------------------------------------------------------------
 
 
@@ -294,8 +367,8 @@ Fixed32Decoder  = _StructPackDecoder(wire_format.WIRETYPE_FIXED32, '<I')
 Fixed64Decoder  = _StructPackDecoder(wire_format.WIRETYPE_FIXED64, '<Q')
 SFixed32Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED32, '<i')
 SFixed64Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED64, '<q')
-FloatDecoder    = _StructPackDecoder(wire_format.WIRETYPE_FIXED32, '<f')
-DoubleDecoder   = _StructPackDecoder(wire_format.WIRETYPE_FIXED64, '<d')
+FloatDecoder = _FloatDecoder()
+DoubleDecoder = _DoubleDecoder()
 
 BoolDecoder = _ModifiedDecoder(
     wire_format.WIRETYPE_VARINT, _DecodeVarint, bool)

+ 85 - 2
python/google/protobuf/internal/encoder.py

@@ -70,6 +70,12 @@ import struct
 from google.protobuf.internal import wire_format
 
 
+# This will overflow and thus become IEEE-754 "infinity".  We would use
+# "float('inf')" but it doesn't work on Windows pre-Python-2.6.
+_POS_INF = 1e10000
+_NEG_INF = -_POS_INF
+
+
 def _VarintSize(value):
   """Compute the size of a varint value."""
   if value <= 0x7f: return 1
@@ -502,6 +508,83 @@ def _StructPackEncoder(wire_type, format):
   return SpecificEncoder
 
 
+def _FloatingPointEncoder(wire_type, format):
+  """Return a constructor for an encoder for float fields.
+
+  This is like StructPackEncoder, but catches errors that may be due to
+  passing non-finite floating-point values to struct.pack, and makes a
+  second attempt to encode those values.
+
+  Args:
+      wire_type:  The field's wire type, for encoding tags.
+      format:  The format string to pass to struct.pack().
+  """
+
+  value_size = struct.calcsize(format)
+  if value_size == 4:
+    def EncodeNonFiniteOrRaise(write, value):
+      # Remember that the serialized form uses little-endian byte order.
+      if value == _POS_INF:
+        write('\x00\x00\x80\x7F')
+      elif value == _NEG_INF:
+        write('\x00\x00\x80\xFF')
+      elif value != value:           # NaN
+        write('\x00\x00\xC0\x7F')
+      else:
+        raise
+  elif value_size == 8:
+    def EncodeNonFiniteOrRaise(write, value):
+      if value == _POS_INF:
+        write('\x00\x00\x00\x00\x00\x00\xF0\x7F')
+      elif value == _NEG_INF:
+        write('\x00\x00\x00\x00\x00\x00\xF0\xFF')
+      elif value != value:                         # NaN
+        write('\x00\x00\x00\x00\x00\x00\xF8\x7F')
+      else:
+        raise
+  else:
+    raise ValueError('Can\'t encode floating-point values that are '
+                     '%d bytes long (only 4 or 8)' % value_size)
+
+  def SpecificEncoder(field_number, is_repeated, is_packed):
+    local_struct_pack = struct.pack
+    if is_packed:
+      tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
+      local_EncodeVarint = _EncodeVarint
+      def EncodePackedField(write, value):
+        write(tag_bytes)
+        local_EncodeVarint(write, len(value) * value_size)
+        for element in value:
+          # This try/except block is going to be faster than any code that
+          # we could write to check whether element is finite.
+          try:
+            write(local_struct_pack(format, element))
+          except SystemError:
+            EncodeNonFiniteOrRaise(write, element)
+      return EncodePackedField
+    elif is_repeated:
+      tag_bytes = TagBytes(field_number, wire_type)
+      def EncodeRepeatedField(write, value):
+        for element in value:
+          write(tag_bytes)
+          try:
+            write(local_struct_pack(format, element))
+          except SystemError:
+            EncodeNonFiniteOrRaise(write, element)
+      return EncodeRepeatedField
+    else:
+      tag_bytes = TagBytes(field_number, wire_type)
+      def EncodeField(write, value):
+        write(tag_bytes)
+        try:
+          write(local_struct_pack(format, value))
+        except SystemError:
+          EncodeNonFiniteOrRaise(write, value)
+      return EncodeField
+
+  return SpecificEncoder
+
+
 # ====================================================================
 # Here we declare an encoder constructor for each field type.  These work
 # very similarly to sizer constructors, described earlier.
@@ -525,8 +608,8 @@ Fixed32Encoder  = _StructPackEncoder(wire_format.WIRETYPE_FIXED32, '<I')
 Fixed64Encoder  = _StructPackEncoder(wire_format.WIRETYPE_FIXED64, '<Q')
 SFixed32Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED32, '<i')
 SFixed64Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED64, '<q')
-FloatEncoder    = _StructPackEncoder(wire_format.WIRETYPE_FIXED32, '<f')
-DoubleEncoder   = _StructPackEncoder(wire_format.WIRETYPE_FIXED64, '<d')
+FloatEncoder    = _FloatingPointEncoder(wire_format.WIRETYPE_FIXED32, '<f')
+DoubleEncoder   = _FloatingPointEncoder(wire_format.WIRETYPE_FIXED64, '<d')
 
 
 def BoolEncoder(field_number, is_repeated, is_packed):

+ 27 - 4
python/google/protobuf/internal/generator_test.py

@@ -42,11 +42,12 @@ further ensures that we can use Python protocol message objects as we expect.
 __author__ = 'robinson@google.com (Will Robinson)'
 
 import unittest
+from google.protobuf import unittest_custom_options_pb2
 from google.protobuf import unittest_import_pb2
 from google.protobuf import unittest_mset_pb2
 from google.protobuf import unittest_pb2
 from google.protobuf import unittest_no_generic_services_pb2
-
+from google.protobuf import service
 
 MAX_EXTENSION = 536870912
 
@@ -140,6 +141,13 @@ class GeneratorTest(unittest.TestCase):
     proto = unittest_mset_pb2.TestMessageSet()
     self.assertTrue(proto.DESCRIPTOR.GetOptions().message_set_wire_format)
 
+  def testMessageWithCustomOptions(self):
+    proto = unittest_custom_options_pb2.TestMessageWithCustomOptions()
+    enum_options = proto.DESCRIPTOR.enum_types_by_name['AnEnum'].GetOptions()
+    self.assertTrue(enum_options is not None)
+    # TODO(gps): We really should test for the presense of the enum_opt1
+    # extension and for its value to be set to -789.
+
   def testNestedTypes(self):
     self.assertEquals(
         set(unittest_pb2.TestAllTypes.DESCRIPTOR.nested_types),
@@ -208,12 +216,27 @@ class GeneratorTest(unittest.TestCase):
     self.assertFalse(unittest_pb2.DESCRIPTOR.serialized_pb is None)
 
   def testNoGenericServices(self):
-    # unittest_no_generic_services.proto should contain defs for everything
-    # except services.
     self.assertTrue(hasattr(unittest_no_generic_services_pb2, "TestMessage"))
     self.assertTrue(hasattr(unittest_no_generic_services_pb2, "FOO"))
     self.assertTrue(hasattr(unittest_no_generic_services_pb2, "test_extension"))
-    self.assertFalse(hasattr(unittest_no_generic_services_pb2, "TestService"))
+
+    # Make sure unittest_no_generic_services_pb2 has no services subclassing
+    # Proto2 Service class.
+    if hasattr(unittest_no_generic_services_pb2, "TestService"):
+      self.assertFalse(issubclass(unittest_no_generic_services_pb2.TestService,
+                                  service.Service))
+
+  def testMessageTypesByName(self):
+    file_type = unittest_pb2.DESCRIPTOR
+    self.assertEqual(
+        unittest_pb2._TESTALLTYPES,
+        file_type.message_types_by_name[unittest_pb2._TESTALLTYPES.name])
+
+    # Nested messages shouldn't be included in the message_types_by_name
+    # dictionary (like in the C++ API).
+    self.assertFalse(
+        unittest_pb2._TESTALLTYPES_NESTEDMESSAGE.name in
+        file_type.message_types_by_name)
 
 
 if __name__ == '__main__':

+ 254 - 0
python/google/protobuf/internal/message_test.py

@@ -43,11 +43,25 @@ abstract interface.
 
 __author__ = 'gps@google.com (Gregory P. Smith)'
 
+import copy
+import math
 import unittest
 from google.protobuf import unittest_import_pb2
 from google.protobuf import unittest_pb2
 from google.protobuf.internal import test_util
 
+# Python pre-2.6 does not have isinf() or isnan() functions, so we have
+# to provide our own.
+def isnan(val):
+  # NaN is never equal to itself.
+  return val != val
+def isinf(val):
+  # Infinity times zero equals NaN.
+  return not isnan(val) and isnan(val * 0)
+def IsPosInf(val):
+  return isinf(val) and (val > 0)
+def IsNegInf(val):
+  return isinf(val) and (val < 0)
 
 class MessageTest(unittest.TestCase):
 
@@ -57,6 +71,8 @@ class MessageTest(unittest.TestCase):
     golden_message.ParseFromString(golden_data)
     test_util.ExpectAllFieldsSet(self, golden_message)
     self.assertTrue(golden_message.SerializeToString() == golden_data)
+    golden_copy = copy.deepcopy(golden_message)
+    self.assertTrue(golden_copy.SerializeToString() == golden_data)
 
   def testGoldenExtensions(self):
     golden_data = test_util.GoldenFile('golden_message').read()
@@ -66,6 +82,8 @@ class MessageTest(unittest.TestCase):
     test_util.SetAllExtensions(all_set)
     self.assertEquals(all_set, golden_message)
     self.assertTrue(golden_message.SerializeToString() == golden_data)
+    golden_copy = copy.deepcopy(golden_message)
+    self.assertTrue(golden_copy.SerializeToString() == golden_data)
 
   def testGoldenPackedMessage(self):
     golden_data = test_util.GoldenFile('golden_packed_fields_message').read()
@@ -75,6 +93,8 @@ class MessageTest(unittest.TestCase):
     test_util.SetAllPackedFields(all_set)
     self.assertEquals(all_set, golden_message)
     self.assertTrue(all_set.SerializeToString() == golden_data)
+    golden_copy = copy.deepcopy(golden_message)
+    self.assertTrue(golden_copy.SerializeToString() == golden_data)
 
   def testGoldenPackedExtensions(self):
     golden_data = test_util.GoldenFile('golden_packed_fields_message').read()
@@ -84,6 +104,240 @@ class MessageTest(unittest.TestCase):
     test_util.SetAllPackedExtensions(all_set)
     self.assertEquals(all_set, golden_message)
     self.assertTrue(all_set.SerializeToString() == golden_data)
+    golden_copy = copy.deepcopy(golden_message)
+    self.assertTrue(golden_copy.SerializeToString() == golden_data)
+
+  def testPositiveInfinity(self):
+    golden_data = ('\x5D\x00\x00\x80\x7F'
+                   '\x61\x00\x00\x00\x00\x00\x00\xF0\x7F'
+                   '\xCD\x02\x00\x00\x80\x7F'
+                   '\xD1\x02\x00\x00\x00\x00\x00\x00\xF0\x7F')
+    golden_message = unittest_pb2.TestAllTypes()
+    golden_message.ParseFromString(golden_data)
+    self.assertTrue(IsPosInf(golden_message.optional_float))
+    self.assertTrue(IsPosInf(golden_message.optional_double))
+    self.assertTrue(IsPosInf(golden_message.repeated_float[0]))
+    self.assertTrue(IsPosInf(golden_message.repeated_double[0]))
+    self.assertTrue(golden_message.SerializeToString() == golden_data)
+
+  def testNegativeInfinity(self):
+    golden_data = ('\x5D\x00\x00\x80\xFF'
+                   '\x61\x00\x00\x00\x00\x00\x00\xF0\xFF'
+                   '\xCD\x02\x00\x00\x80\xFF'
+                   '\xD1\x02\x00\x00\x00\x00\x00\x00\xF0\xFF')
+    golden_message = unittest_pb2.TestAllTypes()
+    golden_message.ParseFromString(golden_data)
+    self.assertTrue(IsNegInf(golden_message.optional_float))
+    self.assertTrue(IsNegInf(golden_message.optional_double))
+    self.assertTrue(IsNegInf(golden_message.repeated_float[0]))
+    self.assertTrue(IsNegInf(golden_message.repeated_double[0]))
+    self.assertTrue(golden_message.SerializeToString() == golden_data)
+
+  def testNotANumber(self):
+    golden_data = ('\x5D\x00\x00\xC0\x7F'
+                   '\x61\x00\x00\x00\x00\x00\x00\xF8\x7F'
+                   '\xCD\x02\x00\x00\xC0\x7F'
+                   '\xD1\x02\x00\x00\x00\x00\x00\x00\xF8\x7F')
+    golden_message = unittest_pb2.TestAllTypes()
+    golden_message.ParseFromString(golden_data)
+    self.assertTrue(isnan(golden_message.optional_float))
+    self.assertTrue(isnan(golden_message.optional_double))
+    self.assertTrue(isnan(golden_message.repeated_float[0]))
+    self.assertTrue(isnan(golden_message.repeated_double[0]))
+    self.assertTrue(golden_message.SerializeToString() == golden_data)
+
+  def testPositiveInfinityPacked(self):
+    golden_data = ('\xA2\x06\x04\x00\x00\x80\x7F'
+                   '\xAA\x06\x08\x00\x00\x00\x00\x00\x00\xF0\x7F')
+    golden_message = unittest_pb2.TestPackedTypes()
+    golden_message.ParseFromString(golden_data)
+    self.assertTrue(IsPosInf(golden_message.packed_float[0]))
+    self.assertTrue(IsPosInf(golden_message.packed_double[0]))
+    self.assertTrue(golden_message.SerializeToString() == golden_data)
+
+  def testNegativeInfinityPacked(self):
+    golden_data = ('\xA2\x06\x04\x00\x00\x80\xFF'
+                   '\xAA\x06\x08\x00\x00\x00\x00\x00\x00\xF0\xFF')
+    golden_message = unittest_pb2.TestPackedTypes()
+    golden_message.ParseFromString(golden_data)
+    self.assertTrue(IsNegInf(golden_message.packed_float[0]))
+    self.assertTrue(IsNegInf(golden_message.packed_double[0]))
+    self.assertTrue(golden_message.SerializeToString() == golden_data)
+
+  def testNotANumberPacked(self):
+    golden_data = ('\xA2\x06\x04\x00\x00\xC0\x7F'
+                   '\xAA\x06\x08\x00\x00\x00\x00\x00\x00\xF8\x7F')
+    golden_message = unittest_pb2.TestPackedTypes()
+    golden_message.ParseFromString(golden_data)
+    self.assertTrue(isnan(golden_message.packed_float[0]))
+    self.assertTrue(isnan(golden_message.packed_double[0]))
+    self.assertTrue(golden_message.SerializeToString() == golden_data)
+
+  def testExtremeFloatValues(self):
+    message = unittest_pb2.TestAllTypes()
+
+    # Most positive exponent, no significand bits set.
+    kMostPosExponentNoSigBits = math.pow(2, 127)
+    message.optional_float = kMostPosExponentNoSigBits
+    message.ParseFromString(message.SerializeToString())
+    self.assertTrue(message.optional_float == kMostPosExponentNoSigBits)
+
+    # Most positive exponent, one significand bit set.
+    kMostPosExponentOneSigBit = 1.5 * math.pow(2, 127)
+    message.optional_float = kMostPosExponentOneSigBit
+    message.ParseFromString(message.SerializeToString())
+    self.assertTrue(message.optional_float == kMostPosExponentOneSigBit)
+
+    # Repeat last two cases with values of same magnitude, but negative.
+    message.optional_float = -kMostPosExponentNoSigBits
+    message.ParseFromString(message.SerializeToString())
+    self.assertTrue(message.optional_float == -kMostPosExponentNoSigBits)
+
+    message.optional_float = -kMostPosExponentOneSigBit
+    message.ParseFromString(message.SerializeToString())
+    self.assertTrue(message.optional_float == -kMostPosExponentOneSigBit)
+
+    # Most negative exponent, no significand bits set.
+    kMostNegExponentNoSigBits = math.pow(2, -127)
+    message.optional_float = kMostNegExponentNoSigBits
+    message.ParseFromString(message.SerializeToString())
+    self.assertTrue(message.optional_float == kMostNegExponentNoSigBits)
+
+    # Most negative exponent, one significand bit set.
+    kMostNegExponentOneSigBit = 1.5 * math.pow(2, -127)
+    message.optional_float = kMostNegExponentOneSigBit
+    message.ParseFromString(message.SerializeToString())
+    self.assertTrue(message.optional_float == kMostNegExponentOneSigBit)
+
+    # Repeat last two cases with values of the same magnitude, but negative.
+    message.optional_float = -kMostNegExponentNoSigBits
+    message.ParseFromString(message.SerializeToString())
+    self.assertTrue(message.optional_float == -kMostNegExponentNoSigBits)
+
+    message.optional_float = -kMostNegExponentOneSigBit
+    message.ParseFromString(message.SerializeToString())
+    self.assertTrue(message.optional_float == -kMostNegExponentOneSigBit)
+
+  def testExtremeFloatValues(self):
+    message = unittest_pb2.TestAllTypes()
+
+    # Most positive exponent, no significand bits set.
+    kMostPosExponentNoSigBits = math.pow(2, 1023)
+    message.optional_double = kMostPosExponentNoSigBits
+    message.ParseFromString(message.SerializeToString())
+    self.assertTrue(message.optional_double == kMostPosExponentNoSigBits)
+
+    # Most positive exponent, one significand bit set.
+    kMostPosExponentOneSigBit = 1.5 * math.pow(2, 1023)
+    message.optional_double = kMostPosExponentOneSigBit
+    message.ParseFromString(message.SerializeToString())
+    self.assertTrue(message.optional_double == kMostPosExponentOneSigBit)
+
+    # Repeat last two cases with values of same magnitude, but negative.
+    message.optional_double = -kMostPosExponentNoSigBits
+    message.ParseFromString(message.SerializeToString())
+    self.assertTrue(message.optional_double == -kMostPosExponentNoSigBits)
+
+    message.optional_double = -kMostPosExponentOneSigBit
+    message.ParseFromString(message.SerializeToString())
+    self.assertTrue(message.optional_double == -kMostPosExponentOneSigBit)
+
+    # Most negative exponent, no significand bits set.
+    kMostNegExponentNoSigBits = math.pow(2, -1023)
+    message.optional_double = kMostNegExponentNoSigBits
+    message.ParseFromString(message.SerializeToString())
+    self.assertTrue(message.optional_double == kMostNegExponentNoSigBits)
+
+    # Most negative exponent, one significand bit set.
+    kMostNegExponentOneSigBit = 1.5 * math.pow(2, -1023)
+    message.optional_double = kMostNegExponentOneSigBit
+    message.ParseFromString(message.SerializeToString())
+    self.assertTrue(message.optional_double == kMostNegExponentOneSigBit)
+
+    # Repeat last two cases with values of the same magnitude, but negative.
+    message.optional_double = -kMostNegExponentNoSigBits
+    message.ParseFromString(message.SerializeToString())
+    self.assertTrue(message.optional_double == -kMostNegExponentNoSigBits)
+
+    message.optional_double = -kMostNegExponentOneSigBit
+    message.ParseFromString(message.SerializeToString())
+    self.assertTrue(message.optional_double == -kMostNegExponentOneSigBit)
+
+  def testSortingRepeatedScalarFieldsDefaultComparator(self):
+    """Check some different types with the default comparator."""
+    message = unittest_pb2.TestAllTypes()
+
+    # TODO(mattp): would testing more scalar types strengthen test?
+    message.repeated_int32.append(1)
+    message.repeated_int32.append(3)
+    message.repeated_int32.append(2)
+    message.repeated_int32.sort()
+    self.assertEqual(message.repeated_int32[0], 1)
+    self.assertEqual(message.repeated_int32[1], 2)
+    self.assertEqual(message.repeated_int32[2], 3)
+
+    message.repeated_float.append(1.1)
+    message.repeated_float.append(1.3)
+    message.repeated_float.append(1.2)
+    message.repeated_float.sort()
+    self.assertAlmostEqual(message.repeated_float[0], 1.1)
+    self.assertAlmostEqual(message.repeated_float[1], 1.2)
+    self.assertAlmostEqual(message.repeated_float[2], 1.3)
+
+    message.repeated_string.append('a')
+    message.repeated_string.append('c')
+    message.repeated_string.append('b')
+    message.repeated_string.sort()
+    self.assertEqual(message.repeated_string[0], 'a')
+    self.assertEqual(message.repeated_string[1], 'b')
+    self.assertEqual(message.repeated_string[2], 'c')
+
+    message.repeated_bytes.append('a')
+    message.repeated_bytes.append('c')
+    message.repeated_bytes.append('b')
+    message.repeated_bytes.sort()
+    self.assertEqual(message.repeated_bytes[0], 'a')
+    self.assertEqual(message.repeated_bytes[1], 'b')
+    self.assertEqual(message.repeated_bytes[2], 'c')
+
+  def testSortingRepeatedScalarFieldsCustomComparator(self):
+    """Check some different types with custom comparator."""
+    message = unittest_pb2.TestAllTypes()
+
+    message.repeated_int32.append(-3)
+    message.repeated_int32.append(-2)
+    message.repeated_int32.append(-1)
+    message.repeated_int32.sort(lambda x,y: cmp(abs(x), abs(y)))
+    self.assertEqual(message.repeated_int32[0], -1)
+    self.assertEqual(message.repeated_int32[1], -2)
+    self.assertEqual(message.repeated_int32[2], -3)
+
+    message.repeated_string.append('aaa')
+    message.repeated_string.append('bb')
+    message.repeated_string.append('c')
+    message.repeated_string.sort(lambda x,y: cmp(len(x), len(y)))
+    self.assertEqual(message.repeated_string[0], 'c')
+    self.assertEqual(message.repeated_string[1], 'bb')
+    self.assertEqual(message.repeated_string[2], 'aaa')
+
+  def testSortingRepeatedCompositeFieldsCustomComparator(self):
+    """Check passing a custom comparator to sort a repeated composite field."""
+    message = unittest_pb2.TestAllTypes()
+
+    message.repeated_nested_message.add().bb = 1
+    message.repeated_nested_message.add().bb = 3
+    message.repeated_nested_message.add().bb = 2
+    message.repeated_nested_message.add().bb = 6
+    message.repeated_nested_message.add().bb = 5
+    message.repeated_nested_message.add().bb = 4
+    message.repeated_nested_message.sort(lambda x,y: cmp(x.bb, y.bb))
+    self.assertEqual(message.repeated_nested_message[0].bb, 1)
+    self.assertEqual(message.repeated_nested_message[1].bb, 2)
+    self.assertEqual(message.repeated_nested_message[2].bb, 3)
+    self.assertEqual(message.repeated_nested_message[3].bb, 4)
+    self.assertEqual(message.repeated_nested_message[4].bb, 5)
+    self.assertEqual(message.repeated_nested_message[5].bb, 6)
+
 
 if __name__ == '__main__':
   unittest.main()

+ 1098 - 0
python/google/protobuf/internal/python_message.py

@@ -0,0 +1,1098 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2008 Google Inc.  All rights reserved.
+# http://code.google.com/p/protobuf/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This code is meant to work on Python 2.4 and above only.
+#
+# TODO(robinson): Helpers for verbose, common checks like seeing if a
+# descriptor's cpp_type is CPPTYPE_MESSAGE.
+
+"""Contains a metaclass and helper functions used to create
+protocol message classes from Descriptor objects at runtime.
+
+Recall that a metaclass is the "type" of a class.
+(A class is to a metaclass what an instance is to a class.)
+
+In this case, we use the GeneratedProtocolMessageType metaclass
+to inject all the useful functionality into the classes
+output by the protocol compiler at compile-time.
+
+The upshot of all this is that the real implementation
+details for ALL pure-Python protocol buffers are *here in
+this file*.
+"""
+
+__author__ = 'robinson@google.com (Will Robinson)'
+
+try:
+  from cStringIO import StringIO
+except ImportError:
+  from StringIO import StringIO
+import struct
+import weakref
+
+# We use "as" to avoid name collisions with variables.
+from google.protobuf.internal import containers
+from google.protobuf.internal import decoder
+from google.protobuf.internal import encoder
+from google.protobuf.internal import message_listener as message_listener_mod
+from google.protobuf.internal import type_checkers
+from google.protobuf.internal import wire_format
+from google.protobuf import descriptor as descriptor_mod
+from google.protobuf import message as message_mod
+from google.protobuf import text_format
+
+_FieldDescriptor = descriptor_mod.FieldDescriptor
+
+
+def NewMessage(descriptor, dictionary):
+  _AddClassAttributesForNestedExtensions(descriptor, dictionary)
+  _AddSlots(descriptor, dictionary)
+
+
+def InitMessage(descriptor, cls):
+  cls._decoders_by_tag = {}
+  cls._extensions_by_name = {}
+  cls._extensions_by_number = {}
+  if (descriptor.has_options and
+      descriptor.GetOptions().message_set_wire_format):
+    cls._decoders_by_tag[decoder.MESSAGE_SET_ITEM_TAG] = (
+        decoder.MessageSetItemDecoder(cls._extensions_by_number))
+
+  # Attach stuff to each FieldDescriptor for quick lookup later on.
+  for field in descriptor.fields:
+    _AttachFieldHelpers(cls, field)
+
+  _AddEnumValues(descriptor, cls)
+  _AddInitMethod(descriptor, cls)
+  _AddPropertiesForFields(descriptor, cls)
+  _AddPropertiesForExtensions(descriptor, cls)
+  _AddStaticMethods(cls)
+  _AddMessageMethods(descriptor, cls)
+  _AddPrivateHelperMethods(cls)
+
+
+# Stateless helpers for GeneratedProtocolMessageType below.
+# Outside clients should not access these directly.
+#
+# I opted not to make any of these methods on the metaclass, to make it more
+# clear that I'm not really using any state there and to keep clients from
+# thinking that they have direct access to these construction helpers.
+
+
+def _PropertyName(proto_field_name):
+  """Returns the name of the public property attribute which
+  clients can use to get and (in some cases) set the value
+  of a protocol message field.
+
+  Args:
+    proto_field_name: The protocol message field name, exactly
+      as it appears (or would appear) in a .proto file.
+  """
+  # TODO(robinson): Escape Python keywords (e.g., yield), and test this support.
+  # nnorwitz makes my day by writing:
+  # """
+  # FYI.  See the keyword module in the stdlib. This could be as simple as:
+  #
+  # if keyword.iskeyword(proto_field_name):
+  #   return proto_field_name + "_"
+  # return proto_field_name
+  # """
+  # Kenton says:  The above is a BAD IDEA.  People rely on being able to use
+  #   getattr() and setattr() to reflectively manipulate field values.  If we
+  #   rename the properties, then every such user has to also make sure to apply
+  #   the same transformation.  Note that currently if you name a field "yield",
+  #   you can still access it just fine using getattr/setattr -- it's not even
+  #   that cumbersome to do so.
+  # TODO(kenton):  Remove this method entirely if/when everyone agrees with my
+  #   position.
+  return proto_field_name
+
+
+def _VerifyExtensionHandle(message, extension_handle):
+  """Verify that the given extension handle is valid."""
+
+  if not isinstance(extension_handle, _FieldDescriptor):
+    raise KeyError('HasExtension() expects an extension handle, got: %s' %
+                   extension_handle)
+
+  if not extension_handle.is_extension:
+    raise KeyError('"%s" is not an extension.' % extension_handle.full_name)
+
+  if extension_handle.containing_type is not message.DESCRIPTOR:
+    raise KeyError('Extension "%s" extends message type "%s", but this '
+                   'message is of type "%s".' %
+                   (extension_handle.full_name,
+                    extension_handle.containing_type.full_name,
+                    message.DESCRIPTOR.full_name))
+
+
+def _AddSlots(message_descriptor, dictionary):
+  """Adds a __slots__ entry to dictionary, containing the names of all valid
+  attributes for this message type.
+
+  Args:
+    message_descriptor: A Descriptor instance describing this message type.
+    dictionary: Class dictionary to which we'll add a '__slots__' entry.
+  """
+  dictionary['__slots__'] = ['_cached_byte_size',
+                             '_cached_byte_size_dirty',
+                             '_fields',
+                             '_is_present_in_parent',
+                             '_listener',
+                             '_listener_for_children',
+                             '__weakref__']
+
+
+def _IsMessageSetExtension(field):
+  return (field.is_extension and
+          field.containing_type.has_options and
+          field.containing_type.GetOptions().message_set_wire_format and
+          field.type == _FieldDescriptor.TYPE_MESSAGE and
+          field.message_type == field.extension_scope and
+          field.label == _FieldDescriptor.LABEL_OPTIONAL)
+
+
+def _AttachFieldHelpers(cls, field_descriptor):
+  is_repeated = (field_descriptor.label == _FieldDescriptor.LABEL_REPEATED)
+  is_packed = (field_descriptor.has_options and
+               field_descriptor.GetOptions().packed)
+
+  if _IsMessageSetExtension(field_descriptor):
+    field_encoder = encoder.MessageSetItemEncoder(field_descriptor.number)
+    sizer = encoder.MessageSetItemSizer(field_descriptor.number)
+  else:
+    field_encoder = type_checkers.TYPE_TO_ENCODER[field_descriptor.type](
+        field_descriptor.number, is_repeated, is_packed)
+    sizer = type_checkers.TYPE_TO_SIZER[field_descriptor.type](
+        field_descriptor.number, is_repeated, is_packed)
+
+  field_descriptor._encoder = field_encoder
+  field_descriptor._sizer = sizer
+  field_descriptor._default_constructor = _DefaultValueConstructorForField(
+      field_descriptor)
+
+  def AddDecoder(wiretype, is_packed):
+    tag_bytes = encoder.TagBytes(field_descriptor.number, wiretype)
+    cls._decoders_by_tag[tag_bytes] = (
+        type_checkers.TYPE_TO_DECODER[field_descriptor.type](
+            field_descriptor.number, is_repeated, is_packed,
+            field_descriptor, field_descriptor._default_constructor))
+
+  AddDecoder(type_checkers.FIELD_TYPE_TO_WIRE_TYPE[field_descriptor.type],
+             False)
+
+  if is_repeated and wire_format.IsTypePackable(field_descriptor.type):
+    # To support wire compatibility of adding packed = true, add a decoder for
+    # packed values regardless of the field's options.
+    AddDecoder(wire_format.WIRETYPE_LENGTH_DELIMITED, True)
+
+
+def _AddClassAttributesForNestedExtensions(descriptor, dictionary):
+  extension_dict = descriptor.extensions_by_name
+  for extension_name, extension_field in extension_dict.iteritems():
+    assert extension_name not in dictionary
+    dictionary[extension_name] = extension_field
+
+
+def _AddEnumValues(descriptor, cls):
+  """Sets class-level attributes for all enum fields defined in this message.
+
+  Args:
+    descriptor: Descriptor object for this message type.
+    cls: Class we're constructing for this message type.
+  """
+  for enum_type in descriptor.enum_types:
+    for enum_value in enum_type.values:
+      setattr(cls, enum_value.name, enum_value.number)
+
+
+def _DefaultValueConstructorForField(field):
+  """Returns a function which returns a default value for a field.
+
+  Args:
+    field: FieldDescriptor object for this field.
+
+  The returned function has one argument:
+    message: Message instance containing this field, or a weakref proxy
+      of same.
+
+  That function in turn returns a default value for this field.  The default
+    value may refer back to |message| via a weak reference.
+  """
+
+  if field.label == _FieldDescriptor.LABEL_REPEATED:
+    if field.default_value != []:
+      raise ValueError('Repeated field default value not empty list: %s' % (
+          field.default_value))
+    if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
+      # We can't look at _concrete_class yet since it might not have
+      # been set.  (Depends on order in which we initialize the classes).
+      message_type = field.message_type
+      def MakeRepeatedMessageDefault(message):
+        return containers.RepeatedCompositeFieldContainer(
+            message._listener_for_children, field.message_type)
+      return MakeRepeatedMessageDefault
+    else:
+      type_checker = type_checkers.GetTypeChecker(field.cpp_type, field.type)
+      def MakeRepeatedScalarDefault(message):
+        return containers.RepeatedScalarFieldContainer(
+            message._listener_for_children, type_checker)
+      return MakeRepeatedScalarDefault
+
+  if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
+    # _concrete_class may not yet be initialized.
+    message_type = field.message_type
+    def MakeSubMessageDefault(message):
+      result = message_type._concrete_class()
+      result._SetListener(message._listener_for_children)
+      return result
+    return MakeSubMessageDefault
+
+  def MakeScalarDefault(message):
+    return field.default_value
+  return MakeScalarDefault
+
+
+def _AddInitMethod(message_descriptor, cls):
+  """Adds an __init__ method to cls."""
+  fields = message_descriptor.fields
+  def init(self, **kwargs):
+    self._cached_byte_size = 0
+    self._cached_byte_size_dirty = len(kwargs) > 0
+    self._fields = {}
+    self._is_present_in_parent = False
+    self._listener = message_listener_mod.NullMessageListener()
+    self._listener_for_children = _Listener(self)
+    for field_name, field_value in kwargs.iteritems():
+      field = _GetFieldByName(message_descriptor, field_name)
+      if field is None:
+        raise TypeError("%s() got an unexpected keyword argument '%s'" %
+                        (message_descriptor.name, field_name))
+      if field.label == _FieldDescriptor.LABEL_REPEATED:
+        copy = field._default_constructor(self)
+        if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:  # Composite
+          for val in field_value:
+            copy.add().MergeFrom(val)
+        else:  # Scalar
+          copy.extend(field_value)
+        self._fields[field] = copy
+      elif field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
+        copy = field._default_constructor(self)
+        copy.MergeFrom(field_value)
+        self._fields[field] = copy
+      else:
+        setattr(self, field_name, field_value)
+
+  init.__module__ = None
+  init.__doc__ = None
+  cls.__init__ = init
+
+
+def _GetFieldByName(message_descriptor, field_name):
+  """Returns a field descriptor by field name.
+
+  Args:
+    message_descriptor: A Descriptor describing all fields in message.
+    field_name: The name of the field to retrieve.
+  Returns:
+    The field descriptor associated with the field name.
+  """
+  try:
+    return message_descriptor.fields_by_name[field_name]
+  except KeyError:
+    raise ValueError('Protocol message has no "%s" field.' % field_name)
+
+
+def _AddPropertiesForFields(descriptor, cls):
+  """Adds properties for all fields in this protocol message type."""
+  for field in descriptor.fields:
+    _AddPropertiesForField(field, cls)
+
+  if descriptor.is_extendable:
+    # _ExtensionDict is just an adaptor with no state so we allocate a new one
+    # every time it is accessed.
+    cls.Extensions = property(lambda self: _ExtensionDict(self))
+
+
+def _AddPropertiesForField(field, cls):
+  """Adds a public property for a protocol message field.
+  Clients can use this property to get and (in the case
+  of non-repeated scalar fields) directly set the value
+  of a protocol message field.
+
+  Args:
+    field: A FieldDescriptor for this field.
+    cls: The class we're constructing.
+  """
+  # Catch it if we add other types that we should
+  # handle specially here.
+  assert _FieldDescriptor.MAX_CPPTYPE == 10
+
+  constant_name = field.name.upper() + "_FIELD_NUMBER"
+  setattr(cls, constant_name, field.number)
+
+  if field.label == _FieldDescriptor.LABEL_REPEATED:
+    _AddPropertiesForRepeatedField(field, cls)
+  elif field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
+    _AddPropertiesForNonRepeatedCompositeField(field, cls)
+  else:
+    _AddPropertiesForNonRepeatedScalarField(field, cls)
+
+
+def _AddPropertiesForRepeatedField(field, cls):
+  """Adds a public property for a "repeated" protocol message field.  Clients
+  can use this property to get the value of the field, which will be either a
+  _RepeatedScalarFieldContainer or _RepeatedCompositeFieldContainer (see
+  below).
+
+  Note that when clients add values to these containers, we perform
+  type-checking in the case of repeated scalar fields, and we also set any
+  necessary "has" bits as a side-effect.
+
+  Args:
+    field: A FieldDescriptor for this field.
+    cls: The class we're constructing.
+  """
+  proto_field_name = field.name
+  property_name = _PropertyName(proto_field_name)
+
+  def getter(self):
+    field_value = self._fields.get(field)
+    if field_value is None:
+      # Construct a new object to represent this field.
+      field_value = field._default_constructor(self)
+
+      # Atomically check if another thread has preempted us and, if not, swap
+      # in the new object we just created.  If someone has preempted us, we
+      # take that object and discard ours.
+      # WARNING:  We are relying on setdefault() being atomic.  This is true
+      #   in CPython but we haven't investigated others.  This warning appears
+      #   in several other locations in this file.
+      field_value = self._fields.setdefault(field, field_value)
+    return field_value
+  getter.__module__ = None
+  getter.__doc__ = 'Getter for %s.' % proto_field_name
+
+  # We define a setter just so we can throw an exception with a more
+  # helpful error message.
+  def setter(self, new_value):
+    raise AttributeError('Assignment not allowed to repeated field '
+                         '"%s" in protocol message object.' % proto_field_name)
+
+  doc = 'Magic attribute generated for "%s" proto field.' % proto_field_name
+  setattr(cls, property_name, property(getter, setter, doc=doc))
+
+
+def _AddPropertiesForNonRepeatedScalarField(field, cls):
+  """Adds a public property for a nonrepeated, scalar protocol message field.
+  Clients can use this property to get and directly set the value of the field.
+  Note that when the client sets the value of a field by using this property,
+  all necessary "has" bits are set as a side-effect, and we also perform
+  type-checking.
+
+  Args:
+    field: A FieldDescriptor for this field.
+    cls: The class we're constructing.
+  """
+  proto_field_name = field.name
+  property_name = _PropertyName(proto_field_name)
+  type_checker = type_checkers.GetTypeChecker(field.cpp_type, field.type)
+  default_value = field.default_value
+  valid_values = set()
+
+  def getter(self):
+    return self._fields.get(field, default_value)
+  getter.__module__ = None
+  getter.__doc__ = 'Getter for %s.' % proto_field_name
+  def setter(self, new_value):
+    type_checker.CheckValue(new_value)
+    self._fields[field] = new_value
+    # Check _cached_byte_size_dirty inline to improve performance, since scalar
+    # setters are called frequently.
+    if not self._cached_byte_size_dirty:
+      self._Modified()
+
+  setter.__module__ = None
+  setter.__doc__ = 'Setter for %s.' % proto_field_name
+
+  # Add a property to encapsulate the getter/setter.
+  doc = 'Magic attribute generated for "%s" proto field.' % proto_field_name
+  setattr(cls, property_name, property(getter, setter, doc=doc))
+
+
+def _AddPropertiesForNonRepeatedCompositeField(field, cls):
+  """Adds a public property for a nonrepeated, composite protocol message field.
+  A composite field is a "group" or "message" field.
+
+  Clients can use this property to get the value of the field, but cannot
+  assign to the property directly.
+
+  Args:
+    field: A FieldDescriptor for this field.
+    cls: The class we're constructing.
+  """
+  # TODO(robinson): Remove duplication with similar method
+  # for non-repeated scalars.
+  proto_field_name = field.name
+  property_name = _PropertyName(proto_field_name)
+  message_type = field.message_type
+
+  def getter(self):
+    field_value = self._fields.get(field)
+    if field_value is None:
+      # Construct a new object to represent this field.
+      field_value = message_type._concrete_class()
+      field_value._SetListener(self._listener_for_children)
+
+      # Atomically check if another thread has preempted us and, if not, swap
+      # in the new object we just created.  If someone has preempted us, we
+      # take that object and discard ours.
+      # WARNING:  We are relying on setdefault() being atomic.  This is true
+      #   in CPython but we haven't investigated others.  This warning appears
+      #   in several other locations in this file.
+      field_value = self._fields.setdefault(field, field_value)
+    return field_value
+  getter.__module__ = None
+  getter.__doc__ = 'Getter for %s.' % proto_field_name
+
+  # We define a setter just so we can throw an exception with a more
+  # helpful error message.
+  def setter(self, new_value):
+    raise AttributeError('Assignment not allowed to composite field '
+                         '"%s" in protocol message object.' % proto_field_name)
+
+  # Add a property to encapsulate the getter.
+  doc = 'Magic attribute generated for "%s" proto field.' % proto_field_name
+  setattr(cls, property_name, property(getter, setter, doc=doc))
+
+
+def _AddPropertiesForExtensions(descriptor, cls):
+  """Adds properties for all fields in this protocol message type."""
+  extension_dict = descriptor.extensions_by_name
+  for extension_name, extension_field in extension_dict.iteritems():
+    constant_name = extension_name.upper() + "_FIELD_NUMBER"
+    setattr(cls, constant_name, extension_field.number)
+
+
+def _AddStaticMethods(cls):
+  # TODO(robinson): This probably needs to be thread-safe(?)
+  def RegisterExtension(extension_handle):
+    extension_handle.containing_type = cls.DESCRIPTOR
+    _AttachFieldHelpers(cls, extension_handle)
+
+    # Try to insert our extension, failing if an extension with the same number
+    # already exists.
+    actual_handle = cls._extensions_by_number.setdefault(
+        extension_handle.number, extension_handle)
+    if actual_handle is not extension_handle:
+      raise AssertionError(
+          'Extensions "%s" and "%s" both try to extend message type "%s" with '
+          'field number %d.' %
+          (extension_handle.full_name, actual_handle.full_name,
+           cls.DESCRIPTOR.full_name, extension_handle.number))
+
+    cls._extensions_by_name[extension_handle.full_name] = extension_handle
+
+    handle = extension_handle  # avoid line wrapping
+    if _IsMessageSetExtension(handle):
+      # MessageSet extension.  Also register under type name.
+      cls._extensions_by_name[
+          extension_handle.message_type.full_name] = extension_handle
+
+  cls.RegisterExtension = staticmethod(RegisterExtension)
+
+  def FromString(s):
+    message = cls()
+    message.MergeFromString(s)
+    return message
+  cls.FromString = staticmethod(FromString)
+
+
+def _IsPresent(item):
+  """Given a (FieldDescriptor, value) tuple from _fields, return true if the
+  value should be included in the list returned by ListFields()."""
+
+  if item[0].label == _FieldDescriptor.LABEL_REPEATED:
+    return bool(item[1])
+  elif item[0].cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
+    return item[1]._is_present_in_parent
+  else:
+    return True
+
+
+def _AddListFieldsMethod(message_descriptor, cls):
+  """Helper for _AddMessageMethods()."""
+
+  def ListFields(self):
+    all_fields = [item for item in self._fields.iteritems() if _IsPresent(item)]
+    all_fields.sort(key = lambda item: item[0].number)
+    return all_fields
+
+  cls.ListFields = ListFields
+
+
+def _AddHasFieldMethod(message_descriptor, cls):
+  """Helper for _AddMessageMethods()."""
+
+  singular_fields = {}
+  for field in message_descriptor.fields:
+    if field.label != _FieldDescriptor.LABEL_REPEATED:
+      singular_fields[field.name] = field
+
+  def HasField(self, field_name):
+    try:
+      field = singular_fields[field_name]
+    except KeyError:
+      raise ValueError(
+          'Protocol message has no singular "%s" field.' % field_name)
+
+    if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
+      value = self._fields.get(field)
+      return value is not None and value._is_present_in_parent
+    else:
+      return field in self._fields
+  cls.HasField = HasField
+
+
+def _AddClearFieldMethod(message_descriptor, cls):
+  """Helper for _AddMessageMethods()."""
+  def ClearField(self, field_name):
+    try:
+      field = message_descriptor.fields_by_name[field_name]
+    except KeyError:
+      raise ValueError('Protocol message has no "%s" field.' % field_name)
+
+    if field in self._fields:
+      # Note:  If the field is a sub-message, its listener will still point
+      #   at us.  That's fine, because the worst than can happen is that it
+      #   will call _Modified() and invalidate our byte size.  Big deal.
+      del self._fields[field]
+
+    # Always call _Modified() -- even if nothing was changed, this is
+    # a mutating method, and thus calling it should cause the field to become
+    # present in the parent message.
+    self._Modified()
+
+  cls.ClearField = ClearField
+
+
+def _AddClearExtensionMethod(cls):
+  """Helper for _AddMessageMethods()."""
+  def ClearExtension(self, extension_handle):
+    _VerifyExtensionHandle(self, extension_handle)
+
+    # Similar to ClearField(), above.
+    if extension_handle in self._fields:
+      del self._fields[extension_handle]
+    self._Modified()
+  cls.ClearExtension = ClearExtension
+
+
+def _AddClearMethod(message_descriptor, cls):
+  """Helper for _AddMessageMethods()."""
+  def Clear(self):
+    # Clear fields.
+    self._fields = {}
+    self._Modified()
+  cls.Clear = Clear
+
+
+def _AddHasExtensionMethod(cls):
+  """Helper for _AddMessageMethods()."""
+  def HasExtension(self, extension_handle):
+    _VerifyExtensionHandle(self, extension_handle)
+    if extension_handle.label == _FieldDescriptor.LABEL_REPEATED:
+      raise KeyError('"%s" is repeated.' % extension_handle.full_name)
+
+    if extension_handle.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
+      value = self._fields.get(extension_handle)
+      return value is not None and value._is_present_in_parent
+    else:
+      return extension_handle in self._fields
+  cls.HasExtension = HasExtension
+
+
+def _AddEqualsMethod(message_descriptor, cls):
+  """Helper for _AddMessageMethods()."""
+  def __eq__(self, other):
+    if (not isinstance(other, message_mod.Message) or
+        other.DESCRIPTOR != self.DESCRIPTOR):
+      return False
+
+    if self is other:
+      return True
+
+    return self.ListFields() == other.ListFields()
+
+  cls.__eq__ = __eq__
+
+
+def _AddStrMethod(message_descriptor, cls):
+  """Helper for _AddMessageMethods()."""
+  def __str__(self):
+    return text_format.MessageToString(self)
+  cls.__str__ = __str__
+
+
+def _AddUnicodeMethod(unused_message_descriptor, cls):
+  """Helper for _AddMessageMethods()."""
+
+  def __unicode__(self):
+    return text_format.MessageToString(self, as_utf8=True).decode('utf-8')
+  cls.__unicode__ = __unicode__
+
+
+def _AddSetListenerMethod(cls):
+  """Helper for _AddMessageMethods()."""
+  def SetListener(self, listener):
+    if listener is None:
+      self._listener = message_listener_mod.NullMessageListener()
+    else:
+      self._listener = listener
+  cls._SetListener = SetListener
+
+
+def _BytesForNonRepeatedElement(value, field_number, field_type):
+  """Returns the number of bytes needed to serialize a non-repeated element.
+  The returned byte count includes space for tag information and any
+  other additional space associated with serializing value.
+
+  Args:
+    value: Value we're serializing.
+    field_number: Field number of this value.  (Since the field number
+      is stored as part of a varint-encoded tag, this has an impact
+      on the total bytes required to serialize the value).
+    field_type: The type of the field.  One of the TYPE_* constants
+      within FieldDescriptor.
+  """
+  try:
+    fn = type_checkers.TYPE_TO_BYTE_SIZE_FN[field_type]
+    return fn(field_number, value)
+  except KeyError:
+    raise message_mod.EncodeError('Unrecognized field type: %d' % field_type)
+
+
+def _AddByteSizeMethod(message_descriptor, cls):
+  """Helper for _AddMessageMethods()."""
+
+  def ByteSize(self):
+    if not self._cached_byte_size_dirty:
+      return self._cached_byte_size
+
+    size = 0
+    for field_descriptor, field_value in self.ListFields():
+      size += field_descriptor._sizer(field_value)
+
+    self._cached_byte_size = size
+    self._cached_byte_size_dirty = False
+    self._listener_for_children.dirty = False
+    return size
+
+  cls.ByteSize = ByteSize
+
+
+def _AddSerializeToStringMethod(message_descriptor, cls):
+  """Helper for _AddMessageMethods()."""
+
+  def SerializeToString(self):
+    # Check if the message has all of its required fields set.
+    errors = []
+    if not self.IsInitialized():
+      raise message_mod.EncodeError(
+          'Message is missing required fields: ' +
+          ','.join(self.FindInitializationErrors()))
+    return self.SerializePartialToString()
+  cls.SerializeToString = SerializeToString
+
+
+def _AddSerializePartialToStringMethod(message_descriptor, cls):
+  """Helper for _AddMessageMethods()."""
+
+  def SerializePartialToString(self):
+    out = StringIO()
+    self._InternalSerialize(out.write)
+    return out.getvalue()
+  cls.SerializePartialToString = SerializePartialToString
+
+  def InternalSerialize(self, write_bytes):
+    for field_descriptor, field_value in self.ListFields():
+      field_descriptor._encoder(write_bytes, field_value)
+  cls._InternalSerialize = InternalSerialize
+
+
+def _AddMergeFromStringMethod(message_descriptor, cls):
+  """Helper for _AddMessageMethods()."""
+  def MergeFromString(self, serialized):
+    length = len(serialized)
+    try:
+      if self._InternalParse(serialized, 0, length) != length:
+        # The only reason _InternalParse would return early is if it
+        # encountered an end-group tag.
+        raise message_mod.DecodeError('Unexpected end-group tag.')
+    except IndexError:
+      raise message_mod.DecodeError('Truncated message.')
+    except struct.error, e:
+      raise message_mod.DecodeError(e)
+    return length   # Return this for legacy reasons.
+  cls.MergeFromString = MergeFromString
+
+  local_ReadTag = decoder.ReadTag
+  local_SkipField = decoder.SkipField
+  decoders_by_tag = cls._decoders_by_tag
+
+  def InternalParse(self, buffer, pos, end):
+    self._Modified()
+    field_dict = self._fields
+    while pos != end:
+      (tag_bytes, new_pos) = local_ReadTag(buffer, pos)
+      field_decoder = decoders_by_tag.get(tag_bytes)
+      if field_decoder is None:
+        new_pos = local_SkipField(buffer, new_pos, end, tag_bytes)
+        if new_pos == -1:
+          return pos
+        pos = new_pos
+      else:
+        pos = field_decoder(buffer, new_pos, end, self, field_dict)
+    return pos
+  cls._InternalParse = InternalParse
+
+
+def _AddIsInitializedMethod(message_descriptor, cls):
+  """Adds the IsInitialized and FindInitializationError methods to the
+  protocol message class."""
+
+  required_fields = [field for field in message_descriptor.fields
+                           if field.label == _FieldDescriptor.LABEL_REQUIRED]
+
+  def IsInitialized(self, errors=None):
+    """Checks if all required fields of a message are set.
+
+    Args:
+      errors:  A list which, if provided, will be populated with the field
+               paths of all missing required fields.
+
+    Returns:
+      True iff the specified message has all required fields set.
+    """
+
+    # Performance is critical so we avoid HasField() and ListFields().
+
+    for field in required_fields:
+      if (field not in self._fields or
+          (field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE and
+           not self._fields[field]._is_present_in_parent)):
+        if errors is not None:
+          errors.extend(self.FindInitializationErrors())
+        return False
+
+    for field, value in self._fields.iteritems():
+      if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
+        if field.label == _FieldDescriptor.LABEL_REPEATED:
+          for element in value:
+            if not element.IsInitialized():
+              if errors is not None:
+                errors.extend(self.FindInitializationErrors())
+              return False
+        elif value._is_present_in_parent and not value.IsInitialized():
+          if errors is not None:
+            errors.extend(self.FindInitializationErrors())
+          return False
+
+    return True
+
+  cls.IsInitialized = IsInitialized
+
+  def FindInitializationErrors(self):
+    """Finds required fields which are not initialized.
+
+    Returns:
+      A list of strings.  Each string is a path to an uninitialized field from
+      the top-level message, e.g. "foo.bar[5].baz".
+    """
+
+    errors = []  # simplify things
+
+    for field in required_fields:
+      if not self.HasField(field.name):
+        errors.append(field.name)
+
+    for field, value in self.ListFields():
+      if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
+        if field.is_extension:
+          name = "(%s)" % field.full_name
+        else:
+          name = field.name
+
+        if field.label == _FieldDescriptor.LABEL_REPEATED:
+          for i in xrange(len(value)):
+            element = value[i]
+            prefix = "%s[%d]." % (name, i)
+            sub_errors = element.FindInitializationErrors()
+            errors += [ prefix + error for error in sub_errors ]
+        else:
+          prefix = name + "."
+          sub_errors = value.FindInitializationErrors()
+          errors += [ prefix + error for error in sub_errors ]
+
+    return errors
+
+  cls.FindInitializationErrors = FindInitializationErrors
+
+
+def _AddMergeFromMethod(cls):
+  LABEL_REPEATED = _FieldDescriptor.LABEL_REPEATED
+  CPPTYPE_MESSAGE = _FieldDescriptor.CPPTYPE_MESSAGE
+
+  def MergeFrom(self, msg):
+    if not isinstance(msg, cls):
+      raise TypeError(
+          "Parameter to MergeFrom() must be instance of same class.")
+
+    assert msg is not self
+    self._Modified()
+
+    fields = self._fields
+
+    for field, value in msg._fields.iteritems():
+      if field.label == LABEL_REPEATED:
+        field_value = fields.get(field)
+        if field_value is None:
+          # Construct a new object to represent this field.
+          field_value = field._default_constructor(self)
+          fields[field] = field_value
+        field_value.MergeFrom(value)
+      elif field.cpp_type == CPPTYPE_MESSAGE:
+        if value._is_present_in_parent:
+          field_value = fields.get(field)
+          if field_value is None:
+            # Construct a new object to represent this field.
+            field_value = field._default_constructor(self)
+            fields[field] = field_value
+          field_value.MergeFrom(value)
+      else:
+        self._fields[field] = value
+  cls.MergeFrom = MergeFrom
+
+
+def _AddMessageMethods(message_descriptor, cls):
+  """Adds implementations of all Message methods to cls."""
+  _AddListFieldsMethod(message_descriptor, cls)
+  _AddHasFieldMethod(message_descriptor, cls)
+  _AddClearFieldMethod(message_descriptor, cls)
+  if message_descriptor.is_extendable:
+    _AddClearExtensionMethod(cls)
+    _AddHasExtensionMethod(cls)
+  _AddClearMethod(message_descriptor, cls)
+  _AddEqualsMethod(message_descriptor, cls)
+  _AddStrMethod(message_descriptor, cls)
+  _AddUnicodeMethod(message_descriptor, cls)
+  _AddSetListenerMethod(cls)
+  _AddByteSizeMethod(message_descriptor, cls)
+  _AddSerializeToStringMethod(message_descriptor, cls)
+  _AddSerializePartialToStringMethod(message_descriptor, cls)
+  _AddMergeFromStringMethod(message_descriptor, cls)
+  _AddIsInitializedMethod(message_descriptor, cls)
+  _AddMergeFromMethod(cls)
+
+
+def _AddPrivateHelperMethods(cls):
+  """Adds implementation of private helper methods to cls."""
+
+  def Modified(self):
+    """Sets the _cached_byte_size_dirty bit to true,
+    and propagates this to our listener iff this was a state change.
+    """
+
+    # Note:  Some callers check _cached_byte_size_dirty before calling
+    #   _Modified() as an extra optimization.  So, if this method is ever
+    #   changed such that it does stuff even when _cached_byte_size_dirty is
+    #   already true, the callers need to be updated.
+    if not self._cached_byte_size_dirty:
+      self._cached_byte_size_dirty = True
+      self._listener_for_children.dirty = True
+      self._is_present_in_parent = True
+      self._listener.Modified()
+
+  cls._Modified = Modified
+  cls.SetInParent = Modified
+
+
+class _Listener(object):
+
+  """MessageListener implementation that a parent message registers with its
+  child message.
+
+  In order to support semantics like:
+
+    foo.bar.baz.qux = 23
+    assert foo.HasField('bar')
+
+  ...child objects must have back references to their parents.
+  This helper class is at the heart of this support.
+  """
+
+  def __init__(self, parent_message):
+    """Args:
+      parent_message: The message whose _Modified() method we should call when
+        we receive Modified() messages.
+    """
+    # This listener establishes a back reference from a child (contained) object
+    # to its parent (containing) object.  We make this a weak reference to avoid
+    # creating cyclic garbage when the client finishes with the 'parent' object
+    # in the tree.
+    if isinstance(parent_message, weakref.ProxyType):
+      self._parent_message_weakref = parent_message
+    else:
+      self._parent_message_weakref = weakref.proxy(parent_message)
+
+    # As an optimization, we also indicate directly on the listener whether
+    # or not the parent message is dirty.  This way we can avoid traversing
+    # up the tree in the common case.
+    self.dirty = False
+
+  def Modified(self):
+    if self.dirty:
+      return
+    try:
+      # Propagate the signal to our parents iff this is the first field set.
+      self._parent_message_weakref._Modified()
+    except ReferenceError:
+      # We can get here if a client has kept a reference to a child object,
+      # and is now setting a field on it, but the child's parent has been
+      # garbage-collected.  This is not an error.
+      pass
+
+
+# TODO(robinson): Move elsewhere?  This file is getting pretty ridiculous...
+# TODO(robinson): Unify error handling of "unknown extension" crap.
+# TODO(robinson): Support iteritems()-style iteration over all
+# extensions with the "has" bits turned on?
+class _ExtensionDict(object):
+
+  """Dict-like container for supporting an indexable "Extensions"
+  field on proto instances.
+
+  Note that in all cases we expect extension handles to be
+  FieldDescriptors.
+  """
+
+  def __init__(self, extended_message):
+    """extended_message: Message instance for which we are the Extensions dict.
+    """
+
+    self._extended_message = extended_message
+
+  def __getitem__(self, extension_handle):
+    """Returns the current value of the given extension handle."""
+
+    _VerifyExtensionHandle(self._extended_message, extension_handle)
+
+    result = self._extended_message._fields.get(extension_handle)
+    if result is not None:
+      return result
+
+    if extension_handle.label == _FieldDescriptor.LABEL_REPEATED:
+      result = extension_handle._default_constructor(self._extended_message)
+    elif extension_handle.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
+      result = extension_handle.message_type._concrete_class()
+      try:
+        result._SetListener(self._extended_message._listener_for_children)
+      except ReferenceError:
+        pass
+    else:
+      # Singular scalar -- just return the default without inserting into the
+      # dict.
+      return extension_handle.default_value
+
+    # Atomically check if another thread has preempted us and, if not, swap
+    # in the new object we just created.  If someone has preempted us, we
+    # take that object and discard ours.
+    # WARNING:  We are relying on setdefault() being atomic.  This is true
+    #   in CPython but we haven't investigated others.  This warning appears
+    #   in several other locations in this file.
+    result = self._extended_message._fields.setdefault(
+        extension_handle, result)
+
+    return result
+
+  def __eq__(self, other):
+    if not isinstance(other, self.__class__):
+      return False
+
+    my_fields = self._extended_message.ListFields()
+    other_fields = other._extended_message.ListFields()
+
+    # Get rid of non-extension fields.
+    my_fields    = [ field for field in my_fields    if field.is_extension ]
+    other_fields = [ field for field in other_fields if field.is_extension ]
+
+    return my_fields == other_fields
+
+  def __ne__(self, other):
+    return not self == other
+
+  def __hash__(self):
+    raise TypeError('unhashable object')
+
+  # Note that this is only meaningful for non-repeated, scalar extension
+  # fields.  Note also that we may have to call _Modified() when we do
+  # successfully set a field this way, to set any necssary "has" bits in the
+  # ancestors of the extended message.
+  def __setitem__(self, extension_handle, value):
+    """If extension_handle specifies a non-repeated, scalar extension
+    field, sets the value of that field.
+    """
+
+    _VerifyExtensionHandle(self._extended_message, extension_handle)
+
+    if (extension_handle.label == _FieldDescriptor.LABEL_REPEATED or
+        extension_handle.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE):
+      raise TypeError(
+          'Cannot assign to extension "%s" because it is a repeated or '
+          'composite type.' % extension_handle.full_name)
+
+    # It's slightly wasteful to lookup the type checker each time,
+    # but we expect this to be a vanishingly uncommon case anyway.
+    type_checker = type_checkers.GetTypeChecker(
+        extension_handle.cpp_type, extension_handle.type)
+    type_checker.CheckValue(value)
+    self._extended_message._fields[extension_handle] = value
+    self._extended_message._Modified()
+
+  def _FindExtensionByName(self, name):
+    """Tries to find a known extension with the specified name.
+
+    Args:
+      name: Extension full name.
+
+    Returns:
+      Extension field descriptor.
+    """
+    return self._extended_message._extensions_by_name.get(name, None)

+ 186 - 40
python/google/protobuf/internal/reflection_test.py

@@ -41,8 +41,6 @@ import operator
 import struct
 
 import unittest
-# TODO(robinson): When we split this test in two, only some of these imports
-# will be necessary in each test.
 from google.protobuf import unittest_import_pb2
 from google.protobuf import unittest_mset_pb2
 from google.protobuf import unittest_pb2
@@ -50,6 +48,7 @@ from google.protobuf import descriptor_pb2
 from google.protobuf import descriptor
 from google.protobuf import message
 from google.protobuf import reflection
+from google.protobuf.internal import api_implementation
 from google.protobuf.internal import more_extensions_pb2
 from google.protobuf.internal import more_messages_pb2
 from google.protobuf.internal import wire_format
@@ -104,10 +103,10 @@ class _MiniDecoder(object):
 
 class ReflectionTest(unittest.TestCase):
 
-  def assertIs(self, values, others):
+  def assertListsEqual(self, values, others):
     self.assertEqual(len(values), len(others))
     for i in range(len(values)):
-      self.assertTrue(values[i] is others[i])
+      self.assertEqual(values[i], others[i])
 
   def testScalarConstructor(self):
     # Constructor with only scalar types should succeed.
@@ -201,15 +200,24 @@ class ReflectionTest(unittest.TestCase):
         list(proto.repeated_foreign_message))
 
   def testConstructorTypeError(self):
-    self.assertRaises(TypeError, unittest_pb2.TestAllTypes, optional_int32="foo")
-    self.assertRaises(TypeError, unittest_pb2.TestAllTypes, optional_string=1234)
-    self.assertRaises(TypeError, unittest_pb2.TestAllTypes, optional_nested_message=1234)
-    self.assertRaises(TypeError, unittest_pb2.TestAllTypes, repeated_int32=1234)
-    self.assertRaises(TypeError, unittest_pb2.TestAllTypes, repeated_int32=["foo"])
-    self.assertRaises(TypeError, unittest_pb2.TestAllTypes, repeated_string=1234)
-    self.assertRaises(TypeError, unittest_pb2.TestAllTypes, repeated_string=[1234])
-    self.assertRaises(TypeError, unittest_pb2.TestAllTypes, repeated_nested_message=1234)
-    self.assertRaises(TypeError, unittest_pb2.TestAllTypes, repeated_nested_message=[1234])
+    self.assertRaises(
+        TypeError, unittest_pb2.TestAllTypes, optional_int32="foo")
+    self.assertRaises(
+        TypeError, unittest_pb2.TestAllTypes, optional_string=1234)
+    self.assertRaises(
+        TypeError, unittest_pb2.TestAllTypes, optional_nested_message=1234)
+    self.assertRaises(
+        TypeError, unittest_pb2.TestAllTypes, repeated_int32=1234)
+    self.assertRaises(
+        TypeError, unittest_pb2.TestAllTypes, repeated_int32=["foo"])
+    self.assertRaises(
+        TypeError, unittest_pb2.TestAllTypes, repeated_string=1234)
+    self.assertRaises(
+        TypeError, unittest_pb2.TestAllTypes, repeated_string=[1234])
+    self.assertRaises(
+        TypeError, unittest_pb2.TestAllTypes, repeated_nested_message=1234)
+    self.assertRaises(
+        TypeError, unittest_pb2.TestAllTypes, repeated_nested_message=[1234])
 
   def testConstructorInvalidatesCachedByteSize(self):
     message = unittest_pb2.TestAllTypes(optional_int32 = 12)
@@ -311,11 +319,14 @@ class ReflectionTest(unittest.TestCase):
       self.assertEqual(0, getattr(composite_field, scalar_field_name))
 
       # Finally, ensure that modifications to the old composite field object
-      # don't have any effect on the parent.
+      # don't have any effect on the parent. Possible only with the pure-python
+      # implementation of the API.
       #
       # (NOTE that when we clear the composite field in the parent, we actually
       # don't recursively clear down the tree.  Instead, we just disconnect the
       # cleared composite from the tree.)
+      if api_implementation.Type() != 'python':
+        return
       self.assertTrue(old_composite_field is not composite_field)
       setattr(old_composite_field, scalar_field_name, new_val)
       self.assertTrue(not composite_field.HasField(scalar_field_name))
@@ -337,6 +348,8 @@ class ReflectionTest(unittest.TestCase):
     nested.bb = 23
 
   def testDisconnectingNestedMessageBeforeSettingField(self):
+    if api_implementation.Type() != 'python':
+      return
     proto = unittest_pb2.TestAllTypes()
     nested = proto.optional_nested_message
     proto.ClearField('optional_nested_message')  # Should disconnect from parent
@@ -526,7 +539,6 @@ class ReflectionTest(unittest.TestCase):
     # proto.nonexistent_field = 23 should fail as well.
     self.assertRaises(AttributeError, setattr, proto, 'nonexistent_field', 23)
 
-  # TODO(robinson): Add type-safety check for enums.
   def testSingleScalarTypeSafety(self):
     proto = unittest_pb2.TestAllTypes()
     self.assertRaises(TypeError, setattr, proto, 'optional_int32', 1.1)
@@ -538,7 +550,9 @@ class ReflectionTest(unittest.TestCase):
     def TestMinAndMaxIntegers(field_name, expected_min, expected_max):
       pb = unittest_pb2.TestAllTypes()
       setattr(pb, field_name, expected_min)
+      self.assertEqual(expected_min, getattr(pb, field_name))
       setattr(pb, field_name, expected_max)
+      self.assertEqual(expected_max, getattr(pb, field_name))
       self.assertRaises(ValueError, setattr, pb, field_name, expected_min - 1)
       self.assertRaises(ValueError, setattr, pb, field_name, expected_max + 1)
 
@@ -546,7 +560,33 @@ class ReflectionTest(unittest.TestCase):
     TestMinAndMaxIntegers('optional_uint32', 0, 0xffffffff)
     TestMinAndMaxIntegers('optional_int64', -(1 << 63), (1 << 63) - 1)
     TestMinAndMaxIntegers('optional_uint64', 0, 0xffffffffffffffff)
-    TestMinAndMaxIntegers('optional_nested_enum', -(1 << 31), (1 << 31) - 1)
+
+    pb = unittest_pb2.TestAllTypes()
+    pb.optional_nested_enum = 1
+    self.assertEqual(1, pb.optional_nested_enum)
+
+    # Invalid enum values.
+    pb.optional_nested_enum = 0
+    self.assertEqual(0, pb.optional_nested_enum)
+
+    bytes_size_before = pb.ByteSize()
+
+    pb.optional_nested_enum = 4
+    self.assertEqual(4, pb.optional_nested_enum)
+
+    pb.optional_nested_enum = 0
+    self.assertEqual(0, pb.optional_nested_enum)
+
+    # Make sure that setting the same enum field doesn't just add unknown
+    # fields (but overwrites them).
+    self.assertEqual(bytes_size_before, pb.ByteSize())
+
+    # Is the invalid value preserved after serialization?
+    serialized = pb.SerializeToString()
+    pb2 = unittest_pb2.TestAllTypes()
+    pb2.ParseFromString(serialized)
+    self.assertEqual(0, pb2.optional_nested_enum)
+    self.assertEqual(pb, pb2)
 
   def testRepeatedScalarTypeSafety(self):
     proto = unittest_pb2.TestAllTypes()
@@ -560,11 +600,19 @@ class ReflectionTest(unittest.TestCase):
     self.assertRaises(IndexError, proto.repeated_int32.__setitem__, 500, 23)
     self.assertRaises(TypeError, proto.repeated_int32.__setitem__, 0, 'abc')
 
+    # Repeated enums tests.
+    #proto.repeated_nested_enum.append(0)
+
   def testSingleScalarGettersAndSetters(self):
     proto = unittest_pb2.TestAllTypes()
     self.assertEqual(0, proto.optional_int32)
     proto.optional_int32 = 1
     self.assertEqual(1, proto.optional_int32)
+
+    proto.optional_uint64 = 0xffffffffffff
+    self.assertEqual(0xffffffffffff, proto.optional_uint64)
+    proto.optional_uint64 = 0xffffffffffffffff
+    self.assertEqual(0xffffffffffffffff, proto.optional_uint64)
     # TODO(robinson): Test all other scalar field types.
 
   def testSingleScalarClearField(self):
@@ -645,11 +693,38 @@ class ReflectionTest(unittest.TestCase):
     del proto.repeated_int32[2:]
     self.assertEqual([5, 35], proto.repeated_int32)
 
+    # Test extending.
+    proto.repeated_int32.extend([3, 13])
+    self.assertEqual([5, 35, 3, 13], proto.repeated_int32)
+
     # Test clearing.
     proto.ClearField('repeated_int32')
     self.assertTrue(not proto.repeated_int32)
     self.assertEqual(0, len(proto.repeated_int32))
 
+    proto.repeated_int32.append(1)
+    self.assertEqual(1, proto.repeated_int32[-1])
+    # Test assignment to a negative index.
+    proto.repeated_int32[-1] = 2
+    self.assertEqual(2, proto.repeated_int32[-1])
+
+    # Test deletion at negative indices.
+    proto.repeated_int32[:] = [0, 1, 2, 3]
+    del proto.repeated_int32[-1]
+    self.assertEqual([0, 1, 2], proto.repeated_int32)
+
+    del proto.repeated_int32[-2]
+    self.assertEqual([0, 2], proto.repeated_int32)
+
+    self.assertRaises(IndexError, proto.repeated_int32.__delitem__, -3)
+    self.assertRaises(IndexError, proto.repeated_int32.__delitem__, 300)
+
+    del proto.repeated_int32[-2:-1]
+    self.assertEqual([2], proto.repeated_int32)
+
+    del proto.repeated_int32[100:10000]
+    self.assertEqual([2], proto.repeated_int32)
+
   def testRepeatedScalarsRemove(self):
     proto = unittest_pb2.TestAllTypes()
 
@@ -687,7 +762,7 @@ class ReflectionTest(unittest.TestCase):
     m1 = proto.repeated_nested_message.add()
     self.assertTrue(proto.repeated_nested_message)
     self.assertEqual(2, len(proto.repeated_nested_message))
-    self.assertIs([m0, m1], proto.repeated_nested_message)
+    self.assertListsEqual([m0, m1], proto.repeated_nested_message)
     self.assertTrue(isinstance(m0, unittest_pb2.TestAllTypes.NestedMessage))
 
     # Test out-of-bounds indices.
@@ -706,32 +781,57 @@ class ReflectionTest(unittest.TestCase):
     m2 = proto.repeated_nested_message.add()
     m3 = proto.repeated_nested_message.add()
     m4 = proto.repeated_nested_message.add()
-    self.assertIs([m1, m2, m3], proto.repeated_nested_message[1:4])
-    self.assertIs([m0, m1, m2, m3, m4], proto.repeated_nested_message[:])
+    self.assertListsEqual(
+        [m1, m2, m3], proto.repeated_nested_message[1:4])
+    self.assertListsEqual(
+        [m0, m1, m2, m3, m4], proto.repeated_nested_message[:])
+    self.assertListsEqual(
+        [m0, m1], proto.repeated_nested_message[:2])
+    self.assertListsEqual(
+        [m2, m3, m4], proto.repeated_nested_message[2:])
+    self.assertEqual(
+        m0, proto.repeated_nested_message[0])
+    self.assertListsEqual(
+        [m0], proto.repeated_nested_message[:1])
 
     # Test that we can use the field as an iterator.
     result = []
     for i in proto.repeated_nested_message:
       result.append(i)
-    self.assertIs([m0, m1, m2, m3, m4], result)
+    self.assertListsEqual([m0, m1, m2, m3, m4], result)
 
     # Test single deletion.
     del proto.repeated_nested_message[2]
-    self.assertIs([m0, m1, m3, m4], proto.repeated_nested_message)
+    self.assertListsEqual([m0, m1, m3, m4], proto.repeated_nested_message)
 
     # Test slice deletion.
     del proto.repeated_nested_message[2:]
-    self.assertIs([m0, m1], proto.repeated_nested_message)
+    self.assertListsEqual([m0, m1], proto.repeated_nested_message)
+
+    # Test extending.
+    n1 = unittest_pb2.TestAllTypes.NestedMessage(bb=1)
+    n2 = unittest_pb2.TestAllTypes.NestedMessage(bb=2)
+    proto.repeated_nested_message.extend([n1,n2])
+    self.assertEqual(4, len(proto.repeated_nested_message))
+    self.assertEqual(n1, proto.repeated_nested_message[2])
+    self.assertEqual(n2, proto.repeated_nested_message[3])
 
     # Test clearing.
     proto.ClearField('repeated_nested_message')
     self.assertTrue(not proto.repeated_nested_message)
     self.assertEqual(0, len(proto.repeated_nested_message))
 
+    # Test constructing an element while adding it.
+    proto.repeated_nested_message.add(bb=23)
+    self.assertEqual(1, len(proto.repeated_nested_message))
+    self.assertEqual(23, proto.repeated_nested_message[0].bb)
+
   def testHandWrittenReflection(self):
-    # TODO(robinson): We probably need a better way to specify
-    # protocol types by hand.  But then again, this isn't something
-    # we expect many people to do.  Hmm.
+    # Hand written extensions are only supported by the pure-Python
+    # implementation of the API.
+    if api_implementation.Type() != 'python':
+      return
+
     FieldDescriptor = descriptor.FieldDescriptor
     foo_field_descriptor = FieldDescriptor(
         name='foo_field', full_name='MyProto.foo_field',
@@ -894,7 +994,7 @@ class ReflectionTest(unittest.TestCase):
     self.assertTrue(not toplevel.HasField('submessage'))
     foreign = toplevel.submessage.Extensions[
         more_extensions_pb2.repeated_message_extension].add()
-    self.assertTrue(foreign is toplevel.submessage.Extensions[
+    self.assertEqual(foreign, toplevel.submessage.Extensions[
         more_extensions_pb2.repeated_message_extension][0])
     self.assertTrue(toplevel.HasField('submessage'))
 
@@ -997,6 +1097,12 @@ class ReflectionTest(unittest.TestCase):
     self.assertEqual(123, proto2.repeated_nested_message[1].bb)
     self.assertEqual(321, proto2.repeated_nested_message[2].bb)
 
+    proto3 = unittest_pb2.TestAllTypes()
+    proto3.repeated_nested_message.MergeFrom(proto2.repeated_nested_message)
+    self.assertEqual(999, proto3.repeated_nested_message[0].bb)
+    self.assertEqual(123, proto3.repeated_nested_message[1].bb)
+    self.assertEqual(321, proto3.repeated_nested_message[2].bb)
+
   def testMergeFromAllFields(self):
     # With all fields set.
     proto1 = unittest_pb2.TestAllTypes()
@@ -1126,6 +1232,15 @@ class ReflectionTest(unittest.TestCase):
     self.assertEqual(2, proto1.optional_int32)
     self.assertEqual('important-text', proto1.optional_string)
 
+  def testCopyFromBadType(self):
+    # The python implementation doesn't raise an exception in this
+    # case. In theory it should.
+    if api_implementation.Type() == 'python':
+      return
+    proto1 = unittest_pb2.TestAllTypes()
+    proto2 = unittest_pb2.TestAllExtensions()
+    self.assertRaises(TypeError, proto1.CopyFrom, proto2)
+
   def testClear(self):
     proto = unittest_pb2.TestAllTypes()
     test_util.SetAllFields(proto)
@@ -1231,9 +1346,10 @@ class ReflectionTest(unittest.TestCase):
     proto.optional_string = str('Testing')
     self.assertEqual(proto.optional_string, unicode('Testing'))
 
-    # Values of type 'str' are also accepted as long as they can be encoded in
-    # UTF-8.
-    self.assertEqual(type(proto.optional_string), str)
+    if api_implementation.Type() == 'python':
+      # Values of type 'str' are also accepted as long as they can be
+      # encoded in UTF-8.
+      self.assertEqual(type(proto.optional_string), str)
 
     # Try to assign a 'str' value which contains bytes that aren't 7-bit ASCII.
     self.assertRaises(ValueError,
@@ -1271,7 +1387,7 @@ class ReflectionTest(unittest.TestCase):
     # Check that the type_id is the same as the tag ID in the .proto file.
     self.assertEqual(raw.item[0].type_id, 1547769)
 
-    # Check the actually bytes on the wire.
+    # Check the actual bytes on the wire.
     self.assertTrue(
         raw.item[0].message.endswith(test_utf8_bytes))
     message2.MergeFromString(raw.item[0].message)
@@ -1279,10 +1395,23 @@ class ReflectionTest(unittest.TestCase):
     self.assertEqual(type(message2.str), unicode)
     self.assertEqual(message2.str, test_utf8)
 
-    # How about if the bytes on the wire aren't a valid UTF-8 encoded string.
+    # The pure Python API throws an exception on MergeFromString(),
+    # if any of the string fields of the message can't be UTF-8 decoded.
+    # The C++ implementation of the API has no way to check that on
+    # MergeFromString and thus has no way to throw the exception.
+    #
+    # The pure Python API always returns objects of type 'unicode' (UTF-8
+    # encoded), or 'str' (in 7 bit ASCII).
     bytes = raw.item[0].message.replace(
         test_utf8_bytes, len(test_utf8_bytes) * '\xff')
-    self.assertRaises(UnicodeDecodeError, message2.MergeFromString, bytes)
+
+    unicode_decode_failed = False
+    try:
+      message2.MergeFromString(bytes)
+    except UnicodeDecodeError, e:
+      unicode_decode_failed = True
+    string_field = message2.str
+    self.assertTrue(unicode_decode_failed or type(string_field) == str)
 
   def testEmptyNestedMessage(self):
     proto = unittest_pb2.TestAllTypes()
@@ -1325,6 +1454,9 @@ class TestAllTypesEqualityTest(unittest.TestCase):
     self.first_proto = unittest_pb2.TestAllTypes()
     self.second_proto = unittest_pb2.TestAllTypes()
 
+  def testNotHashable(self):
+    self.assertRaises(TypeError, hash, self.first_proto)
+
   def testSelfEquality(self):
     self.assertEqual(self.first_proto, self.first_proto)
 
@@ -1342,6 +1474,9 @@ class FullProtosEqualityTest(unittest.TestCase):
     test_util.SetAllFields(self.first_proto)
     test_util.SetAllFields(self.second_proto)
 
+  def testNotHashable(self):
+    self.assertRaises(TypeError, hash, self.first_proto)
+
   def testNoneNotEqual(self):
     self.assertNotEqual(self.first_proto, None)
     self.assertNotEqual(None, self.second_proto)
@@ -1410,9 +1545,6 @@ class FullProtosEqualityTest(unittest.TestCase):
     self.first_proto.ClearField('optional_nested_message')
     self.second_proto.optional_nested_message.ClearField('bb')
     self.assertNotEqual(self.first_proto, self.second_proto)
-    # TODO(robinson): Replace next two lines with method
-    # to set the "has" bit without changing the value,
-    # if/when such a method exists.
     self.first_proto.optional_nested_message.bb = 0
     self.first_proto.optional_nested_message.ClearField('bb')
     self.assertEqual(self.first_proto, self.second_proto)
@@ -1477,6 +1609,14 @@ class ByteSizeTest(unittest.TestCase):
   def testEmptyMessage(self):
     self.assertEqual(0, self.proto.ByteSize())
 
+  def testSizedOnKwargs(self):
+    # Use a separate message to ensure testing right after creation.
+    proto = unittest_pb2.TestAllTypes()
+    self.assertEqual(0, proto.ByteSize())
+    proto_kwargs = unittest_pb2.TestAllTypes(optional_int64 = 1)
+    # One byte for the tag, one to encode varint 1.
+    self.assertEqual(2, proto_kwargs.ByteSize())
+
   def testVarints(self):
     def Test(i, expected_varint_size):
       self.proto.Clear()
@@ -1668,10 +1808,13 @@ class ByteSizeTest(unittest.TestCase):
     self.assertEqual(3, self.proto.ByteSize())
     self.proto.ClearField('optional_foreign_message')
     self.assertEqual(0, self.proto.ByteSize())
-    child = self.proto.optional_foreign_message
-    self.proto.ClearField('optional_foreign_message')
-    child.c = 128
-    self.assertEqual(0, self.proto.ByteSize())
+
+    if api_implementation.Type() == 'python':
+      # This is only possible in pure-Python implementation of the API.
+      child = self.proto.optional_foreign_message
+      self.proto.ClearField('optional_foreign_message')
+      child.c = 128
+      self.assertEqual(0, self.proto.ByteSize())
 
     # Test within extension.
     extension = more_extensions_pb2.optional_message_extension
@@ -1737,7 +1880,6 @@ class ByteSizeTest(unittest.TestCase):
     self.assertEqual(19, self.packed_extended_proto.ByteSize())
 
 
-# TODO(robinson): We need cross-language serialization consistency tests.
 # Issues to be sure to cover include:
 #   * Handling of unrecognized tags ("uninterpreted_bytes").
 #   * Handling of MessageSets.
@@ -1792,6 +1934,10 @@ class SerializationTest(unittest.TestCase):
     self.assertEqual(first_proto, second_proto)
 
   def testParseTruncated(self):
+    # This test is only applicable for the Python implementation of the API.
+    if api_implementation.Type() != 'python':
+      return
+
     first_proto = unittest_pb2.TestAllTypes()
     test_util.SetAllFields(first_proto)
     serialized = first_proto.SerializeToString()

+ 145 - 13
python/google/protobuf/internal/text_format_test.py

@@ -35,6 +35,7 @@
 __author__ = 'kenton@google.com (Kenton Varda)'
 
 import difflib
+import re
 
 import unittest
 from google.protobuf import text_format
@@ -95,12 +96,13 @@ class TextFormatTest(unittest.TestCase):
 
   def testPrintExotic(self):
     message = unittest_pb2.TestAllTypes()
-    message.repeated_int64.append(-9223372036854775808);
-    message.repeated_uint64.append(18446744073709551615);
-    message.repeated_double.append(123.456);
-    message.repeated_double.append(1.23e22);
-    message.repeated_double.append(1.23e-18);
-    message.repeated_string.append('\000\001\a\b\f\n\r\t\v\\\'\"');
+    message.repeated_int64.append(-9223372036854775808)
+    message.repeated_uint64.append(18446744073709551615)
+    message.repeated_double.append(123.456)
+    message.repeated_double.append(1.23e22)
+    message.repeated_double.append(1.23e-18)
+    message.repeated_string.append('\000\001\a\b\f\n\r\t\v\\\'"')
+    message.repeated_string.append(u'\u00fc\ua71f')
     self.CompareToGoldenText(
       self.RemoveRedundantZeros(text_format.MessageToString(message)),
       'repeated_int64: -9223372036854775808\n'
@@ -109,7 +111,95 @@ class TextFormatTest(unittest.TestCase):
       'repeated_double: 1.23e+22\n'
       'repeated_double: 1.23e-18\n'
       'repeated_string: '
-        '\"\\000\\001\\007\\010\\014\\n\\r\\t\\013\\\\\\\'\\\"\"\n')
+        '"\\000\\001\\007\\010\\014\\n\\r\\t\\013\\\\\\\'\\""\n'
+      'repeated_string: "\\303\\274\\352\\234\\237"\n')
+
+  def testPrintNestedMessageAsOneLine(self):
+    message = unittest_pb2.TestAllTypes()
+    msg = message.repeated_nested_message.add()
+    msg.bb = 42;
+    self.CompareToGoldenText(
+        text_format.MessageToString(message, as_one_line=True),
+        'repeated_nested_message { bb: 42 }')
+
+  def testPrintRepeatedFieldsAsOneLine(self):
+    message = unittest_pb2.TestAllTypes()
+    message.repeated_int32.append(1)
+    message.repeated_int32.append(1)
+    message.repeated_int32.append(3)
+    message.repeated_string.append("Google")
+    message.repeated_string.append("Zurich")
+    self.CompareToGoldenText(
+        text_format.MessageToString(message, as_one_line=True),
+        'repeated_int32: 1 repeated_int32: 1 repeated_int32: 3 '
+        'repeated_string: "Google" repeated_string: "Zurich"')
+
+  def testPrintNestedNewLineInStringAsOneLine(self):
+    message = unittest_pb2.TestAllTypes()
+    message.optional_string = "a\nnew\nline"
+    self.CompareToGoldenText(
+        text_format.MessageToString(message, as_one_line=True),
+        'optional_string: "a\\nnew\\nline"')
+
+  def testPrintMessageSetAsOneLine(self):
+    message = unittest_mset_pb2.TestMessageSetContainer()
+    ext1 = unittest_mset_pb2.TestMessageSetExtension1.message_set_extension
+    ext2 = unittest_mset_pb2.TestMessageSetExtension2.message_set_extension
+    message.message_set.Extensions[ext1].i = 23
+    message.message_set.Extensions[ext2].str = 'foo'
+    self.CompareToGoldenText(
+        text_format.MessageToString(message, as_one_line=True),
+        'message_set {'
+        ' [protobuf_unittest.TestMessageSetExtension1] {'
+        ' i: 23'
+        ' }'
+        ' [protobuf_unittest.TestMessageSetExtension2] {'
+        ' str: \"foo\"'
+        ' }'
+        ' }')
+
+  def testPrintExoticAsOneLine(self):
+    message = unittest_pb2.TestAllTypes()
+    message.repeated_int64.append(-9223372036854775808)
+    message.repeated_uint64.append(18446744073709551615)
+    message.repeated_double.append(123.456)
+    message.repeated_double.append(1.23e22)
+    message.repeated_double.append(1.23e-18)
+    message.repeated_string.append('\000\001\a\b\f\n\r\t\v\\\'"')
+    message.repeated_string.append(u'\u00fc\ua71f')
+    self.CompareToGoldenText(
+      self.RemoveRedundantZeros(
+          text_format.MessageToString(message, as_one_line=True)),
+      'repeated_int64: -9223372036854775808'
+      ' repeated_uint64: 18446744073709551615'
+      ' repeated_double: 123.456'
+      ' repeated_double: 1.23e+22'
+      ' repeated_double: 1.23e-18'
+      ' repeated_string: '
+      '"\\000\\001\\007\\010\\014\\n\\r\\t\\013\\\\\\\'\\""'
+      ' repeated_string: "\\303\\274\\352\\234\\237"')
+
+  def testRoundTripExoticAsOneLine(self):
+    message = unittest_pb2.TestAllTypes()
+    message.repeated_int64.append(-9223372036854775808)
+    message.repeated_uint64.append(18446744073709551615)
+    message.repeated_double.append(123.456)
+    message.repeated_double.append(1.23e22)
+    message.repeated_double.append(1.23e-18)
+    message.repeated_string.append('\000\001\a\b\f\n\r\t\v\\\'"')
+    message.repeated_string.append(u'\u00fc\ua71f')
+
+    wire_text = text_format.MessageToString(message, as_one_line=True)
+    parsed_message = unittest_pb2.TestAllTypes()
+    text_format.Merge(wire_text, parsed_message)
+    self.assertEquals(message, parsed_message)
+
+  def testPrintRawUtf8String(self):
+    message = unittest_pb2.TestAllTypes()
+    message.repeated_string.append(u'\u00fc\ua71f')
+    self.CompareToGoldenText(
+        text_format.MessageToString(message, as_utf8 = True),
+        'repeated_string: "\303\274\352\234\237"\n')
 
   def testMessageToString(self):
     message = unittest_pb2.ForeignMessage()
@@ -119,8 +209,12 @@ class TextFormatTest(unittest.TestCase):
   def RemoveRedundantZeros(self, text):
     # Some platforms print 1e+5 as 1e+005.  This is fine, but we need to remove
     # these zeros in order to match the golden file.
-    return text.replace('e+0','e+').replace('e+0','e+') \
+    text = text.replace('e+0','e+').replace('e+0','e+') \
                .replace('e-0','e-').replace('e-0','e-')
+    # Floating point fields are printed with .0 suffix even if they are
+    # actualy integer numbers.
+    text = re.compile('\.0$', re.MULTILINE).sub('', text)
+    return text
 
   def testMergeGolden(self):
     golden_text = '\n'.join(self.ReadGolden('text_format_unittest_data.txt'))
@@ -191,8 +285,11 @@ class TextFormatTest(unittest.TestCase):
             'repeated_double: 1.23e+22\n'
             'repeated_double: 1.23e-18\n'
             'repeated_string: \n'
-            '\"\\000\\001\\007\\010\\014\\n\\r\\t\\013\\\\\\\'\\\"\"\n'
-            'repeated_string: "foo" \'corge\' "grault"')
+            '"\\000\\001\\007\\010\\014\\n\\r\\t\\013\\\\\\\'\\""\n'
+            'repeated_string: "foo" \'corge\' "grault"\n'
+            'repeated_string: "\\303\\274\\352\\234\\237"\n'
+            'repeated_string: "\\xc3\\xbc"\n'
+            'repeated_string: "\xc3\xbc"\n')
     text_format.Merge(text, message)
 
     self.assertEqual(-9223372036854775808, message.repeated_int64[0])
@@ -201,8 +298,30 @@ class TextFormatTest(unittest.TestCase):
     self.assertEqual(1.23e22, message.repeated_double[1])
     self.assertEqual(1.23e-18, message.repeated_double[2])
     self.assertEqual(
-        '\000\001\a\b\f\n\r\t\v\\\'\"', message.repeated_string[0])
+        '\000\001\a\b\f\n\r\t\v\\\'"', message.repeated_string[0])
     self.assertEqual('foocorgegrault', message.repeated_string[1])
+    self.assertEqual(u'\u00fc\ua71f', message.repeated_string[2])
+    self.assertEqual(u'\u00fc', message.repeated_string[3])
+
+  def testMergeEmptyText(self):
+    message = unittest_pb2.TestAllTypes()
+    text = ''
+    text_format.Merge(text, message)
+    self.assertEquals(unittest_pb2.TestAllTypes(), message)
+
+  def testMergeInvalidUtf8(self):
+    message = unittest_pb2.TestAllTypes()
+    text = 'repeated_string: "\\xc3\\xc3"'
+    self.assertRaises(text_format.ParseError, text_format.Merge, text, message)
+
+  def testMergeSingleWord(self):
+    message = unittest_pb2.TestAllTypes()
+    text = 'foo'
+    self.assertRaisesWithMessage(
+        text_format.ParseError,
+        ('1:1 : Message type "protobuf_unittest.TestAllTypes" has no field named '
+         '"foo".'),
+        text_format.Merge, text, message)
 
   def testMergeUnknownField(self):
     message = unittest_pb2.TestAllTypes()
@@ -297,7 +416,8 @@ class TokenizerTest(unittest.TestCase):
             'identifiER_4 : 1.1e+2 ID5:-0.23 ID6:\'aaaa\\\'bbbb\'\n'
             'ID7 : "aa\\"bb"\n\n\n\n ID8: {A:inf B:-inf C:true D:false}\n'
             'ID9: 22 ID10: -111111111111111111 ID11: -22\n'
-            'ID12: 2222222222222222222')
+            'ID12: 2222222222222222222 '
+            'false_bool:  0 true_BOOL:t \n true_bool1:  1 false_BOOL1:f ' )
     tokenizer = text_format._Tokenizer(text)
     methods = [(tokenizer.ConsumeIdentifier, 'identifier1'),
                ':',
@@ -347,7 +467,19 @@ class TokenizerTest(unittest.TestCase):
                (tokenizer.ConsumeInt32, -22),
                (tokenizer.ConsumeIdentifier, 'ID12'),
                ':',
-               (tokenizer.ConsumeUint64, 2222222222222222222)]
+               (tokenizer.ConsumeUint64, 2222222222222222222),
+               (tokenizer.ConsumeIdentifier, 'false_bool'),
+               ':',
+               (tokenizer.ConsumeBool, False),
+               (tokenizer.ConsumeIdentifier, 'true_BOOL'),
+               ':',
+               (tokenizer.ConsumeBool, True),
+               (tokenizer.ConsumeIdentifier, 'true_bool1'),
+               ':',
+               (tokenizer.ConsumeBool, True),
+               (tokenizer.ConsumeIdentifier, 'false_BOOL1'),
+               ':',
+               (tokenizer.ConsumeBool, False)]
 
     i = 0
     while not tokenizer.AtEnd():

+ 11 - 0
python/google/protobuf/message.py

@@ -67,6 +67,11 @@ class Message(object):
 
   DESCRIPTOR = None
 
+  def __deepcopy__(self, memo=None):
+    clone = type(self)()
+    clone.MergeFrom(self)
+    return clone
+
   def __eq__(self, other_msg):
     raise NotImplementedError
 
@@ -74,9 +79,15 @@ class Message(object):
     # Can't just say self != other_msg, since that would infinitely recurse. :)
     return not self == other_msg
 
+  def __hash__(self):
+    raise TypeError('unhashable object')
+
   def __str__(self):
     raise NotImplementedError
 
+  def __unicode__(self):
+    raise NotImplementedError
+
   def MergeFrom(self, other_msg):
     """Merges the contents of the specified message into current message.
 

+ 1658 - 0
python/google/protobuf/pyext/python-proto2.cc

@@ -0,0 +1,1658 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: petar@google.com (Petar Petrov)
+
+#include <Python.h>
+#include <map>
+#include <string>
+#include <vector>
+
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/pyext/python_descriptor.h>
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/descriptor.h>
+#include <google/protobuf/dynamic_message.h>
+#include <google/protobuf/message.h>
+#include <google/protobuf/unknown_field_set.h>
+#include <google/protobuf/pyext/python_protobuf.h>
+
+/* Is 64bit */
+#define IS_64BIT (SIZEOF_LONG == 8)
+
+#define FIELD_BELONGS_TO_MESSAGE(field_descriptor, message) \
+    ((message)->GetDescriptor() == (field_descriptor)->containing_type())
+
+#define FIELD_IS_REPEATED(field_descriptor)                 \
+    ((field_descriptor)->label() == google::protobuf::FieldDescriptor::LABEL_REPEATED)
+
+#define GOOGLE_CHECK_GET_INT32(arg, value)                         \
+    int32 value;                                            \
+    if (!CheckAndGetInteger(arg, &value, kint32min_py, kint32max_py)) { \
+      return NULL;                                          \
+    }
+
+#define GOOGLE_CHECK_GET_INT64(arg, value)                         \
+    int64 value;                                            \
+    if (!CheckAndGetInteger(arg, &value, kint64min_py, kint64max_py)) { \
+      return NULL;                                          \
+    }
+
+#define GOOGLE_CHECK_GET_UINT32(arg, value)                        \
+    uint32 value;                                           \
+    if (!CheckAndGetInteger(arg, &value, kPythonZero, kuint32max_py)) { \
+      return NULL;                                          \
+    }
+
+#define GOOGLE_CHECK_GET_UINT64(arg, value)                        \
+    uint64 value;                                           \
+    if (!CheckAndGetInteger(arg, &value, kPythonZero, kuint64max_py)) { \
+      return NULL;                                          \
+    }
+
+#define GOOGLE_CHECK_GET_FLOAT(arg, value)                         \
+    float value;                                            \
+    if (!CheckAndGetFloat(arg, &value)) {                   \
+      return NULL;                                          \
+    }                                                       \
+
+#define GOOGLE_CHECK_GET_DOUBLE(arg, value)                        \
+    double value;                                           \
+    if (!CheckAndGetDouble(arg, &value)) {                  \
+      return NULL;                                          \
+    }
+
+#define GOOGLE_CHECK_GET_BOOL(arg, value)                          \
+    bool value;                                             \
+    if (!CheckAndGetBool(arg, &value)) {                    \
+      return NULL;                                          \
+    }
+
+#define C(str) const_cast<char*>(str)
+
+// --- Globals:
+
+// Constants used for integer type range checking.
+static PyObject* kPythonZero;
+static PyObject* kint32min_py;
+static PyObject* kint32max_py;
+static PyObject* kuint32max_py;
+static PyObject* kint64min_py;
+static PyObject* kint64max_py;
+static PyObject* kuint64max_py;
+
+namespace google {
+namespace protobuf {
+namespace python {
+
+// --- Support Routines:
+
+static void AddConstants(PyObject* module) {
+  struct NameValue {
+    char* name;
+    int32 value;
+  } constants[] = {
+    // Labels:
+    {"LABEL_OPTIONAL", google::protobuf::FieldDescriptor::LABEL_OPTIONAL},
+    {"LABEL_REQUIRED", google::protobuf::FieldDescriptor::LABEL_REQUIRED},
+    {"LABEL_REPEATED", google::protobuf::FieldDescriptor::LABEL_REPEATED},
+    // CPP types:
+    {"CPPTYPE_MESSAGE", google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE},
+    // Field Types:
+    {"TYPE_MESSAGE", google::protobuf::FieldDescriptor::TYPE_MESSAGE},
+    // End.
+    {NULL, 0}
+  };
+
+  for (NameValue* constant = constants;
+       constant->name != NULL; constant++) {
+    PyModule_AddIntConstant(module, constant->name, constant->value);
+  }
+}
+
+// --- CMessage Custom Type:
+
+// ------ Type Forward Declaration:
+
+struct CMessage;
+struct CMessage_Type;
+
+static void CMessageDealloc(CMessage* self);
+static int CMessageInit(CMessage* self, PyObject *args, PyObject *kwds);
+static PyObject* CMessageStr(CMessage* self);
+
+static PyObject* CMessage_AddMessage(CMessage* self, PyObject* args);
+static PyObject* CMessage_AddRepeatedScalar(CMessage* self, PyObject* args);
+static PyObject* CMessage_AssignRepeatedScalar(CMessage* self, PyObject* args);
+static PyObject* CMessage_ByteSize(CMessage* self, PyObject* args);
+static PyObject* CMessage_Clear(CMessage* self, PyObject* args);
+static PyObject* CMessage_ClearField(CMessage* self, PyObject* args);
+static PyObject* CMessage_ClearFieldByDescriptor(
+    CMessage* self, PyObject* args);
+static PyObject* CMessage_CopyFrom(CMessage* self, PyObject* args);
+static PyObject* CMessage_DebugString(CMessage* self, PyObject* args);
+static PyObject* CMessage_DeleteRepeatedField(CMessage* self, PyObject* args);
+static PyObject* CMessage_Equals(CMessage* self, PyObject* args);
+static PyObject* CMessage_FieldLength(CMessage* self, PyObject* args);
+static PyObject* CMessage_FindInitializationErrors(CMessage* self);
+static PyObject* CMessage_GetRepeatedMessage(CMessage* self, PyObject* args);
+static PyObject* CMessage_GetRepeatedScalar(CMessage* self, PyObject* args);
+static PyObject* CMessage_GetScalar(CMessage* self, PyObject* args);
+static PyObject* CMessage_HasField(CMessage* self, PyObject* args);
+static PyObject* CMessage_HasFieldByDescriptor(CMessage* self, PyObject* args);
+static PyObject* CMessage_IsInitialized(CMessage* self, PyObject* args);
+static PyObject* CMessage_ListFields(CMessage* self, PyObject* args);
+static PyObject* CMessage_MergeFrom(CMessage* self, PyObject* args);
+static PyObject* CMessage_MergeFromString(CMessage* self, PyObject* args);
+static PyObject* CMessage_MutableMessage(CMessage* self, PyObject* args);
+static PyObject* CMessage_NewSubMessage(CMessage* self, PyObject* args);
+static PyObject* CMessage_SetScalar(CMessage* self, PyObject* args);
+static PyObject* CMessage_SerializePartialToString(
+    CMessage* self, PyObject* args);
+static PyObject* CMessage_SerializeToString(CMessage* self, PyObject* args);
+static PyObject* CMessage_SetInParent(CMessage* self, PyObject* args);
+static PyObject* CMessage_SwapRepeatedFieldElements(
+    CMessage* self, PyObject* args);
+
+// ------ Object Definition:
+
+typedef struct CMessage {
+  PyObject_HEAD
+
+  struct CMessage* parent;  // NULL if wasn't created from another message.
+  CFieldDescriptor* parent_field;
+  const char* full_name;
+  google::protobuf::Message* message;
+  bool free_message;
+  bool read_only;
+} CMessage;
+
+// ------ Method Table:
+
+#define CMETHOD(name, args, doc)   \
+  { C(#name), (PyCFunction)CMessage_##name, args, C(doc) }
+static PyMethodDef CMessageMethods[] = {
+  CMETHOD(AddMessage, METH_O,
+          "Adds a new message to a repeated composite field."),
+  CMETHOD(AddRepeatedScalar, METH_VARARGS,
+          "Adds a scalar to a repeated scalar field."),
+  CMETHOD(AssignRepeatedScalar, METH_VARARGS,
+          "Clears and sets the values of a repeated scalar field."),
+  CMETHOD(ByteSize, METH_NOARGS,
+          "Returns the size of the message in bytes."),
+  CMETHOD(Clear, METH_NOARGS,
+          "Clears a protocol message."),
+  CMETHOD(ClearField, METH_O,
+          "Clears a protocol message field by name."),
+  CMETHOD(ClearFieldByDescriptor, METH_O,
+          "Clears a protocol message field by descriptor."),
+  CMETHOD(CopyFrom, METH_O,
+          "Copies a protocol message into the current message."),
+  CMETHOD(DebugString, METH_NOARGS,
+          "Returns the debug string of a protocol message."),
+  CMETHOD(DeleteRepeatedField, METH_VARARGS,
+          "Deletes a slice of values from a repeated field."),
+  CMETHOD(Equals, METH_O,
+          "Checks if two protocol messages are equal (by identity)."),
+  CMETHOD(FieldLength, METH_O,
+          "Returns the number of elements in a repeated field."),
+  CMETHOD(FindInitializationErrors, METH_NOARGS,
+          "Returns the initialization errors of a message."),
+  CMETHOD(GetRepeatedMessage, METH_VARARGS,
+          "Returns a message from a repeated composite field."),
+  CMETHOD(GetRepeatedScalar, METH_VARARGS,
+          "Returns a scalar value from a repeated scalar field."),
+  CMETHOD(GetScalar, METH_O,
+          "Returns the scalar value of a field."),
+  CMETHOD(HasField, METH_O,
+          "Checks if a message field is set."),
+  CMETHOD(HasFieldByDescriptor, METH_O,
+          "Checks if a message field is set by given its descriptor"),
+  CMETHOD(IsInitialized, METH_NOARGS,
+          "Checks if all required fields of a protocol message are set."),
+  CMETHOD(ListFields, METH_NOARGS,
+          "Lists all set fields of a message."),
+  CMETHOD(MergeFrom, METH_O,
+          "Merges a protocol message into the current message."),
+  CMETHOD(MergeFromString, METH_O,
+          "Merges a serialized message into the current message."),
+  CMETHOD(MutableMessage, METH_O,
+          "Returns a new instance of a nested protocol message."),
+  CMETHOD(NewSubMessage, METH_O,
+          "Creates and returns a python message given the descriptor of a "
+          "composite field of the current message."),
+  CMETHOD(SetScalar, METH_VARARGS,
+          "Sets the value of a singular scalar field."),
+  CMETHOD(SerializePartialToString, METH_VARARGS,
+          "Serializes the message to a string, even if it isn't initialized."),
+  CMETHOD(SerializeToString, METH_NOARGS,
+          "Serializes the message to a string, only for initialized messages."),
+  CMETHOD(SetInParent, METH_NOARGS,
+          "Sets the has bit of the given field in its parent message."),
+  CMETHOD(SwapRepeatedFieldElements, METH_VARARGS,
+          "Swaps the elements in two positions in a repeated field."),
+  { NULL, NULL }
+};
+#undef CMETHOD
+
+static PyMemberDef CMessageMembers[] = {
+  { C("full_name"), T_STRING, offsetof(CMessage, full_name), 0, "Full name" },
+  { NULL }
+};
+
+// ------ Type Definition:
+
+// The definition for the type object that captures the type of CMessage
+// in Python.
+PyTypeObject CMessage_Type = {
+  PyObject_HEAD_INIT(&PyType_Type)
+  0,
+  C("google3.net.google.protobuf.python.internal."
+    "_net_proto2___python."
+    "CMessage"),                       // tp_name
+  sizeof(CMessage),                    //  tp_basicsize
+  0,                                   //  tp_itemsize
+  (destructor)CMessageDealloc,         //  tp_dealloc
+  0,                                   //  tp_print
+  0,                                   //  tp_getattr
+  0,                                   //  tp_setattr
+  0,                                   //  tp_compare
+  0,                                   //  tp_repr
+  0,                                   //  tp_as_number
+  0,                                   //  tp_as_sequence
+  0,                                   //  tp_as_mapping
+  0,                                   //  tp_hash
+  0,                                   //  tp_call
+  (reprfunc)CMessageStr,               //  tp_str
+  0,                                   //  tp_getattro
+  0,                                   //  tp_setattro
+  0,                                   //  tp_as_buffer
+  Py_TPFLAGS_DEFAULT,                  //  tp_flags
+  C("A ProtocolMessage"),              //  tp_doc
+  0,                                   //  tp_traverse
+  0,                                   //  tp_clear
+  0,                                   //  tp_richcompare
+  0,                                   //  tp_weaklistoffset
+  0,                                   //  tp_iter
+  0,                                   //  tp_iternext
+  CMessageMethods,                     //  tp_methods
+  CMessageMembers,                     //  tp_members
+  0,                                   //  tp_getset
+  0,                                   //  tp_base
+  0,                                   //  tp_dict
+  0,                                   //  tp_descr_get
+  0,                                   //  tp_descr_set
+  0,                                   //  tp_dictoffset
+  (initproc)CMessageInit,              //  tp_init
+  PyType_GenericAlloc,                 //  tp_alloc
+  PyType_GenericNew,                   //  tp_new
+  PyObject_Del,                        //  tp_free
+};
+
+// ------ Helper Functions:
+
+static void FormatTypeError(PyObject* arg, char* expected_types) {
+  PyObject* s = PyObject_Str(PyObject_Type(arg));
+  PyObject* repr = PyObject_Repr(PyObject_Type(arg));
+  PyErr_Format(PyExc_TypeError,
+               "%.100s has type %.100s, but expected one of: %s",
+               PyString_AS_STRING(repr),
+               PyString_AS_STRING(s),
+               expected_types);
+  Py_DECREF(s);
+  Py_DECREF(repr);
+}
+
+template <class T>
+static bool CheckAndGetInteger(
+    PyObject* arg, T* value, PyObject* min, PyObject* max) {
+  bool is_long = PyLong_Check(arg);
+  if (!PyInt_Check(arg) && !is_long) {
+    FormatTypeError(arg, "int, long");
+    return false;
+  }
+
+  if (PyObject_Compare(min, arg) > 0 || PyObject_Compare(max, arg) < 0) {
+    PyObject* s = PyObject_Str(arg);
+    PyErr_Format(PyExc_ValueError,
+                 "Value out of range: %s",
+                 PyString_AS_STRING(s));
+    Py_DECREF(s);
+    return false;
+  }
+  if (is_long) {
+    if (min == kPythonZero) {
+      *value = static_cast<T>(PyLong_AsUnsignedLongLong(arg));
+    } else {
+      *value = static_cast<T>(PyLong_AsLongLong(arg));
+    }
+  } else {
+    *value = static_cast<T>(PyInt_AsLong(arg));
+  }
+  return true;
+}
+
+static bool CheckAndGetDouble(PyObject* arg, double* value) {
+  if (!PyInt_Check(arg) && !PyLong_Check(arg) &&
+      !PyFloat_Check(arg)) {
+    FormatTypeError(arg, "int, long, float");
+    return false;
+  }
+  *value = PyFloat_AsDouble(arg);
+  return true;
+}
+
+static bool CheckAndGetFloat(PyObject* arg, float* value) {
+  double double_value;
+  if (!CheckAndGetDouble(arg, &double_value)) {
+    return false;
+  }
+  *value = static_cast<float>(double_value);
+  return true;
+}
+
+static bool CheckAndGetBool(PyObject* arg, bool* value) {
+  if (!PyInt_Check(arg) && !PyBool_Check(arg) && !PyLong_Check(arg)) {
+    FormatTypeError(arg, "int, long, bool");
+    return false;
+  }
+  *value = static_cast<bool>(PyInt_AsLong(arg));
+  return true;
+}
+
+google::protobuf::DynamicMessageFactory* global_message_factory = NULL;
+static const google::protobuf::Message* CreateMessage(const char* message_type) {
+  string message_name(message_type);
+  const google::protobuf::Descriptor* descriptor =
+      GetDescriptorPool()->FindMessageTypeByName(message_name);
+  if (descriptor == NULL) {
+    return NULL;
+  }
+  return global_message_factory->GetPrototype(descriptor);
+}
+
+static bool CheckAndSetString(
+    PyObject* arg, google::protobuf::Message* message,
+    const google::protobuf::FieldDescriptor* descriptor,
+    const google::protobuf::Reflection* reflection,
+    bool append,
+    int index) {
+  GOOGLE_DCHECK(descriptor->type() == google::protobuf::FieldDescriptor::TYPE_STRING ||
+         descriptor->type() == google::protobuf::FieldDescriptor::TYPE_BYTES);
+  if (descriptor->type() == google::protobuf::FieldDescriptor::TYPE_STRING) {
+    if (!PyString_Check(arg) && !PyUnicode_Check(arg)) {
+      FormatTypeError(arg, "str, unicode");
+      return false;
+    }
+
+    if (PyString_Check(arg)) {
+      PyObject* unicode = PyUnicode_FromEncodedObject(arg, "ascii", NULL);
+      if (unicode == NULL) {
+        PyObject* repr = PyObject_Repr(arg);
+        PyErr_Format(PyExc_ValueError,
+                     "%s has type str, but isn't in 7-bit ASCII "
+                     "encoding. Non-ASCII strings must be converted to "
+                     "unicode objects before being added.",
+                     PyString_AS_STRING(repr));
+        Py_DECREF(repr);
+        return false;
+      } else {
+        Py_DECREF(unicode);
+      }
+    }
+  } else if (!PyString_Check(arg)) {
+    FormatTypeError(arg, "str");
+    return false;
+  }
+
+  PyObject* encoded_string = NULL;
+  if (descriptor->type() == google::protobuf::FieldDescriptor::TYPE_STRING) {
+    if (PyString_Check(arg)) {
+      encoded_string = PyString_AsEncodedObject(arg, "utf-8", NULL);
+    } else {
+      encoded_string = PyUnicode_AsEncodedObject(arg, "utf-8", NULL);
+    }
+  } else {
+    // In this case field type is "bytes".
+    encoded_string = arg;
+    Py_INCREF(encoded_string);
+  }
+
+  if (encoded_string == NULL) {
+    return false;
+  }
+
+  char* value;
+  Py_ssize_t value_len;
+  if (PyString_AsStringAndSize(encoded_string, &value, &value_len) < 0) {
+    Py_DECREF(encoded_string);
+    return false;
+  }
+
+  string value_string(value, value_len);
+  if (append) {
+    reflection->AddString(message, descriptor, value_string);
+  } else if (index < 0) {
+    reflection->SetString(message, descriptor, value_string);
+  } else {
+    reflection->SetRepeatedString(message, descriptor, index, value_string);
+  }
+  Py_DECREF(encoded_string);
+  return true;
+}
+
+static PyObject* ToStringObject(
+    const google::protobuf::FieldDescriptor* descriptor, string value) {
+  if (descriptor->type() != google::protobuf::FieldDescriptor::TYPE_STRING) {
+    return PyString_FromStringAndSize(value.c_str(), value.length());
+  }
+
+  PyObject* result = PyUnicode_DecodeUTF8(value.c_str(), value.length(), NULL);
+  // If the string can't be decoded in UTF-8, just return a string object that
+  // contains the raw bytes. This can't happen if the value was assigned using
+  // the members of the Python message object, but can happen if the values were
+  // parsed from the wire (binary).
+  if (result == NULL) {
+    PyErr_Clear();
+    result = PyString_FromStringAndSize(value.c_str(), value.length());
+  }
+  return result;
+}
+
+static void AssureWritable(CMessage* self) {
+  if (self == NULL ||
+      self->parent == NULL ||
+      self->parent_field == NULL) {
+    return;
+  }
+
+  if (!self->read_only) {
+    return;
+  }
+
+  AssureWritable(self->parent);
+
+  google::protobuf::Message* message = self->parent->message;
+  const google::protobuf::Reflection* reflection = message->GetReflection();
+  self->message = reflection->MutableMessage(
+      message, self->parent_field->descriptor, global_message_factory);
+  self->read_only = false;
+  self->parent = NULL;
+  self->parent_field = NULL;
+}
+
+static PyObject* InternalGetScalar(
+    google::protobuf::Message* message,
+    const google::protobuf::FieldDescriptor* field_descriptor) {
+  const google::protobuf::Reflection* reflection = message->GetReflection();
+
+  if (!FIELD_BELONGS_TO_MESSAGE(field_descriptor, message)) {
+    PyErr_SetString(
+        PyExc_KeyError, "Field does not belong to message!");
+    return NULL;
+  }
+
+  PyObject* result = NULL;
+  switch (field_descriptor->cpp_type()) {
+    case google::protobuf::FieldDescriptor::CPPTYPE_INT32: {
+      int32 value = reflection->GetInt32(*message, field_descriptor);
+      result = PyInt_FromLong(value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_INT64: {
+      int64 value = reflection->GetInt64(*message, field_descriptor);
+#if IS_64BIT
+      result = PyInt_FromLong(value);
+#else
+      result = PyLong_FromLongLong(value);
+#endif
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_UINT32: {
+      uint32 value = reflection->GetUInt32(*message, field_descriptor);
+#if IS_64BIT
+      result = PyInt_FromLong(value);
+#else
+      result = PyLong_FromLongLong(value);
+#endif
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_UINT64: {
+      uint64 value = reflection->GetUInt64(*message, field_descriptor);
+#if IS_64BIT
+      if (value <= static_cast<uint64>(kint64max)) {
+        result = PyInt_FromLong(static_cast<uint64>(value));
+      }
+#else
+      if (value <= static_cast<uint32>(kint32max)) {
+        result = PyInt_FromLong(static_cast<uint32>(value));
+      }
+#endif
+      else {  // NOLINT
+        result = PyLong_FromUnsignedLongLong(value);
+      }
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_FLOAT: {
+      float value = reflection->GetFloat(*message, field_descriptor);
+      result = PyFloat_FromDouble(value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_DOUBLE: {
+      double value = reflection->GetDouble(*message, field_descriptor);
+      result = PyFloat_FromDouble(value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_BOOL: {
+      bool value = reflection->GetBool(*message, field_descriptor);
+      result = PyBool_FromLong(value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_STRING: {
+      string value = reflection->GetString(*message, field_descriptor);
+      result = ToStringObject(field_descriptor, value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_ENUM: {
+      if (!message->GetReflection()->HasField(*message, field_descriptor)) {
+        // Look for the value in the unknown fields.
+        google::protobuf::UnknownFieldSet* unknown_field_set =
+            message->GetReflection()->MutableUnknownFields(message);
+        for (int i = 0; i < unknown_field_set->field_count(); ++i) {
+          if (unknown_field_set->field(i).number() ==
+              field_descriptor->number()) {
+            result = PyInt_FromLong(unknown_field_set->field(i).varint());
+            break;
+          }
+        }
+      }
+
+      if (result == NULL) {
+        const google::protobuf::EnumValueDescriptor* enum_value =
+            message->GetReflection()->GetEnum(*message, field_descriptor);
+        result = PyInt_FromLong(enum_value->number());
+      }
+      break;
+    }
+    default:
+      PyErr_Format(
+          PyExc_SystemError, "Getting a value from a field of unknown type %d",
+          field_descriptor->cpp_type());
+  }
+
+  return result;
+}
+
+static PyObject* InternalSetScalar(
+    google::protobuf::Message* message, const google::protobuf::FieldDescriptor* field_descriptor,
+    PyObject* arg) {
+  const google::protobuf::Reflection* reflection = message->GetReflection();
+
+  if (!FIELD_BELONGS_TO_MESSAGE(field_descriptor, message)) {
+    PyErr_SetString(
+        PyExc_KeyError, "Field does not belong to message!");
+    return NULL;
+  }
+
+  switch (field_descriptor->cpp_type()) {
+    case google::protobuf::FieldDescriptor::CPPTYPE_INT32: {
+      GOOGLE_CHECK_GET_INT32(arg, value);
+      reflection->SetInt32(message, field_descriptor, value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_INT64: {
+      GOOGLE_CHECK_GET_INT64(arg, value);
+      reflection->SetInt64(message, field_descriptor, value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_UINT32: {
+      GOOGLE_CHECK_GET_UINT32(arg, value);
+      reflection->SetUInt32(message, field_descriptor, value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_UINT64: {
+      GOOGLE_CHECK_GET_UINT64(arg, value);
+      reflection->SetUInt64(message, field_descriptor, value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_FLOAT: {
+      GOOGLE_CHECK_GET_FLOAT(arg, value);
+      reflection->SetFloat(message, field_descriptor, value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_DOUBLE: {
+      GOOGLE_CHECK_GET_DOUBLE(arg, value);
+      reflection->SetDouble(message, field_descriptor, value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_BOOL: {
+      GOOGLE_CHECK_GET_BOOL(arg, value);
+      reflection->SetBool(message, field_descriptor, value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_STRING: {
+      if (!CheckAndSetString(
+          arg, message, field_descriptor, reflection, false, -1)) {
+        return NULL;
+      }
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_ENUM: {
+      GOOGLE_CHECK_GET_INT32(arg, value);
+      const google::protobuf::EnumDescriptor* enum_descriptor =
+          field_descriptor->enum_type();
+      const google::protobuf::EnumValueDescriptor* enum_value =
+          enum_descriptor->FindValueByNumber(value);
+      if (enum_value != NULL) {
+        reflection->SetEnum(message, field_descriptor, enum_value);
+      } else {
+        bool added = false;
+        // Add the value to the unknown fields.
+        google::protobuf::UnknownFieldSet* unknown_field_set =
+            message->GetReflection()->MutableUnknownFields(message);
+        for (int i = 0; i < unknown_field_set->field_count(); ++i) {
+          if (unknown_field_set->field(i).number() ==
+              field_descriptor->number()) {
+            unknown_field_set->mutable_field(i)->set_varint(value);
+            added = true;
+            break;
+          }
+        }
+
+        if (!added) {
+          unknown_field_set->AddVarint(field_descriptor->number(), value);
+        }
+        reflection->ClearField(message, field_descriptor);
+      }
+      break;
+    }
+    default:
+      PyErr_Format(
+          PyExc_SystemError, "Setting value to a field of unknown type %d",
+          field_descriptor->cpp_type());
+  }
+
+  Py_RETURN_NONE;
+}
+
+static PyObject* InternalAddRepeatedScalar(
+    google::protobuf::Message* message, const google::protobuf::FieldDescriptor* field_descriptor,
+    PyObject* arg) {
+
+  if (!FIELD_BELONGS_TO_MESSAGE(field_descriptor, message)) {
+    PyErr_SetString(
+        PyExc_KeyError, "Field does not belong to message!");
+    return NULL;
+  }
+
+  const google::protobuf::Reflection* reflection = message->GetReflection();
+  switch (field_descriptor->cpp_type()) {
+    case google::protobuf::FieldDescriptor::CPPTYPE_INT32: {
+      GOOGLE_CHECK_GET_INT32(arg, value);
+      reflection->AddInt32(message, field_descriptor, value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_INT64: {
+      GOOGLE_CHECK_GET_INT64(arg, value);
+      reflection->AddInt64(message, field_descriptor, value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_UINT32: {
+      GOOGLE_CHECK_GET_UINT32(arg, value);
+      reflection->AddUInt32(message, field_descriptor, value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_UINT64: {
+      GOOGLE_CHECK_GET_UINT64(arg, value);
+      reflection->AddUInt64(message, field_descriptor, value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_FLOAT: {
+      GOOGLE_CHECK_GET_FLOAT(arg, value);
+      reflection->AddFloat(message, field_descriptor, value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_DOUBLE: {
+      GOOGLE_CHECK_GET_DOUBLE(arg, value);
+      reflection->AddDouble(message, field_descriptor, value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_BOOL: {
+      GOOGLE_CHECK_GET_BOOL(arg, value);
+      reflection->AddBool(message, field_descriptor, value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_STRING: {
+      if (!CheckAndSetString(
+          arg, message, field_descriptor, reflection, true, -1)) {
+        return NULL;
+      }
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_ENUM: {
+      GOOGLE_CHECK_GET_INT32(arg, value);
+      const google::protobuf::EnumDescriptor* enum_descriptor =
+          field_descriptor->enum_type();
+      const google::protobuf::EnumValueDescriptor* enum_value =
+          enum_descriptor->FindValueByNumber(value);
+      if (enum_value != NULL) {
+        reflection->AddEnum(message, field_descriptor, enum_value);
+      } else {
+        PyObject* s = PyObject_Str(arg);
+        PyErr_Format(PyExc_ValueError, "Unknown enum value: %s",
+                     PyString_AS_STRING(s));
+        Py_DECREF(s);
+        return NULL;
+      }
+      break;
+    }
+    default:
+      PyErr_Format(
+          PyExc_SystemError, "Adding value to a field of unknown type %d",
+          field_descriptor->cpp_type());
+  }
+
+  Py_RETURN_NONE;
+}
+
+static PyObject* InternalGetRepeatedScalar(
+    CMessage* cmessage, const google::protobuf::FieldDescriptor* field_descriptor,
+    int index) {
+  google::protobuf::Message* message = cmessage->message;
+  const google::protobuf::Reflection* reflection = message->GetReflection();
+
+  int field_size = reflection->FieldSize(*message, field_descriptor);
+  if (index < 0) {
+    index = field_size + index;
+  }
+  if (index < 0 || index >= field_size) {
+    PyErr_Format(PyExc_IndexError,
+                 "list assignment index (%d) out of range", index);
+    return NULL;
+  }
+
+  PyObject* result = NULL;
+  switch (field_descriptor->cpp_type()) {
+    case google::protobuf::FieldDescriptor::CPPTYPE_INT32: {
+      int32 value = reflection->GetRepeatedInt32(
+          *message, field_descriptor, index);
+      result = PyInt_FromLong(value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_INT64: {
+      int64 value = reflection->GetRepeatedInt64(
+          *message, field_descriptor, index);
+      result = PyLong_FromLongLong(value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_UINT32: {
+      uint32 value = reflection->GetRepeatedUInt32(
+          *message, field_descriptor, index);
+      result = PyLong_FromLongLong(value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_UINT64: {
+      uint64 value = reflection->GetRepeatedUInt64(
+          *message, field_descriptor, index);
+      result = PyLong_FromUnsignedLongLong(value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_FLOAT: {
+      float value = reflection->GetRepeatedFloat(
+          *message, field_descriptor, index);
+      result = PyFloat_FromDouble(value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_DOUBLE: {
+      double value = reflection->GetRepeatedDouble(
+          *message, field_descriptor, index);
+      result = PyFloat_FromDouble(value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_BOOL: {
+      bool value = reflection->GetRepeatedBool(
+          *message, field_descriptor, index);
+      result = PyBool_FromLong(value ? 1 : 0);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_ENUM: {
+      const google::protobuf::EnumValueDescriptor* enum_value =
+          message->GetReflection()->GetRepeatedEnum(
+              *message, field_descriptor, index);
+      result = PyInt_FromLong(enum_value->number());
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_STRING: {
+      string value = reflection->GetRepeatedString(
+          *message, field_descriptor, index);
+      result = ToStringObject(field_descriptor, value);
+      break;
+    }
+    case google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE: {
+      CMessage* py_cmsg = PyObject_New(CMessage, &CMessage_Type);
+      if (py_cmsg == NULL) {
+        return NULL;
+      }
+      const google::protobuf::Message& msg = reflection->GetRepeatedMessage(
+          *message, field_descriptor, index);
+      py_cmsg->parent = cmessage;
+      py_cmsg->full_name = field_descriptor->full_name().c_str();
+      py_cmsg->message = const_cast<google::protobuf::Message*>(&msg);
+      py_cmsg->free_message = false;
+      py_cmsg->read_only = false;
+      result = reinterpret_cast<PyObject*>(py_cmsg);
+      break;
+    }
+    default:
+      PyErr_Format(
+          PyExc_SystemError,
+          "Getting value from a repeated field of unknown type %d",
+          field_descriptor->cpp_type());
+  }
+
+  return result;
+}
+
+static PyObject* InternalGetRepeatedScalarSlice(
+    CMessage* cmessage, const google::protobuf::FieldDescriptor* field_descriptor,
+    PyObject* slice) {
+  Py_ssize_t from;
+  Py_ssize_t to;
+  Py_ssize_t step;
+  Py_ssize_t length;
+  bool return_list = false;
+  google::protobuf::Message* message = cmessage->message;
+
+  if (PyInt_Check(slice)) {
+    from = to = PyInt_AsLong(slice);
+  } else if (PyLong_Check(slice)) {
+    from = to = PyLong_AsLong(slice);
+  } else if (PySlice_Check(slice)) {
+    const google::protobuf::Reflection* reflection = message->GetReflection();
+    length = reflection->FieldSize(*message, field_descriptor);
+    PySlice_GetIndices(
+        reinterpret_cast<PySliceObject*>(slice), length, &from, &to, &step);
+    return_list = true;
+  } else {
+    PyErr_SetString(PyExc_TypeError, "list indices must be integers");
+    return NULL;
+  }
+
+  if (!return_list) {
+    return InternalGetRepeatedScalar(cmessage, field_descriptor, from);
+  }
+
+  PyObject* list = PyList_New(0);
+  if (list == NULL) {
+    return NULL;
+  }
+
+  if (from <= to) {
+    if (step < 0) return list;
+    for (Py_ssize_t index = from; index < to; index += step) {
+      if (index < 0 || index >= length) break;
+      PyObject* s = InternalGetRepeatedScalar(
+          cmessage, field_descriptor, index);
+      PyList_Append(list, s);
+      Py_DECREF(s);
+    }
+  } else {
+    if (step > 0) return list;
+    for (Py_ssize_t index = from; index > to; index += step) {
+      if (index < 0 || index >= length) break;
+      PyObject* s = InternalGetRepeatedScalar(
+          cmessage, field_descriptor, index);
+      PyList_Append(list, s);
+      Py_DECREF(s);
+    }
+  }
+  return list;
+}
+
+// ------ C Constructor/Destructor:
+
+static int CMessageInit(CMessage* self, PyObject *args, PyObject *kwds) {
+  self->message = NULL;
+  return 0;
+}
+
+static void CMessageDealloc(CMessage* self) {
+  if (self->free_message) {
+    if (self->read_only) {
+      PyErr_WriteUnraisable(reinterpret_cast<PyObject*>(self));
+    }
+    delete self->message;
+  }
+  self->ob_type->tp_free(reinterpret_cast<PyObject*>(self));
+}
+
+// ------ Methods:
+
+static PyObject* CMessage_Clear(CMessage* self, PyObject* args) {
+  AssureWritable(self);
+  self->message->Clear();
+  Py_RETURN_NONE;
+}
+
+static PyObject* CMessage_IsInitialized(CMessage* self, PyObject* args) {
+  return PyBool_FromLong(self->message->IsInitialized() ? 1 : 0);
+}
+
+static PyObject* CMessage_HasField(CMessage* self, PyObject* arg) {
+  char* field_name;
+  if (PyString_AsStringAndSize(arg, &field_name, NULL) < 0) {
+    return NULL;
+  }
+
+  google::protobuf::Message* message = self->message;
+  const google::protobuf::Descriptor* descriptor = message->GetDescriptor();
+  const google::protobuf::FieldDescriptor* field_descriptor =
+      descriptor->FindFieldByName(field_name);
+  if (field_descriptor == NULL) {
+    PyErr_Format(PyExc_ValueError, "Unknown field %s.", field_name);
+    return NULL;
+  }
+
+  bool has_field =
+      message->GetReflection()->HasField(*message, field_descriptor);
+  return PyBool_FromLong(has_field ? 1 : 0);
+}
+
+static PyObject* CMessage_HasFieldByDescriptor(CMessage* self, PyObject* arg) {
+  CFieldDescriptor* cfield_descriptor = NULL;
+  if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg),
+                          &CFieldDescriptor_Type)) {
+    PyErr_SetString(PyExc_TypeError, "Must be a field descriptor");
+    return NULL;
+  }
+  cfield_descriptor = reinterpret_cast<CFieldDescriptor*>(arg);
+
+  google::protobuf::Message* message = self->message;
+  const google::protobuf::FieldDescriptor* field_descriptor =
+      cfield_descriptor->descriptor;
+
+  if (!FIELD_BELONGS_TO_MESSAGE(field_descriptor, message)) {
+    PyErr_SetString(PyExc_KeyError,
+                    "Field does not belong to message!");
+    return NULL;
+  }
+
+  if (FIELD_IS_REPEATED(field_descriptor)) {
+    PyErr_SetString(PyExc_KeyError,
+                    "Field is repeated. A singular method is required.");
+    return NULL;
+  }
+
+  bool has_field =
+      message->GetReflection()->HasField(*message, field_descriptor);
+  return PyBool_FromLong(has_field ? 1 : 0);
+}
+
+static PyObject* CMessage_ClearFieldByDescriptor(
+    CMessage* self, PyObject* arg) {
+  CFieldDescriptor* cfield_descriptor = NULL;
+  if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg),
+                          &CFieldDescriptor_Type)) {
+    PyErr_SetString(PyExc_TypeError, "Must be a field descriptor");
+    return NULL;
+  }
+  cfield_descriptor = reinterpret_cast<CFieldDescriptor*>(arg);
+
+  google::protobuf::Message* message = self->message;
+  const google::protobuf::FieldDescriptor* field_descriptor =
+      cfield_descriptor->descriptor;
+
+  if (!FIELD_BELONGS_TO_MESSAGE(field_descriptor, message)) {
+    PyErr_SetString(PyExc_KeyError,
+                    "Field does not belong to message!");
+    return NULL;
+  }
+
+  message->GetReflection()->ClearField(message, field_descriptor);
+  Py_RETURN_NONE;
+}
+
+static PyObject* CMessage_ClearField(CMessage* self, PyObject* arg) {
+  char* field_name;
+  if (PyString_AsStringAndSize(arg, &field_name, NULL) < 0) {
+    return NULL;
+  }
+
+  google::protobuf::Message* message = self->message;
+  const google::protobuf::Descriptor* descriptor = message->GetDescriptor();
+  const google::protobuf::FieldDescriptor* field_descriptor =
+      descriptor->FindFieldByName(field_name);
+  if (field_descriptor == NULL) {
+    PyErr_Format(PyExc_ValueError, "Unknown field %s.", field_name);
+    return NULL;
+  }
+
+  message->GetReflection()->ClearField(message, field_descriptor);
+  Py_RETURN_NONE;
+}
+
+static PyObject* CMessage_GetScalar(CMessage* self, PyObject* arg) {
+  CFieldDescriptor* cdescriptor = NULL;
+  if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg),
+                          &CFieldDescriptor_Type)) {
+    PyErr_SetString(PyExc_TypeError, "Must be a field descriptor");
+    return NULL;
+  }
+  cdescriptor = reinterpret_cast<CFieldDescriptor*>(arg);
+
+  google::protobuf::Message* message = self->message;
+  return InternalGetScalar(message, cdescriptor->descriptor);
+}
+
+static PyObject* CMessage_GetRepeatedScalar(CMessage* self, PyObject* args) {
+  CFieldDescriptor* cfield_descriptor;
+  PyObject* slice;
+  if (!PyArg_ParseTuple(args, C("O!O:GetRepeatedScalar"),
+                        &CFieldDescriptor_Type, &cfield_descriptor, &slice)) {
+    return NULL;
+  }
+
+  return InternalGetRepeatedScalarSlice(
+      self, cfield_descriptor->descriptor, slice);
+}
+
+static PyObject* CMessage_AssignRepeatedScalar(CMessage* self, PyObject* args) {
+  CFieldDescriptor* cfield_descriptor;
+  PyObject* slice;
+  if (!PyArg_ParseTuple(args, C("O!O:AssignRepeatedScalar"),
+                        &CFieldDescriptor_Type, &cfield_descriptor, &slice)) {
+    return NULL;
+  }
+
+  AssureWritable(self);
+  google::protobuf::Message* message = self->message;
+  message->GetReflection()->ClearField(message, cfield_descriptor->descriptor);
+
+  PyObject* iter = PyObject_GetIter(slice);
+  PyObject* next;
+  while ((next = PyIter_Next(iter)) != NULL) {
+    if (InternalAddRepeatedScalar(
+        message, cfield_descriptor->descriptor, next) == NULL) {
+      Py_DECREF(iter);
+      return NULL;
+    }
+  }
+  Py_DECREF(iter);
+  Py_RETURN_NONE;
+}
+
+static PyObject* CMessage_DeleteRepeatedField(CMessage* self, PyObject* args) {
+  CFieldDescriptor* cfield_descriptor;
+  PyObject* slice;
+  if (!PyArg_ParseTuple(args, C("O!O:DeleteRepeatedField"),
+                        &CFieldDescriptor_Type, &cfield_descriptor, &slice)) {
+    return NULL;
+  }
+  AssureWritable(self);
+
+  Py_ssize_t length, from, to, step, slice_length;
+  google::protobuf::Message* message = self->message;
+  const google::protobuf::FieldDescriptor* field_descriptor =
+      cfield_descriptor->descriptor;
+  const google::protobuf::Reflection* reflection = message->GetReflection();
+  int min, max;
+  length = reflection->FieldSize(*message, field_descriptor);
+
+  if (PyInt_Check(slice) || PyLong_Check(slice)) {
+    from = to = PyLong_AsLong(slice);
+    if (from < 0) {
+      from = to = length + from;
+    }
+    step = 1;
+    min = max = from;
+
+    // Range check.
+    if (from < 0 || from >= length) {
+      PyErr_Format(PyExc_IndexError, "list assignment index out of range");
+      return NULL;
+    }
+  } else if (PySlice_Check(slice)) {
+    from = to = step = slice_length = 0;
+    PySlice_GetIndicesEx(
+        reinterpret_cast<PySliceObject*>(slice),
+        length, &from, &to, &step, &slice_length);
+    if (from < to) {
+      min = from;
+      max = to - 1;
+    } else {
+      min = to + 1;
+      max = from;
+    }
+  } else {
+    PyErr_SetString(PyExc_TypeError, "list indices must be integers");
+    return NULL;
+  }
+
+  Py_ssize_t i = from;
+  std::vector<bool> to_delete(length, false);
+  while (i >= min && i <= max) {
+    to_delete[i] = true;
+    i += step;
+  }
+
+  to = 0;
+  for (i = 0; i < length; ++i) {
+    if (!to_delete[i]) {
+      if (i != to) {
+        reflection->SwapElements(message, field_descriptor, i, to);
+      }
+      ++to;
+    }
+  }
+
+  while (i > to) {
+    reflection->RemoveLast(message, field_descriptor);
+    --i;
+  }
+
+  Py_RETURN_NONE;
+}
+
+
+static PyObject* CMessage_SetScalar(CMessage* self, PyObject* args) {
+  CFieldDescriptor* cfield_descriptor;
+  PyObject* arg;
+  if (!PyArg_ParseTuple(args, C("O!O:SetScalar"),
+                        &CFieldDescriptor_Type, &cfield_descriptor, &arg)) {
+    return NULL;
+  }
+  AssureWritable(self);
+
+  return InternalSetScalar(self->message, cfield_descriptor->descriptor, arg);
+}
+
+static PyObject* CMessage_AddRepeatedScalar(CMessage* self, PyObject* args) {
+  CFieldDescriptor* cfield_descriptor;
+  PyObject* value;
+  if (!PyArg_ParseTuple(args, C("O!O:AddRepeatedScalar"),
+                        &CFieldDescriptor_Type, &cfield_descriptor, &value)) {
+    return NULL;
+  }
+  AssureWritable(self);
+
+  return InternalAddRepeatedScalar(
+      self->message, cfield_descriptor->descriptor, value);
+}
+
+static PyObject* CMessage_FieldLength(CMessage* self, PyObject* arg) {
+  CFieldDescriptor* cfield_descriptor;
+  if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg),
+                          &CFieldDescriptor_Type)) {
+    PyErr_SetString(PyExc_TypeError, "Must be a field descriptor");
+    return NULL;
+  }
+  cfield_descriptor = reinterpret_cast<CFieldDescriptor*>(arg);
+
+  google::protobuf::Message* message = self->message;
+  int length = message->GetReflection()->FieldSize(
+      *message, cfield_descriptor->descriptor);
+  return PyInt_FromLong(length);
+}
+
+static PyObject* CMessage_DebugString(CMessage* self, PyObject* args) {
+  return PyString_FromString(self->message->DebugString().c_str());
+}
+
+static PyObject* CMessage_SerializeToString(CMessage* self, PyObject* args) {
+  int size = self->message->ByteSize();
+  if (size <= 0) {
+    return PyString_FromString("");
+  }
+  PyObject* result = PyString_FromStringAndSize(NULL, size);
+  if (result == NULL) {
+    return NULL;
+  }
+  char* buffer = PyString_AS_STRING(result);
+  self->message->SerializeWithCachedSizesToArray(
+      reinterpret_cast<uint8*>(buffer));
+  return result;
+}
+
+static PyObject* CMessage_SerializePartialToString(
+    CMessage* self, PyObject* args) {
+  string contents;
+  self->message->SerializePartialToString(&contents);
+  return PyString_FromStringAndSize(contents.c_str(), contents.size());
+}
+
+static PyObject* CMessageStr(CMessage* self) {
+  char str[1024];
+  str[sizeof(str) - 1] = 0;
+  snprintf(str, sizeof(str) - 1, "CMessage: <%p>", self->message);
+  return PyString_FromString(str);
+}
+
+static PyObject* CMessage_MergeFrom(CMessage* self, PyObject* arg) {
+  CMessage* other_message;
+  if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg), &CMessage_Type)) {
+    PyErr_SetString(PyExc_TypeError, "Must be a message");
+    return NULL;
+  }
+
+  other_message = reinterpret_cast<CMessage*>(arg);
+  if (other_message->message->GetDescriptor() !=
+      self->message->GetDescriptor()) {
+    PyErr_Format(PyExc_TypeError,
+                 "Tried to merge from a message with a different type. "
+                 "to: %s, from: %s",
+                 self->message->GetDescriptor()->full_name().c_str(),
+                 other_message->message->GetDescriptor()->full_name().c_str());
+    return NULL;
+  }
+  AssureWritable(self);
+
+  self->message->MergeFrom(*other_message->message);
+  Py_RETURN_NONE;
+}
+
+static PyObject* CMessage_CopyFrom(CMessage* self, PyObject* arg) {
+  CMessage* other_message;
+  if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg), &CMessage_Type)) {
+    PyErr_SetString(PyExc_TypeError, "Must be a message");
+    return NULL;
+  }
+
+  other_message = reinterpret_cast<CMessage*>(arg);
+  if (other_message->message->GetDescriptor() !=
+      self->message->GetDescriptor()) {
+    PyErr_Format(PyExc_TypeError,
+                 "Tried to copy from a message with a different type. "
+                 "to: %s, from: %s",
+                 self->message->GetDescriptor()->full_name().c_str(),
+                 other_message->message->GetDescriptor()->full_name().c_str());
+    return NULL;
+  }
+
+  AssureWritable(self);
+
+  self->message->CopyFrom(*other_message->message);
+  Py_RETURN_NONE;
+}
+
+static PyObject* CMessage_MergeFromString(CMessage* self, PyObject* arg) {
+  const void* data;
+  Py_ssize_t data_length;
+  if (PyObject_AsReadBuffer(arg, &data, &data_length) < 0) {
+    return NULL;
+  }
+
+  AssureWritable(self);
+  google::protobuf::io::CodedInputStream input(
+      reinterpret_cast<const uint8*>(data), data_length);
+  bool success = self->message->MergePartialFromCodedStream(&input);
+  if (success) {
+    return PyInt_FromLong(self->message->ByteSize());
+  } else {
+    return PyInt_FromLong(-1);
+  }
+}
+
+static PyObject* CMessage_ByteSize(CMessage* self, PyObject* args) {
+  return PyLong_FromLong(self->message->ByteSize());
+}
+
+static PyObject* CMessage_SetInParent(CMessage* self, PyObject* args) {
+  AssureWritable(self);
+  Py_RETURN_NONE;
+}
+
+static PyObject* CMessage_SwapRepeatedFieldElements(
+    CMessage* self, PyObject* args) {
+  CFieldDescriptor* cfield_descriptor;
+  int index1, index2;
+  if (!PyArg_ParseTuple(args, C("O!ii:SwapRepeatedFieldElements"),
+                        &CFieldDescriptor_Type, &cfield_descriptor,
+                        &index1, &index2)) {
+    return NULL;
+  }
+
+  google::protobuf::Message* message = self->message;
+  const google::protobuf::Reflection* reflection = message->GetReflection();
+
+  reflection->SwapElements(
+      message, cfield_descriptor->descriptor, index1, index2);
+  Py_RETURN_NONE;
+}
+
+static PyObject* CMessage_AddMessage(CMessage* self, PyObject* arg) {
+  CFieldDescriptor* cfield_descriptor;
+  if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg),
+                          &CFieldDescriptor_Type)) {
+    PyErr_SetString(PyExc_TypeError, "Must be a field descriptor");
+    return NULL;
+  }
+  cfield_descriptor = reinterpret_cast<CFieldDescriptor*>(arg);
+  AssureWritable(self);
+
+  CMessage* py_cmsg = PyObject_New(CMessage, &CMessage_Type);
+  if (py_cmsg == NULL) {
+    return NULL;
+  }
+
+  google::protobuf::Message* message = self->message;
+  const google::protobuf::Reflection* reflection = message->GetReflection();
+  google::protobuf::Message* sub_message =
+      reflection->AddMessage(message, cfield_descriptor->descriptor);
+
+  py_cmsg->parent = NULL;
+  py_cmsg->full_name = sub_message->GetDescriptor()->full_name().c_str();
+  py_cmsg->message = sub_message;
+  py_cmsg->free_message = false;
+  py_cmsg->read_only = false;
+  return reinterpret_cast<PyObject*>(py_cmsg);
+}
+
+static PyObject* CMessage_GetRepeatedMessage(CMessage* self, PyObject* args) {
+  CFieldDescriptor* cfield_descriptor;
+  PyObject* slice;
+  if (!PyArg_ParseTuple(args, C("O!O:GetRepeatedMessage"),
+                        &CFieldDescriptor_Type, &cfield_descriptor, &slice)) {
+    return NULL;
+  }
+
+  return InternalGetRepeatedScalarSlice(
+      self, cfield_descriptor->descriptor, slice);
+}
+
+static PyObject* CMessage_NewSubMessage(CMessage* self, PyObject* arg) {
+  CFieldDescriptor* cfield_descriptor;
+  if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg),
+                          &CFieldDescriptor_Type)) {
+    PyErr_SetString(PyExc_TypeError, "Must be a field descriptor");
+    return NULL;
+  }
+  cfield_descriptor = reinterpret_cast<CFieldDescriptor*>(arg);
+
+  CMessage* py_cmsg = PyObject_New(CMessage, &CMessage_Type);
+  if (py_cmsg == NULL) {
+    return NULL;
+  }
+
+  google::protobuf::Message* message = self->message;
+  const google::protobuf::Reflection* reflection = message->GetReflection();
+  const google::protobuf::Message& sub_message =
+      reflection->GetMessage(*message, cfield_descriptor->descriptor,
+                             global_message_factory);
+
+  py_cmsg->full_name = sub_message.GetDescriptor()->full_name().c_str();
+  py_cmsg->parent = self;
+  py_cmsg->parent_field = cfield_descriptor;
+  py_cmsg->message = const_cast<google::protobuf::Message*>(&sub_message);
+  py_cmsg->free_message = false;
+  py_cmsg->read_only = true;
+  return reinterpret_cast<PyObject*>(py_cmsg);
+}
+
+static PyObject* CMessage_MutableMessage(CMessage* self, PyObject* arg) {
+  CFieldDescriptor* cfield_descriptor;
+  if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg),
+                          &CFieldDescriptor_Type)) {
+    PyErr_SetString(PyExc_TypeError, "Must be a field descriptor");
+    return NULL;
+  }
+  cfield_descriptor = reinterpret_cast<CFieldDescriptor*>(arg);
+  AssureWritable(self);
+
+  CMessage* py_cmsg = PyObject_New(CMessage, &CMessage_Type);
+  if (py_cmsg == NULL) {
+    return NULL;
+  }
+
+  google::protobuf::Message* message = self->message;
+  const google::protobuf::Reflection* reflection = message->GetReflection();
+  google::protobuf::Message* mutable_message =
+      reflection->MutableMessage(message, cfield_descriptor->descriptor,
+                                 global_message_factory);
+
+  py_cmsg->full_name = mutable_message->GetDescriptor()->full_name().c_str();
+  py_cmsg->message = mutable_message;
+  py_cmsg->free_message = false;
+  py_cmsg->read_only = false;
+  return reinterpret_cast<PyObject*>(py_cmsg);
+}
+
+static PyObject* CMessage_Equals(CMessage* self, PyObject* arg) {
+  CMessage* other_message;
+  if (!PyObject_TypeCheck(reinterpret_cast<PyObject *>(arg), &CMessage_Type)) {
+    PyErr_SetString(PyExc_TypeError, "Must be a message");
+    return NULL;
+  }
+  other_message = reinterpret_cast<CMessage*>(arg);
+
+  if (other_message->message == self->message) {
+    return PyBool_FromLong(1);
+  }
+
+  if (other_message->message->GetDescriptor() !=
+      self->message->GetDescriptor()) {
+    return PyBool_FromLong(0);
+  }
+
+  return PyBool_FromLong(1);
+}
+
+static PyObject* CMessage_ListFields(CMessage* self, PyObject* args) {
+  google::protobuf::Message* message = self->message;
+  const google::protobuf::Reflection* reflection = message->GetReflection();
+  vector<const google::protobuf::FieldDescriptor*> fields;
+  reflection->ListFields(*message, &fields);
+
+  PyObject* list = PyList_New(fields.size());
+  if (list == NULL) {
+    return NULL;
+  }
+
+  for (unsigned int i = 0; i < fields.size(); ++i) {
+    bool is_extension = fields[i]->is_extension();
+    PyObject* t = PyTuple_New(2);
+    if (t == NULL) {
+      Py_DECREF(list);
+      return NULL;
+    }
+
+    PyObject* is_extension_object = PyBool_FromLong(is_extension ? 1 : 0);
+
+    PyObject* field_name;
+    const string* s;
+    if (is_extension) {
+      s = &fields[i]->full_name();
+    } else {
+      s = &fields[i]->name();
+    }
+    field_name = PyString_FromStringAndSize(s->c_str(), s->length());
+    if (field_name == NULL) {
+      Py_DECREF(list);
+      Py_DECREF(t);
+      return NULL;
+    }
+
+    PyTuple_SET_ITEM(t, 0, is_extension_object);
+    PyTuple_SET_ITEM(t, 1, field_name);
+    PyList_SET_ITEM(list, i, t);
+  }
+
+  return list;
+}
+
+static PyObject* CMessage_FindInitializationErrors(CMessage* self) {
+  google::protobuf::Message* message = self->message;
+  vector<string> errors;
+  message->FindInitializationErrors(&errors);
+
+  PyObject* error_list = PyList_New(errors.size());
+  if (error_list == NULL) {
+    return NULL;
+  }
+  for (unsigned int i = 0; i < errors.size(); ++i) {
+    const string& error = errors[i];
+    PyObject* error_string = PyString_FromStringAndSize(
+        error.c_str(), error.length());
+    if (error_string == NULL) {
+      Py_DECREF(error_list);
+      return NULL;
+    }
+    PyList_SET_ITEM(error_list, i, error_string);
+  }
+  return error_list;
+}
+
+// ------ Python Constructor:
+
+PyObject* Python_NewCMessage(PyObject* ignored, PyObject* arg) {
+  const char* message_type = PyString_AsString(arg);
+  if (message_type == NULL) {
+    return NULL;
+  }
+
+  const google::protobuf::Message* message = CreateMessage(message_type);
+  if (message == NULL) {
+    PyErr_Format(PyExc_TypeError, "Couldn't create message of type %s!",
+                 message_type);
+    return NULL;
+  }
+
+  CMessage* py_cmsg = PyObject_New(CMessage, &CMessage_Type);
+  if (py_cmsg == NULL) {
+    return NULL;
+  }
+  py_cmsg->message = message->New();
+  py_cmsg->free_message = true;
+  py_cmsg->full_name = message->GetDescriptor()->full_name().c_str();
+  py_cmsg->read_only = false;
+  py_cmsg->parent = NULL;
+  py_cmsg->parent_field = NULL;
+  return reinterpret_cast<PyObject*>(py_cmsg);
+}
+
+// --- Module Functions (exposed to Python):
+
+PyMethodDef methods[] = {
+  { C("NewCMessage"), (PyCFunction)Python_NewCMessage,
+    METH_O,
+    C("Creates a new C++ protocol message, given its full name.") },
+  { C("NewCDescriptorPool"), (PyCFunction)Python_NewCDescriptorPool,
+    METH_NOARGS,
+    C("Creates a new C++ descriptor pool.") },
+  { C("BuildFile"), (PyCFunction)Python_BuildFile,
+    METH_O,
+    C("Registers a new protocol buffer file in the global C++ descriptor "
+      "pool.") },
+  {NULL}
+};
+
+// --- Exposing the C proto living inside Python proto to C code:
+
+extern const Message* (*GetCProtoInsidePyProtoPtr)(PyObject* msg);
+extern Message* (*MutableCProtoInsidePyProtoPtr)(PyObject* msg);
+
+static const google::protobuf::Message* GetCProtoInsidePyProtoImpl(PyObject* msg) {
+  PyObject* c_msg_obj = PyObject_GetAttrString(msg, "_cmsg");
+  if (c_msg_obj == NULL) {
+    PyErr_Clear();
+    return NULL;
+  }
+  Py_DECREF(c_msg_obj);
+  if (!PyObject_TypeCheck(c_msg_obj, &CMessage_Type)) {
+    return NULL;
+  }
+  CMessage* c_msg = reinterpret_cast<CMessage*>(c_msg_obj);
+  return c_msg->message;
+}
+
+static google::protobuf::Message* MutableCProtoInsidePyProtoImpl(PyObject* msg) {
+  PyObject* c_msg_obj = PyObject_GetAttrString(msg, "_cmsg");
+  if (c_msg_obj == NULL) {
+    PyErr_Clear();
+    return NULL;
+  }
+  Py_DECREF(c_msg_obj);
+  if (!PyObject_TypeCheck(c_msg_obj, &CMessage_Type)) {
+    return NULL;
+  }
+  CMessage* c_msg = reinterpret_cast<CMessage*>(c_msg_obj);
+  AssureWritable(c_msg);
+  return c_msg->message;
+}
+
+// --- Module Init Function:
+
+static const char module_docstring[] =
+"python-proto2 is a module that can be used to enhance proto2 Python API\n"
+"performance.\n"
+"\n"
+"It provides access to the protocol buffers C++ reflection API that\n"
+"implements the basic protocol buffer functions.";
+
+extern "C" {
+  void init_net_proto2___python() {
+    // Initialize constants.
+    kPythonZero = PyInt_FromLong(0);
+    kint32min_py = PyInt_FromLong(kint32min);
+    kint32max_py = PyInt_FromLong(kint32max);
+    kuint32max_py = PyLong_FromLongLong(kuint32max);
+    kint64min_py = PyLong_FromLongLong(kint64min);
+    kint64max_py = PyLong_FromLongLong(kint64max);
+    kuint64max_py = PyLong_FromUnsignedLongLong(kuint64max);
+
+    global_message_factory = new DynamicMessageFactory(GetDescriptorPool());
+    global_message_factory->SetDelegateToGeneratedFactory(true);
+
+    // Export our functions to Python.
+    PyObject *m;
+    m = Py_InitModule3(C("_net_proto2___python"), methods, C(module_docstring));
+    if (m == NULL) {
+      return;
+    }
+
+    AddConstants(m);
+
+    CMessage_Type.tp_new = PyType_GenericNew;
+    if (PyType_Ready(&CMessage_Type) < 0) {
+      return;
+    }
+
+    if (!InitDescriptor()) {
+      return;
+    }
+
+    // Override {Get,Mutable}CProtoInsidePyProto.
+    GetCProtoInsidePyProtoPtr = GetCProtoInsidePyProtoImpl;
+    MutableCProtoInsidePyProtoPtr = MutableCProtoInsidePyProtoImpl;
+  }
+}
+
+}  // namespace python
+}  // namespace protobuf
+}  // namespace google

+ 334 - 0
python/google/protobuf/pyext/python_descriptor.cc

@@ -0,0 +1,334 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: petar@google.com (Petar Petrov)
+
+#include <Python.h>
+
+#include <google/protobuf/pyext/python_descriptor.h>
+#include <google/protobuf/descriptor.pb.h>
+
+#define C(str) const_cast<char*>(str)
+
+namespace google {
+namespace protobuf {
+namespace python {
+
+static void CFieldDescriptorDealloc(CFieldDescriptor* self);
+
+static google::protobuf::DescriptorPool* g_descriptor_pool = NULL;
+
+static PyObject* CFieldDescriptor_GetFullName(
+    CFieldDescriptor* self, void *closure) {
+  Py_XINCREF(self->full_name);
+  return self->full_name;
+}
+
+static PyObject* CFieldDescriptor_GetName(
+    CFieldDescriptor *self, void *closure) {
+  Py_XINCREF(self->name);
+  return self->name;
+}
+
+static PyObject* CFieldDescriptor_GetCppType(
+    CFieldDescriptor *self, void *closure) {
+  Py_XINCREF(self->cpp_type);
+  return self->cpp_type;
+}
+
+static PyObject* CFieldDescriptor_GetLabel(
+    CFieldDescriptor *self, void *closure) {
+  Py_XINCREF(self->label);
+  return self->label;
+}
+
+static PyObject* CFieldDescriptor_GetID(
+    CFieldDescriptor *self, void *closure) {
+  Py_XINCREF(self->id);
+  return self->id;
+}
+
+
+static PyGetSetDef CFieldDescriptorGetters[] = {
+  { C("full_name"),
+    (getter)CFieldDescriptor_GetFullName, NULL, "Full name", NULL},
+  { C("name"),
+    (getter)CFieldDescriptor_GetName, NULL, "last name", NULL},
+  { C("cpp_type"),
+    (getter)CFieldDescriptor_GetCppType, NULL, "C++ Type", NULL},
+  { C("label"),
+    (getter)CFieldDescriptor_GetLabel, NULL, "Label", NULL},
+  { C("id"),
+    (getter)CFieldDescriptor_GetID, NULL, "ID", NULL},
+  {NULL}
+};
+
+PyTypeObject CFieldDescriptor_Type = {
+  PyObject_HEAD_INIT(&PyType_Type)
+  0,
+  C("google3.net.google.protobuf.python.internal."
+    "_net_proto2___python."
+    "CFieldDescriptor"),                // tp_name
+  sizeof(CFieldDescriptor),             // tp_basicsize
+  0,                                    // tp_itemsize
+  (destructor)CFieldDescriptorDealloc,  // tp_dealloc
+  0,                                    // tp_print
+  0,                                    // tp_getattr
+  0,                                    // tp_setattr
+  0,                                    // tp_compare
+  0,                                    // tp_repr
+  0,                                    // tp_as_number
+  0,                                    // tp_as_sequence
+  0,                                    // tp_as_mapping
+  0,                                    // tp_hash
+  0,                                    // tp_call
+  0,                                    // tp_str
+  0,                                    // tp_getattro
+  0,                                    // tp_setattro
+  0,                                    // tp_as_buffer
+  Py_TPFLAGS_DEFAULT,                   // tp_flags
+  C("A Field Descriptor"),              // tp_doc
+  0,                                    // tp_traverse
+  0,                                    // tp_clear
+  0,                                    // tp_richcompare
+  0,                                    // tp_weaklistoffset
+  0,                                    // tp_iter
+  0,                                    // tp_iternext
+  0,                                    // tp_methods
+  0,                                    // tp_members
+  CFieldDescriptorGetters,              // tp_getset
+  0,                                    // tp_base
+  0,                                    // tp_dict
+  0,                                    // tp_descr_get
+  0,                                    // tp_descr_set
+  0,                                    // tp_dictoffset
+  0,                                    // tp_init
+  PyType_GenericAlloc,                  // tp_alloc
+  PyType_GenericNew,                    // tp_new
+  PyObject_Del,                         // tp_free
+};
+
+static void CFieldDescriptorDealloc(CFieldDescriptor* self) {
+  Py_DECREF(self->full_name);
+  Py_DECREF(self->name);
+  Py_DECREF(self->cpp_type);
+  Py_DECREF(self->label);
+  Py_DECREF(self->id);
+  self->ob_type->tp_free(reinterpret_cast<PyObject*>(self));
+}
+
+typedef struct {
+  PyObject_HEAD
+
+  const google::protobuf::DescriptorPool* pool;
+} CDescriptorPool;
+
+static void CDescriptorPoolDealloc(CDescriptorPool* self);
+
+static PyObject* CDescriptorPool_NewCDescriptor(
+    const google::protobuf::FieldDescriptor* field_descriptor) {
+  CFieldDescriptor* cfield_descriptor = PyObject_New(
+      CFieldDescriptor, &CFieldDescriptor_Type);
+  if (cfield_descriptor == NULL) {
+    return NULL;
+  }
+  cfield_descriptor->descriptor = field_descriptor;
+
+  cfield_descriptor->full_name = PyString_FromString(
+      field_descriptor->full_name().c_str());
+  cfield_descriptor->name = PyString_FromString(
+      field_descriptor->name().c_str());
+  cfield_descriptor->cpp_type = PyLong_FromLong(field_descriptor->cpp_type());
+  cfield_descriptor->label = PyLong_FromLong(field_descriptor->label());
+  cfield_descriptor->id = PyLong_FromVoidPtr(cfield_descriptor);
+  return reinterpret_cast<PyObject*>(cfield_descriptor);
+}
+
+static PyObject* CDescriptorPool_FindFieldByName(
+    CDescriptorPool* self, PyObject* arg) {
+  const char* full_field_name = PyString_AsString(arg);
+  if (full_field_name == NULL) {
+    return NULL;
+  }
+
+  const google::protobuf::FieldDescriptor* field_descriptor = NULL;
+
+  field_descriptor = self->pool->FindFieldByName(full_field_name);
+  if (field_descriptor == NULL) {
+    PyErr_Format(PyExc_TypeError, "Couldn't find field %.200s",
+                 full_field_name);
+    return NULL;
+  }
+
+  return CDescriptorPool_NewCDescriptor(field_descriptor);
+}
+
+static PyObject* CDescriptorPool_FindExtensionByName(
+    CDescriptorPool* self, PyObject* arg) {
+  const char* full_field_name = PyString_AsString(arg);
+  if (full_field_name == NULL) {
+    return NULL;
+  }
+
+  const google::protobuf::FieldDescriptor* field_descriptor =
+      self->pool->FindExtensionByName(full_field_name);
+  if (field_descriptor == NULL) {
+    PyErr_Format(PyExc_TypeError, "Couldn't find field %.200s",
+                 full_field_name);
+    return NULL;
+  }
+
+  return CDescriptorPool_NewCDescriptor(field_descriptor);
+}
+
+static PyMethodDef CDescriptorPoolMethods[] = {
+  { C("FindFieldByName"),
+    (PyCFunction)CDescriptorPool_FindFieldByName,
+    METH_O,
+    C("Searches for a field descriptor by full name.") },
+  { C("FindExtensionByName"),
+    (PyCFunction)CDescriptorPool_FindExtensionByName,
+    METH_O,
+    C("Searches for extension descriptor by full name.") },
+  {NULL}
+};
+
+PyTypeObject CDescriptorPool_Type = {
+  PyObject_HEAD_INIT(&PyType_Type)
+  0,
+  C("google3.net.google.protobuf.python.internal."
+    "_net_proto2___python."
+    "CFieldDescriptor"),               // tp_name
+  sizeof(CDescriptorPool),             // tp_basicsize
+  0,                                   // tp_itemsize
+  (destructor)CDescriptorPoolDealloc,  // tp_dealloc
+  0,                                   // tp_print
+  0,                                   // tp_getattr
+  0,                                   // tp_setattr
+  0,                                   // tp_compare
+  0,                                   // tp_repr
+  0,                                   // tp_as_number
+  0,                                   // tp_as_sequence
+  0,                                   // tp_as_mapping
+  0,                                   // tp_hash
+  0,                                   // tp_call
+  0,                                   // tp_str
+  0,                                   // tp_getattro
+  0,                                   // tp_setattro
+  0,                                   // tp_as_buffer
+  Py_TPFLAGS_DEFAULT,                  // tp_flags
+  C("A Descriptor Pool"),              // tp_doc
+  0,                                   // tp_traverse
+  0,                                   // tp_clear
+  0,                                   // tp_richcompare
+  0,                                   // tp_weaklistoffset
+  0,                                   // tp_iter
+  0,                                   // tp_iternext
+  CDescriptorPoolMethods,              // tp_methods
+  0,                                   // tp_members
+  0,                                   // tp_getset
+  0,                                   // tp_base
+  0,                                   // tp_dict
+  0,                                   // tp_descr_get
+  0,                                   // tp_descr_set
+  0,                                   // tp_dictoffset
+  0,                                   // tp_init
+  PyType_GenericAlloc,                 // tp_alloc
+  PyType_GenericNew,                   // tp_new
+  PyObject_Del,                        // tp_free
+};
+
+static void CDescriptorPoolDealloc(CDescriptorPool* self) {
+  self->ob_type->tp_free(reinterpret_cast<PyObject*>(self));
+}
+
+google::protobuf::DescriptorPool* GetDescriptorPool() {
+  if (g_descriptor_pool == NULL) {
+    g_descriptor_pool = new google::protobuf::DescriptorPool(
+        google::protobuf::DescriptorPool::generated_pool());
+  }
+  return g_descriptor_pool;
+}
+
+PyObject* Python_NewCDescriptorPool(PyObject* ignored, PyObject* args) {
+  CDescriptorPool* cdescriptor_pool = PyObject_New(
+      CDescriptorPool, &CDescriptorPool_Type);
+  if (cdescriptor_pool == NULL) {
+    return NULL;
+  }
+  cdescriptor_pool->pool = GetDescriptorPool();
+  return reinterpret_cast<PyObject*>(cdescriptor_pool);
+}
+
+PyObject* Python_BuildFile(PyObject* ignored, PyObject* arg) {
+  char* message_type;
+  Py_ssize_t message_len;
+
+  if (PyString_AsStringAndSize(arg, &message_type, &message_len) < 0) {
+    return NULL;
+  }
+
+  google::protobuf::FileDescriptorProto file_proto;
+  if (!file_proto.ParseFromArray(message_type, message_len)) {
+    PyErr_SetString(PyExc_TypeError, "Couldn't parse file content!");
+    return NULL;
+  }
+
+  // If this file is already in the generated pool, don't add it again.
+  if (google::protobuf::DescriptorPool::generated_pool()->FindFileByName(
+      file_proto.name()) != NULL) {
+    Py_RETURN_NONE;
+  }
+
+  const google::protobuf::FileDescriptor* descriptor = GetDescriptorPool()->BuildFile(
+      file_proto);
+  if (descriptor == NULL) {
+    PyErr_SetString(PyExc_TypeError,
+                    "Couldn't build proto file into descriptor pool!");
+    return NULL;
+  }
+
+  Py_RETURN_NONE;
+}
+
+bool InitDescriptor() {
+  CFieldDescriptor_Type.tp_new = PyType_GenericNew;
+  if (PyType_Ready(&CFieldDescriptor_Type) < 0)
+    return false;
+
+  CDescriptorPool_Type.tp_new = PyType_GenericNew;
+  if (PyType_Ready(&CDescriptorPool_Type) < 0)
+    return false;
+  return true;
+}
+
+}  // namespace python
+}  // namespace protobuf
+}  // namespace google

+ 87 - 0
python/google/protobuf/pyext/python_descriptor.h

@@ -0,0 +1,87 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: petar@google.com (Petar Petrov)
+
+#ifndef GOOGLE_PROTOBUF_PYTHON_DESCRIPTOR_H__
+#define GOOGLE_PROTOBUF_PYTHON_DESCRIPTOR_H__
+
+#include <Python.h>
+#include <structmember.h>
+
+#include <google/protobuf/descriptor.h>
+
+#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
+typedef int Py_ssize_t;
+#define PY_SSIZE_T_MAX INT_MAX
+#define PY_SSIZE_T_MIN INT_MIN
+#endif
+
+namespace google {
+namespace protobuf {
+namespace python {
+
+typedef struct {
+  PyObject_HEAD
+
+  // The proto2 descriptor that this object represents.
+  const google::protobuf::FieldDescriptor* descriptor;
+
+  // Full name of the field (PyString).
+  PyObject* full_name;
+
+  // Name of the field (PyString).
+  PyObject* name;
+
+  // C++ type of the field (PyLong).
+  PyObject* cpp_type;
+
+  // Name of the field (PyLong).
+  PyObject* label;
+
+  // Identity of the descriptor (PyLong used as a poiner).
+  PyObject* id;
+} CFieldDescriptor;
+
+extern PyTypeObject CFieldDescriptor_Type;
+
+extern PyTypeObject CDescriptorPool_Type;
+
+
+PyObject* Python_NewCDescriptorPool(PyObject* ignored, PyObject* args);
+PyObject* Python_BuildFile(PyObject* ignored, PyObject* args);
+bool InitDescriptor();
+google::protobuf::DescriptorPool* GetDescriptorPool();
+
+}  // namespace python
+}  // namespace protobuf
+
+}  // namespace google
+#endif  // GOOGLE_PROTOBUF_PYTHON_DESCRIPTOR_H__

+ 63 - 0
python/google/protobuf/pyext/python_protobuf.cc

@@ -0,0 +1,63 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: qrczak@google.com (Marcin Kowalczyk)
+
+#include <google/protobuf/pyext/python_protobuf.h>
+
+namespace google {
+namespace protobuf {
+namespace python {
+
+static const Message* GetCProtoInsidePyProtoStub(PyObject* msg) {
+  return NULL;
+}
+static Message* MutableCProtoInsidePyProtoStub(PyObject* msg) {
+  return NULL;
+}
+
+// This is initialized with a default, stub implementation.
+// If python-google.protobuf.cc is loaded, the function pointer is overridden
+// with a full implementation.
+const Message* (*GetCProtoInsidePyProtoPtr)(PyObject* msg) =
+    GetCProtoInsidePyProtoStub;
+Message* (*MutableCProtoInsidePyProtoPtr)(PyObject* msg) =
+    MutableCProtoInsidePyProtoStub;
+
+const Message* GetCProtoInsidePyProto(PyObject* msg) {
+  return GetCProtoInsidePyProtoPtr(msg);
+}
+Message* MutableCProtoInsidePyProto(PyObject* msg) {
+  return MutableCProtoInsidePyProtoPtr(msg);
+}
+
+}  // namespace python
+}  // namespace protobuf
+}  // namespace google

+ 57 - 0
python/google/protobuf/pyext/python_protobuf.h

@@ -0,0 +1,57 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: qrczak@google.com (Marcin Kowalczyk)
+//
+// This module exposes the C proto inside the given Python proto, in
+// case the Python proto is implemented with a C proto.
+
+#ifndef GOOGLE_PROTOBUF_PYTHON_PYTHON_PROTOBUF_H__
+#define GOOGLE_PROTOBUF_PYTHON_PYTHON_PROTOBUF_H__
+
+#include <Python.h>
+
+namespace google {
+namespace protobuf {
+
+class Message;
+
+namespace python {
+
+// Return the pointer to the C proto inside the given Python proto,
+// or NULL when this is not a Python proto implemented with a C proto.
+const Message* GetCProtoInsidePyProto(PyObject* msg);
+Message* MutableCProtoInsidePyProto(PyObject* msg);
+
+}  // namespace python
+}  // namespace protobuf
+
+}  // namespace google
+#endif  // GOOGLE_PROTOBUF_PYTHON_PYTHON_PROTOBUF_H__

+ 17 - 1035
python/google/protobuf/reflection.py

@@ -29,9 +29,6 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 # This code is meant to work on Python 2.4 and above only.
-#
-# TODO(robinson): Helpers for verbose, common checks like seeing if a
-# descriptor's cpp_type is CPPTYPE_MESSAGE.
 
 """Contains a metaclass and helper functions used to create
 protocol message classes from Descriptor objects at runtime.
@@ -50,27 +47,22 @@ this file*.
 
 __author__ = 'robinson@google.com (Will Robinson)'
 
-try:
-  from cStringIO import StringIO
-except ImportError:
-  from StringIO import StringIO
-import struct
-import weakref
 
-# We use "as" to avoid name collisions with variables.
-from google.protobuf.internal import containers
-from google.protobuf.internal import decoder
-from google.protobuf.internal import encoder
-from google.protobuf.internal import message_listener as message_listener_mod
-from google.protobuf.internal import type_checkers
-from google.protobuf.internal import wire_format
+from google.protobuf.internal import api_implementation
 from google.protobuf import descriptor as descriptor_mod
-from google.protobuf import message as message_mod
-from google.protobuf import text_format
-
 _FieldDescriptor = descriptor_mod.FieldDescriptor
 
 
+if api_implementation.Type() == 'cpp':
+  from google.protobuf.internal import cpp_message
+  _NewMessage = cpp_message.NewMessage
+  _InitMessage = cpp_message.InitMessage
+else:
+  from google.protobuf.internal import python_message
+  _NewMessage = python_message.NewMessage
+  _InitMessage = python_message.InitMessage
+
+
 class GeneratedProtocolMessageType(type):
 
   """Metaclass for protocol message classes created at runtime from Descriptors.
@@ -120,10 +112,12 @@ class GeneratedProtocolMessageType(type):
       Newly-allocated class.
     """
     descriptor = dictionary[GeneratedProtocolMessageType._DESCRIPTOR_KEY]
-    _AddSlots(descriptor, dictionary)
-    _AddClassAttributesForNestedExtensions(descriptor, dictionary)
+    _NewMessage(descriptor, dictionary)
     superclass = super(GeneratedProtocolMessageType, cls)
-    return superclass.__new__(cls, name, bases, dictionary)
+
+    new_class = superclass.__new__(cls, name, bases, dictionary)
+    setattr(descriptor, '_concrete_class', new_class)
+    return new_class
 
   def __init__(cls, name, bases, dictionary):
     """Here we perform the majority of our work on the class.
@@ -143,1018 +137,6 @@ class GeneratedProtocolMessageType(type):
         type.
     """
     descriptor = dictionary[GeneratedProtocolMessageType._DESCRIPTOR_KEY]
-
-    cls._decoders_by_tag = {}
-    cls._extensions_by_name = {}
-    cls._extensions_by_number = {}
-    if (descriptor.has_options and
-        descriptor.GetOptions().message_set_wire_format):
-      cls._decoders_by_tag[decoder.MESSAGE_SET_ITEM_TAG] = (
-          decoder.MessageSetItemDecoder(cls._extensions_by_number))
-
-    # We act as a "friend" class of the descriptor, setting
-    # its _concrete_class attribute the first time we use a
-    # given descriptor to initialize a concrete protocol message
-    # class.  We also attach stuff to each FieldDescriptor for quick
-    # lookup later on.
-    concrete_class_attr_name = '_concrete_class'
-    if not hasattr(descriptor, concrete_class_attr_name):
-      setattr(descriptor, concrete_class_attr_name, cls)
-      for field in descriptor.fields:
-        _AttachFieldHelpers(cls, field)
-
-    _AddEnumValues(descriptor, cls)
-    _AddInitMethod(descriptor, cls)
-    _AddPropertiesForFields(descriptor, cls)
-    _AddPropertiesForExtensions(descriptor, cls)
-    _AddStaticMethods(cls)
-    _AddMessageMethods(descriptor, cls)
-    _AddPrivateHelperMethods(cls)
+    _InitMessage(descriptor, cls)
     superclass = super(GeneratedProtocolMessageType, cls)
     superclass.__init__(name, bases, dictionary)
-
-
-# Stateless helpers for GeneratedProtocolMessageType below.
-# Outside clients should not access these directly.
-#
-# I opted not to make any of these methods on the metaclass, to make it more
-# clear that I'm not really using any state there and to keep clients from
-# thinking that they have direct access to these construction helpers.
-
-
-def _PropertyName(proto_field_name):
-  """Returns the name of the public property attribute which
-  clients can use to get and (in some cases) set the value
-  of a protocol message field.
-
-  Args:
-    proto_field_name: The protocol message field name, exactly
-      as it appears (or would appear) in a .proto file.
-  """
-  # TODO(robinson): Escape Python keywords (e.g., yield), and test this support.
-  # nnorwitz makes my day by writing:
-  # """
-  # FYI.  See the keyword module in the stdlib. This could be as simple as:
-  #
-  # if keyword.iskeyword(proto_field_name):
-  #   return proto_field_name + "_"
-  # return proto_field_name
-  # """
-  # Kenton says:  The above is a BAD IDEA.  People rely on being able to use
-  #   getattr() and setattr() to reflectively manipulate field values.  If we
-  #   rename the properties, then every such user has to also make sure to apply
-  #   the same transformation.  Note that currently if you name a field "yield",
-  #   you can still access it just fine using getattr/setattr -- it's not even
-  #   that cumbersome to do so.
-  # TODO(kenton):  Remove this method entirely if/when everyone agrees with my
-  #   position.
-  return proto_field_name
-
-
-def _VerifyExtensionHandle(message, extension_handle):
-  """Verify that the given extension handle is valid."""
-
-  if not isinstance(extension_handle, _FieldDescriptor):
-    raise KeyError('HasExtension() expects an extension handle, got: %s' %
-                   extension_handle)
-
-  if not extension_handle.is_extension:
-    raise KeyError('"%s" is not an extension.' % extension_handle.full_name)
-
-  if extension_handle.containing_type is not message.DESCRIPTOR:
-    raise KeyError('Extension "%s" extends message type "%s", but this '
-                   'message is of type "%s".' %
-                   (extension_handle.full_name,
-                    extension_handle.containing_type.full_name,
-                    message.DESCRIPTOR.full_name))
-
-
-def _AddSlots(message_descriptor, dictionary):
-  """Adds a __slots__ entry to dictionary, containing the names of all valid
-  attributes for this message type.
-
-  Args:
-    message_descriptor: A Descriptor instance describing this message type.
-    dictionary: Class dictionary to which we'll add a '__slots__' entry.
-  """
-  dictionary['__slots__'] = ['_cached_byte_size',
-                             '_cached_byte_size_dirty',
-                             '_fields',
-                             '_is_present_in_parent',
-                             '_listener',
-                             '_listener_for_children',
-                             '__weakref__']
-
-
-def _IsMessageSetExtension(field):
-  return (field.is_extension and
-          field.containing_type.has_options and
-          field.containing_type.GetOptions().message_set_wire_format and
-          field.type == _FieldDescriptor.TYPE_MESSAGE and
-          field.message_type == field.extension_scope and
-          field.label == _FieldDescriptor.LABEL_OPTIONAL)
-
-
-def _AttachFieldHelpers(cls, field_descriptor):
-  is_repeated = (field_descriptor.label == _FieldDescriptor.LABEL_REPEATED)
-  is_packed = (field_descriptor.has_options and
-               field_descriptor.GetOptions().packed)
-
-  if _IsMessageSetExtension(field_descriptor):
-    field_encoder = encoder.MessageSetItemEncoder(field_descriptor.number)
-    sizer = encoder.MessageSetItemSizer(field_descriptor.number)
-  else:
-    field_encoder = type_checkers.TYPE_TO_ENCODER[field_descriptor.type](
-        field_descriptor.number, is_repeated, is_packed)
-    sizer = type_checkers.TYPE_TO_SIZER[field_descriptor.type](
-        field_descriptor.number, is_repeated, is_packed)
-
-  field_descriptor._encoder = field_encoder
-  field_descriptor._sizer = sizer
-  field_descriptor._default_constructor = _DefaultValueConstructorForField(
-      field_descriptor)
-
-  def AddDecoder(wiretype, is_packed):
-    tag_bytes = encoder.TagBytes(field_descriptor.number, wiretype)
-    cls._decoders_by_tag[tag_bytes] = (
-        type_checkers.TYPE_TO_DECODER[field_descriptor.type](
-            field_descriptor.number, is_repeated, is_packed,
-            field_descriptor, field_descriptor._default_constructor))
-
-  AddDecoder(type_checkers.FIELD_TYPE_TO_WIRE_TYPE[field_descriptor.type],
-             False)
-
-  if is_repeated and wire_format.IsTypePackable(field_descriptor.type):
-    # To support wire compatibility of adding packed = true, add a decoder for
-    # packed values regardless of the field's options.
-    AddDecoder(wire_format.WIRETYPE_LENGTH_DELIMITED, True)
-
-
-def _AddClassAttributesForNestedExtensions(descriptor, dictionary):
-  extension_dict = descriptor.extensions_by_name
-  for extension_name, extension_field in extension_dict.iteritems():
-    assert extension_name not in dictionary
-    dictionary[extension_name] = extension_field
-
-
-def _AddEnumValues(descriptor, cls):
-  """Sets class-level attributes for all enum fields defined in this message.
-
-  Args:
-    descriptor: Descriptor object for this message type.
-    cls: Class we're constructing for this message type.
-  """
-  for enum_type in descriptor.enum_types:
-    for enum_value in enum_type.values:
-      setattr(cls, enum_value.name, enum_value.number)
-
-
-def _DefaultValueConstructorForField(field):
-  """Returns a function which returns a default value for a field.
-
-  Args:
-    field: FieldDescriptor object for this field.
-
-  The returned function has one argument:
-    message: Message instance containing this field, or a weakref proxy
-      of same.
-
-  That function in turn returns a default value for this field.  The default
-    value may refer back to |message| via a weak reference.
-  """
-
-  if field.label == _FieldDescriptor.LABEL_REPEATED:
-    if field.default_value != []:
-      raise ValueError('Repeated field default value not empty list: %s' % (
-          field.default_value))
-    if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
-      # We can't look at _concrete_class yet since it might not have
-      # been set.  (Depends on order in which we initialize the classes).
-      message_type = field.message_type
-      def MakeRepeatedMessageDefault(message):
-        return containers.RepeatedCompositeFieldContainer(
-            message._listener_for_children, field.message_type)
-      return MakeRepeatedMessageDefault
-    else:
-      type_checker = type_checkers.GetTypeChecker(field.cpp_type, field.type)
-      def MakeRepeatedScalarDefault(message):
-        return containers.RepeatedScalarFieldContainer(
-            message._listener_for_children, type_checker)
-      return MakeRepeatedScalarDefault
-
-  if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
-    # _concrete_class may not yet be initialized.
-    message_type = field.message_type
-    def MakeSubMessageDefault(message):
-      result = message_type._concrete_class()
-      result._SetListener(message._listener_for_children)
-      return result
-    return MakeSubMessageDefault
-
-  def MakeScalarDefault(message):
-    return field.default_value
-  return MakeScalarDefault
-
-
-def _AddInitMethod(message_descriptor, cls):
-  """Adds an __init__ method to cls."""
-  fields = message_descriptor.fields
-  def init(self, **kwargs):
-    self._cached_byte_size = 0
-    self._cached_byte_size_dirty = False
-    self._fields = {}
-    self._is_present_in_parent = False
-    self._listener = message_listener_mod.NullMessageListener()
-    self._listener_for_children = _Listener(self)
-    for field_name, field_value in kwargs.iteritems():
-      field = _GetFieldByName(message_descriptor, field_name)
-      if field is None:
-        raise TypeError("%s() got an unexpected keyword argument '%s'" %
-                        (message_descriptor.name, field_name))
-      if field.label == _FieldDescriptor.LABEL_REPEATED:
-        copy = field._default_constructor(self)
-        if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:  # Composite
-          for val in field_value:
-            copy.add().MergeFrom(val)
-        else:  # Scalar
-          copy.extend(field_value)
-        self._fields[field] = copy
-      elif field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
-        copy = field._default_constructor(self)
-        copy.MergeFrom(field_value)
-        self._fields[field] = copy
-      else:
-        setattr(self, field_name, field_value)
-
-  init.__module__ = None
-  init.__doc__ = None
-  cls.__init__ = init
-
-
-def _GetFieldByName(message_descriptor, field_name):
-  """Returns a field descriptor by field name.
-
-  Args:
-    message_descriptor: A Descriptor describing all fields in message.
-    field_name: The name of the field to retrieve.
-  Returns:
-    The field descriptor associated with the field name.
-  """
-  try:
-    return message_descriptor.fields_by_name[field_name]
-  except KeyError:
-    raise ValueError('Protocol message has no "%s" field.' % field_name)
-
-
-def _AddPropertiesForFields(descriptor, cls):
-  """Adds properties for all fields in this protocol message type."""
-  for field in descriptor.fields:
-    _AddPropertiesForField(field, cls)
-
-  if descriptor.is_extendable:
-    # _ExtensionDict is just an adaptor with no state so we allocate a new one
-    # every time it is accessed.
-    cls.Extensions = property(lambda self: _ExtensionDict(self))
-
-
-def _AddPropertiesForField(field, cls):
-  """Adds a public property for a protocol message field.
-  Clients can use this property to get and (in the case
-  of non-repeated scalar fields) directly set the value
-  of a protocol message field.
-
-  Args:
-    field: A FieldDescriptor for this field.
-    cls: The class we're constructing.
-  """
-  # Catch it if we add other types that we should
-  # handle specially here.
-  assert _FieldDescriptor.MAX_CPPTYPE == 10
-
-  constant_name = field.name.upper() + "_FIELD_NUMBER"
-  setattr(cls, constant_name, field.number)
-
-  if field.label == _FieldDescriptor.LABEL_REPEATED:
-    _AddPropertiesForRepeatedField(field, cls)
-  elif field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
-    _AddPropertiesForNonRepeatedCompositeField(field, cls)
-  else:
-    _AddPropertiesForNonRepeatedScalarField(field, cls)
-
-
-def _AddPropertiesForRepeatedField(field, cls):
-  """Adds a public property for a "repeated" protocol message field.  Clients
-  can use this property to get the value of the field, which will be either a
-  _RepeatedScalarFieldContainer or _RepeatedCompositeFieldContainer (see
-  below).
-
-  Note that when clients add values to these containers, we perform
-  type-checking in the case of repeated scalar fields, and we also set any
-  necessary "has" bits as a side-effect.
-
-  Args:
-    field: A FieldDescriptor for this field.
-    cls: The class we're constructing.
-  """
-  proto_field_name = field.name
-  property_name = _PropertyName(proto_field_name)
-
-  def getter(self):
-    field_value = self._fields.get(field)
-    if field_value is None:
-      # Construct a new object to represent this field.
-      field_value = field._default_constructor(self)
-
-      # Atomically check if another thread has preempted us and, if not, swap
-      # in the new object we just created.  If someone has preempted us, we
-      # take that object and discard ours.
-      # WARNING:  We are relying on setdefault() being atomic.  This is true
-      #   in CPython but we haven't investigated others.  This warning appears
-      #   in several other locations in this file.
-      field_value = self._fields.setdefault(field, field_value)
-    return field_value
-  getter.__module__ = None
-  getter.__doc__ = 'Getter for %s.' % proto_field_name
-
-  # We define a setter just so we can throw an exception with a more
-  # helpful error message.
-  def setter(self, new_value):
-    raise AttributeError('Assignment not allowed to repeated field '
-                         '"%s" in protocol message object.' % proto_field_name)
-
-  doc = 'Magic attribute generated for "%s" proto field.' % proto_field_name
-  setattr(cls, property_name, property(getter, setter, doc=doc))
-
-
-def _AddPropertiesForNonRepeatedScalarField(field, cls):
-  """Adds a public property for a nonrepeated, scalar protocol message field.
-  Clients can use this property to get and directly set the value of the field.
-  Note that when the client sets the value of a field by using this property,
-  all necessary "has" bits are set as a side-effect, and we also perform
-  type-checking.
-
-  Args:
-    field: A FieldDescriptor for this field.
-    cls: The class we're constructing.
-  """
-  proto_field_name = field.name
-  property_name = _PropertyName(proto_field_name)
-  type_checker = type_checkers.GetTypeChecker(field.cpp_type, field.type)
-  default_value = field.default_value
-
-  def getter(self):
-    return self._fields.get(field, default_value)
-  getter.__module__ = None
-  getter.__doc__ = 'Getter for %s.' % proto_field_name
-  def setter(self, new_value):
-    type_checker.CheckValue(new_value)
-    self._fields[field] = new_value
-    # Check _cached_byte_size_dirty inline to improve performance, since scalar
-    # setters are called frequently.
-    if not self._cached_byte_size_dirty:
-      self._Modified()
-  setter.__module__ = None
-  setter.__doc__ = 'Setter for %s.' % proto_field_name
-
-  # Add a property to encapsulate the getter/setter.
-  doc = 'Magic attribute generated for "%s" proto field.' % proto_field_name
-  setattr(cls, property_name, property(getter, setter, doc=doc))
-
-
-def _AddPropertiesForNonRepeatedCompositeField(field, cls):
-  """Adds a public property for a nonrepeated, composite protocol message field.
-  A composite field is a "group" or "message" field.
-
-  Clients can use this property to get the value of the field, but cannot
-  assign to the property directly.
-
-  Args:
-    field: A FieldDescriptor for this field.
-    cls: The class we're constructing.
-  """
-  # TODO(robinson): Remove duplication with similar method
-  # for non-repeated scalars.
-  proto_field_name = field.name
-  property_name = _PropertyName(proto_field_name)
-  message_type = field.message_type
-
-  def getter(self):
-    field_value = self._fields.get(field)
-    if field_value is None:
-      # Construct a new object to represent this field.
-      field_value = message_type._concrete_class()
-      field_value._SetListener(self._listener_for_children)
-
-      # Atomically check if another thread has preempted us and, if not, swap
-      # in the new object we just created.  If someone has preempted us, we
-      # take that object and discard ours.
-      # WARNING:  We are relying on setdefault() being atomic.  This is true
-      #   in CPython but we haven't investigated others.  This warning appears
-      #   in several other locations in this file.
-      field_value = self._fields.setdefault(field, field_value)
-    return field_value
-  getter.__module__ = None
-  getter.__doc__ = 'Getter for %s.' % proto_field_name
-
-  # We define a setter just so we can throw an exception with a more
-  # helpful error message.
-  def setter(self, new_value):
-    raise AttributeError('Assignment not allowed to composite field '
-                         '"%s" in protocol message object.' % proto_field_name)
-
-  # Add a property to encapsulate the getter.
-  doc = 'Magic attribute generated for "%s" proto field.' % proto_field_name
-  setattr(cls, property_name, property(getter, setter, doc=doc))
-
-
-def _AddPropertiesForExtensions(descriptor, cls):
-  """Adds properties for all fields in this protocol message type."""
-  extension_dict = descriptor.extensions_by_name
-  for extension_name, extension_field in extension_dict.iteritems():
-    constant_name = extension_name.upper() + "_FIELD_NUMBER"
-    setattr(cls, constant_name, extension_field.number)
-
-
-def _AddStaticMethods(cls):
-  # TODO(robinson): This probably needs to be thread-safe(?)
-  def RegisterExtension(extension_handle):
-    extension_handle.containing_type = cls.DESCRIPTOR
-    _AttachFieldHelpers(cls, extension_handle)
-
-    # Try to insert our extension, failing if an extension with the same number
-    # already exists.
-    actual_handle = cls._extensions_by_number.setdefault(
-        extension_handle.number, extension_handle)
-    if actual_handle is not extension_handle:
-      raise AssertionError(
-          'Extensions "%s" and "%s" both try to extend message type "%s" with '
-          'field number %d.' %
-          (extension_handle.full_name, actual_handle.full_name,
-           cls.DESCRIPTOR.full_name, extension_handle.number))
-
-    cls._extensions_by_name[extension_handle.full_name] = extension_handle
-
-    handle = extension_handle  # avoid line wrapping
-    if _IsMessageSetExtension(handle):
-      # MessageSet extension.  Also register under type name.
-      cls._extensions_by_name[
-          extension_handle.message_type.full_name] = extension_handle
-
-  cls.RegisterExtension = staticmethod(RegisterExtension)
-
-  def FromString(s):
-    message = cls()
-    message.MergeFromString(s)
-    return message
-  cls.FromString = staticmethod(FromString)
-
-
-def _IsPresent(item):
-  """Given a (FieldDescriptor, value) tuple from _fields, return true if the
-  value should be included in the list returned by ListFields()."""
-
-  if item[0].label == _FieldDescriptor.LABEL_REPEATED:
-    return bool(item[1])
-  elif item[0].cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
-    return item[1]._is_present_in_parent
-  else:
-    return True
-
-
-def _AddListFieldsMethod(message_descriptor, cls):
-  """Helper for _AddMessageMethods()."""
-
-  def ListFields(self):
-    all_fields = [item for item in self._fields.iteritems() if _IsPresent(item)]
-    all_fields.sort(key = lambda item: item[0].number)
-    return all_fields
-
-  cls.ListFields = ListFields
-
-
-def _AddHasFieldMethod(message_descriptor, cls):
-  """Helper for _AddMessageMethods()."""
-
-  singular_fields = {}
-  for field in message_descriptor.fields:
-    if field.label != _FieldDescriptor.LABEL_REPEATED:
-      singular_fields[field.name] = field
-
-  def HasField(self, field_name):
-    try:
-      field = singular_fields[field_name]
-    except KeyError:
-      raise ValueError(
-          'Protocol message has no singular "%s" field.' % field_name)
-
-    if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
-      value = self._fields.get(field)
-      return value is not None and value._is_present_in_parent
-    else:
-      return field in self._fields
-  cls.HasField = HasField
-
-
-def _AddClearFieldMethod(message_descriptor, cls):
-  """Helper for _AddMessageMethods()."""
-  def ClearField(self, field_name):
-    try:
-      field = message_descriptor.fields_by_name[field_name]
-    except KeyError:
-      raise ValueError('Protocol message has no "%s" field.' % field_name)
-
-    if field in self._fields:
-      # Note:  If the field is a sub-message, its listener will still point
-      #   at us.  That's fine, because the worst than can happen is that it
-      #   will call _Modified() and invalidate our byte size.  Big deal.
-      del self._fields[field]
-
-    # Always call _Modified() -- even if nothing was changed, this is
-    # a mutating method, and thus calling it should cause the field to become
-    # present in the parent message.
-    self._Modified()
-
-  cls.ClearField = ClearField
-
-
-def _AddClearExtensionMethod(cls):
-  """Helper for _AddMessageMethods()."""
-  def ClearExtension(self, extension_handle):
-    _VerifyExtensionHandle(self, extension_handle)
-
-    # Similar to ClearField(), above.
-    if extension_handle in self._fields:
-      del self._fields[extension_handle]
-    self._Modified()
-  cls.ClearExtension = ClearExtension
-
-
-def _AddClearMethod(message_descriptor, cls):
-  """Helper for _AddMessageMethods()."""
-  def Clear(self):
-    # Clear fields.
-    self._fields = {}
-    self._Modified()
-  cls.Clear = Clear
-
-
-def _AddHasExtensionMethod(cls):
-  """Helper for _AddMessageMethods()."""
-  def HasExtension(self, extension_handle):
-    _VerifyExtensionHandle(self, extension_handle)
-    if extension_handle.label == _FieldDescriptor.LABEL_REPEATED:
-      raise KeyError('"%s" is repeated.' % extension_handle.full_name)
-
-    if extension_handle.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
-      value = self._fields.get(extension_handle)
-      return value is not None and value._is_present_in_parent
-    else:
-      return extension_handle in self._fields
-  cls.HasExtension = HasExtension
-
-
-def _AddEqualsMethod(message_descriptor, cls):
-  """Helper for _AddMessageMethods()."""
-  def __eq__(self, other):
-    if (not isinstance(other, message_mod.Message) or
-        other.DESCRIPTOR != self.DESCRIPTOR):
-      return False
-
-    if self is other:
-      return True
-
-    return self.ListFields() == other.ListFields()
-
-  cls.__eq__ = __eq__
-
-
-def _AddStrMethod(message_descriptor, cls):
-  """Helper for _AddMessageMethods()."""
-  def __str__(self):
-    return text_format.MessageToString(self)
-  cls.__str__ = __str__
-
-
-def _AddSetListenerMethod(cls):
-  """Helper for _AddMessageMethods()."""
-  def SetListener(self, listener):
-    if listener is None:
-      self._listener = message_listener_mod.NullMessageListener()
-    else:
-      self._listener = listener
-  cls._SetListener = SetListener
-
-
-def _BytesForNonRepeatedElement(value, field_number, field_type):
-  """Returns the number of bytes needed to serialize a non-repeated element.
-  The returned byte count includes space for tag information and any
-  other additional space associated with serializing value.
-
-  Args:
-    value: Value we're serializing.
-    field_number: Field number of this value.  (Since the field number
-      is stored as part of a varint-encoded tag, this has an impact
-      on the total bytes required to serialize the value).
-    field_type: The type of the field.  One of the TYPE_* constants
-      within FieldDescriptor.
-  """
-  try:
-    fn = type_checkers.TYPE_TO_BYTE_SIZE_FN[field_type]
-    return fn(field_number, value)
-  except KeyError:
-    raise message_mod.EncodeError('Unrecognized field type: %d' % field_type)
-
-
-def _AddByteSizeMethod(message_descriptor, cls):
-  """Helper for _AddMessageMethods()."""
-
-  def ByteSize(self):
-    if not self._cached_byte_size_dirty:
-      return self._cached_byte_size
-
-    size = 0
-    for field_descriptor, field_value in self.ListFields():
-      size += field_descriptor._sizer(field_value)
-
-    self._cached_byte_size = size
-    self._cached_byte_size_dirty = False
-    self._listener_for_children.dirty = False
-    return size
-
-  cls.ByteSize = ByteSize
-
-
-def _AddSerializeToStringMethod(message_descriptor, cls):
-  """Helper for _AddMessageMethods()."""
-
-  def SerializeToString(self):
-    # Check if the message has all of its required fields set.
-    errors = []
-    if not self.IsInitialized():
-      raise message_mod.EncodeError(
-          'Message is missing required fields: ' +
-          ','.join(self.FindInitializationErrors()))
-    return self.SerializePartialToString()
-  cls.SerializeToString = SerializeToString
-
-
-def _AddSerializePartialToStringMethod(message_descriptor, cls):
-  """Helper for _AddMessageMethods()."""
-
-  def SerializePartialToString(self):
-    out = StringIO()
-    self._InternalSerialize(out.write)
-    return out.getvalue()
-  cls.SerializePartialToString = SerializePartialToString
-
-  def InternalSerialize(self, write_bytes):
-    for field_descriptor, field_value in self.ListFields():
-      field_descriptor._encoder(write_bytes, field_value)
-  cls._InternalSerialize = InternalSerialize
-
-
-def _AddMergeFromStringMethod(message_descriptor, cls):
-  """Helper for _AddMessageMethods()."""
-  def MergeFromString(self, serialized):
-    length = len(serialized)
-    try:
-      if self._InternalParse(serialized, 0, length) != length:
-        # The only reason _InternalParse would return early is if it
-        # encountered an end-group tag.
-        raise message_mod.DecodeError('Unexpected end-group tag.')
-    except IndexError:
-      raise message_mod.DecodeError('Truncated message.')
-    except struct.error, e:
-      raise message_mod.DecodeError(e)
-    return length   # Return this for legacy reasons.
-  cls.MergeFromString = MergeFromString
-
-  local_ReadTag = decoder.ReadTag
-  local_SkipField = decoder.SkipField
-  decoders_by_tag = cls._decoders_by_tag
-
-  def InternalParse(self, buffer, pos, end):
-    self._Modified()
-    field_dict = self._fields
-    while pos != end:
-      (tag_bytes, new_pos) = local_ReadTag(buffer, pos)
-      field_decoder = decoders_by_tag.get(tag_bytes)
-      if field_decoder is None:
-        new_pos = local_SkipField(buffer, new_pos, end, tag_bytes)
-        if new_pos == -1:
-          return pos
-        pos = new_pos
-      else:
-        pos = field_decoder(buffer, new_pos, end, self, field_dict)
-    return pos
-  cls._InternalParse = InternalParse
-
-
-def _AddIsInitializedMethod(message_descriptor, cls):
-  """Adds the IsInitialized and FindInitializationError methods to the
-  protocol message class."""
-
-  required_fields = [field for field in message_descriptor.fields
-                           if field.label == _FieldDescriptor.LABEL_REQUIRED]
-
-  def IsInitialized(self, errors=None):
-    """Checks if all required fields of a message are set.
-
-    Args:
-      errors:  A list which, if provided, will be populated with the field
-               paths of all missing required fields.
-
-    Returns:
-      True iff the specified message has all required fields set.
-    """
-
-    # Performance is critical so we avoid HasField() and ListFields().
-
-    for field in required_fields:
-      if (field not in self._fields or
-          (field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE and
-           not self._fields[field]._is_present_in_parent)):
-        if errors is not None:
-          errors.extend(self.FindInitializationErrors())
-        return False
-
-    for field, value in self._fields.iteritems():
-      if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
-        if field.label == _FieldDescriptor.LABEL_REPEATED:
-          for element in value:
-            if not element.IsInitialized():
-              if errors is not None:
-                errors.extend(self.FindInitializationErrors())
-              return False
-        elif value._is_present_in_parent and not value.IsInitialized():
-          if errors is not None:
-            errors.extend(self.FindInitializationErrors())
-          return False
-
-    return True
-
-  cls.IsInitialized = IsInitialized
-
-  def FindInitializationErrors(self):
-    """Finds required fields which are not initialized.
-
-    Returns:
-      A list of strings.  Each string is a path to an uninitialized field from
-      the top-level message, e.g. "foo.bar[5].baz".
-    """
-
-    errors = []  # simplify things
-
-    for field in required_fields:
-      if not self.HasField(field.name):
-        errors.append(field.name)
-
-    for field, value in self.ListFields():
-      if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
-        if field.is_extension:
-          name = "(%s)" % field.full_name
-        else:
-          name = field.name
-
-        if field.label == _FieldDescriptor.LABEL_REPEATED:
-          for i in xrange(len(value)):
-            element = value[i]
-            prefix = "%s[%d]." % (name, i)
-            sub_errors = element.FindInitializationErrors()
-            errors += [ prefix + error for error in sub_errors ]
-        else:
-          prefix = name + "."
-          sub_errors = value.FindInitializationErrors()
-          errors += [ prefix + error for error in sub_errors ]
-
-    return errors
-
-  cls.FindInitializationErrors = FindInitializationErrors
-
-
-def _AddMergeFromMethod(cls):
-  LABEL_REPEATED = _FieldDescriptor.LABEL_REPEATED
-  CPPTYPE_MESSAGE = _FieldDescriptor.CPPTYPE_MESSAGE
-
-  def MergeFrom(self, msg):
-    if not isinstance(msg, cls):
-      raise TypeError(
-          "Parameter to MergeFrom() must be instance of same class.")
-
-    assert msg is not self
-    self._Modified()
-
-    fields = self._fields
-
-    for field, value in msg._fields.iteritems():
-      if field.label == LABEL_REPEATED:
-        field_value = fields.get(field)
-        if field_value is None:
-          # Construct a new object to represent this field.
-          field_value = field._default_constructor(self)
-          fields[field] = field_value
-        field_value.MergeFrom(value)
-      elif field.cpp_type == CPPTYPE_MESSAGE:
-        if value._is_present_in_parent:
-          field_value = fields.get(field)
-          if field_value is None:
-            # Construct a new object to represent this field.
-            field_value = field._default_constructor(self)
-            fields[field] = field_value
-          field_value.MergeFrom(value)
-      else:
-        self._fields[field] = value
-  cls.MergeFrom = MergeFrom
-
-
-def _AddMessageMethods(message_descriptor, cls):
-  """Adds implementations of all Message methods to cls."""
-  _AddListFieldsMethod(message_descriptor, cls)
-  _AddHasFieldMethod(message_descriptor, cls)
-  _AddClearFieldMethod(message_descriptor, cls)
-  if message_descriptor.is_extendable:
-    _AddClearExtensionMethod(cls)
-    _AddHasExtensionMethod(cls)
-  _AddClearMethod(message_descriptor, cls)
-  _AddEqualsMethod(message_descriptor, cls)
-  _AddStrMethod(message_descriptor, cls)
-  _AddSetListenerMethod(cls)
-  _AddByteSizeMethod(message_descriptor, cls)
-  _AddSerializeToStringMethod(message_descriptor, cls)
-  _AddSerializePartialToStringMethod(message_descriptor, cls)
-  _AddMergeFromStringMethod(message_descriptor, cls)
-  _AddIsInitializedMethod(message_descriptor, cls)
-  _AddMergeFromMethod(cls)
-
-
-def _AddPrivateHelperMethods(cls):
-  """Adds implementation of private helper methods to cls."""
-
-  def Modified(self):
-    """Sets the _cached_byte_size_dirty bit to true,
-    and propagates this to our listener iff this was a state change.
-    """
-
-    # Note:  Some callers check _cached_byte_size_dirty before calling
-    #   _Modified() as an extra optimization.  So, if this method is ever
-    #   changed such that it does stuff even when _cached_byte_size_dirty is
-    #   already true, the callers need to be updated.
-    if not self._cached_byte_size_dirty:
-      self._cached_byte_size_dirty = True
-      self._listener_for_children.dirty = True
-      self._is_present_in_parent = True
-      self._listener.Modified()
-
-  cls._Modified = Modified
-  cls.SetInParent = Modified
-
-
-class _Listener(object):
-
-  """MessageListener implementation that a parent message registers with its
-  child message.
-
-  In order to support semantics like:
-
-    foo.bar.baz.qux = 23
-    assert foo.HasField('bar')
-
-  ...child objects must have back references to their parents.
-  This helper class is at the heart of this support.
-  """
-
-  def __init__(self, parent_message):
-    """Args:
-      parent_message: The message whose _Modified() method we should call when
-        we receive Modified() messages.
-    """
-    # This listener establishes a back reference from a child (contained) object
-    # to its parent (containing) object.  We make this a weak reference to avoid
-    # creating cyclic garbage when the client finishes with the 'parent' object
-    # in the tree.
-    if isinstance(parent_message, weakref.ProxyType):
-      self._parent_message_weakref = parent_message
-    else:
-      self._parent_message_weakref = weakref.proxy(parent_message)
-
-    # As an optimization, we also indicate directly on the listener whether
-    # or not the parent message is dirty.  This way we can avoid traversing
-    # up the tree in the common case.
-    self.dirty = False
-
-  def Modified(self):
-    if self.dirty:
-      return
-    try:
-      # Propagate the signal to our parents iff this is the first field set.
-      self._parent_message_weakref._Modified()
-    except ReferenceError:
-      # We can get here if a client has kept a reference to a child object,
-      # and is now setting a field on it, but the child's parent has been
-      # garbage-collected.  This is not an error.
-      pass
-
-
-# TODO(robinson): Move elsewhere?  This file is getting pretty ridiculous...
-# TODO(robinson): Unify error handling of "unknown extension" crap.
-# TODO(robinson): Support iteritems()-style iteration over all
-# extensions with the "has" bits turned on?
-class _ExtensionDict(object):
-
-  """Dict-like container for supporting an indexable "Extensions"
-  field on proto instances.
-
-  Note that in all cases we expect extension handles to be
-  FieldDescriptors.
-  """
-
-  def __init__(self, extended_message):
-    """extended_message: Message instance for which we are the Extensions dict.
-    """
-
-    self._extended_message = extended_message
-
-  def __getitem__(self, extension_handle):
-    """Returns the current value of the given extension handle."""
-
-    _VerifyExtensionHandle(self._extended_message, extension_handle)
-
-    result = self._extended_message._fields.get(extension_handle)
-    if result is not None:
-      return result
-
-    if extension_handle.label == _FieldDescriptor.LABEL_REPEATED:
-      result = extension_handle._default_constructor(self._extended_message)
-    elif extension_handle.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
-      result = extension_handle.message_type._concrete_class()
-      try:
-        result._SetListener(self._extended_message._listener_for_children)
-      except ReferenceError:
-        pass
-    else:
-      # Singular scalar -- just return the default without inserting into the
-      # dict.
-      return extension_handle.default_value
-
-    # Atomically check if another thread has preempted us and, if not, swap
-    # in the new object we just created.  If someone has preempted us, we
-    # take that object and discard ours.
-    # WARNING:  We are relying on setdefault() being atomic.  This is true
-    #   in CPython but we haven't investigated others.  This warning appears
-    #   in several other locations in this file.
-    result = self._extended_message._fields.setdefault(
-        extension_handle, result)
-
-    return result
-
-  def __eq__(self, other):
-    if not isinstance(other, self.__class__):
-      return False
-
-    my_fields = self._extended_message.ListFields()
-    other_fields = other._extended_message.ListFields()
-
-    # Get rid of non-extension fields.
-    my_fields    = [ field for field in my_fields    if field.is_extension ]
-    other_fields = [ field for field in other_fields if field.is_extension ]
-
-    return my_fields == other_fields
-
-  def __ne__(self, other):
-    return not self == other
-
-  # Note that this is only meaningful for non-repeated, scalar extension
-  # fields.  Note also that we may have to call _Modified() when we do
-  # successfully set a field this way, to set any necssary "has" bits in the
-  # ancestors of the extended message.
-  def __setitem__(self, extension_handle, value):
-    """If extension_handle specifies a non-repeated, scalar extension
-    field, sets the value of that field.
-    """
-
-    _VerifyExtensionHandle(self._extended_message, extension_handle)
-
-    if (extension_handle.label == _FieldDescriptor.LABEL_REPEATED or
-        extension_handle.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE):
-      raise TypeError(
-          'Cannot assign to extension "%s" because it is a repeated or '
-          'composite type.' % extension_handle.full_name)
-
-    # It's slightly wasteful to lookup the type checker each time,
-    # but we expect this to be a vanishingly uncommon case anyway.
-    type_checker = type_checkers.GetTypeChecker(
-        extension_handle.cpp_type, extension_handle.type)
-    type_checker.CheckValue(value)
-    self._extended_message._fields[extension_handle] = value
-    self._extended_message._Modified()
-
-  def _FindExtensionByName(self, name):
-    """Tries to find a known extension with the specified name.
-
-    Args:
-      name: Extension full name.
-
-    Returns:
-      Extension field descriptor.
-    """
-    return self._extended_message._extensions_by_name.get(name, None)

+ 55 - 37
python/google/protobuf/text_format.py

@@ -53,24 +53,26 @@ class ParseError(Exception):
   """Thrown in case of ASCII parsing error."""
 
 
-def MessageToString(message):
+def MessageToString(message, as_utf8=False, as_one_line=False):
   out = cStringIO.StringIO()
-  PrintMessage(message, out)
+  PrintMessage(message, out, as_utf8=as_utf8, as_one_line=as_one_line)
   result = out.getvalue()
   out.close()
+  if as_one_line:
+    return result.rstrip()
   return result
 
 
-def PrintMessage(message, out, indent = 0):
+def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False):
   for field, value in message.ListFields():
     if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
       for element in value:
-        PrintField(field, element, out, indent)
+        PrintField(field, element, out, indent, as_utf8, as_one_line)
     else:
-      PrintField(field, value, out, indent)
+      PrintField(field, value, out, indent, as_utf8, as_one_line)
 
 
-def PrintField(field, value, out, indent = 0):
+def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False):
   """Print a single field name/value pair.  For repeated fields, the value
   should be a single element."""
 
@@ -96,23 +98,35 @@ def PrintField(field, value, out, indent = 0):
     # don't include it.
     out.write(': ')
 
-  PrintFieldValue(field, value, out, indent)
-  out.write('\n')
+  PrintFieldValue(field, value, out, indent, as_utf8, as_one_line)
+  if as_one_line:
+    out.write(' ')
+  else:
+    out.write('\n')
 
 
-def PrintFieldValue(field, value, out, indent = 0):
+def PrintFieldValue(field, value, out, indent=0,
+                    as_utf8=False, as_one_line=False):
   """Print a single field value (not including name).  For repeated fields,
   the value should be a single element."""
 
   if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
-    out.write(' {\n')
-    PrintMessage(value, out, indent + 2)
-    out.write(' ' * indent + '}')
+    if as_one_line:
+      out.write(' { ')
+      PrintMessage(value, out, indent, as_utf8, as_one_line)
+      out.write('}')
+    else:
+      out.write(' {\n')
+      PrintMessage(value, out, indent + 2, as_utf8, as_one_line)
+      out.write(' ' * indent + '}')
   elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
     out.write(field.enum_type.values_by_number[value].name)
   elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
     out.write('\"')
-    out.write(_CEscape(value))
+    if type(value) is unicode:
+      out.write(_CEscape(value.encode('utf-8'), as_utf8))
+    else:
+      out.write(_CEscape(value, as_utf8))
     out.write('\"')
   elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
     if value:
@@ -334,10 +348,10 @@ class _Tokenizer(object):
     Returns:
       True iff the end was reached.
     """
-    return not self._lines and not self._current_line
+    return self.token == ''
 
   def _PopLine(self):
-    while not self._current_line:
+    while len(self._current_line) <= self._column:
       if not self._lines:
         self._current_line = ''
         return
@@ -348,11 +362,10 @@ class _Tokenizer(object):
   def _SkipWhitespace(self):
     while True:
       self._PopLine()
-      match = re.match(self._WHITESPACE, self._current_line)
+      match = self._WHITESPACE.match(self._current_line, self._column)
       if not match:
         break
       length = len(match.group(0))
-      self._current_line = self._current_line[length:]
       self._column += length
 
   def TryConsume(self, token):
@@ -402,7 +415,7 @@ class _Tokenizer(object):
       ParseError: If an identifier couldn't be consumed.
     """
     result = self.token
-    if not re.match(self._IDENTIFIER, result):
+    if not self._IDENTIFIER.match(result):
       raise self._ParseError('Expected identifier.')
     self.NextToken()
     return result
@@ -481,13 +494,13 @@ class _Tokenizer(object):
       ParseError: If a floating point number couldn't be consumed.
     """
     text = self.token
-    if re.match(self._FLOAT_INFINITY, text):
+    if self._FLOAT_INFINITY.match(text):
       self.NextToken()
       if text.startswith('-'):
         return -_INFINITY
       return _INFINITY
 
-    if re.match(self._FLOAT_NAN, text):
+    if self._FLOAT_NAN.match(text):
       self.NextToken()
       return _NAN
 
@@ -507,10 +520,10 @@ class _Tokenizer(object):
     Raises:
       ParseError: If a boolean value couldn't be consumed.
     """
-    if self.token == 'true':
+    if self.token in ('true', 't', '1'):
       self.NextToken()
       return True
-    elif self.token == 'false':
+    elif self.token in ('false', 'f', '0'):
       self.NextToken()
       return False
     else:
@@ -525,7 +538,11 @@ class _Tokenizer(object):
     Raises:
       ParseError: If a string value couldn't be consumed.
     """
-    return unicode(self.ConsumeByteString(), 'utf-8')
+    bytes = self.ConsumeByteString()
+    try:
+      return unicode(bytes, 'utf-8')
+    except UnicodeDecodeError, e:
+      raise self._StringParseError(e)
 
   def ConsumeByteString(self):
     """Consumes a byte array value.
@@ -609,7 +626,7 @@ class _Tokenizer(object):
   def _ParseError(self, message):
     """Creates and *returns* a ParseError for the current token."""
     return ParseError('%d:%d : %s' % (
-        self._line + 1, self._column + 1, message))
+        self._line + 1, self._column - len(self.token) + 1, message))
 
   def _IntegerParseError(self, e):
     return self._ParseError('Couldn\'t parse integer: ' + str(e))
@@ -617,27 +634,27 @@ class _Tokenizer(object):
   def _FloatParseError(self, e):
     return self._ParseError('Couldn\'t parse number: ' + str(e))
 
+  def _StringParseError(self, e):
+    return self._ParseError('Couldn\'t parse string: ' + str(e))
+
   def NextToken(self):
     """Reads the next meaningful token."""
     self._previous_line = self._line
     self._previous_column = self._column
-    if self.AtEnd():
-      self.token = ''
-      return
+
     self._column += len(self.token)
+    self._SkipWhitespace()
 
-    # Make sure there is data to work on.
-    self._PopLine()
+    if not self._lines and len(self._current_line) <= self._column:
+      self.token = ''
+      return
 
-    match = re.match(self._TOKEN, self._current_line)
+    match = self._TOKEN.match(self._current_line, self._column)
     if match:
       token = match.group(0)
-      self._current_line = self._current_line[len(token):]
       self.token = token
     else:
-      self.token = self._current_line[0]
-      self._current_line = self._current_line[1:]
-    self._SkipWhitespace()
+      self.token = self._current_line[self._column]
 
 
 # text.encode('string_escape') does not seem to satisfy our needs as it
@@ -645,7 +662,7 @@ class _Tokenizer(object):
 # C++ unescaping function allows hex escapes to be any length.  So,
 # "\0011".encode('string_escape') ends up being "\\x011", which will be
 # decoded in C++ as a single-character string with char code 0x11.
-def _CEscape(text):
+def _CEscape(text, as_utf8):
   def escape(c):
     o = ord(c)
     if o == 10: return r"\n"   # optional escape
@@ -656,12 +673,13 @@ def _CEscape(text):
     if o == 34: return r'\"'   # necessary escape
     if o == 92: return r"\\"   # necessary escape
 
-    if o >= 127 or o < 32: return "\\%03o" % o # necessary escapes
+    # necessary escapes
+    if not as_utf8 and (o >= 127 or o < 32): return "\\%03o" % o
     return c
   return "".join([escape(c) for c in text])
 
 
-_CUNESCAPE_HEX = re.compile('\\\\x([0-9a-fA-F]{2}|[0-9a-f-A-F])')
+_CUNESCAPE_HEX = re.compile('\\\\x([0-9a-fA-F]{2}|[0-9a-fA-F])')
 
 
 def _CUnescape(text):

+ 20 - 3
python/setup.py

@@ -7,7 +7,7 @@
 from ez_setup import use_setuptools
 use_setuptools()
 
-from setuptools import setup
+from setuptools import setup, Extension
 from distutils.spawn import find_executable
 import sys
 import os
@@ -60,6 +60,7 @@ def MakeTestSuite():
     del sys.modules['google']
 
   generate_proto("../src/google/protobuf/unittest.proto")
+  generate_proto("../src/google/protobuf/unittest_custom_options.proto")
   generate_proto("../src/google/protobuf/unittest_import.proto")
   generate_proto("../src/google/protobuf/unittest_mset.proto")
   generate_proto("../src/google/protobuf/unittest_no_generic_services.proto")
@@ -94,24 +95,39 @@ if __name__ == '__main__':
     for (dirpath, dirnames, filenames) in os.walk("."):
       for filename in filenames:
         filepath = os.path.join(dirpath, filename)
-        if filepath.endswith("_pb2.py") or filepath.endswith(".pyc"):
+        if filepath.endswith("_pb2.py") or filepath.endswith(".pyc") or \
+          filepath.endswith(".so") or filepath.endswith(".o"):
           os.remove(filepath)
   else:
     # Generate necessary .proto file if it doesn't exist.
     # TODO(kenton):  Maybe we should hook this into a distutils command?
     generate_proto("../src/google/protobuf/descriptor.proto")
 
+  python_c_extension = Extension("google.protobuf.internal._net_proto2___python",
+                                 [ "google/protobuf/pyext/python_descriptor.cc",
+                                   "google/protobuf/pyext/python_protobuf.cc",
+                                   "google/protobuf/pyext/python-proto2.cc",
+                                   ],
+                                 include_dirs = [ "../src", ".", ],
+                                 libraries = [ "protobuf" ],
+                                 runtime_library_dirs = [ "../src/.libs" ],
+                                 library_dirs = [ "../src/.libs" ],
+                                 )
+
   setup(name = 'protobuf',
-        version = '2.3.1-pre',
+        version = '2.4.0-pre',
         packages = [ 'google' ],
         namespace_packages = [ 'google' ],
         test_suite = 'setup.MakeTestSuite',
         # Must list modules explicitly so that we don't install tests.
         py_modules = [
+          'google.protobuf.internal.api_implementation',
           'google.protobuf.internal.containers',
+          'google.protobuf.internal.cpp_message',
           'google.protobuf.internal.decoder',
           'google.protobuf.internal.encoder',
           'google.protobuf.internal.message_listener',
+          'google.protobuf.internal.python_message',
           'google.protobuf.internal.type_checkers',
           'google.protobuf.internal.wire_format',
           'google.protobuf.descriptor',
@@ -121,6 +137,7 @@ if __name__ == '__main__':
           'google.protobuf.service',
           'google.protobuf.service_reflection',
           'google.protobuf.text_format' ],
+        ext_modules = [ python_c_extension ],
         url = 'http://code.google.com/p/protobuf/',
         maintainer = maintainer_email,
         maintainer_email = 'protobuf@googlegroups.com',

+ 2 - 0
src/Makefile.am

@@ -178,6 +178,8 @@ libprotoc_la_SOURCES =                                         \
   google/protobuf/compiler/java/java_primitive_field.h         \
   google/protobuf/compiler/java/java_service.cc                \
   google/protobuf/compiler/java/java_service.h                 \
+  google/protobuf/compiler/java/java_string_field.cc           \
+  google/protobuf/compiler/java/java_string_field.h            \
   google/protobuf/compiler/python/python_generator.cc
 
 bin_PROGRAMS = protoc

+ 8 - 3
src/google/protobuf/compiler/code_generator.cc

@@ -42,14 +42,19 @@ namespace protobuf {
 namespace compiler {
 
 CodeGenerator::~CodeGenerator() {}
-OutputDirectory::~OutputDirectory() {}
+GeneratorContext::~GeneratorContext() {}
 
-io::ZeroCopyOutputStream* OutputDirectory::OpenForInsert(
+io::ZeroCopyOutputStream* GeneratorContext::OpenForInsert(
     const string& filename, const string& insertion_point) {
-  GOOGLE_LOG(FATAL) << "This OutputDirectory does not support insertion.";
+  GOOGLE_LOG(FATAL) << "This GeneratorContext does not support insertion.";
   return NULL;  // make compiler happy
 }
 
+void GeneratorContext::ListParsedFiles(
+    vector<const FileDescriptor*>* output) {
+  GOOGLE_LOG(FATAL) << "This GeneratorContext does not support ListParsedFiles";
+}
+
 // Parses a set of comma-delimited name/value pairs.
 void ParseGeneratorParameter(const string& text,
                              vector<pair<string, string> >* output) {

+ 17 - 7
src/google/protobuf/compiler/code_generator.h

@@ -53,7 +53,7 @@ namespace compiler {
 
 // Defined in this file.
 class CodeGenerator;
-class OutputDirectory;
+class GeneratorContext;
 
 // The abstract interface to a class which generates code implementing a
 // particular proto file in a particular language.  A number of these may
@@ -76,7 +76,7 @@ class LIBPROTOC_EXPORT CodeGenerator {
   // the problem (e.g. "invalid parameter") and returns false.
   virtual bool Generate(const FileDescriptor* file,
                         const string& parameter,
-                        OutputDirectory* output_directory,
+                        GeneratorContext* generator_context,
                         string* error) const = 0;
 
  private:
@@ -85,11 +85,12 @@ class LIBPROTOC_EXPORT CodeGenerator {
 
 // CodeGenerators generate one or more files in a given directory.  This
 // abstract interface represents the directory to which the CodeGenerator is
-// to write.
-class LIBPROTOC_EXPORT OutputDirectory {
+// to write and other information about the context in which the Generator
+// runs.
+class LIBPROTOC_EXPORT GeneratorContext {
  public:
-  inline OutputDirectory() {}
-  virtual ~OutputDirectory();
+  inline GeneratorContext() {}
+  virtual ~GeneratorContext();
 
   // Opens the given file, truncating it if it exists, and returns a
   // ZeroCopyOutputStream that writes to the file.  The caller takes ownership
@@ -112,10 +113,19 @@ class LIBPROTOC_EXPORT OutputDirectory {
   virtual io::ZeroCopyOutputStream* OpenForInsert(
       const string& filename, const string& insertion_point);
 
+  // Returns a vector of FileDescriptors for all the files being compiled
+  // in this run.  Useful for languages, such as Go, that treat files
+  // differently when compiled as a set rather than individually.
+  virtual void ListParsedFiles(vector<const FileDescriptor*>* output);
+
  private:
-  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(OutputDirectory);
+  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(GeneratorContext);
 };
 
+// The type GeneratorContext was once called OutputDirectory. This typedef
+// provides backward compatibility.
+typedef GeneratorContext OutputDirectory;
+
 // Several code generators treat the parameter argument as holding a
 // list of options separated by commas.  This helper function parses
 // a set of comma-delimited name/value pairs: e.g.,

+ 45 - 37
src/google/protobuf/compiler/command_line_interface.cc

@@ -48,6 +48,9 @@
 #include <iostream>
 #include <ctype.h>
 
+#include <google/protobuf/stubs/hash.h>
+
+#include <google/protobuf/stubs/common.h>
 #include <google/protobuf/compiler/importer.h>
 #include <google/protobuf/compiler/code_generator.h>
 #include <google/protobuf/compiler/plugin.pb.h>
@@ -58,12 +61,10 @@
 #include <google/protobuf/dynamic_message.h>
 #include <google/protobuf/io/zero_copy_stream_impl.h>
 #include <google/protobuf/io/printer.h>
-#include <google/protobuf/stubs/common.h>
 #include <google/protobuf/stubs/strutil.h>
 #include <google/protobuf/stubs/substitute.h>
 #include <google/protobuf/stubs/map-util.h>
 #include <google/protobuf/stubs/stl_util-inl.h>
-#include <google/protobuf/stubs/hash.h>
 
 
 namespace google {
@@ -182,7 +183,7 @@ bool TryCreateParentDirectory(const string& prefix, const string& filename) {
 class CommandLineInterface::ErrorPrinter : public MultiFileErrorCollector,
                                            public io::ErrorCollector {
  public:
-  ErrorPrinter(ErrorFormat format, DiskSourceTree *tree = NULL) 
+  ErrorPrinter(ErrorFormat format, DiskSourceTree *tree = NULL)
     : format_(format), tree_(tree) {}
   ~ErrorPrinter() {}
 
@@ -191,8 +192,8 @@ class CommandLineInterface::ErrorPrinter : public MultiFileErrorCollector,
                 const string& message) {
 
     // Print full path when running under MSVS
-    std::string dfile;
-    if (format_ == CommandLineInterface::ERROR_FORMAT_MSVS && 
+    string dfile;
+    if (format_ == CommandLineInterface::ERROR_FORMAT_MSVS &&
         tree_ != NULL &&
         tree_->VirtualFileToDiskFile(filename, &dfile)) {
       cerr << dfile;
@@ -229,12 +230,12 @@ class CommandLineInterface::ErrorPrinter : public MultiFileErrorCollector,
 
 // -------------------------------------------------------------------
 
-// An OutputDirectory implementation that buffers files in memory, then dumps
+// A GeneratorContext implementation that buffers files in memory, then dumps
 // them all to disk on demand.
-class CommandLineInterface::MemoryOutputDirectory : public OutputDirectory {
+class CommandLineInterface::GeneratorContextImpl : public GeneratorContext {
  public:
-  MemoryOutputDirectory();
-  ~MemoryOutputDirectory();
+  GeneratorContextImpl(const vector<const FileDescriptor*>& parsed_files);
+  ~GeneratorContextImpl();
 
   // Write all files in the directory to disk at the given output location,
   // which must end in a '/'.
@@ -248,10 +249,13 @@ class CommandLineInterface::MemoryOutputDirectory : public OutputDirectory {
   // format, unless one has already been written.
   void AddJarManifest();
 
-  // implements OutputDirectory --------------------------------------
+  // implements GeneratorContext --------------------------------------
   io::ZeroCopyOutputStream* Open(const string& filename);
   io::ZeroCopyOutputStream* OpenForInsert(
       const string& filename, const string& insertion_point);
+  void ListParsedFiles(vector<const FileDescriptor*>* output) {
+    *output = parsed_files_;
+  }
 
  private:
   friend class MemoryOutputStream;
@@ -259,14 +263,15 @@ class CommandLineInterface::MemoryOutputDirectory : public OutputDirectory {
   // map instead of hash_map so that files are written in order (good when
   // writing zips).
   map<string, string*> files_;
+  const vector<const FileDescriptor*>& parsed_files_;
   bool had_error_;
 };
 
 class CommandLineInterface::MemoryOutputStream
     : public io::ZeroCopyOutputStream {
  public:
-  MemoryOutputStream(MemoryOutputDirectory* directory, const string& filename);
-  MemoryOutputStream(MemoryOutputDirectory* directory, const string& filename,
+  MemoryOutputStream(GeneratorContextImpl* directory, const string& filename);
+  MemoryOutputStream(GeneratorContextImpl* directory, const string& filename,
                      const string& insertion_point);
   virtual ~MemoryOutputStream();
 
@@ -277,7 +282,7 @@ class CommandLineInterface::MemoryOutputStream
 
  private:
   // Where to insert the string when it's done.
-  MemoryOutputDirectory* directory_;
+  GeneratorContextImpl* directory_;
   string filename_;
   string insertion_point_;
 
@@ -290,14 +295,17 @@ class CommandLineInterface::MemoryOutputStream
 
 // -------------------------------------------------------------------
 
-CommandLineInterface::MemoryOutputDirectory::MemoryOutputDirectory()
-    : had_error_(false) {}
+CommandLineInterface::GeneratorContextImpl::GeneratorContextImpl(
+    const vector<const FileDescriptor*>& parsed_files)
+    : parsed_files_(parsed_files),
+      had_error_(false) {
+}
 
-CommandLineInterface::MemoryOutputDirectory::~MemoryOutputDirectory() {
+CommandLineInterface::GeneratorContextImpl::~GeneratorContextImpl() {
   STLDeleteValues(&files_);
 }
 
-bool CommandLineInterface::MemoryOutputDirectory::WriteAllToDisk(
+bool CommandLineInterface::GeneratorContextImpl::WriteAllToDisk(
     const string& prefix) {
   if (had_error_) {
     return false;
@@ -372,7 +380,7 @@ bool CommandLineInterface::MemoryOutputDirectory::WriteAllToDisk(
   return true;
 }
 
-bool CommandLineInterface::MemoryOutputDirectory::WriteAllToZip(
+bool CommandLineInterface::GeneratorContextImpl::WriteAllToZip(
     const string& filename) {
   if (had_error_) {
     return false;
@@ -413,7 +421,7 @@ bool CommandLineInterface::MemoryOutputDirectory::WriteAllToZip(
   return true;
 }
 
-void CommandLineInterface::MemoryOutputDirectory::AddJarManifest() {
+void CommandLineInterface::GeneratorContextImpl::AddJarManifest() {
   string** map_slot = &files_["META-INF/MANIFEST.MF"];
   if (*map_slot == NULL) {
     *map_slot = new string(
@@ -423,13 +431,13 @@ void CommandLineInterface::MemoryOutputDirectory::AddJarManifest() {
   }
 }
 
-io::ZeroCopyOutputStream* CommandLineInterface::MemoryOutputDirectory::Open(
+io::ZeroCopyOutputStream* CommandLineInterface::GeneratorContextImpl::Open(
     const string& filename) {
   return new MemoryOutputStream(this, filename);
 }
 
 io::ZeroCopyOutputStream*
-CommandLineInterface::MemoryOutputDirectory::OpenForInsert(
+CommandLineInterface::GeneratorContextImpl::OpenForInsert(
     const string& filename, const string& insertion_point) {
   return new MemoryOutputStream(this, filename, insertion_point);
 }
@@ -437,14 +445,14 @@ CommandLineInterface::MemoryOutputDirectory::OpenForInsert(
 // -------------------------------------------------------------------
 
 CommandLineInterface::MemoryOutputStream::MemoryOutputStream(
-    MemoryOutputDirectory* directory, const string& filename)
+    GeneratorContextImpl* directory, const string& filename)
     : directory_(directory),
       filename_(filename),
       inner_(new io::StringOutputStream(&data_)) {
 }
 
 CommandLineInterface::MemoryOutputStream::MemoryOutputStream(
-    MemoryOutputDirectory* directory, const string& filename,
+    GeneratorContextImpl* directory, const string& filename,
     const string& insertion_point)
     : directory_(directory),
       filename_(filename),
@@ -613,11 +621,11 @@ int CommandLineInterface::Run(int argc, const char* const argv[]) {
     }
   }
 
-  // We construct a separate OutputDirectory for each output location.  Note
+  // We construct a separate GeneratorContext for each output location.  Note
   // that two code generators may output to the same location, in which case
-  // they should share a single OutputDirectory (so that OpenForInsert() works).
-  typedef hash_map<string, MemoryOutputDirectory*> OutputDirectoryMap;
-  OutputDirectoryMap output_directories;
+  // they should share a single GeneratorContext so that OpenForInsert() works.
+  typedef hash_map<string, GeneratorContextImpl*> GeneratorContextMap;
+  GeneratorContextMap output_directories;
 
   // Generate output.
   if (mode_ == MODE_COMPILE) {
@@ -627,11 +635,11 @@ int CommandLineInterface::Run(int argc, const char* const argv[]) {
           !HasSuffixString(output_location, ".jar")) {
         AddTrailingSlash(&output_location);
       }
-      MemoryOutputDirectory** map_slot = &output_directories[output_location];
+      GeneratorContextImpl** map_slot = &output_directories[output_location];
 
       if (*map_slot == NULL) {
         // First time we've seen this output location.
-        *map_slot = new MemoryOutputDirectory;
+        *map_slot = new GeneratorContextImpl(parsed_files);
       }
 
       if (!GenerateOutput(parsed_files, output_directives_[i], *map_slot)) {
@@ -642,10 +650,10 @@ int CommandLineInterface::Run(int argc, const char* const argv[]) {
   }
 
   // Write all output to disk.
-  for (OutputDirectoryMap::iterator iter = output_directories.begin();
+  for (GeneratorContextMap::iterator iter = output_directories.begin();
        iter != output_directories.end(); ++iter) {
     const string& location = iter->first;
-    MemoryOutputDirectory* directory = iter->second;
+    GeneratorContextImpl* directory = iter->second;
     if (HasSuffixString(location, "/")) {
       if (!directory->WriteAllToDisk(location)) {
         STLDeleteValues(&output_directories);
@@ -1107,7 +1115,7 @@ void CommandLineInterface::PrintHelpText() {
 bool CommandLineInterface::GenerateOutput(
     const vector<const FileDescriptor*>& parsed_files,
     const OutputDirective& output_directive,
-    OutputDirectory* output_directory) {
+    GeneratorContext* generator_context) {
   // Call the generator.
   string error;
   if (output_directive.generator == NULL) {
@@ -1122,7 +1130,7 @@ bool CommandLineInterface::GenerateOutput(
 
     if (!GeneratePluginOutput(parsed_files, plugin_name,
                               output_directive.parameter,
-                              output_directory, &error)) {
+                              generator_context, &error)) {
       cerr << output_directive.name << ": " << error << endl;
       return false;
     }
@@ -1131,7 +1139,7 @@ bool CommandLineInterface::GenerateOutput(
     for (int i = 0; i < parsed_files.size(); i++) {
       if (!output_directive.generator->Generate(
           parsed_files[i], output_directive.parameter,
-          output_directory, &error)) {
+          generator_context, &error)) {
         // Generator returned an error.
         cerr << output_directive.name << ": " << parsed_files[i]->name() << ": "
              << error << endl;
@@ -1147,7 +1155,7 @@ bool CommandLineInterface::GeneratePluginOutput(
     const vector<const FileDescriptor*>& parsed_files,
     const string& plugin_name,
     const string& parameter,
-    OutputDirectory* output_directory,
+    GeneratorContext* generator_context,
     string* error) {
   CodeGeneratorRequest request;
   CodeGeneratorResponse response;
@@ -1190,14 +1198,14 @@ bool CommandLineInterface::GeneratePluginOutput(
       // We reset current_output to NULL first so that the old file is closed
       // before the new one is opened.
       current_output.reset();
-      current_output.reset(output_directory->OpenForInsert(
+      current_output.reset(generator_context->OpenForInsert(
           output_file.name(), output_file.insertion_point()));
     } else if (!output_file.name().empty()) {
       // Starting a new file.  Open it.
       // We reset current_output to NULL first so that the old file is closed
       // before the new one is opened.
       current_output.reset();
-      current_output.reset(output_directory->Open(output_file.name()));
+      current_output.reset(generator_context->Open(output_file.name()));
     } else if (current_output == NULL) {
       *error = strings::Substitute(
         "$0: First file chunk returned by plugin did not specify a file name.",

+ 4 - 4
src/google/protobuf/compiler/command_line_interface.h

@@ -56,7 +56,7 @@ template<typename T> class RepeatedPtrField;  // repeated_field.h
 namespace compiler {
 
 class CodeGenerator;        // code_generator.h
-class OutputDirectory;      // code_generator.h
+class GeneratorContext;      // code_generator.h
 class DiskSourceTree;       // importer.h
 
 // This class implements the command-line interface to the protocol compiler.
@@ -174,7 +174,7 @@ class LIBPROTOC_EXPORT CommandLineInterface {
   // -----------------------------------------------------------------
 
   class ErrorPrinter;
-  class MemoryOutputDirectory;
+  class GeneratorContextImpl;
   class MemoryOutputStream;
 
   // Clear state from previous Run().
@@ -212,11 +212,11 @@ class LIBPROTOC_EXPORT CommandLineInterface {
   struct OutputDirective;  // see below
   bool GenerateOutput(const vector<const FileDescriptor*>& parsed_files,
                       const OutputDirective& output_directive,
-                      OutputDirectory* output_directory);
+                      GeneratorContext* generator_context);
   bool GeneratePluginOutput(const vector<const FileDescriptor*>& parsed_files,
                             const string& plugin_name,
                             const string& parameter,
-                            OutputDirectory* output_directory,
+                            GeneratorContext* generator_context,
                             string* error);
 
   // Implements --encode and --decode.

+ 58 - 7
src/google/protobuf/compiler/command_line_interface_unittest.cc

@@ -143,6 +143,10 @@ class CommandLineInterfaceTest : public testing::Test {
                        const string& proto_name,
                        const string& message_name,
                        const string& output_directory);
+  void ExpectGeneratedWithMultipleInputs(const string& generator_name,
+                                         const string& all_proto_names,
+                                         const string& proto_name,
+                                         const string& message_name);
   void ExpectGeneratedWithInsertions(const string& generator_name,
                                      const string& parameter,
                                      const string& insertions,
@@ -190,7 +194,7 @@ class CommandLineInterfaceTest::NullCodeGenerator : public CodeGenerator {
   // implements CodeGenerator ----------------------------------------
   bool Generate(const FileDescriptor* file,
                 const string& parameter,
-                OutputDirectory* output_directory,
+                GeneratorContext* context,
                 string* error) const {
     called_ = true;
     parameter_ = parameter;
@@ -251,7 +255,6 @@ void CommandLineInterfaceTest::Run(const string& command) {
 
   if (!disallow_plugins_) {
     cli_.AllowPlugins("prefix-");
-
     const char* possible_paths[] = {
       // When building with shared libraries, libtool hides the real executable
       // in .libs and puts a fake wrapper in the current directory.
@@ -353,7 +356,8 @@ void CommandLineInterfaceTest::ExpectGenerated(
     const string& proto_name,
     const string& message_name) {
   MockCodeGenerator::ExpectGenerated(
-      generator_name, parameter, "", proto_name, message_name, temp_directory_);
+      generator_name, parameter, "", proto_name, message_name, proto_name,
+      temp_directory_);
 }
 
 void CommandLineInterfaceTest::ExpectGenerated(
@@ -363,10 +367,21 @@ void CommandLineInterfaceTest::ExpectGenerated(
     const string& message_name,
     const string& output_directory) {
   MockCodeGenerator::ExpectGenerated(
-      generator_name, parameter, "", proto_name, message_name,
+      generator_name, parameter, "", proto_name, message_name, proto_name,
       temp_directory_ + "/" + output_directory);
 }
 
+void CommandLineInterfaceTest::ExpectGeneratedWithMultipleInputs(
+    const string& generator_name,
+    const string& all_proto_names,
+    const string& proto_name,
+    const string& message_name) {
+  MockCodeGenerator::ExpectGenerated(
+      generator_name, "", "", proto_name, message_name,
+      all_proto_names,
+      temp_directory_);
+}
+
 void CommandLineInterfaceTest::ExpectGeneratedWithInsertions(
     const string& generator_name,
     const string& parameter,
@@ -375,7 +390,7 @@ void CommandLineInterfaceTest::ExpectGeneratedWithInsertions(
     const string& message_name) {
   MockCodeGenerator::ExpectGenerated(
       generator_name, parameter, insertions, proto_name, message_name,
-      temp_directory_);
+      proto_name, temp_directory_);
 }
 
 void CommandLineInterfaceTest::ExpectNullCodeGeneratorCalled(
@@ -455,8 +470,44 @@ TEST_F(CommandLineInterfaceTest, MultipleInputs) {
       "--proto_path=$tmpdir foo.proto bar.proto");
 
   ExpectNoErrors();
-  ExpectGenerated("test_generator", "", "foo.proto", "Foo");
-  ExpectGenerated("test_generator", "", "bar.proto", "Bar");
+  ExpectGeneratedWithMultipleInputs("test_generator", "foo.proto,bar.proto",
+                                    "foo.proto", "Foo");
+  ExpectGeneratedWithMultipleInputs("test_generator", "foo.proto,bar.proto",
+                                    "bar.proto", "Bar");
+  ExpectGeneratedWithMultipleInputs("test_plugin", "foo.proto,bar.proto",
+                                    "foo.proto", "Foo");
+  ExpectGeneratedWithMultipleInputs("test_plugin", "foo.proto,bar.proto",
+                                    "bar.proto", "Bar");
+}
+
+TEST_F(CommandLineInterfaceTest, MultipleInputsWithImport) {
+  // Test parsing multiple input files with an import of a separate file.
+
+  CreateTempFile("foo.proto",
+    "syntax = \"proto2\";\n"
+    "message Foo {}\n");
+  CreateTempFile("bar.proto",
+    "syntax = \"proto2\";\n"
+    "import \"baz.proto\";\n"
+    "message Bar {\n"
+    "  optional Baz a = 1;\n"
+    "}\n");
+  CreateTempFile("baz.proto",
+    "syntax = \"proto2\";\n"
+    "message Baz {}\n");
+
+  Run("protocol_compiler --test_out=$tmpdir --plug_out=$tmpdir "
+      "--proto_path=$tmpdir foo.proto bar.proto");
+
+  ExpectNoErrors();
+  ExpectGeneratedWithMultipleInputs("test_generator", "foo.proto,bar.proto",
+                                    "foo.proto", "Foo");
+  ExpectGeneratedWithMultipleInputs("test_generator", "foo.proto,bar.proto",
+                                    "bar.proto", "Bar");
+  ExpectGeneratedWithMultipleInputs("test_plugin", "foo.proto,bar.proto",
+                                    "foo.proto", "Foo");
+  ExpectGeneratedWithMultipleInputs("test_plugin", "foo.proto,bar.proto",
+                                    "bar.proto", "Bar");
 }
 
 TEST_F(CommandLineInterfaceTest, CreateDirectory) {

+ 16 - 16
src/google/protobuf/compiler/cpp/cpp_bootstrap_unittest.cc

@@ -79,10 +79,10 @@ class MockErrorCollector : public MultiFileErrorCollector {
   }
 };
 
-class MockOutputDirectory : public OutputDirectory {
+class MockGeneratorContext : public GeneratorContext {
  public:
-  MockOutputDirectory() {}
-  ~MockOutputDirectory() {
+  MockGeneratorContext() {}
+  ~MockGeneratorContext() {
     STLDeleteValues(&files_);
   }
 
@@ -102,7 +102,7 @@ class MockOutputDirectory : public OutputDirectory {
          "to your CL.";
   }
 
-  // implements OutputDirectory --------------------------------------
+  // implements GeneratorContext --------------------------------------
 
   virtual io::ZeroCopyOutputStream* Open(const string& filename) {
     string** map_slot = &files_[filename];
@@ -130,24 +130,24 @@ TEST(BootstrapTest, GeneratedDescriptorMatches) {
   ASSERT_TRUE(plugin_proto_file != NULL);
 
   CppGenerator generator;
-  MockOutputDirectory output_directory;
+  MockGeneratorContext context;
   string error;
   string parameter;
   parameter = "dllexport_decl=LIBPROTOBUF_EXPORT";
   ASSERT_TRUE(generator.Generate(proto_file, parameter,
-                                 &output_directory, &error));
+                                 &context, &error));
   parameter = "dllexport_decl=LIBPROTOC_EXPORT";
   ASSERT_TRUE(generator.Generate(plugin_proto_file, parameter,
-                                 &output_directory, &error));
-
-  output_directory.ExpectFileMatches("google/protobuf/descriptor.pb.h",
-                                     "google/protobuf/descriptor.pb.h");
-  output_directory.ExpectFileMatches("google/protobuf/descriptor.pb.cc",
-                                     "google/protobuf/descriptor.pb.cc");
-  output_directory.ExpectFileMatches("google/protobuf/compiler/plugin.pb.h",
-                                     "google/protobuf/compiler/plugin.pb.h");
-  output_directory.ExpectFileMatches("google/protobuf/compiler/plugin.pb.cc",
-                                     "google/protobuf/compiler/plugin.pb.cc");
+                                 &context, &error));
+
+  context.ExpectFileMatches("google/protobuf/descriptor.pb.h",
+                            "google/protobuf/descriptor.pb.h");
+  context.ExpectFileMatches("google/protobuf/descriptor.pb.cc",
+                            "google/protobuf/descriptor.pb.cc");
+  context.ExpectFileMatches("google/protobuf/compiler/plugin.pb.h",
+                            "google/protobuf/compiler/plugin.pb.h");
+  context.ExpectFileMatches("google/protobuf/compiler/plugin.pb.cc",
+                            "google/protobuf/compiler/plugin.pb.cc");
 }
 
 }  // namespace

+ 1 - 1
src/google/protobuf/compiler/cpp/cpp_enum_field.cc

@@ -85,7 +85,7 @@ GenerateInlineAccessorDefinitions(io::Printer* printer) const {
     "}\n"
     "inline void $classname$::set_$name$($type$ value) {\n"
     "  GOOGLE_DCHECK($type$_IsValid(value));\n"
-    "  _set_bit($index$);\n"
+    "  set_has_$name$();\n"
     "  $name$_ = value;\n"
     "}\n");
 }

+ 1 - 0
src/google/protobuf/compiler/cpp/cpp_file.cc

@@ -283,6 +283,7 @@ void FileGenerator::GenerateSource(io::Printer* printer) {
   printer->Print(
     "// Generated by the protocol buffer compiler.  DO NOT EDIT!\n"
     "\n"
+
     // The generated code calls accessors that might be deprecated. We don't
     // want the compiler to warn in generated code.
     "#define INTERNAL_SUPPRESS_PROTOBUF_FIELD_DEPRECATION\n"

+ 3 - 3
src/google/protobuf/compiler/cpp/cpp_generator.cc

@@ -53,7 +53,7 @@ CppGenerator::~CppGenerator() {}
 
 bool CppGenerator::Generate(const FileDescriptor* file,
                             const string& parameter,
-                            OutputDirectory* output_directory,
+                            GeneratorContext* generator_context,
                             string* error) const {
   vector<pair<string, string> > options;
   ParseGeneratorParameter(parameter, &options);
@@ -100,7 +100,7 @@ bool CppGenerator::Generate(const FileDescriptor* file,
   // Generate header.
   {
     scoped_ptr<io::ZeroCopyOutputStream> output(
-      output_directory->Open(basename + ".h"));
+      generator_context->Open(basename + ".h"));
     io::Printer printer(output.get(), '$');
     file_generator.GenerateHeader(&printer);
   }
@@ -108,7 +108,7 @@ bool CppGenerator::Generate(const FileDescriptor* file,
   // Generate cc file.
   {
     scoped_ptr<io::ZeroCopyOutputStream> output(
-      output_directory->Open(basename + ".cc"));
+      generator_context->Open(basename + ".cc"));
     io::Printer printer(output.get(), '$');
     file_generator.GenerateSource(&printer);
   }

+ 1 - 1
src/google/protobuf/compiler/cpp/cpp_generator.h

@@ -57,7 +57,7 @@ class LIBPROTOC_EXPORT CppGenerator : public CodeGenerator {
   // implements CodeGenerator ----------------------------------------
   bool Generate(const FileDescriptor* file,
                 const string& parameter,
-                OutputDirectory* output_directory,
+                GeneratorContext* generator_context,
                 string* error) const;
 
  private:

+ 214 - 51
src/google/protobuf/compiler/cpp/cpp_message.cc

@@ -35,6 +35,7 @@
 #include <algorithm>
 #include <google/protobuf/stubs/hash.h>
 #include <map>
+#include <utility>
 #include <vector>
 #include <google/protobuf/compiler/cpp/cpp_message.h>
 #include <google/protobuf/compiler/cpp/cpp_field.h>
@@ -143,6 +144,137 @@ static bool HasRequiredFields(const Descriptor* type) {
   return HasRequiredFields(type, &already_seen);
 }
 
+// This returns an estimate of the compiler's alignment for the field.  This
+// can't guarantee to be correct because the generated code could be compiled on
+// different systems with different alignment rules.  The estimates below assume
+// 64-bit pointers.
+int EstimateAlignmentSize(const FieldDescriptor* field) {
+  if (field == NULL) return 0;
+  if (field->is_repeated()) return 8;
+  switch (field->cpp_type()) {
+    case FieldDescriptor::CPPTYPE_BOOL:
+      return 1;
+
+    case FieldDescriptor::CPPTYPE_INT32:
+    case FieldDescriptor::CPPTYPE_UINT32:
+    case FieldDescriptor::CPPTYPE_ENUM:
+    case FieldDescriptor::CPPTYPE_FLOAT:
+      return 4;
+
+    case FieldDescriptor::CPPTYPE_INT64:
+    case FieldDescriptor::CPPTYPE_UINT64:
+    case FieldDescriptor::CPPTYPE_DOUBLE:
+    case FieldDescriptor::CPPTYPE_STRING:
+    case FieldDescriptor::CPPTYPE_MESSAGE:
+      return 8;
+  }
+  GOOGLE_LOG(FATAL) << "Can't get here.";
+  return -1;  // Make compiler happy.
+}
+
+// FieldGroup is just a helper for OptimizePadding below.  It holds a vector of
+// fields that are grouped together because they have compatible alignment, and
+// a preferred location in the final field ordering.
+class FieldGroup {
+ public:
+  FieldGroup()
+      : preferred_location_(0) {}
+
+  // A group with a single field.
+  FieldGroup(float preferred_location, const FieldDescriptor* field)
+      : preferred_location_(preferred_location),
+        fields_(1, field) {}
+
+  // Append the fields in 'other' to this group.
+  void Append(const FieldGroup& other) {
+    if (other.fields_.empty()) {
+      return;
+    }
+    // Preferred location is the average among all the fields, so we weight by
+    // the number of fields on each FieldGroup object.
+    preferred_location_ =
+        (preferred_location_ * fields_.size() +
+         (other.preferred_location_ * other.fields_.size())) /
+        (fields_.size() + other.fields_.size());
+    fields_.insert(fields_.end(), other.fields_.begin(), other.fields_.end());
+  }
+
+  void SetPreferredLocation(float location) { preferred_location_ = location; }
+  const vector<const FieldDescriptor*>& fields() const { return fields_; }
+
+  // FieldGroup objects sort by their preferred location.
+  bool operator<(const FieldGroup& other) const {
+    return preferred_location_ < other.preferred_location_;
+  }
+
+ private:
+  // "preferred_location_" is an estimate of where this group should go in the
+  // final list of fields.  We compute this by taking the average index of each
+  // field in this group in the original ordering of fields.  This is very
+  // approximate, but should put this group close to where its member fields
+  // originally went.
+  float preferred_location_;
+  vector<const FieldDescriptor*> fields_;
+  // We rely on the default copy constructor and operator= so this type can be
+  // used in a vector.
+};
+
+// Reorder 'fields' so that if the fields are output into a c++ class in the new
+// order, the alignment padding is minimized.  We try to do this while keeping
+// each field as close as possible to its original position so that we don't
+// reduce cache locality much for function that access each field in order.
+void OptimizePadding(vector<const FieldDescriptor*>* fields) {
+  // First divide fields into those that align to 1 byte, 4 bytes or 8 bytes.
+  vector<FieldGroup> aligned_to_1, aligned_to_4, aligned_to_8;
+  for (int i = 0; i < fields->size(); ++i) {
+    switch (EstimateAlignmentSize((*fields)[i])) {
+      case 1: aligned_to_1.push_back(FieldGroup(i, (*fields)[i])); break;
+      case 4: aligned_to_4.push_back(FieldGroup(i, (*fields)[i])); break;
+      case 8: aligned_to_8.push_back(FieldGroup(i, (*fields)[i])); break;
+      default:
+        GOOGLE_LOG(FATAL) << "Unknown alignment size.";
+    }
+  }
+
+  // Now group fields aligned to 1 byte into sets of 4, and treat those like a
+  // single field aligned to 4 bytes.
+  for (int i = 0; i < aligned_to_1.size(); i += 4) {
+    FieldGroup field_group;
+    for (int j = i; j < aligned_to_1.size() && j < i + 4; ++j) {
+      field_group.Append(aligned_to_1[j]);
+    }
+    aligned_to_4.push_back(field_group);
+  }
+  // Sort by preferred location to keep fields as close to their original
+  // location as possible.
+  sort(aligned_to_4.begin(), aligned_to_4.end());
+
+  // Now group fields aligned to 4 bytes (or the 4-field groups created above)
+  // into pairs, and treat those like a single field aligned to 8 bytes.
+  for (int i = 0; i < aligned_to_4.size(); i += 2) {
+    FieldGroup field_group;
+    for (int j = i; j < aligned_to_4.size() && j < i + 2; ++j) {
+      field_group.Append(aligned_to_4[j]);
+    }
+    if (i == aligned_to_4.size() - 1) {
+      // Move incomplete 4-byte block to the end.
+      field_group.SetPreferredLocation(fields->size() + 1);
+    }
+    aligned_to_8.push_back(field_group);
+  }
+  // Sort by preferred location to keep fields as close to their original
+  // location as possible.
+  sort(aligned_to_8.begin(), aligned_to_8.end());
+
+  // Now pull out all the FieldDescriptors in order.
+  fields->clear();
+  for (int i = 0; i < aligned_to_8.size(); ++i) {
+    fields->insert(fields->end(),
+                   aligned_to_8[i].fields().begin(),
+                   aligned_to_8[i].fields().end());
+  }
+}
+
 }
 
 // ===================================================================
@@ -264,10 +396,20 @@ GenerateFieldAccessorDefinitions(io::Printer* printer) {
         "}\n");
     } else {
       // Singular field.
+      char buffer[kFastToBufferSize];
+      vars["has_array_index"] = SimpleItoa(field->index() / 32);
+      vars["has_mask"] = FastHex32ToBuffer(1u << (field->index() % 32), buffer);
       printer->Print(vars,
         "inline bool $classname$::has_$name$() const {\n"
-        "  return _has_bit($index$);\n"
-        "}\n");
+        "  return (_has_bits_[$has_array_index$] & 0x$has_mask$u) != 0;\n"
+        "}\n"
+        "inline void $classname$::set_has_$name$() {\n"
+        "  _has_bits_[$has_array_index$] |= 0x$has_mask$u;\n"
+        "}\n"
+        "inline void $classname$::clear_has_$name$() {\n"
+        "  _has_bits_[$has_array_index$] &= ~0x$has_mask$u;\n"
+        "}\n"
+        );
     }
 
     // Generate clear_$name$()
@@ -279,7 +421,8 @@ GenerateFieldAccessorDefinitions(io::Printer* printer) {
     printer->Outdent();
 
     if (!field->is_repeated()) {
-      printer->Print(vars, "  _clear_bit($index$);\n");
+      printer->Print(vars,
+                     "  clear_has_$name$();\n");
     }
 
     printer->Print("}\n");
@@ -444,28 +587,74 @@ GenerateClassDefinition(io::Printer* printer) {
     "// @@protoc_insertion_point(class_scope:$full_name$)\n",
     "full_name", descriptor_->full_name());
 
-  // Generate private members for fields.
+  // Generate private members.
   printer->Outdent();
   printer->Print(" private:\n");
   printer->Indent();
 
+  for (int i = 0; i < descriptor_->field_count(); i++) {
+    if (!descriptor_->field(i)->is_repeated()) {
+      printer->Print(
+        "inline void set_has_$name$();\n",
+        "name", FieldName(descriptor_->field(i)));
+      printer->Print(
+        "inline void clear_has_$name$();\n",
+        "name", FieldName(descriptor_->field(i)));
+    }
+  }
+  printer->Print("\n");
+
+  // To minimize padding, data members are divided into three sections:
+  // (1) members assumed to align to 8 bytes
+  // (2) members corresponding to message fields, re-ordered to optimize
+  //     alignment.
+  // (3) members assumed to align to 4 bytes.
+
+  // Members assumed to align to 8 bytes:
+
   if (descriptor_->extension_range_count() > 0) {
     printer->Print(
-      "::google::protobuf::internal::ExtensionSet _extensions_;\n");
+      "::google::protobuf::internal::ExtensionSet _extensions_;\n"
+      "\n");
   }
 
   if (HasUnknownFields(descriptor_->file())) {
     printer->Print(
-      "::google::protobuf::UnknownFieldSet _unknown_fields_;\n");
+      "::google::protobuf::UnknownFieldSet _unknown_fields_;\n"
+      "\n");
+  }
+
+  // Field members:
+
+  vector<const FieldDescriptor*> fields;
+  for (int i = 0; i < descriptor_->field_count(); i++) {
+    fields.push_back(descriptor_->field(i));
+  }
+  OptimizePadding(&fields);
+  for (int i = 0; i < fields.size(); ++i) {
+    field_generators_.get(fields[i]).GeneratePrivateMembers(printer);
   }
 
+  // Members assumed to align to 4 bytes:
+
   // TODO(kenton):  Make _cached_size_ an atomic<int> when C++ supports it.
   printer->Print(
-    "mutable int _cached_size_;\n"
-    "\n");
-  for (int i = 0; i < descriptor_->field_count(); i++) {
-    field_generators_.get(descriptor_->field(i))
-                     .GeneratePrivateMembers(printer);
+      "\n"
+      "mutable int _cached_size_;\n");
+
+  // Generate _has_bits_.
+  if (descriptor_->field_count() > 0) {
+    printer->Print(vars,
+      "::google::protobuf::uint32 _has_bits_[($field_count$ + 31) / 32];\n"
+      "\n");
+  } else {
+    // Zero-size arrays aren't technically allowed, and MSVC in particular
+    // doesn't like them.  We still need to declare these arrays to make
+    // other code compile.  Since this is an uncommon case, we'll just declare
+    // them with size 1 and waste some space.  Oh well.
+    printer->Print(
+      "::google::protobuf::uint32 _has_bits_[1];\n"
+      "\n");
   }
 
   // Declare AddDescriptors(), BuildDescriptors(), and ShutdownFile() as
@@ -484,32 +673,7 @@ GenerateClassDefinition(io::Printer* printer) {
       GlobalAssignDescriptorsName(descriptor_->file()->name()),
     "shutdownfilename", GlobalShutdownFileName(descriptor_->file()->name()));
 
-  // Generate offsets and _has_bits_ boilerplate.
-  if (descriptor_->field_count() > 0) {
-    printer->Print(vars,
-      "::google::protobuf::uint32 _has_bits_[($field_count$ + 31) / 32];\n");
-  } else {
-    // Zero-size arrays aren't technically allowed, and MSVC in particular
-    // doesn't like them.  We still need to declare these arrays to make
-    // other code compile.  Since this is an uncommon case, we'll just declare
-    // them with size 1 and waste some space.  Oh well.
-    printer->Print(
-      "::google::protobuf::uint32 _has_bits_[1];\n");
-  }
-
   printer->Print(
-    "\n"
-    "// WHY DOES & HAVE LOWER PRECEDENCE THAN != !?\n"
-    "inline bool _has_bit(int index) const {\n"
-    "  return (_has_bits_[index / 32] & (1u << (index % 32))) != 0;\n"
-    "}\n"
-    "inline void _set_bit(int index) {\n"
-    "  _has_bits_[index / 32] |= (1u << (index % 32));\n"
-    "}\n"
-    "inline void _clear_bit(int index) {\n"
-    "  _has_bits_[index / 32] &= ~(1u << (index % 32));\n"
-    "}\n"
-    "\n"
     "void InitAsDefaultInstance();\n"
     "static $classname$* default_instance_;\n",
     "classname", classname_);
@@ -961,9 +1125,6 @@ GenerateClear(io::Printer* printer) {
     const FieldDescriptor* field = descriptor_->field(i);
 
     if (!field->is_repeated()) {
-      map<string, string> vars;
-      vars["index"] = SimpleItoa(field->index());
-
       // We can use the fact that _has_bits_ is a giant bitfield to our
       // advantage:  We can check up to 32 bits at a time for equality to
       // zero, and skip the whole range if so.  This can improve the speed
@@ -975,8 +1136,9 @@ GenerateClear(io::Printer* printer) {
           printer->Outdent();
           printer->Print("}\n");
         }
-        printer->Print(vars,
-          "if (_has_bits_[$index$ / 32] & (0xffu << ($index$ % 32))) {\n");
+        printer->Print(
+          "if (_has_bits_[$index$ / 32] & (0xffu << ($index$ % 32))) {\n",
+          "index", SimpleItoa(field->index()));
         printer->Indent();
       }
       last_index = i;
@@ -989,7 +1151,9 @@ GenerateClear(io::Printer* printer) {
         field->cpp_type() == FieldDescriptor::CPPTYPE_STRING;
 
       if (should_check_bit) {
-        printer->Print(vars, "if (_has_bit($index$)) {\n");
+        printer->Print(
+          "if (has_$name$()) {\n",
+          "name", FieldName(field));
         printer->Indent();
       }
 
@@ -1129,24 +1293,23 @@ GenerateMergeFrom(io::Printer* printer) {
     const FieldDescriptor* field = descriptor_->field(i);
 
     if (!field->is_repeated()) {
-      map<string, string> vars;
-      vars["index"] = SimpleItoa(field->index());
-
       // See above in GenerateClear for an explanation of this.
       if (i / 8 != last_index / 8 || last_index < 0) {
         if (last_index >= 0) {
           printer->Outdent();
           printer->Print("}\n");
         }
-        printer->Print(vars,
-          "if (from._has_bits_[$index$ / 32] & (0xffu << ($index$ % 32))) {\n");
+        printer->Print(
+          "if (from._has_bits_[$index$ / 32] & (0xffu << ($index$ % 32))) {\n",
+          "index", SimpleItoa(field->index()));
         printer->Indent();
       }
 
       last_index = i;
 
-      printer->Print(vars,
-        "if (from._has_bit($index$)) {\n");
+      printer->Print(
+        "if (from.has_$name$()) {\n",
+        "name", FieldName(field));
       printer->Indent();
 
       field_generators_.get(field).GenerateMergingCode(printer);
@@ -1423,8 +1586,8 @@ void MessageGenerator::GenerateSerializeOneField(
 
   if (!field->is_repeated()) {
     printer->Print(
-      "if (_has_bit($index$)) {\n",
-      "index", SimpleItoa(field->index()));
+      "if (has_$name$()) {\n",
+      "name", FieldName(field));
     printer->Indent();
   }
 

+ 9 - 2
src/google/protobuf/compiler/cpp/cpp_message_field.cc

@@ -75,7 +75,8 @@ void MessageFieldGenerator::
 GenerateAccessorDeclarations(io::Printer* printer) const {
   printer->Print(variables_,
     "inline const $type$& $name$() const$deprecation$;\n"
-    "inline $type$* mutable_$name$()$deprecation$;\n");
+    "inline $type$* mutable_$name$()$deprecation$;\n"
+    "inline $type$* release_$name$()$deprecation$;\n");
 }
 
 void MessageFieldGenerator::
@@ -85,9 +86,15 @@ GenerateInlineAccessorDefinitions(io::Printer* printer) const {
     "  return $name$_ != NULL ? *$name$_ : *default_instance_->$name$_;\n"
     "}\n"
     "inline $type$* $classname$::mutable_$name$() {\n"
-    "  _set_bit($index$);\n"
+    "  set_has_$name$();\n"
     "  if ($name$_ == NULL) $name$_ = new $type$;\n"
     "  return $name$_;\n"
+    "}\n"
+    "inline $type$* $classname$::release_$name$() {\n"
+    "  clear_has_$name$();\n"
+    "  $type$* temp = $name$_;\n"
+    "  $name$_ = NULL;\n"
+    "  return temp;\n"
     "}\n");
 }
 

+ 11 - 11
src/google/protobuf/compiler/cpp/cpp_plugin_unittest.cc

@@ -56,24 +56,24 @@ class TestGenerator : public CodeGenerator {
 
   virtual bool Generate(const FileDescriptor* file,
                         const string& parameter,
-                        OutputDirectory* output_directory,
+                        GeneratorContext* context,
                         string* error) const {
-    TryInsert("test.pb.h", "includes", output_directory);
-    TryInsert("test.pb.h", "namespace_scope", output_directory);
-    TryInsert("test.pb.h", "global_scope", output_directory);
-    TryInsert("test.pb.h", "class_scope:foo.Bar", output_directory);
-    TryInsert("test.pb.h", "class_scope:foo.Bar.Baz", output_directory);
+    TryInsert("test.pb.h", "includes", context);
+    TryInsert("test.pb.h", "namespace_scope", context);
+    TryInsert("test.pb.h", "global_scope", context);
+    TryInsert("test.pb.h", "class_scope:foo.Bar", context);
+    TryInsert("test.pb.h", "class_scope:foo.Bar.Baz", context);
 
-    TryInsert("test.pb.cc", "includes", output_directory);
-    TryInsert("test.pb.cc", "namespace_scope", output_directory);
-    TryInsert("test.pb.cc", "global_scope", output_directory);
+    TryInsert("test.pb.cc", "includes", context);
+    TryInsert("test.pb.cc", "namespace_scope", context);
+    TryInsert("test.pb.cc", "global_scope", context);
     return true;
   }
 
   void TryInsert(const string& filename, const string& insertion_point,
-                 OutputDirectory* output_directory) const {
+                 GeneratorContext* context) const {
     scoped_ptr<io::ZeroCopyOutputStream> output(
-      output_directory->OpenForInsert(filename, insertion_point));
+      context->OpenForInsert(filename, insertion_point));
     io::Printer printer(output.get(), '$');
     printer.Print("// inserted $name$\n", "name", insertion_point);
   }

+ 2 - 2
src/google/protobuf/compiler/cpp/cpp_primitive_field.cc

@@ -125,7 +125,7 @@ GenerateInlineAccessorDefinitions(io::Printer* printer) const {
     "  return $name$_;\n"
     "}\n"
     "inline void $classname$::set_$name$($type$ value) {\n"
-    "  _set_bit($index$);\n"
+    "  set_has_$name$();\n"
     "  $name$_ = value;\n"
     "}\n");
 }
@@ -156,7 +156,7 @@ GenerateMergeFromCodedStream(io::Printer* printer) const {
     "DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive<\n"
     "         $type$, $wire_format_field_type$>(\n"
     "       input, &$name$_)));\n"
-    "_set_bit($index$);\n");
+    "set_has_$name$();\n");
 }
 
 void PrimitiveFieldGenerator::

+ 35 - 23
src/google/protobuf/compiler/cpp/cpp_string_field.cc

@@ -50,6 +50,9 @@ void SetStringVariables(const FieldDescriptor* descriptor,
   SetCommonFieldVariables(descriptor, variables);
   (*variables)["default"] =
     "\"" + CEscape(descriptor->default_value_string()) + "\"";
+  (*variables)["default_variable"] = descriptor->default_value_string().empty()
+      ? "::google::protobuf::internal::kEmptyString"
+      : "_default_" + FieldName(descriptor) + "_";
   (*variables)["pointer_type"] =
       descriptor->type() == FieldDescriptor::TYPE_BYTES ? "void" : "char";
 }
@@ -68,9 +71,10 @@ StringFieldGenerator::~StringFieldGenerator() {}
 
 void StringFieldGenerator::
 GeneratePrivateMembers(io::Printer* printer) const {
-  printer->Print(variables_,
-    "::std::string* $name$_;\n"
-    "static const ::std::string _default_$name$_;\n");
+  printer->Print(variables_, "::std::string* $name$_;\n");
+  if (!descriptor_->default_value_string().empty()) {
+    printer->Print(variables_, "static const ::std::string $default_variable$;\n");
+  }
 }
 
 void StringFieldGenerator::
@@ -105,7 +109,8 @@ GenerateAccessorDeclarations(io::Printer* printer) const {
     "inline void set_$name$(const char* value)$deprecation$;\n"
     "inline void set_$name$(const $pointer_type$* value, size_t size)"
                  "$deprecation$;\n"
-    "inline ::std::string* mutable_$name$()$deprecation$;\n");
+    "inline ::std::string* mutable_$name$()$deprecation$;\n"
+    "inline ::std::string* release_$name$()$deprecation$;\n");
 
   if (descriptor_->options().ctype() != FieldOptions::STRING) {
     printer->Outdent();
@@ -121,51 +126,58 @@ GenerateInlineAccessorDefinitions(io::Printer* printer) const {
     "  return *$name$_;\n"
     "}\n"
     "inline void $classname$::set_$name$(const ::std::string& value) {\n"
-    "  _set_bit($index$);\n"
-    "  if ($name$_ == &_default_$name$_) {\n"
+    "  set_has_$name$();\n"
+    "  if ($name$_ == &$default_variable$) {\n"
     "    $name$_ = new ::std::string;\n"
     "  }\n"
     "  $name$_->assign(value);\n"
     "}\n"
     "inline void $classname$::set_$name$(const char* value) {\n"
-    "  _set_bit($index$);\n"
-    "  if ($name$_ == &_default_$name$_) {\n"
+    "  set_has_$name$();\n"
+    "  if ($name$_ == &$default_variable$) {\n"
     "    $name$_ = new ::std::string;\n"
     "  }\n"
     "  $name$_->assign(value);\n"
     "}\n"
     "inline "
     "void $classname$::set_$name$(const $pointer_type$* value, size_t size) {\n"
-    "  _set_bit($index$);\n"
-    "  if ($name$_ == &_default_$name$_) {\n"
+    "  set_has_$name$();\n"
+    "  if ($name$_ == &$default_variable$) {\n"
     "    $name$_ = new ::std::string;\n"
     "  }\n"
     "  $name$_->assign(reinterpret_cast<const char*>(value), size);\n"
     "}\n"
     "inline ::std::string* $classname$::mutable_$name$() {\n"
-    "  _set_bit($index$);\n"
-    "  if ($name$_ == &_default_$name$_) {\n");
+    "  set_has_$name$();\n"
+    "  if ($name$_ == &$default_variable$) {\n");
   if (descriptor_->default_value_string().empty()) {
     printer->Print(variables_,
       "    $name$_ = new ::std::string;\n");
   } else {
     printer->Print(variables_,
-      "    $name$_ = new ::std::string(_default_$name$_);\n");
+      "    $name$_ = new ::std::string($default_variable$);\n");
   }
   printer->Print(variables_,
     "  }\n"
     "  return $name$_;\n"
+    "}\n"
+    "inline ::std::string* $classname$::release_$name$() {\n"
+    "  clear_has_$name$();\n"
+    "  if ($name$_ == &$default_variable$) {\n"
+    "    return NULL;\n"
+    "  } else {\n"
+    "    ::std::string* temp = $name$_;\n"
+    "    $name$_ = const_cast< ::std::string*>(&$default_variable$);\n"
+    "    return temp;\n"
+    "  }\n"
     "}\n");
 }
 
 void StringFieldGenerator::
 GenerateNonInlineAccessorDefinitions(io::Printer* printer) const {
-  if (descriptor_->default_value_string().empty()) {
-    printer->Print(variables_,
-      "const ::std::string $classname$::_default_$name$_;\n");
-  } else {
+  if (!descriptor_->default_value_string().empty()) {
     printer->Print(variables_,
-      "const ::std::string $classname$::_default_$name$_($default$);\n");
+      "const ::std::string $classname$::$default_variable$($default$);\n");
   }
 }
 
@@ -173,13 +185,13 @@ void StringFieldGenerator::
 GenerateClearingCode(io::Printer* printer) const {
   if (descriptor_->default_value_string().empty()) {
     printer->Print(variables_,
-      "if ($name$_ != &_default_$name$_) {\n"
+      "if ($name$_ != &$default_variable$) {\n"
       "  $name$_->clear();\n"
       "}\n");
   } else {
     printer->Print(variables_,
-      "if ($name$_ != &_default_$name$_) {\n"
-      "  $name$_->assign(_default_$name$_);\n"
+      "if ($name$_ != &$default_variable$) {\n"
+      "  $name$_->assign($default_variable$);\n"
       "}\n");
   }
 }
@@ -197,13 +209,13 @@ GenerateSwappingCode(io::Printer* printer) const {
 void StringFieldGenerator::
 GenerateConstructorCode(io::Printer* printer) const {
   printer->Print(variables_,
-    "$name$_ = const_cast< ::std::string*>(&_default_$name$_);\n");
+    "$name$_ = const_cast< ::std::string*>(&$default_variable$);\n");
 }
 
 void StringFieldGenerator::
 GenerateDestructorCode(io::Printer* printer) const {
   printer->Print(variables_,
-    "if ($name$_ != &_default_$name$_) {\n"
+    "if ($name$_ != &$default_variable$) {\n"
     "  delete $name$_;\n"
     "}\n");
 }

+ 4 - 0
src/google/protobuf/compiler/cpp/cpp_test_bad_identifiers.proto

@@ -36,6 +36,10 @@
 // though the same identifiers are used internally by the C++ code generator.
 
 
+// Some generic_services option(s) added automatically.
+// See:  http://go/proto2-generic-services-default
+option cc_generic_services = true;     // auto-added
+
 // We don't put this in a package within proto2 because we need to make sure
 // that the generated code doesn't depend on being in the proto2 namespace.
 package protobuf_unittest;

+ 45 - 1
src/google/protobuf/compiler/cpp/cpp_unittest.cc

@@ -195,6 +195,48 @@ TEST(GeneratedMessageTest, MutableStringDefault) {
   EXPECT_EQ("hello", *message.mutable_default_string());
 }
 
+TEST(GeneratedMessageTest, ReleaseString) {
+  // Check that release_foo() starts out NULL, and gives us a value
+  // that we can delete after it's been set.
+  unittest::TestAllTypes message;
+
+  EXPECT_EQ(NULL, message.release_default_string());
+  EXPECT_FALSE(message.has_default_string());
+  EXPECT_EQ("hello", message.default_string());
+
+  message.set_default_string("blah");
+  EXPECT_TRUE(message.has_default_string());
+  string* str = message.release_default_string();
+  EXPECT_FALSE(message.has_default_string());
+  ASSERT_TRUE(str != NULL);
+  EXPECT_EQ("blah", *str);
+  delete str;
+
+  EXPECT_EQ(NULL, message.release_default_string());
+  EXPECT_FALSE(message.has_default_string());
+  EXPECT_EQ("hello", message.default_string());
+}
+
+TEST(GeneratedMessageTest, ReleaseMessage) {
+  // Check that release_foo() starts out NULL, and gives us a value
+  // that we can delete after it's been set.
+  unittest::TestAllTypes message;
+
+  EXPECT_EQ(NULL, message.release_optional_nested_message());
+  EXPECT_FALSE(message.has_optional_nested_message());
+
+  message.mutable_optional_nested_message()->set_bb(1);
+  unittest::TestAllTypes::NestedMessage* nest =
+      message.release_optional_nested_message();
+  EXPECT_FALSE(message.has_optional_nested_message());
+  ASSERT_TRUE(nest != NULL);
+  EXPECT_EQ(1, nest->bb());
+  delete nest;
+
+  EXPECT_EQ(NULL, message.release_optional_nested_message());
+  EXPECT_FALSE(message.has_optional_nested_message());
+}
+
 TEST(GeneratedMessageTest, Clear) {
   // Set every field to a unique value, clear the message, then check that
   // it is cleared.
@@ -282,6 +324,7 @@ TEST(GeneratedMessageTest, CopyFrom) {
   TestUtil::ExpectAllFieldsSet(message2);
 }
 
+
 TEST(GeneratedMessageTest, SwapWithEmpty) {
   unittest::TestAllTypes message1, message2;
   TestUtil::SetAllFields(&message1);
@@ -376,7 +419,7 @@ TEST(GeneratedMessageTest, CopyAssignmentOperator) {
   TestUtil::ExpectAllFieldsSet(message2);
 
   // Make sure that self-assignment does something sane.
-  message2 = message2;
+  message2.operator=(message2);
   TestUtil::ExpectAllFieldsSet(message2);
 }
 
@@ -718,6 +761,7 @@ TEST(GeneratedMessageTest, TestSpaceUsed) {
 
 #endif  // !PROTOBUF_TEST_NO_DESCRIPTORS
 
+
 TEST(GeneratedMessageTest, FieldConstantValues) {
   unittest::TestRequired message;
   EXPECT_EQ(unittest::TestAllTypes_NestedMessage::kBbFieldNumber, 1);

+ 9 - 11
src/google/protobuf/compiler/java/java_enum.cc

@@ -104,6 +104,15 @@ void EnumGenerator::Generate(io::Printer* printer) {
       "public static final $classname$ $name$ = $canonical_name$;\n");
   }
 
+  for (int i = 0; i < descriptor_->value_count(); i++) {
+    map<string, string> vars;
+    vars["name"] = descriptor_->value(i)->name();
+    vars["number"] = SimpleItoa(descriptor_->value(i)->number());
+    printer->Print(vars,
+      "public static final int $name$_VALUE = $number$;\n");
+  }
+  printer->Print("\n");
+
   // -----------------------------------------------------------------
 
   printer->Print(
@@ -219,17 +228,6 @@ void EnumGenerator::Generate(io::Printer* printer) {
     "  this.value = value;\n"
     "}\n");
 
-  if (HasDescriptorMethods(descriptor_)) {
-    // Force the static initialization code for the file to run, since it may
-    // initialize static variables declared in this class.
-    printer->Print(
-      "\n"
-      "static {\n"
-      "  $file$.getDescriptor();\n"
-      "}\n",
-      "file", ClassName(descriptor_->file()));
-  }
-
   printer->Print(
     "\n"
     "// @@protoc_insertion_point(enum_scope:$full_name$)\n",

+ 229 - 65
src/google/protobuf/compiler/java/java_enum_field.cc

@@ -52,17 +52,44 @@ namespace {
 // TODO(kenton):  Factor out a "SetCommonFieldVariables()" to get rid of
 //   repeat code between this and the other field types.
 void SetEnumVariables(const FieldDescriptor* descriptor,
+                      int messageBitIndex,
+                      int builderBitIndex,
                       map<string, string>* variables) {
   (*variables)["name"] =
     UnderscoresToCamelCase(descriptor);
   (*variables)["capitalized_name"] =
     UnderscoresToCapitalizedCamelCase(descriptor);
+  (*variables)["constant_name"] = FieldConstantName(descriptor);
   (*variables)["number"] = SimpleItoa(descriptor->number());
   (*variables)["type"] = ClassName(descriptor->enum_type());
   (*variables)["default"] = DefaultValue(descriptor);
   (*variables)["tag"] = SimpleItoa(internal::WireFormat::MakeTag(descriptor));
   (*variables)["tag_size"] = SimpleItoa(
       internal::WireFormat::TagSize(descriptor->number(), GetType(descriptor)));
+  // TODO(birdo): Add @deprecated javadoc when generating javadoc is supported
+  // by the proto compiler
+  (*variables)["deprecation"] = descriptor->options().deprecated()
+      ? "@java.lang.Deprecated " : "";
+  (*variables)["on_changed"] =
+      HasDescriptorMethods(descriptor->containing_type()) ? "onChanged();" : "";
+
+  // For singular messages and builders, one bit is used for the hasField bit.
+  (*variables)["get_has_field_bit_message"] = GenerateGetBit(messageBitIndex);
+
+  (*variables)["get_has_field_bit_builder"] = GenerateGetBit(builderBitIndex);
+  (*variables)["set_has_field_bit_builder"] = GenerateSetBit(builderBitIndex);
+  (*variables)["clear_has_field_bit_builder"] =
+      GenerateClearBit(builderBitIndex);
+
+  // For repated builders, one bit is used for whether the array is immutable.
+  (*variables)["get_mutable_bit_builder"] = GenerateGetBit(builderBitIndex);
+  (*variables)["set_mutable_bit_builder"] = GenerateSetBit(builderBitIndex);
+  (*variables)["clear_mutable_bit_builder"] = GenerateClearBit(builderBitIndex);
+
+  (*variables)["get_has_field_bit_from_local"] =
+      GenerateGetBitFromLocal(builderBitIndex);
+  (*variables)["set_has_field_bit_to_local"] =
+      GenerateSetBitToLocal(messageBitIndex);
 }
 
 }  // namespace
@@ -70,51 +97,87 @@ void SetEnumVariables(const FieldDescriptor* descriptor,
 // ===================================================================
 
 EnumFieldGenerator::
-EnumFieldGenerator(const FieldDescriptor* descriptor)
-  : descriptor_(descriptor) {
-  SetEnumVariables(descriptor, &variables_);
+EnumFieldGenerator(const FieldDescriptor* descriptor,
+                      int messageBitIndex,
+                      int builderBitIndex)
+  : descriptor_(descriptor), messageBitIndex_(messageBitIndex),
+    builderBitIndex_(builderBitIndex) {
+  SetEnumVariables(descriptor, messageBitIndex, builderBitIndex, &variables_);
 }
 
 EnumFieldGenerator::~EnumFieldGenerator() {}
 
+int EnumFieldGenerator::GetNumBitsForMessage() const {
+  return 1;
+}
+
+int EnumFieldGenerator::GetNumBitsForBuilder() const {
+  return 1;
+}
+
+void EnumFieldGenerator::
+GenerateInterfaceMembers(io::Printer* printer) const {
+  printer->Print(variables_,
+    "$deprecation$boolean has$capitalized_name$();\n"
+    "$deprecation$$type$ get$capitalized_name$();\n");
+}
+
 void EnumFieldGenerator::
 GenerateMembers(io::Printer* printer) const {
   printer->Print(variables_,
-    "private boolean has$capitalized_name$;\n"
     "private $type$ $name$_;\n"
-    "public boolean has$capitalized_name$() { return has$capitalized_name$; }\n"
-    "public $type$ get$capitalized_name$() { return $name$_; }\n");
+    "$deprecation$public boolean has$capitalized_name$() {\n"
+    "  return $get_has_field_bit_message$;\n"
+    "}\n"
+    "$deprecation$public $type$ get$capitalized_name$() {\n"
+    "  return $name$_;\n"
+    "}\n");
 }
 
 void EnumFieldGenerator::
 GenerateBuilderMembers(io::Printer* printer) const {
   printer->Print(variables_,
-    "public boolean has$capitalized_name$() {\n"
-    "  return result.has$capitalized_name$();\n"
+    "private $type$ $name$_ = $default$;\n"
+    "$deprecation$public boolean has$capitalized_name$() {\n"
+    "  return $get_has_field_bit_builder$;\n"
     "}\n"
-    "public $type$ get$capitalized_name$() {\n"
-    "  return result.get$capitalized_name$();\n"
+    "$deprecation$public $type$ get$capitalized_name$() {\n"
+    "  return $name$_;\n"
     "}\n"
-    "public Builder set$capitalized_name$($type$ value) {\n"
+    "$deprecation$public Builder set$capitalized_name$($type$ value) {\n"
     "  if (value == null) {\n"
     "    throw new NullPointerException();\n"
     "  }\n"
-    "  result.has$capitalized_name$ = true;\n"
-    "  result.$name$_ = value;\n"
+    "  $set_has_field_bit_builder$;\n"
+    "  $name$_ = value;\n"
+    "  $on_changed$\n"
     "  return this;\n"
     "}\n"
-    "public Builder clear$capitalized_name$() {\n"
-    "  result.has$capitalized_name$ = false;\n"
-    "  result.$name$_ = $default$;\n"
+    "$deprecation$public Builder clear$capitalized_name$() {\n"
+    "  $clear_has_field_bit_builder$;\n"
+    "  $name$_ = $default$;\n"
+    "  $on_changed$\n"
     "  return this;\n"
     "}\n");
 }
 
+void EnumFieldGenerator::
+GenerateFieldBuilderInitializationCode(io::Printer* printer)  const {
+  // noop for enums
+}
+
 void EnumFieldGenerator::
 GenerateInitializationCode(io::Printer* printer) const {
   printer->Print(variables_, "$name$_ = $default$;\n");
 }
 
+void EnumFieldGenerator::
+GenerateBuilderClearCode(io::Printer* printer) const {
+  printer->Print(variables_,
+      "$name$_ = $default$;\n"
+      "$clear_has_field_bit_builder$;\n");
+}
+
 void EnumFieldGenerator::
 GenerateMergingCode(io::Printer* printer) const {
   printer->Print(variables_,
@@ -125,7 +188,11 @@ GenerateMergingCode(io::Printer* printer) const {
 
 void EnumFieldGenerator::
 GenerateBuildingCode(io::Printer* printer) const {
-  // Nothing to do here for enum types.
+  printer->Print(variables_,
+    "if ($get_has_field_bit_from_local$) {\n"
+    "  $set_has_field_bit_to_local$;\n"
+    "}\n"
+    "result.$name$_ = $name$_;\n");
 }
 
 void EnumFieldGenerator::
@@ -143,27 +210,42 @@ GenerateParsingCode(io::Printer* printer) const {
       "if (value != null) {\n");
   }
   printer->Print(variables_,
-    "  set$capitalized_name$(value);\n"
+    "  $set_has_field_bit_builder$;\n"
+    "  $name$_ = value;\n"
     "}\n");
 }
 
 void EnumFieldGenerator::
 GenerateSerializationCode(io::Printer* printer) const {
   printer->Print(variables_,
-    "if (has$capitalized_name$()) {\n"
-    "  output.writeEnum($number$, get$capitalized_name$().getNumber());\n"
+    "if ($get_has_field_bit_message$) {\n"
+    "  output.writeEnum($number$, $name$_.getNumber());\n"
     "}\n");
 }
 
 void EnumFieldGenerator::
 GenerateSerializedSizeCode(io::Printer* printer) const {
   printer->Print(variables_,
-    "if (has$capitalized_name$()) {\n"
+    "if ($get_has_field_bit_message$) {\n"
     "  size += com.google.protobuf.CodedOutputStream\n"
-    "    .computeEnumSize($number$, get$capitalized_name$().getNumber());\n"
+    "    .computeEnumSize($number$, $name$_.getNumber());\n"
     "}\n");
 }
 
+void EnumFieldGenerator::
+GenerateEqualsCode(io::Printer* printer) const {
+  printer->Print(variables_,
+    "result = result &&\n"
+    "    (get$capitalized_name$() == other.get$capitalized_name$());\n");
+}
+
+void EnumFieldGenerator::
+GenerateHashCode(io::Printer* printer) const {
+  printer->Print(variables_,
+    "hash = (37 * hash) + $constant_name$;\n"
+    "hash = (53 * hash) + hashEnum(get$capitalized_name$());\n");
+}
+
 string EnumFieldGenerator::GetBoxedType() const {
   return ClassName(descriptor_->enum_type());
 }
@@ -171,23 +253,43 @@ string EnumFieldGenerator::GetBoxedType() const {
 // ===================================================================
 
 RepeatedEnumFieldGenerator::
-RepeatedEnumFieldGenerator(const FieldDescriptor* descriptor)
-  : descriptor_(descriptor) {
-  SetEnumVariables(descriptor, &variables_);
+RepeatedEnumFieldGenerator(const FieldDescriptor* descriptor,
+                           int messageBitIndex,
+                           int builderBitIndex)
+  : descriptor_(descriptor), messageBitIndex_(messageBitIndex),
+    builderBitIndex_(builderBitIndex) {
+  SetEnumVariables(descriptor, messageBitIndex, builderBitIndex, &variables_);
 }
 
 RepeatedEnumFieldGenerator::~RepeatedEnumFieldGenerator() {}
 
+int RepeatedEnumFieldGenerator::GetNumBitsForMessage() const {
+  return 0;
+}
+
+int RepeatedEnumFieldGenerator::GetNumBitsForBuilder() const {
+  return 1;
+}
+
+void RepeatedEnumFieldGenerator::
+GenerateInterfaceMembers(io::Printer* printer) const {
+  printer->Print(variables_,
+    "$deprecation$java.util.List<$type$> get$capitalized_name$List();\n"
+    "$deprecation$int get$capitalized_name$Count();\n"
+    "$deprecation$$type$ get$capitalized_name$(int index);\n");
+}
+
 void RepeatedEnumFieldGenerator::
 GenerateMembers(io::Printer* printer) const {
   printer->Print(variables_,
-    "private java.util.List<$type$> $name$_ =\n"
-    "  java.util.Collections.emptyList();\n"
-    "public java.util.List<$type$> get$capitalized_name$List() {\n"
+    "private java.util.List<$type$> $name$_;\n"
+    "$deprecation$public java.util.List<$type$> get$capitalized_name$List() {\n"
     "  return $name$_;\n"   // note:  unmodifiable list
     "}\n"
-    "public int get$capitalized_name$Count() { return $name$_.size(); }\n"
-    "public $type$ get$capitalized_name$(int index) {\n"
+    "$deprecation$public int get$capitalized_name$Count() {\n"
+    "  return $name$_.size();\n"
+    "}\n"
+    "$deprecation$public $type$ get$capitalized_name$(int index) {\n"
     "  return $name$_.get(index);\n"
     "}\n");
 
@@ -201,73 +303,119 @@ GenerateMembers(io::Printer* printer) const {
 void RepeatedEnumFieldGenerator::
 GenerateBuilderMembers(io::Printer* printer) const {
   printer->Print(variables_,
+    // One field is the list and the other field keeps track of whether the
+    // list is immutable. If it's immutable, the invariant is that it must
+    // either an instance of Collections.emptyList() or it's an ArrayList
+    // wrapped in a Collections.unmodifiableList() wrapper and nobody else has
+    // a refererence to the underlying ArrayList. This invariant allows us to
+    // share instances of lists between protocol buffers avoiding expensive
+    // memory allocations. Note, immutable is a strong guarantee here -- not
+    // just that the list cannot be modified via the reference but that the
+    // list can never be modified.
+    "private java.util.List<$type$> $name$_ =\n"
+    "  java.util.Collections.emptyList();\n"
+
+    "private void ensure$capitalized_name$IsMutable() {\n"
+    "  if (!$get_mutable_bit_builder$) {\n"
+    "    $name$_ = new java.util.ArrayList<$type$>($name$_);\n"
+    "    $set_mutable_bit_builder$;\n"
+    "  }\n"
+    "}\n"
+
     // Note:  We return an unmodifiable list because otherwise the caller
     //   could hold on to the returned list and modify it after the message
     //   has been built, thus mutating the message which is supposed to be
     //   immutable.
-    "public java.util.List<$type$> get$capitalized_name$List() {\n"
-    "  return java.util.Collections.unmodifiableList(result.$name$_);\n"
+    "$deprecation$public java.util.List<$type$> get$capitalized_name$List() {\n"
+    "  return java.util.Collections.unmodifiableList($name$_);\n"
     "}\n"
-    "public int get$capitalized_name$Count() {\n"
-    "  return result.get$capitalized_name$Count();\n"
+    "$deprecation$public int get$capitalized_name$Count() {\n"
+    "  return $name$_.size();\n"
     "}\n"
-    "public $type$ get$capitalized_name$(int index) {\n"
-    "  return result.get$capitalized_name$(index);\n"
+    "$deprecation$public $type$ get$capitalized_name$(int index) {\n"
+    "  return $name$_.get(index);\n"
     "}\n"
-    "public Builder set$capitalized_name$(int index, $type$ value) {\n"
+    "$deprecation$public Builder set$capitalized_name$(\n"
+    "    int index, $type$ value) {\n"
     "  if (value == null) {\n"
     "    throw new NullPointerException();\n"
     "  }\n"
-    "  result.$name$_.set(index, value);\n"
+    "  ensure$capitalized_name$IsMutable();\n"
+    "  $name$_.set(index, value);\n"
+    "  $on_changed$\n"
     "  return this;\n"
     "}\n"
-    "public Builder add$capitalized_name$($type$ value) {\n"
+    "$deprecation$public Builder add$capitalized_name$($type$ value) {\n"
     "  if (value == null) {\n"
     "    throw new NullPointerException();\n"
     "  }\n"
-    "  if (result.$name$_.isEmpty()) {\n"
-    "    result.$name$_ = new java.util.ArrayList<$type$>();\n"
-    "  }\n"
-    "  result.$name$_.add(value);\n"
+    "  ensure$capitalized_name$IsMutable();\n"
+    "  $name$_.add(value);\n"
+    "  $on_changed$\n"
     "  return this;\n"
     "}\n"
-    "public Builder addAll$capitalized_name$(\n"
+    "$deprecation$public Builder addAll$capitalized_name$(\n"
     "    java.lang.Iterable<? extends $type$> values) {\n"
-    "  if (result.$name$_.isEmpty()) {\n"
-    "    result.$name$_ = new java.util.ArrayList<$type$>();\n"
-    "  }\n"
-    "  super.addAll(values, result.$name$_);\n"
+    "  ensure$capitalized_name$IsMutable();\n"
+    "  super.addAll(values, $name$_);\n"
+    "  $on_changed$\n"
     "  return this;\n"
     "}\n"
-    "public Builder clear$capitalized_name$() {\n"
-    "  result.$name$_ = java.util.Collections.emptyList();\n"
+    "$deprecation$public Builder clear$capitalized_name$() {\n"
+    "  $name$_ = java.util.Collections.emptyList();\n"
+    "  $clear_mutable_bit_builder$;\n"
+    "  $on_changed$\n"
     "  return this;\n"
     "}\n");
 }
 
+void RepeatedEnumFieldGenerator::
+GenerateFieldBuilderInitializationCode(io::Printer* printer)  const {
+  // noop for enums
+}
+
 void RepeatedEnumFieldGenerator::
 GenerateInitializationCode(io::Printer* printer) const {
-  // Initialized inline.
+  printer->Print(variables_, "$name$_ = java.util.Collections.emptyList();\n");
+}
+
+void RepeatedEnumFieldGenerator::
+GenerateBuilderClearCode(io::Printer* printer) const {
+  printer->Print(variables_,
+    "$name$_ = java.util.Collections.emptyList();\n"
+    "$clear_mutable_bit_builder$;\n");
 }
 
 void RepeatedEnumFieldGenerator::
 GenerateMergingCode(io::Printer* printer) const {
+  // The code below does two optimizations:
+  //   1. If the other list is empty, there's nothing to do. This ensures we
+  //      don't allocate a new array if we already have an immutable one.
+  //   2. If the other list is non-empty and our current list is empty, we can
+  //      reuse the other list which is guaranteed to be immutable.
   printer->Print(variables_,
     "if (!other.$name$_.isEmpty()) {\n"
-    "  if (result.$name$_.isEmpty()) {\n"
-    "    result.$name$_ = new java.util.ArrayList<$type$>();\n"
+    "  if ($name$_.isEmpty()) {\n"
+    "    $name$_ = other.$name$_;\n"
+    "    $clear_mutable_bit_builder$;\n"
+    "  } else {\n"
+    "    ensure$capitalized_name$IsMutable();\n"
+    "    $name$_.addAll(other.$name$_);\n"
     "  }\n"
-    "  result.$name$_.addAll(other.$name$_);\n"
+    "  $on_changed$\n"
     "}\n");
 }
 
 void RepeatedEnumFieldGenerator::
 GenerateBuildingCode(io::Printer* printer) const {
+  // The code below ensures that the result has an immutable list. If our
+  // list is immutable, we can just reuse it. If not, we make it immutable.
   printer->Print(variables_,
-    "if (result.$name$_ != java.util.Collections.EMPTY_LIST) {\n"
-    "  result.$name$_ =\n"
-    "    java.util.Collections.unmodifiableList(result.$name$_);\n"
-    "}\n");
+    "if ($get_mutable_bit_builder$) {\n"
+    "  $name$_ = java.util.Collections.unmodifiableList($name$_);\n"
+    "  $clear_mutable_bit_builder$;\n"
+    "}\n"
+    "result.$name$_ = $name$_;\n");
 }
 
 void RepeatedEnumFieldGenerator::
@@ -316,13 +464,13 @@ GenerateSerializationCode(io::Printer* printer) const {
       "  output.writeRawVarint32($tag$);\n"
       "  output.writeRawVarint32($name$MemoizedSerializedSize);\n"
       "}\n"
-      "for ($type$ element : get$capitalized_name$List()) {\n"
-      "  output.writeEnumNoTag(element.getNumber());\n"
+      "for (int i = 0; i < $name$_.size(); i++) {\n"
+      "  output.writeEnumNoTag($name$_.get(i).getNumber());\n"
       "}\n");
   } else {
     printer->Print(variables_,
-      "for ($type$ element : get$capitalized_name$List()) {\n"
-      "  output.writeEnum($number$, element.getNumber());\n"
+      "for (int i = 0; i < $name$_.size(); i++) {\n"
+      "  output.writeEnum($number$, $name$_.get(i).getNumber());\n"
       "}\n");
   }
 }
@@ -335,9 +483,9 @@ GenerateSerializedSizeCode(io::Printer* printer) const {
   printer->Indent();
 
   printer->Print(variables_,
-    "for ($type$ element : get$capitalized_name$List()) {\n"
+    "for (int i = 0; i < $name$_.size(); i++) {\n"
     "  dataSize += com.google.protobuf.CodedOutputStream\n"
-    "    .computeEnumSizeNoTag(element.getNumber());\n"
+    "    .computeEnumSizeNoTag($name$_.get(i).getNumber());\n"
     "}\n");
   printer->Print(
     "size += dataSize;\n");
@@ -350,7 +498,7 @@ GenerateSerializedSizeCode(io::Printer* printer) const {
       "}");
   } else {
     printer->Print(variables_,
-      "size += $tag_size$ * get$capitalized_name$List().size();\n");
+      "size += $tag_size$ * $name$_.size();\n");
   }
 
   // cache the data size for packed fields.
@@ -363,6 +511,22 @@ GenerateSerializedSizeCode(io::Printer* printer) const {
   printer->Print("}\n");
 }
 
+void RepeatedEnumFieldGenerator::
+GenerateEqualsCode(io::Printer* printer) const {
+  printer->Print(variables_,
+    "result = result && get$capitalized_name$List()\n"
+    "    .equals(other.get$capitalized_name$List());\n");
+}
+
+void RepeatedEnumFieldGenerator::
+GenerateHashCode(io::Printer* printer) const {
+  printer->Print(variables_,
+    "if (get$capitalized_name$Count() > 0) {\n"
+    "  hash = (37 * hash) + $constant_name$;\n"
+    "  hash = (53 * hash) + hashEnumList(get$capitalized_name$List());\n"
+    "}\n");
+}
+
 string RepeatedEnumFieldGenerator::GetBoxedType() const {
   return ClassName(descriptor_->enum_type());
 }

+ 22 - 2
src/google/protobuf/compiler/java/java_enum_field.h

@@ -46,49 +46,69 @@ namespace java {
 
 class EnumFieldGenerator : public FieldGenerator {
  public:
-  explicit EnumFieldGenerator(const FieldDescriptor* descriptor);
+  explicit EnumFieldGenerator(const FieldDescriptor* descriptor,
+      int messageBitIndex, int builderBitIndex);
   ~EnumFieldGenerator();
 
   // implements FieldGenerator ---------------------------------------
+  int GetNumBitsForMessage() const;
+  int GetNumBitsForBuilder() const;
+  void GenerateInterfaceMembers(io::Printer* printer) const;
   void GenerateMembers(io::Printer* printer) const;
   void GenerateBuilderMembers(io::Printer* printer) const;
   void GenerateInitializationCode(io::Printer* printer) const;
+  void GenerateBuilderClearCode(io::Printer* printer) const;
   void GenerateMergingCode(io::Printer* printer) const;
   void GenerateBuildingCode(io::Printer* printer) const;
   void GenerateParsingCode(io::Printer* printer) const;
   void GenerateSerializationCode(io::Printer* printer) const;
   void GenerateSerializedSizeCode(io::Printer* printer) const;
+  void GenerateFieldBuilderInitializationCode(io::Printer* printer) const;
+  void GenerateEqualsCode(io::Printer* printer) const;
+  void GenerateHashCode(io::Printer* printer) const;
 
   string GetBoxedType() const;
 
  private:
   const FieldDescriptor* descriptor_;
   map<string, string> variables_;
+  const int messageBitIndex_;
+  const int builderBitIndex_;
 
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(EnumFieldGenerator);
 };
 
 class RepeatedEnumFieldGenerator : public FieldGenerator {
  public:
-  explicit RepeatedEnumFieldGenerator(const FieldDescriptor* descriptor);
+  explicit RepeatedEnumFieldGenerator(const FieldDescriptor* descriptor,
+      int messageBitIndex, int builderBitIndex);
   ~RepeatedEnumFieldGenerator();
 
   // implements FieldGenerator ---------------------------------------
+  int GetNumBitsForMessage() const;
+  int GetNumBitsForBuilder() const;
+  void GenerateInterfaceMembers(io::Printer* printer) const;
   void GenerateMembers(io::Printer* printer) const;
   void GenerateBuilderMembers(io::Printer* printer) const;
   void GenerateInitializationCode(io::Printer* printer) const;
+  void GenerateBuilderClearCode(io::Printer* printer) const;
   void GenerateMergingCode(io::Printer* printer) const;
   void GenerateBuildingCode(io::Printer* printer) const;
   void GenerateParsingCode(io::Printer* printer) const;
   void GenerateParsingCodeFromPacked(io::Printer* printer) const;
   void GenerateSerializationCode(io::Printer* printer) const;
   void GenerateSerializedSizeCode(io::Printer* printer) const;
+  void GenerateFieldBuilderInitializationCode(io::Printer* printer) const;
+  void GenerateEqualsCode(io::Printer* printer) const;
+  void GenerateHashCode(io::Printer* printer) const;
 
   string GetBoxedType() const;
 
  private:
   const FieldDescriptor* descriptor_;
   map<string, string> variables_;
+  const int messageBitIndex_;
+  const int builderBitIndex_;
 
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(RepeatedEnumFieldGenerator);
 };

+ 95 - 80
src/google/protobuf/compiler/java/java_extension.cc

@@ -86,108 +86,123 @@ ExtensionGenerator::ExtensionGenerator(const FieldDescriptor* descriptor)
 
 ExtensionGenerator::~ExtensionGenerator() {}
 
-void ExtensionGenerator::Generate(io::Printer* printer) {
-  map<string, string> vars;
-  vars["name"] = UnderscoresToCamelCase(descriptor_);
-  vars["containing_type"] = ClassName(descriptor_->containing_type());
-  vars["number"] = SimpleItoa(descriptor_->number());
-  vars["constant_name"] = FieldConstantName(descriptor_);
-  vars["lite"] = HasDescriptorMethods(descriptor_->file()) ? "" : "Lite";
+// Initializes the vars referenced in the generated code templates.
+void InitTemplateVars(const FieldDescriptor* descriptor,
+                      const string& scope,
+                      map<string, string>* vars_pointer) {
+  map<string, string> &vars = *vars_pointer;
+  vars["scope"] = scope;
+  vars["name"] = UnderscoresToCamelCase(descriptor);
+  vars["containing_type"] = ClassName(descriptor->containing_type());
+  vars["number"] = SimpleItoa(descriptor->number());
+  vars["constant_name"] = FieldConstantName(descriptor);
+  vars["index"] = SimpleItoa(descriptor->index());
+  vars["default"] =
+      descriptor->is_repeated() ? "" : DefaultValue(descriptor);
+  vars["type_constant"] = TypeName(GetType(descriptor));
+  vars["packed"] = descriptor->options().packed() ? "true" : "false";
+  vars["enum_map"] = "null";
+  vars["prototype"] = "null";
 
-  JavaType java_type = GetJavaType(descriptor_);
+  JavaType java_type = GetJavaType(descriptor);
   string singular_type;
   switch (java_type) {
     case JAVATYPE_MESSAGE:
-      vars["type"] = ClassName(descriptor_->message_type());
+      singular_type = ClassName(descriptor->message_type());
+      vars["prototype"] = singular_type + ".getDefaultInstance()";
       break;
     case JAVATYPE_ENUM:
-      vars["type"] = ClassName(descriptor_->enum_type());
+      singular_type = ClassName(descriptor->enum_type());
+      vars["enum_map"] = singular_type + ".internalGetValueMap()";
       break;
     default:
-      vars["type"] = BoxedPrimitiveTypeName(java_type);
+      singular_type = BoxedPrimitiveTypeName(java_type);
       break;
   }
-
-  printer->Print(vars,
-    "public static final int $constant_name$ = $number$;\n");
-  if (descriptor_->is_repeated()) {
-    printer->Print(vars,
-      "public static final\n"
-      "  com.google.protobuf.GeneratedMessage$lite$.GeneratedExtension<\n"
-      "    $containing_type$,\n"
-      "    java.util.List<$type$>> $name$ =\n"
-      "      com.google.protobuf.GeneratedMessage$lite$\n"
-      "        .newGeneratedExtension();\n");
-  } else {
-    printer->Print(vars,
-      "public static final\n"
-      "  com.google.protobuf.GeneratedMessage$lite$.GeneratedExtension<\n"
-      "    $containing_type$,\n"
-      "    $type$> $name$ =\n"
-      "      com.google.protobuf.GeneratedMessage$lite$\n"
-      "        .newGeneratedExtension();\n");
-  }
+  vars["type"] = descriptor->is_repeated() ?
+      "java.util.List<" + singular_type + ">" : singular_type;
+  vars["singular_type"] = singular_type;
 }
 
-void ExtensionGenerator::GenerateInitializationCode(io::Printer* printer) {
+void ExtensionGenerator::Generate(io::Printer* printer) {
   map<string, string> vars;
-  vars["name"] = UnderscoresToCamelCase(descriptor_);
-  vars["scope"] = scope_;
-  vars["index"] = SimpleItoa(descriptor_->index());
-  vars["extendee"] = ClassName(descriptor_->containing_type());
-  vars["default"] = descriptor_->is_repeated() ? "" : DefaultValue(descriptor_);
-  vars["number"] = SimpleItoa(descriptor_->number());
-  vars["type_constant"] = TypeName(GetType(descriptor_));
-  vars["packed"] = descriptor_->options().packed() ? "true" : "false";
-  vars["enum_map"] = "null";
-  vars["prototype"] = "null";
-
-  JavaType java_type = GetJavaType(descriptor_);
-  string singular_type;
-  switch (java_type) {
-    case JAVATYPE_MESSAGE:
-      vars["type"] = ClassName(descriptor_->message_type());
-      vars["prototype"] = ClassName(descriptor_->message_type()) +
-                          ".getDefaultInstance()";
-      break;
-    case JAVATYPE_ENUM:
-      vars["type"] = ClassName(descriptor_->enum_type());
-      vars["enum_map"] = ClassName(descriptor_->enum_type()) +
-                         ".internalGetValueMap()";
-      break;
-    default:
-      vars["type"] = BoxedPrimitiveTypeName(java_type);
-      break;
-  }
+  InitTemplateVars(descriptor_, scope_, &vars);
+  printer->Print(vars,
+      "public static final int $constant_name$ = $number$;\n");
 
   if (HasDescriptorMethods(descriptor_->file())) {
-    printer->Print(vars,
-      "$scope$.$name$.internalInit(\n"
-      "    $scope$.getDescriptor().getExtensions().get($index$),\n"
-      "    $type$.class);\n");
+    // Non-lite extensions
+    if (descriptor_->extension_scope() == NULL) {
+      // Non-nested
+      printer->Print(
+          vars,
+          "public static final\n"
+          "  com.google.protobuf.GeneratedMessage.GeneratedExtension<\n"
+          "    $containing_type$,\n"
+          "    $type$> $name$ = com.google.protobuf.GeneratedMessage\n"
+          "        .newFileScopedGeneratedExtension(\n"
+          "      $singular_type$.class,\n"
+          "      $prototype$);\n");
+    } else {
+      // Nested
+      printer->Print(
+          vars,
+          "public static final\n"
+          "  com.google.protobuf.GeneratedMessage.GeneratedExtension<\n"
+          "    $containing_type$,\n"
+          "    $type$> $name$ = com.google.protobuf.GeneratedMessage\n"
+          "        .newMessageScopedGeneratedExtension(\n"
+          "      $scope$.getDefaultInstance(),\n"
+          "      $index$,\n"
+          "      $singular_type$.class,\n"
+          "      $prototype$);\n");
+    }
   } else {
+    // Lite extensions
     if (descriptor_->is_repeated()) {
-      printer->Print(vars,
-        "$scope$.$name$.internalInitRepeated(\n"
-        "    $extendee$.getDefaultInstance(),\n"
-        "    $prototype$,\n"
-        "    $enum_map$,\n"
-        "    $number$,\n"
-        "    com.google.protobuf.WireFormat.FieldType.$type_constant$,\n"
-        "    $packed$);\n");
+      printer->Print(
+          vars,
+          "public static final\n"
+          "  com.google.protobuf.GeneratedMessageLite.GeneratedExtension<\n"
+          "    $containing_type$,\n"
+          "    $type$> $name$ = com.google.protobuf.GeneratedMessageLite\n"
+          "        .newRepeatedGeneratedExtension(\n"
+          "      $containing_type$.getDefaultInstance(),\n"
+          "      $prototype$,\n"
+          "      $enum_map$,\n"
+          "      $number$,\n"
+          "      com.google.protobuf.WireFormat.FieldType.$type_constant$,\n"
+          "      $packed$);\n");
     } else {
-      printer->Print(vars,
-        "$scope$.$name$.internalInitSingular(\n"
-        "    $extendee$.getDefaultInstance(),\n"
-        "    $default$,\n"
-        "    $prototype$,\n"
-        "    $enum_map$,\n"
-        "    $number$,\n"
-        "    com.google.protobuf.WireFormat.FieldType.$type_constant$);\n");
+      printer->Print(
+          vars,
+          "public static final\n"
+          "  com.google.protobuf.GeneratedMessageLite.GeneratedExtension<\n"
+          "    $containing_type$,\n"
+          "    $type$> $name$ = com.google.protobuf.GeneratedMessageLite\n"
+          "        .newSingularGeneratedExtension(\n"
+          "      $containing_type$.getDefaultInstance(),\n"
+          "      $default$,\n"
+          "      $prototype$,\n"
+          "      $enum_map$,\n"
+          "      $number$,\n"
+          "      com.google.protobuf.WireFormat.FieldType.$type_constant$);\n");
     }
   }
 }
 
+void ExtensionGenerator::GenerateNonNestedInitializationCode(
+    io::Printer* printer) {
+  if (descriptor_->extension_scope() == NULL &&
+      HasDescriptorMethods(descriptor_->file())) {
+    // Only applies to non-nested, non-lite extensions.
+    printer->Print(
+        "$name$.internalInit(descriptor.getExtensions().get($index$));\n",
+        "name", UnderscoresToCamelCase(descriptor_),
+        "index", SimpleItoa(descriptor_->index()));
+  }
+}
+
 void ExtensionGenerator::GenerateRegistrationCode(io::Printer* printer) {
   printer->Print(
     "registry.add($scope$.$name$);\n",

+ 1 - 1
src/google/protobuf/compiler/java/java_extension.h

@@ -60,7 +60,7 @@ class ExtensionGenerator {
   ~ExtensionGenerator();
 
   void Generate(io::Printer* printer);
-  void GenerateInitializationCode(io::Printer* printer);
+  void GenerateNonNestedInitializationCode(io::Printer* printer);
   void GenerateRegistrationCode(io::Printer* printer);
 
  private:

+ 35 - 10
src/google/protobuf/compiler/java/java_field.cc

@@ -37,6 +37,7 @@
 #include <google/protobuf/compiler/java/java_primitive_field.h>
 #include <google/protobuf/compiler/java/java_enum_field.h>
 #include <google/protobuf/compiler/java/java_message_field.h>
+#include <google/protobuf/compiler/java/java_string_field.h>
 #include <google/protobuf/stubs/common.h>
 
 namespace google {
@@ -63,33 +64,57 @@ FieldGeneratorMap::FieldGeneratorMap(const Descriptor* descriptor)
     extension_generators_(
       new scoped_ptr<FieldGenerator>[descriptor->extension_count()]) {
 
-  // Construct all the FieldGenerators.
+  // Construct all the FieldGenerators and assign them bit indices for their
+  // bit fields.
+  int messageBitIndex = 0;
+  int builderBitIndex = 0;
   for (int i = 0; i < descriptor->field_count(); i++) {
-    field_generators_[i].reset(MakeGenerator(descriptor->field(i)));
+    FieldGenerator* generator = MakeGenerator(descriptor->field(i),
+        messageBitIndex, builderBitIndex);
+    field_generators_[i].reset(generator);
+    messageBitIndex += generator->GetNumBitsForMessage();
+    builderBitIndex += generator->GetNumBitsForBuilder();
   }
   for (int i = 0; i < descriptor->extension_count(); i++) {
-    extension_generators_[i].reset(MakeGenerator(descriptor->extension(i)));
+    FieldGenerator* generator = MakeGenerator(descriptor->extension(i),
+        messageBitIndex, builderBitIndex);
+    extension_generators_[i].reset(generator);
+    messageBitIndex += generator->GetNumBitsForMessage();
+    builderBitIndex += generator->GetNumBitsForBuilder();
   }
 }
 
-FieldGenerator* FieldGeneratorMap::MakeGenerator(const FieldDescriptor* field) {
+FieldGenerator* FieldGeneratorMap::MakeGenerator(
+    const FieldDescriptor* field, int messageBitIndex, int builderBitIndex) {
   if (field->is_repeated()) {
     switch (GetJavaType(field)) {
       case JAVATYPE_MESSAGE:
-        return new RepeatedMessageFieldGenerator(field);
+        return new RepeatedMessageFieldGenerator(
+            field, messageBitIndex, builderBitIndex);
       case JAVATYPE_ENUM:
-        return new RepeatedEnumFieldGenerator(field);
+        return new RepeatedEnumFieldGenerator(
+            field, messageBitIndex, builderBitIndex);
+      case JAVATYPE_STRING:
+        return new RepeatedStringFieldGenerator(
+            field, messageBitIndex, builderBitIndex);
       default:
-        return new RepeatedPrimitiveFieldGenerator(field);
+        return new RepeatedPrimitiveFieldGenerator(
+            field, messageBitIndex, builderBitIndex);
     }
   } else {
     switch (GetJavaType(field)) {
       case JAVATYPE_MESSAGE:
-        return new MessageFieldGenerator(field);
+        return new MessageFieldGenerator(
+            field, messageBitIndex, builderBitIndex);
       case JAVATYPE_ENUM:
-        return new EnumFieldGenerator(field);
+        return new EnumFieldGenerator(
+            field, messageBitIndex, builderBitIndex);
+      case JAVATYPE_STRING:
+        return new StringFieldGenerator(
+            field, messageBitIndex, builderBitIndex);
       default:
-        return new PrimitiveFieldGenerator(field);
+        return new PrimitiveFieldGenerator(
+            field, messageBitIndex, builderBitIndex);
     }
   }
 }

+ 11 - 1
src/google/protobuf/compiler/java/java_field.h

@@ -55,15 +55,24 @@ class FieldGenerator {
   FieldGenerator() {}
   virtual ~FieldGenerator();
 
+  virtual int GetNumBitsForMessage() const = 0;
+  virtual int GetNumBitsForBuilder() const = 0;
+  virtual void GenerateInterfaceMembers(io::Printer* printer) const = 0;
   virtual void GenerateMembers(io::Printer* printer) const = 0;
   virtual void GenerateBuilderMembers(io::Printer* printer) const = 0;
   virtual void GenerateInitializationCode(io::Printer* printer) const = 0;
+  virtual void GenerateBuilderClearCode(io::Printer* printer) const = 0;
   virtual void GenerateMergingCode(io::Printer* printer) const = 0;
   virtual void GenerateBuildingCode(io::Printer* printer) const = 0;
   virtual void GenerateParsingCode(io::Printer* printer) const = 0;
   virtual void GenerateParsingCodeFromPacked(io::Printer* printer) const;
   virtual void GenerateSerializationCode(io::Printer* printer) const = 0;
   virtual void GenerateSerializedSizeCode(io::Printer* printer) const = 0;
+  virtual void GenerateFieldBuilderInitializationCode(io::Printer* printer)
+      const = 0;
+
+  virtual void GenerateEqualsCode(io::Printer* printer) const = 0;
+  virtual void GenerateHashCode(io::Printer* printer) const = 0;
 
   virtual string GetBoxedType() const = 0;
 
@@ -85,7 +94,8 @@ class FieldGeneratorMap {
   scoped_array<scoped_ptr<FieldGenerator> > field_generators_;
   scoped_array<scoped_ptr<FieldGenerator> > extension_generators_;
 
-  static FieldGenerator* MakeGenerator(const FieldDescriptor* field);
+  static FieldGenerator* MakeGenerator(const FieldDescriptor* field,
+      int messageBitIndex, int builderBitIndex);
 
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FieldGeneratorMap);
 };

+ 30 - 30
src/google/protobuf/compiler/java/java_file.cc

@@ -88,7 +88,8 @@ bool UsesExtensions(const Message& message) {
 FileGenerator::FileGenerator(const FileDescriptor* file)
   : file_(file),
     java_package_(FileJavaPackage(file)),
-    classname_(FileClassName(file)) {}
+    classname_(FileClassName(file)) {
+}
 
 FileGenerator::~FileGenerator() {}
 
@@ -179,7 +180,9 @@ void FileGenerator::Generate(io::Printer* printer) {
       EnumGenerator(file_->enum_type(i)).Generate(printer);
     }
     for (int i = 0; i < file_->message_type_count(); i++) {
-      MessageGenerator(file_->message_type(i)).Generate(printer);
+      MessageGenerator messageGenerator(file_->message_type(i));
+      messageGenerator.GenerateInterface(printer);
+      messageGenerator.Generate(printer);
     }
     if (HasGenericServices(file_)) {
       for (int i = 0; i < file_->service_count(); i++) {
@@ -215,24 +218,11 @@ void FileGenerator::Generate(io::Printer* printer) {
         .GenerateStaticVariableInitializers(printer);
     }
 
-    for (int i = 0; i < file_->extension_count(); i++) {
-      // TODO(kenton):  Reuse ExtensionGenerator objects?
-      ExtensionGenerator(file_->extension(i))
-        .GenerateInitializationCode(printer);
-    }
-
     printer->Outdent();
     printer->Print(
       "}\n");
   }
 
-  // Dummy function we can use to force the static initialization block to
-  // run.  Needed by inner classes.  Cannot be private due to
-  // java_multiple_files option.
-  printer->Print(
-    "\n"
-    "public static void internalForceInit() {}\n");
-
   printer->Print(
     "\n"
     "// @@protoc_insertion_point(outer_class_scope)\n");
@@ -310,11 +300,10 @@ void FileGenerator::GenerateEmbeddedDescriptor(io::Printer* printer) {
     MessageGenerator(file_->message_type(i))
       .GenerateStaticVariableInitializers(printer);
   }
-
   for (int i = 0; i < file_->extension_count(); i++) {
     // TODO(kenton):  Reuse ExtensionGenerator objects?
     ExtensionGenerator(file_->extension(i))
-      .GenerateInitializationCode(printer);
+        .GenerateNonNestedInitializationCode(printer);
   }
 
   if (UsesExtensions(file_proto)) {
@@ -325,9 +314,11 @@ void FileGenerator::GenerateEmbeddedDescriptor(io::Printer* printer) {
       "  com.google.protobuf.ExtensionRegistry.newInstance();\n"
       "registerAllExtensions(registry);\n");
     for (int i = 0; i < file_->dependency_count(); i++) {
-      printer->Print(
-        "$dependency$.registerAllExtensions(registry);\n",
-        "dependency", ClassName(file_->dependency(i)));
+      if (ShouldIncludeDependency(file_->dependency(i))) {
+        printer->Print(
+            "$dependency$.registerAllExtensions(registry);\n",
+            "dependency", ClassName(file_->dependency(i)));
+      }
     }
     printer->Print(
       "return registry;\n");
@@ -372,13 +363,14 @@ template<typename GeneratorClass, typename DescriptorClass>
 static void GenerateSibling(const string& package_dir,
                             const string& java_package,
                             const DescriptorClass* descriptor,
-                            OutputDirectory* output_directory,
-                            vector<string>* file_list) {
-  string filename = package_dir + descriptor->name() + ".java";
+                            GeneratorContext* context,
+                            vector<string>* file_list,
+                            const string& name_suffix,
+                            void (GeneratorClass::*pfn)(io::Printer* printer)) {
+  string filename = package_dir + descriptor->name() + name_suffix + ".java";
   file_list->push_back(filename);
 
-  scoped_ptr<io::ZeroCopyOutputStream> output(
-    output_directory->Open(filename));
+  scoped_ptr<io::ZeroCopyOutputStream> output(context->Open(filename));
   io::Printer printer(output.get(), '$');
 
   printer.Print(
@@ -391,28 +383,36 @@ static void GenerateSibling(const string& package_dir,
       "package", java_package);
   }
 
-  GeneratorClass(descriptor).Generate(&printer);
+  GeneratorClass generator(descriptor);
+  (generator.*pfn)(&printer);
 }
 
 void FileGenerator::GenerateSiblings(const string& package_dir,
-                                     OutputDirectory* output_directory,
+                                     GeneratorContext* context,
                                      vector<string>* file_list) {
   if (file_->options().java_multiple_files()) {
     for (int i = 0; i < file_->enum_type_count(); i++) {
       GenerateSibling<EnumGenerator>(package_dir, java_package_,
                                      file_->enum_type(i),
-                                     output_directory, file_list);
+                                     context, file_list, "",
+                                     &EnumGenerator::Generate);
     }
     for (int i = 0; i < file_->message_type_count(); i++) {
       GenerateSibling<MessageGenerator>(package_dir, java_package_,
                                         file_->message_type(i),
-                                        output_directory, file_list);
+                                        context, file_list, "OrBuilder",
+                                        &MessageGenerator::GenerateInterface);
+      GenerateSibling<MessageGenerator>(package_dir, java_package_,
+                                        file_->message_type(i),
+                                        context, file_list, "",
+                                        &MessageGenerator::Generate);
     }
     if (HasGenericServices(file_)) {
       for (int i = 0; i < file_->service_count(); i++) {
         GenerateSibling<ServiceGenerator>(package_dir, java_package_,
                                           file_->service(i),
-                                          output_directory, file_list);
+                                          context, file_list, "",
+                                          &ServiceGenerator::Generate);
       }
     }
   }

+ 4 - 2
src/google/protobuf/compiler/java/java_file.h

@@ -46,7 +46,7 @@ namespace protobuf {
     class Printer;             // printer.h
   }
   namespace compiler {
-    class OutputDirectory;     // code_generator.h
+    class GeneratorContext;     // code_generator.h
   }
 }
 
@@ -70,12 +70,13 @@ class FileGenerator {
   // files other than the outer file (i.e. one for each message, enum, and
   // service type).
   void GenerateSiblings(const string& package_dir,
-                        OutputDirectory* output_directory,
+                        GeneratorContext* generator_context,
                         vector<string>* file_list);
 
   const string& java_package() { return java_package_; }
   const string& classname()    { return classname_;    }
 
+
  private:
   // Returns whether the dependency should be included in the output file.
   // Always returns true for opensource, but used internally at Google to help
@@ -86,6 +87,7 @@ class FileGenerator {
   string java_package_;
   string classname_;
 
+
   void GenerateEmbeddedDescriptor(io::Printer* printer);
 
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FileGenerator);

+ 17 - 11
src/google/protobuf/compiler/java/java_generator.cc

@@ -51,11 +51,8 @@ JavaGenerator::~JavaGenerator() {}
 
 bool JavaGenerator::Generate(const FileDescriptor* file,
                              const string& parameter,
-                             OutputDirectory* output_directory,
+                             GeneratorContext* context,
                              string* error) const {
-  vector<pair<string, string> > options;
-  ParseGeneratorParameter(parameter, &options);
-
   // -----------------------------------------------------------------
   // parse generator options
 
@@ -63,6 +60,10 @@ bool JavaGenerator::Generate(const FileDescriptor* file,
   // per line.
   string output_list_file;
 
+
+  vector<pair<string, string> > options;
+  ParseGeneratorParameter(parameter, &options);
+
   for (int i = 0; i < options.size(); i++) {
     if (options[i].first == "output_list_file") {
       output_list_file = options[i].second;
@@ -72,18 +73,23 @@ bool JavaGenerator::Generate(const FileDescriptor* file,
     }
   }
 
-
   // -----------------------------------------------------------------
 
 
+  if (file->options().optimize_for() == FileOptions::LITE_RUNTIME &&
+      file->options().java_generate_equals_and_hash()) {
+    *error = "The \"java_generate_equals_and_hash\" option is incompatible "
+             "with \"optimize_for = LITE_RUNTIME\".  You must optimize for "
+             "SPEED or CODE_SIZE if you want to use this option.";
+    return false;
+  }
+
   FileGenerator file_generator(file);
   if (!file_generator.Validate(error)) {
     return false;
   }
 
-  string package_dir =
-    StringReplace(file_generator.java_package(), ".", "/", true);
-  if (!package_dir.empty()) package_dir += "/";
+  string package_dir = JavaPackageToDir(file_generator.java_package());
 
   vector<string> all_files;
 
@@ -94,19 +100,19 @@ bool JavaGenerator::Generate(const FileDescriptor* file,
 
   // Generate main java file.
   scoped_ptr<io::ZeroCopyOutputStream> output(
-    output_directory->Open(java_filename));
+    context->Open(java_filename));
   io::Printer printer(output.get(), '$');
   file_generator.Generate(&printer);
 
   // Generate sibling files.
-  file_generator.GenerateSiblings(package_dir, output_directory, &all_files);
+  file_generator.GenerateSiblings(package_dir, context, &all_files);
 
   // Generate output list if requested.
   if (!output_list_file.empty()) {
     // Generate output list.  This is just a simple text file placed in a
     // deterministic location which lists the .java files being generated.
     scoped_ptr<io::ZeroCopyOutputStream> srclist_raw_output(
-      output_directory->Open(output_list_file));
+      context->Open(output_list_file));
     io::Printer srclist_printer(srclist_raw_output.get(), '$');
     for (int i = 0; i < all_files.size(); i++) {
       srclist_printer.Print("$filename$\n", "filename", all_files[i]);

+ 1 - 1
src/google/protobuf/compiler/java/java_generator.h

@@ -57,7 +57,7 @@ class LIBPROTOC_EXPORT JavaGenerator : public CodeGenerator {
   // implements CodeGenerator ----------------------------------------
   bool Generate(const FileDescriptor* file,
                 const string& parameter,
-                OutputDirectory* output_directory,
+                GeneratorContext* context,
                 string* error) const;
 
  private:

+ 140 - 3
src/google/protobuf/compiler/java/java_helpers.cc

@@ -134,16 +134,27 @@ string FileClassName(const FileDescriptor* file) {
 }
 
 string FileJavaPackage(const FileDescriptor* file) {
+  string result;
+
   if (file->options().has_java_package()) {
-    return file->options().java_package();
+    result = file->options().java_package();
   } else {
-    string result = kDefaultPackage;
+    result = kDefaultPackage;
     if (!file->package().empty()) {
       if (!result.empty()) result += '.';
       result += file->package();
     }
-    return result;
   }
+
+
+  return result;
+}
+
+string JavaPackageToDir(string package_name) {
+  string package_dir =
+    StringReplace(package_name, ".", "/", true);
+  if (!package_dir.empty()) package_dir += "/";
+  return package_dir;
 }
 
 string ToJavaName(const string& full_name, const FileDescriptor* file) {
@@ -335,6 +346,132 @@ string DefaultValue(const FieldDescriptor* field) {
   return "";
 }
 
+bool IsDefaultValueJavaDefault(const FieldDescriptor* field) {
+  // Switch on CppType since we need to know which default_value_* method
+  // of FieldDescriptor to call.
+  switch (field->cpp_type()) {
+    case FieldDescriptor::CPPTYPE_INT32:
+      return field->default_value_int32() == 0;
+    case FieldDescriptor::CPPTYPE_UINT32:
+      return field->default_value_uint32() == 0;
+    case FieldDescriptor::CPPTYPE_INT64:
+      return field->default_value_int64() == 0L;
+    case FieldDescriptor::CPPTYPE_UINT64:
+      return field->default_value_uint64() == 0L;
+    case FieldDescriptor::CPPTYPE_DOUBLE:
+      return field->default_value_double() == 0.0;
+    case FieldDescriptor::CPPTYPE_FLOAT:
+      return field->default_value_float() == 0.0;
+    case FieldDescriptor::CPPTYPE_BOOL:
+      return field->default_value_bool() == false;
+
+    case FieldDescriptor::CPPTYPE_STRING:
+    case FieldDescriptor::CPPTYPE_ENUM:
+    case FieldDescriptor::CPPTYPE_MESSAGE:
+      return false;
+
+    // No default because we want the compiler to complain if any new
+    // types are added.
+  }
+
+  GOOGLE_LOG(FATAL) << "Can't get here.";
+  return false;
+}
+
+const char* bit_masks[] = {
+  "0x00000001",
+  "0x00000002",
+  "0x00000004",
+  "0x00000008",
+  "0x00000010",
+  "0x00000020",
+  "0x00000040",
+  "0x00000080",
+
+  "0x00000100",
+  "0x00000200",
+  "0x00000400",
+  "0x00000800",
+  "0x00001000",
+  "0x00002000",
+  "0x00004000",
+  "0x00008000",
+
+  "0x00010000",
+  "0x00020000",
+  "0x00040000",
+  "0x00080000",
+  "0x00100000",
+  "0x00200000",
+  "0x00400000",
+  "0x00800000",
+
+  "0x01000000",
+  "0x02000000",
+  "0x04000000",
+  "0x08000000",
+  "0x10000000",
+  "0x20000000",
+  "0x40000000",
+  "0x80000000",
+};
+
+string GetBitFieldName(int index) {
+  string varName = "bitField";
+  varName += SimpleItoa(index);
+  varName += "_";
+  return varName;
+}
+
+string GetBitFieldNameForBit(int bitIndex) {
+  return GetBitFieldName(bitIndex / 32);
+}
+
+string GenerateGetBit(int bitIndex) {
+  string varName = GetBitFieldNameForBit(bitIndex);
+  int bitInVarIndex = bitIndex % 32;
+
+  string mask = bit_masks[bitInVarIndex];
+  string result = "((" + varName + " & " + mask + ") == " + mask + ")";
+  return result;
+}
+
+string GenerateSetBit(int bitIndex) {
+  string varName = GetBitFieldNameForBit(bitIndex);
+  int bitInVarIndex = bitIndex % 32;
+
+  string mask = bit_masks[bitInVarIndex];
+  string result = varName + " |= " + mask;
+  return result;
+}
+
+string GenerateClearBit(int bitIndex) {
+  string varName = GetBitFieldNameForBit(bitIndex);
+  int bitInVarIndex = bitIndex % 32;
+
+  string mask = bit_masks[bitInVarIndex];
+  string result = varName + " = (" + varName + " & ~" + mask + ")";
+  return result;
+}
+
+string GenerateGetBitFromLocal(int bitIndex) {
+  string varName = "from_" + GetBitFieldNameForBit(bitIndex);
+  int bitInVarIndex = bitIndex % 32;
+
+  string mask = bit_masks[bitInVarIndex];
+  string result = "((" + varName + " & " + mask + ") == " + mask + ")";
+  return result;
+}
+
+string GenerateSetBitToLocal(int bitIndex) {
+  string varName = "to_" + GetBitFieldNameForBit(bitIndex);
+  int bitInVarIndex = bitIndex % 32;
+
+  string mask = bit_masks[bitInVarIndex];
+  string result = varName + " |= " + mask;
+  return result;
+}
+
 }  // namespace java
 }  // namespace compiler
 }  // namespace protobuf

Энэ ялгаанд хэт олон файл өөрчлөгдсөн тул зарим файлыг харуулаагүй болно