Просмотр исходного кода

Down-integrate from internal branch

xiaofeng@google.com 13 лет назад
Родитель
Сommit
b55a20fa2c
100 измененных файлов с 11434 добавлено и 1056 удалено
  1. 58 0
      CHANGES.txt
  2. 9 0
      java/pom.xml
  3. 251 85
      java/src/main/java/com/google/protobuf/AbstractMessage.java
  4. 22 4
      java/src/main/java/com/google/protobuf/AbstractMessageLite.java
  5. 261 0
      java/src/main/java/com/google/protobuf/AbstractParser.java
  6. 163 0
      java/src/main/java/com/google/protobuf/BoundedByteString.java
  7. 670 157
      java/src/main/java/com/google/protobuf/ByteString.java
  8. 38 3
      java/src/main/java/com/google/protobuf/CodedInputStream.java
  9. 32 2
      java/src/main/java/com/google/protobuf/CodedOutputStream.java
  10. 120 37
      java/src/main/java/com/google/protobuf/Descriptors.java
  11. 56 12
      java/src/main/java/com/google/protobuf/DynamicMessage.java
  12. 17 1
      java/src/main/java/com/google/protobuf/ExtensionRegistryLite.java
  13. 105 32
      java/src/main/java/com/google/protobuf/FieldSet.java
  14. 142 37
      java/src/main/java/com/google/protobuf/GeneratedMessage.java
  15. 173 107
      java/src/main/java/com/google/protobuf/GeneratedMessageLite.java
  16. 21 74
      java/src/main/java/com/google/protobuf/Internal.java
  17. 21 0
      java/src/main/java/com/google/protobuf/InvalidProtocolBufferException.java
  18. 216 0
      java/src/main/java/com/google/protobuf/LazyField.java
  19. 27 3
      java/src/main/java/com/google/protobuf/LazyStringArrayList.java
  20. 13 4
      java/src/main/java/com/google/protobuf/LazyStringList.java
  21. 349 0
      java/src/main/java/com/google/protobuf/LiteralByteString.java
  22. 26 4
      java/src/main/java/com/google/protobuf/Message.java
  23. 17 23
      java/src/main/java/com/google/protobuf/MessageLite.java
  24. 2 0
      java/src/main/java/com/google/protobuf/MessageLiteOrBuilder.java
  25. 21 2
      java/src/main/java/com/google/protobuf/MessageOrBuilder.java
  26. 259 0
      java/src/main/java/com/google/protobuf/Parser.java
  27. 7 7
      java/src/main/java/com/google/protobuf/RepeatedFieldBuilder.java
  28. 945 0
      java/src/main/java/com/google/protobuf/RopeByteString.java
  29. 6 6
      java/src/main/java/com/google/protobuf/SingleFieldBuilder.java
  30. 5 5
      java/src/main/java/com/google/protobuf/SmallSortedMap.java
  31. 102 19
      java/src/main/java/com/google/protobuf/TextFormat.java
  32. 27 2
      java/src/main/java/com/google/protobuf/UnknownFieldSet.java
  33. 7 0
      java/src/main/java/com/google/protobuf/UnmodifiableLazyStringList.java
  34. 349 0
      java/src/main/java/com/google/protobuf/Utf8.java
  35. 1 1
      java/src/main/java/com/google/protobuf/WireFormat.java
  36. 50 1
      java/src/test/java/com/google/protobuf/AbstractMessageTest.java
  37. 68 0
      java/src/test/java/com/google/protobuf/BoundedByteStringTest.java
  38. 692 0
      java/src/test/java/com/google/protobuf/ByteStringTest.java
  39. 8 9
      java/src/test/java/com/google/protobuf/CodedOutputStreamTest.java
  40. 190 2
      java/src/test/java/com/google/protobuf/DescriptorsTest.java
  41. 55 17
      java/src/test/java/com/google/protobuf/DynamicMessageTest.java
  42. 167 1
      java/src/test/java/com/google/protobuf/GeneratedMessageTest.java
  43. 180 0
      java/src/test/java/com/google/protobuf/IsValidUtf8Test.java
  44. 421 0
      java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
  45. 44 0
      java/src/test/java/com/google/protobuf/LazyStringArrayListTest.java
  46. 28 0
      java/src/test/java/com/google/protobuf/LazyStringEndToEndTest.java
  47. 396 0
      java/src/test/java/com/google/protobuf/LiteralByteStringTest.java
  48. 40 0
      java/src/test/java/com/google/protobuf/MessageTest.java
  49. 375 0
      java/src/test/java/com/google/protobuf/ParserTest.java
  50. 97 0
      java/src/test/java/com/google/protobuf/RopeByteStringSubstringTest.java
  51. 115 0
      java/src/test/java/com/google/protobuf/RopeByteStringTest.java
  52. 16 2
      java/src/test/java/com/google/protobuf/TestBadIdentifiers.java
  53. 178 34
      java/src/test/java/com/google/protobuf/TestUtil.java
  54. 34 0
      java/src/test/java/com/google/protobuf/TextFormatTest.java
  55. 155 2
      java/src/test/java/com/google/protobuf/WireFormatTest.java
  56. 18 0
      java/src/test/java/com/google/protobuf/test_bad_identifiers.proto
  57. 120 5
      python/google/protobuf/descriptor.py
  58. 120 0
      python/google/protobuf/descriptor_database.py
  59. 527 0
      python/google/protobuf/descriptor_pool.py
  60. 23 0
      python/google/protobuf/internal/api_implementation.py
  61. 12 2
      python/google/protobuf/internal/containers.py
  62. 82 35
      python/google/protobuf/internal/cpp_message.py
  63. 6 0
      python/google/protobuf/internal/decoder.py
  64. 63 0
      python/google/protobuf/internal/descriptor_database_test.py
  65. 220 0
      python/google/protobuf/internal/descriptor_pool_test.py
  66. 279 0
      python/google/protobuf/internal/descriptor_test.py
  67. 89 0
      python/google/protobuf/internal/enum_type_wrapper.py
  68. 55 0
      python/google/protobuf/internal/factory_test1.proto
  69. 77 0
      python/google/protobuf/internal/factory_test2.proto
  70. 25 0
      python/google/protobuf/internal/generator_test.py
  71. 45 0
      python/google/protobuf/internal/message_cpp_test.py
  72. 113 0
      python/google/protobuf/internal/message_factory_test.py
  73. 166 15
      python/google/protobuf/internal/message_test.py
  74. 49 0
      python/google/protobuf/internal/more_extensions_dynamic.proto
  75. 59 7
      python/google/protobuf/internal/python_message.py
  76. 91 0
      python/google/protobuf/internal/reflection_cpp_generated_test.py
  77. 269 19
      python/google/protobuf/internal/reflection_test.py
  78. 52 0
      python/google/protobuf/internal/test_bad_identifiers.proto
  79. 18 2
      python/google/protobuf/internal/test_util.py
  80. 40 14
      python/google/protobuf/internal/text_format_test.py
  81. 170 0
      python/google/protobuf/internal/unknown_fields_test.py
  82. 12 0
      python/google/protobuf/message.py
  83. 113 0
      python/google/protobuf/message_factory.py
  84. 77 14
      python/google/protobuf/pyext/python-proto2.cc
  85. 6 3
      python/google/protobuf/pyext/python_descriptor.cc
  86. 31 4
      python/google/protobuf/reflection.py
  87. 150 107
      python/google/protobuf/text_format.py
  88. 26 6
      python/setup.py
  89. 22 2
      src/Makefile.am
  90. 156 76
      src/google/protobuf/compiler/command_line_interface.cc
  91. 40 5
      src/google/protobuf/compiler/command_line_interface.h
  92. 114 6
      src/google/protobuf/compiler/command_line_interface_unittest.cc
  93. 1 1
      src/google/protobuf/compiler/cpp/cpp_bootstrap_unittest.cc
  94. 4 4
      src/google/protobuf/compiler/cpp/cpp_enum.cc
  95. 4 2
      src/google/protobuf/compiler/cpp/cpp_enum.h
  96. 14 9
      src/google/protobuf/compiler/cpp/cpp_enum_field.cc
  97. 4 2
      src/google/protobuf/compiler/cpp/cpp_enum_field.h
  98. 4 4
      src/google/protobuf/compiler/cpp/cpp_extension.cc
  99. 4 3
      src/google/protobuf/compiler/cpp/cpp_extension.h
  100. 17 14
      src/google/protobuf/compiler/cpp/cpp_field.cc

+ 58 - 0
CHANGES.txt

@@ -1,3 +1,61 @@
+2012-09-19 version 2.5.0:
+
+  General
+  * New notion "import public" that allows a proto file to forward the content
+    it imports to its importers. For example,
+      // foo.proto
+      import public "bar.proto";
+      import "baz.proto";
+
+      // qux.proto
+      import "foo.proto";
+      // Stuff defined in bar.proto may be used in this file, but stuff from
+      // baz.proto may NOT be used without importing it explicitly.
+    This is useful for moving proto files. To move a proto file, just leave
+    a single "import public" in the old proto file.
+  * New enum option "allow_alias" that specifies whether different symbols can
+    be assigned the same numeric value. Default value is "true". Setting it to
+    false causes the compiler to reject enum definitions where multiple symbols
+    have the same numeric value.
+
+  C++
+  * New generated method set_allocated_foo(Type* foo) for message and string
+    fields. This method allows you to set the field to a pre-allocated object
+    and the containing message takes the ownership of that object.
+  * Added SetAllocatedExtension() and ReleaseExtension() to extensions API.
+  * Custom options are now formatted correctly when descriptors are printed in
+    text format.
+  * Various speed optimizations.
+
+  Java
+  * Comments in proto files are now collected and put into generated code as
+    comments for corresponding classes and data members.
+  * Added Parser to parse directly into messages without a Builder. For
+    example,
+      Foo foo = Foo.getParser().ParseFrom(input);
+    Using Parser is ~25% faster than using Builder to parse messages.
+  * Added getters/setters to access the underlying ByteString of a string field
+    directly.
+  * ByteString now supports more operations: substring(), prepend(), and
+    append(). The implementation of ByteString uses a binary tree structure
+    to support these operations efficiently.
+  * New method findInitializationErrors() that lists all missing required
+    fields.
+  * Various code size and speed optimizations.
+
+  Python
+  * Added support for dynamic message creation. DescriptorDatabase,
+    DescriptorPool, and MessageFactory work like their C++ couterparts to
+    simplify Descriptor construction from *DescriptorProtos, and MessageFactory
+    provides a message instance from a Descriptor.
+  * Added pickle support for protobuf messages.
+  * Unknown fields are now preserved after parsing.
+  * Fixed bug where custom options were not correctly populated. Custom
+    options can be accessed now.
+  * Added EnumTypeWrapper that provides better accessibility to enum types.
+  * Added ParseMessage(descriptor, bytes) to generate a new Message instance
+    from a descriptor and a byte string.
+
 2011-05-01 version 2.4.1:
 
   C++

+ 9 - 0
java/pom.xml

@@ -102,6 +102,7 @@
                   <arg value="--proto_path=src/test/java" />
                   <arg value="../src/google/protobuf/unittest.proto" />
                   <arg value="../src/google/protobuf/unittest_import.proto" />
+                  <arg value="../src/google/protobuf/unittest_import_public.proto" />
                   <arg value="../src/google/protobuf/unittest_mset.proto" />
                   <arg
                     value="src/test/java/com/google/protobuf/multiple_files_test.proto" />
@@ -117,6 +118,7 @@
                     value="../src/google/protobuf/unittest_custom_options.proto" />
                   <arg value="../src/google/protobuf/unittest_lite.proto" />
                   <arg value="../src/google/protobuf/unittest_import_lite.proto" />
+                  <arg value="../src/google/protobuf/unittest_import_public_lite.proto" />
                   <arg value="../src/google/protobuf/unittest_lite_imports_nonlite.proto" />
                   <arg value="../src/google/protobuf/unittest_enormous_descriptor.proto" />
                   <arg value="../src/google/protobuf/unittest_no_generic_services.proto" />
@@ -158,6 +160,13 @@
                 <include>**/UninitializedMessageException.java</include>
                 <include>**/UnmodifiableLazyStringList.java</include>
                 <include>**/WireFormat.java</include>
+                <include>**/Parser.java</include>
+                <include>**/AbstractParser.java</include>
+                <include>**/BoundedByteString.java</include>
+                <include>**/LiteralByteString.java</include>
+                <include>**/RopeByteString.java</include>
+                <include>**/Utf8.java</include>
+                <include>**/LazyField.java</include>
               </includes>
               <testIncludes>
                 <testInclude>**/LiteTest.java</testInclude>

+ 251 - 85
java/src/main/java/com/google/protobuf/AbstractMessage.java

@@ -32,6 +32,7 @@ package com.google.protobuf;
 
 import com.google.protobuf.Descriptors.Descriptor;
 import com.google.protobuf.Descriptors.FieldDescriptor;
+import com.google.protobuf.GeneratedMessage.ExtendableBuilder;
 import com.google.protobuf.Internal.EnumLite;
 
 import java.io.IOException;
@@ -81,6 +82,25 @@ public abstract class AbstractMessage extends AbstractMessageLite
     return true;
   }
 
+  public List<String> findInitializationErrors() {
+    return Builder.findMissingFields(this);
+  }
+
+  public String getInitializationErrorString() {
+    return delimitWithCommas(findInitializationErrors());
+  }
+
+  private static String delimitWithCommas(List<String> parts) {
+    StringBuilder result = new StringBuilder();
+    for (String part : parts) {
+      if (result.length() > 0) {
+        result.append(", ");
+      }
+      result.append(part);
+    }
+    return result.toString();
+  }
+
   @Override
   public final String toString() {
     return TextFormat.printToString(this);
@@ -208,6 +228,15 @@ public abstract class AbstractMessage extends AbstractMessageLite
     return b ? 1231 : 1237;
   }
 
+  /**
+   * Package private helper method for AbstractParser to create
+   * UninitializedMessageException with missing field information.
+   */
+  @Override
+  UninitializedMessageException newUninitializedMessageException() {
+    return Builder.newUninitializedMessageException(this);
+  }
+
   /**
    * Helper method for implementing {@link Message#hashCode()}.
    * <p>
@@ -251,6 +280,14 @@ public abstract class AbstractMessage extends AbstractMessageLite
       return (BuilderType) this;
     }
 
+    public List<String> findInitializationErrors() {
+      return findMissingFields(this);
+    }
+
+    public String getInitializationErrorString() {
+      return delimitWithCommas(findInitializationErrors());
+    }
+
     public BuilderType mergeFrom(final Message other) {
       if (other.getDescriptorForType() != getDescriptorForType()) {
         throw new IllegalArgumentException(
@@ -314,7 +351,7 @@ public abstract class AbstractMessage extends AbstractMessageLite
         }
 
         if (!mergeFieldFrom(input, unknownFields, extensionRegistry,
-                            this, tag)) {
+                            getDescriptorForType(), this, null, tag)) {
           // end group tag
           break;
         }
@@ -323,25 +360,93 @@ public abstract class AbstractMessage extends AbstractMessageLite
       return (BuilderType) this;
     }
 
+    /** helper method to handle {@code builder} and {@code extensions}. */
+    private static void addRepeatedField(
+        Message.Builder builder,
+        FieldSet<FieldDescriptor> extensions,
+        FieldDescriptor field,
+        Object value) {
+      if (builder != null) {
+        builder.addRepeatedField(field, value);
+      } else {
+        extensions.addRepeatedField(field, value);
+      }
+    }
+
+    /** helper method to handle {@code builder} and {@code extensions}. */
+    private static void setField(
+        Message.Builder builder,
+        FieldSet<FieldDescriptor> extensions,
+        FieldDescriptor field,
+        Object value) {
+      if (builder != null) {
+        builder.setField(field, value);
+      } else {
+        extensions.setField(field, value);
+      }
+    }
+
+    /** helper method to handle {@code builder} and {@code extensions}. */
+    private static boolean hasOriginalMessage(
+        Message.Builder builder,
+        FieldSet<FieldDescriptor> extensions,
+        FieldDescriptor field) {
+      if (builder != null) {
+        return builder.hasField(field);
+      } else {
+        return extensions.hasField(field);
+      }
+    }
+
+    /** helper method to handle {@code builder} and {@code extensions}. */
+    private static Message getOriginalMessage(
+        Message.Builder builder,
+        FieldSet<FieldDescriptor> extensions,
+        FieldDescriptor field) {
+      if (builder != null) {
+        return (Message) builder.getField(field);
+      } else {
+        return (Message) extensions.getField(field);
+      }
+    }
+
+    /** helper method to handle {@code builder} and {@code extensions}. */
+    private static void mergeOriginalMessage(
+        Message.Builder builder,
+        FieldSet<FieldDescriptor> extensions,
+        FieldDescriptor field,
+        Message.Builder subBuilder) {
+      Message originalMessage = getOriginalMessage(builder, extensions, field);
+      if (originalMessage != null) {
+        subBuilder.mergeFrom(originalMessage);
+      }
+    }
+
     /**
-     * Like {@link #mergeFrom(CodedInputStream, UnknownFieldSet.Builder,
-     * ExtensionRegistryLite, Message.Builder)}, but parses a single field.
+     * Like {@link #mergeFrom(CodedInputStream, ExtensionRegistryLite)}, but
+     * parses a single field.
+     *
+     * When {@code builder} is not null, the method will parse and merge the
+     * field into {@code builder}. Otherwise, it will try to parse the field
+     * into {@code extensions}, when it's called by the parsing constructor in
+     * generated classes.
+     *
      * Package-private because it is used by GeneratedMessage.ExtendableMessage.
      * @param tag The tag, which should have already been read.
      * @return {@code true} unless the tag is an end-group tag.
      */
     static boolean mergeFieldFrom(
-        final CodedInputStream input,
-        final UnknownFieldSet.Builder unknownFields,
-        final ExtensionRegistryLite extensionRegistry,
-        final Message.Builder builder,
-        final int tag) throws IOException {
-      final Descriptor type = builder.getDescriptorForType();
-
+        CodedInputStream input,
+        UnknownFieldSet.Builder unknownFields,
+        ExtensionRegistryLite extensionRegistry,
+        Descriptor type,
+        Message.Builder builder,
+        FieldSet<FieldDescriptor> extensions,
+        int tag) throws IOException {
       if (type.getOptions().getMessageSetWireFormat() &&
           tag == WireFormat.MESSAGE_SET_ITEM_TAG) {
         mergeMessageSetExtensionFromCodedStream(
-          input, unknownFields, extensionRegistry, builder);
+            input, unknownFields, extensionRegistry, type, builder, extensions);
         return true;
       }
 
@@ -376,8 +481,10 @@ public abstract class AbstractMessage extends AbstractMessageLite
         } else {
           field = null;
         }
-      } else {
+      } else if (builder != null) {
         field = type.findFieldByNumber(fieldNumber);
+      } else {
+        field = null;
       }
 
       boolean unknown = false;
@@ -413,13 +520,13 @@ public abstract class AbstractMessage extends AbstractMessageLite
               // enum, drop it (don't even add it to unknownFields).
               return true;
             }
-            builder.addRepeatedField(field, value);
+            addRepeatedField(builder, extensions, field, value);
           }
         } else {
           while (input.getBytesUntilLimit() > 0) {
             final Object value =
               FieldSet.readPrimitiveField(input, field.getLiteType());
-            builder.addRepeatedField(field, value);
+            addRepeatedField(builder, extensions, field, value);
           }
         }
         input.popLimit(limit);
@@ -434,10 +541,10 @@ public abstract class AbstractMessage extends AbstractMessageLite
               subBuilder = builder.newBuilderForField(field);
             }
             if (!field.isRepeated()) {
-              subBuilder.mergeFrom((Message) builder.getField(field));
+              mergeOriginalMessage(builder, extensions, field, subBuilder);
             }
             input.readGroup(field.getNumber(), subBuilder, extensionRegistry);
-            value = subBuilder.build();
+            value = subBuilder.buildPartial();
             break;
           }
           case MESSAGE: {
@@ -448,10 +555,10 @@ public abstract class AbstractMessage extends AbstractMessageLite
               subBuilder = builder.newBuilderForField(field);
             }
             if (!field.isRepeated()) {
-              subBuilder.mergeFrom((Message) builder.getField(field));
+              mergeOriginalMessage(builder, extensions, field, subBuilder);
             }
             input.readMessage(subBuilder, extensionRegistry);
-            value = subBuilder.build();
+            value = subBuilder.buildPartial();
             break;
           }
           case ENUM:
@@ -470,22 +577,28 @@ public abstract class AbstractMessage extends AbstractMessageLite
         }
 
         if (field.isRepeated()) {
-          builder.addRepeatedField(field, value);
+          addRepeatedField(builder, extensions, field, value);
         } else {
-          builder.setField(field, value);
+          setField(builder, extensions, field, value);
         }
       }
 
       return true;
     }
 
-    /** Called by {@code #mergeFieldFrom()} to parse a MessageSet extension. */
+    /**
+     * Called by {@code #mergeFieldFrom()} to parse a MessageSet extension.
+     * If {@code builder} is not null, this method will merge MessageSet into
+     * the builder.  Otherwise, it will merge the MessageSet into {@code
+     * extensions}.
+     */
     private static void mergeMessageSetExtensionFromCodedStream(
-        final CodedInputStream input,
-        final UnknownFieldSet.Builder unknownFields,
-        final ExtensionRegistryLite extensionRegistry,
-        final Message.Builder builder) throws IOException {
-      final Descriptor type = builder.getDescriptorForType();
+        CodedInputStream input,
+        UnknownFieldSet.Builder unknownFields,
+        ExtensionRegistryLite extensionRegistry,
+        Descriptor type,
+        Message.Builder builder,
+        FieldSet<FieldDescriptor> extensions) throws IOException {
 
       // The wire format for MessageSet is:
       //   message MessageSet {
@@ -504,10 +617,11 @@ public abstract class AbstractMessage extends AbstractMessageLite
       // should be prepared to accept them.
 
       int typeId = 0;
-      ByteString rawBytes = null;  // If we encounter "message" before "typeId"
-      Message.Builder subBuilder = null;
-      FieldDescriptor field = null;
+      ByteString rawBytes = null; // If we encounter "message" before "typeId"
+      ExtensionRegistry.ExtensionInfo extension = null;
 
+      // Read bytes from input, if we get it's type first then parse it eagerly,
+      // otherwise we store the raw bytes in a local variable.
       while (true) {
         final int tag = input.readTag();
         if (tag == 0) {
@@ -516,75 +630,121 @@ public abstract class AbstractMessage extends AbstractMessageLite
 
         if (tag == WireFormat.MESSAGE_SET_TYPE_ID_TAG) {
           typeId = input.readUInt32();
-          // Zero is not a valid type ID.
           if (typeId != 0) {
-            final ExtensionRegistry.ExtensionInfo extension;
-
             // extensionRegistry may be either ExtensionRegistry or
-            // ExtensionRegistryLite.  Since the type we are parsing is a full
+            // ExtensionRegistryLite. Since the type we are parsing is a full
             // message, only a full ExtensionRegistry could possibly contain
-            // extensions of it.  Otherwise we will treat the registry as if it
+            // extensions of it. Otherwise we will treat the registry as if it
             // were empty.
             if (extensionRegistry instanceof ExtensionRegistry) {
               extension = ((ExtensionRegistry) extensionRegistry)
                   .findExtensionByNumber(type, typeId);
-            } else {
-              extension = null;
-            }
-
-            if (extension != null) {
-              field = extension.descriptor;
-              subBuilder = extension.defaultInstance.newBuilderForType();
-              final Message originalMessage = (Message)builder.getField(field);
-              if (originalMessage != null) {
-                subBuilder.mergeFrom(originalMessage);
-              }
-              if (rawBytes != null) {
-                // We already encountered the message.  Parse it now.
-                subBuilder.mergeFrom(
-                  CodedInputStream.newInstance(rawBytes.newInput()));
-                rawBytes = null;
-              }
-            } else {
-              // Unknown extension number.  If we already saw data, put it
-              // in rawBytes.
-              if (rawBytes != null) {
-                unknownFields.mergeField(typeId,
-                  UnknownFieldSet.Field.newBuilder()
-                    .addLengthDelimited(rawBytes)
-                    .build());
-                rawBytes = null;
-              }
             }
           }
+
         } else if (tag == WireFormat.MESSAGE_SET_MESSAGE_TAG) {
-          if (typeId == 0) {
-            // We haven't seen a type ID yet, so we have to store the raw bytes
-            // for now.
-            rawBytes = input.readBytes();
-          } else if (subBuilder == null) {
-            // We don't know how to parse this.  Ignore it.
-            unknownFields.mergeField(typeId,
-              UnknownFieldSet.Field.newBuilder()
-                .addLengthDelimited(input.readBytes())
-                .build());
-          } else {
-            // We already know the type, so we can parse directly from the input
-            // with no copying.  Hooray!
-            input.readMessage(subBuilder, extensionRegistry);
+          if (typeId != 0) {
+            if (extension != null && ExtensionRegistryLite.isEagerlyParseMessageSets()) {
+              // We already know the type, so we can parse directly from the
+              // input with no copying.  Hooray!
+              eagerlyMergeMessageSetExtension(
+                  input, extension, extensionRegistry, builder, extensions);
+              rawBytes = null;
+              continue;
+            }
           }
-        } else {
-          // Unknown tag.  Skip it.
+          // We haven't seen a type ID yet or we want parse message lazily.
+          rawBytes = input.readBytes();
+
+        } else { // Unknown tag. Skip it.
           if (!input.skipField(tag)) {
-            break;  // end of group
+            break; // End of group
           }
         }
       }
-
       input.checkLastTagWas(WireFormat.MESSAGE_SET_ITEM_END_TAG);
 
-      if (subBuilder != null) {
-        builder.setField(field, subBuilder.build());
+      // Process the raw bytes.
+      if (rawBytes != null && typeId != 0) { // Zero is not a valid type ID.
+        if (extension != null) { // We known the type
+          mergeMessageSetExtensionFromBytes(
+              rawBytes, extension, extensionRegistry, builder, extensions);
+        } else { // We don't know how to parse this. Ignore it.
+          if (rawBytes != null) {
+            unknownFields.mergeField(typeId, UnknownFieldSet.Field.newBuilder()
+                .addLengthDelimited(rawBytes).build());
+          }
+        }
+      }
+    }
+
+    private static void eagerlyMergeMessageSetExtension(
+        CodedInputStream input,
+        ExtensionRegistry.ExtensionInfo extension,
+        ExtensionRegistryLite extensionRegistry,
+        Message.Builder builder,
+        FieldSet<FieldDescriptor> extensions) throws IOException {
+
+      FieldDescriptor field = extension.descriptor;
+      Message value = null;
+      if (hasOriginalMessage(builder, extensions, field)) {
+        Message originalMessage =
+            getOriginalMessage(builder, extensions, field);
+        Message.Builder subBuilder = originalMessage.toBuilder();
+        input.readMessage(subBuilder, extensionRegistry);
+        value = subBuilder.buildPartial();
+      } else {
+        value = input.readMessage(extension.defaultInstance.getParserForType(),
+          extensionRegistry);
+      }
+
+      if (builder != null) {
+        builder.setField(field, value);
+      } else {
+        extensions.setField(field, value);
+      }
+    }
+
+    private static void mergeMessageSetExtensionFromBytes(
+        ByteString rawBytes,
+        ExtensionRegistry.ExtensionInfo extension,
+        ExtensionRegistryLite extensionRegistry,
+        Message.Builder builder,
+        FieldSet<FieldDescriptor> extensions) throws IOException {
+
+      FieldDescriptor field = extension.descriptor;
+      boolean hasOriginalValue = hasOriginalMessage(builder, extensions, field);
+
+      if (hasOriginalValue || ExtensionRegistryLite.isEagerlyParseMessageSets()) {
+        // If the field already exists, we just parse the field.
+        Message value = null;
+        if (hasOriginalValue) {
+          Message originalMessage =
+              getOriginalMessage(builder, extensions, field);
+          Message.Builder subBuilder= originalMessage.toBuilder();
+          subBuilder.mergeFrom(rawBytes, extensionRegistry);
+          value = subBuilder.buildPartial();
+        } else {
+          value = extension.defaultInstance.getParserForType()
+              .parsePartialFrom(rawBytes, extensionRegistry);
+        }
+        setField(builder, extensions, field, value);
+      } else {
+        // Use LazyField to load MessageSet lazily.
+        LazyField lazyField = new LazyField(
+            extension.defaultInstance, extensionRegistry, rawBytes);
+        if (builder != null) {
+          // TODO(xiangl): it looks like this method can only be invoked by
+          // ExtendableBuilder, but I'm not sure. So I double check the type of
+          // builder here. It may be useless and need more investigation.
+          if (builder instanceof ExtendableBuilder) {
+            builder.setField(field, lazyField);
+          } else {
+            builder.setField(field, lazyField.getValue());
+          }
+        } else {
+          extensions.setField(field, lazyField);
+        }
       }
     }
 
@@ -596,6 +756,11 @@ public abstract class AbstractMessage extends AbstractMessageLite
       return (BuilderType) this;
     }
 
+    public Message.Builder getFieldBuilder(final FieldDescriptor field) {
+      throw new UnsupportedOperationException(
+          "getFieldBuilder() called on an unsupported message type.");
+    }
+
     /**
      * Construct an UninitializedMessageException reporting missing fields in
      * the given message.
@@ -609,14 +774,15 @@ public abstract class AbstractMessage extends AbstractMessageLite
      * Populates {@code this.missingFields} with the full "path" of each
      * missing required field in the given message.
      */
-    private static List<String> findMissingFields(final Message message) {
+    private static List<String> findMissingFields(
+        final MessageOrBuilder message) {
       final List<String> results = new ArrayList<String>();
       findMissingFields(message, "", results);
       return results;
     }
 
     /** Recursive helper implementing {@link #findMissingFields(Message)}. */
-    private static void findMissingFields(final Message message,
+    private static void findMissingFields(final MessageOrBuilder message,
                                           final String prefix,
                                           final List<String> results) {
       for (final FieldDescriptor field :
@@ -635,13 +801,13 @@ public abstract class AbstractMessage extends AbstractMessageLite
           if (field.isRepeated()) {
             int i = 0;
             for (final Object element : (List) value) {
-              findMissingFields((Message) element,
+              findMissingFields((MessageOrBuilder) element,
                                 subMessagePrefix(prefix, field, i++),
                                 results);
             }
           } else {
             if (message.hasField(field)) {
-              findMissingFields((Message) value,
+              findMissingFields((MessageOrBuilder) value,
                                 subMessagePrefix(prefix, field, -1),
                                 results);
             }

+ 22 - 4
java/src/main/java/com/google/protobuf/AbstractMessageLite.java

@@ -91,6 +91,14 @@ public abstract class AbstractMessageLite implements MessageLite {
     codedOutput.flush();
   }
 
+  /**
+   * Package private helper method for AbstractParser to create
+   * UninitializedMessageException.
+   */
+  UninitializedMessageException newUninitializedMessageException() {
+    return new UninitializedMessageException(this);
+  }
+
   /**
    * A partial implementation of the {@link Message.Builder} interface which
    * implements as many methods of that interface as possible in terms of
@@ -307,10 +315,12 @@ public abstract class AbstractMessageLite implements MessageLite {
      */
     protected static <T> void addAll(final Iterable<T> values,
                                      final Collection<? super T> list) {
-      for (final T value : values) {
-        if (value == null) {
-          throw new NullPointerException();
-        }
+      if (values instanceof LazyStringList) {
+        // For StringOrByteStringLists, check the underlying elements to avoid
+        // forcing conversions of ByteStrings to Strings.
+        checkForNullValues(((LazyStringList) values).getUnderlyingElements());
+      } else {
+        checkForNullValues(values);
       }
       if (values instanceof Collection) {
         final Collection<T> collection = (Collection<T>) values;
@@ -321,5 +331,13 @@ public abstract class AbstractMessageLite implements MessageLite {
         }
       }
     }
+
+    private static void checkForNullValues(final Iterable<?> values) {
+      for (final Object value : values) {
+        if (value == null) {
+          throw new NullPointerException();
+        }
+      }
+    }
   }
 }

+ 261 - 0
java/src/main/java/com/google/protobuf/AbstractParser.java

@@ -0,0 +1,261 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import com.google.protobuf.AbstractMessageLite.Builder.LimitedInputStream;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * A partial implementation of the {@link Parser} interface which implements
+ * as many methods of that interface as possible in terms of other methods.
+ *
+ * Note: This class implements all the convenience methods in the
+ * {@link Parser} interface. See {@link Parser} for related javadocs.
+ * Subclasses need to implement
+ * {@link Parser#parsePartialFrom(CodedInputStream, ExtensionRegistryLite)}
+ *
+ * @author liujisi@google.com (Pherl Liu)
+ */
+public abstract class AbstractParser<MessageType extends MessageLite>
+    implements Parser<MessageType> {
+  /**
+   * Creates an UninitializedMessageException for MessageType.
+   */
+  private UninitializedMessageException
+      newUninitializedMessageException(MessageType message) {
+    if (message instanceof AbstractMessageLite) {
+      return ((AbstractMessageLite) message).newUninitializedMessageException();
+    }
+    return new UninitializedMessageException(message);
+  }
+
+  /**
+   * Helper method to check if message is initialized.
+   *
+   * @throws InvalidProtocolBufferException if it is not initialized.
+   * @return The message to check.
+   */
+  private MessageType checkMessageInitialized(MessageType message)
+      throws InvalidProtocolBufferException {
+    if (message != null && !message.isInitialized()) {
+      throw newUninitializedMessageException(message)
+          .asInvalidProtocolBufferException()
+          .setUnfinishedMessage(message);
+    }
+    return message;
+  }
+
+  private static final ExtensionRegistryLite EMPTY_REGISTRY
+      = ExtensionRegistryLite.getEmptyRegistry();
+
+  public MessageType parsePartialFrom(CodedInputStream input)
+      throws InvalidProtocolBufferException {
+    return parsePartialFrom(input, EMPTY_REGISTRY);
+  }
+
+  public MessageType parseFrom(CodedInputStream input,
+                               ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    return checkMessageInitialized(
+        parsePartialFrom(input, extensionRegistry));
+  }
+
+  public MessageType parseFrom(CodedInputStream input)
+      throws InvalidProtocolBufferException {
+    return parseFrom(input, EMPTY_REGISTRY);
+  }
+
+  public MessageType parsePartialFrom(ByteString data,
+                                      ExtensionRegistryLite extensionRegistry)
+    throws InvalidProtocolBufferException {
+    MessageType message;
+    try {
+      CodedInputStream input = data.newCodedInput();
+      message = parsePartialFrom(input, extensionRegistry);
+      try {
+        input.checkLastTagWas(0);
+      } catch (InvalidProtocolBufferException e) {
+        throw e.setUnfinishedMessage(message);
+      }
+      return message;
+    } catch (InvalidProtocolBufferException e) {
+      throw e;
+    } catch (IOException e) {
+      throw new RuntimeException(
+          "Reading from a ByteString threw an IOException (should " +
+          "never happen).", e);
+    }
+  }
+
+  public MessageType parsePartialFrom(ByteString data)
+      throws InvalidProtocolBufferException {
+    return parsePartialFrom(data, EMPTY_REGISTRY);
+  }
+
+  public MessageType parseFrom(ByteString data,
+                               ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    return checkMessageInitialized(parsePartialFrom(data, extensionRegistry));
+  }
+
+  public MessageType parseFrom(ByteString data)
+      throws InvalidProtocolBufferException {
+    return parseFrom(data, EMPTY_REGISTRY);
+  }
+
+  public MessageType parsePartialFrom(byte[] data, int off, int len,
+                                      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    try {
+      CodedInputStream input = CodedInputStream.newInstance(data, off, len);
+      MessageType message = parsePartialFrom(input, extensionRegistry);
+      try {
+        input.checkLastTagWas(0);
+      } catch (InvalidProtocolBufferException e) {
+        throw e.setUnfinishedMessage(message);
+      }
+      return message;
+    } catch (InvalidProtocolBufferException e) {
+      throw e;
+    } catch (IOException e) {
+      throw new RuntimeException(
+          "Reading from a byte array threw an IOException (should " +
+          "never happen).", e);
+    }
+  }
+
+  public MessageType parsePartialFrom(byte[] data, int off, int len)
+      throws InvalidProtocolBufferException {
+    return parsePartialFrom(data, off, len, EMPTY_REGISTRY);
+  }
+
+  public MessageType parsePartialFrom(byte[] data,
+                                      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    return parsePartialFrom(data, 0, data.length, extensionRegistry);
+  }
+
+  public MessageType parsePartialFrom(byte[] data)
+      throws InvalidProtocolBufferException {
+    return parsePartialFrom(data, 0, data.length, EMPTY_REGISTRY);
+  }
+
+  public MessageType parseFrom(byte[] data, int off, int len,
+                               ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    return checkMessageInitialized(
+        parsePartialFrom(data, off, len, extensionRegistry));
+  }
+
+  public MessageType parseFrom(byte[] data, int off, int len)
+      throws InvalidProtocolBufferException {
+    return parseFrom(data, off, len, EMPTY_REGISTRY);
+  }
+
+  public MessageType parseFrom(byte[] data,
+                               ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    return parseFrom(data, 0, data.length, extensionRegistry);
+  }
+
+  public MessageType parseFrom(byte[] data)
+      throws InvalidProtocolBufferException {
+    return parseFrom(data, EMPTY_REGISTRY);
+  }
+
+  public MessageType parsePartialFrom(InputStream input,
+                                      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    CodedInputStream codedInput = CodedInputStream.newInstance(input);
+    MessageType message = parsePartialFrom(codedInput, extensionRegistry);
+    try {
+      codedInput.checkLastTagWas(0);
+    } catch (InvalidProtocolBufferException e) {
+      throw e.setUnfinishedMessage(message);
+    }
+    return message;
+  }
+
+  public MessageType parsePartialFrom(InputStream input)
+      throws InvalidProtocolBufferException {
+    return parsePartialFrom(input, EMPTY_REGISTRY);
+  }
+
+  public MessageType parseFrom(InputStream input,
+                               ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    return checkMessageInitialized(
+        parsePartialFrom(input, extensionRegistry));
+  }
+
+  public MessageType parseFrom(InputStream input)
+      throws InvalidProtocolBufferException {
+    return parseFrom(input, EMPTY_REGISTRY);
+  }
+
+  public MessageType parsePartialDelimitedFrom(
+      InputStream input,
+      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    int size;
+    try {
+      int firstByte = input.read();
+      if (firstByte == -1) {
+        return null;
+      }
+      size = CodedInputStream.readRawVarint32(firstByte, input);
+    } catch (IOException e) {
+      throw new InvalidProtocolBufferException(e.getMessage());
+    }
+    InputStream limitedInput = new LimitedInputStream(input, size);
+    return parsePartialFrom(limitedInput, extensionRegistry);
+  }
+
+  public MessageType parsePartialDelimitedFrom(InputStream input)
+      throws InvalidProtocolBufferException {
+    return parsePartialDelimitedFrom(input, EMPTY_REGISTRY);
+  }
+
+  public MessageType parseDelimitedFrom(
+      InputStream input,
+      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    return checkMessageInitialized(
+        parsePartialDelimitedFrom(input, extensionRegistry));
+  }
+
+  public MessageType parseDelimitedFrom(InputStream input)
+      throws InvalidProtocolBufferException {
+    return parseDelimitedFrom(input, EMPTY_REGISTRY);
+  }
+}

+ 163 - 0
java/src/main/java/com/google/protobuf/BoundedByteString.java

@@ -0,0 +1,163 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.util.NoSuchElementException;
+
+/**
+ * This class is used to represent the substring of a {@link ByteString} over a
+ * single byte array. In terms of the public API of {@link ByteString}, you end
+ * up here by calling {@link ByteString#copyFrom(byte[])} followed by {@link
+ * ByteString#substring(int, int)}.
+ *
+ * <p>This class contains most of the overhead involved in creating a substring
+ * from a {@link LiteralByteString}.  The overhead involves some range-checking
+ * and two extra fields.
+ *
+ * @author carlanton@google.com (Carl Haverl)
+ */
+class BoundedByteString extends LiteralByteString {
+
+  private final int bytesOffset;
+  private final int bytesLength;
+
+  /**
+   * Creates a {@code BoundedByteString} backed by the sub-range of given array,
+   * without copying.
+   *
+   * @param bytes  array to wrap
+   * @param offset index to first byte to use in bytes
+   * @param length number of bytes to use from bytes
+   * @throws IllegalArgumentException if {@code offset < 0}, {@code length < 0},
+   *                                  or if {@code offset + length >
+   *                                  bytes.length}.
+   */
+  BoundedByteString(byte[] bytes, int offset, int length) {
+    super(bytes);
+    if (offset < 0) {
+      throw new IllegalArgumentException("Offset too small: " + offset);
+    }
+    if (length < 0) {
+      throw new IllegalArgumentException("Length too small: " + offset);
+    }
+    if ((long) offset + length > bytes.length) {
+      throw new IllegalArgumentException(
+          "Offset+Length too large: " + offset + "+" + length);
+    }
+
+    this.bytesOffset = offset;
+    this.bytesLength = length;
+  }
+
+  /**
+   * Gets the byte at the given index.
+   * Throws {@link ArrayIndexOutOfBoundsException}
+   * for backwards-compatibility reasons although it would more properly be
+   * {@link IndexOutOfBoundsException}.
+   *
+   * @param index index of byte
+   * @return the value
+   * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
+   */
+  @Override
+  public byte byteAt(int index) {
+    // We must check the index ourselves as we cannot rely on Java array index
+    // checking for substrings.
+    if (index < 0) {
+      throw new ArrayIndexOutOfBoundsException("Index too small: " + index);
+    }
+    if (index >= size()) {
+      throw new ArrayIndexOutOfBoundsException(
+          "Index too large: " + index + ", " + size());
+    }
+
+    return bytes[bytesOffset + index];
+  }
+
+  @Override
+  public int size() {
+    return bytesLength;
+  }
+
+  @Override
+  protected int getOffsetIntoBytes() {
+    return bytesOffset;
+  }
+
+  // =================================================================
+  // ByteString -> byte[]
+
+  @Override
+  protected void copyToInternal(byte[] target, int sourceOffset, 
+      int targetOffset, int numberToCopy) {
+    System.arraycopy(bytes, getOffsetIntoBytes() + sourceOffset, target,
+        targetOffset, numberToCopy);
+  }
+
+  // =================================================================
+  // ByteIterator
+
+  @Override
+  public ByteIterator iterator() {
+    return new BoundedByteIterator();
+  }
+
+  private class BoundedByteIterator implements ByteIterator {
+
+    private int position;
+    private final int limit;
+
+    private BoundedByteIterator() {
+      position = getOffsetIntoBytes();
+      limit = position + size();
+    }
+
+    public boolean hasNext() {
+      return (position < limit);
+    }
+
+    public Byte next() {
+      // Boxing calls Byte.valueOf(byte), which does not instantiate.
+      return nextByte();
+    }
+
+    public byte nextByte() {
+      if (position >= limit) {
+        throw new NoSuchElementException();
+      }
+      return bytes[position++];
+    }
+
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+}

Разница между файлами не показана из-за своего большого размера
+ 670 - 157
java/src/main/java/com/google/protobuf/ByteString.java


+ 38 - 3
java/src/main/java/com/google/protobuf/CodedInputStream.java

@@ -243,6 +243,23 @@ public final class CodedInputStream {
     --recursionDepth;
   }
 
+  /** Read a {@code group} field value from the stream. */
+  public <T extends MessageLite> T readGroup(
+      final int fieldNumber,
+      final Parser<T> parser,
+      final ExtensionRegistryLite extensionRegistry)
+      throws IOException {
+    if (recursionDepth >= recursionLimit) {
+      throw InvalidProtocolBufferException.recursionLimitExceeded();
+    }
+    ++recursionDepth;
+    T result = parser.parsePartialFrom(this, extensionRegistry);
+    checkLastTagWas(
+      WireFormat.makeTag(fieldNumber, WireFormat.WIRETYPE_END_GROUP));
+    --recursionDepth;
+    return result;
+  }
+
   /**
    * Reads a {@code group} field value from the stream and merges it into the
    * given {@link UnknownFieldSet}.
@@ -278,6 +295,24 @@ public final class CodedInputStream {
     popLimit(oldLimit);
   }
 
+  /** Read an embedded message field value from the stream. */
+  public <T extends MessageLite> T readMessage(
+      final Parser<T> parser,
+      final ExtensionRegistryLite extensionRegistry)
+      throws IOException {
+    int length = readRawVarint32();
+    if (recursionDepth >= recursionLimit) {
+      throw InvalidProtocolBufferException.recursionLimitExceeded();
+    }
+    final int oldLimit = pushLimit(length);
+    ++recursionDepth;
+    T result = parser.parsePartialFrom(this, extensionRegistry);
+    checkLastTagWas(0);
+    --recursionDepth;
+    popLimit(oldLimit);
+    return result;
+  }
+
   /** Read a {@code bytes} field value from the stream. */
   public ByteString readBytes() throws IOException {
     final int size = readRawVarint32();
@@ -601,7 +636,7 @@ public final class CodedInputStream {
    * refreshing its buffer.  If you need to prevent reading past a certain
    * point in the underlying {@code InputStream} (e.g. because you expect it to
    * contain more data after the end of the message which you need to handle
-   * differently) then you must place a wrapper around you {@code InputStream}
+   * differently) then you must place a wrapper around your {@code InputStream}
    * which limits the amount of data that can be read from it.
    *
    * @return the old limit.
@@ -676,7 +711,7 @@ public final class CodedInputStream {
 
   /**
    * Called with {@code this.buffer} is empty to read more bytes from the
-   * input.  If {@code mustSucceed} is true, refillBuffer() gurantees that
+   * input.  If {@code mustSucceed} is true, refillBuffer() guarantees that
    * either there will be at least one byte in the buffer when it returns
    * or it will throw an exception.  If {@code mustSucceed} is false,
    * refillBuffer() returns false if no more bytes were available.
@@ -879,7 +914,7 @@ public final class CodedInputStream {
         refillBuffer(true);
       }
 
-      bufferPos = size - pos; 
+      bufferPos = size - pos;
     }
   }
 }

+ 32 - 2
java/src/main/java/com/google/protobuf/CodedOutputStream.java

@@ -30,10 +30,10 @@
 
 package com.google.protobuf;
 
-import java.io.OutputStream;
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
 import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
 
 /**
  * Encodes and writes protocol message fields.
@@ -539,6 +539,15 @@ public final class CodedOutputStream {
     return computeTagSize(fieldNumber) + computeBytesSizeNoTag(value);
   }
 
+  /**
+   * Compute the number of bytes that would be needed to encode an
+   * embedded message in lazy field, including tag.
+   */
+  public static int computeLazyFieldSize(final int fieldNumber,
+                                         final LazyField value) {
+    return computeTagSize(fieldNumber) + computeLazyFieldSizeNoTag(value);
+  }
+
   /**
    * Compute the number of bytes that would be needed to encode a
    * {@code uint32} field, including tag.
@@ -614,6 +623,18 @@ public final class CodedOutputStream {
            computeBytesSize(WireFormat.MESSAGE_SET_MESSAGE, value);
   }
 
+  /**
+   * Compute the number of bytes that would be needed to encode an
+   * lazily parsed MessageSet extension field to the stream.  For
+   * historical reasons, the wire format differs from normal fields.
+   */
+  public static int computeLazyFieldMessageSetExtensionSize(
+      final int fieldNumber, final LazyField value) {
+    return computeTagSize(WireFormat.MESSAGE_SET_ITEM) * 2 +
+           computeUInt32Size(WireFormat.MESSAGE_SET_TYPE_ID, fieldNumber) +
+           computeLazyFieldSize(WireFormat.MESSAGE_SET_MESSAGE, value);
+  }
+  
   // -----------------------------------------------------------------
 
   /**
@@ -729,6 +750,15 @@ public final class CodedOutputStream {
     return computeRawVarint32Size(size) + size;
   }
 
+  /**
+   * Compute the number of bytes that would be needed to encode an embedded
+   * message stored in lazy field.
+   */
+  public static int computeLazyFieldSizeNoTag(final LazyField value) {
+    final int size = value.getSerializedSize();
+    return computeRawVarint32Size(size) + size;
+  }
+
   /**
    * Compute the number of bytes that would be needed to encode a
    * {@code bytes} field.

+ 120 - 37
java/src/main/java/com/google/protobuf/Descriptors.java

@@ -35,8 +35,10 @@ import com.google.protobuf.DescriptorProtos.*;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.io.UnsupportedEncodingException;
 
 /**
@@ -106,6 +108,11 @@ public final class Descriptors {
       return Collections.unmodifiableList(Arrays.asList(dependencies));
     }
 
+    /** Get a list of this file's public dependencies (public imports). */
+    public List<FileDescriptor> getPublicDependencies() {
+      return Collections.unmodifiableList(Arrays.asList(publicDependencies));
+    }
+
     /**
      * Find a message type in the file by name.  Does not find nested types.
      *
@@ -216,7 +223,7 @@ public final class Descriptors {
     public static FileDescriptor buildFrom(final FileDescriptorProto proto,
                                            final FileDescriptor[] dependencies)
                                     throws DescriptorValidationException {
-      // Building decsriptors involves two steps:  translating and linking.
+      // Building descriptors involves two steps:  translating and linking.
       // In the translation step (implemented by FileDescriptor's
       // constructor), we build an object tree mirroring the
       // FileDescriptorProto's tree and put all of the descriptors into the
@@ -317,12 +324,12 @@ public final class Descriptors {
      * {@link FileDescriptor#internalBuildGeneratedFileFrom}, the caller
      * provides a callback implementing this interface.  The callback is called
      * after the FileDescriptor has been constructed, in order to assign all
-     * the global variales defined in the generated code which point at parts
+     * the global variables defined in the generated code which point at parts
      * of the FileDescriptor.  The callback returns an ExtensionRegistry which
      * contains any extensions which might be used in the descriptor -- that
      * is, extensions of the various "Options" messages defined in
      * descriptor.proto.  The callback may also return null to indicate that
-     * no extensions are used in the decsriptor.
+     * no extensions are used in the descriptor.
      */
     public interface InternalDescriptorAssigner {
       ExtensionRegistry assignDescriptors(FileDescriptor root);
@@ -334,6 +341,7 @@ public final class Descriptors {
     private final ServiceDescriptor[] services;
     private final FieldDescriptor[] extensions;
     private final FileDescriptor[] dependencies;
+    private final FileDescriptor[] publicDependencies;
     private final DescriptorPool pool;
 
     private FileDescriptor(final FileDescriptorProto proto,
@@ -343,6 +351,17 @@ public final class Descriptors {
       this.pool = pool;
       this.proto = proto;
       this.dependencies = dependencies.clone();
+      this.publicDependencies =
+          new FileDescriptor[proto.getPublicDependencyCount()];
+      for (int i = 0; i < proto.getPublicDependencyCount(); i++) {
+        int index = proto.getPublicDependency(i);
+        if (index < 0 || index >= this.dependencies.length) {
+          throw new DescriptorValidationException(this,
+              "Invalid public dependency index.");
+        }
+        this.publicDependencies[i] =
+            this.dependencies[proto.getPublicDependency(i)];
+      }
 
       pool.addPackage(getPackage(), this);
 
@@ -390,7 +409,7 @@ public final class Descriptors {
      * in the original.  This method is needed for bootstrapping when a file
      * defines custom options.  The options may be defined in the file itself,
      * so we can't actually parse them until we've constructed the descriptors,
-     * but to construct the decsriptors we have to have parsed the descriptor
+     * but to construct the descriptors we have to have parsed the descriptor
      * protos.  So, we have to parse the descriptor protos a second time after
      * constructing the descriptors.
      */
@@ -641,7 +660,7 @@ public final class Descriptors {
                  FieldSet.FieldDescriptorLite<FieldDescriptor> {
     /**
      * Get the index of this descriptor within its parent.
-     * @see Descriptor#getIndex()
+     * @see Descriptors.Descriptor#getIndex()
      */
     public int getIndex() { return index; }
 
@@ -656,7 +675,7 @@ public final class Descriptors {
 
     /**
      * Get the field's fully-qualified name.
-     * @see Descriptor#getFullName()
+     * @see Descriptors.Descriptor#getFullName()
      */
     public String getFullName() { return fullName; }
 
@@ -943,7 +962,8 @@ public final class Descriptors {
     private void crossLink() throws DescriptorValidationException {
       if (proto.hasExtendee()) {
         final GenericDescriptor extendee =
-          file.pool.lookupSymbol(proto.getExtendee(), this);
+          file.pool.lookupSymbol(proto.getExtendee(), this,
+              DescriptorPool.SearchFilter.TYPES_ONLY);
         if (!(extendee instanceof Descriptor)) {
           throw new DescriptorValidationException(this,
               '\"' + proto.getExtendee() + "\" is not a message type.");
@@ -960,7 +980,8 @@ public final class Descriptors {
 
       if (proto.hasTypeName()) {
         final GenericDescriptor typeDescriptor =
-          file.pool.lookupSymbol(proto.getTypeName(), this);
+          file.pool.lookupSymbol(proto.getTypeName(), this,
+              DescriptorPool.SearchFilter.TYPES_ONLY);
 
         if (!proto.hasType()) {
           // Choose field type based on symbol.
@@ -1149,7 +1170,7 @@ public final class Descriptors {
       implements GenericDescriptor, Internal.EnumLiteMap<EnumValueDescriptor> {
     /**
      * Get the index of this descriptor within its parent.
-     * @see Descriptor#getIndex()
+     * @see Descriptors.Descriptor#getIndex()
      */
     public int getIndex() { return index; }
 
@@ -1161,7 +1182,7 @@ public final class Descriptors {
 
     /**
      * Get the type's fully-qualified name.
-     * @see Descriptor#getFullName()
+     * @see Descriptors.Descriptor#getFullName()
      */
     public String getFullName() { return fullName; }
 
@@ -1182,7 +1203,7 @@ public final class Descriptors {
     /**
      * Find an enum value by name.
      * @param name The unqualified name of the value (e.g. "FOO").
-     * @return the value's decsriptor, or {@code null} if not found.
+     * @return the value's descriptor, or {@code null} if not found.
      */
     public EnumValueDescriptor findValueByName(final String name) {
       final GenericDescriptor result =
@@ -1198,7 +1219,7 @@ public final class Descriptors {
      * Find an enum value by number.  If multiple enum values have the same
      * number, this returns the first defined value with that number.
      * @param number The value's number.
-     * @return the value's decsriptor, or {@code null} if not found.
+     * @return the value's descriptor, or {@code null} if not found.
      */
     public EnumValueDescriptor findValueByNumber(final int number) {
       return file.pool.enumValuesByNumber.get(
@@ -1261,7 +1282,7 @@ public final class Descriptors {
       implements GenericDescriptor, Internal.EnumLite {
     /**
      * Get the index of this descriptor within its parent.
-     * @see Descriptor#getIndex()
+     * @see Descriptors.Descriptor#getIndex()
      */
     public int getIndex() { return index; }
 
@@ -1276,7 +1297,7 @@ public final class Descriptors {
 
     /**
      * Get the value's fully-qualified name.
-     * @see Descriptor#getFullName()
+     * @see Descriptors.Descriptor#getFullName()
      */
     public String getFullName() { return fullName; }
 
@@ -1337,7 +1358,7 @@ public final class Descriptors {
 
     /**
      * Get the type's fully-qualified name.
-     * @see Descriptor#getFullName()
+     * @see Descriptors.Descriptor#getFullName()
      */
     public String getFullName() { return fullName; }
 
@@ -1355,7 +1376,7 @@ public final class Descriptors {
     /**
      * Find a method by name.
      * @param name The unqualified name of the method (e.g. "Foo").
-     * @return the method's decsriptor, or {@code null} if not found.
+     * @return the method's descriptor, or {@code null} if not found.
      */
     public MethodDescriptor findMethodByName(final String name) {
       final GenericDescriptor result =
@@ -1427,7 +1448,7 @@ public final class Descriptors {
 
     /**
      * Get the method's fully-qualified name.
-     * @see Descriptor#getFullName()
+     * @see Descriptors.Descriptor#getFullName()
      */
     public String getFullName() { return fullName; }
 
@@ -1475,7 +1496,8 @@ public final class Descriptors {
 
     private void crossLink() throws DescriptorValidationException {
       final GenericDescriptor input =
-        file.pool.lookupSymbol(proto.getInputType(), this);
+        file.pool.lookupSymbol(proto.getInputType(), this,
+            DescriptorPool.SearchFilter.TYPES_ONLY);
       if (!(input instanceof Descriptor)) {
         throw new DescriptorValidationException(this,
             '\"' + proto.getInputType() + "\" is not a message type.");
@@ -1483,7 +1505,8 @@ public final class Descriptors {
       inputType = (Descriptor)input;
 
       final GenericDescriptor output =
-        file.pool.lookupSymbol(proto.getOutputType(), this);
+        file.pool.lookupSymbol(proto.getOutputType(), this,
+            DescriptorPool.SearchFilter.TYPES_ONLY);
       if (!(output instanceof Descriptor)) {
         throw new DescriptorValidationException(this,
             '\"' + proto.getOutputType() + "\" is not a message type.");
@@ -1535,7 +1558,7 @@ public final class Descriptors {
     public String getProblemSymbolName() { return name; }
 
     /**
-     * Gets the the protocol message representation of the invalid descriptor.
+     * Gets the protocol message representation of the invalid descriptor.
      */
     public Message getProblemProto() { return proto; }
 
@@ -1590,14 +1613,22 @@ public final class Descriptors {
    * descriptors defined in a particular file.
    */
   private static final class DescriptorPool {
+    
+    /** Defines what subclass of descriptors to search in the descriptor pool. 
+     */
+    enum SearchFilter {
+      TYPES_ONLY, AGGREGATES_ONLY, ALL_SYMBOLS
+    }
+    
     DescriptorPool(final FileDescriptor[] dependencies) {
-      this.dependencies = new DescriptorPool[dependencies.length];
+      this.dependencies = new HashSet<FileDescriptor>();
 
-      for (int i = 0; i < dependencies.length; i++)  {
-        this.dependencies[i] = dependencies[i].pool;
+      for (int i = 0; i < dependencies.length; i++) {
+        this.dependencies.add(dependencies[i]);
+        importPublicDependencies(dependencies[i]);
       }
 
-      for (final FileDescriptor dependency : dependencies) {
+      for (final FileDescriptor dependency : this.dependencies) {
         try {
           addPackage(dependency.getPackage(), dependency);
         } catch (DescriptorValidationException e) {
@@ -1609,7 +1640,16 @@ public final class Descriptors {
       }
     }
 
-    private final DescriptorPool[] dependencies;
+    /** Find and put public dependencies of the file into dependencies set.*/
+    private void importPublicDependencies(final FileDescriptor file) {
+      for (FileDescriptor dependency : file.getPublicDependencies()) {
+        if (dependencies.add(dependency)) {
+          importPublicDependencies(dependency);
+        }
+      }
+    }
+
+    private final Set<FileDescriptor> dependencies;
 
     private final Map<String, GenericDescriptor> descriptorsByName =
       new HashMap<String, GenericDescriptor>();
@@ -1620,39 +1660,81 @@ public final class Descriptors {
 
     /** Find a generic descriptor by fully-qualified name. */
     GenericDescriptor findSymbol(final String fullName) {
+      return findSymbol(fullName, SearchFilter.ALL_SYMBOLS);
+    }
+    
+    /** Find a descriptor by fully-qualified name and given option to only 
+     * search valid field type descriptors. 
+     */
+    GenericDescriptor findSymbol(final String fullName,
+                                 final SearchFilter filter) {
       GenericDescriptor result = descriptorsByName.get(fullName);
       if (result != null) {
-        return result;
+        if ((filter==SearchFilter.ALL_SYMBOLS) ||
+            ((filter==SearchFilter.TYPES_ONLY) && isType(result)) ||
+            ((filter==SearchFilter.AGGREGATES_ONLY) && isAggregate(result))) {
+          return result;
+        }
       }
 
-      for (final DescriptorPool dependency : dependencies) {
-        result = dependency.descriptorsByName.get(fullName);
+      for (final FileDescriptor dependency : dependencies) {
+        result = dependency.pool.descriptorsByName.get(fullName);
         if (result != null) {
-          return result;
+          if ((filter==SearchFilter.ALL_SYMBOLS) ||
+              ((filter==SearchFilter.TYPES_ONLY) && isType(result)) ||
+              ((filter==SearchFilter.AGGREGATES_ONLY) && isAggregate(result))) {
+            return result;
+          }
         }
       }
 
       return null;
     }
 
+    /** Checks if the descriptor is a valid type for a message field. */
+    boolean isType(GenericDescriptor descriptor) {
+      return (descriptor instanceof Descriptor) || 
+        (descriptor instanceof EnumDescriptor);
+    }
+    
+    /** Checks if the descriptor is a valid namespace type. */
+    boolean isAggregate(GenericDescriptor descriptor) {
+      return (descriptor instanceof Descriptor) || 
+        (descriptor instanceof EnumDescriptor) || 
+        (descriptor instanceof PackageDescriptor) || 
+        (descriptor instanceof ServiceDescriptor);
+    }
+       
     /**
-     * Look up a descriptor by name, relative to some other descriptor.
+     * Look up a type descriptor by name, relative to some other descriptor.
      * The name may be fully-qualified (with a leading '.'),
      * partially-qualified, or unqualified.  C++-like name lookup semantics
      * are used to search for the matching descriptor.
      */
     GenericDescriptor lookupSymbol(final String name,
-                                   final GenericDescriptor relativeTo)
+                                   final GenericDescriptor relativeTo,
+                                   final DescriptorPool.SearchFilter filter)
                             throws DescriptorValidationException {
       // TODO(kenton):  This could be optimized in a number of ways.
 
       GenericDescriptor result;
       if (name.startsWith(".")) {
         // Fully-qualified name.
-        result = findSymbol(name.substring(1));
+        result = findSymbol(name.substring(1), filter);
       } else {
         // If "name" is a compound identifier, we want to search for the
         // first component of it, then search within it for the rest.
+        // If name is something like "Foo.Bar.baz", and symbols named "Foo" are
+        // defined in multiple parent scopes, we only want to find "Bar.baz" in
+        // the innermost one.  E.g., the following should produce an error:
+        //   message Bar { message Baz {} }
+        //   message Foo {
+        //     message Bar {
+        //     }
+        //     optional Bar.Baz baz = 1;
+        //   }
+        // So, we look for just "Foo" first, then look for "Bar.baz" within it
+        // if found.
         final int firstPartLength = name.indexOf('.');
         final String firstPart;
         if (firstPartLength == -1) {
@@ -1670,14 +1752,15 @@ public final class Descriptors {
           // Chop off the last component of the scope.
           final int dotpos = scopeToTry.lastIndexOf(".");
           if (dotpos == -1) {
-            result = findSymbol(name);
+            result = findSymbol(name, filter);
             break;
           } else {
             scopeToTry.setLength(dotpos + 1);
 
-            // Append firstPart and try to find.
+            // Append firstPart and try to find
             scopeToTry.append(firstPart);
-            result = findSymbol(scopeToTry.toString());
+            result = findSymbol(scopeToTry.toString(), 
+                DescriptorPool.SearchFilter.AGGREGATES_ONLY);
 
             if (result != null) {
               if (firstPartLength != -1) {
@@ -1686,7 +1769,7 @@ public final class Descriptors {
                 // searching parent scopes.
                 scopeToTry.setLength(dotpos + 1);
                 scopeToTry.append(name);
-                result = findSymbol(scopeToTry.toString());
+                result = findSymbol(scopeToTry.toString(), filter);
               }
               break;
             }
@@ -1817,7 +1900,7 @@ public final class Descriptors {
 
     /**
      * Adds a field to the fieldsByNumber table.  Throws an exception if a
-     * field with hte same containing type and number already exists.
+     * field with the same containing type and number already exists.
      */
     void addFieldByNumber(final FieldDescriptor field)
                    throws DescriptorValidationException {

+ 56 - 12
java/src/main/java/com/google/protobuf/DynamicMessage.java

@@ -35,6 +35,7 @@ import com.google.protobuf.Descriptors.FieldDescriptor;
 
 import java.io.InputStream;
 import java.io.IOException;
+import java.util.Collections;
 import java.util.Map;
 
 /**
@@ -160,7 +161,9 @@ public final class DynamicMessage extends AbstractMessage {
     verifyContainingType(field);
     Object result = fields.getField(field);
     if (result == null) {
-      if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+      if (field.isRepeated()) {
+        result = Collections.emptyList();
+      } else if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
         result = getDefaultInstance(field.getMessageType());
       } else {
         result = field.getDefaultValue();
@@ -198,10 +201,12 @@ public final class DynamicMessage extends AbstractMessage {
     return fields.isInitialized();
   }
 
+  @Override
   public boolean isInitialized() {
     return isInitialized(type, fields);
   }
 
+  @Override
   public void writeTo(CodedOutputStream output) throws IOException {
     if (type.getOptions().getMessageSetWireFormat()) {
       fields.writeMessageSetTo(output);
@@ -212,6 +217,7 @@ public final class DynamicMessage extends AbstractMessage {
     }
   }
 
+  @Override
   public int getSerializedSize() {
     int size = memoizedSize;
     if (size != -1) return size;
@@ -236,6 +242,26 @@ public final class DynamicMessage extends AbstractMessage {
     return newBuilderForType().mergeFrom(this);
   }
 
+  public Parser<DynamicMessage> getParserForType() {
+    return new AbstractParser<DynamicMessage>() {
+      public DynamicMessage parsePartialFrom(
+          CodedInputStream input,
+          ExtensionRegistryLite extensionRegistry)
+          throws InvalidProtocolBufferException {
+        Builder builder = newBuilder(type);
+        try {
+          builder.mergeFrom(input, extensionRegistry);
+        } catch (InvalidProtocolBufferException e) {
+          throw e.setUnfinishedMessage(builder.buildPartial());
+        } catch (IOException e) {
+          throw new InvalidProtocolBufferException(e.getMessage())
+              .setUnfinishedMessage(builder.buildPartial());
+        }
+        return builder.buildPartial();
+      }
+    };
+  }
+
   /** Verifies that the field is a field of this message. */
   private void verifyContainingType(FieldDescriptor field) {
     if (field.getContainingType() != type) {
@@ -264,14 +290,18 @@ public final class DynamicMessage extends AbstractMessage {
     // ---------------------------------------------------------------
     // Implementation of Message.Builder interface.
 
+    @Override
     public Builder clear() {
-      if (fields == null) {
-        throw new IllegalStateException("Cannot call clear() after build().");
+      if (fields.isImmutable()) {
+        fields = FieldSet.newFieldSet();
+      } else {
+        fields.clear();
       }
-      fields.clear();
+      unknownFields = UnknownFieldSet.getDefaultInstance();
       return this;
     }
 
+    @Override
     public Builder mergeFrom(Message other) {
       if (other instanceof DynamicMessage) {
         // This should be somewhat faster than calling super.mergeFrom().
@@ -280,6 +310,7 @@ public final class DynamicMessage extends AbstractMessage {
           throw new IllegalArgumentException(
             "mergeFrom(Message) can only merge messages of the same type.");
         }
+        ensureIsMutable();
         fields.mergeFrom(otherDynamicMessage.fields);
         mergeUnknownFields(otherDynamicMessage.unknownFields);
         return this;
@@ -289,8 +320,7 @@ public final class DynamicMessage extends AbstractMessage {
     }
 
     public DynamicMessage build() {
-      // If fields == null, we'll throw an appropriate exception later.
-      if (fields != null && !isInitialized()) {
+      if (!isInitialized()) {
         throw newUninitializedMessageException(
           new DynamicMessage(type, fields, unknownFields));
       }
@@ -312,21 +342,17 @@ public final class DynamicMessage extends AbstractMessage {
     }
 
     public DynamicMessage buildPartial() {
-      if (fields == null) {
-        throw new IllegalStateException(
-            "build() has already been called on this Builder.");
-      }
       fields.makeImmutable();
       DynamicMessage result =
         new DynamicMessage(type, fields, unknownFields);
-      fields = null;
-      unknownFields = null;
       return result;
     }
 
+    @Override
     public Builder clone() {
       Builder result = new Builder(type);
       result.fields.mergeFrom(fields);
+      result.mergeUnknownFields(unknownFields);
       return result;
     }
 
@@ -377,12 +403,14 @@ public final class DynamicMessage extends AbstractMessage {
 
     public Builder setField(FieldDescriptor field, Object value) {
       verifyContainingType(field);
+      ensureIsMutable();
       fields.setField(field, value);
       return this;
     }
 
     public Builder clearField(FieldDescriptor field) {
       verifyContainingType(field);
+      ensureIsMutable();
       fields.clearField(field);
       return this;
     }
@@ -400,12 +428,14 @@ public final class DynamicMessage extends AbstractMessage {
     public Builder setRepeatedField(FieldDescriptor field,
                                     int index, Object value) {
       verifyContainingType(field);
+      ensureIsMutable();
       fields.setRepeatedField(field, index, value);
       return this;
     }
 
     public Builder addRepeatedField(FieldDescriptor field, Object value) {
       verifyContainingType(field);
+      ensureIsMutable();
       fields.addRepeatedField(field, value);
       return this;
     }
@@ -419,6 +449,7 @@ public final class DynamicMessage extends AbstractMessage {
       return this;
     }
 
+    @Override
     public Builder mergeUnknownFields(UnknownFieldSet unknownFields) {
       this.unknownFields =
         UnknownFieldSet.newBuilder(this.unknownFields)
@@ -434,5 +465,18 @@ public final class DynamicMessage extends AbstractMessage {
           "FieldDescriptor does not match message type.");
       }
     }
+
+    private void ensureIsMutable() {
+      if (fields.isImmutable()) {
+        fields = fields.clone();
+      }
+    }
+
+    @Override
+    public com.google.protobuf.Message.Builder getFieldBuilder(FieldDescriptor field) {
+      // TODO(xiangl): need implementation for dynamic message
+      throw new UnsupportedOperationException(
+        "getFieldBuilder() called on a dynamic message type.");
+    }
   }
 }

+ 17 - 1
java/src/main/java/com/google/protobuf/ExtensionRegistryLite.java

@@ -43,7 +43,7 @@ import java.util.Map;
  * make sense to mix the two, since if you have any regular types in your
  * program, you then require the full runtime and lose all the benefits of
  * the lite runtime, so you might as well make all your types be regular types.
- * However, in some cases (e.g. when depending on multiple third-patry libraries
+ * However, in some cases (e.g. when depending on multiple third-party libraries
  * where one uses lite types and one uses regular), you may find yourself
  * wanting to mix the two.  In this case things get more complicated.
  * <p>
@@ -71,6 +71,22 @@ import java.util.Map;
  * @author kenton@google.com Kenton Varda
  */
 public class ExtensionRegistryLite {
+
+  // Set true to enable lazy parsing feature for MessageSet.
+  //
+  // TODO(xiangl): Now we use a global flag to control whether enable lazy
+  // parsing feature for MessageSet, which may be too crude for some
+  // applications. Need to support this feature on smaller granularity.
+  private static volatile boolean eagerlyParseMessageSets = false;
+
+  public static boolean isEagerlyParseMessageSets() {
+    return eagerlyParseMessageSets;
+  }
+
+  public static void setEagerlyParseMessageSets(boolean isEagerlyParse) {
+    eagerlyParseMessageSets = isEagerlyParse;
+  }
+
   /** Construct a new, empty instance. */
   public static ExtensionRegistryLite newInstance() {
     return new ExtensionRegistryLite();

+ 105 - 32
java/src/main/java/com/google/protobuf/FieldSet.java

@@ -30,12 +30,14 @@
 
 package com.google.protobuf;
 
+import com.google.protobuf.LazyField.LazyIterator;
+
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.io.IOException;
 
 /**
  * A class which represents an arbitrary set of fields of some message type.
@@ -68,6 +70,7 @@ final class FieldSet<FieldDescriptorType extends
 
   private final SmallSortedMap<FieldDescriptorType, Object> fields;
   private boolean isImmutable;
+  private boolean hasLazyField = false;
 
   /** Construct a new FieldSet. */
   private FieldSet() {
@@ -95,7 +98,7 @@ final class FieldSet<FieldDescriptorType extends
       FieldSet<T> emptySet() {
     return DEFAULT_INSTANCE;
   }
-  @SuppressWarnings("unchecked")
+  @SuppressWarnings("rawtypes")
   private static final FieldSet DEFAULT_INSTANCE = new FieldSet(true);
 
   /** Make this FieldSet immutable from this point forward. */
@@ -109,7 +112,7 @@ final class FieldSet<FieldDescriptorType extends
   }
 
   /**
-   * Retuns whether the FieldSet is immutable. This is true if it is the
+   * Returns whether the FieldSet is immutable. This is true if it is the
    * {@link #emptySet} or if {@link #makeImmutable} were called.
    *
    * @return whether the FieldSet is immutable.
@@ -139,6 +142,7 @@ final class FieldSet<FieldDescriptorType extends
       FieldDescriptorType descriptor = entry.getKey();
       clone.setField(descriptor, entry.getValue());
     }
+    clone.hasLazyField = hasLazyField;
     return clone;
   }
 
@@ -147,21 +151,52 @@ final class FieldSet<FieldDescriptorType extends
   /** See {@link Message.Builder#clear()}. */
   public void clear() {
     fields.clear();
+    hasLazyField = false;
   }
 
   /**
    * Get a simple map containing all the fields.
    */
   public Map<FieldDescriptorType, Object> getAllFields() {
+    if (hasLazyField) {
+      SmallSortedMap<FieldDescriptorType, Object> result =
+          SmallSortedMap.newFieldMap(16);
+      for (int i = 0; i < fields.getNumArrayEntries(); i++) {
+        cloneFieldEntry(result, fields.getArrayEntryAt(i));
+      }
+      for (Map.Entry<FieldDescriptorType, Object> entry :
+          fields.getOverflowEntries()) {
+        cloneFieldEntry(result, entry);
+      }
+      if (fields.isImmutable()) {
+        result.makeImmutable();
+      }
+      return result;
+    }
     return fields.isImmutable() ? fields : Collections.unmodifiableMap(fields);
   }
 
+  private void cloneFieldEntry(Map<FieldDescriptorType, Object> map,
+      Map.Entry<FieldDescriptorType, Object> entry) {
+    FieldDescriptorType key = entry.getKey();
+    Object value = entry.getValue();
+    if (value instanceof LazyField) {
+      map.put(key, ((LazyField) value).getValue());
+    } else {
+      map.put(key, value);
+    }
+  }
+
   /**
    * Get an iterator to the field map. This iterator should not be leaked out
-   * of the protobuf library as it is not protected from mutation when
-   * fields is not immutable.
+   * of the protobuf library as it is not protected from mutation when fields
+   * is not immutable.
    */
   public Iterator<Map.Entry<FieldDescriptorType, Object>> iterator() {
+    if (hasLazyField) {
+      return new LazyIterator<FieldDescriptorType>(
+          fields.entrySet().iterator());
+    }
     return fields.entrySet().iterator();
   }
 
@@ -185,14 +220,18 @@ final class FieldSet<FieldDescriptorType extends
    * to the caller to fetch the field's default value.
    */
   public Object getField(final FieldDescriptorType descriptor) {
-    return fields.get(descriptor);
+    Object o = fields.get(descriptor);
+    if (o instanceof LazyField) {
+      return ((LazyField) o).getValue();
+    }
+    return o;
   }
 
   /**
    * Useful for implementing
    * {@link Message.Builder#setField(Descriptors.FieldDescriptor,Object)}.
    */
-  @SuppressWarnings("unchecked")
+  @SuppressWarnings({"unchecked", "rawtypes"})
   public void setField(final FieldDescriptorType descriptor,
                        Object value) {
     if (descriptor.isRepeated()) {
@@ -204,7 +243,7 @@ final class FieldSet<FieldDescriptorType extends
       // Wrap the contents in a new list so that the caller cannot change
       // the list's contents after setting it.
       final List newList = new ArrayList();
-      newList.addAll((List)value);
+      newList.addAll((List) value);
       for (final Object element : newList) {
         verifyType(descriptor.getLiteType(), element);
       }
@@ -213,6 +252,9 @@ final class FieldSet<FieldDescriptorType extends
       verifyType(descriptor.getLiteType(), value);
     }
 
+    if (value instanceof LazyField) {
+      hasLazyField = true;
+    }
     fields.put(descriptor, value);
   }
 
@@ -222,6 +264,9 @@ final class FieldSet<FieldDescriptorType extends
    */
   public void clearField(final FieldDescriptorType descriptor) {
     fields.remove(descriptor);
+    if (fields.isEmpty()) {
+      hasLazyField = false;
+    }
   }
 
   /**
@@ -234,7 +279,7 @@ final class FieldSet<FieldDescriptorType extends
         "getRepeatedField() can only be called on repeated fields.");
     }
 
-    final Object value = fields.get(descriptor);
+    final Object value = getField(descriptor);
     if (value == null) {
       return 0;
     } else {
@@ -253,7 +298,7 @@ final class FieldSet<FieldDescriptorType extends
         "getRepeatedField() can only be called on repeated fields.");
     }
 
-    final Object value = fields.get(descriptor);
+    final Object value = getField(descriptor);
 
     if (value == null) {
       throw new IndexOutOfBoundsException();
@@ -275,13 +320,13 @@ final class FieldSet<FieldDescriptorType extends
         "getRepeatedField() can only be called on repeated fields.");
     }
 
-    final Object list = fields.get(descriptor);
+    final Object list = getField(descriptor);
     if (list == null) {
       throw new IndexOutOfBoundsException();
     }
 
     verifyType(descriptor.getLiteType(), value);
-    ((List) list).set(index, value);
+    ((List<Object>) list).set(index, value);
   }
 
   /**
@@ -298,13 +343,13 @@ final class FieldSet<FieldDescriptorType extends
 
     verifyType(descriptor.getLiteType(), value);
 
-    final Object existingValue = fields.get(descriptor);
-    List list;
+    final Object existingValue = getField(descriptor);
+    List<Object> list;
     if (existingValue == null) {
-      list = new ArrayList();
+      list = new ArrayList<Object>();
       fields.put(descriptor, list);
     } else {
-      list = (List) existingValue;
+      list = (List<Object>) existingValue;
     }
 
     list.add(value);
@@ -338,7 +383,8 @@ final class FieldSet<FieldDescriptorType extends
         break;
       case MESSAGE:
         // TODO(kenton):  Caller must do type checking here, I guess.
-        isValid = value instanceof MessageLite;
+        isValid =
+            (value instanceof MessageLite) || (value instanceof LazyField);
         break;
     }
 
@@ -392,8 +438,16 @@ final class FieldSet<FieldDescriptorType extends
           }
         }
       } else {
-        if (!((MessageLite) entry.getValue()).isInitialized()) {
-          return false;
+        Object value = entry.getValue();
+        if (value instanceof MessageLite) {
+          if (!((MessageLite) value).isInitialized()) {
+            return false;
+          }
+        } else if (value instanceof LazyField) {
+          return true;
+        } else {
+          throw new IllegalArgumentException(
+              "Wrong object type used with protocol message reflection.");
         }
       }
     }
@@ -416,7 +470,8 @@ final class FieldSet<FieldDescriptorType extends
   }
 
   /**
-   * Like {@link #mergeFrom(Message)}, but merges from another {@link FieldSet}.
+   * Like {@link Message.Builder#mergeFrom(Message)}, but merges from another 
+   * {@link FieldSet}.
    */
   public void mergeFrom(final FieldSet<FieldDescriptorType> other) {
     for (int i = 0; i < other.fields.getNumArrayEntries(); i++) {
@@ -428,14 +483,17 @@ final class FieldSet<FieldDescriptorType extends
     }
   }
 
-  @SuppressWarnings("unchecked")
+  @SuppressWarnings({"unchecked", "rawtypes"})
   private void mergeFromField(
       final Map.Entry<FieldDescriptorType, Object> entry) {
     final FieldDescriptorType descriptor = entry.getKey();
-    final Object otherValue = entry.getValue();
+    Object otherValue = entry.getValue();
+    if (otherValue instanceof LazyField) {
+      otherValue = ((LazyField) otherValue).getValue();
+    }
 
     if (descriptor.isRepeated()) {
-      Object value = fields.get(descriptor);
+      Object value = getField(descriptor);
       if (value == null) {
         // Our list is empty, but we still need to make a defensive copy of
         // the other list since we don't know if the other FieldSet is still
@@ -446,7 +504,7 @@ final class FieldSet<FieldDescriptorType extends
         ((List) value).addAll((List) otherValue);
       }
     } else if (descriptor.getLiteJavaType() == WireFormat.JavaType.MESSAGE) {
-      Object value = fields.get(descriptor);
+      Object value = getField(descriptor);
       if (value == null) {
         fields.put(descriptor, otherValue);
       } else {
@@ -457,7 +515,6 @@ final class FieldSet<FieldDescriptorType extends
                 ((MessageLite) value).toBuilder(), (MessageLite) otherValue)
             .build());
       }
-
     } else {
       fields.put(descriptor, otherValue);
     }
@@ -646,7 +703,11 @@ final class FieldSet<FieldDescriptorType extends
         }
       }
     } else {
-      writeElement(output, type, number, value);
+      if (value instanceof LazyField) {
+        writeElement(output, type, number, ((LazyField) value).getValue());
+      } else {
+        writeElement(output, type, number, value);
+      }
     }
   }
 
@@ -686,12 +747,18 @@ final class FieldSet<FieldDescriptorType extends
   private int getMessageSetSerializedSize(
       final Map.Entry<FieldDescriptorType, Object> entry) {
     final FieldDescriptorType descriptor = entry.getKey();
-    if (descriptor.getLiteJavaType() == WireFormat.JavaType.MESSAGE &&
-        !descriptor.isRepeated() && !descriptor.isPacked()) {
-      return CodedOutputStream.computeMessageSetExtensionSize(
-          entry.getKey().getNumber(), (MessageLite) entry.getValue());
+    Object value = entry.getValue();
+    if (descriptor.getLiteJavaType() == WireFormat.JavaType.MESSAGE
+        && !descriptor.isRepeated() && !descriptor.isPacked()) {
+      if (value instanceof LazyField) {
+        return CodedOutputStream.computeLazyFieldMessageSetExtensionSize(
+            entry.getKey().getNumber(), (LazyField) value);
+      } else {
+        return CodedOutputStream.computeMessageSetExtensionSize(
+            entry.getKey().getNumber(), (MessageLite) value);
+      }
     } else {
-      return computeFieldSize(descriptor, entry.getValue());
+      return computeFieldSize(descriptor, value);
     }
   }
 
@@ -741,7 +808,6 @@ final class FieldSet<FieldDescriptorType extends
       case BOOL    : return CodedOutputStream.computeBoolSizeNoTag    ((Boolean    )value);
       case STRING  : return CodedOutputStream.computeStringSizeNoTag  ((String     )value);
       case GROUP   : return CodedOutputStream.computeGroupSizeNoTag   ((MessageLite)value);
-      case MESSAGE : return CodedOutputStream.computeMessageSizeNoTag ((MessageLite)value);
       case BYTES   : return CodedOutputStream.computeBytesSizeNoTag   ((ByteString )value);
       case UINT32  : return CodedOutputStream.computeUInt32SizeNoTag  ((Integer    )value);
       case SFIXED32: return CodedOutputStream.computeSFixed32SizeNoTag((Integer    )value);
@@ -749,6 +815,13 @@ final class FieldSet<FieldDescriptorType extends
       case SINT32  : return CodedOutputStream.computeSInt32SizeNoTag  ((Integer    )value);
       case SINT64  : return CodedOutputStream.computeSInt64SizeNoTag  ((Long       )value);
 
+      case MESSAGE:
+        if (value instanceof LazyField) {
+          return CodedOutputStream.computeLazyFieldSizeNoTag((LazyField) value);
+        } else {
+          return CodedOutputStream.computeMessageSizeNoTag((MessageLite) value);
+        }
+
       case ENUM:
         return CodedOutputStream.computeEnumSizeNoTag(
             ((Internal.EnumLite) value).getNumber());

+ 142 - 37
java/src/main/java/com/google/protobuf/GeneratedMessage.java

@@ -58,8 +58,6 @@ public abstract class GeneratedMessage extends AbstractMessage
     implements Serializable {
   private static final long serialVersionUID = 1L;
 
-  private final UnknownFieldSet unknownFields;
-
   /**
    * For testing. Allows a test to disable the optimization that avoids using
    * field builders for nested messages until they are requested. By disabling
@@ -68,11 +66,14 @@ public abstract class GeneratedMessage extends AbstractMessage
   protected static boolean alwaysUseFieldBuilders = false;
 
   protected GeneratedMessage() {
-    this.unknownFields = UnknownFieldSet.getDefaultInstance();
   }
 
   protected GeneratedMessage(Builder<?> builder) {
-    this.unknownFields = builder.getUnknownFields();
+  }
+
+  public Parser<? extends Message> getParserForType() {
+    throw new UnsupportedOperationException(
+        "This is supposed to be overridden by subclasses.");
   }
 
  /**
@@ -175,8 +176,28 @@ public abstract class GeneratedMessage extends AbstractMessage
   }
 
   //@Override (Java 1.6 override semantics, but we must support 1.5)
-  public final UnknownFieldSet getUnknownFields() {
-    return unknownFields;
+  public UnknownFieldSet getUnknownFields() {
+    throw new UnsupportedOperationException(
+        "This is supposed to be overridden by subclasses.");
+  }
+
+  /**
+   * Called by subclasses to parse an unknown field.
+   * @return {@code true} unless the tag is an end-group tag.
+   */
+  protected boolean parseUnknownField(
+      CodedInputStream input,
+      UnknownFieldSet.Builder unknownFields,
+      ExtensionRegistryLite extensionRegistry,
+      int tag) throws IOException {
+    return unknownFields.mergeFieldFrom(tag, input);
+  }
+
+  /**
+   * Used by parsing constructors in generated classes.
+   */
+  protected void makeExtensionsImmutable() {
+    // Noop for messages without extensions.
   }
 
   protected abstract Message.Builder newBuilderForType(BuilderParent parent);
@@ -318,6 +339,11 @@ public abstract class GeneratedMessage extends AbstractMessage
       return internalGetFieldAccessorTable().getField(field).newBuilder();
     }
 
+    //@Override (Java 1.6 override semantics, but we must support 1.5)
+    public Message.Builder getFieldBuilder(final FieldDescriptor field) {
+      return internalGetFieldAccessorTable().getField(field).getBuilder(this);
+    }
+
     //@Override (Java 1.6 override semantics, but we must support 1.5)
     public boolean hasField(final FieldDescriptor field) {
       return internalGetFieldAccessorTable().getField(field).has(this);
@@ -626,6 +652,25 @@ public abstract class GeneratedMessage extends AbstractMessage
       return super.isInitialized() && extensionsAreInitialized();
     }
 
+    @Override
+    protected boolean parseUnknownField(
+        CodedInputStream input,
+        UnknownFieldSet.Builder unknownFields,
+        ExtensionRegistryLite extensionRegistry,
+        int tag) throws IOException {
+      return AbstractMessage.Builder.mergeFieldFrom(
+        input, unknownFields, extensionRegistry, getDescriptorForType(),
+        null, extensions, tag);
+    }
+
+    /**
+     * Used by parsing constructors in generated classes.
+     */
+    @Override
+    protected void makeExtensionsImmutable() {
+      extensions.makeImmutable();
+    }
+
     /**
      * Used by subclasses to serialize extensions.  Extension ranges may be
      * interleaved with field numbers, but we must write them in canonical
@@ -655,9 +700,21 @@ public abstract class GeneratedMessage extends AbstractMessage
           if (messageSetWireFormat && descriptor.getLiteJavaType() ==
                   WireFormat.JavaType.MESSAGE &&
               !descriptor.isRepeated()) {
-            output.writeMessageSetExtension(descriptor.getNumber(),
-                                            (Message) next.getValue());
+            if (next instanceof LazyField.LazyEntry<?>) {
+              output.writeRawMessageSetExtension(descriptor.getNumber(),
+                  ((LazyField.LazyEntry<?>) next).getField().toByteString());
+            } else {
+              output.writeMessageSetExtension(descriptor.getNumber(),
+                                              (Message) next.getValue());
+            }
           } else {
+            // TODO(xiangl): Taken care of following code, it may cause
+            // problem when we use LazyField for normal fields/extensions.
+            // Due to the optional field can be duplicated at the end of
+            // serialized bytes, which will make the serialized size change
+            // after lazy field parsed. So when we use LazyField globally,
+            // we need to change the following write method to write cached
+            // bytes directly rather than write the parsed message.
             FieldSet.writeField(descriptor, next.getValue(), output);
           }
           if (iter.hasNext()) {
@@ -974,7 +1031,8 @@ public abstract class GeneratedMessage extends AbstractMessage
         final ExtensionRegistryLite extensionRegistry,
         final int tag) throws IOException {
       return AbstractMessage.Builder.mergeFieldFrom(
-        input, unknownFields, extensionRegistry, this, tag);
+        input, unknownFields, extensionRegistry, getDescriptorForType(),
+        this, null, tag);
     }
 
     // ---------------------------------------------------------------
@@ -1405,39 +1463,72 @@ public abstract class GeneratedMessage extends AbstractMessage
         final String[] camelCaseNames,
         final Class<? extends GeneratedMessage> messageClass,
         final Class<? extends Builder> builderClass) {
+      this(descriptor, camelCaseNames);
+      ensureFieldAccessorsInitialized(messageClass, builderClass);
+    }
+
+    /**
+     * Construct a FieldAccessorTable for a particular message class without
+     * initializing FieldAccessors.
+     */
+    public FieldAccessorTable(
+        final Descriptor descriptor,
+        final String[] camelCaseNames) {
       this.descriptor = descriptor;
+      this.camelCaseNames = camelCaseNames;
       fields = new FieldAccessor[descriptor.getFields().size()];
+      initialized = false;
+    }
 
-      for (int i = 0; i < fields.length; i++) {
-        final FieldDescriptor field = descriptor.getFields().get(i);
-        if (field.isRepeated()) {
-          if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
-            fields[i] = new RepeatedMessageFieldAccessor(
-              field, camelCaseNames[i], messageClass, builderClass);
-          } else if (field.getJavaType() == FieldDescriptor.JavaType.ENUM) {
-            fields[i] = new RepeatedEnumFieldAccessor(
-              field, camelCaseNames[i], messageClass, builderClass);
-          } else {
-            fields[i] = new RepeatedFieldAccessor(
-              field, camelCaseNames[i], messageClass, builderClass);
-          }
-        } else {
-          if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
-            fields[i] = new SingularMessageFieldAccessor(
-              field, camelCaseNames[i], messageClass, builderClass);
-          } else if (field.getJavaType() == FieldDescriptor.JavaType.ENUM) {
-            fields[i] = new SingularEnumFieldAccessor(
-              field, camelCaseNames[i], messageClass, builderClass);
+    /**
+     * Ensures the field accessors are initialized. This method is thread-safe.
+     *
+     * @param messageClass   The message type.
+     * @param builderClass   The builder type.
+     * @return this
+     */
+    public FieldAccessorTable ensureFieldAccessorsInitialized(
+        Class<? extends GeneratedMessage> messageClass,
+        Class<? extends Builder> builderClass) {
+      if (initialized) { return this; }
+      synchronized (this) {
+        if (initialized) { return this; }
+        for (int i = 0; i < fields.length; i++) {
+          FieldDescriptor field = descriptor.getFields().get(i);
+          if (field.isRepeated()) {
+            if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+              fields[i] = new RepeatedMessageFieldAccessor(
+                  field, camelCaseNames[i], messageClass, builderClass);
+            } else if (field.getJavaType() == FieldDescriptor.JavaType.ENUM) {
+              fields[i] = new RepeatedEnumFieldAccessor(
+                  field, camelCaseNames[i], messageClass, builderClass);
+            } else {
+              fields[i] = new RepeatedFieldAccessor(
+                  field, camelCaseNames[i], messageClass, builderClass);
+            }
           } else {
-            fields[i] = new SingularFieldAccessor(
-              field, camelCaseNames[i], messageClass, builderClass);
+            if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+              fields[i] = new SingularMessageFieldAccessor(
+                  field, camelCaseNames[i], messageClass, builderClass);
+            } else if (field.getJavaType() == FieldDescriptor.JavaType.ENUM) {
+              fields[i] = new SingularEnumFieldAccessor(
+                  field, camelCaseNames[i], messageClass, builderClass);
+            } else {
+              fields[i] = new SingularFieldAccessor(
+                  field, camelCaseNames[i], messageClass, builderClass);
+            }
           }
         }
+        initialized = true;
+        camelCaseNames = null;
+        return this;
       }
     }
 
     private final Descriptor descriptor;
     private final FieldAccessor[] fields;
+    private String[] camelCaseNames;
+    private volatile boolean initialized;
 
     /** Get the FieldAccessor for a particular field. */
     private FieldAccessor getField(final FieldDescriptor field) {
@@ -1472,6 +1563,7 @@ public abstract class GeneratedMessage extends AbstractMessage
       int getRepeatedCount(GeneratedMessage.Builder builder);
       void clear(Builder builder);
       Message.Builder newBuilder();
+      Message.Builder getBuilder(GeneratedMessage.Builder builder);
     }
 
     // ---------------------------------------------------------------
@@ -1551,6 +1643,10 @@ public abstract class GeneratedMessage extends AbstractMessage
         throw new UnsupportedOperationException(
           "newBuilderForField() called on a non-Message type.");
       }
+      public Message.Builder getBuilder(GeneratedMessage.Builder builder) {
+        throw new UnsupportedOperationException(
+          "getFieldBuilder() called on a non-Message type.");
+      }
     }
 
     private static class RepeatedFieldAccessor implements FieldAccessor {
@@ -1573,8 +1669,6 @@ public abstract class GeneratedMessage extends AbstractMessage
                                    "get" + camelCaseName + "List");
         getMethodBuilder = getMethodOrDie(builderClass,
                                    "get" + camelCaseName + "List");
-
-
         getRepeatedMethod =
             getMethodOrDie(messageClass, "get" + camelCaseName, Integer.TYPE);
         getRepeatedMethodBuilder =
@@ -1625,11 +1719,11 @@ public abstract class GeneratedMessage extends AbstractMessage
       }
       public boolean has(final GeneratedMessage message) {
         throw new UnsupportedOperationException(
-          "hasField() called on a singular field.");
+          "hasField() called on a repeated field.");
       }
       public boolean has(GeneratedMessage.Builder builder) {
         throw new UnsupportedOperationException(
-          "hasField() called on a singular field.");
+          "hasField() called on a repeated field.");
       }
       public int getRepeatedCount(final GeneratedMessage message) {
         return (Integer) invokeOrDie(getCountMethod, message);
@@ -1644,6 +1738,10 @@ public abstract class GeneratedMessage extends AbstractMessage
         throw new UnsupportedOperationException(
           "newBuilderForField() called on a non-Message type.");
       }
+      public Message.Builder getBuilder(GeneratedMessage.Builder builder) {
+        throw new UnsupportedOperationException(
+          "getFieldBuilder() called on a non-Message type.");
+      }
     }
 
     // ---------------------------------------------------------------
@@ -1753,9 +1851,12 @@ public abstract class GeneratedMessage extends AbstractMessage
         super(descriptor, camelCaseName, messageClass, builderClass);
 
         newBuilderMethod = getMethodOrDie(type, "newBuilder");
+        getBuilderMethodBuilder =
+            getMethodOrDie(builderClass, "get" + camelCaseName + "Builder");
       }
 
       private final Method newBuilderMethod;
+      private final Method getBuilderMethodBuilder;
 
       private Object coerceType(final Object value) {
         if (type.isInstance(value)) {
@@ -1766,7 +1867,7 @@ public abstract class GeneratedMessage extends AbstractMessage
           // DynamicMessage -- we should accept it.  In this case we can make
           // a copy of the message.
           return ((Message.Builder) invokeOrDie(newBuilderMethod, null))
-                  .mergeFrom((Message) value).build();
+                  .mergeFrom((Message) value).buildPartial();
         }
       }
 
@@ -1778,6 +1879,10 @@ public abstract class GeneratedMessage extends AbstractMessage
       public Message.Builder newBuilder() {
         return (Message.Builder) invokeOrDie(newBuilderMethod, null);
       }
+      @Override
+      public Message.Builder getBuilder(GeneratedMessage.Builder builder) {
+        return (Message.Builder) invokeOrDie(getBuilderMethodBuilder, builder);
+      }
     }
 
     private static final class RepeatedMessageFieldAccessor
@@ -1825,7 +1930,7 @@ public abstract class GeneratedMessage extends AbstractMessage
   /**
    * Replaces this object in the output stream with a serialized form.
    * Part of Java's serialization magic.  Generated sub-classes must override
-   * this method by calling <code>return super.writeReplace();</code>
+   * this method by calling {@code return super.writeReplace();}
    * @return a SerializedForm of this message
    */
   protected Object writeReplace() throws ObjectStreamException {

+ 173 - 107
java/src/main/java/com/google/protobuf/GeneratedMessageLite.java

@@ -55,6 +55,29 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite
   protected GeneratedMessageLite(Builder builder) {
   }
 
+  public Parser<? extends MessageLite> getParserForType() {
+    throw new UnsupportedOperationException(
+        "This is supposed to be overridden by subclasses.");
+  }
+
+  /**
+   * Called by subclasses to parse an unknown field.
+   * @return {@code true} unless the tag is an end-group tag.
+   */
+  protected boolean parseUnknownField(
+      CodedInputStream input,
+      ExtensionRegistryLite extensionRegistry,
+      int tag) throws IOException {
+    return input.skipField(tag);
+  }
+
+  /**
+   * Used by parsing constructors in generated classes.
+   */
+  protected void makeExtensionsImmutable() {
+    // Noop for messages without extensions.
+  }
+
   @SuppressWarnings("unchecked")
   public abstract static class Builder<MessageType extends GeneratedMessageLite,
                                        BuilderType extends Builder>
@@ -86,9 +109,9 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite
      * @return {@code true} unless the tag is an end-group tag.
      */
     protected boolean parseUnknownField(
-        final CodedInputStream input,
-        final ExtensionRegistryLite extensionRegistry,
-        final int tag) throws IOException {
+        CodedInputStream input,
+        ExtensionRegistryLite extensionRegistry,
+        int tag) throws IOException {
       return input.skipField(tag);
     }
   }
@@ -193,6 +216,31 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite
       return extensions.isInitialized();
     }
 
+    /**
+     * Called by subclasses to parse an unknown field or an extension.
+     * @return {@code true} unless the tag is an end-group tag.
+     */
+    @Override
+    protected boolean parseUnknownField(
+        CodedInputStream input,
+        ExtensionRegistryLite extensionRegistry,
+        int tag) throws IOException {
+      return GeneratedMessageLite.parseUnknownField(
+          extensions,
+          getDefaultInstanceForType(),
+          input,
+          extensionRegistry,
+          tag);
+    }
+
+    /**
+     * Used by parsing constructors in generated classes.
+     */
+    @Override
+    protected void makeExtensionsImmutable() {
+      extensions.makeImmutable();
+    }
+
     /**
      * Used by subclasses to serialize extensions.  Extension ranges may be
      * interleaved with field numbers, but we must write them in canonical
@@ -400,121 +448,139 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite
      */
     @Override
     protected boolean parseUnknownField(
-        final CodedInputStream input,
-        final ExtensionRegistryLite extensionRegistry,
-        final int tag) throws IOException {
-      final int wireType = WireFormat.getTagWireType(tag);
-      final int fieldNumber = WireFormat.getTagFieldNumber(tag);
-
-      final GeneratedExtension<MessageType, ?> extension =
-        extensionRegistry.findLiteExtensionByNumber(
-            getDefaultInstanceForType(), fieldNumber);
-
-      boolean unknown = false;
-      boolean packed = false;
-      if (extension == null) {
-        unknown = true;  // Unknown field.
-      } else if (wireType == FieldSet.getWireFormatForFieldType(
-                   extension.descriptor.getLiteType(),
-                   false  /* isPacked */)) {
-        packed = false;  // Normal, unpacked value.
-      } else if (extension.descriptor.isRepeated &&
-                 extension.descriptor.type.isPackable() &&
-                 wireType == FieldSet.getWireFormatForFieldType(
-                   extension.descriptor.getLiteType(),
-                   true  /* isPacked */)) {
-        packed = true;  // Packed value.
-      } else {
-        unknown = true;  // Wrong wire type.
-      }
+        CodedInputStream input,
+        ExtensionRegistryLite extensionRegistry,
+        int tag) throws IOException {
+      ensureExtensionsIsMutable();
+      return GeneratedMessageLite.parseUnknownField(
+          extensions,
+          getDefaultInstanceForType(),
+          input,
+          extensionRegistry,
+          tag);
+    }
 
-      if (unknown) {  // Unknown field or wrong wire type.  Skip.
-        return input.skipField(tag);
-      }
+    protected final void mergeExtensionFields(final MessageType other) {
+      ensureExtensionsIsMutable();
+      extensions.mergeFrom(((ExtendableMessage) other).extensions);
+    }
+  }
 
-      if (packed) {
-        final int length = input.readRawVarint32();
-        final int limit = input.pushLimit(length);
-        if (extension.descriptor.getLiteType() == WireFormat.FieldType.ENUM) {
-          while (input.getBytesUntilLimit() > 0) {
-            final int rawValue = input.readEnum();
-            final Object value =
-                extension.descriptor.getEnumType().findValueByNumber(rawValue);
-            if (value == null) {
-              // If the number isn't recognized as a valid value for this
-              // enum, drop it (don't even add it to unknownFields).
-              return true;
-            }
-            ensureExtensionsIsMutable();
-            extensions.addRepeatedField(extension.descriptor, value);
-          }
-        } else {
-          while (input.getBytesUntilLimit() > 0) {
-            final Object value =
-              FieldSet.readPrimitiveField(input,
-                                          extension.descriptor.getLiteType());
-            ensureExtensionsIsMutable();
-            extensions.addRepeatedField(extension.descriptor, value);
+  // -----------------------------------------------------------------
+
+  /**
+   * Parse an unknown field or an extension.
+   * @return {@code true} unless the tag is an end-group tag.
+   */
+  private static <MessageType extends MessageLite>
+      boolean parseUnknownField(
+          FieldSet<ExtensionDescriptor> extensions,
+          MessageType defaultInstance,
+          CodedInputStream input,
+          ExtensionRegistryLite extensionRegistry,
+          int tag) throws IOException {
+    int wireType = WireFormat.getTagWireType(tag);
+    int fieldNumber = WireFormat.getTagFieldNumber(tag);
+
+    GeneratedExtension<MessageType, ?> extension =
+      extensionRegistry.findLiteExtensionByNumber(
+          defaultInstance, fieldNumber);
+
+    boolean unknown = false;
+    boolean packed = false;
+    if (extension == null) {
+      unknown = true;  // Unknown field.
+    } else if (wireType == FieldSet.getWireFormatForFieldType(
+                 extension.descriptor.getLiteType(),
+                 false  /* isPacked */)) {
+      packed = false;  // Normal, unpacked value.
+    } else if (extension.descriptor.isRepeated &&
+               extension.descriptor.type.isPackable() &&
+               wireType == FieldSet.getWireFormatForFieldType(
+                 extension.descriptor.getLiteType(),
+                 true  /* isPacked */)) {
+      packed = true;  // Packed value.
+    } else {
+      unknown = true;  // Wrong wire type.
+    }
+
+    if (unknown) {  // Unknown field or wrong wire type.  Skip.
+      return input.skipField(tag);
+    }
+
+    if (packed) {
+      int length = input.readRawVarint32();
+      int limit = input.pushLimit(length);
+      if (extension.descriptor.getLiteType() == WireFormat.FieldType.ENUM) {
+        while (input.getBytesUntilLimit() > 0) {
+          int rawValue = input.readEnum();
+          Object value =
+              extension.descriptor.getEnumType().findValueByNumber(rawValue);
+          if (value == null) {
+            // If the number isn't recognized as a valid value for this
+            // enum, drop it (don't even add it to unknownFields).
+            return true;
           }
+          extensions.addRepeatedField(extension.descriptor, value);
         }
-        input.popLimit(limit);
       } else {
-        final Object value;
-        switch (extension.descriptor.getLiteJavaType()) {
-          case MESSAGE: {
-            MessageLite.Builder subBuilder = null;
-            if (!extension.descriptor.isRepeated()) {
-              MessageLite existingValue =
-                  (MessageLite) extensions.getField(extension.descriptor);
-              if (existingValue != null) {
-                subBuilder = existingValue.toBuilder();
-              }
-            }
-            if (subBuilder == null) {
-              subBuilder = extension.messageDefaultInstance.newBuilderForType();
-            }
-            if (extension.descriptor.getLiteType() ==
-                WireFormat.FieldType.GROUP) {
-              input.readGroup(extension.getNumber(),
-                              subBuilder, extensionRegistry);
-            } else {
-              input.readMessage(subBuilder, extensionRegistry);
+        while (input.getBytesUntilLimit() > 0) {
+          Object value =
+            FieldSet.readPrimitiveField(input,
+                                        extension.descriptor.getLiteType());
+          extensions.addRepeatedField(extension.descriptor, value);
+        }
+      }
+      input.popLimit(limit);
+    } else {
+      Object value;
+      switch (extension.descriptor.getLiteJavaType()) {
+        case MESSAGE: {
+          MessageLite.Builder subBuilder = null;
+          if (!extension.descriptor.isRepeated()) {
+            MessageLite existingValue =
+                (MessageLite) extensions.getField(extension.descriptor);
+            if (existingValue != null) {
+              subBuilder = existingValue.toBuilder();
             }
-            value = subBuilder.build();
-            break;
           }
-          case ENUM:
-            final int rawValue = input.readEnum();
-            value = extension.descriptor.getEnumType()
-                             .findValueByNumber(rawValue);
-            // If the number isn't recognized as a valid value for this enum,
-            // drop it.
-            if (value == null) {
-              return true;
-            }
-            break;
-          default:
-            value = FieldSet.readPrimitiveField(input,
-                extension.descriptor.getLiteType());
-            break;
-        }
-
-        if (extension.descriptor.isRepeated()) {
-          ensureExtensionsIsMutable();
-          extensions.addRepeatedField(extension.descriptor, value);
-        } else {
-          ensureExtensionsIsMutable();
-          extensions.setField(extension.descriptor, value);
+          if (subBuilder == null) {
+            subBuilder = extension.messageDefaultInstance.newBuilderForType();
+          }
+          if (extension.descriptor.getLiteType() ==
+              WireFormat.FieldType.GROUP) {
+            input.readGroup(extension.getNumber(),
+                            subBuilder, extensionRegistry);
+          } else {
+            input.readMessage(subBuilder, extensionRegistry);
+          }
+          value = subBuilder.build();
+          break;
         }
+        case ENUM:
+          int rawValue = input.readEnum();
+          value = extension.descriptor.getEnumType()
+                           .findValueByNumber(rawValue);
+          // If the number isn't recognized as a valid value for this enum,
+          // drop it.
+          if (value == null) {
+            return true;
+          }
+          break;
+        default:
+          value = FieldSet.readPrimitiveField(input,
+              extension.descriptor.getLiteType());
+          break;
       }
 
-      return true;
+      if (extension.descriptor.isRepeated()) {
+        extensions.addRepeatedField(extension.descriptor, value);
+      } else {
+        extensions.setField(extension.descriptor, value);
+      }
     }
 
-    protected final void mergeExtensionFields(final MessageType other) {
-      ensureExtensionsIsMutable();
-      extensions.mergeFrom(((ExtendableMessage) other).extensions);
-    }
+    return true;
   }
 
   // -----------------------------------------------------------------
@@ -722,7 +788,7 @@ public abstract class GeneratedMessageLite extends AbstractMessageLite
   /**
    * Replaces this object in the output stream with a serialized form.
    * Part of Java's serialization magic.  Generated sub-classes must override
-   * this method by calling <code>return super.writeReplace();</code>
+   * this method by calling {@code return super.writeReplace();}
    * @return a SerializedForm of this message
    */
   protected Object writeReplace() throws ObjectStreamException {

+ 21 - 74
java/src/main/java/com/google/protobuf/Internal.java

@@ -103,85 +103,32 @@ public class Internal {
    * Helper called by generated code to determine if a byte array is a valid
    * UTF-8 encoded string such that the original bytes can be converted to
    * a String object and then back to a byte array round tripping the bytes
-   * without loss.
-   * <p>
-   * This is inspired by UTF_8.java in sun.nio.cs.
+   * without loss.  More precisely, returns {@code true} whenever:
+   * <pre>   {@code
+   * Arrays.equals(byteString.toByteArray(),
+   *     new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8"))
+   * }</pre>
+   *
+   * <p>This method rejects "overlong" byte sequences, as well as
+   * 3-byte sequences that would map to a surrogate character, in
+   * accordance with the restricted definition of UTF-8 introduced in
+   * Unicode 3.1.  Note that the UTF-8 decoder included in Oracle's
+   * JDK has been modified to also reject "overlong" byte sequences,
+   * but currently (2011) still accepts 3-byte surrogate character
+   * byte sequences.
+   *
+   * <p>See the Unicode Standard,</br>
+   * Table 3-6. <em>UTF-8 Bit Distribution</em>,</br>
+   * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>.
+   *
+   * <p>As of 2011-02, this method simply returns the result of {@link
+   * ByteString#isValidUtf8()}.  Calling that method directly is preferred.
    *
    * @param byteString the string to check
    * @return whether the byte array is round trippable
    */
   public static boolean isValidUtf8(ByteString byteString) {
-    int index = 0;
-    int size = byteString.size();
-    // To avoid the masking, we could change this to use bytes;
-    // Then X > 0xC2 gets turned into X < -0xC2; X < 0x80
-    // gets turned into X >= 0, etc.
-
-    while (index < size) {
-      int byte1 = byteString.byteAt(index++) & 0xFF;
-      if (byte1 < 0x80) {
-        // fast loop for single bytes
-        continue;
-
-        // we know from this point on that we have 2-4 byte forms
-      } else if (byte1 < 0xC2 || byte1 > 0xF4) {
-        // catch illegal first bytes: < C2 or > F4
-        return false;
-      }
-      if (index >= size) {
-        // fail if we run out of bytes
-        return false;
-      }
-      int byte2 = byteString.byteAt(index++) & 0xFF;
-      if (byte2 < 0x80 || byte2 > 0xBF) {
-        // general trail-byte test
-        return false;
-      }
-      if (byte1 <= 0xDF) {
-        // two-byte form; general trail-byte test is sufficient
-        continue;
-      }
-
-      // we know from this point on that we have 3 or 4 byte forms
-      if (index >= size) {
-        // fail if we run out of bytes
-        return false;
-      }
-      int byte3 = byteString.byteAt(index++) & 0xFF;
-      if (byte3 < 0x80 || byte3 > 0xBF) {
-        // general trail-byte test
-        return false;
-      }
-      if (byte1 <= 0xEF) {
-        // three-byte form. Vastly more frequent than four-byte forms
-        // The following has an extra test, but not worth restructuring
-        if (byte1 == 0xE0 && byte2 < 0xA0 ||
-            byte1 == 0xED && byte2 > 0x9F) {
-          // check special cases of byte2
-          return false;
-        }
-
-      } else {
-        // four-byte form
-
-        if (index >= size) {
-          // fail if we run out of bytes
-          return false;
-        }
-        int byte4 = byteString.byteAt(index++) & 0xFF;
-        if (byte4 < 0x80 || byte4 > 0xBF) {
-          // general trail-byte test
-          return false;
-        }
-        // The following has an extra test, but not worth restructuring
-        if (byte1 == 0xF0 && byte2 < 0x90 ||
-            byte1 == 0xF4 && byte2 > 0x8F) {
-          // check special cases of byte2
-          return false;
-        }
-      }
-    }
-    return true;
+    return byteString.isValidUtf8();
   }
 
   /**

+ 21 - 0
java/src/main/java/com/google/protobuf/InvalidProtocolBufferException.java

@@ -40,11 +40,32 @@ import java.io.IOException;
  */
 public class InvalidProtocolBufferException extends IOException {
   private static final long serialVersionUID = -1616151763072450476L;
+  private MessageLite unfinishedMessage = null;
 
   public InvalidProtocolBufferException(final String description) {
     super(description);
   }
 
+  /**
+   * Attaches an unfinished message to the exception to support best-effort
+   * parsing in {@code Parser} interface.
+   *
+   * @return this
+   */
+  public InvalidProtocolBufferException setUnfinishedMessage(
+      MessageLite unfinishedMessage) {
+    this.unfinishedMessage = unfinishedMessage;
+    return this;
+  }
+
+  /**
+   * Returns the unfinished message attached to the exception, or null if
+   * no message is attached.
+   */
+  public MessageLite getUnfinishedMessage() {
+    return unfinishedMessage;
+  }
+
   static InvalidProtocolBufferException truncatedMessage() {
     return new InvalidProtocolBufferException(
       "While parsing a protocol message, the input ended unexpectedly " +

+ 216 - 0
java/src/main/java/com/google/protobuf/LazyField.java

@@ -0,0 +1,216 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map.Entry;
+
+/**
+ * LazyField encapsulates the logic of lazily parsing message fields. It stores
+ * the message in a ByteString initially and then parse it on-demand.
+ *
+ * LazyField is thread-compatible e.g. concurrent read are safe, however,
+ * synchronizations are needed under read/write situations.
+ *
+ * Now LazyField is only used to lazily load MessageSet.
+ * TODO(xiangl): Use LazyField to lazily load all messages.
+ *
+ * @author xiangl@google.com (Xiang Li)
+ */
+class LazyField {
+
+  final private MessageLite defaultInstance;
+  final private ExtensionRegistryLite extensionRegistry;
+
+  // Mutable because it is initialized lazily.
+  private ByteString bytes;
+  private volatile MessageLite value;
+  private volatile boolean isDirty = false;
+
+  public LazyField(MessageLite defaultInstance,
+      ExtensionRegistryLite extensionRegistry, ByteString bytes) {
+    this.defaultInstance = defaultInstance;
+    this.extensionRegistry = extensionRegistry;
+    this.bytes = bytes;
+  }
+
+  public MessageLite getValue() {
+    ensureInitialized();
+    return value;
+  }
+
+  /**
+   * LazyField is not thread-safe for write access. Synchronizations are needed
+   * under read/write situations.
+   */
+  public MessageLite setValue(MessageLite value) {
+    MessageLite originalValue = this.value;
+    this.value = value;
+    bytes = null;
+    isDirty = true;
+    return originalValue;
+  }
+
+  /**
+   * Due to the optional field can be duplicated at the end of serialized
+   * bytes, which will make the serialized size changed after LazyField
+   * parsed. Be careful when using this method.
+   */
+  public int getSerializedSize() {
+    if (isDirty) {
+      return value.getSerializedSize();
+    }
+    return bytes.size();
+  }
+
+  public ByteString toByteString() {
+    if (!isDirty) {
+      return bytes;
+    }
+    synchronized (this) {
+      if (!isDirty) {
+        return bytes;
+      }
+      bytes = value.toByteString();
+      isDirty = false;
+      return bytes;
+    }
+  }
+
+  @Override
+  public int hashCode() {
+    ensureInitialized();
+    return value.hashCode();
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    ensureInitialized();
+    return value.equals(obj);
+  }
+
+  @Override
+  public String toString() {
+    ensureInitialized();
+    return value.toString();
+  }
+
+  private void ensureInitialized() {
+    if (value != null) {
+      return;
+    }
+    synchronized (this) {
+      if (value != null) {
+        return;
+      }
+      try {
+        if (bytes != null) {
+          value = defaultInstance.getParserForType()
+              .parseFrom(bytes, extensionRegistry);
+        }
+      } catch (IOException e) {
+        // TODO(xiangl): Refactory the API to support the exception thrown from
+        // lazily load messages.
+      }
+    }
+  }
+
+  // ====================================================
+
+  /**
+   * LazyEntry and LazyIterator are used to encapsulate the LazyField, when
+   * users iterate all fields from FieldSet.
+   */
+  static class LazyEntry<K> implements Entry<K, Object> {
+    private Entry<K, LazyField> entry;
+
+    private LazyEntry(Entry<K, LazyField> entry) {
+      this.entry = entry;
+    }
+
+    @Override
+    public K getKey() {
+      return entry.getKey();
+    }
+
+    @Override
+    public Object getValue() {
+      LazyField field = entry.getValue();
+      if (field == null) {
+        return null;
+      }
+      return field.getValue();
+    }
+
+    public LazyField getField() {
+      return entry.getValue();
+    }
+
+    @Override
+    public Object setValue(Object value) {
+      if (!(value instanceof MessageLite)) {
+        throw new IllegalArgumentException(
+            "LazyField now only used for MessageSet, "
+            + "and the value of MessageSet must be an instance of MessageLite");
+      }
+      return entry.getValue().setValue((MessageLite) value);
+    }
+  }
+
+  static class LazyIterator<K> implements Iterator<Entry<K, Object>> {
+    private Iterator<Entry<K, Object>> iterator;
+
+    public LazyIterator(Iterator<Entry<K, Object>> iterator) {
+      this.iterator = iterator;
+    }
+
+    @Override
+    public boolean hasNext() {
+      return iterator.hasNext();
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public Entry<K, Object> next() {
+      Entry<K, ?> entry = iterator.next();
+      if (entry.getValue() instanceof LazyField) {
+        return new LazyEntry<K>((Entry<K, LazyField>) entry);
+      }
+      return (Entry<K, Object>) entry;
+    }
+
+    @Override
+    public void remove() {
+      iterator.remove();
+    }
+  }
+}

+ 27 - 3
java/src/main/java/com/google/protobuf/LazyStringArrayList.java

@@ -33,8 +33,9 @@ package com.google.protobuf;
 import java.util.List;
 import java.util.AbstractList;
 import java.util.ArrayList;
-import java.util.RandomAccess;
 import java.util.Collection;
+import java.util.Collections;
+import java.util.RandomAccess;
 
 /**
  * An implementation of {@link LazyStringList} that wraps an ArrayList. Each
@@ -72,6 +73,11 @@ public class LazyStringArrayList extends AbstractList<String>
     list = new ArrayList<Object>();
   }
 
+  public LazyStringArrayList(LazyStringList from) {
+    list = new ArrayList<Object>(from.size());
+    addAll(from);
+  }
+
   public LazyStringArrayList(List<String> from) {
     list = new ArrayList<Object>(from);
   }
@@ -84,7 +90,7 @@ public class LazyStringArrayList extends AbstractList<String>
     } else {
       ByteString bs = (ByteString) o;
       String s = bs.toStringUtf8();
-      if (Internal.isValidUtf8(bs)) {
+      if (bs.isValidUtf8()) {
         list.set(index, s);
       }
       return s;
@@ -108,9 +114,22 @@ public class LazyStringArrayList extends AbstractList<String>
     modCount++;
   }
 
+  @Override
+  public boolean addAll(Collection<? extends String> c) {
+    // The default implementation of AbstractCollection.addAll(Collection)
+    // delegates to add(Object). This implementation instead delegates to
+    // addAll(int, Collection), which makes a special case for Collections
+    // which are instances of LazyStringList.
+    return addAll(size(), c);
+  }
+
   @Override
   public boolean addAll(int index, Collection<? extends String> c) {
-    boolean ret = list.addAll(index, c);
+    // When copying from another LazyStringList, directly copy the underlying
+    // elements rather than forcing each element to be decoded to a String.
+    Collection<?> collection = c instanceof LazyStringList
+        ? ((LazyStringList) c).getUnderlyingElements() : c;
+    boolean ret = list.addAll(index, collection);
     modCount++;
     return ret;
   }
@@ -152,4 +171,9 @@ public class LazyStringArrayList extends AbstractList<String>
       return ((ByteString) o).toStringUtf8();
     }
   }
+
+  @Override
+  public List<?> getUnderlyingElements() {
+    return Collections.unmodifiableList(list);
+  }
 }

+ 13 - 4
java/src/main/java/com/google/protobuf/LazyStringList.java

@@ -33,7 +33,7 @@ package com.google.protobuf;
 import java.util.List;
 
 /**
- * An interface extending List&lt;String&gt; that also provides access to the
+ * An interface extending {@code List<String>} that also provides access to the
  * items of the list as UTF8-encoded ByteString objects. This is used by the
  * protocol buffer implementation to support lazily converting bytes parsed
  * over the wire to String objects until needed and also increases the
@@ -41,9 +41,9 @@ import java.util.List;
  * ByteString is already cached.
  * <p>
  * This only adds additional methods that are required for the use in the
- * protocol buffer code in order to be able successfuly round trip byte arrays
+ * protocol buffer code in order to be able successfully round trip byte arrays
  * through parsing and serialization without conversion to strings. It's not
- * attempting to support the functionality of say List&ltByteString&gt, hence
+ * attempting to support the functionality of say {@code List<ByteString>}, hence
  * why only these two very specific methods are added.
  *
  * @author jonp@google.com (Jon Perlow)
@@ -56,7 +56,7 @@ public interface LazyStringList extends List<String> {
    * @param index index of the element to return
    * @return the element at the specified position in this list
    * @throws IndexOutOfBoundsException if the index is out of range
-   *         (<tt>index &lt; 0 || index &gt;= size()</tt>)
+   *         ({@code index < 0 || index >= size()})
    */
   ByteString getByteString(int index);
 
@@ -69,4 +69,13 @@ public interface LazyStringList extends List<String> {
    *         is not supported by this list
    */
   void add(ByteString element);
+
+  /**
+   * Returns an unmodifiable List of the underlying elements, each of
+   * which is either a {@code String} or its equivalent UTF-8 encoded
+   * {@code ByteString}. It is an error for the caller to modify the returned
+   * List, and attempting to do so will result in an
+   * {@link UnsupportedOperationException}.
+   */
+  List<?> getUnderlyingElements();
 }

+ 349 - 0
java/src/main/java/com/google/protobuf/LiteralByteString.java

@@ -0,0 +1,349 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+/**
+ * This class implements a {@link com.google.protobuf.ByteString} backed by a
+ * single array of bytes, contiguous in memory. It supports substring by
+ * pointing to only a sub-range of the underlying byte array, meaning that a
+ * substring will reference the full byte-array of the string it's made from,
+ * exactly as with {@link String}.
+ *
+ * @author carlanton@google.com (Carl Haverl)
+ */
+class LiteralByteString extends ByteString {
+
+  protected final byte[] bytes;
+
+  /**
+   * Creates a {@code LiteralByteString} backed by the given array, without
+   * copying.
+   *
+   * @param bytes array to wrap
+   */
+  LiteralByteString(byte[] bytes) {
+    this.bytes = bytes;
+  }
+
+  @Override
+  public byte byteAt(int index) {
+    // Unlike most methods in this class, this one is a direct implementation
+    // ignoring the potential offset because we need to do range-checking in the
+    // substring case anyway.
+    return bytes[index];
+  }
+
+  @Override
+  public int size() {
+    return bytes.length;
+  }
+
+  // =================================================================
+  // ByteString -> substring
+
+  @Override
+  public ByteString substring(int beginIndex, int endIndex) {
+    if (beginIndex < 0) {
+      throw new IndexOutOfBoundsException(
+          "Beginning index: " + beginIndex + " < 0");
+    }
+    if (endIndex > size()) {
+      throw new IndexOutOfBoundsException("End index: " + endIndex + " > " +
+          size());
+    }
+    int substringLength = endIndex - beginIndex;
+    if (substringLength < 0) {
+      throw new IndexOutOfBoundsException(
+          "Beginning index larger than ending index: " + beginIndex + ", "
+              + endIndex);
+    }
+
+    ByteString result;
+    if (substringLength == 0) {
+      result = ByteString.EMPTY;
+    } else {
+      result = new BoundedByteString(bytes, getOffsetIntoBytes() + beginIndex,
+          substringLength);
+    }
+    return result;
+  }
+
+  // =================================================================
+  // ByteString -> byte[]
+
+  @Override
+  protected void copyToInternal(byte[] target, int sourceOffset, 
+      int targetOffset, int numberToCopy) {
+    // Optimized form, not for subclasses, since we don't call
+    // getOffsetIntoBytes() or check the 'numberToCopy' parameter.
+    System.arraycopy(bytes, sourceOffset, target, targetOffset, numberToCopy);
+  }
+
+  @Override
+  public void copyTo(ByteBuffer target) {
+    target.put(bytes, getOffsetIntoBytes(), size());  // Copies bytes
+  }
+
+  @Override
+  public ByteBuffer asReadOnlyByteBuffer() {
+    ByteBuffer byteBuffer =
+        ByteBuffer.wrap(bytes, getOffsetIntoBytes(), size());
+    return byteBuffer.asReadOnlyBuffer();
+  }
+
+  @Override
+  public List<ByteBuffer> asReadOnlyByteBufferList() {
+    // Return the ByteBuffer generated by asReadOnlyByteBuffer() as a singleton
+    List<ByteBuffer> result = new ArrayList<ByteBuffer>(1);
+    result.add(asReadOnlyByteBuffer());
+    return result;
+ }
+
+ @Override
+  public void writeTo(OutputStream outputStream) throws IOException {
+    outputStream.write(toByteArray());
+  }
+
+  @Override
+  public String toString(String charsetName)
+      throws UnsupportedEncodingException {
+    return new String(bytes, getOffsetIntoBytes(), size(), charsetName);
+  }
+
+  // =================================================================
+  // UTF-8 decoding
+
+  @Override
+  public boolean isValidUtf8() {
+    int offset = getOffsetIntoBytes();
+    return Utf8.isValidUtf8(bytes, offset, offset + size());
+  }
+
+  @Override
+  protected int partialIsValidUtf8(int state, int offset, int length) {
+    int index = getOffsetIntoBytes() + offset;
+    return Utf8.partialIsValidUtf8(state, bytes, index, index + length);
+  }
+
+  // =================================================================
+  // equals() and hashCode()
+
+  @Override
+  public boolean equals(Object other) {
+    if (other == this) {
+      return true;
+    }
+    if (!(other instanceof ByteString)) {
+      return false;
+    }
+
+    if (size() != ((ByteString) other).size()) {
+      return false;
+    }
+    if (size() == 0) {
+      return true;
+    }
+
+    if (other instanceof LiteralByteString) {
+      return equalsRange((LiteralByteString) other, 0, size());
+    } else if (other instanceof RopeByteString) {
+      return other.equals(this);
+    } else {
+      throw new IllegalArgumentException(
+          "Has a new type of ByteString been created? Found "
+              + other.getClass());
+    }
+  }
+
+  /**
+   * Check equality of the substring of given length of this object starting at
+   * zero with another {@code LiteralByteString} substring starting at offset.
+   *
+   * @param other  what to compare a substring in
+   * @param offset offset into other
+   * @param length number of bytes to compare
+   * @return true for equality of substrings, else false.
+   */
+  boolean equalsRange(LiteralByteString other, int offset, int length) {
+    if (length > other.size()) {
+      throw new IllegalArgumentException(
+          "Length too large: " + length + size());
+    }
+    if (offset + length > other.size()) {
+      throw new IllegalArgumentException(
+          "Ran off end of other: " + offset + ", " + length + ", " +
+              other.size());
+    }
+
+    byte[] thisBytes = bytes;
+    byte[] otherBytes = other.bytes;
+    int thisLimit = getOffsetIntoBytes() + length;
+    for (int thisIndex = getOffsetIntoBytes(), otherIndex =
+        other.getOffsetIntoBytes() + offset;
+        (thisIndex < thisLimit); ++thisIndex, ++otherIndex) {
+      if (thisBytes[thisIndex] != otherBytes[otherIndex]) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Cached hash value.  Intentionally accessed via a data race, which
+   * is safe because of the Java Memory Model's "no out-of-thin-air values"
+   * guarantees for ints.
+   */
+  private int hash = 0;
+
+  /**
+   * Compute the hashCode using the traditional algorithm from {@link
+   * ByteString}.
+   *
+   * @return hashCode value
+   */
+  @Override
+  public int hashCode() {
+    int h = hash;
+
+    if (h == 0) {
+      int size = size();
+      h = partialHash(size, 0, size);
+      if (h == 0) {
+        h = 1;
+      }
+      hash = h;
+    }
+    return h;
+  }
+
+  @Override
+  protected int peekCachedHashCode() {
+    return hash;
+  }
+
+  @Override
+  protected int partialHash(int h, int offset, int length) {
+    byte[] thisBytes = bytes;
+    for (int i = getOffsetIntoBytes() + offset, limit = i + length; i < limit;
+        i++) {
+      h = h * 31 + thisBytes[i];
+    }
+    return h;
+  }
+
+  // =================================================================
+  // Input stream
+
+  @Override
+  public InputStream newInput() {
+    return new ByteArrayInputStream(bytes, getOffsetIntoBytes(),
+        size());  // No copy
+  }
+
+  @Override
+  public CodedInputStream newCodedInput() {
+    // We trust CodedInputStream not to modify the bytes, or to give anyone
+    // else access to them.
+    return CodedInputStream
+        .newInstance(bytes, getOffsetIntoBytes(), size());  // No copy
+  }
+
+  // =================================================================
+  // ByteIterator
+
+  @Override
+  public ByteIterator iterator() {
+    return new LiteralByteIterator();
+  }
+
+  private class LiteralByteIterator implements ByteIterator {
+    private int position;
+    private final int limit;
+
+    private LiteralByteIterator() {
+      position = 0;
+      limit = size();
+    }
+
+    public boolean hasNext() {
+      return (position < limit);
+    }
+
+    public Byte next() {
+      // Boxing calls Byte.valueOf(byte), which does not instantiate.
+      return nextByte();
+    }
+
+    public byte nextByte() {
+      try {
+        return bytes[position++];
+      } catch (ArrayIndexOutOfBoundsException e) {
+        throw new NoSuchElementException(e.getMessage());
+      }
+    }
+
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  // =================================================================
+  // Internal methods
+
+  @Override
+  protected int getTreeDepth() {
+    return 0;
+  }
+
+  @Override
+  protected boolean isBalanced() {
+    return true;
+  }
+
+  /**
+   * Offset into {@code bytes[]} to use, non-zero for substrings.
+   *
+   * @return always 0 for this class
+   */
+  protected int getOffsetIntoBytes() {
+    return 0;
+  }
+}

+ 26 - 4
java/src/main/java/com/google/protobuf/Message.java

@@ -50,25 +50,28 @@ import java.util.Map;
  */
 public interface Message extends MessageLite, MessageOrBuilder {
 
+  // (From MessageLite, re-declared here only for return type covariance.)
+  Parser<? extends Message> getParserForType();
+
   // -----------------------------------------------------------------
   // Comparison and hashing
 
   /**
    * Compares the specified object with this message for equality.  Returns
-   * <tt>true</tt> if the given object is a message of the same type (as
+   * {@code true} if the given object is a message of the same type (as
    * defined by {@code getDescriptorForType()}) and has identical values for
    * all of its fields.  Subclasses must implement this; inheriting
    * {@code Object.equals()} is incorrect.
    *
    * @param other object to be compared for equality with this message
-   * @return <tt>true</tt> if the specified object is equal to this message
+   * @return {@code true} if the specified object is equal to this message
    */
   @Override
   boolean equals(Object other);
 
   /**
    * Returns the hash code value for this message.  The hash code of a message
-   * should mix the message's type (object identity of the decsriptor) with its
+   * should mix the message's type (object identity of the descriptor) with its
    * contents (known and unknown field values).  Subclasses must implement this;
    * inheriting {@code Object.hashCode()} is incorrect.
    *
@@ -83,7 +86,8 @@ public interface Message extends MessageLite, MessageOrBuilder {
 
   /**
    * Converts the message to a string in protocol buffer text format. This is
-   * just a trivial wrapper around {@link TextFormat#printToString(Message)}.
+   * just a trivial wrapper around {@link
+   * TextFormat#printToString(MessageOrBuilder)}.
    */
   @Override
   String toString();
@@ -144,6 +148,24 @@ public interface Message extends MessageLite, MessageOrBuilder {
      */
     Builder newBuilderForField(Descriptors.FieldDescriptor field);
 
+    /**
+     * Get a nested builder instance for the given field.
+     * <p>
+     * Normally, we hold a reference to the immutable message object for the
+     * message type field. Some implementations(the generated message builders),
+     * however, can also hold a reference to the builder object (a nested
+     * builder) for the field.
+     * <p>
+     * If the field is already backed up by a nested builder, the nested builder
+     * will be returned. Otherwise, a new field builder will be created and
+     * returned. The original message field (if exist) will be merged into the
+     * field builder, which will then be nested into its parent builder.
+     * <p>
+     * NOTE: implementations that do not support nested builders will throw
+     * <code>UnsupportedException</code>.
+     */
+    Builder getFieldBuilder(Descriptors.FieldDescriptor field);
+
     /**
      * Sets a field to the given value.  The value must be of the correct type
      * for this field, i.e. the same type that

+ 17 - 23
java/src/main/java/com/google/protobuf/MessageLite.java

@@ -79,6 +79,12 @@ public interface MessageLite extends MessageLiteOrBuilder {
    */
   int getSerializedSize();
 
+
+  /**
+   * Gets the parser for a message of the same type as this message.
+   */
+  Parser<? extends MessageLite> getParserForType();
+
   // -----------------------------------------------------------------
   // Convenience methods.
 
@@ -144,11 +150,8 @@ public interface MessageLite extends MessageLiteOrBuilder {
     Builder clear();
 
     /**
-     * Construct the final message.  Once this is called, the Builder is no
-     * longer valid, and calling any other method will result in undefined
-     * behavior and may throw a NullPointerException.  If you need to continue
-     * working with the builder after calling {@code build()}, {@code clone()}
-     * it first.
+     * Constructs the message based on the state of the Builder. Subsequent
+     * changes to the Builder will not affect the returned message.
      * @throws UninitializedMessageException The message is missing one or more
      *         required fields (i.e. {@link #isInitialized()} returns false).
      *         Use {@link #buildPartial()} to bypass this check.
@@ -158,11 +161,7 @@ public interface MessageLite extends MessageLiteOrBuilder {
     /**
      * Like {@link #build()}, but does not throw an exception if the message
      * is missing required fields.  Instead, a partial message is returned.
-     * Once this is called, the Builder is no longer valid, and calling any
-     * will result in undefined behavior and may throw a NullPointerException.
-     *
-     * If you need to continue working with the builder after calling
-     * {@code buildPartial()}, {@code clone()} it first.
+     * Subsequent changes to the Builder will not affect the returned message.
      */
     MessageLite buildPartial();
 
@@ -174,7 +173,7 @@ public interface MessageLite extends MessageLiteOrBuilder {
 
     /**
      * Parses a message of this type from the input and merges it with this
-     * message, as if using {@link Builder#mergeFrom(MessageLite)}.
+     * message.
      *
      * <p>Warning:  This does not verify that all required fields are present in
      * the input message.  If you call {@link #build()} without setting all
@@ -184,11 +183,6 @@ public interface MessageLite extends MessageLiteOrBuilder {
      * <ul>
      *   <li>Call {@link #isInitialized()} to verify that all required fields
      *       are set before building.
-     *   <li>Parse the message separately using one of the static
-     *       {@code parseFrom} methods, then use {@link #mergeFrom(MessageLite)}
-     *       to merge it with this one.  {@code parseFrom} will throw an
-     *       {@link InvalidProtocolBufferException} (an {@code IOException})
-     *       if some required fields are missing.
      *   <li>Use {@code buildPartial()} to build, which ignores missing
      *       required fields.
      * </ul>
@@ -225,7 +219,7 @@ public interface MessageLite extends MessageLiteOrBuilder {
     /**
      * Parse {@code data} as a message of this type and merge it with the
      * message being built.  This is just a small wrapper around
-     * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}.
+     * {@link #mergeFrom(CodedInputStream,ExtensionRegistryLite)}.
      *
      * @return this
      */
@@ -255,7 +249,7 @@ public interface MessageLite extends MessageLiteOrBuilder {
     /**
      * Parse {@code data} as a message of this type and merge it with the
      * message being built.  This is just a small wrapper around
-     * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}.
+     * {@link #mergeFrom(CodedInputStream,ExtensionRegistryLite)}.
      *
      * @return this
      */
@@ -266,7 +260,7 @@ public interface MessageLite extends MessageLiteOrBuilder {
     /**
      * Parse {@code data} as a message of this type and merge it with the
      * message being built.  This is just a small wrapper around
-     * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}.
+     * {@link #mergeFrom(CodedInputStream,ExtensionRegistryLite)}.
      *
      * @return this
      */
@@ -293,7 +287,7 @@ public interface MessageLite extends MessageLiteOrBuilder {
     /**
      * Parse a message of this type from {@code input} and merge it with the
      * message being built.  This is just a small wrapper around
-     * {@link #mergeFrom(CodedInputStream,ExtensionRegistry)}.
+     * {@link #mergeFrom(CodedInputStream,ExtensionRegistryLite)}.
      *
      * @return this
      */
@@ -308,9 +302,9 @@ public interface MessageLite extends MessageLiteOrBuilder {
      * {@link MessageLite#writeDelimitedTo(OutputStream)} to write messages in
      * this format.
      *
-     * @returns True if successful, or false if the stream is at EOF when the
-     *          method starts.  Any other error (including reaching EOF during
-     *          parsing) will cause an exception to be thrown.
+     * @return True if successful, or false if the stream is at EOF when the
+     *         method starts.  Any other error (including reaching EOF during
+     *         parsing) will cause an exception to be thrown.
      */
     boolean mergeDelimitedFrom(InputStream input)
                                throws IOException;

+ 2 - 0
java/src/main/java/com/google/protobuf/MessageLiteOrBuilder.java

@@ -52,6 +52,8 @@ public interface MessageLiteOrBuilder {
   /**
    * Returns true if all required fields in the message and all embedded
    * messages are set, false otherwise.
+   *
+   * <p>See also: {@link MessageOrBuilder#getInitializationErrorString()}
    */
   boolean isInitialized();
 

+ 21 - 2
java/src/main/java/com/google/protobuf/MessageOrBuilder.java

@@ -30,6 +30,7 @@
 
 package com.google.protobuf;
 
+import java.util.List;
 import java.util.Map;
 
 /**
@@ -44,6 +45,24 @@ public interface MessageOrBuilder extends MessageLiteOrBuilder {
   //@Override (Java 1.6 override semantics, but we must support 1.5)
   Message getDefaultInstanceForType();
 
+  /**
+   * Returns a list of field paths (e.g. "foo.bar.baz") of required fields
+   * which are not set in this message.  You should call
+   * {@link MessageLiteOrBuilder#isInitialized()} first to check if there
+   * are any missing fields, as that method is likely to be much faster
+   * than this one even when the message is fully-initialized.
+   */
+  List<String> findInitializationErrors();
+
+  /**
+   * Returns a comma-delimited list of required fields which are not set
+   * in this message object.  You should call
+   * {@link MessageLiteOrBuilder#isInitialized()} first to check if there
+   * are any missing fields, as that method is likely to be much faster
+   * than this one even when the message is fully-initialized.
+   */
+  String getInitializationErrorString();
+
   /**
    * Get the message's type's descriptor.  This differs from the
    * {@code getDescriptor()} method of generated message classes in that
@@ -80,7 +99,7 @@ public interface MessageOrBuilder extends MessageLiteOrBuilder {
   /**
    * Obtains the value of the given field, or the default value if it is
    * not set.  For primitive fields, the boxed primitive value is returned.
-   * For enum fields, the EnumValueDescriptor for the value is returend. For
+   * For enum fields, the EnumValueDescriptor for the value is returned. For
    * embedded message fields, the sub-message is returned.  For repeated
    * fields, a java.util.List is returned.
    */
@@ -98,7 +117,7 @@ public interface MessageOrBuilder extends MessageLiteOrBuilder {
   /**
    * Gets an element of a repeated field.  For primitive fields, the boxed
    * primitive value is returned.  For enum fields, the EnumValueDescriptor
-   * for the value is returend. For embedded message fields, the sub-message
+   * for the value is returned. For embedded message fields, the sub-message
    * is returned.
    * @throws IllegalArgumentException The field is not a repeated field, or
    *           {@code field.getContainingType() != getDescriptorForType()}.

+ 259 - 0
java/src/main/java/com/google/protobuf/Parser.java

@@ -0,0 +1,259 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.io.InputStream;
+
+/**
+ * Abstract interface for parsing Protocol Messages.
+ *
+ * @author liujisi@google.com (Pherl Liu)
+ */
+public interface Parser<MessageType> {
+  /**
+   * Parses a message of {@code MessageType} from the input.
+   *
+   * <p>Note:  The caller should call
+   * {@link CodedInputStream#checkLastTagWas(int)} after calling this to
+   * verify that the last tag seen was the appropriate end-group tag,
+   * or zero for EOF.
+   */
+  public MessageType parseFrom(CodedInputStream input)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(CodedInputStream)}, but also parses extensions.
+   * The extensions that you want to be able to parse must be registered in
+   * {@code extensionRegistry}. Extensions not in the registry will be treated
+   * as unknown fields.
+   */
+  public MessageType parseFrom(CodedInputStream input,
+                               ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(CodedInputStream)}, but does not throw an
+   * exception if the message is missing required fields. Instead, a partial
+   * message is returned.
+   */
+  public MessageType parsePartialFrom(CodedInputStream input)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(CodedInputStream input, ExtensionRegistryLite)},
+   * but does not throw an exception if the message is missing required fields.
+   * Instead, a partial message is returned.
+   */
+  public MessageType parsePartialFrom(CodedInputStream input,
+                                      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  // ---------------------------------------------------------------
+  // Convenience methods.
+
+  /**
+   * Parses {@code data} as a message of {@code MessageType}.
+   * This is just a small wrapper around {@link #parseFrom(CodedInputStream)}.
+   */
+  public MessageType parseFrom(ByteString data)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Parses {@code data} as a message of {@code MessageType}.
+   * This is just a small wrapper around
+   * {@link #parseFrom(CodedInputStream, ExtensionRegistryLite)}.
+   */
+  public MessageType parseFrom(ByteString data,
+                               ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(ByteString)}, but does not throw an
+   * exception if the message is missing required fields. Instead, a partial
+   * message is returned.
+   */
+  public MessageType parsePartialFrom(ByteString data)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(ByteString, ExtensionRegistryLite)},
+   * but does not throw an exception if the message is missing required fields.
+   * Instead, a partial message is returned.
+   */
+  public MessageType parsePartialFrom(ByteString data,
+                                      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Parses {@code data} as a message of {@code MessageType}.
+   * This is just a small wrapper around {@link #parseFrom(CodedInputStream)}.
+   */
+  public MessageType parseFrom(byte[] data, int off, int len)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Parses {@code data} as a message of {@code MessageType}.
+   * This is just a small wrapper around
+   * {@link #parseFrom(CodedInputStream, ExtensionRegistryLite)}.
+   */
+  public MessageType parseFrom(byte[] data, int off, int len,
+                               ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Parses {@code data} as a message of {@code MessageType}.
+   * This is just a small wrapper around {@link #parseFrom(CodedInputStream)}.
+   */
+  public MessageType parseFrom(byte[] data)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Parses {@code data} as a message of {@code MessageType}.
+   * This is just a small wrapper around
+   * {@link #parseFrom(CodedInputStream, ExtensionRegistryLite)}.
+   */
+  public MessageType parseFrom(byte[] data,
+                               ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(byte[], int, int)}, but does not throw an
+   * exception if the message is missing required fields. Instead, a partial
+   * message is returned.
+   */
+  public MessageType parsePartialFrom(byte[] data, int off, int len)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(ByteString, ExtensionRegistryLite)},
+   * but does not throw an exception if the message is missing required fields.
+   * Instead, a partial message is returned.
+   */
+  public MessageType parsePartialFrom(byte[] data, int off, int len,
+                                      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(byte[])}, but does not throw an
+   * exception if the message is missing required fields. Instead, a partial
+   * message is returned.
+   */
+  public MessageType parsePartialFrom(byte[] data)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(byte[], ExtensionRegistryLite)},
+   * but does not throw an exception if the message is missing required fields.
+   * Instead, a partial message is returned.
+   */
+  public MessageType parsePartialFrom(byte[] data,
+                                      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Parse a message of {@code MessageType} from {@code input}.
+   * This is just a small wrapper around {@link #parseFrom(CodedInputStream)}.
+   * Note that this method always reads the <i>entire</i> input (unless it
+   * throws an exception).  If you want it to stop earlier, you will need to
+   * wrap your input in some wrapper stream that limits reading.  Or, use
+   * {@link MessageLite#writeDelimitedTo(java.io.OutputStream)} to write your
+   * message and {@link #parseDelimitedFrom(InputStream)} to read it.
+   * <p>
+   * Despite usually reading the entire input, this does not close the stream.
+   */
+  public MessageType parseFrom(InputStream input)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Parses a message of {@code MessageType} from {@code input}.
+   * This is just a small wrapper around
+   * {@link #parseFrom(CodedInputStream, ExtensionRegistryLite)}.
+   */
+  public MessageType parseFrom(InputStream input,
+                               ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(InputStream)}, but does not throw an
+   * exception if the message is missing required fields. Instead, a partial
+   * message is returned.
+   */
+  public MessageType parsePartialFrom(InputStream input)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(InputStream, ExtensionRegistryLite)},
+   * but does not throw an exception if the message is missing required fields.
+   * Instead, a partial message is returned.
+   */
+  public MessageType parsePartialFrom(InputStream input,
+                                      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseFrom(InputStream)}, but does not read util EOF.
+   * Instead, the size of message (encoded as a varint) is read first,
+   * then the message data. Use
+   * {@link MessageLite#writeDelimitedTo(java.io.OutputStream)} to write
+   * messages in this format.
+   *
+   * @return True if successful, or false if the stream is at EOF when the
+   *         method starts. Any other error (including reaching EOF during
+   *         parsing) will cause an exception to be thrown.
+   */
+  public MessageType parseDelimitedFrom(InputStream input)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseDelimitedFrom(InputStream)} but supporting extensions.
+   */
+  public MessageType parseDelimitedFrom(InputStream input,
+                                        ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseDelimitedFrom(InputStream)}, but does not throw an
+   * exception if the message is missing required fields. Instead, a partial
+   * message is returned.
+   */
+  public MessageType parsePartialDelimitedFrom(InputStream input)
+      throws InvalidProtocolBufferException;
+
+  /**
+   * Like {@link #parseDelimitedFrom(InputStream, ExtensionRegistryLite)},
+   * but does not throw an exception if the message is missing required fields.
+   * Instead, a partial message is returned.
+   */
+  public MessageType parsePartialDelimitedFrom(
+      InputStream input,
+      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException;
+}

+ 7 - 7
java/src/main/java/com/google/protobuf/RepeatedFieldBuilder.java

@@ -37,22 +37,22 @@ import java.util.Collections;
 import java.util.List;
 
 /**
- * <code>RepeatedFieldBuilder</code> implements a structure that a protocol
+ * {@code RepeatedFieldBuilder} implements a structure that a protocol
  * message uses to hold a repeated field of other protocol messages. It supports
  * the classical use case of adding immutable {@link Message}'s to the
  * repeated field and is highly optimized around this (no extra memory
  * allocations and sharing of immutable arrays).
  * <br>
  * It also supports the additional use case of adding a {@link Message.Builder}
- * to the repeated field and deferring conversion of that <code>Builder</code>
- * to an immutable <code>Message</code>. In this way, it's possible to maintain
- * a tree of <code>Builder</code>'s that acts as a fully read/write data
+ * to the repeated field and deferring conversion of that {@code Builder}
+ * to an immutable {@code Message}. In this way, it's possible to maintain
+ * a tree of {@code Builder}'s that acts as a fully read/write data
  * structure.
  * <br>
  * Logically, one can think of a tree of builders as converting the entire tree
  * to messages when build is called on the root or when any method is called
  * that desires a Message instead of a Builder. In terms of the implementation,
- * the <code>SingleFieldBuilder</code> and <code>RepeatedFieldBuilder</code>
+ * the {@code SingleFieldBuilder} and {@code RepeatedFieldBuilder}
  * classes cache messages that were created so that messages only need to be
  * created when some change occured in its builder or a builder for one of its
  * descendants.
@@ -192,7 +192,7 @@ public class RepeatedFieldBuilder
 
   /**
    * Get the message at the specified index. If the message is currently stored
-   * as a <code>Builder</code>, it is converted to a <code>Message</code> by
+   * as a {@code Builder}, it is converted to a {@code Message} by
    * calling {@link Message.Builder#buildPartial} on it.
    *
    * @param index the index of the message to get
@@ -204,7 +204,7 @@ public class RepeatedFieldBuilder
 
   /**
    * Get the message at the specified index. If the message is currently stored
-   * as a <code>Builder</code>, it is converted to a <code>Message</code> by
+   * as a {@code Builder}, it is converted to a {@code Message} by
    * calling {@link Message.Builder#buildPartial} on it.
    *
    * @param index the index of the message to get

+ 945 - 0
java/src/main/java/com/google/protobuf/RopeByteString.java

@@ -0,0 +1,945 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.io.ByteArrayInputStream;
+import java.nio.ByteBuffer;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Deque;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+/**
+ * Class to represent {@code ByteStrings} formed by concatenation of other
+ * ByteStrings, without copying the data in the pieces. The concatenation is
+ * represented as a tree whose leaf nodes are each a {@link LiteralByteString}.
+ *
+ * <p>Most of the operation here is inspired by the now-famous paper <a
+ * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
+ * BAP95 </a> Ropes: an Alternative to Strings hans-j. boehm, russ atkinson and
+ * michael plass
+ *
+ * <p>The algorithms described in the paper have been implemented for character
+ * strings in {@link com.google.common.string.Rope} and in the c++ class {@code
+ * cord.cc}.
+ *
+ * <p>Fundamentally the Rope algorithm represents the collection of pieces as a
+ * binary tree. BAP95 uses a Fibonacci bound relating depth to a minimum
+ * sequence length, sequences that are too short relative to their depth cause a
+ * tree rebalance.  More precisely, a tree of depth d is "balanced" in the
+ * terminology of BAP95 if its length is at least F(d+2), where F(n) is the
+ * n-the Fibonacci number. Thus for depths 0, 1, 2, 3, 4, 5,... we have minimum
+ * lengths 1, 2, 3, 5, 8, 13,...
+ *
+ * @author carlanton@google.com (Carl Haverl)
+ */
+class RopeByteString extends ByteString {
+
+  /**
+   * BAP95. Let Fn be the nth Fibonacci number. A {@link RopeByteString} of
+   * depth n is "balanced", i.e flat enough, if its length is at least Fn+2,
+   * e.g. a "balanced" {@link RopeByteString} of depth 1 must have length at
+   * least 2, of depth 4 must have length >= 8, etc.
+   *
+   * <p>There's nothing special about using the Fibonacci numbers for this, but
+   * they are a reasonable sequence for encapsulating the idea that we are OK
+   * with longer strings being encoded in deeper binary trees.
+   *
+   * <p>For 32-bit integers, this array has length 46.
+   */
+  private static final int[] minLengthByDepth;
+
+  static {
+    // Dynamically generate the list of Fibonacci numbers the first time this
+    // class is accessed.
+    List<Integer> numbers = new ArrayList<Integer>();
+
+    // we skip the first Fibonacci number (1).  So instead of: 1 1 2 3 5 8 ...
+    // we have: 1 2 3 5 8 ...
+    int f1 = 1;
+    int f2 = 1;
+
+    // get all the values until we roll over.
+    while (f2 > 0) {
+      numbers.add(f2);
+      int temp = f1 + f2;
+      f1 = f2;
+      f2 = temp;
+    }
+
+    // we include this here so that we can index this array to [x + 1] in the
+    // loops below.
+    numbers.add(Integer.MAX_VALUE);
+    minLengthByDepth = new int[numbers.size()];
+    for (int i = 0; i < minLengthByDepth.length; i++) {
+      // unbox all the values
+      minLengthByDepth[i] = numbers.get(i);
+    }
+  }
+
+  private final int totalLength;
+  private final ByteString left;
+  private final ByteString right;
+  private final int leftLength;
+  private final int treeDepth;
+
+  /**
+   * Create a new RopeByteString, which can be thought of as a new tree node, by
+   * recording references to the two given strings.
+   *
+   * @param left  string on the left of this node, should have {@code size() >
+   *              0}
+   * @param right string on the right of this node, should have {@code size() >
+   *              0}
+   */
+  private RopeByteString(ByteString left, ByteString right) {
+    this.left = left;
+    this.right = right;
+    leftLength = left.size();
+    totalLength = leftLength + right.size();
+    treeDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1;
+  }
+
+  /**
+   * Concatenate the given strings while performing various optimizations to
+   * slow the growth rate of tree depth and tree node count. The result is
+   * either a {@link LiteralByteString} or a {@link RopeByteString}
+   * depending on which optimizations, if any, were applied.
+   *
+   * <p>Small pieces of length less than {@link
+   * ByteString#CONCATENATE_BY_COPY_SIZE} may be copied by value here, as in
+   * BAP95.  Large pieces are referenced without copy.
+   *
+   * @param left  string on the left
+   * @param right string on the right
+   * @return concatenation representing the same sequence as the given strings
+   */
+  static ByteString concatenate(ByteString left, ByteString right) {
+    ByteString result;
+    RopeByteString leftRope =
+        (left instanceof RopeByteString) ? (RopeByteString) left : null;
+    if (right.size() == 0) {
+      result = left;
+    } else if (left.size() == 0) {
+      result = right;
+    } else {
+      int newLength = left.size() + right.size();
+      if (newLength < ByteString.CONCATENATE_BY_COPY_SIZE) {
+        // Optimization from BAP95: For short (leaves in paper, but just short
+        // here) total length, do a copy of data to a new leaf.
+        result = concatenateBytes(left, right);
+      } else if (leftRope != null
+          && leftRope.right.size() + right.size() < CONCATENATE_BY_COPY_SIZE) {
+        // Optimization from BAP95: As an optimization of the case where the
+        // ByteString is constructed by repeated concatenate, recognize the case
+        // where a short string is concatenated to a left-hand node whose
+        // right-hand branch is short.  In the paper this applies to leaves, but
+        // we just look at the length here. This has the advantage of shedding
+        // references to unneeded data when substrings have been taken.
+        //
+        // When we recognize this case, we do a copy of the data and create a
+        // new parent node so that the depth of the result is the same as the
+        // given left tree.
+        ByteString newRight = concatenateBytes(leftRope.right, right);
+        result = new RopeByteString(leftRope.left, newRight);
+      } else if (leftRope != null
+          && leftRope.left.getTreeDepth() > leftRope.right.getTreeDepth()
+          && leftRope.getTreeDepth() > right.getTreeDepth()) {
+        // Typically for concatenate-built strings the left-side is deeper than
+        // the right.  This is our final attempt to concatenate without
+        // increasing the tree depth.  We'll redo the the node on the RHS.  This
+        // is yet another optimization for building the string by repeatedly
+        // concatenating on the right.
+        ByteString newRight = new RopeByteString(leftRope.right, right);
+        result = new RopeByteString(leftRope.left, newRight);
+      } else {
+        // Fine, we'll add a node and increase the tree depth--unless we
+        // rebalance ;^)
+        int newDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1;
+        if (newLength >= minLengthByDepth[newDepth]) {
+          // The tree is shallow enough, so don't rebalance
+          result = new RopeByteString(left, right);
+        } else {
+          result = new Balancer().balance(left, right);
+        }
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Concatenates two strings by copying data values. This is called in a few
+   * cases in order to reduce the growth of the number of tree nodes.
+   *
+   * @param left  string on the left
+   * @param right string on the right
+   * @return string formed by copying data bytes
+   */
+  private static LiteralByteString concatenateBytes(ByteString left,
+      ByteString right) {
+    int leftSize = left.size();
+    int rightSize = right.size();
+    byte[] bytes = new byte[leftSize + rightSize];
+    left.copyTo(bytes, 0, 0, leftSize);
+    right.copyTo(bytes, 0, leftSize, rightSize);
+    return new LiteralByteString(bytes);  // Constructor wraps bytes
+  }
+
+  /**
+   * Create a new RopeByteString for testing only while bypassing all the
+   * defenses of {@link #concatenate(ByteString, ByteString)}. This allows
+   * testing trees of specific structure. We are also able to insert empty
+   * leaves, though these are dis-allowed, so that we can make sure the
+   * implementation can withstand their presence.
+   *
+   * @param left  string on the left of this node
+   * @param right string on the right of this node
+   * @return an unsafe instance for testing only
+   */
+  static RopeByteString newInstanceForTest(ByteString left, ByteString right) {
+    return new RopeByteString(left, right);
+  }
+
+  /**
+   * Gets the byte at the given index.
+   * Throws {@link ArrayIndexOutOfBoundsException} for backwards-compatibility
+   * reasons although it would more properly be {@link
+   * IndexOutOfBoundsException}.
+   *
+   * @param index index of byte
+   * @return the value
+   * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
+   */
+  @Override
+  public byte byteAt(int index) {
+    if (index < 0) {
+      throw new ArrayIndexOutOfBoundsException("Index < 0: " + index);
+    }
+    if (index > totalLength) {
+      throw new ArrayIndexOutOfBoundsException(
+          "Index > length: " + index + ", " + totalLength);
+    }
+
+    byte result;
+    // Find the relevant piece by recursive descent
+    if (index < leftLength) {
+      result = left.byteAt(index);
+    } else {
+      result = right.byteAt(index - leftLength);
+    }
+    return result;
+  }
+
+  @Override
+  public int size() {
+    return totalLength;
+  }
+
+  // =================================================================
+  // Pieces
+
+  @Override
+  protected int getTreeDepth() {
+    return treeDepth;
+  }
+
+  /**
+   * Determines if the tree is balanced according to BAP95, which means the tree
+   * is flat-enough with respect to the bounds. Note that this definition of
+   * balanced is one where sub-trees of balanced trees are not necessarily
+   * balanced.
+   *
+   * @return true if the tree is balanced
+   */
+  @Override
+  protected boolean isBalanced() {
+    return totalLength >= minLengthByDepth[treeDepth];
+  }
+
+  /**
+   * Takes a substring of this one. This involves recursive descent along the
+   * left and right edges of the substring, and referencing any wholly contained
+   * segments in between. Any leaf nodes entirely uninvolved in the substring
+   * will not be referenced by the substring.
+   *
+   * <p>Substrings of {@code length < 2} should result in at most a single
+   * recursive call chain, terminating at a leaf node. Thus the result will be a
+   * {@link LiteralByteString}. {@link #RopeByteString(ByteString,
+   * ByteString)}.
+   *
+   * @param beginIndex start at this index
+   * @param endIndex   the last character is the one before this index
+   * @return substring leaf node or tree
+   */
+  @Override
+  public ByteString substring(int beginIndex, int endIndex) {
+    if (beginIndex < 0) {
+      throw new IndexOutOfBoundsException(
+          "Beginning index: " + beginIndex + " < 0");
+    }
+    if (endIndex > totalLength) {
+      throw new IndexOutOfBoundsException(
+          "End index: " + endIndex + " > " + totalLength);
+    }
+    int substringLength = endIndex - beginIndex;
+    if (substringLength < 0) {
+      throw new IndexOutOfBoundsException(
+          "Beginning index larger than ending index: " + beginIndex + ", "
+              + endIndex);
+    }
+
+    ByteString result;
+    if (substringLength == 0) {
+      // Empty substring
+      result = ByteString.EMPTY;
+    } else if (substringLength == totalLength) {
+      // The whole string
+      result = this;
+    } else {
+      // Proper substring
+      if (endIndex <= leftLength) {
+        // Substring on the left
+        result = left.substring(beginIndex, endIndex);
+      } else if (beginIndex >= leftLength) {
+        // Substring on the right
+        result = right
+            .substring(beginIndex - leftLength, endIndex - leftLength);
+      } else {
+        // Split substring
+        ByteString leftSub = left.substring(beginIndex);
+        ByteString rightSub = right.substring(0, endIndex - leftLength);
+        // Intentionally not rebalancing, since in many cases these two
+        // substrings will already be less deep than the top-level
+        // RopeByteString we're taking a substring of.
+        result = new RopeByteString(leftSub, rightSub);
+      }
+    }
+    return result;
+  }
+
+  // =================================================================
+  // ByteString -> byte[]
+
+  @Override
+  protected void copyToInternal(byte[] target, int sourceOffset,
+      int targetOffset, int numberToCopy) {
+   if (sourceOffset + numberToCopy <= leftLength) {
+      left.copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
+    } else if (sourceOffset >= leftLength) {
+      right.copyToInternal(target, sourceOffset - leftLength, targetOffset,
+          numberToCopy);
+    } else {
+      int leftLength = this.leftLength - sourceOffset;
+      left.copyToInternal(target, sourceOffset, targetOffset, leftLength);
+      right.copyToInternal(target, 0, targetOffset + leftLength,
+          numberToCopy - leftLength);
+    }
+  }
+
+  @Override
+  public void copyTo(ByteBuffer target) {
+    left.copyTo(target);
+    right.copyTo(target);
+  }
+
+  @Override
+  public ByteBuffer asReadOnlyByteBuffer() {
+    ByteBuffer byteBuffer = ByteBuffer.wrap(toByteArray());
+    return byteBuffer.asReadOnlyBuffer();
+  }
+
+  @Override
+  public List<ByteBuffer> asReadOnlyByteBufferList() {
+    // Walk through the list of LiteralByteString's that make up this
+    // rope, and add each one as a read-only ByteBuffer.
+    List<ByteBuffer> result = new ArrayList<ByteBuffer>();
+    PieceIterator pieces = new PieceIterator(this);
+    while (pieces.hasNext()) {
+      LiteralByteString byteString = pieces.next();
+      result.add(byteString.asReadOnlyByteBuffer());
+    }
+    return result;
+  }
+
+  @Override
+  public void writeTo(OutputStream outputStream) throws IOException {
+    left.writeTo(outputStream);
+    right.writeTo(outputStream);
+  }
+
+  @Override
+  public String toString(String charsetName)
+      throws UnsupportedEncodingException {
+    return new String(toByteArray(), charsetName);
+  }
+
+  // =================================================================
+  // UTF-8 decoding
+
+  @Override
+  public boolean isValidUtf8() {
+    int leftPartial = left.partialIsValidUtf8(Utf8.COMPLETE, 0, leftLength);
+    int state = right.partialIsValidUtf8(leftPartial, 0, right.size());
+    return state == Utf8.COMPLETE;
+  }
+
+  @Override
+  protected int partialIsValidUtf8(int state, int offset, int length) {
+    int toIndex = offset + length;
+    if (toIndex <= leftLength) {
+      return left.partialIsValidUtf8(state, offset, length);
+    } else if (offset >= leftLength) {
+      return right.partialIsValidUtf8(state, offset - leftLength, length);
+    } else {
+      int leftLength = this.leftLength - offset;
+      int leftPartial = left.partialIsValidUtf8(state, offset, leftLength);
+      return right.partialIsValidUtf8(leftPartial, 0, length - leftLength);
+    }
+  }
+
+  // =================================================================
+  // equals() and hashCode()
+
+  @Override
+  public boolean equals(Object other) {
+    if (other == this) {
+      return true;
+    }
+    if (!(other instanceof ByteString)) {
+      return false;
+    }
+
+    ByteString otherByteString = (ByteString) other;
+    if (totalLength != otherByteString.size()) {
+      return false;
+    }
+    if (totalLength == 0) {
+      return true;
+    }
+
+    // You don't really want to be calling equals on long strings, but since
+    // we cache the hashCode, we effectively cache inequality. We use the cached
+    // hashCode if it's already computed.  It's arguable we should compute the
+    // hashCode here, and if we're going to be testing a bunch of byteStrings,
+    // it might even make sense.
+    if (hash != 0) {
+      int cachedOtherHash = otherByteString.peekCachedHashCode();
+      if (cachedOtherHash != 0 && hash != cachedOtherHash) {
+        return false;
+      }
+    }
+
+    return equalsFragments(otherByteString);
+  }
+
+  /**
+   * Determines if this string is equal to another of the same length by
+   * iterating over the leaf nodes. On each step of the iteration, the
+   * overlapping segments of the leaves are compared.
+   *
+   * @param other string of the same length as this one
+   * @return true if the values of this string equals the value of the given
+   *         one
+   */
+  private boolean equalsFragments(ByteString other) {
+    int thisOffset = 0;
+    Iterator<LiteralByteString> thisIter = new PieceIterator(this);
+    LiteralByteString thisString = thisIter.next();
+
+    int thatOffset = 0;
+    Iterator<LiteralByteString> thatIter = new PieceIterator(other);
+    LiteralByteString thatString = thatIter.next();
+
+    int pos = 0;
+    while (true) {
+      int thisRemaining = thisString.size() - thisOffset;
+      int thatRemaining = thatString.size() - thatOffset;
+      int bytesToCompare = Math.min(thisRemaining, thatRemaining);
+
+      // At least one of the offsets will be zero
+      boolean stillEqual = (thisOffset == 0)
+          ? thisString.equalsRange(thatString, thatOffset, bytesToCompare)
+          : thatString.equalsRange(thisString, thisOffset, bytesToCompare);
+      if (!stillEqual) {
+        return false;
+      }
+
+      pos += bytesToCompare;
+      if (pos >= totalLength) {
+        if (pos == totalLength) {
+          return true;
+        }
+        throw new IllegalStateException();
+      }
+      // We always get to the end of at least one of the pieces
+      if (bytesToCompare == thisRemaining) { // If reached end of this
+        thisOffset = 0;
+        thisString = thisIter.next();
+      } else {
+        thisOffset += bytesToCompare;
+      }
+      if (bytesToCompare == thatRemaining) { // If reached end of that
+        thatOffset = 0;
+        thatString = thatIter.next();
+      } else {
+        thatOffset += bytesToCompare;
+      }
+    }
+  }
+
+  /**
+   * Cached hash value.  Intentionally accessed via a data race, which is safe
+   * because of the Java Memory Model's "no out-of-thin-air values" guarantees
+   * for ints.
+   */
+  private int hash = 0;
+
+  @Override
+  public int hashCode() {
+    int h = hash;
+
+    if (h == 0) {
+      h = totalLength;
+      h = partialHash(h, 0, totalLength);
+      if (h == 0) {
+        h = 1;
+      }
+      hash = h;
+    }
+    return h;
+  }
+
+  @Override
+  protected int peekCachedHashCode() {
+    return hash;
+  }
+
+  @Override
+  protected int partialHash(int h, int offset, int length) {
+    int toIndex = offset + length;
+    if (toIndex <= leftLength) {
+      return left.partialHash(h, offset, length);
+    } else if (offset >= leftLength) {
+      return right.partialHash(h, offset - leftLength, length);
+    } else {
+      int leftLength = this.leftLength - offset;
+      int leftPartial = left.partialHash(h, offset, leftLength);
+      return right.partialHash(leftPartial, 0, length - leftLength);
+    }
+  }
+
+  // =================================================================
+  // Input stream
+
+  @Override
+  public CodedInputStream newCodedInput() {
+    return CodedInputStream.newInstance(new RopeInputStream());
+  }
+
+  @Override
+  public InputStream newInput() {
+    return new RopeInputStream();
+  }
+
+  /**
+   * This class implements the balancing algorithm of BAP95. In the paper the
+   * authors use an array to keep track of pieces, while here we use a stack.
+   * The tree is balanced by traversing subtrees in left to right order, and the
+   * stack always contains the part of the string we've traversed so far.
+   *
+   * <p>One surprising aspect of the algorithm is the result of balancing is not
+   * necessarily balanced, though it is nearly balanced.  For details, see
+   * BAP95.
+   */
+  private static class Balancer {
+    // Stack containing the part of the string, starting from the left, that
+    // we've already traversed.  The final string should be the equivalent of
+    // concatenating the strings on the stack from bottom to top.
+    private final Deque<ByteString> prefixesStack =
+        new ArrayDeque<ByteString>(minLengthByDepth.length);
+
+    private ByteString balance(ByteString left, ByteString right) {
+      doBalance(left);
+      doBalance(right);
+
+      // Sweep stack to gather the result
+      ByteString partialString = prefixesStack.pop();
+      while (!prefixesStack.isEmpty()) {
+        ByteString newLeft = prefixesStack.pop();
+        partialString = new RopeByteString(newLeft, partialString);
+      }
+      // We should end up with a RopeByteString since at a minimum we will
+      // create one from concatenating left and right
+      return partialString;
+    }
+
+    private void doBalance(ByteString root) {
+      // BAP95: Insert balanced subtrees whole. This means the result might not
+      // be balanced, leading to repeated rebalancings on concatenate. However,
+      // these rebalancings are shallow due to ignoring balanced subtrees, and
+      // relatively few calls to insert() result.
+      if (root.isBalanced()) {
+        insert(root);
+      } else if (root instanceof RopeByteString) {
+        RopeByteString rbs = (RopeByteString) root;
+        doBalance(rbs.left);
+        doBalance(rbs.right);
+      } else {
+        throw new IllegalArgumentException(
+            "Has a new type of ByteString been created? Found " +
+                root.getClass());
+      }
+    }
+
+    /**
+     * Push a string on the balance stack (BAP95).  BAP95 uses an array and
+     * calls the elements in the array 'bins'.  We instead use a stack, so the
+     * 'bins' of lengths are represented by differences between the elements of
+     * minLengthByDepth.
+     *
+     * <p>If the length bin for our string, and all shorter length bins, are
+     * empty, we just push it on the stack.  Otherwise, we need to start
+     * concatenating, putting the given string in the "middle" and continuing
+     * until we land in an empty length bin that matches the length of our
+     * concatenation.
+     *
+     * @param byteString string to place on the balance stack
+     */
+    private void insert(ByteString byteString) {
+      int depthBin = getDepthBinForLength(byteString.size());
+      int binEnd = minLengthByDepth[depthBin + 1];
+
+      // BAP95: Concatenate all trees occupying bins representing the length of
+      // our new piece or of shorter pieces, to the extent that is possible.
+      // The goal is to clear the bin which our piece belongs in, but that may
+      // not be entirely possible if there aren't enough longer bins occupied.
+      if (prefixesStack.isEmpty() || prefixesStack.peek().size() >= binEnd) {
+        prefixesStack.push(byteString);
+      } else {
+        int binStart = minLengthByDepth[depthBin];
+
+        // Concatenate the subtrees of shorter length
+        ByteString newTree = prefixesStack.pop();
+        while (!prefixesStack.isEmpty()
+            && prefixesStack.peek().size() < binStart) {
+          ByteString left = prefixesStack.pop();
+          newTree = new RopeByteString(left, newTree);
+        }
+
+        // Concatenate the given string
+        newTree = new RopeByteString(newTree, byteString);
+
+        // Continue concatenating until we land in an empty bin
+        while (!prefixesStack.isEmpty()) {
+          depthBin = getDepthBinForLength(newTree.size());
+          binEnd = minLengthByDepth[depthBin + 1];
+          if (prefixesStack.peek().size() < binEnd) {
+            ByteString left = prefixesStack.pop();
+            newTree = new RopeByteString(left, newTree);
+          } else {
+            break;
+          }
+        }
+        prefixesStack.push(newTree);
+      }
+    }
+
+    private int getDepthBinForLength(int length) {
+      int depth = Arrays.binarySearch(minLengthByDepth, length);
+      if (depth < 0) {
+        // It wasn't an exact match, so convert to the index of the containing
+        // fragment, which is one less even than the insertion point.
+        int insertionPoint = -(depth + 1);
+        depth = insertionPoint - 1;
+      }
+
+      return depth;
+    }
+  }
+
+  /**
+   * This class is a continuable tree traversal, which keeps the state
+   * information which would exist on the stack in a recursive traversal instead
+   * on a stack of "Bread Crumbs". The maximum depth of the stack in this
+   * iterator is the same as the depth of the tree being traversed.
+   *
+   * <p>This iterator is used to implement
+   * {@link RopeByteString#equalsFragments(ByteString)}.
+   */
+  private static class PieceIterator implements Iterator<LiteralByteString> {
+
+    private final Deque<RopeByteString> breadCrumbs =
+        new ArrayDeque<RopeByteString>(minLengthByDepth.length);
+    private LiteralByteString next;
+
+    private PieceIterator(ByteString root) {
+      next = getLeafByLeft(root);
+    }
+
+    private LiteralByteString getLeafByLeft(ByteString root) {
+      ByteString pos = root;
+      while (pos instanceof RopeByteString) {
+        RopeByteString rbs = (RopeByteString) pos;
+        breadCrumbs.push(rbs);
+        pos = rbs.left;
+      }
+      return (LiteralByteString) pos;
+    }
+
+    private LiteralByteString getNextNonEmptyLeaf() {
+      while (true) {
+        // Almost always, we go through this loop exactly once.  However, if
+        // we discover an empty string in the rope, we toss it and try again.
+        if (breadCrumbs.isEmpty()) {
+          return null;
+        } else {
+          LiteralByteString result = getLeafByLeft(breadCrumbs.pop().right);
+          if (!result.isEmpty()) {
+            return result;
+          }
+        }
+      }
+    }
+
+    public boolean hasNext() {
+      return next != null;
+    }
+
+    /**
+     * Returns the next item and advances one {@code LiteralByteString}.
+     *
+     * @return next non-empty LiteralByteString or {@code null}
+     */
+    public LiteralByteString next() {
+      if (next == null) {
+        throw new NoSuchElementException();
+      }
+      LiteralByteString result = next;
+      next = getNextNonEmptyLeaf();
+      return result;
+    }
+
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  // =================================================================
+  // ByteIterator
+
+  @Override
+  public ByteIterator iterator() {
+    return new RopeByteIterator();
+  }
+
+  private class RopeByteIterator implements ByteString.ByteIterator {
+
+    private final PieceIterator pieces;
+    private ByteIterator bytes;
+    int bytesRemaining;
+
+    private RopeByteIterator() {
+      pieces = new PieceIterator(RopeByteString.this);
+      bytes = pieces.next().iterator();
+      bytesRemaining = size();
+    }
+
+    public boolean hasNext() {
+      return (bytesRemaining > 0);
+    }
+
+    public Byte next() {
+      return nextByte(); // Does not instantiate a Byte
+    }
+
+    public byte nextByte() {
+      if (!bytes.hasNext()) {
+        bytes = pieces.next().iterator();
+      }
+      --bytesRemaining;
+      return bytes.nextByte();
+    }
+
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  /**
+   * This class is the {@link RopeByteString} equivalent for
+   * {@link ByteArrayInputStream}.
+   */
+  private class RopeInputStream extends InputStream {
+    // Iterates through the pieces of the rope
+    private PieceIterator pieceIterator;
+    // The current piece
+    private LiteralByteString currentPiece;
+    // The size of the current piece
+    private int currentPieceSize;
+    // The index of the next byte to read in the current piece
+    private int currentPieceIndex;
+    // The offset of the start of the current piece in the rope byte string
+    private int currentPieceOffsetInRope;
+    // Offset in the buffer at which user called mark();
+    private int mark;
+
+    public RopeInputStream() {
+      initialize();
+    }
+
+    @Override
+    public int read(byte b[], int offset, int length)  {
+      if (b == null) {
+        throw new NullPointerException();
+      } else if (offset < 0 || length < 0 || length > b.length - offset) {
+        throw new IndexOutOfBoundsException();
+      }
+      return readSkipInternal(b, offset, length);
+    }
+
+    @Override
+    public long skip(long length) {
+      if (length < 0) {
+        throw new IndexOutOfBoundsException();
+      } else if (length > Integer.MAX_VALUE) {
+        length = Integer.MAX_VALUE;
+      }
+      return readSkipInternal(null, 0, (int) length);
+    }
+
+    /**
+     * Internal implementation of read and skip.  If b != null, then read the
+     * next {@code length} bytes into the buffer {@code b} at
+     * offset {@code offset}.  If b == null, then skip the next {@code length)
+     * bytes.
+     * <p>
+     * This method assumes that all error checking has already happened.
+     * <p>
+     * Returns the actual number of bytes read or skipped.
+     */
+    private int readSkipInternal(byte b[], int offset, int length)  {
+      int bytesRemaining = length;
+      while (bytesRemaining > 0) {
+        advanceIfCurrentPieceFullyRead();
+        if (currentPiece == null) {
+          if (bytesRemaining == length) {
+             // We didn't manage to read anything
+             return -1;
+           }
+          break;
+        } else {
+          // Copy the bytes from this piece.
+          int currentPieceRemaining = currentPieceSize - currentPieceIndex;
+          int count = Math.min(currentPieceRemaining, bytesRemaining);
+          if (b != null) {
+            currentPiece.copyTo(b, currentPieceIndex, offset, count);
+            offset += count;
+          }
+          currentPieceIndex += count;
+          bytesRemaining -= count;
+        }
+      }
+       // Return the number of bytes read.
+      return length - bytesRemaining;
+    }
+
+    @Override
+    public int read() throws IOException {
+      advanceIfCurrentPieceFullyRead();
+      if (currentPiece == null) {
+        return -1;
+      } else {
+        return currentPiece.byteAt(currentPieceIndex++) & 0xFF;
+      }
+    }
+
+    @Override
+    public int available() throws IOException {
+      int bytesRead = currentPieceOffsetInRope + currentPieceIndex;
+      return RopeByteString.this.size() - bytesRead;
+    }
+
+    @Override
+    public boolean markSupported() {
+      return true;
+    }
+
+    @Override
+    public void mark(int readAheadLimit) {
+      // Set the mark to our position in the byte string
+      mark = currentPieceOffsetInRope + currentPieceIndex;
+    }
+
+    @Override
+    public synchronized void reset() {
+      // Just reinitialize and skip the specified number of bytes.
+      initialize();
+      readSkipInternal(null, 0, mark);
+    }
+
+    /** Common initialization code used by both the constructor and reset() */
+    private void initialize() {
+      pieceIterator = new PieceIterator(RopeByteString.this);
+      currentPiece = pieceIterator.next();
+      currentPieceSize = currentPiece.size();
+      currentPieceIndex = 0;
+      currentPieceOffsetInRope = 0;
+    }
+
+    /**
+     * Skips to the next piece if we have read all the data in the current
+     * piece.  Sets currentPiece to null if we have reached the end of the
+     * input.
+     */
+    private void advanceIfCurrentPieceFullyRead() {
+      if (currentPiece != null && currentPieceIndex == currentPieceSize) {
+        // Generally, we can only go through this loop at most once, since
+        // empty strings can't end up in a rope.  But better to test.
+        currentPieceOffsetInRope += currentPieceSize;
+        currentPieceIndex = 0;
+        if (pieceIterator.hasNext()) {
+          currentPiece = pieceIterator.next();
+          currentPieceSize = currentPiece.size();
+        } else {
+          currentPiece = null;
+          currentPieceSize = 0;
+        }
+      }
+    }
+  }
+}

+ 6 - 6
java/src/main/java/com/google/protobuf/SingleFieldBuilder.java

@@ -31,21 +31,21 @@
 package com.google.protobuf;
 
 /**
- * <code>SingleFieldBuilder</code> implements a structure that a protocol
+ * {@code SingleFieldBuilder} implements a structure that a protocol
  * message uses to hold a single field of another protocol message. It supports
  * the classical use case of setting an immutable {@link Message} as the value
  * of the field and is highly optimized around this.
  * <br>
  * It also supports the additional use case of setting a {@link Message.Builder}
- * as the field and deferring conversion of that <code>Builder</code>
- * to an immutable <code>Message</code>. In this way, it's possible to maintain
- * a tree of <code>Builder</code>'s that acts as a fully read/write data
+ * as the field and deferring conversion of that {@code Builder}
+ * to an immutable {@code Message}. In this way, it's possible to maintain
+ * a tree of {@code Builder}'s that acts as a fully read/write data
  * structure.
  * <br>
  * Logically, one can think of a tree of builders as converting the entire tree
  * to messages when build is called on the root or when any method is called
  * that desires a Message instead of a Builder. In terms of the implementation,
- * the <code>SingleFieldBuilder</code> and <code>RepeatedFieldBuilder</code>
+ * the {@code SingleFieldBuilder} and {@code RepeatedFieldBuilder}
  * classes cache messages that were created so that messages only need to be
  * created when some change occured in its builder or a builder for one of its
  * descendants.
@@ -99,7 +99,7 @@ public class SingleFieldBuilder
 
   /**
    * Get the message for the field. If the message is currently stored
-   * as a <code>Builder</code>, it is converted to a <code>Message</code> by
+   * as a {@code Builder}, it is converted to a {@code Message} by
    * calling {@link Message.Builder#buildPartial} on it. If no message has
    * been set, returns the default instance of the message.
    *

+ 5 - 5
java/src/main/java/com/google/protobuf/SmallSortedMap.java

@@ -51,14 +51,14 @@ import java.util.SortedMap;
  * remaining entries are stored in an overflow map. Iteration over the entries
  * in the map should be done as follows:
  *
- * <pre>
- * for (int i = 0; i &lt; fieldMap.getNumArrayEntries(); i++) {
+ * <pre>   {@code
+ * for (int i = 0; i < fieldMap.getNumArrayEntries(); i++) {
  *   process(fieldMap.getArrayEntryAt(i));
  * }
- * for (Map.Entry&lt;K, V&gt; entry : fieldMap.getOverflowEntries()) {
+ * for (Map.Entry<K, V> entry : fieldMap.getOverflowEntries()) {
  *   process(entry);
  * }
- * </pre>
+ * }</pre>
  *
  * The resulting iteration is in order of ascending field tag number. The
  * object returned by {@link #entrySet()} adheres to the same contract but is
@@ -394,7 +394,7 @@ class SmallSortedMap<K extends Comparable<K>, V> extends AbstractMap<K, V> {
 
   /**
    * Entry implementation that implements Comparable in order to support
-   * binary search witin the entry array. Also checks mutability in
+   * binary search within the entry array. Also checks mutability in
    * {@link #setValue()}.
    */
   private class Entry implements Map.Entry<K, V>, Comparable<Entry> {

+ 102 - 19
java/src/main/java/com/google/protobuf/TextFormat.java

@@ -55,15 +55,18 @@ import java.util.regex.Pattern;
 public final class TextFormat {
   private TextFormat() {}
 
-  private static final Printer DEFAULT_PRINTER = new Printer(false);
-  private static final Printer SINGLE_LINE_PRINTER = new Printer(true);
+  private static final Printer DEFAULT_PRINTER = new Printer();
+  private static final Printer SINGLE_LINE_PRINTER =
+      (new Printer()).setSingleLineMode(true);
+  private static final Printer UNICODE_PRINTER =
+      (new Printer()).setEscapeNonAscii(false);
 
   /**
    * Outputs a textual representation of the Protocol Message supplied into
    * the parameter output. (This representation is the new version of the
    * classic "ProtocolPrinter" output from the original Protocol Buffer system)
    */
-  public static void print(final Message message, final Appendable output)
+  public static void print(final MessageOrBuilder message, final Appendable output)
                            throws IOException {
     DEFAULT_PRINTER.print(message, new TextGenerator(output));
   }
@@ -79,7 +82,7 @@ public final class TextFormat {
    * Generates a human readable form of this message, useful for debugging and
    * other purposes, with no newline characters.
    */
-  public static String shortDebugString(final Message message) {
+  public static String shortDebugString(final MessageOrBuilder message) {
     try {
       final StringBuilder sb = new StringBuilder();
       SINGLE_LINE_PRINTER.print(message, new TextGenerator(sb));
@@ -109,7 +112,7 @@ public final class TextFormat {
    * Like {@code print()}, but writes directly to a {@code String} and
    * returns it.
    */
-  public static String printToString(final Message message) {
+  public static String printToString(final MessageOrBuilder message) {
     try {
       final StringBuilder text = new StringBuilder();
       print(message, text);
@@ -133,6 +136,34 @@ public final class TextFormat {
     }
   }
 
+  /**
+   * Same as {@code printToString()}, except that non-ASCII characters
+   * in string type fields are not escaped in backslash+octals.
+   */
+  public static String printToUnicodeString(final MessageOrBuilder message) {
+    try {
+      final StringBuilder text = new StringBuilder();
+      UNICODE_PRINTER.print(message, new TextGenerator(text));
+      return text.toString();
+    } catch (IOException e) {
+      throw new IllegalStateException(e);
+    }
+  }
+
+  /**
+   * Same as {@code printToString()}, except that non-ASCII characters
+   * in string type fields are not escaped in backslash+octals.
+   */
+  public static String printToUnicodeString(final UnknownFieldSet fields) {
+    try {
+      final StringBuilder text = new StringBuilder();
+      UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(text));
+      return text.toString();
+    } catch (IOException e) {
+      throw new IllegalStateException(e);
+    }
+  }
+
   public static void printField(final FieldDescriptor field,
                                 final Object value,
                                 final Appendable output)
@@ -216,13 +247,26 @@ public final class TextFormat {
   /** Helper class for converting protobufs to text. */
   private static final class Printer {
     /** Whether to omit newlines from the output. */
-    final boolean singleLineMode;
+    boolean singleLineMode = false;
+
+    /** Whether to escape non ASCII characters with backslash and octal. */
+    boolean escapeNonAscii = true;
+
+    private Printer() {}
 
-    private Printer(final boolean singleLineMode) {
+    /** Setter of singleLineMode */
+    private Printer setSingleLineMode(boolean singleLineMode) {
       this.singleLineMode = singleLineMode;
+      return this;
+    }
+
+    /** Setter of escapeNonAscii */
+    private Printer setEscapeNonAscii(boolean escapeNonAscii) {
+      this.escapeNonAscii = escapeNonAscii;
+      return this;
     }
 
-    private void print(final Message message, final TextGenerator generator)
+    private void print(final MessageOrBuilder message, final TextGenerator generator)
         throws IOException {
       for (Map.Entry<FieldDescriptor, Object> field
           : message.getAllFields().entrySet()) {
@@ -339,7 +383,9 @@ public final class TextFormat {
 
         case STRING:
           generator.print("\"");
-          generator.print(escapeText((String) value));
+          generator.print(escapeNonAscii ?
+              escapeText((String) value) :
+              (String) value);
           generator.print("\"");
           break;
 
@@ -541,7 +587,7 @@ public final class TextFormat {
     private int previousLine = 0;
     private int previousColumn = 0;
 
-    // We use possesive quantifiers (*+ and ++) because otherwise the Java
+    // We use possessive quantifiers (*+ and ++) because otherwise the Java
     // regex matcher has stack overflows on large inputs.
     private static final Pattern WHITESPACE =
       Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
@@ -864,7 +910,7 @@ public final class TextFormat {
     public ParseException parseException(final String description) {
       // Note:  People generally prefer one-based line and column numbers.
       return new ParseException(
-        (line + 1) + ":" + (column + 1) + ": " + description);
+        line + 1, column + 1, description);
     }
 
     /**
@@ -875,7 +921,7 @@ public final class TextFormat {
         final String description) {
       // Note:  People generally prefer one-based line and column numbers.
       return new ParseException(
-        (previousLine + 1) + ":" + (previousColumn + 1) + ": " + description);
+        previousLine + 1, previousColumn + 1, description);
     }
 
     /**
@@ -900,8 +946,45 @@ public final class TextFormat {
   public static class ParseException extends IOException {
     private static final long serialVersionUID = 3196188060225107702L;
 
+    private final int line;
+    private final int column;
+
+    /** Create a new instance, with -1 as the line and column numbers. */
     public ParseException(final String message) {
-      super(message);
+      this(-1, -1, message);
+    }
+
+    /**
+     * Create a new instance
+     *
+     * @param line the line number where the parse error occurred,
+     * using 1-offset.
+     * @param column the column number where the parser error occurred,
+     * using 1-offset.
+     */
+    public ParseException(final int line, final int column,
+        final String message) {
+      super(Integer.toString(line) + ":" + column + ": " + message);
+      this.line = line;
+      this.column = column;
+    }
+
+    /**
+     * Return the line where the parse exception occurred, or -1 when
+     * none is provided. The value is specified as 1-offset, so the first
+     * line is line 1.
+     */
+    public int getLine() {
+      return line;
+    }
+
+    /**
+     * Return the column where the parse exception occurred, or -1 when
+     * none is provided. The value is specified as 1-offset, so the first
+     * line is line 1.
+     */
+    public int getColumn() {
+      return column;
     }
   }
 
@@ -1073,7 +1156,7 @@ public final class TextFormat {
         mergeField(tokenizer, extensionRegistry, subBuilder);
       }
 
-      value = subBuilder.build();
+      value = subBuilder.buildPartial();
 
     } else {
       tokenizer.consume(":");
@@ -1212,7 +1295,7 @@ public final class TextFormat {
    */
   static ByteString unescapeBytes(final CharSequence charString)
       throws InvalidEscapeSequenceException {
-    // First convert the Java characater sequence to UTF-8 bytes.
+    // First convert the Java character sequence to UTF-8 bytes.
     ByteString input = ByteString.copyFromUtf8(charString.toString());
     // Then unescape certain byte sequences introduced by ASCII '\\'.  The valid
     // escapes can all be expressed with ASCII characters, so it is safe to
@@ -1349,7 +1432,7 @@ public final class TextFormat {
   /**
    * Parse a 32-bit signed integer from the text.  Unlike the Java standard
    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
-   * and "0" to signify hexidecimal and octal numbers, respectively.
+   * and "0" to signify hexadecimal and octal numbers, respectively.
    */
   static int parseInt32(final String text) throws NumberFormatException {
     return (int) parseInteger(text, true, false);
@@ -1358,7 +1441,7 @@ public final class TextFormat {
   /**
    * Parse a 32-bit unsigned integer from the text.  Unlike the Java standard
    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
-   * and "0" to signify hexidecimal and octal numbers, respectively.  The
+   * and "0" to signify hexadecimal and octal numbers, respectively.  The
    * result is coerced to a (signed) {@code int} when returned since Java has
    * no unsigned integer type.
    */
@@ -1369,7 +1452,7 @@ public final class TextFormat {
   /**
    * Parse a 64-bit signed integer from the text.  Unlike the Java standard
    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
-   * and "0" to signify hexidecimal and octal numbers, respectively.
+   * and "0" to signify hexadecimal and octal numbers, respectively.
    */
   static long parseInt64(final String text) throws NumberFormatException {
     return parseInteger(text, true, true);
@@ -1378,7 +1461,7 @@ public final class TextFormat {
   /**
    * Parse a 64-bit unsigned integer from the text.  Unlike the Java standard
    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
-   * and "0" to signify hexidecimal and octal numbers, respectively.  The
+   * and "0" to signify hexadecimal and octal numbers, respectively.  The
    * result is coerced to a (signed) {@code long} when returned since Java has
    * no unsigned long type.
    */

+ 27 - 2
java/src/main/java/com/google/protobuf/UnknownFieldSet.java

@@ -46,7 +46,7 @@ import java.util.TreeMap;
  * {@code UnknownFieldSet} is used to keep track of fields which were seen when
  * parsing a protocol message but whose field numbers or types are unrecognized.
  * This most frequently occurs when new fields are added to a message type
- * and then messages containing those feilds are read by old software that was
+ * and then messages containing those fields are read by old software that was
  * compiled before the new types were added.
  *
  * <p>Every {@link Message} contains an {@code UnknownFieldSet} (and every
@@ -468,7 +468,7 @@ public final class UnknownFieldSet implements MessageLite {
     /**
      * Parse a single field from {@code input} and merge it into this set.
      * @param tag The field's tag number, which was already parsed.
-     * @return {@code false} if the tag is an engroup tag.
+     * @return {@code false} if the tag is an end group tag.
      */
     public boolean mergeFieldFrom(final int tag, final CodedInputStream input)
                                   throws IOException {
@@ -950,4 +950,29 @@ public final class UnknownFieldSet implements MessageLite {
       }
     }
   }
+
+  /**
+   * Parser to implement MessageLite interface.
+   */
+  public static final class Parser extends AbstractParser<UnknownFieldSet> {
+    public UnknownFieldSet parsePartialFrom(
+        CodedInputStream input, ExtensionRegistryLite extensionRegistry)
+        throws InvalidProtocolBufferException {
+      Builder builder = newBuilder();
+      try {
+        builder.mergeFrom(input);
+      } catch (InvalidProtocolBufferException e) {
+        throw e.setUnfinishedMessage(builder.buildPartial());
+      } catch (IOException e) {
+        throw new InvalidProtocolBufferException(e.getMessage())
+            .setUnfinishedMessage(builder.buildPartial());
+      }
+      return builder.buildPartial();
+    }
+  }
+
+  private static final Parser PARSER = new Parser();
+  public final Parser getParserForType() {
+    return PARSER;
+  }
 }

+ 7 - 0
java/src/main/java/com/google/protobuf/UnmodifiableLazyStringList.java

@@ -32,6 +32,7 @@ package com.google.protobuf;
 
 import java.util.AbstractList;
 import java.util.RandomAccess;
+import java.util.List;
 import java.util.ListIterator;
 import java.util.Iterator;
 
@@ -143,4 +144,10 @@ public class UnmodifiableLazyStringList extends AbstractList<String>
       }
     };
   }
+
+  @Override
+  public List<?> getUnderlyingElements() {
+    // The returned value is already unmodifiable.
+    return list.getUnderlyingElements();
+  }
 }

+ 349 - 0
java/src/main/java/com/google/protobuf/Utf8.java

@@ -0,0 +1,349 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+/**
+ * A set of low-level, high-performance static utility methods related
+ * to the UTF-8 character encoding.  This class has no dependencies
+ * outside of the core JDK libraries.
+ *
+ * <p>There are several variants of UTF-8.  The one implemented by
+ * this class is the restricted definition of UTF-8 introduced in
+ * Unicode 3.1, which mandates the rejection of "overlong" byte
+ * sequences as well as rejection of 3-byte surrogate codepoint byte
+ * sequences.  Note that the UTF-8 decoder included in Oracle's JDK
+ * has been modified to also reject "overlong" byte sequences, but (as
+ * of 2011) still accepts 3-byte surrogate codepoint byte sequences.
+ *
+ * <p>The byte sequences considered valid by this class are exactly
+ * those that can be roundtrip converted to Strings and back to bytes
+ * using the UTF-8 charset, without loss: <pre> {@code
+ * Arrays.equals(bytes, new String(bytes, "UTF-8").getBytes("UTF-8"))
+ * }</pre>
+ *
+ * <p>See the Unicode Standard,</br>
+ * Table 3-6. <em>UTF-8 Bit Distribution</em>,</br>
+ * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>.
+ *
+ * <p>This class supports decoding of partial byte sequences, so that the
+ * bytes in a complete UTF-8 byte sequences can be stored in multiple
+ * segments.  Methods typically return {@link #MALFORMED} if the partial
+ * byte sequence is definitely not well-formed, {@link #COMPLETE} if it is
+ * well-formed in the absence of additional input, or if the byte sequence
+ * apparently terminated in the middle of a character, an opaque integer
+ * "state" value containing enough information to decode the character when
+ * passed to a subsequent invocation of a partial decoding method.
+ *
+ * @author martinrb@google.com (Martin Buchholz)
+ */
+final class Utf8 {
+  private Utf8() {}
+
+  /**
+   * State value indicating that the byte sequence is well-formed and
+   * complete (no further bytes are needed to complete a character).
+   */
+  public static final int COMPLETE = 0;
+
+  /**
+   * State value indicating that the byte sequence is definitely not
+   * well-formed.
+   */
+  public static final int MALFORMED = -1;
+
+  // Other state values include the partial bytes of the incomplete
+  // character to be decoded in the simplest way: we pack the bytes
+  // into the state int in little-endian order.  For example:
+  //
+  // int state = byte1 ^ (byte2 << 8) ^ (byte3 << 16);
+  //
+  // Such a state is unpacked thus (note the ~ operation for byte2 to
+  // undo byte1's sign-extension bits):
+  //
+  // int byte1 = (byte) state;
+  // int byte2 = (byte) ~(state >> 8);
+  // int byte3 = (byte) (state >> 16);
+  //
+  // We cannot store a zero byte in the state because it would be
+  // indistinguishable from the absence of a byte.  But we don't need
+  // to, because partial bytes must always be negative.  When building
+  // a state, we ensure that byte1 is negative and subsequent bytes
+  // are valid trailing bytes.
+
+  /**
+   * Returns {@code true} if the given byte array is a well-formed
+   * UTF-8 byte sequence.
+   *
+   * <p>This is a convenience method, equivalent to a call to {@code
+   * isValidUtf8(bytes, 0, bytes.length)}.
+   */
+  public static boolean isValidUtf8(byte[] bytes) {
+    return isValidUtf8(bytes, 0, bytes.length);
+  }
+
+  /**
+   * Returns {@code true} if the given byte array slice is a
+   * well-formed UTF-8 byte sequence.  The range of bytes to be
+   * checked extends from index {@code index}, inclusive, to {@code
+   * limit}, exclusive.
+   *
+   * <p>This is a convenience method, equivalent to {@code
+   * partialIsValidUtf8(bytes, index, limit) == Utf8.COMPLETE}.
+   */
+  public static boolean isValidUtf8(byte[] bytes, int index, int limit) {
+    return partialIsValidUtf8(bytes, index, limit) == COMPLETE;
+  }
+
+  /**
+   * Tells whether the given byte array slice is a well-formed,
+   * malformed, or incomplete UTF-8 byte sequence.  The range of bytes
+   * to be checked extends from index {@code index}, inclusive, to
+   * {@code limit}, exclusive.
+   *
+   * @param state either {@link Utf8#COMPLETE} (if this is the initial decoding
+   * operation) or the value returned from a call to a partial decoding method
+   * for the previous bytes
+   *
+   * @return {@link #MALFORMED} if the partial byte sequence is
+   * definitely not well-formed, {@link #COMPLETE} if it is well-formed
+   * (no additional input needed), or if the byte sequence is
+   * "incomplete", i.e. apparently terminated in the middle of a character,
+   * an opaque integer "state" value containing enough information to
+   * decode the character when passed to a subsequent invocation of a
+   * partial decoding method.
+   */
+  public static int partialIsValidUtf8(
+      int state, byte[] bytes, int index, int limit) {
+    if (state != COMPLETE) {
+      // The previous decoding operation was incomplete (or malformed).
+      // We look for a well-formed sequence consisting of bytes from
+      // the previous decoding operation (stored in state) together
+      // with bytes from the array slice.
+      //
+      // We expect such "straddler characters" to be rare.
+
+      if (index >= limit) {  // No bytes? No progress.
+        return state;
+      }
+      int byte1 = (byte) state;
+      // byte1 is never ASCII.
+      if (byte1 < (byte) 0xE0) {
+        // two-byte form
+
+        // Simultaneously checks for illegal trailing-byte in
+        // leading position and overlong 2-byte form.
+        if (byte1 < (byte) 0xC2 ||
+            // byte2 trailing-byte test
+            bytes[index++] > (byte) 0xBF) {
+          return MALFORMED;
+        }
+      } else if (byte1 < (byte) 0xF0) {
+        // three-byte form
+
+        // Get byte2 from saved state or array
+        int byte2 = (byte) ~(state >> 8);
+        if (byte2 == 0) {
+          byte2 = bytes[index++];
+          if (index >= limit) {
+            return incompleteStateFor(byte1, byte2);
+          }
+        }
+        if (byte2 > (byte) 0xBF ||
+            // overlong? 5 most significant bits must not all be zero
+            (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) ||
+            // illegal surrogate codepoint?
+            (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) ||
+            // byte3 trailing-byte test
+            bytes[index++] > (byte) 0xBF) {
+          return MALFORMED;
+        }
+      } else {
+        // four-byte form
+
+        // Get byte2 and byte3 from saved state or array
+        int byte2 = (byte) ~(state >> 8);
+        int byte3 = 0;
+        if (byte2 == 0) {
+          byte2 = bytes[index++];
+          if (index >= limit) {
+            return incompleteStateFor(byte1, byte2);
+          }
+        } else {
+          byte3 = (byte) (state >> 16);
+        }
+        if (byte3 == 0) {
+          byte3 = bytes[index++];
+          if (index >= limit) {
+            return incompleteStateFor(byte1, byte2, byte3);
+          }
+        }
+
+        // If we were called with state == MALFORMED, then byte1 is 0xFF,
+        // which never occurs in well-formed UTF-8, and so we will return
+        // MALFORMED again below.
+
+        if (byte2 > (byte) 0xBF ||
+            // Check that 1 <= plane <= 16.  Tricky optimized form of:
+            // if (byte1 > (byte) 0xF4 ||
+            //     byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+            //     byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+            (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 ||
+            // byte3 trailing-byte test
+            byte3 > (byte) 0xBF ||
+            // byte4 trailing-byte test
+             bytes[index++] > (byte) 0xBF) {
+          return MALFORMED;
+        }
+      }
+    }
+
+    return partialIsValidUtf8(bytes, index, limit);
+  }
+
+  /**
+   * Tells whether the given byte array slice is a well-formed,
+   * malformed, or incomplete UTF-8 byte sequence.  The range of bytes
+   * to be checked extends from index {@code index}, inclusive, to
+   * {@code limit}, exclusive.
+   *
+   * <p>This is a convenience method, equivalent to a call to {@code
+   * partialIsValidUtf8(Utf8.COMPLETE, bytes, index, limit)}.
+   *
+   * @return {@link #MALFORMED} if the partial byte sequence is
+   * definitely not well-formed, {@link #COMPLETE} if it is well-formed
+   * (no additional input needed), or if the byte sequence is
+   * "incomplete", i.e. apparently terminated in the middle of a character,
+   * an opaque integer "state" value containing enough information to
+   * decode the character when passed to a subsequent invocation of a
+   * partial decoding method.
+   */
+  public static int partialIsValidUtf8(
+      byte[] bytes, int index, int limit) {
+    // Optimize for 100% ASCII.
+    // Hotspot loves small simple top-level loops like this.
+    while (index < limit && bytes[index] >= 0) {
+      index++;
+    }
+
+    return (index >= limit) ? COMPLETE :
+        partialIsValidUtf8NonAscii(bytes, index, limit);
+  }
+
+  private static int partialIsValidUtf8NonAscii(
+      byte[] bytes, int index, int limit) {
+    for (;;) {
+      int byte1, byte2;
+
+      // Optimize for interior runs of ASCII bytes.
+      do {
+        if (index >= limit) {
+          return COMPLETE;
+        }
+      } while ((byte1 = bytes[index++]) >= 0);
+
+      if (byte1 < (byte) 0xE0) {
+        // two-byte form
+
+        if (index >= limit) {
+          return byte1;
+        }
+
+        // Simultaneously checks for illegal trailing-byte in
+        // leading position and overlong 2-byte form.
+        if (byte1 < (byte) 0xC2 ||
+            bytes[index++] > (byte) 0xBF) {
+          return MALFORMED;
+        }
+      } else if (byte1 < (byte) 0xF0) {
+        // three-byte form
+
+        if (index >= limit - 1) { // incomplete sequence
+          return incompleteStateFor(bytes, index, limit);
+        }
+        if ((byte2 = bytes[index++]) > (byte) 0xBF ||
+            // overlong? 5 most significant bits must not all be zero
+            (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) ||
+            // check for illegal surrogate codepoints
+            (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) ||
+            // byte3 trailing-byte test
+            bytes[index++] > (byte) 0xBF) {
+          return MALFORMED;
+        }
+      } else {
+        // four-byte form
+
+        if (index >= limit - 2) {  // incomplete sequence
+          return incompleteStateFor(bytes, index, limit);
+        }
+        if ((byte2 = bytes[index++]) > (byte) 0xBF ||
+            // Check that 1 <= plane <= 16.  Tricky optimized form of:
+            // if (byte1 > (byte) 0xF4 ||
+            //     byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+            //     byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+            (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 ||
+            // byte3 trailing-byte test
+            bytes[index++] > (byte) 0xBF ||
+            // byte4 trailing-byte test
+            bytes[index++] > (byte) 0xBF) {
+          return MALFORMED;
+        }
+      }
+    }
+  }
+
+  private static int incompleteStateFor(int byte1) {
+    return (byte1 > (byte) 0xF4) ?
+        MALFORMED : byte1;
+  }
+
+  private static int incompleteStateFor(int byte1, int byte2) {
+    return (byte1 > (byte) 0xF4 ||
+            byte2 > (byte) 0xBF) ?
+        MALFORMED : byte1 ^ (byte2 << 8);
+  }
+
+  private static int incompleteStateFor(int byte1, int byte2, int byte3) {
+    return (byte1 > (byte) 0xF4 ||
+            byte2 > (byte) 0xBF ||
+            byte3 > (byte) 0xBF) ?
+        MALFORMED : byte1 ^ (byte2 << 8) ^ (byte3 << 16);
+  }
+
+  private static int incompleteStateFor(byte[] bytes, int index, int limit) {
+    int byte1 = bytes[index - 1];
+    switch (limit - index) {
+      case 0: return incompleteStateFor(byte1);
+      case 1: return incompleteStateFor(byte1, bytes[index]);
+      case 2: return incompleteStateFor(byte1, bytes[index], bytes[index + 1]);
+      default: throw new AssertionError();
+    }
+  }
+}

+ 1 - 1
java/src/main/java/com/google/protobuf/WireFormat.java

@@ -146,7 +146,7 @@ public final class WireFormat {
     public boolean isPackable() { return true; }
   }
 
-  // Field numbers for feilds in MessageSet wire format.
+  // Field numbers for fields in MessageSet wire format.
   static final int MESSAGE_SET_ITEM    = 1;
   static final int MESSAGE_SET_TYPE_ID = 2;
   static final int MESSAGE_SET_MESSAGE = 3;

+ 50 - 1
java/src/test/java/com/google/protobuf/AbstractMessageTest.java

@@ -30,6 +30,7 @@
 
 package com.google.protobuf;
 
+import com.google.protobuf.Descriptors.FieldDescriptor;
 import protobuf_unittest.UnittestOptimizeFor.TestOptimizedForSize;
 import protobuf_unittest.UnittestProto;
 import protobuf_unittest.UnittestProto.ForeignMessage;
@@ -167,6 +168,13 @@ public class AbstractMessageTest extends TestCase {
         wrappedBuilder.setUnknownFields(unknownFields);
         return this;
       }
+      @Override
+      public Message.Builder getFieldBuilder(FieldDescriptor field) {
+        return wrappedBuilder.getFieldBuilder(field);
+      }
+    }
+    public Parser<? extends Message> getParserForType() {
+      return wrappedMessage.getParserForType();
     }
   }
 
@@ -220,6 +228,34 @@ public class AbstractMessageTest extends TestCase {
     TestUtil.assertAllFieldsSet((TestAllTypes) message.wrappedMessage);
   }
 
+  public void testParsingUninitialized() throws Exception {
+    TestRequiredForeign.Builder builder = TestRequiredForeign.newBuilder();
+    builder.getOptionalMessageBuilder().setDummy2(10);
+    ByteString bytes = builder.buildPartial().toByteString();
+    Message.Builder abstractMessageBuilder =
+        new AbstractMessageWrapper.Builder(TestRequiredForeign.newBuilder());
+    // mergeFrom() should not throw initialization error.
+    abstractMessageBuilder.mergeFrom(bytes).buildPartial();
+    try {
+      abstractMessageBuilder.mergeFrom(bytes).build();
+      fail();
+    } catch (UninitializedMessageException ex) {
+      // pass
+    }
+
+    // test DynamicMessage directly.
+    Message.Builder dynamicMessageBuilder = DynamicMessage.newBuilder(
+        TestRequiredForeign.getDescriptor());
+    // mergeFrom() should not throw initialization error.
+    dynamicMessageBuilder.mergeFrom(bytes).buildPartial();
+    try {
+      dynamicMessageBuilder.mergeFrom(bytes).build();
+      fail();
+    } catch (UninitializedMessageException ex) {
+      // pass
+    }
+  }
+
   public void testPackedSerialization() throws Exception {
     Message abstractMessage =
         new AbstractMessageWrapper(TestUtil.getPackedSet());
@@ -298,12 +334,16 @@ public class AbstractMessageTest extends TestCase {
       new AbstractMessageWrapper.Builder(builder);
 
     assertFalse(abstractBuilder.isInitialized());
+    assertEquals("a, b, c", abstractBuilder.getInitializationErrorString());
     builder.setA(1);
     assertFalse(abstractBuilder.isInitialized());
+    assertEquals("b, c", abstractBuilder.getInitializationErrorString());
     builder.setB(1);
     assertFalse(abstractBuilder.isInitialized());
+    assertEquals("c", abstractBuilder.getInitializationErrorString());
     builder.setC(1);
     assertTrue(abstractBuilder.isInitialized());
+    assertEquals("", abstractBuilder.getInitializationErrorString());
   }
 
   public void testForeignIsInitialized() throws Exception {
@@ -312,18 +352,27 @@ public class AbstractMessageTest extends TestCase {
       new AbstractMessageWrapper.Builder(builder);
 
     assertTrue(abstractBuilder.isInitialized());
+    assertEquals("", abstractBuilder.getInitializationErrorString());
 
     builder.setOptionalMessage(TEST_REQUIRED_UNINITIALIZED);
     assertFalse(abstractBuilder.isInitialized());
+    assertEquals(
+        "optional_message.a, optional_message.b, optional_message.c",
+        abstractBuilder.getInitializationErrorString());
 
     builder.setOptionalMessage(TEST_REQUIRED_INITIALIZED);
     assertTrue(abstractBuilder.isInitialized());
+    assertEquals("", abstractBuilder.getInitializationErrorString());
 
     builder.addRepeatedMessage(TEST_REQUIRED_UNINITIALIZED);
     assertFalse(abstractBuilder.isInitialized());
+    assertEquals(
+        "repeated_message[0].a, repeated_message[0].b, repeated_message[0].c",
+        abstractBuilder.getInitializationErrorString());
 
     builder.setRepeatedMessage(0, TEST_REQUIRED_INITIALIZED);
     assertTrue(abstractBuilder.isInitialized());
+    assertEquals("", abstractBuilder.getInitializationErrorString());
   }
 
   // -----------------------------------------------------------------
@@ -421,7 +470,7 @@ public class AbstractMessageTest extends TestCase {
 
 
   /**
-   * Asserts that the given proto has symetric equals and hashCode methods.
+   * Asserts that the given proto has symmetric equals and hashCode methods.
    */
   private void checkEqualsIsConsistent(Message message) {
     // Object should be equal to itself.

+ 68 - 0
java/src/test/java/com/google/protobuf/BoundedByteStringTest.java

@@ -0,0 +1,68 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.io.UnsupportedEncodingException;
+
+/**
+ * This class tests {@link BoundedByteString}, which extends {@link LiteralByteString},
+ * by inheriting the tests from {@link LiteralByteStringTest}.  The only method which
+ * is strange enough that it needs to be overridden here is {@link #testToString()}.
+ *
+ * @author carlanton@google.com (Carl Haverl)
+ */
+public class BoundedByteStringTest extends LiteralByteStringTest {
+
+  @Override
+  protected void setUp() throws Exception {
+    classUnderTest = "BoundedByteString";
+    byte[] sourceBytes = ByteStringTest.getTestBytes(2341, 11337766L);
+    int from = 100;
+    int to = sourceBytes.length - 100;
+    stringUnderTest = ByteString.copyFrom(sourceBytes).substring(from, to);
+    referenceBytes = new byte[to - from];
+    System.arraycopy(sourceBytes, from, referenceBytes, 0, to - from);
+    expectedHashCode = 727575887;
+  }
+
+  @Override
+  public void testToString() throws UnsupportedEncodingException {
+    String testString = "I love unicode \u1234\u5678 characters";
+    LiteralByteString unicode = new LiteralByteString(testString.getBytes(UTF_8));
+    ByteString chopped = unicode.substring(2, unicode.size() - 6);
+    assertEquals(classUnderTest + ".substring() must have the expected type",
+        classUnderTest, getActualClassName(chopped));
+
+    String roundTripString = chopped.toString(UTF_8);
+    assertEquals(classUnderTest + " unicode bytes must match",
+        testString.substring(2, testString.length() - 6), roundTripString);
+  }
+}

+ 692 - 0
java/src/test/java/com/google/protobuf/ByteStringTest.java

@@ -0,0 +1,692 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import com.google.protobuf.ByteString.Output;
+
+import junit.framework.TestCase;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+import java.util.Random;
+
+/**
+ * Test methods with implementations in {@link ByteString}, plus do some top-level "integration"
+ * tests.
+ *
+ * @author carlanton@google.com (Carl Haverl)
+ */
+public class ByteStringTest extends TestCase {
+
+  private static final String UTF_16 = "UTF-16";
+
+  static byte[] getTestBytes(int size, long seed) {
+    Random random = new Random(seed);
+    byte[] result = new byte[size];
+    random.nextBytes(result);
+    return result;
+  }
+
+  private byte[] getTestBytes(int size) {
+    return getTestBytes(size, 445566L);
+  }
+
+  private byte[] getTestBytes() {
+    return getTestBytes(1000);
+  }
+
+  // Compare the entire left array with a subset of the right array.
+  private boolean isArrayRange(byte[] left, byte[] right, int rightOffset, int length) {
+    boolean stillEqual = (left.length == length);
+    for (int i = 0; (stillEqual && i < length); ++i) {
+      stillEqual = (left[i] == right[rightOffset + i]);
+    }
+    return stillEqual;
+  }
+
+  // Returns true only if the given two arrays have identical contents.
+  private boolean isArray(byte[] left, byte[] right) {
+    return left.length == right.length && isArrayRange(left, right, 0, left.length);
+  }
+
+  public void testSubstring_BeginIndex() {
+    byte[] bytes = getTestBytes();
+    ByteString substring = ByteString.copyFrom(bytes).substring(500);
+    assertTrue("substring must contain the tail of the string",
+        isArrayRange(substring.toByteArray(), bytes, 500, bytes.length - 500));
+  }
+
+  public void testCopyFrom_BytesOffsetSize() {
+    byte[] bytes = getTestBytes();
+    ByteString byteString = ByteString.copyFrom(bytes, 500, 200);
+    assertTrue("copyFrom sub-range must contain the expected bytes",
+        isArrayRange(byteString.toByteArray(), bytes, 500, 200));
+  }
+
+  public void testCopyFrom_Bytes() {
+    byte[] bytes = getTestBytes();
+    ByteString byteString = ByteString.copyFrom(bytes);
+    assertTrue("copyFrom must contain the expected bytes",
+        isArray(byteString.toByteArray(), bytes));
+  }
+
+  public void testCopyFrom_ByteBufferSize() {
+    byte[] bytes = getTestBytes();
+    ByteBuffer byteBuffer = ByteBuffer.allocate(bytes.length);
+    byteBuffer.put(bytes);
+    byteBuffer.position(500);
+    ByteString byteString = ByteString.copyFrom(byteBuffer, 200);
+    assertTrue("copyFrom byteBuffer sub-range must contain the expected bytes",
+        isArrayRange(byteString.toByteArray(), bytes, 500, 200));
+  }
+
+  public void testCopyFrom_ByteBuffer() {
+    byte[] bytes = getTestBytes();
+    ByteBuffer byteBuffer = ByteBuffer.allocate(bytes.length);
+    byteBuffer.put(bytes);
+    byteBuffer.position(500);
+    ByteString byteString = ByteString.copyFrom(byteBuffer);
+    assertTrue("copyFrom byteBuffer sub-range must contain the expected bytes",
+        isArrayRange(byteString.toByteArray(), bytes, 500, bytes.length - 500));
+  }
+
+  public void testCopyFrom_StringEncoding() throws UnsupportedEncodingException {
+    String testString = "I love unicode \u1234\u5678 characters";
+    ByteString byteString = ByteString.copyFrom(testString, UTF_16);
+    byte[] testBytes = testString.getBytes(UTF_16);
+    assertTrue("copyFrom string must respect the charset",
+        isArrayRange(byteString.toByteArray(), testBytes, 0, testBytes.length));
+  }
+
+  public void testCopyFrom_Utf8() throws UnsupportedEncodingException {
+    String testString = "I love unicode \u1234\u5678 characters";
+    ByteString byteString = ByteString.copyFromUtf8(testString);
+    byte[] testBytes = testString.getBytes("UTF-8");
+    assertTrue("copyFromUtf8 string must respect the charset",
+        isArrayRange(byteString.toByteArray(), testBytes, 0, testBytes.length));
+  }
+
+  public void testCopyFrom_Iterable() {
+    byte[] testBytes = getTestBytes(77777, 113344L);
+    final List<ByteString> pieces = makeConcretePieces(testBytes);
+    // Call copyFrom() on a Collection
+    ByteString byteString = ByteString.copyFrom(pieces);
+    assertTrue("copyFrom a List must contain the expected bytes",
+        isArrayRange(byteString.toByteArray(), testBytes, 0, testBytes.length));
+    // Call copyFrom on an iteration that's not a collection
+    ByteString byteStringAlt = ByteString.copyFrom(new Iterable<ByteString>() {
+      public Iterator<ByteString> iterator() {
+        return pieces.iterator();
+      }
+    });
+    assertEquals("copyFrom from an Iteration must contain the expected bytes",
+        byteString, byteStringAlt);
+  }
+
+  public void testCopyTo_TargetOffset() {
+    byte[] bytes = getTestBytes();
+    ByteString byteString = ByteString.copyFrom(bytes);
+    byte[] target = new byte[bytes.length + 1000];
+    byteString.copyTo(target, 400);
+    assertTrue("copyFrom byteBuffer sub-range must contain the expected bytes",
+        isArrayRange(bytes, target, 400, bytes.length));
+  }
+
+  public void testReadFrom_emptyStream() throws IOException {
+    ByteString byteString =
+        ByteString.readFrom(new ByteArrayInputStream(new byte[0]));
+    assertSame("reading an empty stream must result in the EMPTY constant "
+        + "byte string", ByteString.EMPTY, byteString);
+  }
+
+  public void testReadFrom_smallStream() throws IOException {
+    assertReadFrom(getTestBytes(10));
+  }
+
+  public void testReadFrom_mutating() throws IOException {
+    byte[] capturedArray = null;
+    EvilInputStream eis = new EvilInputStream();
+    ByteString byteString = ByteString.readFrom(eis);
+
+    capturedArray = eis.capturedArray;
+    byte[] originalValue = byteString.toByteArray();
+    for (int x = 0; x < capturedArray.length; ++x) {
+      capturedArray[x] = (byte) 0;
+    }
+
+    byte[] newValue = byteString.toByteArray();
+    assertTrue("copyFrom byteBuffer must not grant access to underlying array",
+        Arrays.equals(originalValue, newValue));
+  }
+
+  // Tests sizes that are near the rope copy-out threshold.
+  public void testReadFrom_mediumStream() throws IOException {
+    assertReadFrom(getTestBytes(ByteString.CONCATENATE_BY_COPY_SIZE - 1));
+    assertReadFrom(getTestBytes(ByteString.CONCATENATE_BY_COPY_SIZE));
+    assertReadFrom(getTestBytes(ByteString.CONCATENATE_BY_COPY_SIZE + 1));
+    assertReadFrom(getTestBytes(200));
+  }
+
+  // Tests sizes that are over multi-segment rope threshold.
+  public void testReadFrom_largeStream() throws IOException {
+    assertReadFrom(getTestBytes(0x100));
+    assertReadFrom(getTestBytes(0x101));
+    assertReadFrom(getTestBytes(0x110));
+    assertReadFrom(getTestBytes(0x1000));
+    assertReadFrom(getTestBytes(0x1001));
+    assertReadFrom(getTestBytes(0x1010));
+    assertReadFrom(getTestBytes(0x10000));
+    assertReadFrom(getTestBytes(0x10001));
+    assertReadFrom(getTestBytes(0x10010));
+  }
+
+  // Tests sizes that are near the read buffer size.
+  public void testReadFrom_byteBoundaries() throws IOException {
+    final int min = ByteString.MIN_READ_FROM_CHUNK_SIZE;
+    final int max = ByteString.MAX_READ_FROM_CHUNK_SIZE;
+
+    assertReadFrom(getTestBytes(min - 1));
+    assertReadFrom(getTestBytes(min));
+    assertReadFrom(getTestBytes(min + 1));
+
+    assertReadFrom(getTestBytes(min * 2 - 1));
+    assertReadFrom(getTestBytes(min * 2));
+    assertReadFrom(getTestBytes(min * 2 + 1));
+
+    assertReadFrom(getTestBytes(min * 4 - 1));
+    assertReadFrom(getTestBytes(min * 4));
+    assertReadFrom(getTestBytes(min * 4 + 1));
+
+    assertReadFrom(getTestBytes(min * 8 - 1));
+    assertReadFrom(getTestBytes(min * 8));
+    assertReadFrom(getTestBytes(min * 8 + 1));
+
+    assertReadFrom(getTestBytes(max - 1));
+    assertReadFrom(getTestBytes(max));
+    assertReadFrom(getTestBytes(max + 1));
+
+    assertReadFrom(getTestBytes(max * 2 - 1));
+    assertReadFrom(getTestBytes(max * 2));
+    assertReadFrom(getTestBytes(max * 2 + 1));
+  }
+
+  // Tests that IOExceptions propagate through ByteString.readFrom().
+  public void testReadFrom_IOExceptions() {
+    try {
+      ByteString.readFrom(new FailStream());
+      fail("readFrom must throw the underlying IOException");
+
+    } catch (IOException e) {
+      assertEquals("readFrom must throw the expected exception",
+                   "synthetic failure", e.getMessage());
+    }
+  }
+
+  // Tests that ByteString.readFrom works with streams that don't
+  // always fill their buffers.
+  public void testReadFrom_reluctantStream() throws IOException {
+    final byte[] data = getTestBytes(0x1000);
+
+    ByteString byteString = ByteString.readFrom(new ReluctantStream(data));
+    assertTrue("readFrom byte stream must contain the expected bytes",
+        isArray(byteString.toByteArray(), data));
+
+    // Same test as above, but with some specific chunk sizes.
+    assertReadFromReluctantStream(data, 100);
+    assertReadFromReluctantStream(data, 248);
+    assertReadFromReluctantStream(data, 249);
+    assertReadFromReluctantStream(data, 250);
+    assertReadFromReluctantStream(data, 251);
+    assertReadFromReluctantStream(data, 0x1000);
+    assertReadFromReluctantStream(data, 0x1001);
+  }
+
+  // Fails unless ByteString.readFrom reads the bytes correctly from a
+  // reluctant stream with the given chunkSize parameter.
+  private void assertReadFromReluctantStream(byte[] bytes, int chunkSize)
+      throws IOException {
+    ByteString b = ByteString.readFrom(new ReluctantStream(bytes), chunkSize);
+    assertTrue("readFrom byte stream must contain the expected bytes",
+        isArray(b.toByteArray(), bytes));
+  }
+
+  // Tests that ByteString.readFrom works with streams that implement
+  // available().
+  public void testReadFrom_available() throws IOException {
+    final byte[] data = getTestBytes(0x1001);
+
+    ByteString byteString = ByteString.readFrom(new AvailableStream(data));
+    assertTrue("readFrom byte stream must contain the expected bytes",
+        isArray(byteString.toByteArray(), data));
+  }
+
+  // Fails unless ByteString.readFrom reads the bytes correctly.
+  private void assertReadFrom(byte[] bytes) throws IOException {
+    ByteString byteString =
+        ByteString.readFrom(new ByteArrayInputStream(bytes));
+    assertTrue("readFrom byte stream must contain the expected bytes",
+        isArray(byteString.toByteArray(), bytes));
+  }
+
+  // A stream that fails when read.
+  private static final class FailStream extends InputStream {
+    @Override public int read() throws IOException {
+      throw new IOException("synthetic failure");
+    }
+  }
+
+  // A stream that simulates blocking by only producing 250 characters
+  // per call to read(byte[]).
+  private static class ReluctantStream extends InputStream {
+    protected final byte[] data;
+    protected int pos = 0;
+
+    public ReluctantStream(byte[] data) {
+      this.data = data;
+    }
+
+    @Override public int read() {
+      if (pos == data.length) {
+        return -1;
+      } else {
+        return data[pos++];
+      }
+    }
+
+    @Override public int read(byte[] buf) {
+      return read(buf, 0, buf.length);
+    }
+
+    @Override public int read(byte[] buf, int offset, int size) {
+      if (pos == data.length) {
+        return -1;
+      }
+      int count = Math.min(Math.min(size, data.length - pos), 250);
+      System.arraycopy(data, pos, buf, offset, count);
+      pos += count;
+      return count;
+    }
+  }
+
+  // Same as above, but also implements available().
+  private static final class AvailableStream extends ReluctantStream {
+    public AvailableStream(byte[] data) {
+      super(data);
+    }
+
+    @Override public int available() {
+      return Math.min(250, data.length - pos);
+    }
+  }
+
+  // A stream which exposes the byte array passed into read(byte[], int, int).
+  private static class EvilInputStream extends InputStream {
+    public byte[] capturedArray = null;
+
+    @Override
+    public int read(byte[] buf, int off, int len) {
+      if (capturedArray != null) {
+        return -1;
+      } else {
+        capturedArray = buf;
+        for (int x = 0; x < len; ++x) {
+          buf[x] = (byte) x;
+        }
+        return len;
+      }
+    }
+
+    @Override
+    public int read() {
+      // Purposefully do nothing.
+      return -1;
+    }
+  }
+  
+  // A stream which exposes the byte array passed into write(byte[], int, int).
+  private static class EvilOutputStream extends OutputStream {
+    public byte[] capturedArray = null;
+
+    @Override
+    public void write(byte[] buf, int off, int len) {
+      if (capturedArray == null) {
+        capturedArray = buf;
+      }
+    }
+
+    @Override
+    public void write(int ignored) {
+      // Purposefully do nothing.
+    }
+  }
+
+  public void testToStringUtf8() throws UnsupportedEncodingException {
+    String testString = "I love unicode \u1234\u5678 characters";
+    byte[] testBytes = testString.getBytes("UTF-8");
+    ByteString byteString = ByteString.copyFrom(testBytes);
+    assertEquals("copyToStringUtf8 must respect the charset",
+        testString, byteString.toStringUtf8());
+  }
+
+  public void testNewOutput_InitialCapacity() throws IOException {
+    byte[] bytes = getTestBytes();
+    ByteString.Output output = ByteString.newOutput(bytes.length + 100);
+    output.write(bytes);
+    ByteString byteString = output.toByteString();
+    assertTrue(
+        "String built from newOutput(int) must contain the expected bytes",
+        isArrayRange(bytes, byteString.toByteArray(), 0, bytes.length));
+  }
+
+  // Test newOutput() using a variety of buffer sizes and a variety of (fixed)
+  // write sizes
+  public void testNewOutput_ArrayWrite() throws IOException {
+    byte[] bytes = getTestBytes();
+    int length = bytes.length;
+    int[] bufferSizes = {128, 256, length / 2, length - 1, length, length + 1,
+                         2 * length, 3 * length};
+    int[] writeSizes = {1, 4, 5, 7, 23, bytes.length};
+
+    for (int bufferSize : bufferSizes) {
+      for (int writeSize : writeSizes) {
+        // Test writing the entire output writeSize bytes at a time.
+        ByteString.Output output = ByteString.newOutput(bufferSize);
+        for (int i = 0; i < length; i += writeSize) {
+          output.write(bytes, i, Math.min(writeSize, length - i));
+        }
+        ByteString byteString = output.toByteString();
+        assertTrue("String built from newOutput() must contain the expected bytes",
+            isArrayRange(bytes, byteString.toByteArray(), 0, bytes.length));
+      }
+    }
+  }
+
+  // Test newOutput() using a variety of buffer sizes, but writing all the
+  // characters using write(byte);
+  public void testNewOutput_WriteChar() throws IOException {
+    byte[] bytes = getTestBytes();
+    int length = bytes.length;
+    int[] bufferSizes = {0, 1, 128, 256, length / 2,
+                         length - 1, length, length + 1,
+                         2 * length, 3 * length};
+    for (int bufferSize : bufferSizes) {
+      ByteString.Output output = ByteString.newOutput(bufferSize);
+      for (byte byteValue : bytes) {
+        output.write(byteValue);
+      }
+      ByteString byteString = output.toByteString();
+      assertTrue("String built from newOutput() must contain the expected bytes",
+          isArrayRange(bytes, byteString.toByteArray(), 0, bytes.length));
+    }
+  }
+
+  // Test newOutput() in which we write the bytes using a variety of methods
+  // and sizes, and in which we repeatedly call toByteString() in the middle.
+  public void testNewOutput_Mixed() throws IOException {
+    Random rng = new Random(1);
+    byte[] bytes = getTestBytes();
+    int length = bytes.length;
+    int[] bufferSizes = {0, 1, 128, 256, length / 2,
+                         length - 1, length, length + 1,
+                         2 * length, 3 * length};
+
+    for (int bufferSize : bufferSizes) {
+      // Test writing the entire output using a mixture of write sizes and
+      // methods;
+      ByteString.Output output = ByteString.newOutput(bufferSize);
+      int position = 0;
+      while (position < bytes.length) {
+        if (rng.nextBoolean()) {
+          int count = 1 + rng.nextInt(bytes.length - position);
+          output.write(bytes, position, count);
+          position += count;
+        } else {
+          output.write(bytes[position]);
+          position++;
+        }
+        assertEquals("size() returns the right value", position, output.size());
+        assertTrue("newOutput() substring must have correct bytes",
+            isArrayRange(output.toByteString().toByteArray(),
+                bytes, 0, position));
+      }
+      ByteString byteString = output.toByteString();
+      assertTrue("String built from newOutput() must contain the expected bytes",
+          isArrayRange(bytes, byteString.toByteArray(), 0, bytes.length));
+    }
+  }
+  
+  public void testNewOutputEmpty() throws IOException {
+    // Make sure newOutput() correctly builds empty byte strings
+    ByteString byteString = ByteString.newOutput().toByteString();
+    assertEquals(ByteString.EMPTY, byteString);
+  }
+  
+  public void testNewOutput_Mutating() throws IOException {
+    Output os = ByteString.newOutput(5);
+    os.write(new byte[] {1, 2, 3, 4, 5});
+    EvilOutputStream eos = new EvilOutputStream();
+    os.writeTo(eos);
+    byte[] capturedArray = eos.capturedArray;
+    ByteString byteString = os.toByteString();
+    byte[] oldValue = byteString.toByteArray();
+    Arrays.fill(capturedArray, (byte) 0);
+    byte[] newValue = byteString.toByteArray();
+    assertTrue("Output must not provide access to the underlying byte array",
+        Arrays.equals(oldValue, newValue));
+  }
+
+  public void testNewCodedBuilder() throws IOException {
+    byte[] bytes = getTestBytes();
+    ByteString.CodedBuilder builder = ByteString.newCodedBuilder(bytes.length);
+    builder.getCodedOutput().writeRawBytes(bytes);
+    ByteString byteString = builder.build();
+    assertTrue("String built from newCodedBuilder() must contain the expected bytes",
+        isArrayRange(bytes, byteString.toByteArray(), 0, bytes.length));
+  }
+
+  public void testSubstringParity() {
+    byte[] bigBytes = getTestBytes(2048 * 1024, 113344L);
+    int start = 512 * 1024 - 3333;
+    int end   = 512 * 1024 + 7777;
+    ByteString concreteSubstring = ByteString.copyFrom(bigBytes).substring(start, end);
+    boolean ok = true;
+    for (int i = start; ok && i < end; ++i) {
+      ok = (bigBytes[i] == concreteSubstring.byteAt(i - start));
+    }
+    assertTrue("Concrete substring didn't capture the right bytes", ok);
+
+    ByteString literalString = ByteString.copyFrom(bigBytes, start, end - start);
+    assertTrue("Substring must be equal to literal string",
+        concreteSubstring.equals(literalString));
+    assertEquals("Substring must have same hashcode as literal string",
+        literalString.hashCode(), concreteSubstring.hashCode());
+  }
+
+  public void testCompositeSubstring() {
+    byte[] referenceBytes = getTestBytes(77748, 113344L);
+
+    List<ByteString> pieces = makeConcretePieces(referenceBytes);
+    ByteString listString = ByteString.copyFrom(pieces);
+
+    int from = 1000;
+    int to = 40000;
+    ByteString compositeSubstring = listString.substring(from, to);
+    byte[] substringBytes = compositeSubstring.toByteArray();
+    boolean stillEqual = true;
+    for (int i = 0; stillEqual && i < to - from; ++i) {
+      stillEqual = referenceBytes[from + i] == substringBytes[i];
+    }
+    assertTrue("Substring must return correct bytes", stillEqual);
+
+    stillEqual = true;
+    for (int i = 0; stillEqual && i < to - from; ++i) {
+      stillEqual = referenceBytes[from + i] == compositeSubstring.byteAt(i);
+    }
+    assertTrue("Substring must support byteAt() correctly", stillEqual);
+
+    ByteString literalSubstring = ByteString.copyFrom(referenceBytes, from, to - from);
+    assertTrue("Composite substring must equal a literal substring over the same bytes",
+        compositeSubstring.equals(literalSubstring));
+    assertTrue("Literal substring must equal a composite substring over the same bytes",
+        literalSubstring.equals(compositeSubstring));
+
+    assertEquals("We must get the same hashcodes for composite and literal substrings",
+        literalSubstring.hashCode(), compositeSubstring.hashCode());
+
+    assertFalse("We can't be equal to a proper substring",
+        compositeSubstring.equals(literalSubstring.substring(0, literalSubstring.size() - 1)));
+  }
+
+  public void testCopyFromList() {
+    byte[] referenceBytes = getTestBytes(77748, 113344L);
+    ByteString literalString = ByteString.copyFrom(referenceBytes);
+
+    List<ByteString> pieces = makeConcretePieces(referenceBytes);
+    ByteString listString = ByteString.copyFrom(pieces);
+
+    assertTrue("Composite string must be equal to literal string",
+        listString.equals(literalString));
+    assertEquals("Composite string must have same hashcode as literal string",
+        literalString.hashCode(), listString.hashCode());
+  }
+
+  public void testConcat() {
+    byte[] referenceBytes = getTestBytes(77748, 113344L);
+    ByteString literalString = ByteString.copyFrom(referenceBytes);
+
+    List<ByteString> pieces = makeConcretePieces(referenceBytes);
+
+    Iterator<ByteString> iter = pieces.iterator();
+    ByteString concatenatedString = iter.next();
+    while (iter.hasNext()) {
+      concatenatedString = concatenatedString.concat(iter.next());
+    }
+
+    assertTrue("Concatenated string must be equal to literal string",
+        concatenatedString.equals(literalString));
+    assertEquals("Concatenated string must have same hashcode as literal string",
+        literalString.hashCode(), concatenatedString.hashCode());
+  }
+
+  /**
+   * Test the Rope implementation can deal with Empty nodes, even though we
+   * guard against them. See also {@link LiteralByteStringTest#testConcat_empty()}.
+   */
+  public void testConcat_empty() {
+    byte[] referenceBytes = getTestBytes(7748, 113344L);
+    ByteString literalString = ByteString.copyFrom(referenceBytes);
+
+    ByteString duo = RopeByteString.newInstanceForTest(literalString, literalString);
+    ByteString temp = RopeByteString.newInstanceForTest(
+        RopeByteString.newInstanceForTest(literalString, ByteString.EMPTY),
+        RopeByteString.newInstanceForTest(ByteString.EMPTY, literalString));
+    ByteString quintet = RopeByteString.newInstanceForTest(temp, ByteString.EMPTY);
+
+    assertTrue("String with concatenated nulls must equal simple concatenate",
+        duo.equals(quintet));
+    assertEquals("String with concatenated nulls have same hashcode as simple concatenate",
+        duo.hashCode(), quintet.hashCode());
+
+    ByteString.ByteIterator duoIter = duo.iterator();
+    ByteString.ByteIterator quintetIter = quintet.iterator();
+    boolean stillEqual = true;
+    while (stillEqual && quintetIter.hasNext()) {
+      stillEqual = (duoIter.nextByte() == quintetIter.nextByte());
+    }
+    assertTrue("We must get the same characters by iterating", stillEqual);
+    assertFalse("Iterator must be exhausted", duoIter.hasNext());
+    try {
+      duoIter.nextByte();
+      fail("Should have thrown an exception.");
+    } catch (NoSuchElementException e) {
+      // This is success
+    }
+    try {
+      quintetIter.nextByte();
+      fail("Should have thrown an exception.");
+    } catch (NoSuchElementException e) {
+      // This is success
+    }
+
+    // Test that even if we force empty strings in as rope leaves in this
+    // configuration, we always get a (possibly Bounded) LiteralByteString
+    // for a length 1 substring.
+    //
+    // It is possible, using the testing factory method to create deeply nested
+    // trees of empty leaves, to make a string that will fail this test.
+    for (int i = 1; i < duo.size(); ++i) {
+      assertTrue("Substrings of size() < 2 must not be RopeByteStrings",
+          duo.substring(i - 1, i) instanceof LiteralByteString);
+    }
+    for (int i = 1; i < quintet.size(); ++i) {
+      assertTrue("Substrings of size() < 2 must not be RopeByteStrings",
+          quintet.substring(i - 1, i) instanceof LiteralByteString);
+    }
+  }
+
+  public void testStartsWith() {
+    byte[] bytes = getTestBytes(1000, 1234L);
+    ByteString string = ByteString.copyFrom(bytes);
+    ByteString prefix = ByteString.copyFrom(bytes, 0, 500);
+    ByteString suffix = ByteString.copyFrom(bytes, 400, 600);
+    assertTrue(string.startsWith(ByteString.EMPTY));
+    assertTrue(string.startsWith(string));
+    assertTrue(string.startsWith(prefix));
+    assertFalse(string.startsWith(suffix));
+    assertFalse(prefix.startsWith(suffix));
+    assertFalse(suffix.startsWith(prefix));
+    assertFalse(ByteString.EMPTY.startsWith(prefix));
+    assertTrue(ByteString.EMPTY.startsWith(ByteString.EMPTY));
+  }
+
+  static List<ByteString> makeConcretePieces(byte[] referenceBytes) {
+    List<ByteString> pieces = new ArrayList<ByteString>();
+    // Starting length should be small enough that we'll do some concatenating by
+    // copying if we just concatenate all these pieces together.
+    for (int start = 0, length = 16; start < referenceBytes.length; start += length) {
+      length = (length << 1) - 1;
+      if (start + length > referenceBytes.length) {
+        length = referenceBytes.length - start;
+      }
+      pieces.add(ByteString.copyFrom(referenceBytes, start, length));
+    }
+    return pieces;
+  }
+}

+ 8 - 9
java/src/test/java/com/google/protobuf/CodedOutputStreamTest.java

@@ -30,8 +30,10 @@
 
 package com.google.protobuf;
 
+import protobuf_unittest.UnittestProto.SparseEnumMessage;
 import protobuf_unittest.UnittestProto.TestAllTypes;
 import protobuf_unittest.UnittestProto.TestPackedTypes;
+import protobuf_unittest.UnittestProto.TestSparseEnum;
 
 import junit.framework.TestCase;
 
@@ -302,17 +304,14 @@ public class CodedOutputStreamTest extends TestCase {
   }
 
   /** Test writing a message containing a negative enum value. This used to
-   * fail because the size was not properly computed as a sign-extended varint. */
+   * fail because the size was not properly computed as a sign-extended varint.
+   */
   public void testWriteMessageWithNegativeEnumValue() throws Exception {
-    protobuf_unittest.UnittestProto.SparseEnumMessage message =
-        protobuf_unittest.UnittestProto.SparseEnumMessage.newBuilder()
-        .setSparseEnum(protobuf_unittest.UnittestProto.TestSparseEnum.SPARSE_E)
-        .build();
+    SparseEnumMessage message = SparseEnumMessage.newBuilder()
+        .setSparseEnum(TestSparseEnum.SPARSE_E) .build();
     assertTrue(message.getSparseEnum().getNumber() < 0);
     byte[] rawBytes = message.toByteArray();
-    protobuf_unittest.UnittestProto.SparseEnumMessage message2 =
-        protobuf_unittest.UnittestProto.SparseEnumMessage.parseFrom(rawBytes);
-    assertEquals(protobuf_unittest.UnittestProto.TestSparseEnum.SPARSE_E,
-                 message2.getSparseEnum());
+    SparseEnumMessage message2 = SparseEnumMessage.parseFrom(rawBytes);
+    assertEquals(TestSparseEnum.SPARSE_E, message2.getSparseEnum());
   }
 }

+ 190 - 2
java/src/test/java/com/google/protobuf/DescriptorsTest.java

@@ -31,6 +31,8 @@
 package com.google.protobuf;
 
 import com.google.protobuf.DescriptorProtos.DescriptorProto;
+import com.google.protobuf.DescriptorProtos.EnumDescriptorProto;
+import com.google.protobuf.DescriptorProtos.EnumValueDescriptorProto;
 import com.google.protobuf.DescriptorProtos.FieldDescriptorProto;
 import com.google.protobuf.DescriptorProtos.FileDescriptorProto;
 import com.google.protobuf.Descriptors.DescriptorValidationException;
@@ -60,6 +62,7 @@ import junit.framework.TestCase;
 
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.List;
 
 /**
  * Unit test for {@link Descriptors}.
@@ -426,7 +429,7 @@ public class DescriptorsTest extends TestCase {
         UnittestEnormousDescriptor.getDescriptor()
           .toProto().getSerializedSize() > 65536);
   }
-  
+
   /**
    * Tests that the DescriptorValidationException works as intended.
    */
@@ -445,7 +448,7 @@ public class DescriptorsTest extends TestCase {
         .build())
       .build();
     try {
-      Descriptors.FileDescriptor.buildFrom(fileDescriptorProto, 
+      Descriptors.FileDescriptor.buildFrom(fileDescriptorProto,
           new FileDescriptor[0]);
       fail("DescriptorValidationException expected");
     } catch (DescriptorValidationException e) {
@@ -457,4 +460,189 @@ public class DescriptorsTest extends TestCase {
       assertTrue(e.getCause().getMessage().indexOf("invalid") != -1);
     }
   }
+
+  /**
+   * Tests the translate/crosslink for an example where a message field's name
+   * and type name are the same.
+   */
+  public void testDescriptorComplexCrosslink() throws Exception {
+    FileDescriptorProto fileDescriptorProto = FileDescriptorProto.newBuilder()
+      .setName("foo.proto")
+      .addMessageType(DescriptorProto.newBuilder()
+        .setName("Foo")
+        .addField(FieldDescriptorProto.newBuilder()
+          .setLabel(FieldDescriptorProto.Label.LABEL_OPTIONAL)
+          .setType(FieldDescriptorProto.Type.TYPE_INT32)
+          .setName("foo")
+          .setNumber(1)
+          .build())
+        .build())
+      .addMessageType(DescriptorProto.newBuilder()
+        .setName("Bar")
+        .addField(FieldDescriptorProto.newBuilder()
+          .setLabel(FieldDescriptorProto.Label.LABEL_OPTIONAL)
+          .setTypeName("Foo")
+          .setName("Foo")
+          .setNumber(1)
+          .build())
+        .build())
+      .build();
+    // translate and crosslink
+    FileDescriptor file =
+      Descriptors.FileDescriptor.buildFrom(fileDescriptorProto, 
+          new FileDescriptor[0]);
+    // verify resulting descriptors
+    assertNotNull(file);
+    List<Descriptor> msglist = file.getMessageTypes();
+    assertNotNull(msglist);
+    assertTrue(msglist.size() == 2);
+    boolean barFound = false;
+    for (Descriptor desc : msglist) {
+      if (desc.getName().equals("Bar")) {
+        barFound = true;
+        assertNotNull(desc.getFields());
+        List<FieldDescriptor> fieldlist = desc.getFields();
+        assertNotNull(fieldlist);
+        assertTrue(fieldlist.size() == 1);
+        assertTrue(fieldlist.get(0).getType() == FieldDescriptor.Type.MESSAGE);
+        assertTrue(fieldlist.get(0).getMessageType().getName().equals("Foo"));
+      }
+    }
+    assertTrue(barFound);
+  }
+  
+  public void testInvalidPublicDependency() throws Exception {
+    FileDescriptorProto fooProto = FileDescriptorProto.newBuilder()
+        .setName("foo.proto") .build();
+    FileDescriptorProto barProto = FileDescriptorProto.newBuilder()
+        .setName("boo.proto")
+        .addDependency("foo.proto")
+        .addPublicDependency(1)  // Error, should be 0.
+        .build();
+    FileDescriptor fooFile = Descriptors.FileDescriptor.buildFrom(fooProto,
+        new FileDescriptor[0]);
+    try {
+      Descriptors.FileDescriptor.buildFrom(barProto,
+          new FileDescriptor[] {fooFile});
+      fail("DescriptorValidationException expected");
+    } catch (DescriptorValidationException e) {
+      assertTrue(
+          e.getMessage().indexOf("Invalid public dependency index.") != -1);
+    }
+  }
+
+  public void testHiddenDependency() throws Exception {
+    FileDescriptorProto barProto = FileDescriptorProto.newBuilder()
+        .setName("bar.proto")
+        .addMessageType(DescriptorProto.newBuilder().setName("Bar"))
+        .build();
+    FileDescriptorProto forwardProto = FileDescriptorProto.newBuilder()
+        .setName("forward.proto")
+        .addDependency("bar.proto")
+        .build();
+    FileDescriptorProto fooProto = FileDescriptorProto.newBuilder()
+        .setName("foo.proto")
+        .addDependency("forward.proto")
+        .addMessageType(DescriptorProto.newBuilder()
+            .setName("Foo")
+            .addField(FieldDescriptorProto.newBuilder()
+                .setLabel(FieldDescriptorProto.Label.LABEL_OPTIONAL)
+                .setTypeName("Bar")
+                .setName("bar")
+                .setNumber(1)))
+        .build();
+    FileDescriptor barFile = Descriptors.FileDescriptor.buildFrom(
+        barProto, new FileDescriptor[0]);
+    FileDescriptor forwardFile = Descriptors.FileDescriptor.buildFrom(
+        forwardProto, new FileDescriptor[] {barFile});
+
+    try {
+      Descriptors.FileDescriptor.buildFrom(
+          fooProto, new FileDescriptor[] {forwardFile});
+      fail("DescriptorValidationException expected");
+    } catch (DescriptorValidationException e) {
+      assertTrue(e.getMessage().indexOf("Bar") != -1);
+      assertTrue(e.getMessage().indexOf("is not defined") != -1);
+    }
+  }
+
+  public void testPublicDependency() throws Exception {
+    FileDescriptorProto barProto = FileDescriptorProto.newBuilder()
+        .setName("bar.proto")
+        .addMessageType(DescriptorProto.newBuilder().setName("Bar"))
+        .build();
+    FileDescriptorProto forwardProto = FileDescriptorProto.newBuilder()
+        .setName("forward.proto")
+        .addDependency("bar.proto")
+        .addPublicDependency(0)
+        .build();
+    FileDescriptorProto fooProto = FileDescriptorProto.newBuilder()
+        .setName("foo.proto")
+        .addDependency("forward.proto")
+        .addMessageType(DescriptorProto.newBuilder()
+            .setName("Foo")
+            .addField(FieldDescriptorProto.newBuilder()
+                .setLabel(FieldDescriptorProto.Label.LABEL_OPTIONAL)
+                .setTypeName("Bar")
+                .setName("bar")
+                .setNumber(1)))
+        .build();
+    FileDescriptor barFile = Descriptors.FileDescriptor.buildFrom(
+        barProto, new FileDescriptor[0]);
+    FileDescriptor forwardFile = Descriptors.FileDescriptor.buildFrom(
+        forwardProto, new FileDescriptor[]{barFile});
+    Descriptors.FileDescriptor.buildFrom(
+        fooProto, new FileDescriptor[] {forwardFile});
+  }
+  
+  /**
+   * Tests the translate/crosslink for an example with a more complex namespace
+   * referencing.
+   */
+  public void testComplexNamespacePublicDependency() throws Exception {
+    FileDescriptorProto fooProto = FileDescriptorProto.newBuilder()
+        .setName("bar.proto")
+        .setPackage("a.b.c.d.bar.shared")
+        .addEnumType(EnumDescriptorProto.newBuilder()
+            .setName("MyEnum")
+            .addValue(EnumValueDescriptorProto.newBuilder()
+                .setName("BLAH")
+                .setNumber(1)))
+        .build();
+    FileDescriptorProto barProto = FileDescriptorProto.newBuilder()
+        .setName("foo.proto")
+        .addDependency("bar.proto")
+        .setPackage("a.b.c.d.foo.shared")
+        .addMessageType(DescriptorProto.newBuilder()
+            .setName("MyMessage")
+            .addField(FieldDescriptorProto.newBuilder()
+                .setLabel(FieldDescriptorProto.Label.LABEL_REPEATED)
+                .setTypeName("bar.shared.MyEnum")
+                .setName("MyField")
+                .setNumber(1)))
+        .build();
+    // translate and crosslink
+    FileDescriptor fooFile = Descriptors.FileDescriptor.buildFrom(
+        fooProto, new FileDescriptor[0]);
+    FileDescriptor barFile = Descriptors.FileDescriptor.buildFrom(
+        barProto, new FileDescriptor[]{fooFile});
+    // verify resulting descriptors
+    assertNotNull(barFile);
+    List<Descriptor> msglist = barFile.getMessageTypes();
+    assertNotNull(msglist);
+    assertTrue(msglist.size() == 1);
+    Descriptor desc = msglist.get(0);
+    if (desc.getName().equals("MyMessage")) {
+      assertNotNull(desc.getFields());
+      List<FieldDescriptor> fieldlist = desc.getFields();
+      assertNotNull(fieldlist);
+      assertTrue(fieldlist.size() == 1);
+      FieldDescriptor field = fieldlist.get(0);
+      assertTrue(field.getType() == FieldDescriptor.Type.ENUM);
+      assertTrue(field.getEnumType().getName().equals("MyEnum"));
+      assertTrue(field.getEnumType().getFile().getName().equals("bar.proto"));
+      assertTrue(field.getEnumType().getFile().getPackage().equals(
+          "a.b.c.d.bar.shared"));
+    }   
+  }
 }

+ 55 - 17
java/src/test/java/com/google/protobuf/DynamicMessageTest.java

@@ -30,8 +30,9 @@
 
 package com.google.protobuf;
 
-import protobuf_unittest.UnittestProto.TestAllTypes;
 import protobuf_unittest.UnittestProto.TestAllExtensions;
+import protobuf_unittest.UnittestProto.TestAllTypes;
+import protobuf_unittest.UnittestProto.TestEmptyMessage;
 import protobuf_unittest.UnittestProto.TestPackedTypes;
 
 import junit.framework.TestCase;
@@ -61,28 +62,44 @@ public class DynamicMessageTest extends TestCase {
     reflectionTester.assertAllFieldsSetViaReflection(message);
   }
 
-  public void testDoubleBuildError() throws Exception {
+  public void testSettersAfterBuild() throws Exception {
     Message.Builder builder =
       DynamicMessage.newBuilder(TestAllTypes.getDescriptor());
+    Message firstMessage = builder.build();
+    // double build()
     builder.build();
-    try {
-      builder.build();
-      fail("Should have thrown exception.");
-    } catch (IllegalStateException e) {
-      // Success.
-    }
+    // clear() after build()
+    builder.clear();
+    // setters after build()
+    reflectionTester.setAllFieldsViaReflection(builder);
+    Message message = builder.build();
+    reflectionTester.assertAllFieldsSetViaReflection(message);
+    // repeated setters after build()
+    reflectionTester.modifyRepeatedFieldsViaReflection(builder);
+    message = builder.build();
+    reflectionTester.assertRepeatedFieldsModifiedViaReflection(message);
+    // firstMessage shouldn't have been modified.
+    reflectionTester.assertClearViaReflection(firstMessage);
   }
 
-  public void testClearAfterBuildError() throws Exception {
+  public void testUnknownFields() throws Exception {
     Message.Builder builder =
-      DynamicMessage.newBuilder(TestAllTypes.getDescriptor());
-    builder.build();
-    try {
-      builder.clear();
-      fail("Should have thrown exception.");
-    } catch (IllegalStateException e) {
-      // Success.
-    }
+        DynamicMessage.newBuilder(TestEmptyMessage.getDescriptor());
+    builder.setUnknownFields(UnknownFieldSet.newBuilder()
+        .addField(1, UnknownFieldSet.Field.newBuilder().addVarint(1).build())
+        .addField(2, UnknownFieldSet.Field.newBuilder().addFixed32(1).build())
+        .build());
+    Message message = builder.build();
+    assertEquals(2, message.getUnknownFields().asMap().size());
+    // clone() with unknown fields
+    Message.Builder newBuilder = builder.clone();
+    assertEquals(2, newBuilder.getUnknownFields().asMap().size());
+    // clear() with unknown fields
+    newBuilder.clear();
+    assertTrue(newBuilder.getUnknownFields().asMap().isEmpty());
+    // serialize/parse with unknown fields
+    newBuilder.mergeFrom(message.toByteString());
+    assertEquals(2, newBuilder.getUnknownFields().asMap().size());
   }
 
   public void testDynamicMessageSettersRejectNull() throws Exception {
@@ -167,6 +184,23 @@ public class DynamicMessageTest extends TestCase {
     Message message2 =
       DynamicMessage.parseFrom(TestAllTypes.getDescriptor(), rawBytes);
     reflectionTester.assertAllFieldsSetViaReflection(message2);
+
+    // Test Parser interface.
+    Message message3 = message2.getParserForType().parseFrom(rawBytes);
+    reflectionTester.assertAllFieldsSetViaReflection(message3);
+  }
+
+  public void testDynamicMessageExtensionParsing() throws Exception {
+    ByteString rawBytes = TestUtil.getAllExtensionsSet().toByteString();
+    Message message = DynamicMessage.parseFrom(
+        TestAllExtensions.getDescriptor(), rawBytes,
+        TestUtil.getExtensionRegistry());
+    extensionsReflectionTester.assertAllFieldsSetViaReflection(message);
+
+    // Test Parser interface.
+    Message message2 = message.getParserForType().parseFrom(
+        rawBytes, TestUtil.getExtensionRegistry());
+    extensionsReflectionTester.assertAllFieldsSetViaReflection(message2);
   }
 
   public void testDynamicMessagePackedSerialization() throws Exception {
@@ -194,6 +228,10 @@ public class DynamicMessageTest extends TestCase {
     Message message2 =
       DynamicMessage.parseFrom(TestPackedTypes.getDescriptor(), rawBytes);
     packedReflectionTester.assertPackedFieldsSetViaReflection(message2);
+
+    // Test Parser interface.
+    Message message3 = message2.getParserForType().parseFrom(rawBytes);
+    packedReflectionTester.assertPackedFieldsSetViaReflection(message3);
   }
 
   public void testDynamicMessageCopy() throws Exception {

+ 167 - 1
java/src/test/java/com/google/protobuf/GeneratedMessageTest.java

@@ -30,6 +30,8 @@
 
 package com.google.protobuf;
 
+import com.google.protobuf.Descriptors.Descriptor;
+import com.google.protobuf.Descriptors.FieldDescriptor;
 import com.google.protobuf.UnittestLite.TestAllExtensionsLite;
 import com.google.protobuf.test.UnittestImport;
 import protobuf_unittest.EnumWithNoOuter;
@@ -53,6 +55,7 @@ import protobuf_unittest.UnittestProto.ForeignMessage;
 import protobuf_unittest.UnittestProto.ForeignMessageOrBuilder;
 import protobuf_unittest.UnittestProto.TestAllExtensions;
 import protobuf_unittest.UnittestProto.TestAllTypes;
+import protobuf_unittest.UnittestProto.TestAllTypes.NestedMessage;
 import protobuf_unittest.UnittestProto.TestAllTypesOrBuilder;
 import protobuf_unittest.UnittestProto.TestExtremeDefaultValues;
 import protobuf_unittest.UnittestProto.TestPackedTypes;
@@ -180,6 +183,33 @@ public class GeneratedMessageTest extends TestCase {
     assertIsUnmodifiable(value.getRepeatedFloatList());
   }
 
+  public void testParsedMessagesAreImmutable() throws Exception {
+    TestAllTypes value = TestAllTypes.PARSER.parseFrom(
+        TestUtil.getAllSet().toByteString());
+    assertIsUnmodifiable(value.getRepeatedInt32List());
+    assertIsUnmodifiable(value.getRepeatedInt64List());
+    assertIsUnmodifiable(value.getRepeatedUint32List());
+    assertIsUnmodifiable(value.getRepeatedUint64List());
+    assertIsUnmodifiable(value.getRepeatedSint32List());
+    assertIsUnmodifiable(value.getRepeatedSint64List());
+    assertIsUnmodifiable(value.getRepeatedFixed32List());
+    assertIsUnmodifiable(value.getRepeatedFixed64List());
+    assertIsUnmodifiable(value.getRepeatedSfixed32List());
+    assertIsUnmodifiable(value.getRepeatedSfixed64List());
+    assertIsUnmodifiable(value.getRepeatedFloatList());
+    assertIsUnmodifiable(value.getRepeatedDoubleList());
+    assertIsUnmodifiable(value.getRepeatedBoolList());
+    assertIsUnmodifiable(value.getRepeatedStringList());
+    assertIsUnmodifiable(value.getRepeatedBytesList());
+    assertIsUnmodifiable(value.getRepeatedGroupList());
+    assertIsUnmodifiable(value.getRepeatedNestedMessageList());
+    assertIsUnmodifiable(value.getRepeatedForeignMessageList());
+    assertIsUnmodifiable(value.getRepeatedImportMessageList());
+    assertIsUnmodifiable(value.getRepeatedNestedEnumList());
+    assertIsUnmodifiable(value.getRepeatedForeignEnumList());
+    assertIsUnmodifiable(value.getRepeatedImportEnumList());
+  }
+
   private void assertIsUnmodifiable(List<?> list) {
     if (list == Collections.emptyList()) {
       // OKAY -- Need to check this b/c EmptyList allows you to call clear.
@@ -881,7 +911,7 @@ public class GeneratedMessageTest extends TestCase {
     builder.setOptionalNestedMessage(nestedMessage1);
     assertEquals(3, mockParent.getInvalidationCount());
 
-    // primitive repated
+    // primitive repeated
     builder.buildPartial();
     builder.addRepeatedInt32(2);
     builder.addRepeatedInt32(3);
@@ -977,4 +1007,140 @@ public class GeneratedMessageTest extends TestCase {
     assertSame(b1, messageOrBuilderList.get(1));
     assertSame(m2, messageOrBuilderList.get(2));
   }
+
+  public void testGetFieldBuilder() {
+    Descriptor descriptor = TestAllTypes.getDescriptor();
+
+    FieldDescriptor fieldDescriptor =
+        descriptor.findFieldByName("optional_nested_message");
+    FieldDescriptor foreignFieldDescriptor =
+        descriptor.findFieldByName("optional_foreign_message");
+    FieldDescriptor importFieldDescriptor =
+        descriptor.findFieldByName("optional_import_message");
+
+    // Mutate the message with new field builder
+    // Mutate nested message
+    TestAllTypes.Builder builder1 = TestAllTypes.newBuilder();
+    Message.Builder fieldBuilder1 = builder1.newBuilderForField(fieldDescriptor)
+        .mergeFrom((Message) builder1.getField(fieldDescriptor));
+    FieldDescriptor subFieldDescriptor1 =
+        fieldBuilder1.getDescriptorForType().findFieldByName("bb");
+    fieldBuilder1.setField(subFieldDescriptor1, 1);
+    builder1.setField(fieldDescriptor, fieldBuilder1.build());
+
+    // Mutate foreign message
+    Message.Builder foreignFieldBuilder1 = builder1.newBuilderForField(
+        foreignFieldDescriptor)
+        .mergeFrom((Message) builder1.getField(foreignFieldDescriptor));
+    FieldDescriptor subForeignFieldDescriptor1 =
+        foreignFieldBuilder1.getDescriptorForType().findFieldByName("c");
+    foreignFieldBuilder1.setField(subForeignFieldDescriptor1, 2);
+    builder1.setField(foreignFieldDescriptor, foreignFieldBuilder1.build());
+
+    // Mutate import message
+    Message.Builder importFieldBuilder1 = builder1.newBuilderForField(
+        importFieldDescriptor)
+        .mergeFrom((Message) builder1.getField(importFieldDescriptor));
+    FieldDescriptor subImportFieldDescriptor1 =
+        importFieldBuilder1.getDescriptorForType().findFieldByName("d");
+    importFieldBuilder1.setField(subImportFieldDescriptor1, 3);
+    builder1.setField(importFieldDescriptor, importFieldBuilder1.build());
+
+    Message newMessage1 = builder1.build();
+
+    // Mutate the message with existing field builder
+    // Mutate nested message
+    TestAllTypes.Builder builder2 = TestAllTypes.newBuilder();
+    Message.Builder fieldBuilder2 = builder2.getFieldBuilder(fieldDescriptor);
+    FieldDescriptor subFieldDescriptor2 =
+        fieldBuilder2.getDescriptorForType().findFieldByName("bb");
+    fieldBuilder2.setField(subFieldDescriptor2, 1);
+    builder2.setField(fieldDescriptor, fieldBuilder2.build());
+
+    // Mutate foreign message
+    Message.Builder foreignFieldBuilder2 = builder2.newBuilderForField(
+        foreignFieldDescriptor)
+        .mergeFrom((Message) builder2.getField(foreignFieldDescriptor));
+    FieldDescriptor subForeignFieldDescriptor2 =
+        foreignFieldBuilder2.getDescriptorForType().findFieldByName("c");
+    foreignFieldBuilder2.setField(subForeignFieldDescriptor2, 2);
+    builder2.setField(foreignFieldDescriptor, foreignFieldBuilder2.build());
+
+    // Mutate import message
+    Message.Builder importFieldBuilder2 = builder2.newBuilderForField(
+        importFieldDescriptor)
+        .mergeFrom((Message) builder2.getField(importFieldDescriptor));
+    FieldDescriptor subImportFieldDescriptor2 =
+        importFieldBuilder2.getDescriptorForType().findFieldByName("d");
+    importFieldBuilder2.setField(subImportFieldDescriptor2, 3);
+    builder2.setField(importFieldDescriptor, importFieldBuilder2.build());
+
+    Message newMessage2 = builder2.build();
+
+    // These two messages should be equal.
+    assertEquals(newMessage1, newMessage2);
+  }
+
+  public void testGetFieldBuilderWithInitializedValue() {
+    Descriptor descriptor = TestAllTypes.getDescriptor();
+    FieldDescriptor fieldDescriptor =
+        descriptor.findFieldByName("optional_nested_message");
+
+    // Before setting field, builder is initialized by default value. 
+    TestAllTypes.Builder builder = TestAllTypes.newBuilder();
+    NestedMessage.Builder fieldBuilder =
+        (NestedMessage.Builder) builder.getFieldBuilder(fieldDescriptor);
+    assertEquals(0, fieldBuilder.getBb());
+
+    // Setting field value with new field builder instance.
+    builder = TestAllTypes.newBuilder();
+    NestedMessage.Builder newFieldBuilder =
+        builder.getOptionalNestedMessageBuilder();
+    newFieldBuilder.setBb(2);
+    // Then get the field builder instance by getFieldBuilder().
+    fieldBuilder =
+        (NestedMessage.Builder) builder.getFieldBuilder(fieldDescriptor);
+    // It should contain new value.
+    assertEquals(2, fieldBuilder.getBb());
+    // These two builder should be equal.
+    assertSame(fieldBuilder, newFieldBuilder);
+  }
+
+  public void testGetFieldBuilderNotSupportedException() {
+    Descriptor descriptor = TestAllTypes.getDescriptor();
+    TestAllTypes.Builder builder = TestAllTypes.newBuilder();
+    try {
+      builder.getFieldBuilder(descriptor.findFieldByName("optional_int32"));
+      fail("Exception was not thrown");
+    } catch (UnsupportedOperationException e) {
+      // We expect this exception.
+    }
+    try {
+      builder.getFieldBuilder(
+          descriptor.findFieldByName("optional_nested_enum"));
+      fail("Exception was not thrown");
+    } catch (UnsupportedOperationException e) {
+      // We expect this exception.
+    }
+    try {
+      builder.getFieldBuilder(descriptor.findFieldByName("repeated_int32"));
+      fail("Exception was not thrown");
+    } catch (UnsupportedOperationException e) {
+      // We expect this exception.
+    }
+    try {
+      builder.getFieldBuilder(
+          descriptor.findFieldByName("repeated_nested_enum"));
+      fail("Exception was not thrown");
+    } catch (UnsupportedOperationException e) {
+      // We expect this exception.
+    }
+    try {
+      builder.getFieldBuilder(
+          descriptor.findFieldByName("repeated_nested_message"));
+      fail("Exception was not thrown");
+    } catch (UnsupportedOperationException e) {
+      // We expect this exception.
+    }
+  }
 }

+ 180 - 0
java/src/test/java/com/google/protobuf/IsValidUtf8Test.java

@@ -0,0 +1,180 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import com.google.protobuf.IsValidUtf8TestUtil.Shard;
+
+import junit.framework.TestCase;
+
+import java.io.UnsupportedEncodingException;
+
+/**
+ * Tests cases for {@link ByteString#isValidUtf8()}. This includes three
+ * brute force tests that actually test every permutation of one byte, two byte,
+ * and three byte sequences to ensure that the method produces the right result
+ * for every possible byte encoding where "right" means it's consistent with
+ * java's UTF-8 string encoding/decoding such that the method returns true for
+ * any sequence that will round trip when converted to a String and then back to
+ * bytes and will return false for any sequence that will not round trip.
+ * See also {@link IsValidUtf8FourByteTest}. It also includes some
+ * other more targeted tests.
+ *
+ * @author jonp@google.com (Jon Perlow)
+ * @author martinrb@google.com (Martin Buchholz)
+ */
+public class IsValidUtf8Test extends TestCase {
+
+  /**
+   * Tests that round tripping of all two byte permutations work.
+   */
+  public void testIsValidUtf8_1Byte() throws UnsupportedEncodingException {
+    IsValidUtf8TestUtil.testBytes(1,
+        IsValidUtf8TestUtil.EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT);
+  }
+
+  /**
+   * Tests that round tripping of all two byte permutations work.
+   */
+  public void testIsValidUtf8_2Bytes() throws UnsupportedEncodingException {
+    IsValidUtf8TestUtil.testBytes(2,
+        IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT);
+  }
+
+  /**
+   * Tests that round tripping of all three byte permutations work.
+   */
+  public void testIsValidUtf8_3Bytes() throws UnsupportedEncodingException {
+    IsValidUtf8TestUtil.testBytes(3,
+        IsValidUtf8TestUtil.EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT);
+  }
+
+  /**
+   * Tests that round tripping of a sample of four byte permutations work.
+   * All permutations are prohibitively expensive to test for automated runs;
+   * {@link IsValidUtf8FourByteTest} is used for full coverage. This method
+   * tests specific four-byte cases.
+   */
+  public void testIsValidUtf8_4BytesSamples()
+      throws UnsupportedEncodingException {
+    // Valid 4 byte.
+    assertValidUtf8(0xF0, 0xA4, 0xAD, 0xA2);
+
+    // Bad trailing bytes
+    assertInvalidUtf8(0xF0, 0xA4, 0xAD, 0x7F);
+    assertInvalidUtf8(0xF0, 0xA4, 0xAD, 0xC0);
+
+    // Special cases for byte2
+    assertInvalidUtf8(0xF0, 0x8F, 0xAD, 0xA2);
+    assertInvalidUtf8(0xF4, 0x90, 0xAD, 0xA2);
+  }
+
+  /**
+   * Tests some hard-coded test cases.
+   */
+  public void testSomeSequences() {
+    // Empty
+    assertTrue(asBytes("").isValidUtf8());
+
+    // One-byte characters, including control characters
+    assertTrue(asBytes("\u0000abc\u007f").isValidUtf8());
+
+    // Two-byte characters
+    assertTrue(asBytes("\u00a2\u00a2").isValidUtf8());
+
+    // Three-byte characters
+    assertTrue(asBytes("\u020ac\u020ac").isValidUtf8());
+
+    // Four-byte characters
+    assertTrue(asBytes("\u024B62\u024B62").isValidUtf8());
+
+    // Mixed string
+    assertTrue(
+        asBytes("a\u020ac\u00a2b\\u024B62u020acc\u00a2de\u024B62")
+        .isValidUtf8());
+
+    // Not a valid string
+    assertInvalidUtf8(-1, 0, -1, 0);
+  }
+
+  private byte[] toByteArray(int... bytes) {
+    byte[] realBytes = new byte[bytes.length];
+    for (int i = 0; i < bytes.length; i++) {
+      realBytes[i] = (byte) bytes[i];
+    }
+    return realBytes;
+  }
+
+  private ByteString toByteString(int... bytes) {
+    return ByteString.copyFrom(toByteArray(bytes));
+  }
+
+  private void assertValidUtf8(int[] bytes, boolean not) {
+    byte[] realBytes = toByteArray(bytes);
+    assertTrue(not ^ Utf8.isValidUtf8(realBytes));
+    assertTrue(not ^ Utf8.isValidUtf8(realBytes, 0, bytes.length));
+    ByteString lit = ByteString.copyFrom(realBytes);
+    ByteString sub = lit.substring(0, bytes.length);
+    assertTrue(not ^ lit.isValidUtf8());
+    assertTrue(not ^ sub.isValidUtf8());
+    ByteString[] ropes = {
+      RopeByteString.newInstanceForTest(ByteString.EMPTY, lit),
+      RopeByteString.newInstanceForTest(ByteString.EMPTY, sub),
+      RopeByteString.newInstanceForTest(lit, ByteString.EMPTY),
+      RopeByteString.newInstanceForTest(sub, ByteString.EMPTY),
+      RopeByteString.newInstanceForTest(sub, lit)
+    };
+    for (ByteString rope : ropes) {
+      assertTrue(not ^ rope.isValidUtf8());
+    }
+  }
+
+  private void assertValidUtf8(int... bytes) {
+    assertValidUtf8(bytes, false);
+  }
+
+  private void assertInvalidUtf8(int... bytes) {
+    assertValidUtf8(bytes, true);
+  }
+
+  private static ByteString asBytes(String s) {
+    return ByteString.copyFromUtf8(s);
+  }
+
+  public void testShardsHaveExpectedRoundTrippables() {
+    // A sanity check.
+    int actual = 0;
+    for (Shard shard : IsValidUtf8TestUtil.FOUR_BYTE_SHARDS) {
+      actual += shard.expected;
+    }
+    assertEquals(IsValidUtf8TestUtil.EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT,
+        actual);
+  }
+}

+ 421 - 0
java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java

@@ -0,0 +1,421 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import static junit.framework.Assert.*;
+
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+import java.util.logging.Logger;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.Charset;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+
+/**
+ * Shared testing code for {@link IsValidUtf8Test} and
+ * {@link IsValidUtf8FourByteTest}.
+ *
+ * @author jonp@google.com (Jon Perlow)
+ * @author martinrb@google.com (Martin Buchholz)
+ */
+class IsValidUtf8TestUtil {
+  private static Logger logger = Logger.getLogger(
+      IsValidUtf8TestUtil.class.getName());
+
+  // 128 - [chars 0x0000 to 0x007f]
+  static long ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x007f - 0x0000 + 1;
+
+  // 128
+  static long EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT =
+      ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS;
+
+  // 1920 [chars 0x0080 to 0x07FF]
+  static long TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x07FF - 0x0080 + 1;
+
+  // 18,304
+  static long EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT =
+      // Both bytes are one byte characters
+      (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 2) +
+      // The possible number of two byte characters
+      TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS;
+
+  // 2048
+  static long THREE_BYTE_SURROGATES = 2 * 1024;
+
+  // 61,440 [chars 0x0800 to 0xFFFF, minus surrogates]
+  static long THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS =
+      0xFFFF - 0x0800 + 1 - THREE_BYTE_SURROGATES;
+
+  // 2,650,112
+  static long EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT =
+      // All one byte characters
+      (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 3) +
+      // One two byte character and a one byte character
+      2 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS *
+          ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
+       // Three byte characters
+      THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS;
+
+  // 1,048,576 [chars 0x10000L to 0x10FFFF]
+  static long FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x10FFFF - 0x10000L + 1;
+
+  // 289,571,839
+  static long EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT =
+      // All one byte characters
+      (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 4) +
+      // One and three byte characters
+      2 * THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS *
+          ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
+      // Two two byte characters
+      TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS +
+      // Permutations of one and two byte characters
+      3 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS *
+          ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS *
+          ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
+      // Four byte characters
+      FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS;
+
+  static class Shard {
+    final long index;
+    final long start;
+    final long lim;
+    final long expected;
+
+
+    public Shard(long index, long start, long lim, long expected) {
+      assertTrue(start < lim);
+      this.index = index;
+      this.start = start;
+      this.lim = lim;
+      this.expected = expected;
+    }
+  }
+
+  static final long[] FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES =
+      generateFourByteShardsExpectedRunnables();
+
+  private static long[] generateFourByteShardsExpectedRunnables() {
+    long[] expected = new long[128];
+
+    // 0-63 are all 5300224
+    for (int i = 0; i <= 63; i++) {
+      expected[i] = 5300224;
+    }
+
+    // 97-111 are all 2342912
+    for (int i = 97; i <= 111; i++) {
+     expected[i] = 2342912;
+    }
+
+    // 113-117 are all 1048576
+    for (int i = 113; i <= 117; i++) {
+      expected[i] = 1048576;
+    }
+
+    // One offs
+    expected[112] = 786432;
+    expected[118] = 786432;
+    expected[119] = 1048576;
+    expected[120] = 458752;
+    expected[121] = 524288;
+    expected[122] = 65536;
+
+    // Anything not assigned was the default 0.
+    return expected;
+  }
+
+  static final List<Shard> FOUR_BYTE_SHARDS = generateFourByteShards(
+      128, FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES);
+
+
+  private static List<Shard> generateFourByteShards(
+      int numShards, long[] expected) {
+    assertEquals(numShards, expected.length);
+    List<Shard> shards = new ArrayList<Shard>(numShards);
+    long LIM = 1L << 32;
+    long increment = LIM / numShards;
+    assertTrue(LIM % numShards == 0);
+    for (int i = 0; i < numShards; i++) {
+      shards.add(new Shard(i,
+          increment * i,
+          increment * (i + 1),
+          expected[i]));
+    }
+    return shards;
+  }
+
+  /**
+   * Helper to run the loop to test all the permutations for the number of bytes
+   * specified.
+   *
+   * @param numBytes the number of bytes in the byte array
+   * @param expectedCount the expected number of roundtrippable permutations
+   */
+  static void testBytes(int numBytes, long expectedCount)
+      throws UnsupportedEncodingException {
+    testBytes(numBytes, expectedCount, 0, -1);
+  }
+
+  /**
+   * Helper to run the loop to test all the permutations for the number of bytes
+   * specified. This overload is useful for debugging to get the loop to start
+   * at a certain character.
+   *
+   * @param numBytes the number of bytes in the byte array
+   * @param expectedCount the expected number of roundtrippable permutations
+   * @param start the starting bytes encoded as a long as big-endian
+   * @param lim the limit of bytes to process encoded as a long as big-endian,
+   *     or -1 to mean the max limit for numBytes
+   */
+  static void testBytes(int numBytes, long expectedCount, long start, long lim)
+      throws UnsupportedEncodingException {
+    Random rnd = new Random();
+    byte[] bytes = new byte[numBytes];
+
+    if (lim == -1) {
+      lim = 1L << (numBytes * 8);
+    }
+    long count = 0;
+    long countRoundTripped = 0;
+    for (long byteChar = start; byteChar < lim; byteChar++) {
+      long tmpByteChar = byteChar;
+      for (int i = 0; i < numBytes; i++) {
+        bytes[bytes.length - i - 1] = (byte) tmpByteChar;
+        tmpByteChar = tmpByteChar >> 8;
+      }
+      ByteString bs = ByteString.copyFrom(bytes);
+      boolean isRoundTrippable = bs.isValidUtf8();
+      String s = new String(bytes, "UTF-8");
+      byte[] bytesReencoded = s.getBytes("UTF-8");
+      boolean bytesEqual = Arrays.equals(bytes, bytesReencoded);
+
+      if (bytesEqual != isRoundTrippable) {
+        outputFailure(byteChar, bytes, bytesReencoded);
+      }
+
+      // Check agreement with static Utf8 methods.
+      assertEquals(isRoundTrippable, Utf8.isValidUtf8(bytes));
+      assertEquals(isRoundTrippable, Utf8.isValidUtf8(bytes, 0, numBytes));
+
+      // Test partial sequences.
+      // Partition numBytes into three segments (not necessarily non-empty).
+      int i = rnd.nextInt(numBytes);
+      int j = rnd.nextInt(numBytes);
+      if (j < i) {
+        int tmp = i; i = j; j = tmp;
+      }
+      int state1 = Utf8.partialIsValidUtf8(Utf8.COMPLETE, bytes, 0, i);
+      int state2 = Utf8.partialIsValidUtf8(state1, bytes, i, j);
+      int state3 = Utf8.partialIsValidUtf8(state2, bytes, j, numBytes);
+      if (isRoundTrippable != (state3 == Utf8.COMPLETE)) {
+        System.out.printf("state=%04x %04x %04x i=%d j=%d%n",
+                          state1, state2, state3, i, j);
+        outputFailure(byteChar, bytes, bytesReencoded);
+      }
+      assertEquals(isRoundTrippable, (state3 == Utf8.COMPLETE));
+
+      // Test ropes built out of small partial sequences
+      ByteString rope = RopeByteString.newInstanceForTest(
+          bs.substring(0, i),
+          RopeByteString.newInstanceForTest(
+              bs.substring(i, j),
+              bs.substring(j, numBytes)));
+      assertSame(RopeByteString.class, rope.getClass());
+
+      ByteString[] byteStrings = { bs, bs.substring(0, numBytes), rope };
+      for (ByteString x : byteStrings) {
+        assertEquals(isRoundTrippable,
+                     x.isValidUtf8());
+        assertEquals(state3,
+                     x.partialIsValidUtf8(Utf8.COMPLETE, 0, numBytes));
+
+        assertEquals(state1,
+                     x.partialIsValidUtf8(Utf8.COMPLETE, 0, i));
+        assertEquals(state1,
+                     x.substring(0, i).partialIsValidUtf8(Utf8.COMPLETE, 0, i));
+        assertEquals(state2,
+                     x.partialIsValidUtf8(state1, i, j - i));
+        assertEquals(state2,
+                     x.substring(i, j).partialIsValidUtf8(state1, 0, j - i));
+        assertEquals(state3,
+                     x.partialIsValidUtf8(state2, j, numBytes - j));
+        assertEquals(state3,
+                     x.substring(j, numBytes)
+                     .partialIsValidUtf8(state2, 0, numBytes - j));
+      }
+
+      // ByteString reduplication should not affect its UTF-8 validity.
+      ByteString ropeADope =
+          RopeByteString.newInstanceForTest(bs, bs.substring(0, numBytes));
+      assertEquals(isRoundTrippable, ropeADope.isValidUtf8());
+
+      if (isRoundTrippable) {
+        countRoundTripped++;
+      }
+      count++;
+      if (byteChar != 0 && byteChar % 1000000L == 0) {
+        logger.info("Processed " + (byteChar / 1000000L) +
+            " million characters");
+      }
+    }
+    logger.info("Round tripped " + countRoundTripped + " of " + count);
+    assertEquals(expectedCount, countRoundTripped);
+  }
+
+  /**
+   * Variation of {@link #testBytes} that does less allocation using the
+   * low-level encoders/decoders directly. Checked in because it's useful for
+   * debugging when trying to process bytes faster, but since it doesn't use the
+   * actual String class, it's possible for incompatibilities to develop
+   * (although unlikely).
+   *
+   * @param numBytes the number of bytes in the byte array
+   * @param expectedCount the expected number of roundtrippable permutations
+   * @param start the starting bytes encoded as a long as big-endian
+   * @param lim the limit of bytes to process encoded as a long as big-endian,
+   *     or -1 to mean the max limit for numBytes
+   */
+  void testBytesUsingByteBuffers(
+      int numBytes, long expectedCount, long start, long lim)
+      throws UnsupportedEncodingException {
+    CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
+        .onMalformedInput(CodingErrorAction.REPLACE)
+        .onUnmappableCharacter(CodingErrorAction.REPLACE);
+    CharsetEncoder encoder = Charset.forName("UTF-8").newEncoder()
+        .onMalformedInput(CodingErrorAction.REPLACE)
+        .onUnmappableCharacter(CodingErrorAction.REPLACE);
+    byte[] bytes = new byte[numBytes];
+    int maxChars = (int) (decoder.maxCharsPerByte() * numBytes) + 1;
+    char[] charsDecoded =
+        new char[(int) (decoder.maxCharsPerByte() * numBytes) + 1];
+    int maxBytes = (int) (encoder.maxBytesPerChar() * maxChars) + 1;
+    byte[] bytesReencoded = new byte[maxBytes];
+
+    ByteBuffer bb = ByteBuffer.wrap(bytes);
+    CharBuffer cb = CharBuffer.wrap(charsDecoded);
+    ByteBuffer bbReencoded = ByteBuffer.wrap(bytesReencoded);
+    if (lim == -1) {
+      lim = 1L << (numBytes * 8);
+    }
+    long count = 0;
+    long countRoundTripped = 0;
+    for (long byteChar = start; byteChar < lim; byteChar++) {
+      bb.rewind();
+      bb.limit(bytes.length);
+      cb.rewind();
+      cb.limit(charsDecoded.length);
+      bbReencoded.rewind();
+      bbReencoded.limit(bytesReencoded.length);
+      encoder.reset();
+      decoder.reset();
+      long tmpByteChar = byteChar;
+      for (int i = 0; i < bytes.length; i++) {
+        bytes[bytes.length - i - 1] = (byte) tmpByteChar;
+        tmpByteChar = tmpByteChar >> 8;
+      }
+      boolean isRoundTrippable = ByteString.copyFrom(bytes).isValidUtf8();
+      CoderResult result = decoder.decode(bb, cb, true);
+      assertFalse(result.isError());
+      result = decoder.flush(cb);
+      assertFalse(result.isError());
+
+      int charLen = cb.position();
+      cb.rewind();
+      cb.limit(charLen);
+      result = encoder.encode(cb, bbReencoded, true);
+      assertFalse(result.isError());
+      result = encoder.flush(bbReencoded);
+      assertFalse(result.isError());
+
+      boolean bytesEqual = true;
+      int bytesLen = bbReencoded.position();
+      if (bytesLen != numBytes) {
+        bytesEqual = false;
+      } else {
+        for (int i = 0; i < numBytes; i++) {
+          if (bytes[i] != bytesReencoded[i]) {
+            bytesEqual = false;
+            break;
+          }
+        }
+      }
+      if (bytesEqual != isRoundTrippable) {
+        outputFailure(byteChar, bytes, bytesReencoded, bytesLen);
+      }
+
+      count++;
+      if (isRoundTrippable) {
+        countRoundTripped++;
+      }
+      if (byteChar != 0 && byteChar % 1000000 == 0) {
+        logger.info("Processed " + (byteChar / 1000000) +
+            " million characters");
+      }
+    }
+    logger.info("Round tripped " + countRoundTripped + " of " + count);
+    assertEquals(expectedCount, countRoundTripped);
+  }
+
+  private static void outputFailure(long byteChar, byte[] bytes, byte[] after) {
+    outputFailure(byteChar, bytes, after, after.length);
+  }
+
+  private static void outputFailure(long byteChar, byte[] bytes, byte[] after,
+      int len) {
+    fail("Failure: (" + Long.toHexString(byteChar) + ") " +
+        toHexString(bytes) + " => " + toHexString(after, len));
+  }
+
+  private static String toHexString(byte[] b) {
+    return toHexString(b, b.length);
+  }
+
+  private static String toHexString(byte[] b, int len) {
+    StringBuilder s = new StringBuilder();
+    s.append("\"");
+    for (int i = 0; i < len; i++) {
+      if (i > 0) {
+        s.append(" ");
+      }
+      s.append(String.format("%02x", b[i] & 0xFF));
+    }
+    s.append("\"");
+    return s.toString();
+  }
+
+}

+ 44 - 0
java/src/test/java/com/google/protobuf/LazyStringArrayListTest.java

@@ -32,6 +32,9 @@ package com.google.protobuf;
 
 import junit.framework.TestCase;
 
+import java.util.ArrayList;
+import java.util.List;
+
 /**
  * Tests for {@link LazyStringArrayList}.
  *
@@ -115,4 +118,45 @@ public class LazyStringArrayListTest extends TestCase {
     assertSame(aPrimeByteString, list.getByteString(0));
     assertSame(bPrimeByteString, list.getByteString(1));
   }
+
+  public void testCopyConstructorCopiesByReference() {
+    LazyStringArrayList list1 = new LazyStringArrayList();
+    list1.add(STRING_A);
+    list1.add(BYTE_STRING_B);
+    list1.add(BYTE_STRING_C);
+
+    LazyStringArrayList list2 = new LazyStringArrayList(list1);
+    assertEquals(3, list2.size());
+    assertSame(STRING_A, list2.get(0));
+    assertSame(BYTE_STRING_B, list2.getByteString(1));
+    assertSame(BYTE_STRING_C, list2.getByteString(2));
+  }
+
+  public void testListCopyConstructor() {
+    List<String> list1 = new ArrayList<String>();
+    list1.add(STRING_A);
+    list1.add(STRING_B);
+    list1.add(STRING_C);
+
+    LazyStringArrayList list2 = new LazyStringArrayList(list1);
+    assertEquals(3, list2.size());
+    assertSame(STRING_A, list2.get(0));
+    assertSame(STRING_B, list2.get(1));
+    assertSame(STRING_C, list2.get(2));
+  }
+
+  public void testAddAllCopiesByReferenceIfPossible() {
+    LazyStringArrayList list1 = new LazyStringArrayList();
+    list1.add(STRING_A);
+    list1.add(BYTE_STRING_B);
+    list1.add(BYTE_STRING_C);
+
+    LazyStringArrayList list2 = new LazyStringArrayList();
+    list2.addAll(list1);
+
+    assertEquals(3, list2.size());
+    assertSame(STRING_A, list2.get(0));
+    assertSame(BYTE_STRING_B, list2.getByteString(1));
+    assertSame(BYTE_STRING_C, list2.getByteString(2));
+  }
 }

+ 28 - 0
java/src/test/java/com/google/protobuf/LazyStringEndToEndTest.java

@@ -50,6 +50,19 @@ public class LazyStringEndToEndTest extends TestCase {
           114, 4, -1, 0, -1, 0, -30, 2, 4, -1,
           0, -1, 0, -30, 2, 4, -1, 0, -1, 0, });
 
+  private ByteString encodedTestAllTypes;
+
+  @Override
+  protected void setUp() throws Exception {
+    super.setUp();
+    this.encodedTestAllTypes = UnittestProto.TestAllTypes.newBuilder()
+        .setOptionalString("foo")
+        .addRepeatedString("bar")
+        .addRepeatedString("baz")
+        .build()
+        .toByteString();
+  }
+
   /**
    * Tests that an invalid UTF8 string will roundtrip through a parse
    * and serialization.
@@ -112,4 +125,19 @@ public class LazyStringEndToEndTest extends TestCase {
     assertSame(bPrime, proto.getRepeatedString(0));
     assertSame(cPrime, proto.getRepeatedString(1));
   }
+
+  public void testNoStringCachingIfOnlyBytesAccessed() throws Exception {
+    UnittestProto.TestAllTypes proto =
+        UnittestProto.TestAllTypes.parseFrom(encodedTestAllTypes);
+    ByteString optional = proto.getOptionalStringBytes();
+    assertSame(optional, proto.getOptionalStringBytes());
+    assertSame(optional, proto.toBuilder().getOptionalStringBytes());
+
+    ByteString repeated0 = proto.getRepeatedStringBytes(0);
+    ByteString repeated1 = proto.getRepeatedStringBytes(1);
+    assertSame(repeated0, proto.getRepeatedStringBytes(0));
+    assertSame(repeated1, proto.getRepeatedStringBytes(1));
+    assertSame(repeated0, proto.toBuilder().getRepeatedStringBytes(0));
+    assertSame(repeated1, proto.toBuilder().getRepeatedStringBytes(1));
+  }
 }

+ 396 - 0
java/src/test/java/com/google/protobuf/LiteralByteStringTest.java

@@ -0,0 +1,396 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import junit.framework.TestCase;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+/**
+ * Test {@link LiteralByteString} by setting up a reference string in {@link #setUp()}.
+ * This class is designed to be extended for testing extensions of {@link LiteralByteString}
+ * such as {@link BoundedByteString}, see {@link BoundedByteStringTest}.
+ *
+ * @author carlanton@google.com (Carl Haverl)
+ */
+public class LiteralByteStringTest extends TestCase {
+  protected static final String UTF_8 = "UTF-8";
+
+  protected String classUnderTest;
+  protected byte[] referenceBytes;
+  protected ByteString stringUnderTest;
+  protected int expectedHashCode;
+
+  @Override
+  protected void setUp() throws Exception {
+    classUnderTest = "LiteralByteString";
+    referenceBytes = ByteStringTest.getTestBytes(1234, 11337766L);
+    stringUnderTest = ByteString.copyFrom(referenceBytes);
+    expectedHashCode = 331161852;
+  }
+
+  public void testExpectedType() {
+    String actualClassName = getActualClassName(stringUnderTest);
+    assertEquals(classUnderTest + " should match type exactly", classUnderTest, actualClassName);
+  }
+
+  protected String getActualClassName(Object object) {
+    String actualClassName = object.getClass().getName();
+    actualClassName = actualClassName.substring(actualClassName.lastIndexOf('.') + 1);
+    return actualClassName;
+  }
+
+  public void testByteAt() {
+    boolean stillEqual = true;
+    for (int i = 0; stillEqual && i < referenceBytes.length; ++i) {
+      stillEqual = (referenceBytes[i] == stringUnderTest.byteAt(i));
+    }
+    assertTrue(classUnderTest + " must capture the right bytes", stillEqual);
+  }
+
+  public void testByteIterator() {
+    boolean stillEqual = true;
+    ByteString.ByteIterator iter = stringUnderTest.iterator();
+    for (int i = 0; stillEqual && i < referenceBytes.length; ++i) {
+      stillEqual = (iter.hasNext() && referenceBytes[i] == iter.nextByte());
+    }
+    assertTrue(classUnderTest + " must capture the right bytes", stillEqual);
+    assertFalse(classUnderTest + " must have exhausted the itertor", iter.hasNext());
+
+    try {
+      iter.nextByte();
+      fail("Should have thrown an exception.");
+    } catch (NoSuchElementException e) {
+      // This is success
+    }
+  }
+
+  public void testByteIterable() {
+    boolean stillEqual = true;
+    int j = 0;
+    for (byte quantum : stringUnderTest) {
+      stillEqual = (referenceBytes[j] == quantum);
+      ++j;
+    }
+    assertTrue(classUnderTest + " must capture the right bytes as Bytes", stillEqual);
+    assertEquals(classUnderTest + " iterable character count", referenceBytes.length, j);
+  }
+
+  public void testSize() {
+    assertEquals(classUnderTest + " must have the expected size", referenceBytes.length,
+        stringUnderTest.size());
+  }
+
+  public void testGetTreeDepth() {
+    assertEquals(classUnderTest + " must have depth 0", 0, stringUnderTest.getTreeDepth());
+  }
+
+  public void testIsBalanced() {
+    assertTrue(classUnderTest + " is technically balanced", stringUnderTest.isBalanced());
+  }
+
+  public void testCopyTo_ByteArrayOffsetLength() {
+    int destinationOffset = 50;
+    int length = 100;
+    byte[] destination = new byte[destinationOffset + length];
+    int sourceOffset = 213;
+    stringUnderTest.copyTo(destination, sourceOffset, destinationOffset, length);
+    boolean stillEqual = true;
+    for (int i = 0; stillEqual && i < length; ++i) {
+      stillEqual = referenceBytes[i + sourceOffset] == destination[i + destinationOffset];
+    }
+    assertTrue(classUnderTest + ".copyTo(4 arg) must give the expected bytes", stillEqual);
+  }
+
+  public void testCopyTo_ByteArrayOffsetLengthErrors() {
+    int destinationOffset = 50;
+    int length = 100;
+    byte[] destination = new byte[destinationOffset + length];
+
+    try {
+      // Copy one too many bytes
+      stringUnderTest.copyTo(destination, stringUnderTest.size() + 1 - length,
+          destinationOffset, length);
+      fail("Should have thrown an exception when copying too many bytes of a "
+          + classUnderTest);
+    } catch (IndexOutOfBoundsException expected) {
+      // This is success
+    }
+
+    try {
+      // Copy with illegal negative sourceOffset
+      stringUnderTest.copyTo(destination, -1, destinationOffset, length);
+      fail("Should have thrown an exception when given a negative sourceOffset in "
+          + classUnderTest);
+    } catch (IndexOutOfBoundsException expected) {
+      // This is success
+    }
+
+    try {
+      // Copy with illegal negative destinationOffset
+      stringUnderTest.copyTo(destination, 0, -1, length);
+      fail("Should have thrown an exception when given a negative destinationOffset in "
+          + classUnderTest);
+    } catch (IndexOutOfBoundsException expected) {
+      // This is success
+    }
+
+    try {
+      // Copy with illegal negative size
+      stringUnderTest.copyTo(destination, 0, 0, -1);
+      fail("Should have thrown an exception when given a negative size in "
+          + classUnderTest);
+    } catch (IndexOutOfBoundsException expected) {
+      // This is success
+    }
+
+    try {
+      // Copy with illegal too-large sourceOffset
+      stringUnderTest.copyTo(destination, 2 * stringUnderTest.size(), 0, length);
+      fail("Should have thrown an exception when the destinationOffset is too large in "
+          + classUnderTest);
+    } catch (IndexOutOfBoundsException expected) {
+      // This is success
+    }
+
+    try {
+      // Copy with illegal too-large destinationOffset
+      stringUnderTest.copyTo(destination, 0, 2 * destination.length, length);
+      fail("Should have thrown an exception when the destinationOffset is too large in "
+          + classUnderTest);
+    } catch (IndexOutOfBoundsException expected) {
+      // This is success
+    }
+  }
+
+  public void testCopyTo_ByteBuffer() {
+    ByteBuffer myBuffer = ByteBuffer.allocate(referenceBytes.length);
+    stringUnderTest.copyTo(myBuffer);
+    assertTrue(classUnderTest + ".copyTo(ByteBuffer) must give back the same bytes",
+        Arrays.equals(referenceBytes, myBuffer.array()));
+  }
+
+  public void testAsReadOnlyByteBuffer() {
+    ByteBuffer byteBuffer = stringUnderTest.asReadOnlyByteBuffer();
+    byte[] roundTripBytes = new byte[referenceBytes.length];
+    assertTrue(byteBuffer.remaining() == referenceBytes.length);
+    assertTrue(byteBuffer.isReadOnly());
+    byteBuffer.get(roundTripBytes);
+    assertTrue(classUnderTest + ".asReadOnlyByteBuffer() must give back the same bytes",
+        Arrays.equals(referenceBytes, roundTripBytes));
+  }
+
+  public void testAsReadOnlyByteBufferList() {
+    List<ByteBuffer> byteBuffers = stringUnderTest.asReadOnlyByteBufferList();
+    int bytesSeen = 0;
+    byte[] roundTripBytes = new byte[referenceBytes.length];
+    for (ByteBuffer byteBuffer : byteBuffers) {
+      int thisLength = byteBuffer.remaining();
+      assertTrue(byteBuffer.isReadOnly());
+      assertTrue(bytesSeen + thisLength <= referenceBytes.length);
+      byteBuffer.get(roundTripBytes, bytesSeen, thisLength);
+      bytesSeen += thisLength;
+    }
+    assertTrue(bytesSeen == referenceBytes.length);
+    assertTrue(classUnderTest + ".asReadOnlyByteBufferTest() must give back the same bytes",
+        Arrays.equals(referenceBytes, roundTripBytes));
+  }
+
+  public void testToByteArray() {
+    byte[] roundTripBytes = stringUnderTest.toByteArray();
+    assertTrue(classUnderTest + ".toByteArray() must give back the same bytes",
+        Arrays.equals(referenceBytes, roundTripBytes));
+  }
+
+  public void testWriteTo() throws IOException {
+    ByteArrayOutputStream bos = new ByteArrayOutputStream();
+    stringUnderTest.writeTo(bos);
+    byte[] roundTripBytes = bos.toByteArray();
+    assertTrue(classUnderTest + ".writeTo() must give back the same bytes",
+        Arrays.equals(referenceBytes, roundTripBytes));
+  }
+  
+  public void testWriteTo_mutating() throws IOException {
+    OutputStream os = new OutputStream() {
+      @Override
+      public void write(byte[] b, int off, int len) {
+        for (int x = 0; x < len; ++x) {
+          b[off + x] = (byte) 0;
+        }
+      }
+
+      @Override
+      public void write(int b) {
+        // Purposefully left blank.
+      }
+    };
+
+    stringUnderTest.writeTo(os);
+    byte[] newBytes = stringUnderTest.toByteArray();
+    assertTrue(classUnderTest + ".writeTo() must not grant access to underlying array",
+        Arrays.equals(referenceBytes, newBytes));
+  }
+
+  public void testNewOutput() throws IOException {
+    ByteArrayOutputStream bos = new ByteArrayOutputStream();
+    ByteString.Output output = ByteString.newOutput();
+    stringUnderTest.writeTo(output);
+    assertEquals("Output Size returns correct result",
+        output.size(), stringUnderTest.size());
+    output.writeTo(bos);
+    assertTrue("Output.writeTo() must give back the same bytes",
+        Arrays.equals(referenceBytes, bos.toByteArray()));
+
+    // write the output stream to itself! This should cause it to double
+    output.writeTo(output);
+    assertEquals("Writing an output stream to itself is successful",
+        stringUnderTest.concat(stringUnderTest), output.toByteString());
+
+    output.reset();
+    assertEquals("Output.reset() resets the output", 0, output.size());
+    assertEquals("Output.reset() resets the output",
+        ByteString.EMPTY, output.toByteString());
+    
+  }
+
+  public void testToString() throws UnsupportedEncodingException {
+    String testString = "I love unicode \u1234\u5678 characters";
+    LiteralByteString unicode = new LiteralByteString(testString.getBytes(UTF_8));
+    String roundTripString = unicode.toString(UTF_8);
+    assertEquals(classUnderTest + " unicode must match", testString, roundTripString);
+  }
+
+  public void testEquals() {
+    assertEquals(classUnderTest + " must not equal null", false, stringUnderTest.equals(null));
+    assertEquals(classUnderTest + " must equal self", stringUnderTest, stringUnderTest);
+    assertFalse(classUnderTest + " must not equal the empty string",
+        stringUnderTest.equals(ByteString.EMPTY));
+    assertEquals(classUnderTest + " empty strings must be equal",
+        new LiteralByteString(new byte[]{}), stringUnderTest.substring(55, 55));
+    assertEquals(classUnderTest + " must equal another string with the same value",
+        stringUnderTest, new LiteralByteString(referenceBytes));
+
+    byte[] mungedBytes = new byte[referenceBytes.length];
+    System.arraycopy(referenceBytes, 0, mungedBytes, 0, referenceBytes.length);
+    mungedBytes[mungedBytes.length - 5] ^= 0xFF;
+    assertFalse(classUnderTest + " must not equal every string with the same length",
+        stringUnderTest.equals(new LiteralByteString(mungedBytes)));
+  }
+
+  public void testHashCode() {
+    int hash = stringUnderTest.hashCode();
+    assertEquals(classUnderTest + " must have expected hashCode", expectedHashCode, hash);
+  }
+
+  public void testPeekCachedHashCode() {
+    assertEquals(classUnderTest + ".peekCachedHashCode() should return zero at first", 0,
+        stringUnderTest.peekCachedHashCode());
+    stringUnderTest.hashCode();
+    assertEquals(classUnderTest + ".peekCachedHashCode should return zero at first",
+        expectedHashCode, stringUnderTest.peekCachedHashCode());
+  }
+
+  public void testPartialHash() {
+    // partialHash() is more strenuously tested elsewhere by testing hashes of substrings.
+    // This test would fail if the expected hash were 1.  It's not.
+    int hash = stringUnderTest.partialHash(stringUnderTest.size(), 0, stringUnderTest.size());
+    assertEquals(classUnderTest + ".partialHash() must yield expected hashCode",
+        expectedHashCode, hash);
+  }
+
+  public void testNewInput() throws IOException {
+    InputStream input = stringUnderTest.newInput();
+    assertEquals("InputStream.available() returns correct value",
+        stringUnderTest.size(), input.available());
+    boolean stillEqual = true;
+    for (byte referenceByte : referenceBytes) {
+      int expectedInt = (referenceByte & 0xFF);
+      stillEqual = (expectedInt == input.read());
+    }
+    assertEquals("InputStream.available() returns correct value",
+        0, input.available());
+    assertTrue(classUnderTest + " must give the same bytes from the InputStream", stillEqual);
+    assertEquals(classUnderTest + " InputStream must now be exhausted", -1, input.read());
+  }
+
+  public void testNewInput_skip() throws IOException {
+    InputStream input = stringUnderTest.newInput();
+    int stringSize = stringUnderTest.size();
+    int nearEndIndex = stringSize * 2 / 3;
+    long skipped1 = input.skip(nearEndIndex);
+    assertEquals("InputStream.skip()", skipped1, nearEndIndex);
+    assertEquals("InputStream.available()",
+        stringSize - skipped1, input.available());
+    assertTrue("InputStream.mark() is available", input.markSupported());
+    input.mark(0);
+    assertEquals("InputStream.skip(), read()",
+        stringUnderTest.byteAt(nearEndIndex) & 0xFF, input.read());
+    assertEquals("InputStream.available()",
+                 stringSize - skipped1 - 1, input.available());
+    long skipped2 = input.skip(stringSize);
+    assertEquals("InputStream.skip() incomplete",
+        skipped2, stringSize - skipped1 - 1);
+    assertEquals("InputStream.skip(), no more input", 0, input.available());
+    assertEquals("InputStream.skip(), no more input", -1, input.read());
+    input.reset();
+    assertEquals("InputStream.reset() succeded",
+                 stringSize - skipped1, input.available());
+    assertEquals("InputStream.reset(), read()",
+        stringUnderTest.byteAt(nearEndIndex) & 0xFF, input.read());
+  }
+
+  public void testNewCodedInput() throws IOException {
+    CodedInputStream cis = stringUnderTest.newCodedInput();
+    byte[] roundTripBytes = cis.readRawBytes(referenceBytes.length);
+    assertTrue(classUnderTest + " must give the same bytes back from the CodedInputStream",
+        Arrays.equals(referenceBytes, roundTripBytes));
+    assertTrue(classUnderTest + " CodedInputStream must now be exhausted", cis.isAtEnd());
+  }
+
+  /**
+   * Make sure we keep things simple when concatenating with empty. See also
+   * {@link ByteStringTest#testConcat_empty()}.
+   */
+  public void testConcat_empty() {
+    assertSame(classUnderTest + " concatenated with empty must give " + classUnderTest,
+        stringUnderTest.concat(ByteString.EMPTY), stringUnderTest);
+    assertSame("empty concatenated with " + classUnderTest + " must give " + classUnderTest,
+        ByteString.EMPTY.concat(stringUnderTest), stringUnderTest);
+  }
+}

+ 40 - 0
java/src/test/java/com/google/protobuf/MessageTest.java

@@ -38,6 +38,8 @@ import protobuf_unittest.UnittestProto.ForeignMessage;
 
 import junit.framework.TestCase;
 
+import java.util.List;
+
 /**
  * Misc. unit tests for message operations that apply to both generated
  * and dynamic messages.
@@ -310,4 +312,42 @@ public class MessageTest extends TestCase {
       assertEquals("Message missing required fields: a, b, c", e.getMessage());
     }
   }
+  
+  /** Test reading unset repeated message from DynamicMessage. */
+  public void testDynamicRepeatedMessageNull() throws Exception {
+    Descriptors.Descriptor descriptor = TestRequired.getDescriptor();
+    DynamicMessage result =
+      DynamicMessage.newBuilder(TestAllTypes.getDescriptor())
+        .mergeFrom(DynamicMessage.newBuilder(MERGE_SOURCE).build())
+        .build();
+
+    assertTrue(result.getField(result.getDescriptorForType()
+        .findFieldByName("repeated_foreign_message")) instanceof List<?>);
+    assertEquals(result.getRepeatedFieldCount(result.getDescriptorForType()
+        .findFieldByName("repeated_foreign_message")), 0);
+  }
+  
+  /** Test reading repeated message from DynamicMessage. */
+  public void testDynamicRepeatedMessageNotNull() throws Exception {
+
+    TestAllTypes REPEATED_NESTED =
+      TestAllTypes.newBuilder()
+        .setOptionalInt32(1)
+        .setOptionalString("foo")
+        .setOptionalForeignMessage(ForeignMessage.getDefaultInstance())
+        .addRepeatedString("bar")
+        .addRepeatedForeignMessage(ForeignMessage.getDefaultInstance())
+        .addRepeatedForeignMessage(ForeignMessage.getDefaultInstance())
+        .build();
+    Descriptors.Descriptor descriptor = TestRequired.getDescriptor();
+    DynamicMessage result =
+      DynamicMessage.newBuilder(TestAllTypes.getDescriptor())
+        .mergeFrom(DynamicMessage.newBuilder(REPEATED_NESTED).build())
+        .build();
+
+    assertTrue(result.getField(result.getDescriptorForType()
+        .findFieldByName("repeated_foreign_message")) instanceof List<?>);
+    assertEquals(result.getRepeatedFieldCount(result.getDescriptorForType()
+        .findFieldByName("repeated_foreign_message")), 2);
+  }
 }

+ 375 - 0
java/src/test/java/com/google/protobuf/ParserTest.java

@@ -0,0 +1,375 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import com.google.protobuf.UnittestLite.TestAllTypesLite;
+import com.google.protobuf.UnittestLite.TestPackedExtensionsLite;
+import com.google.protobuf.UnittestLite.TestParsingMergeLite;
+import com.google.protobuf.UnittestLite;
+import protobuf_unittest.UnittestOptimizeFor.TestOptimizedForSize;
+import protobuf_unittest.UnittestOptimizeFor.TestRequiredOptimizedForSize;
+import protobuf_unittest.UnittestOptimizeFor;
+import protobuf_unittest.UnittestProto.ForeignMessage;
+import protobuf_unittest.UnittestProto.TestAllTypes;
+import protobuf_unittest.UnittestProto.TestEmptyMessage;
+import protobuf_unittest.UnittestProto.TestRequired;
+import protobuf_unittest.UnittestProto.TestParsingMerge;
+import protobuf_unittest.UnittestProto;
+
+import junit.framework.TestCase;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Unit test for {@link Parser}.
+ *
+ * @author liujisi@google.com (Pherl Liu)
+ */
+public class ParserTest extends TestCase {
+  public void testGeneratedMessageParserSingleton() throws Exception {
+    for (int i = 0; i < 10; i++) {
+      assertEquals(TestAllTypes.PARSER,
+                   TestUtil.getAllSet().getParserForType());
+    }
+  }
+
+  private void assertRoundTripEquals(MessageLite message,
+                                     ExtensionRegistryLite registry)
+      throws Exception {
+    final byte[] data = message.toByteArray();
+    final int offset = 20;
+    final int length = data.length;
+    final int padding = 30;
+    Parser<? extends MessageLite> parser = message.getParserForType();
+    assertMessageEquals(message, parser.parseFrom(data, registry));
+    assertMessageEquals(message, parser.parseFrom(
+        generatePaddingArray(data, offset, padding),
+        offset, length, registry));
+    assertMessageEquals(message, parser.parseFrom(
+        message.toByteString(), registry));
+    assertMessageEquals(message, parser.parseFrom(
+        new ByteArrayInputStream(data), registry));
+    assertMessageEquals(message, parser.parseFrom(
+        CodedInputStream.newInstance(data), registry));
+  }
+
+  private void assertRoundTripEquals(MessageLite message) throws Exception {
+    final byte[] data = message.toByteArray();
+    final int offset = 20;
+    final int length = data.length;
+    final int padding = 30;
+    Parser<? extends MessageLite> parser = message.getParserForType();
+    assertMessageEquals(message, parser.parseFrom(data));
+    assertMessageEquals(message, parser.parseFrom(
+        generatePaddingArray(data, offset, padding),
+        offset, length));
+    assertMessageEquals(message, parser.parseFrom(message.toByteString()));
+    assertMessageEquals(message, parser.parseFrom(
+        new ByteArrayInputStream(data)));
+    assertMessageEquals(message, parser.parseFrom(
+        CodedInputStream.newInstance(data)));
+  }
+
+  private void assertMessageEquals(MessageLite expected, MessageLite actual)
+      throws Exception {
+    if (expected instanceof Message) {
+      assertEquals(expected, actual);
+    } else {
+      assertEquals(expected.toByteString(), actual.toByteString());
+    }
+  }
+
+  private byte[] generatePaddingArray(byte[] data, int offset, int padding) {
+    byte[] result = new byte[offset + data.length + padding];
+    System.arraycopy(data, 0, result, offset, data.length);
+    return result;
+  }
+
+  public void testNormalMessage() throws Exception {
+    assertRoundTripEquals(TestUtil.getAllSet());
+  }
+
+  public void testParsePartial() throws Exception {
+    Parser<TestRequired> parser = TestRequired.PARSER;
+    final String errorString =
+        "Should throw exceptions when the parsed message isn't initialized.";
+
+    // TestRequired.b and TestRequired.c are not set.
+    TestRequired partialMessage = TestRequired.newBuilder()
+        .setA(1).buildPartial();
+
+    // parsePartialFrom should pass.
+    byte[] data = partialMessage.toByteArray();
+    assertEquals(partialMessage, parser.parsePartialFrom(data));
+    assertEquals(partialMessage, parser.parsePartialFrom(
+        partialMessage.toByteString()));
+    assertEquals(partialMessage, parser.parsePartialFrom(
+        new ByteArrayInputStream(data)));
+    assertEquals(partialMessage, parser.parsePartialFrom(
+        CodedInputStream.newInstance(data)));
+
+    // parseFrom(ByteArray)
+    try {
+      parser.parseFrom(partialMessage.toByteArray());
+      fail(errorString);
+    } catch (InvalidProtocolBufferException e) {
+      // pass.
+    }
+
+    // parseFrom(ByteString)
+    try {
+      parser.parseFrom(partialMessage.toByteString());
+      fail(errorString);
+    } catch (InvalidProtocolBufferException e) {
+      // pass.
+    }
+
+    // parseFrom(InputStream)
+    try {
+      parser.parseFrom(new ByteArrayInputStream(partialMessage.toByteArray()));
+      fail(errorString);
+    } catch (IOException e) {
+      // pass.
+    }
+
+    // parseFrom(CodedInputStream)
+    try {
+      parser.parseFrom(CodedInputStream.newInstance(
+          partialMessage.toByteArray()));
+      fail(errorString);
+    } catch (IOException e) {
+      // pass.
+    }
+  }
+
+  public void testParseExtensions() throws Exception {
+    assertRoundTripEquals(TestUtil.getAllExtensionsSet(),
+                          TestUtil.getExtensionRegistry());
+    assertRoundTripEquals(TestUtil.getAllLiteExtensionsSet(),
+                          TestUtil.getExtensionRegistryLite());
+  }
+
+  public void testParsePacked() throws Exception {
+    assertRoundTripEquals(TestUtil.getPackedSet());
+    assertRoundTripEquals(TestUtil.getPackedExtensionsSet(),
+                          TestUtil.getExtensionRegistry());
+    assertRoundTripEquals(TestUtil.getLitePackedExtensionsSet(),
+                          TestUtil.getExtensionRegistryLite());
+  }
+
+  public void testParseDelimitedTo() throws Exception {
+    // Write normal Message.
+    TestAllTypes normalMessage = TestUtil.getAllSet();
+    ByteArrayOutputStream output = new ByteArrayOutputStream();
+    normalMessage.writeDelimitedTo(output);
+
+    // Write MessageLite with packed extension fields.
+    TestPackedExtensionsLite packedMessage =
+        TestUtil.getLitePackedExtensionsSet();
+    packedMessage.writeDelimitedTo(output);
+
+    InputStream input = new ByteArrayInputStream(output.toByteArray());
+    assertMessageEquals(
+        normalMessage,
+        normalMessage.getParserForType().parseDelimitedFrom(input));
+    assertMessageEquals(
+        packedMessage,
+        packedMessage.getParserForType().parseDelimitedFrom(
+            input, TestUtil.getExtensionRegistryLite()));
+  }
+
+  public void testParseUnknownFields() throws Exception {
+    // All fields will be treated as unknown fields in emptyMessage.
+    TestEmptyMessage emptyMessage = TestEmptyMessage.PARSER.parseFrom(
+        TestUtil.getAllSet().toByteString());
+    assertEquals(
+        TestUtil.getAllSet().toByteString(),
+        emptyMessage.toByteString());
+  }
+
+  public void testOptimizeForSize() throws Exception {
+    TestOptimizedForSize.Builder builder = TestOptimizedForSize.newBuilder();
+    builder.setI(12).setMsg(ForeignMessage.newBuilder().setC(34).build());
+    builder.setExtension(TestOptimizedForSize.testExtension, 56);
+    builder.setExtension(TestOptimizedForSize.testExtension2,
+        TestRequiredOptimizedForSize.newBuilder().setX(78).build());
+
+    TestOptimizedForSize message = builder.build();
+    ExtensionRegistry registry = ExtensionRegistry.newInstance();
+    UnittestOptimizeFor.registerAllExtensions(registry);
+
+    assertRoundTripEquals(message, registry);
+  }
+
+  /** Helper method for {@link #testParsingMerge()}.*/
+  private void assertMessageMerged(TestAllTypes allTypes)
+      throws Exception {
+    assertEquals(3, allTypes.getOptionalInt32());
+    assertEquals(2, allTypes.getOptionalInt64());
+    assertEquals("hello", allTypes.getOptionalString());
+  }
+
+  /** Helper method for {@link #testParsingMergeLite()}.*/
+  private void assertMessageMerged(TestAllTypesLite allTypes)
+      throws Exception {
+    assertEquals(3, allTypes.getOptionalInt32());
+    assertEquals(2, allTypes.getOptionalInt64());
+    assertEquals("hello", allTypes.getOptionalString());
+  }
+
+  public void testParsingMerge() throws Exception {
+    // Build messages.
+    TestAllTypes.Builder builder = TestAllTypes.newBuilder();
+    TestAllTypes msg1 = builder.setOptionalInt32(1).build();
+    builder.clear();
+    TestAllTypes msg2 = builder.setOptionalInt64(2).build();
+    builder.clear();
+    TestAllTypes msg3 = builder.setOptionalInt32(3)
+        .setOptionalString("hello").build();
+
+    // Build groups.
+    TestParsingMerge.RepeatedFieldsGenerator.Group1 optionalG1 =
+        TestParsingMerge.RepeatedFieldsGenerator.Group1.newBuilder()
+        .setField1(msg1).build();
+    TestParsingMerge.RepeatedFieldsGenerator.Group1 optionalG2 =
+        TestParsingMerge.RepeatedFieldsGenerator.Group1.newBuilder()
+        .setField1(msg2).build();
+    TestParsingMerge.RepeatedFieldsGenerator.Group1 optionalG3 =
+        TestParsingMerge.RepeatedFieldsGenerator.Group1.newBuilder()
+        .setField1(msg3).build();
+    TestParsingMerge.RepeatedFieldsGenerator.Group2 repeatedG1 =
+        TestParsingMerge.RepeatedFieldsGenerator.Group2.newBuilder()
+        .setField1(msg1).build();
+    TestParsingMerge.RepeatedFieldsGenerator.Group2 repeatedG2 =
+        TestParsingMerge.RepeatedFieldsGenerator.Group2.newBuilder()
+        .setField1(msg2).build();
+    TestParsingMerge.RepeatedFieldsGenerator.Group2 repeatedG3 =
+        TestParsingMerge.RepeatedFieldsGenerator.Group2.newBuilder()
+        .setField1(msg3).build();
+
+    // Assign and serialize RepeatedFieldsGenerator.
+    ByteString data = TestParsingMerge.RepeatedFieldsGenerator.newBuilder()
+        .addField1(msg1).addField1(msg2).addField1(msg3)
+        .addField2(msg1).addField2(msg2).addField2(msg3)
+        .addField3(msg1).addField3(msg2).addField3(msg3)
+        .addGroup1(optionalG1).addGroup1(optionalG2).addGroup1(optionalG3)
+        .addGroup2(repeatedG1).addGroup2(repeatedG2).addGroup2(repeatedG3)
+        .addExt1(msg1).addExt1(msg2).addExt1(msg3)
+        .addExt2(msg1).addExt2(msg2).addExt2(msg3)
+        .build().toByteString();
+
+    // Parse TestParsingMerge.
+    ExtensionRegistry registry = ExtensionRegistry.newInstance();
+    UnittestProto.registerAllExtensions(registry);
+    TestParsingMerge parsingMerge =
+        TestParsingMerge.PARSER.parseFrom(data, registry);
+
+    // Required and optional fields should be merged.
+    assertMessageMerged(parsingMerge.getRequiredAllTypes());
+    assertMessageMerged(parsingMerge.getOptionalAllTypes());
+    assertMessageMerged(
+        parsingMerge.getOptionalGroup().getOptionalGroupAllTypes());
+    assertMessageMerged(parsingMerge.getExtension(
+        TestParsingMerge.optionalExt));
+
+    // Repeated fields should not be merged.
+    assertEquals(3, parsingMerge.getRepeatedAllTypesCount());
+    assertEquals(3, parsingMerge.getRepeatedGroupCount());
+    assertEquals(3, parsingMerge.getExtensionCount(
+        TestParsingMerge.repeatedExt));
+  }
+
+  public void testParsingMergeLite() throws Exception {
+    // Build messages.
+    TestAllTypesLite.Builder builder =
+        TestAllTypesLite.newBuilder();
+    TestAllTypesLite msg1 = builder.setOptionalInt32(1).build();
+    builder.clear();
+    TestAllTypesLite msg2 = builder.setOptionalInt64(2).build();
+    builder.clear();
+    TestAllTypesLite msg3 = builder.setOptionalInt32(3)
+        .setOptionalString("hello").build();
+
+    // Build groups.
+    TestParsingMergeLite.RepeatedFieldsGenerator.Group1 optionalG1 =
+        TestParsingMergeLite.RepeatedFieldsGenerator.Group1.newBuilder()
+        .setField1(msg1).build();
+    TestParsingMergeLite.RepeatedFieldsGenerator.Group1 optionalG2 =
+        TestParsingMergeLite.RepeatedFieldsGenerator.Group1.newBuilder()
+        .setField1(msg2).build();
+    TestParsingMergeLite.RepeatedFieldsGenerator.Group1 optionalG3 =
+        TestParsingMergeLite.RepeatedFieldsGenerator.Group1.newBuilder()
+        .setField1(msg3).build();
+    TestParsingMergeLite.RepeatedFieldsGenerator.Group2 repeatedG1 =
+        TestParsingMergeLite.RepeatedFieldsGenerator.Group2.newBuilder()
+        .setField1(msg1).build();
+    TestParsingMergeLite.RepeatedFieldsGenerator.Group2 repeatedG2 =
+        TestParsingMergeLite.RepeatedFieldsGenerator.Group2.newBuilder()
+        .setField1(msg2).build();
+    TestParsingMergeLite.RepeatedFieldsGenerator.Group2 repeatedG3 =
+        TestParsingMergeLite.RepeatedFieldsGenerator.Group2.newBuilder()
+        .setField1(msg3).build();
+
+    // Assign and serialize RepeatedFieldsGenerator.
+    ByteString data = TestParsingMergeLite.RepeatedFieldsGenerator.newBuilder()
+        .addField1(msg1).addField1(msg2).addField1(msg3)
+        .addField2(msg1).addField2(msg2).addField2(msg3)
+        .addField3(msg1).addField3(msg2).addField3(msg3)
+        .addGroup1(optionalG1).addGroup1(optionalG2).addGroup1(optionalG3)
+        .addGroup2(repeatedG1).addGroup2(repeatedG2).addGroup2(repeatedG3)
+        .addExt1(msg1).addExt1(msg2).addExt1(msg3)
+        .addExt2(msg1).addExt2(msg2).addExt2(msg3)
+        .build().toByteString();
+
+    // Parse TestParsingMergeLite.
+    ExtensionRegistry registry = ExtensionRegistry.newInstance();
+    UnittestLite.registerAllExtensions(registry);
+    TestParsingMergeLite parsingMerge =
+        TestParsingMergeLite.PARSER.parseFrom(data, registry);
+
+    // Required and optional fields should be merged.
+    assertMessageMerged(parsingMerge.getRequiredAllTypes());
+    assertMessageMerged(parsingMerge.getOptionalAllTypes());
+    assertMessageMerged(
+        parsingMerge.getOptionalGroup().getOptionalGroupAllTypes());
+    assertMessageMerged(parsingMerge.getExtension(
+        TestParsingMergeLite.optionalExt));
+
+    // Repeated fields should not be merged.
+    assertEquals(3, parsingMerge.getRepeatedAllTypesCount());
+    assertEquals(3, parsingMerge.getRepeatedGroupCount());
+    assertEquals(3, parsingMerge.getExtensionCount(
+        TestParsingMergeLite.repeatedExt));
+  }
+}

+ 97 - 0
java/src/test/java/com/google/protobuf/RopeByteStringSubstringTest.java

@@ -0,0 +1,97 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.io.UnsupportedEncodingException;
+import java.util.Iterator;
+
+/**
+ * This class tests {@link RopeByteString#substring(int, int)} by inheriting the tests from
+ * {@link LiteralByteStringTest}.  Only a couple of methods are overridden.  
+ *
+ * @author carlanton@google.com (Carl Haverl)
+ */
+public class RopeByteStringSubstringTest extends LiteralByteStringTest {
+
+  @Override
+  protected void setUp() throws Exception {
+    classUnderTest = "RopeByteString";
+    byte[] sourceBytes = ByteStringTest.getTestBytes(22341, 22337766L);
+    Iterator<ByteString> iter = ByteStringTest.makeConcretePieces(sourceBytes).iterator();
+    ByteString sourceString = iter.next();
+    while (iter.hasNext()) {
+      sourceString = sourceString.concat(iter.next());
+    }
+
+    int from = 1130;
+    int to = sourceBytes.length - 5555;
+    stringUnderTest = sourceString.substring(from, to);
+    referenceBytes = new byte[to - from];
+    System.arraycopy(sourceBytes, from, referenceBytes, 0, to - from);
+    expectedHashCode = -1259260680;
+  }
+
+  @Override
+  public void testGetTreeDepth() {
+    assertEquals(classUnderTest + " must have the expected tree depth",
+        3, stringUnderTest.getTreeDepth());
+  }
+
+  @Override
+  public void testToString() throws UnsupportedEncodingException {
+    String sourceString = "I love unicode \u1234\u5678 characters";
+    ByteString sourceByteString = ByteString.copyFromUtf8(sourceString);
+    int copies = 250;
+
+    // By building the RopeByteString by concatenating, this is actually a fairly strenuous test.
+    StringBuilder builder = new StringBuilder(copies * sourceString.length());
+    ByteString unicode = ByteString.EMPTY;
+    for (int i = 0; i < copies; ++i) {
+      builder.append(sourceString);
+      unicode = RopeByteString.concatenate(unicode, sourceByteString);
+    }
+    String testString = builder.toString();
+
+    // Do the substring part
+    testString = testString.substring(2, testString.length() - 6);
+    unicode = unicode.substring(2, unicode.size() - 6);
+
+    assertEquals(classUnderTest + " from string must have the expected type",
+        classUnderTest, getActualClassName(unicode));
+    String roundTripString = unicode.toString(UTF_8);
+    assertEquals(classUnderTest + " unicode bytes must match",
+        testString, roundTripString);
+    ByteString flatString = ByteString.copyFromUtf8(testString);
+    assertEquals(classUnderTest + " string must equal the flat string", flatString, unicode);
+    assertEquals(classUnderTest + " string must must have same hashCode as the flat string",
+        flatString.hashCode(), unicode.hashCode());
+  }
+}

+ 115 - 0
java/src/test/java/com/google/protobuf/RopeByteStringTest.java

@@ -0,0 +1,115 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
+import java.util.Iterator;
+
+/**
+ * This class tests {@link RopeByteString} by inheriting the tests from
+ * {@link LiteralByteStringTest}.  Only a couple of methods are overridden.
+ * 
+ * <p>A full test of the result of {@link RopeByteString#substring(int, int)} is found in the
+ * separate class {@link RopeByteStringSubstringTest}.
+ *
+ * @author carlanton@google.com (Carl Haverl)
+ */
+public class RopeByteStringTest extends LiteralByteStringTest {
+
+  @Override
+  protected void setUp() throws Exception {
+    classUnderTest = "RopeByteString";
+    referenceBytes = ByteStringTest.getTestBytes(22341, 22337766L);
+    Iterator<ByteString> iter = ByteStringTest.makeConcretePieces(referenceBytes).iterator();
+    stringUnderTest = iter.next();
+    while (iter.hasNext()) {
+      stringUnderTest = stringUnderTest.concat(iter.next());
+    }
+    expectedHashCode = -1214197238;
+  }
+
+  @Override
+  public void testGetTreeDepth() {
+    assertEquals(classUnderTest + " must have the expected tree depth",
+        4, stringUnderTest.getTreeDepth());
+  }
+
+  public void testBalance() {
+    int numberOfPieces = 10000;
+    int pieceSize = 64;
+    byte[] testBytes = ByteStringTest.getTestBytes(numberOfPieces * pieceSize, 113377L);
+
+    // Build up a big ByteString from smaller pieces to force a rebalance
+    ByteString concatenated = ByteString.EMPTY;
+    for (int i = 0; i < numberOfPieces; ++i) {
+      concatenated = concatenated.concat(ByteString.copyFrom(testBytes, i * pieceSize, pieceSize));
+    }
+
+    assertEquals(classUnderTest + " from string must have the expected type",
+        classUnderTest, getActualClassName(concatenated));
+    assertTrue(classUnderTest + " underlying bytes must match after balancing",
+        Arrays.equals(testBytes, concatenated.toByteArray()));
+    ByteString testString = ByteString.copyFrom(testBytes);
+    assertTrue(classUnderTest + " balanced string must equal flat string",
+        concatenated.equals(testString));
+    assertTrue(classUnderTest + " flat string must equal balanced string",
+        testString.equals(concatenated));
+    assertEquals(classUnderTest + " balanced string must have same hash code as flat string",
+        testString.hashCode(), concatenated.hashCode());
+  }
+
+  @Override
+  public void testToString() throws UnsupportedEncodingException {
+    String sourceString = "I love unicode \u1234\u5678 characters";
+    ByteString sourceByteString = ByteString.copyFromUtf8(sourceString);
+    int copies = 250;
+
+    // By building the RopeByteString by concatenating, this is actually a fairly strenuous test.
+    StringBuilder builder = new StringBuilder(copies * sourceString.length());
+    ByteString unicode = ByteString.EMPTY;
+    for (int i = 0; i < copies; ++i) {
+      builder.append(sourceString);
+      unicode = RopeByteString.concatenate(unicode, sourceByteString);
+    }
+    String testString = builder.toString();
+
+    assertEquals(classUnderTest + " from string must have the expected type",
+        classUnderTest, getActualClassName(unicode));
+    String roundTripString = unicode.toString(UTF_8);
+    assertEquals(classUnderTest + " unicode bytes must match",
+        testString, roundTripString);
+    ByteString flatString = ByteString.copyFromUtf8(testString);
+    assertEquals(classUnderTest + " string must equal the flat string", flatString, unicode);
+    assertEquals(classUnderTest + " string must must have same hashCode as the flat string",
+        flatString.hashCode(), unicode.hashCode());
+  }
+}

+ 16 - 2
java/src/test/java/com/google/protobuf/TestBadIdentifiers.java

@@ -35,7 +35,7 @@ import junit.framework.TestCase;
 /**
  * Tests that proto2 api generation doesn't cause compile errors when
  * compiling protocol buffers that have names that would otherwise conflict
- * if not fully qualified (like @Deprecated and @Override). 
+ * if not fully qualified (like @Deprecated and @Override).
  *
  * @author jonp@google.com (Jon Perlow)
  */
@@ -45,5 +45,19 @@ public class TestBadIdentifiers extends TestCase {
     // If this compiles, it means the generation was correct.
     TestBadIdentifiersProto.Deprecated.newBuilder();
     TestBadIdentifiersProto.Override.newBuilder();
-  } 
+  }
+
+  public void testGetDescriptor() {
+    Descriptors.FileDescriptor fileDescriptor =
+        TestBadIdentifiersProto.getDescriptor();
+    String descriptorField = TestBadIdentifiersProto.Descriptor
+        .getDefaultInstance().getDescriptor();
+    Descriptors.Descriptor protoDescriptor = TestBadIdentifiersProto.Descriptor
+        .getDefaultInstance().getDescriptorForType();
+    String nestedDescriptorField = TestBadIdentifiersProto.Descriptor
+        .NestedDescriptor.getDefaultInstance().getDescriptor();
+    Descriptors.Descriptor nestedProtoDescriptor = TestBadIdentifiersProto
+        .Descriptor.NestedDescriptor.getDefaultInstance()
+        .getDescriptorForType();
+  }
 }

+ 178 - 34
java/src/test/java/com/google/protobuf/TestUtil.java

@@ -72,14 +72,16 @@ import static protobuf_unittest.UnittestProto.optionalBoolExtension;
 import static protobuf_unittest.UnittestProto.optionalStringExtension;
 import static protobuf_unittest.UnittestProto.optionalBytesExtension;
 import static protobuf_unittest.UnittestProto.optionalGroupExtension;
-import static protobuf_unittest.UnittestProto.optionalNestedMessageExtension;
+import static protobuf_unittest.UnittestProto.optionalCordExtension;
+import static protobuf_unittest.UnittestProto.optionalForeignEnumExtension;
 import static protobuf_unittest.UnittestProto.optionalForeignMessageExtension;
+import static protobuf_unittest.UnittestProto.optionalImportEnumExtension;
 import static protobuf_unittest.UnittestProto.optionalImportMessageExtension;
 import static protobuf_unittest.UnittestProto.optionalNestedEnumExtension;
-import static protobuf_unittest.UnittestProto.optionalForeignEnumExtension;
-import static protobuf_unittest.UnittestProto.optionalImportEnumExtension;
+import static protobuf_unittest.UnittestProto.optionalNestedMessageExtension;
+import static protobuf_unittest.UnittestProto.optionalPublicImportMessageExtension;
+import static protobuf_unittest.UnittestProto.optionalLazyMessageExtension;
 import static protobuf_unittest.UnittestProto.optionalStringPieceExtension;
-import static protobuf_unittest.UnittestProto.optionalCordExtension;
 
 import static protobuf_unittest.UnittestProto.repeatedInt32Extension;
 import static protobuf_unittest.UnittestProto.repeatedInt64Extension;
@@ -100,6 +102,7 @@ import static protobuf_unittest.UnittestProto.repeatedGroupExtension;
 import static protobuf_unittest.UnittestProto.repeatedNestedMessageExtension;
 import static protobuf_unittest.UnittestProto.repeatedForeignMessageExtension;
 import static protobuf_unittest.UnittestProto.repeatedImportMessageExtension;
+import static protobuf_unittest.UnittestProto.repeatedLazyMessageExtension;
 import static protobuf_unittest.UnittestProto.repeatedNestedEnumExtension;
 import static protobuf_unittest.UnittestProto.repeatedForeignEnumExtension;
 import static protobuf_unittest.UnittestProto.repeatedImportEnumExtension;
@@ -162,11 +165,13 @@ import static com.google.protobuf.UnittestLite.optionalStringExtensionLite;
 import static com.google.protobuf.UnittestLite.optionalBytesExtensionLite;
 import static com.google.protobuf.UnittestLite.optionalGroupExtensionLite;
 import static com.google.protobuf.UnittestLite.optionalNestedMessageExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalForeignEnumExtensionLite;
 import static com.google.protobuf.UnittestLite.optionalForeignMessageExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalImportEnumExtensionLite;
 import static com.google.protobuf.UnittestLite.optionalImportMessageExtensionLite;
 import static com.google.protobuf.UnittestLite.optionalNestedEnumExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalForeignEnumExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalImportEnumExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalPublicImportMessageExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalLazyMessageExtensionLite;
 import static com.google.protobuf.UnittestLite.optionalStringPieceExtensionLite;
 import static com.google.protobuf.UnittestLite.optionalCordExtensionLite;
 
@@ -189,6 +194,7 @@ import static com.google.protobuf.UnittestLite.repeatedGroupExtensionLite;
 import static com.google.protobuf.UnittestLite.repeatedNestedMessageExtensionLite;
 import static com.google.protobuf.UnittestLite.repeatedForeignMessageExtensionLite;
 import static com.google.protobuf.UnittestLite.repeatedImportMessageExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedLazyMessageExtensionLite;
 import static com.google.protobuf.UnittestLite.repeatedNestedEnumExtensionLite;
 import static com.google.protobuf.UnittestLite.repeatedForeignEnumExtensionLite;
 import static com.google.protobuf.UnittestLite.repeatedImportEnumExtensionLite;
@@ -222,8 +228,9 @@ import protobuf_unittest.UnittestProto.TestPackedTypes;
 import protobuf_unittest.UnittestProto.TestUnpackedTypes;
 import protobuf_unittest.UnittestProto.ForeignMessage;
 import protobuf_unittest.UnittestProto.ForeignEnum;
-import com.google.protobuf.test.UnittestImport.ImportMessage;
 import com.google.protobuf.test.UnittestImport.ImportEnum;
+import com.google.protobuf.test.UnittestImport.ImportMessage;
+import com.google.protobuf.test.UnittestImportPublic.PublicImportMessage;
 
 import com.google.protobuf.UnittestLite.TestAllTypesLite;
 import com.google.protobuf.UnittestLite.TestAllExtensionsLite;
@@ -231,8 +238,9 @@ import com.google.protobuf.UnittestLite.TestAllExtensionsLiteOrBuilder;
 import com.google.protobuf.UnittestLite.TestPackedExtensionsLite;
 import com.google.protobuf.UnittestLite.ForeignMessageLite;
 import com.google.protobuf.UnittestLite.ForeignEnumLite;
-import com.google.protobuf.UnittestImportLite.ImportMessageLite;
 import com.google.protobuf.UnittestImportLite.ImportEnumLite;
+import com.google.protobuf.UnittestImportLite.ImportMessageLite;
+import com.google.protobuf.UnittestImportPublicLite.PublicImportMessageLite;
 
 import junit.framework.Assert;
 
@@ -242,7 +250,7 @@ import java.io.RandomAccessFile;
 
 /**
  * Contains methods for setting all fields of {@code TestAllTypes} to
- * some vaules as well as checking that all the fields are set to those values.
+ * some values as well as checking that all the fields are set to those values.
  * These are useful for testing various protocol message features, e.g.
  * set all fields of a message, serialize it, parse it, and check that all
  * fields are set.
@@ -274,6 +282,16 @@ public final class TestUtil {
     return builder.build();
   }
 
+  /**
+   * Get a {@code TestAllTypes.Builder} with all fields set as they would be by
+   * {@link #setAllFields(TestAllTypes.Builder)}.
+   */
+  public static TestAllTypes.Builder getAllSetBuilder() {
+    TestAllTypes.Builder builder = TestAllTypes.newBuilder();
+    setAllFields(builder);
+    return builder;
+  }
+
   /**
    * Get a {@code TestAllExtensions} with all fields set as they would be by
    * {@link #setAllExtensions(TestAllExtensions.Builder)}.
@@ -344,6 +362,10 @@ public final class TestUtil {
       ForeignMessage.newBuilder().setC(119).build());
     message.setOptionalImportMessage(
       ImportMessage.newBuilder().setD(120).build());
+    message.setOptionalPublicImportMessage(
+      PublicImportMessage.newBuilder().setE(126).build());
+    message.setOptionalLazyMessage(
+      TestAllTypes.NestedMessage.newBuilder().setBb(127).build());
 
     message.setOptionalNestedEnum (TestAllTypes.NestedEnum.BAZ);
     message.setOptionalForeignEnum(ForeignEnum.FOREIGN_BAZ);
@@ -378,6 +400,8 @@ public final class TestUtil {
       ForeignMessage.newBuilder().setC(219).build());
     message.addRepeatedImportMessage(
       ImportMessage.newBuilder().setD(220).build());
+    message.addRepeatedLazyMessage(
+      TestAllTypes.NestedMessage.newBuilder().setBb(227).build());
 
     message.addRepeatedNestedEnum (TestAllTypes.NestedEnum.BAR);
     message.addRepeatedForeignEnum(ForeignEnum.FOREIGN_BAR);
@@ -411,6 +435,8 @@ public final class TestUtil {
       ForeignMessage.newBuilder().setC(319).build());
     message.addRepeatedImportMessage(
       ImportMessage.newBuilder().setD(320).build());
+    message.addRepeatedLazyMessage(
+      TestAllTypes.NestedMessage.newBuilder().setBb(327).build());
 
     message.addRepeatedNestedEnum (TestAllTypes.NestedEnum.BAZ);
     message.addRepeatedForeignEnum(ForeignEnum.FOREIGN_BAZ);
@@ -476,6 +502,8 @@ public final class TestUtil {
       ForeignMessage.newBuilder().setC(519).build());
     message.setRepeatedImportMessage(1,
       ImportMessage.newBuilder().setD(520).build());
+    message.setRepeatedLazyMessage(1,
+      TestAllTypes.NestedMessage.newBuilder().setBb(527).build());
 
     message.setRepeatedNestedEnum (1, TestAllTypes.NestedEnum.FOO);
     message.setRepeatedForeignEnum(1, ForeignEnum.FOREIGN_FOO);
@@ -541,10 +569,12 @@ public final class TestUtil {
     Assert.assertEquals("115", message.getOptionalString  ());
     Assert.assertEquals(toBytes("116"), message.getOptionalBytes());
 
-    Assert.assertEquals(117, message.getOptionalGroup         ().getA());
-    Assert.assertEquals(118, message.getOptionalNestedMessage ().getBb());
-    Assert.assertEquals(119, message.getOptionalForeignMessage().getC());
-    Assert.assertEquals(120, message.getOptionalImportMessage ().getD());
+    Assert.assertEquals(117, message.getOptionalGroup              ().getA());
+    Assert.assertEquals(118, message.getOptionalNestedMessage      ().getBb());
+    Assert.assertEquals(119, message.getOptionalForeignMessage     ().getC());
+    Assert.assertEquals(120, message.getOptionalImportMessage      ().getD());
+    Assert.assertEquals(126, message.getOptionalPublicImportMessage().getE());
+    Assert.assertEquals(127, message.getOptionalLazyMessage        ().getBb());
 
     Assert.assertEquals(TestAllTypes.NestedEnum.BAZ, message.getOptionalNestedEnum());
     Assert.assertEquals(ForeignEnum.FOREIGN_BAZ, message.getOptionalForeignEnum());
@@ -575,6 +605,7 @@ public final class TestUtil {
     Assert.assertEquals(2, message.getRepeatedNestedMessageCount ());
     Assert.assertEquals(2, message.getRepeatedForeignMessageCount());
     Assert.assertEquals(2, message.getRepeatedImportMessageCount ());
+    Assert.assertEquals(2, message.getRepeatedLazyMessageCount   ());
     Assert.assertEquals(2, message.getRepeatedNestedEnumCount    ());
     Assert.assertEquals(2, message.getRepeatedForeignEnumCount   ());
     Assert.assertEquals(2, message.getRepeatedImportEnumCount    ());
@@ -602,6 +633,7 @@ public final class TestUtil {
     Assert.assertEquals(218, message.getRepeatedNestedMessage (0).getBb());
     Assert.assertEquals(219, message.getRepeatedForeignMessage(0).getC());
     Assert.assertEquals(220, message.getRepeatedImportMessage (0).getD());
+    Assert.assertEquals(227, message.getRepeatedLazyMessage   (0).getBb());
 
     Assert.assertEquals(TestAllTypes.NestedEnum.BAR, message.getRepeatedNestedEnum (0));
     Assert.assertEquals(ForeignEnum.FOREIGN_BAR, message.getRepeatedForeignEnum(0));
@@ -630,6 +662,7 @@ public final class TestUtil {
     Assert.assertEquals(318, message.getRepeatedNestedMessage (1).getBb());
     Assert.assertEquals(319, message.getRepeatedForeignMessage(1).getC());
     Assert.assertEquals(320, message.getRepeatedImportMessage (1).getD());
+    Assert.assertEquals(327, message.getRepeatedLazyMessage   (1).getBb());
 
     Assert.assertEquals(TestAllTypes.NestedEnum.BAZ, message.getRepeatedNestedEnum (1));
     Assert.assertEquals(ForeignEnum.FOREIGN_BAZ, message.getRepeatedForeignEnum(1));
@@ -741,15 +774,19 @@ public final class TestUtil {
     Assert.assertEquals(ByteString.EMPTY, message.getOptionalBytes());
 
     // Embedded messages should also be clear.
-    Assert.assertFalse(message.getOptionalGroup         ().hasA());
-    Assert.assertFalse(message.getOptionalNestedMessage ().hasBb());
-    Assert.assertFalse(message.getOptionalForeignMessage().hasC());
-    Assert.assertFalse(message.getOptionalImportMessage ().hasD());
-
-    Assert.assertEquals(0, message.getOptionalGroup         ().getA());
-    Assert.assertEquals(0, message.getOptionalNestedMessage ().getBb());
-    Assert.assertEquals(0, message.getOptionalForeignMessage().getC());
-    Assert.assertEquals(0, message.getOptionalImportMessage ().getD());
+    Assert.assertFalse(message.getOptionalGroup              ().hasA());
+    Assert.assertFalse(message.getOptionalNestedMessage      ().hasBb());
+    Assert.assertFalse(message.getOptionalForeignMessage     ().hasC());
+    Assert.assertFalse(message.getOptionalImportMessage      ().hasD());
+    Assert.assertFalse(message.getOptionalPublicImportMessage().hasE());
+    Assert.assertFalse(message.getOptionalLazyMessage        ().hasBb());
+
+    Assert.assertEquals(0, message.getOptionalGroup              ().getA());
+    Assert.assertEquals(0, message.getOptionalNestedMessage      ().getBb());
+    Assert.assertEquals(0, message.getOptionalForeignMessage     ().getC());
+    Assert.assertEquals(0, message.getOptionalImportMessage      ().getD());
+    Assert.assertEquals(0, message.getOptionalPublicImportMessage().getE());
+    Assert.assertEquals(0, message.getOptionalLazyMessage        ().getBb());
 
     // Enums without defaults are set to the first value in the enum.
     Assert.assertEquals(TestAllTypes.NestedEnum.FOO, message.getOptionalNestedEnum ());
@@ -780,6 +817,7 @@ public final class TestUtil {
     Assert.assertEquals(0, message.getRepeatedNestedMessageCount ());
     Assert.assertEquals(0, message.getRepeatedForeignMessageCount());
     Assert.assertEquals(0, message.getRepeatedImportMessageCount ());
+    Assert.assertEquals(0, message.getRepeatedLazyMessageCount   ());
     Assert.assertEquals(0, message.getRepeatedNestedEnumCount    ());
     Assert.assertEquals(0, message.getRepeatedForeignEnumCount   ());
     Assert.assertEquals(0, message.getRepeatedImportEnumCount    ());
@@ -868,6 +906,7 @@ public final class TestUtil {
     Assert.assertEquals(2, message.getRepeatedNestedMessageCount ());
     Assert.assertEquals(2, message.getRepeatedForeignMessageCount());
     Assert.assertEquals(2, message.getRepeatedImportMessageCount ());
+    Assert.assertEquals(2, message.getRepeatedLazyMessageCount   ());
     Assert.assertEquals(2, message.getRepeatedNestedEnumCount    ());
     Assert.assertEquals(2, message.getRepeatedForeignEnumCount   ());
     Assert.assertEquals(2, message.getRepeatedImportEnumCount    ());
@@ -895,6 +934,7 @@ public final class TestUtil {
     Assert.assertEquals(218, message.getRepeatedNestedMessage (0).getBb());
     Assert.assertEquals(219, message.getRepeatedForeignMessage(0).getC());
     Assert.assertEquals(220, message.getRepeatedImportMessage (0).getD());
+    Assert.assertEquals(227, message.getRepeatedLazyMessage   (0).getBb());
 
     Assert.assertEquals(TestAllTypes.NestedEnum.BAR, message.getRepeatedNestedEnum (0));
     Assert.assertEquals(ForeignEnum.FOREIGN_BAR, message.getRepeatedForeignEnum(0));
@@ -924,6 +964,7 @@ public final class TestUtil {
     Assert.assertEquals(518, message.getRepeatedNestedMessage (1).getBb());
     Assert.assertEquals(519, message.getRepeatedForeignMessage(1).getC());
     Assert.assertEquals(520, message.getRepeatedImportMessage (1).getD());
+    Assert.assertEquals(527, message.getRepeatedLazyMessage   (1).getBb());
 
     Assert.assertEquals(TestAllTypes.NestedEnum.FOO, message.getRepeatedNestedEnum (1));
     Assert.assertEquals(ForeignEnum.FOREIGN_FOO, message.getRepeatedForeignEnum(1));
@@ -1210,6 +1251,10 @@ public final class TestUtil {
       ForeignMessage.newBuilder().setC(119).build());
     message.setExtension(optionalImportMessageExtension,
       ImportMessage.newBuilder().setD(120).build());
+    message.setExtension(optionalPublicImportMessageExtension,
+      PublicImportMessage.newBuilder().setE(126).build());
+    message.setExtension(optionalLazyMessageExtension,
+      TestAllTypes.NestedMessage.newBuilder().setBb(127).build());
 
     message.setExtension(optionalNestedEnumExtension, TestAllTypes.NestedEnum.BAZ);
     message.setExtension(optionalForeignEnumExtension, ForeignEnum.FOREIGN_BAZ);
@@ -1244,6 +1289,8 @@ public final class TestUtil {
       ForeignMessage.newBuilder().setC(219).build());
     message.addExtension(repeatedImportMessageExtension,
       ImportMessage.newBuilder().setD(220).build());
+    message.addExtension(repeatedLazyMessageExtension,
+      TestAllTypes.NestedMessage.newBuilder().setBb(227).build());
 
     message.addExtension(repeatedNestedEnumExtension, TestAllTypes.NestedEnum.BAR);
     message.addExtension(repeatedForeignEnumExtension, ForeignEnum.FOREIGN_BAR);
@@ -1277,6 +1324,8 @@ public final class TestUtil {
       ForeignMessage.newBuilder().setC(319).build());
     message.addExtension(repeatedImportMessageExtension,
       ImportMessage.newBuilder().setD(320).build());
+    message.addExtension(repeatedLazyMessageExtension,
+      TestAllTypes.NestedMessage.newBuilder().setBb(327).build());
 
     message.addExtension(repeatedNestedEnumExtension, TestAllTypes.NestedEnum.BAZ);
     message.addExtension(repeatedForeignEnumExtension, ForeignEnum.FOREIGN_BAZ);
@@ -1343,6 +1392,8 @@ public final class TestUtil {
       ForeignMessage.newBuilder().setC(519).build());
     message.setExtension(repeatedImportMessageExtension, 1,
       ImportMessage.newBuilder().setD(520).build());
+    message.setExtension(repeatedLazyMessageExtension, 1,
+      TestAllTypes.NestedMessage.newBuilder().setBb(527).build());
 
     message.setExtension(repeatedNestedEnumExtension , 1, TestAllTypes.NestedEnum.FOO);
     message.setExtension(repeatedForeignEnumExtension, 1, ForeignEnum.FOREIGN_FOO);
@@ -1409,10 +1460,12 @@ public final class TestUtil {
     assertEqualsExactType("115", message.getExtension(optionalStringExtension  ));
     assertEqualsExactType(toBytes("116"), message.getExtension(optionalBytesExtension));
 
-    assertEqualsExactType(117, message.getExtension(optionalGroupExtension         ).getA());
-    assertEqualsExactType(118, message.getExtension(optionalNestedMessageExtension ).getBb());
-    assertEqualsExactType(119, message.getExtension(optionalForeignMessageExtension).getC());
-    assertEqualsExactType(120, message.getExtension(optionalImportMessageExtension ).getD());
+    assertEqualsExactType(117, message.getExtension(optionalGroupExtension              ).getA());
+    assertEqualsExactType(118, message.getExtension(optionalNestedMessageExtension      ).getBb());
+    assertEqualsExactType(119, message.getExtension(optionalForeignMessageExtension     ).getC());
+    assertEqualsExactType(120, message.getExtension(optionalImportMessageExtension      ).getD());
+    assertEqualsExactType(126, message.getExtension(optionalPublicImportMessageExtension).getE());
+    assertEqualsExactType(127, message.getExtension(optionalLazyMessageExtension        ).getBb());
 
     assertEqualsExactType(TestAllTypes.NestedEnum.BAZ,
       message.getExtension(optionalNestedEnumExtension));
@@ -1446,6 +1499,7 @@ public final class TestUtil {
     Assert.assertEquals(2, message.getExtensionCount(repeatedNestedMessageExtension ));
     Assert.assertEquals(2, message.getExtensionCount(repeatedForeignMessageExtension));
     Assert.assertEquals(2, message.getExtensionCount(repeatedImportMessageExtension ));
+    Assert.assertEquals(2, message.getExtensionCount(repeatedLazyMessageExtension   ));
     Assert.assertEquals(2, message.getExtensionCount(repeatedNestedEnumExtension    ));
     Assert.assertEquals(2, message.getExtensionCount(repeatedForeignEnumExtension   ));
     Assert.assertEquals(2, message.getExtensionCount(repeatedImportEnumExtension    ));
@@ -1473,6 +1527,7 @@ public final class TestUtil {
     assertEqualsExactType(218, message.getExtension(repeatedNestedMessageExtension , 0).getBb());
     assertEqualsExactType(219, message.getExtension(repeatedForeignMessageExtension, 0).getC());
     assertEqualsExactType(220, message.getExtension(repeatedImportMessageExtension , 0).getD());
+    assertEqualsExactType(227, message.getExtension(repeatedLazyMessageExtension   , 0).getBb());
 
     assertEqualsExactType(TestAllTypes.NestedEnum.BAR,
       message.getExtension(repeatedNestedEnumExtension, 0));
@@ -1504,6 +1559,7 @@ public final class TestUtil {
     assertEqualsExactType(318, message.getExtension(repeatedNestedMessageExtension , 1).getBb());
     assertEqualsExactType(319, message.getExtension(repeatedForeignMessageExtension, 1).getC());
     assertEqualsExactType(320, message.getExtension(repeatedImportMessageExtension , 1).getD());
+    assertEqualsExactType(327, message.getExtension(repeatedLazyMessageExtension   , 1).getBb());
 
     assertEqualsExactType(TestAllTypes.NestedEnum.BAZ,
       message.getExtension(repeatedNestedEnumExtension, 1));
@@ -1664,6 +1720,7 @@ public final class TestUtil {
     Assert.assertEquals(0, message.getExtensionCount(repeatedNestedMessageExtension ));
     Assert.assertEquals(0, message.getExtensionCount(repeatedForeignMessageExtension));
     Assert.assertEquals(0, message.getExtensionCount(repeatedImportMessageExtension ));
+    Assert.assertEquals(0, message.getExtensionCount(repeatedLazyMessageExtension   ));
     Assert.assertEquals(0, message.getExtensionCount(repeatedNestedEnumExtension    ));
     Assert.assertEquals(0, message.getExtensionCount(repeatedForeignEnumExtension   ));
     Assert.assertEquals(0, message.getExtensionCount(repeatedImportEnumExtension    ));
@@ -1692,6 +1749,7 @@ public final class TestUtil {
     Assert.assertEquals(0, message.getExtension(repeatedNestedMessageExtension ).size());
     Assert.assertEquals(0, message.getExtension(repeatedForeignMessageExtension).size());
     Assert.assertEquals(0, message.getExtension(repeatedImportMessageExtension ).size());
+    Assert.assertEquals(0, message.getExtension(repeatedLazyMessageExtension   ).size());
     Assert.assertEquals(0, message.getExtension(repeatedNestedEnumExtension    ).size());
     Assert.assertEquals(0, message.getExtension(repeatedForeignEnumExtension   ).size());
     Assert.assertEquals(0, message.getExtension(repeatedImportEnumExtension    ).size());
@@ -1783,6 +1841,7 @@ public final class TestUtil {
     Assert.assertEquals(2, message.getExtensionCount(repeatedNestedMessageExtension ));
     Assert.assertEquals(2, message.getExtensionCount(repeatedForeignMessageExtension));
     Assert.assertEquals(2, message.getExtensionCount(repeatedImportMessageExtension ));
+    Assert.assertEquals(2, message.getExtensionCount(repeatedLazyMessageExtension   ));
     Assert.assertEquals(2, message.getExtensionCount(repeatedNestedEnumExtension    ));
     Assert.assertEquals(2, message.getExtensionCount(repeatedForeignEnumExtension   ));
     Assert.assertEquals(2, message.getExtensionCount(repeatedImportEnumExtension    ));
@@ -1810,6 +1869,7 @@ public final class TestUtil {
     assertEqualsExactType(218, message.getExtension(repeatedNestedMessageExtension , 0).getBb());
     assertEqualsExactType(219, message.getExtension(repeatedForeignMessageExtension, 0).getC());
     assertEqualsExactType(220, message.getExtension(repeatedImportMessageExtension , 0).getD());
+    assertEqualsExactType(227, message.getExtension(repeatedLazyMessageExtension   , 0).getBb());
 
     assertEqualsExactType(TestAllTypes.NestedEnum.BAR,
       message.getExtension(repeatedNestedEnumExtension, 0));
@@ -1842,6 +1902,7 @@ public final class TestUtil {
     assertEqualsExactType(518, message.getExtension(repeatedNestedMessageExtension , 1).getBb());
     assertEqualsExactType(519, message.getExtension(repeatedForeignMessageExtension, 1).getC());
     assertEqualsExactType(520, message.getExtension(repeatedImportMessageExtension , 1).getD());
+    assertEqualsExactType(527, message.getExtension(repeatedLazyMessageExtension   , 1).getBb());
 
     assertEqualsExactType(TestAllTypes.NestedEnum.FOO,
       message.getExtension(repeatedNestedEnumExtension, 1));
@@ -1965,6 +2026,10 @@ public final class TestUtil {
       ForeignMessageLite.newBuilder().setC(119).build());
     message.setExtension(optionalImportMessageExtensionLite,
       ImportMessageLite.newBuilder().setD(120).build());
+    message.setExtension(optionalPublicImportMessageExtensionLite,
+      PublicImportMessageLite.newBuilder().setE(126).build());
+    message.setExtension(optionalLazyMessageExtensionLite,
+      TestAllTypesLite.NestedMessage.newBuilder().setBb(127).build());
 
     message.setExtension(optionalNestedEnumExtensionLite, TestAllTypesLite.NestedEnum.BAZ);
     message.setExtension(optionalForeignEnumExtensionLite, ForeignEnumLite.FOREIGN_LITE_BAZ);
@@ -1999,6 +2064,8 @@ public final class TestUtil {
       ForeignMessageLite.newBuilder().setC(219).build());
     message.addExtension(repeatedImportMessageExtensionLite,
       ImportMessageLite.newBuilder().setD(220).build());
+    message.addExtension(repeatedLazyMessageExtensionLite,
+      TestAllTypesLite.NestedMessage.newBuilder().setBb(227).build());
 
     message.addExtension(repeatedNestedEnumExtensionLite, TestAllTypesLite.NestedEnum.BAR);
     message.addExtension(repeatedForeignEnumExtensionLite, ForeignEnumLite.FOREIGN_LITE_BAR);
@@ -2032,6 +2099,8 @@ public final class TestUtil {
       ForeignMessageLite.newBuilder().setC(319).build());
     message.addExtension(repeatedImportMessageExtensionLite,
       ImportMessageLite.newBuilder().setD(320).build());
+    message.addExtension(repeatedLazyMessageExtensionLite,
+      TestAllTypesLite.NestedMessage.newBuilder().setBb(327).build());
 
     message.addExtension(repeatedNestedEnumExtensionLite, TestAllTypesLite.NestedEnum.BAZ);
     message.addExtension(repeatedForeignEnumExtensionLite, ForeignEnumLite.FOREIGN_LITE_BAZ);
@@ -2098,6 +2167,8 @@ public final class TestUtil {
       ForeignMessageLite.newBuilder().setC(519).build());
     message.setExtension(repeatedImportMessageExtensionLite, 1,
       ImportMessageLite.newBuilder().setD(520).build());
+    message.setExtension(repeatedLazyMessageExtensionLite, 1,
+      TestAllTypesLite.NestedMessage.newBuilder().setBb(527).build());
 
     message.setExtension(repeatedNestedEnumExtensionLite , 1, TestAllTypesLite.NestedEnum.FOO);
     message.setExtension(repeatedForeignEnumExtensionLite, 1, ForeignEnumLite.FOREIGN_LITE_FOO);
@@ -2168,6 +2239,9 @@ public final class TestUtil {
     assertEqualsExactType(118, message.getExtension(optionalNestedMessageExtensionLite ).getBb());
     assertEqualsExactType(119, message.getExtension(optionalForeignMessageExtensionLite).getC());
     assertEqualsExactType(120, message.getExtension(optionalImportMessageExtensionLite ).getD());
+    assertEqualsExactType(126, message.getExtension(
+        optionalPublicImportMessageExtensionLite).getE());
+    assertEqualsExactType(127, message.getExtension(optionalLazyMessageExtensionLite).getBb());
 
     assertEqualsExactType(TestAllTypesLite.NestedEnum.BAZ,
       message.getExtension(optionalNestedEnumExtensionLite));
@@ -2201,6 +2275,7 @@ public final class TestUtil {
     Assert.assertEquals(2, message.getExtensionCount(repeatedNestedMessageExtensionLite ));
     Assert.assertEquals(2, message.getExtensionCount(repeatedForeignMessageExtensionLite));
     Assert.assertEquals(2, message.getExtensionCount(repeatedImportMessageExtensionLite ));
+    Assert.assertEquals(2, message.getExtensionCount(repeatedLazyMessageExtensionLite   ));
     Assert.assertEquals(2, message.getExtensionCount(repeatedNestedEnumExtensionLite    ));
     Assert.assertEquals(2, message.getExtensionCount(repeatedForeignEnumExtensionLite   ));
     Assert.assertEquals(2, message.getExtensionCount(repeatedImportEnumExtensionLite    ));
@@ -2228,6 +2303,7 @@ public final class TestUtil {
     assertEqualsExactType(218, message.getExtension(repeatedNestedMessageExtensionLite ,0).getBb());
     assertEqualsExactType(219, message.getExtension(repeatedForeignMessageExtensionLite,0).getC());
     assertEqualsExactType(220, message.getExtension(repeatedImportMessageExtensionLite ,0).getD());
+    assertEqualsExactType(227, message.getExtension(repeatedLazyMessageExtensionLite   ,0).getBb());
 
     assertEqualsExactType(TestAllTypesLite.NestedEnum.BAR,
       message.getExtension(repeatedNestedEnumExtensionLite, 0));
@@ -2259,6 +2335,7 @@ public final class TestUtil {
     assertEqualsExactType(318, message.getExtension(repeatedNestedMessageExtensionLite ,1).getBb());
     assertEqualsExactType(319, message.getExtension(repeatedForeignMessageExtensionLite,1).getC());
     assertEqualsExactType(320, message.getExtension(repeatedImportMessageExtensionLite ,1).getD());
+    assertEqualsExactType(327, message.getExtension(repeatedLazyMessageExtensionLite   ,1).getBb());
 
     assertEqualsExactType(TestAllTypesLite.NestedEnum.BAZ,
       message.getExtension(repeatedNestedEnumExtensionLite, 1));
@@ -2348,10 +2425,12 @@ public final class TestUtil {
     Assert.assertFalse(message.hasExtension(optionalStringExtensionLite  ));
     Assert.assertFalse(message.hasExtension(optionalBytesExtensionLite   ));
 
-    Assert.assertFalse(message.hasExtension(optionalGroupExtensionLite         ));
-    Assert.assertFalse(message.hasExtension(optionalNestedMessageExtensionLite ));
-    Assert.assertFalse(message.hasExtension(optionalForeignMessageExtensionLite));
-    Assert.assertFalse(message.hasExtension(optionalImportMessageExtensionLite ));
+    Assert.assertFalse(message.hasExtension(optionalGroupExtensionLite              ));
+    Assert.assertFalse(message.hasExtension(optionalNestedMessageExtensionLite      ));
+    Assert.assertFalse(message.hasExtension(optionalForeignMessageExtensionLite     ));
+    Assert.assertFalse(message.hasExtension(optionalImportMessageExtensionLite      ));
+    Assert.assertFalse(message.hasExtension(optionalPublicImportMessageExtensionLite));
+    Assert.assertFalse(message.hasExtension(optionalLazyMessageExtensionLite        ));
 
     Assert.assertFalse(message.hasExtension(optionalNestedEnumExtensionLite ));
     Assert.assertFalse(message.hasExtension(optionalForeignEnumExtensionLite));
@@ -2378,15 +2457,20 @@ public final class TestUtil {
     assertEqualsExactType(ByteString.EMPTY, message.getExtension(optionalBytesExtensionLite));
 
     // Embedded messages should also be clear.
-    Assert.assertFalse(message.getExtension(optionalGroupExtensionLite         ).hasA());
-    Assert.assertFalse(message.getExtension(optionalNestedMessageExtensionLite ).hasBb());
-    Assert.assertFalse(message.getExtension(optionalForeignMessageExtensionLite).hasC());
-    Assert.assertFalse(message.getExtension(optionalImportMessageExtensionLite ).hasD());
+    Assert.assertFalse(message.getExtension(optionalGroupExtensionLite              ).hasA());
+    Assert.assertFalse(message.getExtension(optionalNestedMessageExtensionLite      ).hasBb());
+    Assert.assertFalse(message.getExtension(optionalForeignMessageExtensionLite     ).hasC());
+    Assert.assertFalse(message.getExtension(optionalImportMessageExtensionLite      ).hasD());
+    Assert.assertFalse(message.getExtension(optionalPublicImportMessageExtensionLite).hasE());
+    Assert.assertFalse(message.getExtension(optionalLazyMessageExtensionLite        ).hasBb());
 
     assertEqualsExactType(0, message.getExtension(optionalGroupExtensionLite         ).getA());
     assertEqualsExactType(0, message.getExtension(optionalNestedMessageExtensionLite ).getBb());
     assertEqualsExactType(0, message.getExtension(optionalForeignMessageExtensionLite).getC());
     assertEqualsExactType(0, message.getExtension(optionalImportMessageExtensionLite ).getD());
+    assertEqualsExactType(0, message.getExtension(
+        optionalPublicImportMessageExtensionLite).getE());
+    assertEqualsExactType(0, message.getExtension(optionalLazyMessageExtensionLite   ).getBb());
 
     // Enums without defaults are set to the first value in the enum.
     assertEqualsExactType(TestAllTypesLite.NestedEnum.FOO,
@@ -2420,6 +2504,7 @@ public final class TestUtil {
     Assert.assertEquals(0, message.getExtensionCount(repeatedNestedMessageExtensionLite ));
     Assert.assertEquals(0, message.getExtensionCount(repeatedForeignMessageExtensionLite));
     Assert.assertEquals(0, message.getExtensionCount(repeatedImportMessageExtensionLite ));
+    Assert.assertEquals(0, message.getExtensionCount(repeatedLazyMessageExtensionLite   ));
     Assert.assertEquals(0, message.getExtensionCount(repeatedNestedEnumExtensionLite    ));
     Assert.assertEquals(0, message.getExtensionCount(repeatedForeignEnumExtensionLite   ));
     Assert.assertEquals(0, message.getExtensionCount(repeatedImportEnumExtensionLite    ));
@@ -2511,6 +2596,7 @@ public final class TestUtil {
     Assert.assertEquals(2, message.getExtensionCount(repeatedNestedMessageExtensionLite ));
     Assert.assertEquals(2, message.getExtensionCount(repeatedForeignMessageExtensionLite));
     Assert.assertEquals(2, message.getExtensionCount(repeatedImportMessageExtensionLite ));
+    Assert.assertEquals(2, message.getExtensionCount(repeatedLazyMessageExtensionLite   ));
     Assert.assertEquals(2, message.getExtensionCount(repeatedNestedEnumExtensionLite    ));
     Assert.assertEquals(2, message.getExtensionCount(repeatedForeignEnumExtensionLite   ));
     Assert.assertEquals(2, message.getExtensionCount(repeatedImportEnumExtensionLite    ));
@@ -2538,6 +2624,7 @@ public final class TestUtil {
     assertEqualsExactType(218, message.getExtension(repeatedNestedMessageExtensionLite ,0).getBb());
     assertEqualsExactType(219, message.getExtension(repeatedForeignMessageExtensionLite,0).getC());
     assertEqualsExactType(220, message.getExtension(repeatedImportMessageExtensionLite ,0).getD());
+    assertEqualsExactType(227, message.getExtension(repeatedLazyMessageExtensionLite   ,0).getBb());
 
     assertEqualsExactType(TestAllTypesLite.NestedEnum.BAR,
       message.getExtension(repeatedNestedEnumExtensionLite, 0));
@@ -2570,6 +2657,7 @@ public final class TestUtil {
     assertEqualsExactType(518, message.getExtension(repeatedNestedMessageExtensionLite ,1).getBb());
     assertEqualsExactType(519, message.getExtension(repeatedForeignMessageExtensionLite,1).getC());
     assertEqualsExactType(520, message.getExtension(repeatedImportMessageExtensionLite ,1).getD());
+    assertEqualsExactType(527, message.getExtension(repeatedLazyMessageExtensionLite   ,1).getBb());
 
     assertEqualsExactType(TestAllTypesLite.NestedEnum.FOO,
       message.getExtension(repeatedNestedEnumExtensionLite, 1));
@@ -2674,18 +2762,21 @@ public final class TestUtil {
 
     private final Descriptors.FileDescriptor file;
     private final Descriptors.FileDescriptor importFile;
+    private final Descriptors.FileDescriptor publicImportFile;
 
     private final Descriptors.Descriptor optionalGroup;
     private final Descriptors.Descriptor repeatedGroup;
     private final Descriptors.Descriptor nestedMessage;
     private final Descriptors.Descriptor foreignMessage;
     private final Descriptors.Descriptor importMessage;
+    private final Descriptors.Descriptor publicImportMessage;
 
     private final Descriptors.FieldDescriptor groupA;
     private final Descriptors.FieldDescriptor repeatedGroupA;
     private final Descriptors.FieldDescriptor nestedB;
     private final Descriptors.FieldDescriptor foreignC;
     private final Descriptors.FieldDescriptor importD;
+    private final Descriptors.FieldDescriptor importE;
 
     private final Descriptors.EnumDescriptor nestedEnum;
     private final Descriptors.EnumDescriptor foreignEnum;
@@ -2722,6 +2813,7 @@ public final class TestUtil {
       this.file = baseDescriptor.getFile();
       Assert.assertEquals(1, file.getDependencies().size());
       this.importFile = file.getDependencies().get(0);
+      this.publicImportFile = importFile.getDependencies().get(0);
 
       Descriptors.Descriptor testAllTypes;
       if (baseDescriptor.getName() == "TestAllTypes") {
@@ -2748,6 +2840,8 @@ public final class TestUtil {
       this.nestedMessage = testAllTypes.findNestedTypeByName("NestedMessage");
       this.foreignMessage = file.findMessageTypeByName("ForeignMessage");
       this.importMessage = importFile.findMessageTypeByName("ImportMessage");
+      this.publicImportMessage = publicImportFile.findMessageTypeByName(
+          "PublicImportMessage");
 
       this.nestedEnum = testAllTypes.findEnumTypeByName("NestedEnum");
       this.foreignEnum = file.findEnumTypeByName("ForeignEnum");
@@ -2765,6 +2859,7 @@ public final class TestUtil {
       this.nestedB  = nestedMessage .findFieldByName("bb");
       this.foreignC = foreignMessage.findFieldByName("c");
       this.importD  = importMessage .findFieldByName("d");
+      this.importE  = publicImportMessage.findFieldByName("e");
       this.nestedFoo = nestedEnum.findValueByName("FOO");
       this.nestedBar = nestedEnum.findValueByName("BAR");
       this.nestedBaz = nestedEnum.findValueByName("BAZ");
@@ -2783,6 +2878,7 @@ public final class TestUtil {
       Assert.assertNotNull(nestedB       );
       Assert.assertNotNull(foreignC      );
       Assert.assertNotNull(importD       );
+      Assert.assertNotNull(importE       );
       Assert.assertNotNull(nestedFoo     );
       Assert.assertNotNull(nestedBar     );
       Assert.assertNotNull(nestedBaz     );
@@ -2863,6 +2959,12 @@ public final class TestUtil {
       message.setField(f("optional_import_message"),
         newBuilderForField(message, f("optional_import_message"))
                .setField(importD, 120).build());
+      message.setField(f("optional_public_import_message"),
+        newBuilderForField(message, f("optional_public_import_message"))
+               .setField(importE, 126).build());
+      message.setField(f("optional_lazy_message"),
+        newBuilderForField(message, f("optional_lazy_message"))
+               .setField(nestedB, 127).build());
 
       message.setField(f("optional_nested_enum" ),  nestedBaz);
       message.setField(f("optional_foreign_enum"), foreignBaz);
@@ -2901,6 +3003,9 @@ public final class TestUtil {
       message.addRepeatedField(f("repeated_import_message"),
         newBuilderForField(message, f("repeated_import_message"))
                .setField(importD, 220).build());
+      message.addRepeatedField(f("repeated_lazy_message"),
+        newBuilderForField(message, f("repeated_lazy_message"))
+               .setField(nestedB, 227).build());
 
       message.addRepeatedField(f("repeated_nested_enum" ),  nestedBar);
       message.addRepeatedField(f("repeated_foreign_enum"), foreignBar);
@@ -2938,6 +3043,9 @@ public final class TestUtil {
       message.addRepeatedField(f("repeated_import_message"),
         newBuilderForField(message, f("repeated_import_message"))
                .setField(importD, 320).build());
+      message.addRepeatedField(f("repeated_lazy_message"),
+        newBuilderForField(message, f("repeated_lazy_message"))
+               .setField(nestedB, 327).build());
 
       message.addRepeatedField(f("repeated_nested_enum" ),  nestedBaz);
       message.addRepeatedField(f("repeated_foreign_enum"), foreignBaz);
@@ -3008,6 +3116,9 @@ public final class TestUtil {
       message.setRepeatedField(f("repeated_import_message"), 1,
         newBuilderForField(message, f("repeated_import_message"))
                .setField(importD, 520).build());
+      message.setRepeatedField(f("repeated_lazy_message"), 1,
+        newBuilderForField(message, f("repeated_lazy_message"))
+               .setField(nestedB, 527).build());
 
       message.setRepeatedField(f("repeated_nested_enum" ), 1,  nestedFoo);
       message.setRepeatedField(f("repeated_foreign_enum"), 1, foreignFoo);
@@ -3092,6 +3203,12 @@ public final class TestUtil {
       Assert.assertEquals(120,
         ((Message)message.getField(f("optional_import_message")))
                          .getField(importD));
+      Assert.assertEquals(126,
+        ((Message)message.getField(f("optional_public_import_message")))
+                         .getField(importE));
+      Assert.assertEquals(127,
+        ((Message)message.getField(f("optional_lazy_message")))
+                         .getField(nestedB));
 
       Assert.assertEquals( nestedBaz, message.getField(f("optional_nested_enum" )));
       Assert.assertEquals(foreignBaz, message.getField(f("optional_foreign_enum")));
@@ -3122,6 +3239,7 @@ public final class TestUtil {
       Assert.assertEquals(2, message.getRepeatedFieldCount(f("repeated_nested_message" )));
       Assert.assertEquals(2, message.getRepeatedFieldCount(f("repeated_foreign_message")));
       Assert.assertEquals(2, message.getRepeatedFieldCount(f("repeated_import_message" )));
+      Assert.assertEquals(2, message.getRepeatedFieldCount(f("repeated_lazy_message" )));
       Assert.assertEquals(2, message.getRepeatedFieldCount(f("repeated_nested_enum"    )));
       Assert.assertEquals(2, message.getRepeatedFieldCount(f("repeated_foreign_enum"   )));
       Assert.assertEquals(2, message.getRepeatedFieldCount(f("repeated_import_enum"    )));
@@ -3157,6 +3275,9 @@ public final class TestUtil {
       Assert.assertEquals(220,
         ((Message)message.getRepeatedField(f("repeated_import_message"), 0))
                          .getField(importD));
+      Assert.assertEquals(227,
+        ((Message)message.getRepeatedField(f("repeated_lazy_message"), 0))
+                         .getField(nestedB));
 
       Assert.assertEquals( nestedBar, message.getRepeatedField(f("repeated_nested_enum" ),0));
       Assert.assertEquals(foreignBar, message.getRepeatedField(f("repeated_foreign_enum"),0));
@@ -3193,6 +3314,9 @@ public final class TestUtil {
       Assert.assertEquals(320,
         ((Message)message.getRepeatedField(f("repeated_import_message"), 1))
                          .getField(importD));
+      Assert.assertEquals(327,
+        ((Message)message.getRepeatedField(f("repeated_lazy_message"), 1))
+                         .getField(nestedB));
 
       Assert.assertEquals( nestedBaz, message.getRepeatedField(f("repeated_nested_enum" ),1));
       Assert.assertEquals(foreignBaz, message.getRepeatedField(f("repeated_foreign_enum"),1));
@@ -3316,6 +3440,12 @@ public final class TestUtil {
       Assert.assertFalse(
         ((Message)message.getField(f("optional_import_message")))
                          .hasField(importD));
+      Assert.assertFalse(
+        ((Message)message.getField(f("optional_public_import_message")))
+                         .hasField(importE));
+      Assert.assertFalse(
+        ((Message)message.getField(f("optional_lazy_message")))
+                         .hasField(nestedB));
 
       Assert.assertEquals(0,
         ((Message)message.getField(f("optionalgroup"))).getField(groupA));
@@ -3328,6 +3458,12 @@ public final class TestUtil {
       Assert.assertEquals(0,
         ((Message)message.getField(f("optional_import_message")))
                          .getField(importD));
+      Assert.assertEquals(0,
+        ((Message)message.getField(f("optional_public_import_message")))
+                         .getField(importE));
+      Assert.assertEquals(0,
+        ((Message)message.getField(f("optional_lazy_message")))
+                         .getField(nestedB));
 
       // Enums without defaults are set to the first value in the enum.
       Assert.assertEquals( nestedFoo, message.getField(f("optional_nested_enum" )));
@@ -3358,6 +3494,7 @@ public final class TestUtil {
       Assert.assertEquals(0, message.getRepeatedFieldCount(f("repeated_nested_message" )));
       Assert.assertEquals(0, message.getRepeatedFieldCount(f("repeated_foreign_message")));
       Assert.assertEquals(0, message.getRepeatedFieldCount(f("repeated_import_message" )));
+      Assert.assertEquals(0, message.getRepeatedFieldCount(f("repeated_lazy_message"   )));
       Assert.assertEquals(0, message.getRepeatedFieldCount(f("repeated_nested_enum"    )));
       Assert.assertEquals(0, message.getRepeatedFieldCount(f("repeated_foreign_enum"   )));
       Assert.assertEquals(0, message.getRepeatedFieldCount(f("repeated_import_enum"    )));
@@ -3442,6 +3579,7 @@ public final class TestUtil {
       Assert.assertEquals(2, message.getRepeatedFieldCount(f("repeated_nested_message" )));
       Assert.assertEquals(2, message.getRepeatedFieldCount(f("repeated_foreign_message")));
       Assert.assertEquals(2, message.getRepeatedFieldCount(f("repeated_import_message" )));
+      Assert.assertEquals(2, message.getRepeatedFieldCount(f("repeated_lazy_message"   )));
       Assert.assertEquals(2, message.getRepeatedFieldCount(f("repeated_nested_enum"    )));
       Assert.assertEquals(2, message.getRepeatedFieldCount(f("repeated_foreign_enum"   )));
       Assert.assertEquals(2, message.getRepeatedFieldCount(f("repeated_import_enum"    )));
@@ -3477,6 +3615,9 @@ public final class TestUtil {
       Assert.assertEquals(220,
         ((Message)message.getRepeatedField(f("repeated_import_message"), 0))
                          .getField(importD));
+      Assert.assertEquals(227,
+        ((Message)message.getRepeatedField(f("repeated_lazy_message"), 0))
+                         .getField(nestedB));
 
       Assert.assertEquals( nestedBar, message.getRepeatedField(f("repeated_nested_enum" ),0));
       Assert.assertEquals(foreignBar, message.getRepeatedField(f("repeated_foreign_enum"),0));
@@ -3513,6 +3654,9 @@ public final class TestUtil {
       Assert.assertEquals(520,
         ((Message)message.getRepeatedField(f("repeated_import_message"), 1))
                          .getField(importD));
+      Assert.assertEquals(527,
+        ((Message)message.getRepeatedField(f("repeated_lazy_message"), 1))
+                         .getField(nestedB));
 
       Assert.assertEquals( nestedFoo, message.getRepeatedField(f("repeated_nested_enum" ),1));
       Assert.assertEquals(foreignFoo, message.getRepeatedField(f("repeated_foreign_enum"),1));

+ 34 - 0
java/src/test/java/com/google/protobuf/TextFormatTest.java

@@ -121,6 +121,18 @@ public class TextFormatTest extends TestCase {
     assertEquals(allFieldsSetText, javaText);
   }
 
+  /** Print TestAllTypes as Builder and compare with golden file. */
+  public void testPrintMessageBuilder() throws Exception {
+    String javaText = TextFormat.printToString(TestUtil.getAllSetBuilder());
+
+    // Java likes to add a trailing ".0" to floats and doubles.  C printf
+    // (with %g format) does not.  Our golden files are used for both
+    // C++ and Java TextFormat classes, so we need to conform.
+    javaText = javaText.replace(".0\n", "\n");
+
+    assertEquals(allFieldsSetText, javaText);
+  }
+
   /** Print TestAllExtensions and compare with golden file. */
   public void testPrintExtensions() throws Exception {
     String javaText = TextFormat.printToString(TestUtil.getAllExtensionsSet());
@@ -749,4 +761,26 @@ public class TextFormatTest extends TestCase {
         + " 0xabcdef1234567890",
         TextFormat.shortDebugString(makeUnknownFieldSet()));
   }
+
+  public void testPrintToUnicodeString() {
+    assertEquals(
+        "optional_string: \"abc\u3042efg\"\n" +
+        "optional_bytes: \"\\343\\201\\202\"\n" +
+        "repeated_string: \"\u3093XYZ\"\n",
+        TextFormat.printToUnicodeString(TestAllTypes.newBuilder()
+            .setOptionalString("abc\u3042efg")
+            .setOptionalBytes(bytes(0xe3, 0x81, 0x82))
+            .addRepeatedString("\u3093XYZ")
+            .build()));
+  }
+
+  public void testPrintToUnicodeString_unknown() {
+    assertEquals(
+        "1: \"\\343\\201\\202\"\n",
+        TextFormat.printToUnicodeString(UnknownFieldSet.newBuilder()
+            .addField(1,
+                UnknownFieldSet.Field.newBuilder()
+                .addLengthDelimited(bytes(0xe3, 0x81, 0x82)).build())
+            .build()));
+  }
 }

+ 155 - 2
java/src/test/java/com/google/protobuf/WireFormatTest.java

@@ -34,6 +34,7 @@ import junit.framework.TestCase;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
+import java.util.List;
 
 import protobuf_unittest.UnittestProto;
 import protobuf_unittest.UnittestProto.TestAllExtensions;
@@ -328,7 +329,17 @@ public class WireFormatTest extends TestCase {
   private static final int TYPE_ID_2 =
     TestMessageSetExtension2.getDescriptor().getExtensions().get(0).getNumber();
 
-  public void testSerializeMessageSet() throws Exception {
+  public void testSerializeMessageSetEagerly() throws Exception {
+    testSerializeMessageSetWithFlag(true);
+  }
+
+  public void testSerializeMessageSetNotEagerly() throws Exception {
+    testSerializeMessageSetWithFlag(false);
+  }
+
+  private void testSerializeMessageSetWithFlag(boolean eagerParsing)
+      throws Exception {
+    ExtensionRegistryLite.setEagerlyParseMessageSets(eagerParsing);
     // Set up a TestMessageSet with two known messages and an unknown one.
     TestMessageSet messageSet =
       TestMessageSet.newBuilder()
@@ -372,7 +383,17 @@ public class WireFormatTest extends TestCase {
     assertEquals("bar", raw.getItem(2).getMessage().toStringUtf8());
   }
 
-  public void testParseMessageSet() throws Exception {
+  public void testParseMessageSetEagerly() throws Exception {
+    testParseMessageSetWithFlag(true);
+  }
+
+  public void testParseMessageSetNotEagerly()throws Exception {
+    testParseMessageSetWithFlag(false);
+  }
+
+  private void testParseMessageSetWithFlag(boolean eagerParsing)
+      throws Exception {
+    ExtensionRegistryLite.setEagerlyParseMessageSets(eagerParsing);
     ExtensionRegistry extensionRegistry = ExtensionRegistry.newInstance();
     extensionRegistry.add(TestMessageSetExtension1.messageSetExtension);
     extensionRegistry.add(TestMessageSetExtension2.messageSetExtension);
@@ -424,4 +445,136 @@ public class WireFormatTest extends TestCase {
     assertEquals(1, field.getLengthDelimitedList().size());
     assertEquals("bar", field.getLengthDelimitedList().get(0).toStringUtf8());
   }
+
+  public void testParseMessageSetExtensionEagerly() throws Exception {
+    testParseMessageSetExtensionWithFlag(true);
+  }
+
+  public void testParseMessageSetExtensionNotEagerly() throws Exception {
+    testParseMessageSetExtensionWithFlag(false);
+  }
+
+  private void testParseMessageSetExtensionWithFlag(boolean eagerParsing)
+      throws Exception {
+    ExtensionRegistryLite.setEagerlyParseMessageSets(eagerParsing);
+    ExtensionRegistry extensionRegistry = ExtensionRegistry.newInstance();
+    extensionRegistry.add(TestMessageSetExtension1.messageSetExtension);
+
+    // Set up a RawMessageSet with a known messages.
+    int TYPE_ID_1 =
+        TestMessageSetExtension1
+            .getDescriptor().getExtensions().get(0).getNumber();
+    RawMessageSet raw =
+      RawMessageSet.newBuilder()
+        .addItem(
+          RawMessageSet.Item.newBuilder()
+            .setTypeId(TYPE_ID_1)
+            .setMessage(
+              TestMessageSetExtension1.newBuilder()
+                .setI(123)
+                .build().toByteString())
+            .build())
+        .build();
+
+    ByteString data = raw.toByteString();
+
+    // Parse as a TestMessageSet and check the contents.
+    TestMessageSet messageSet =
+        TestMessageSet.parseFrom(data, extensionRegistry);
+    assertEquals(123, messageSet.getExtension(
+        TestMessageSetExtension1.messageSetExtension).getI());
+  }
+
+  public void testMergeLazyMessageSetExtensionEagerly() throws Exception {
+    testMergeLazyMessageSetExtensionWithFlag(true);
+  }
+
+  public void testMergeLazyMessageSetExtensionNotEagerly() throws Exception {
+    testMergeLazyMessageSetExtensionWithFlag(false);
+  }
+
+  private void testMergeLazyMessageSetExtensionWithFlag(boolean eagerParsing)
+      throws Exception {
+    ExtensionRegistryLite.setEagerlyParseMessageSets(eagerParsing);
+    ExtensionRegistry extensionRegistry = ExtensionRegistry.newInstance();
+    extensionRegistry.add(TestMessageSetExtension1.messageSetExtension);
+
+    // Set up a RawMessageSet with a known messages.
+    int TYPE_ID_1 =
+        TestMessageSetExtension1
+            .getDescriptor().getExtensions().get(0).getNumber();
+    RawMessageSet raw =
+      RawMessageSet.newBuilder()
+        .addItem(
+          RawMessageSet.Item.newBuilder()
+            .setTypeId(TYPE_ID_1)
+            .setMessage(
+              TestMessageSetExtension1.newBuilder()
+                .setI(123)
+                .build().toByteString())
+            .build())
+        .build();
+
+    ByteString data = raw.toByteString();
+
+    // Parse as a TestMessageSet and store value into lazy field
+    TestMessageSet messageSet =
+        TestMessageSet.parseFrom(data, extensionRegistry);
+    // Merge lazy field check the contents.
+    messageSet =
+        messageSet.toBuilder().mergeFrom(data, extensionRegistry).build();
+    assertEquals(123, messageSet.getExtension(
+        TestMessageSetExtension1.messageSetExtension).getI());
+  }
+
+  public void testMergeMessageSetExtensionEagerly() throws Exception {
+    testMergeMessageSetExtensionWithFlag(true);
+  }
+
+  public void testMergeMessageSetExtensionNotEagerly() throws Exception {
+    testMergeMessageSetExtensionWithFlag(false);
+  }
+
+  private void testMergeMessageSetExtensionWithFlag(boolean eagerParsing)
+      throws Exception {
+    ExtensionRegistryLite.setEagerlyParseMessageSets(eagerParsing);
+    ExtensionRegistry extensionRegistry = ExtensionRegistry.newInstance();
+    extensionRegistry.add(TestMessageSetExtension1.messageSetExtension);
+
+    // Set up a RawMessageSet with a known messages.
+    int TYPE_ID_1 =
+        TestMessageSetExtension1
+            .getDescriptor().getExtensions().get(0).getNumber();
+    RawMessageSet raw =
+      RawMessageSet.newBuilder()
+        .addItem(
+          RawMessageSet.Item.newBuilder()
+            .setTypeId(TYPE_ID_1)
+            .setMessage(
+              TestMessageSetExtension1.newBuilder()
+                .setI(123)
+                .build().toByteString())
+            .build())
+        .build();
+
+    // Serialize RawMessageSet unnormally (message value before type id)
+    ByteString.CodedBuilder out = ByteString.newCodedBuilder(
+        raw.getSerializedSize());
+    CodedOutputStream output = out.getCodedOutput();
+    List<RawMessageSet.Item> items = raw.getItemList();
+    for (int i = 0; i < items.size(); i++) {
+      RawMessageSet.Item item = items.get(i);
+      output.writeTag(1, WireFormat.WIRETYPE_START_GROUP);
+      output.writeBytes(3, item.getMessage());
+      output.writeInt32(2, item.getTypeId());
+      output.writeTag(1, WireFormat.WIRETYPE_END_GROUP);
+    }
+    ByteString data = out.build();
+
+    // Merge bytes into TestMessageSet and check the contents.
+    TestMessageSet messageSet =
+        TestMessageSet.newBuilder().mergeFrom(data, extensionRegistry).build();
+    assertEquals(123, messageSet.getExtension(
+        TestMessageSetExtension1.messageSetExtension).getI());
+  }
 }

+ 18 - 0
java/src/test/java/com/google/protobuf/test_bad_identifiers.proto

@@ -46,6 +46,23 @@ option java_outer_classname = "TestBadIdentifiersProto";
 message TestMessage {
 }
 
+message Descriptor {
+  option no_standard_descriptor_accessor = true;
+  optional string descriptor = 1;
+  message NestedDescriptor {
+    option no_standard_descriptor_accessor = true;
+    optional string descriptor = 1;
+  }
+  optional NestedDescriptor nested_descriptor = 2;
+}
+
+message Parser {
+  enum ParserEnum {
+    PARSER = 1;
+  }
+  optional ParserEnum parser = 1;
+}
+
 message Deprecated {
   enum TestEnum {
     FOO = 1;
@@ -62,6 +79,7 @@ message Override {
 
 message Object {
   optional int32 object = 1;
+  optional string string_object = 2;
 }
 
 message String {

+ 120 - 5
python/google/protobuf/descriptor.py

@@ -39,13 +39,20 @@ from google.protobuf.internal import api_implementation
 
 
 if api_implementation.Type() == 'cpp':
-  from google.protobuf.internal import cpp_message
+  if api_implementation.Version() == 2:
+    from google.protobuf.internal.cpp import _message
+  else:
+    from google.protobuf.internal import cpp_message
 
 
 class Error(Exception):
   """Base error for this module."""
 
 
+class TypeTransformationError(Error):
+  """Error transforming between python proto type and corresponding C++ type."""
+
+
 class DescriptorBase(object):
 
   """Descriptors base class.
@@ -72,6 +79,18 @@ class DescriptorBase(object):
     # Does this descriptor have non-default options?
     self.has_options = options is not None
 
+  def _SetOptions(self, options, options_class_name):
+    """Sets the descriptor's options
+
+    This function is used in generated proto2 files to update descriptor
+    options. It must not be used outside proto2.
+    """
+    self._options = options
+    self._options_class_name = options_class_name
+
+    # Does this descriptor have non-default options?
+    self.has_options = options is not None
+
   def GetOptions(self):
     """Retrieves descriptor options.
 
@@ -250,6 +269,24 @@ class Descriptor(_NestedDescriptorBase):
     self._serialized_start = serialized_start
     self._serialized_end = serialized_end
 
+  def EnumValueName(self, enum, value):
+    """Returns the string name of an enum value.
+
+    This is just a small helper method to simplify a common operation.
+
+    Args:
+      enum: string name of the Enum.
+      value: int, value of the enum.
+
+    Returns:
+      string name of the enum value.
+
+    Raises:
+      KeyError if either the Enum doesn't exist or the value is not a valid
+        value for the enum.
+    """
+    return self.enum_types_by_name[enum].values_by_number[value].name
+
   def CopyToProto(self, proto):
     """Copies this to a descriptor_pb2.DescriptorProto.
 
@@ -275,7 +312,7 @@ class FieldDescriptor(DescriptorBase):
 
   """Descriptor for a single field in a .proto file.
 
-  A FieldDescriptor instance has the following attriubtes:
+  A FieldDescriptor instance has the following attributes:
 
     name: (str) Name of this field, exactly as it appears in .proto.
     full_name: (str) Name of this field, including containing scope.  This is
@@ -358,6 +395,27 @@ class FieldDescriptor(DescriptorBase):
   CPPTYPE_MESSAGE     = 10
   MAX_CPPTYPE         = 10
 
+  _PYTHON_TO_CPP_PROTO_TYPE_MAP = {
+      TYPE_DOUBLE: CPPTYPE_DOUBLE,
+      TYPE_FLOAT: CPPTYPE_FLOAT,
+      TYPE_ENUM: CPPTYPE_ENUM,
+      TYPE_INT64: CPPTYPE_INT64,
+      TYPE_SINT64: CPPTYPE_INT64,
+      TYPE_SFIXED64: CPPTYPE_INT64,
+      TYPE_UINT64: CPPTYPE_UINT64,
+      TYPE_FIXED64: CPPTYPE_UINT64,
+      TYPE_INT32: CPPTYPE_INT32,
+      TYPE_SFIXED32: CPPTYPE_INT32,
+      TYPE_SINT32: CPPTYPE_INT32,
+      TYPE_UINT32: CPPTYPE_UINT32,
+      TYPE_FIXED32: CPPTYPE_UINT32,
+      TYPE_BYTES: CPPTYPE_STRING,
+      TYPE_STRING: CPPTYPE_STRING,
+      TYPE_BOOL: CPPTYPE_BOOL,
+      TYPE_MESSAGE: CPPTYPE_MESSAGE,
+      TYPE_GROUP: CPPTYPE_MESSAGE
+      }
+
   # Must be consistent with C++ FieldDescriptor::Label enum in
   # descriptor.h.
   #
@@ -395,12 +453,38 @@ class FieldDescriptor(DescriptorBase):
     self.extension_scope = extension_scope
     if api_implementation.Type() == 'cpp':
       if is_extension:
-        self._cdescriptor = cpp_message.GetExtensionDescriptor(full_name)
+        if api_implementation.Version() == 2:
+          self._cdescriptor = _message.GetExtensionDescriptor(full_name)
+        else:
+          self._cdescriptor = cpp_message.GetExtensionDescriptor(full_name)
       else:
-        self._cdescriptor = cpp_message.GetFieldDescriptor(full_name)
+        if api_implementation.Version() == 2:
+          self._cdescriptor = _message.GetFieldDescriptor(full_name)
+        else:
+          self._cdescriptor = cpp_message.GetFieldDescriptor(full_name)
     else:
       self._cdescriptor = None
 
+  @staticmethod
+  def ProtoTypeToCppProtoType(proto_type):
+    """Converts from a Python proto type to a C++ Proto Type.
+
+    The Python ProtocolBuffer classes specify both the 'Python' datatype and the
+    'C++' datatype - and they're not the same. This helper method should
+    translate from one to another.
+
+    Args:
+      proto_type: the Python proto type (descriptor.FieldDescriptor.TYPE_*)
+    Returns:
+      descriptor.FieldDescriptor.CPPTYPE_*, the C++ type.
+    Raises:
+      TypeTransformationError: when the Python proto type isn't known.
+    """
+    try:
+      return FieldDescriptor._PYTHON_TO_CPP_PROTO_TYPE_MAP[proto_type]
+    except KeyError:
+      raise TypeTransformationError('Unknown proto_type: %s' % proto_type)
+
 
 class EnumDescriptor(_NestedDescriptorBase):
 
@@ -577,7 +661,10 @@ class FileDescriptor(DescriptorBase):
     self.serialized_pb = serialized_pb
     if (api_implementation.Type() == 'cpp' and
         self.serialized_pb is not None):
-      cpp_message.BuildFile(self.serialized_pb)
+      if api_implementation.Version() == 2:
+        _message.BuildFile(self.serialized_pb)
+      else:
+        cpp_message.BuildFile(self.serialized_pb)
 
   def CopyToProto(self, proto):
     """Copies this to a descriptor_pb2.FileDescriptorProto.
@@ -596,3 +683,31 @@ def _ParseOptions(message, string):
   """
   message.ParseFromString(string)
   return message
+
+
+def MakeDescriptor(desc_proto, package=''):
+  """Make a protobuf Descriptor given a DescriptorProto protobuf.
+
+  Args:
+    desc_proto: The descriptor_pb2.DescriptorProto protobuf message.
+    package: Optional package name for the new message Descriptor (string).
+
+  Returns:
+    A Descriptor for protobuf messages.
+  """
+  full_message_name = [desc_proto.name]
+  if package: full_message_name.insert(0, package)
+  fields = []
+  for field_proto in desc_proto.field:
+    full_name = '.'.join(full_message_name + [field_proto.name])
+    field = FieldDescriptor(
+        field_proto.name, full_name, field_proto.number - 1,
+        field_proto.number, field_proto.type,
+        FieldDescriptor.ProtoTypeToCppProtoType(field_proto.type),
+        field_proto.label, None, None, None, None, False, None,
+        has_default_value=False)
+    fields.append(field)
+
+  desc_name = '.'.join(full_message_name)
+  return Descriptor(desc_proto.name, desc_name, None, None, fields,
+                    [], [], [])

+ 120 - 0
python/google/protobuf/descriptor_database.py

@@ -0,0 +1,120 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2008 Google Inc.  All rights reserved.
+# http://code.google.com/p/protobuf/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Provides a container for DescriptorProtos."""
+
+__author__ = 'matthewtoia@google.com (Matt Toia)'
+
+
+class DescriptorDatabase(object):
+  """A container accepting FileDescriptorProtos and maps DescriptorProtos."""
+
+  def __init__(self):
+    self._file_desc_protos_by_file = {}
+    self._file_desc_protos_by_symbol = {}
+
+  def Add(self, file_desc_proto):
+    """Adds the FileDescriptorProto and its types to this database.
+
+    Args:
+      file_desc_proto: The FileDescriptorProto to add.
+    """
+
+    self._file_desc_protos_by_file[file_desc_proto.name] = file_desc_proto
+    package = file_desc_proto.package
+    for message in file_desc_proto.message_type:
+      self._file_desc_protos_by_symbol.update(
+          (name, file_desc_proto) for name in _ExtractSymbols(message, package))
+    for enum in file_desc_proto.enum_type:
+      self._file_desc_protos_by_symbol[
+          '.'.join((package, enum.name))] = file_desc_proto
+
+  def FindFileByName(self, name):
+    """Finds the file descriptor proto by file name.
+
+    Typically the file name is a relative path ending to a .proto file. The
+    proto with the given name will have to have been added to this database
+    using the Add method or else an error will be raised.
+
+    Args:
+      name: The file name to find.
+
+    Returns:
+      The file descriptor proto matching the name.
+
+    Raises:
+      KeyError if no file by the given name was added.
+    """
+
+    return self._file_desc_protos_by_file[name]
+
+  def FindFileContainingSymbol(self, symbol):
+    """Finds the file descriptor proto containing the specified symbol.
+
+    The symbol should be a fully qualified name including the file descriptor's
+    package and any containing messages. Some examples:
+
+    'some.package.name.Message'
+    'some.package.name.Message.NestedEnum'
+
+    The file descriptor proto containing the specified symbol must be added to
+    this database using the Add method or else an error will be raised.
+
+    Args:
+      symbol: The fully qualified symbol name.
+
+    Returns:
+      The file descriptor proto containing the symbol.
+
+    Raises:
+      KeyError if no file contains the specified symbol.
+    """
+
+    return self._file_desc_protos_by_symbol[symbol]
+
+
+def _ExtractSymbols(desc_proto, package):
+  """Pulls out all the symbols from a descriptor proto.
+
+  Args:
+    desc_proto: The proto to extract symbols from.
+    package: The package containing the descriptor type.
+
+  Yields:
+    The fully qualified name found in the descriptor.
+  """
+
+  message_name = '.'.join((package, desc_proto.name))
+  yield message_name
+  for nested_type in desc_proto.nested_type:
+    for symbol in _ExtractSymbols(nested_type, message_name):
+      yield symbol
+    for enum_type in desc_proto.enum_type:
+      yield '.'.join((message_name, enum_type.name))

+ 527 - 0
python/google/protobuf/descriptor_pool.py

@@ -0,0 +1,527 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2008 Google Inc.  All rights reserved.
+# http://code.google.com/p/protobuf/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Provides DescriptorPool to use as a container for proto2 descriptors.
+
+The DescriptorPool is used in conjection with a DescriptorDatabase to maintain
+a collection of protocol buffer descriptors for use when dynamically creating
+message types at runtime.
+
+For most applications protocol buffers should be used via modules generated by
+the protocol buffer compiler tool. This should only be used when the type of
+protocol buffers used in an application or library cannot be predetermined.
+
+Below is a straightforward example on how to use this class:
+
+  pool = DescriptorPool()
+  file_descriptor_protos = [ ... ]
+  for file_descriptor_proto in file_descriptor_protos:
+    pool.Add(file_descriptor_proto)
+  my_message_descriptor = pool.FindMessageTypeByName('some.package.MessageType')
+
+The message descriptor can be used in conjunction with the message_factory
+module in order to create a protocol buffer class that can be encoded and
+decoded.
+"""
+
+__author__ = 'matthewtoia@google.com (Matt Toia)'
+
+from google.protobuf import descriptor_pb2
+from google.protobuf import descriptor
+from google.protobuf import descriptor_database
+
+
+class DescriptorPool(object):
+  """A collection of protobufs dynamically constructed by descriptor protos."""
+
+  def __init__(self, descriptor_db=None):
+    """Initializes a Pool of proto buffs.
+
+    The descriptor_db argument to the constructor is provided to allow
+    specialized file descriptor proto lookup code to be triggered on demand. An
+    example would be an implementation which will read and compile a file
+    specified in a call to FindFileByName() and not require the call to Add()
+    at all. Results from this database will be cached internally here as well.
+
+    Args:
+      descriptor_db: A secondary source of file descriptors.
+    """
+
+    self._internal_db = descriptor_database.DescriptorDatabase()
+    self._descriptor_db = descriptor_db
+    self._descriptors = {}
+    self._enum_descriptors = {}
+    self._file_descriptors = {}
+
+  def Add(self, file_desc_proto):
+    """Adds the FileDescriptorProto and its types to this pool.
+
+    Args:
+      file_desc_proto: The FileDescriptorProto to add.
+    """
+
+    self._internal_db.Add(file_desc_proto)
+
+  def FindFileByName(self, file_name):
+    """Gets a FileDescriptor by file name.
+
+    Args:
+      file_name: The path to the file to get a descriptor for.
+
+    Returns:
+      A FileDescriptor for the named file.
+
+    Raises:
+      KeyError: if the file can not be found in the pool.
+    """
+
+    try:
+      file_proto = self._internal_db.FindFileByName(file_name)
+    except KeyError as error:
+      if self._descriptor_db:
+        file_proto = self._descriptor_db.FindFileByName(file_name)
+      else:
+        raise error
+    if not file_proto:
+      raise KeyError('Cannot find a file named %s' % file_name)
+    return self._ConvertFileProtoToFileDescriptor(file_proto)
+
+  def FindFileContainingSymbol(self, symbol):
+    """Gets the FileDescriptor for the file containing the specified symbol.
+
+    Args:
+      symbol: The name of the symbol to search for.
+
+    Returns:
+      A FileDescriptor that contains the specified symbol.
+
+    Raises:
+      KeyError: if the file can not be found in the pool.
+    """
+
+    try:
+      file_proto = self._internal_db.FindFileContainingSymbol(symbol)
+    except KeyError as error:
+      if self._descriptor_db:
+        file_proto = self._descriptor_db.FindFileContainingSymbol(symbol)
+      else:
+        raise error
+    if not file_proto:
+      raise KeyError('Cannot find a file containing %s' % symbol)
+    return self._ConvertFileProtoToFileDescriptor(file_proto)
+
+  def FindMessageTypeByName(self, full_name):
+    """Loads the named descriptor from the pool.
+
+    Args:
+      full_name: The full name of the descriptor to load.
+
+    Returns:
+      The descriptor for the named type.
+    """
+
+    full_name = full_name.lstrip('.')  # fix inconsistent qualified name formats
+    if full_name not in self._descriptors:
+      self.FindFileContainingSymbol(full_name)
+    return self._descriptors[full_name]
+
+  def FindEnumTypeByName(self, full_name):
+    """Loads the named enum descriptor from the pool.
+
+    Args:
+      full_name: The full name of the enum descriptor to load.
+
+    Returns:
+      The enum descriptor for the named type.
+    """
+
+    full_name = full_name.lstrip('.')  # fix inconsistent qualified name formats
+    if full_name not in self._enum_descriptors:
+      self.FindFileContainingSymbol(full_name)
+    return self._enum_descriptors[full_name]
+
+  def _ConvertFileProtoToFileDescriptor(self, file_proto):
+    """Creates a FileDescriptor from a proto or returns a cached copy.
+
+    This method also has the side effect of loading all the symbols found in
+    the file into the appropriate dictionaries in the pool.
+
+    Args:
+      file_proto: The proto to convert.
+
+    Returns:
+      A FileDescriptor matching the passed in proto.
+    """
+
+    if file_proto.name not in self._file_descriptors:
+      file_descriptor = descriptor.FileDescriptor(
+          name=file_proto.name,
+          package=file_proto.package,
+          options=file_proto.options,
+          serialized_pb=file_proto.SerializeToString())
+      scope = {}
+      dependencies = list(self._GetDeps(file_proto))
+
+      for dependency in dependencies:
+        dep_desc = self.FindFileByName(dependency.name)
+        dep_proto = descriptor_pb2.FileDescriptorProto.FromString(
+            dep_desc.serialized_pb)
+        package = '.' + dep_proto.package
+        package_prefix = package + '.'
+
+        def _strip_package(symbol):
+          if symbol.startswith(package_prefix):
+            return symbol[len(package_prefix):]
+          return symbol
+
+        symbols = list(self._ExtractSymbols(dep_proto.message_type, package))
+        scope.update(symbols)
+        scope.update((_strip_package(k), v) for k, v in symbols)
+
+        symbols = list(self._ExtractEnums(dep_proto.enum_type, package))
+        scope.update(symbols)
+        scope.update((_strip_package(k), v) for k, v in symbols)
+
+      for message_type in file_proto.message_type:
+        message_desc = self._ConvertMessageDescriptor(
+            message_type, file_proto.package, file_descriptor, scope)
+        file_descriptor.message_types_by_name[message_desc.name] = message_desc
+      for enum_type in file_proto.enum_type:
+        self._ConvertEnumDescriptor(enum_type, file_proto.package,
+                                    file_descriptor, None, scope)
+      for desc_proto in self._ExtractMessages(file_proto.message_type):
+        self._SetFieldTypes(desc_proto, scope)
+
+      for desc_proto in file_proto.message_type:
+        desc = scope[desc_proto.name]
+        file_descriptor.message_types_by_name[desc_proto.name] = desc
+      self.Add(file_proto)
+      self._file_descriptors[file_proto.name] = file_descriptor
+
+    return self._file_descriptors[file_proto.name]
+
+  def _ConvertMessageDescriptor(self, desc_proto, package=None, file_desc=None,
+                                scope=None):
+    """Adds the proto to the pool in the specified package.
+
+    Args:
+      desc_proto: The descriptor_pb2.DescriptorProto protobuf message.
+      package: The package the proto should be located in.
+      file_desc: The file containing this message.
+      scope: Dict mapping short and full symbols to message and enum types.
+
+    Returns:
+      The added descriptor.
+    """
+
+    if package:
+      desc_name = '.'.join((package, desc_proto.name))
+    else:
+      desc_name = desc_proto.name
+
+    if file_desc is None:
+      file_name = None
+    else:
+      file_name = file_desc.name
+
+    if scope is None:
+      scope = {}
+
+    nested = [
+        self._ConvertMessageDescriptor(nested, desc_name, file_desc, scope)
+        for nested in desc_proto.nested_type]
+    enums = [
+        self._ConvertEnumDescriptor(enum, desc_name, file_desc, None, scope)
+        for enum in desc_proto.enum_type]
+    fields = [self._MakeFieldDescriptor(field, desc_name, index)
+              for index, field in enumerate(desc_proto.field)]
+    extensions = [self._MakeFieldDescriptor(extension, desc_name, True)
+                  for index, extension in enumerate(desc_proto.extension)]
+    extension_ranges = [(r.start, r.end) for r in desc_proto.extension_range]
+    if extension_ranges:
+      is_extendable = True
+    else:
+      is_extendable = False
+    desc = descriptor.Descriptor(
+        name=desc_proto.name,
+        full_name=desc_name,
+        filename=file_name,
+        containing_type=None,
+        fields=fields,
+        nested_types=nested,
+        enum_types=enums,
+        extensions=extensions,
+        options=desc_proto.options,
+        is_extendable=is_extendable,
+        extension_ranges=extension_ranges,
+        file=file_desc,
+        serialized_start=None,
+        serialized_end=None)
+    for nested in desc.nested_types:
+      nested.containing_type = desc
+    for enum in desc.enum_types:
+      enum.containing_type = desc
+    scope[desc_proto.name] = desc
+    scope['.' + desc_name] = desc
+    self._descriptors[desc_name] = desc
+    return desc
+
+  def _ConvertEnumDescriptor(self, enum_proto, package=None, file_desc=None,
+                             containing_type=None, scope=None):
+    """Make a protobuf EnumDescriptor given an EnumDescriptorProto protobuf.
+
+    Args:
+      enum_proto: The descriptor_pb2.EnumDescriptorProto protobuf message.
+      package: Optional package name for the new message EnumDescriptor.
+      file_desc: The file containing the enum descriptor.
+      containing_type: The type containing this enum.
+      scope: Scope containing available types.
+
+    Returns:
+      The added descriptor
+    """
+
+    if package:
+      enum_name = '.'.join((package, enum_proto.name))
+    else:
+      enum_name = enum_proto.name
+
+    if file_desc is None:
+      file_name = None
+    else:
+      file_name = file_desc.name
+
+    values = [self._MakeEnumValueDescriptor(value, index)
+              for index, value in enumerate(enum_proto.value)]
+    desc = descriptor.EnumDescriptor(name=enum_proto.name,
+                                     full_name=enum_name,
+                                     filename=file_name,
+                                     file=file_desc,
+                                     values=values,
+                                     containing_type=containing_type,
+                                     options=enum_proto.options)
+    scope[enum_proto.name] = desc
+    scope['.%s' % enum_name] = desc
+    self._enum_descriptors[enum_name] = desc
+    return desc
+
+  def _MakeFieldDescriptor(self, field_proto, message_name, index,
+                           is_extension=False):
+    """Creates a field descriptor from a FieldDescriptorProto.
+
+    For message and enum type fields, this method will do a look up
+    in the pool for the appropriate descriptor for that type. If it
+    is unavailable, it will fall back to the _source function to
+    create it. If this type is still unavailable, construction will
+    fail.
+
+    Args:
+      field_proto: The proto describing the field.
+      message_name: The name of the containing message.
+      index: Index of the field
+      is_extension: Indication that this field is for an extension.
+
+    Returns:
+      An initialized FieldDescriptor object
+    """
+
+    if message_name:
+      full_name = '.'.join((message_name, field_proto.name))
+    else:
+      full_name = field_proto.name
+
+    return descriptor.FieldDescriptor(
+        name=field_proto.name,
+        full_name=full_name,
+        index=index,
+        number=field_proto.number,
+        type=field_proto.type,
+        cpp_type=None,
+        message_type=None,
+        enum_type=None,
+        containing_type=None,
+        label=field_proto.label,
+        has_default_value=False,
+        default_value=None,
+        is_extension=is_extension,
+        extension_scope=None,
+        options=field_proto.options)
+
+  def _SetFieldTypes(self, desc_proto, scope):
+    """Sets the field's type, cpp_type, message_type and enum_type.
+
+    Args:
+      desc_proto: The message descriptor to update.
+      scope: Enclosing scope of available types.
+    """
+
+    desc = scope[desc_proto.name]
+    for field_proto, field_desc in zip(desc_proto.field, desc.fields):
+      if field_proto.type_name:
+        type_name = field_proto.type_name
+        if type_name not in scope:
+          type_name = '.' + type_name
+        desc = scope[type_name]
+      else:
+        desc = None
+
+      if not field_proto.HasField('type'):
+        if isinstance(desc, descriptor.Descriptor):
+          field_proto.type = descriptor.FieldDescriptor.TYPE_MESSAGE
+        else:
+          field_proto.type = descriptor.FieldDescriptor.TYPE_ENUM
+
+      field_desc.cpp_type = descriptor.FieldDescriptor.ProtoTypeToCppProtoType(
+          field_proto.type)
+
+      if (field_proto.type == descriptor.FieldDescriptor.TYPE_MESSAGE
+          or field_proto.type == descriptor.FieldDescriptor.TYPE_GROUP):
+        field_desc.message_type = desc
+
+      if field_proto.type == descriptor.FieldDescriptor.TYPE_ENUM:
+        field_desc.enum_type = desc
+
+      if field_proto.label == descriptor.FieldDescriptor.LABEL_REPEATED:
+        field_desc.has_default = False
+        field_desc.default_value = []
+      elif field_proto.HasField('default_value'):
+        field_desc.has_default = True
+        if (field_proto.type == descriptor.FieldDescriptor.TYPE_DOUBLE or
+            field_proto.type == descriptor.FieldDescriptor.TYPE_FLOAT):
+          field_desc.default_value = float(field_proto.default_value)
+        elif field_proto.type == descriptor.FieldDescriptor.TYPE_STRING:
+          field_desc.default_value = field_proto.default_value
+        elif field_proto.type == descriptor.FieldDescriptor.TYPE_BOOL:
+          field_desc.default_value = field_proto.default_value.lower() == 'true'
+        elif field_proto.type == descriptor.FieldDescriptor.TYPE_ENUM:
+          field_desc.default_value = field_desc.enum_type.values_by_name[
+              field_proto.default_value].index
+        else:
+          field_desc.default_value = int(field_proto.default_value)
+      else:
+        field_desc.has_default = False
+        field_desc.default_value = None
+
+      field_desc.type = field_proto.type
+
+    for nested_type in desc_proto.nested_type:
+      self._SetFieldTypes(nested_type, scope)
+
+  def _MakeEnumValueDescriptor(self, value_proto, index):
+    """Creates a enum value descriptor object from a enum value proto.
+
+    Args:
+      value_proto: The proto describing the enum value.
+      index: The index of the enum value.
+
+    Returns:
+      An initialized EnumValueDescriptor object.
+    """
+
+    return descriptor.EnumValueDescriptor(
+        name=value_proto.name,
+        index=index,
+        number=value_proto.number,
+        options=value_proto.options,
+        type=None)
+
+  def _ExtractSymbols(self, desc_protos, package):
+    """Pulls out all the symbols from descriptor protos.
+
+    Args:
+      desc_protos: The protos to extract symbols from.
+      package: The package containing the descriptor type.
+    Yields:
+      A two element tuple of the type name and descriptor object.
+    """
+
+    for desc_proto in desc_protos:
+      if package:
+        message_name = '.'.join((package, desc_proto.name))
+      else:
+        message_name = desc_proto.name
+      message_desc = self.FindMessageTypeByName(message_name)
+      yield (message_name, message_desc)
+      for symbol in self._ExtractSymbols(desc_proto.nested_type, message_name):
+        yield symbol
+      for symbol in self._ExtractEnums(desc_proto.enum_type, message_name):
+        yield symbol
+
+  def _ExtractEnums(self, enum_protos, package):
+    """Pulls out all the symbols from enum protos.
+
+    Args:
+      enum_protos: The protos to extract symbols from.
+      package: The package containing the enum type.
+
+    Yields:
+      A two element tuple of the type name and enum descriptor object.
+    """
+
+    for enum_proto in enum_protos:
+      if package:
+        enum_name = '.'.join((package, enum_proto.name))
+      else:
+        enum_name = enum_proto.name
+      enum_desc = self.FindEnumTypeByName(enum_name)
+      yield (enum_name, enum_desc)
+
+  def _ExtractMessages(self, desc_protos):
+    """Pulls out all the message protos from descriptos.
+
+    Args:
+      desc_protos: The protos to extract symbols from.
+
+    Yields:
+      Descriptor protos.
+    """
+
+    for desc_proto in desc_protos:
+      yield desc_proto
+      for message in self._ExtractMessages(desc_proto.nested_type):
+        yield message
+
+  def _GetDeps(self, file_proto):
+    """Recursively finds dependencies for file protos.
+
+    Args:
+      file_proto: The proto to get dependencies from.
+
+    Yields:
+      Each direct and indirect dependency.
+    """
+
+    for dependency in file_proto.dependency:
+      dep_desc = self.FindFileByName(dependency)
+      dep_proto = descriptor_pb2.FileDescriptorProto.FromString(
+          dep_desc.serialized_pb)
+      yield dep_proto
+      for parent_dep in self._GetDeps(dep_proto):
+        yield parent_dep

+ 23 - 0
python/google/protobuf/internal/api_implementation.py

@@ -56,9 +56,32 @@ if _implementation_type != 'python':
   #  _implementation_type = 'python'
 
 
+# This environment variable can be used to switch between the two
+# 'cpp' implementations. Right now only 1 and 2 are valid values. Any
+# other value will be ignored.
+_implementation_version_str = os.getenv(
+    'PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION_VERSION',
+    '1')
+
+
+if _implementation_version_str not in ('1', '2'):
+  raise ValueError(
+      "unsupported PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION_VERSION: '" +
+      _implementation_version_str + "' (supported versions: 1, 2)"
+      )
+
+
+_implementation_version = int(_implementation_version_str)
+
+
+
 # Usage of this function is discouraged. Clients shouldn't care which
 # implementation of the API is in use. Note that there is no guarantee
 # that differences between APIs will be maintained.
 # Please don't use this function if possible.
 def Type():
   return _implementation_type
+
+# See comment on 'Type' above.
+def Version():
+  return _implementation_version

+ 12 - 2
python/google/protobuf/internal/containers.py

@@ -78,8 +78,13 @@ class BaseContainer(object):
   def __repr__(self):
     return repr(self._values)
 
-  def sort(self, sort_function=cmp):
-    self._values.sort(sort_function)
+  def sort(self, *args, **kwargs):
+    # Continue to support the old sort_function keyword argument.
+    # This is expected to be a rare occurrence, so use LBYL to avoid
+    # the overhead of actually catching KeyError.
+    if 'sort_function' in kwargs:
+      kwargs['cmp'] = kwargs.pop('sort_function')
+    self._values.sort(*args, **kwargs)
 
 
 class RepeatedScalarFieldContainer(BaseContainer):
@@ -235,6 +240,11 @@ class RepeatedCompositeFieldContainer(BaseContainer):
     """
     self.extend(other._values)
 
+  def remove(self, elem):
+    """Removes an item from the list. Similar to list.remove()."""
+    self._values.remove(elem)
+    self._message_listener.Modified()
+
   def __getslice__(self, start, stop):
     """Retrieves the subset of items from between the specified indices."""
     return self._values[start:stop]

+ 82 - 35
python/google/protobuf/internal/cpp_message.py

@@ -34,8 +34,10 @@ Descriptor objects at runtime backed by the protocol buffer C++ API.
 
 __author__ = 'petar@google.com (Petar Petrov)'
 
+import copy_reg
 import operator
 from google.protobuf.internal import _net_proto2___python
+from google.protobuf.internal import enum_type_wrapper
 from google.protobuf import message
 
 
@@ -156,10 +158,12 @@ class RepeatedScalarContainer(object):
   def __hash__(self):
     raise TypeError('unhashable object')
 
-  def sort(self, sort_function=cmp):
-    values = self[slice(None, None, None)]
-    values.sort(sort_function)
-    self._cmsg.AssignRepeatedScalar(self._cfield_descriptor, values)
+  def sort(self, *args, **kwargs):
+    # Maintain compatibility with the previous interface.
+    if 'sort_function' in kwargs:
+      kwargs['cmp'] = kwargs.pop('sort_function')
+    self._cmsg.AssignRepeatedScalar(self._cfield_descriptor,
+                                    sorted(self, *args, **kwargs))
 
 
 def RepeatedScalarProperty(cdescriptor):
@@ -202,6 +206,12 @@ class RepeatedCompositeContainer(object):
     for message in elem_seq:
       self.add().MergeFrom(message)
 
+  def remove(self, value):
+    # TODO(protocol-devel): This is inefficient as it needs to generate a
+    # message pointer for each message only to do index().  Move this to a C++
+    # extension function.
+    self.__delitem__(self[slice(None, None, None)].index(value))
+
   def MergeFrom(self, other):
     for message in other[:]:
       self.add().MergeFrom(message)
@@ -236,27 +246,29 @@ class RepeatedCompositeContainer(object):
   def __hash__(self):
     raise TypeError('unhashable object')
 
-  def sort(self, sort_function=cmp):
-    messages = []
-    for index in range(len(self)):
-      # messages[i][0] is where the i-th element of the new array has to come
-      # from.
-      # messages[i][1] is where the i-th element of the old array has to go.
-      messages.append([index, 0, self[index]])
-    messages.sort(lambda x,y: sort_function(x[2], y[2]))
+  def sort(self, cmp=None, key=None, reverse=False, **kwargs):
+    # Maintain compatibility with the old interface.
+    if cmp is None and 'sort_function' in kwargs:
+      cmp = kwargs.pop('sort_function')
 
-    # Remember which position each elements has to move to.
-    for i in range(len(messages)):
-      messages[messages[i][0]][1] = i
+    # The cmp function, if provided, is passed the results of the key function,
+    # so we only need to wrap one of them.
+    if key is None:
+      index_key = self.__getitem__
+    else:
+      index_key = lambda i: key(self[i])
+
+    # Sort the list of current indexes by the underlying object.
+    indexes = range(len(self))
+    indexes.sort(cmp=cmp, key=index_key, reverse=reverse)
 
     # Apply the transposition.
-    for i in range(len(messages)):
-      from_position = messages[i][0]
-      if i == from_position:
+    for dest, src in enumerate(indexes):
+      if dest == src:
         continue
-      self._cmsg.SwapRepeatedFieldElements(
-          self._cfield_descriptor, i, from_position)
-      messages[messages[i][1]][0] = from_position
+      self._cmsg.SwapRepeatedFieldElements(self._cfield_descriptor, dest, src)
+      # Don't swap the same value twice.
+      indexes[src] = src
 
 
 def RepeatedCompositeProperty(cdescriptor, message_type):
@@ -359,11 +371,12 @@ class ExtensionDict(object):
     return None
 
 
-def NewMessage(message_descriptor, dictionary):
+def NewMessage(bases, message_descriptor, dictionary):
   """Creates a new protocol message *class*."""
   _AddClassAttributesForNestedExtensions(message_descriptor, dictionary)
   _AddEnumValues(message_descriptor, dictionary)
   _AddDescriptors(message_descriptor, dictionary)
+  return bases
 
 
 def InitMessage(message_descriptor, cls):
@@ -372,6 +385,7 @@ def InitMessage(message_descriptor, cls):
   _AddInitMethod(message_descriptor, cls)
   _AddMessageMethods(message_descriptor, cls)
   _AddPropertiesForExtensions(message_descriptor, cls)
+  copy_reg.pickle(cls, lambda obj: (cls, (), obj.__getstate__()))
 
 
 def _AddDescriptors(message_descriptor, dictionary):
@@ -387,7 +401,7 @@ def _AddDescriptors(message_descriptor, dictionary):
         field.full_name)
 
   dictionary['__slots__'] = list(dictionary['__descriptors'].iterkeys()) + [
-      '_cmsg', '_owner', '_composite_fields', 'Extensions']
+      '_cmsg', '_owner', '_composite_fields', 'Extensions', '_HACK_REFCOUNTS']
 
 
 def _AddEnumValues(message_descriptor, dictionary):
@@ -398,6 +412,7 @@ def _AddEnumValues(message_descriptor, dictionary):
     dictionary: Class dictionary that should be populated.
   """
   for enum_type in message_descriptor.enum_types:
+    dictionary[enum_type.name] = enum_type_wrapper.EnumTypeWrapper(enum_type)
     for enum_value in enum_type.values:
       dictionary[enum_value.name] = enum_value.number
 
@@ -439,28 +454,35 @@ def _AddInitMethod(message_descriptor, cls):
   def Init(self, **kwargs):
     """Message constructor."""
     cmessage = kwargs.pop('__cmessage', None)
-    if cmessage is None:
-      self._cmsg = NewCMessage(message_descriptor.full_name)
-    else:
+    if cmessage:
       self._cmsg = cmessage
+    else:
+      self._cmsg = NewCMessage(message_descriptor.full_name)
 
     # Keep a reference to the owner, as the owner keeps a reference to the
     # underlying protocol buffer message.
     owner = kwargs.pop('__owner', None)
-    if owner is not None:
+    if owner:
       self._owner = owner
 
-    self.Extensions = ExtensionDict(self)
+    if message_descriptor.is_extendable:
+      self.Extensions = ExtensionDict(self)
+    else:
+      # Reference counting in the C++ code is broken and depends on
+      # the Extensions reference to keep this object alive during unit
+      # tests (see b/4856052).  Remove this once b/4945904 is fixed.
+      self._HACK_REFCOUNTS = self
     self._composite_fields = {}
 
     for field_name, field_value in kwargs.iteritems():
       field_cdescriptor = self.__descriptors.get(field_name, None)
-      if field_cdescriptor is None:
+      if not field_cdescriptor:
         raise ValueError('Protocol message has no "%s" field.' % field_name)
       if field_cdescriptor.label == _LABEL_REPEATED:
         if field_cdescriptor.cpp_type == _CPPTYPE_MESSAGE:
+          field_name = getattr(self, field_name)
           for val in field_value:
-            getattr(self, field_name).add().MergeFrom(val)
+            field_name.add().MergeFrom(val)
         else:
           getattr(self, field_name).extend(field_value)
       elif field_cdescriptor.cpp_type == _CPPTYPE_MESSAGE:
@@ -497,12 +519,34 @@ def _AddMessageMethods(message_descriptor, cls):
     return self._cmsg.HasField(field_name)
 
   def ClearField(self, field_name):
+    child_cmessage = None
     if field_name in self._composite_fields:
+      child_field = self._composite_fields[field_name]
       del self._composite_fields[field_name]
-    self._cmsg.ClearField(field_name)
+
+      child_cdescriptor = self.__descriptors[field_name]
+      # TODO(anuraag): Support clearing repeated message fields as well.
+      if (child_cdescriptor.label != _LABEL_REPEATED and
+          child_cdescriptor.cpp_type == _CPPTYPE_MESSAGE):
+        child_field._owner = None
+        child_cmessage = child_field._cmsg
+
+    if child_cmessage is not None:
+      self._cmsg.ClearField(field_name, child_cmessage)
+    else:
+      self._cmsg.ClearField(field_name)
 
   def Clear(self):
-    return self._cmsg.Clear()
+    cmessages_to_release = []
+    for field_name, child_field in self._composite_fields.iteritems():
+      child_cdescriptor = self.__descriptors[field_name]
+      # TODO(anuraag): Support clearing repeated message fields as well.
+      if (child_cdescriptor.label != _LABEL_REPEATED and
+          child_cdescriptor.cpp_type == _CPPTYPE_MESSAGE):
+        child_field._owner = None
+        cmessages_to_release.append((child_cdescriptor, child_field._cmsg))
+    self._composite_fields.clear()
+    self._cmsg.Clear(cmessages_to_release)
 
   def IsInitialized(self, errors=None):
     if self._cmsg.IsInitialized():
@@ -514,8 +558,8 @@ def _AddMessageMethods(message_descriptor, cls):
   def SerializeToString(self):
     if not self.IsInitialized():
       raise message.EncodeError(
-          'Message is missing required fields: ' +
-          ','.join(self.FindInitializationErrors()))
+          'Message %s is missing required fields: %s' % (
+          self._cmsg.full_name, ','.join(self.FindInitializationErrors())))
     return self._cmsg.SerializeToString()
 
   def SerializePartialToString(self):
@@ -534,7 +578,8 @@ def _AddMessageMethods(message_descriptor, cls):
   def MergeFrom(self, msg):
     if not isinstance(msg, cls):
       raise TypeError(
-          "Parameter to MergeFrom() must be instance of same class.")
+          "Parameter to MergeFrom() must be instance of same class: "
+          "expected %s got %s." % (cls.__name__, type(msg).__name__))
     self._cmsg.MergeFrom(msg._cmsg)
 
   def CopyFrom(self, msg):
@@ -581,6 +626,8 @@ def _AddMessageMethods(message_descriptor, cls):
     raise TypeError('unhashable object')
 
   def __unicode__(self):
+    # Lazy import to prevent circular import when text_format imports this file.
+    from google.protobuf import text_format
     return text_format.MessageToString(self, as_utf8=True).decode('utf-8')
 
   # Attach the local methods to the message class.

+ 6 - 0
python/google/protobuf/internal/decoder.py

@@ -576,6 +576,7 @@ def MessageSetItemDecoder(extensions_by_number):
   local_SkipField = SkipField
 
   def DecodeItem(buffer, pos, end, message, field_dict):
+    message_set_item_start = pos
     type_id = -1
     message_start = -1
     message_end = -1
@@ -614,6 +615,11 @@ def MessageSetItemDecoder(extensions_by_number):
         # The only reason _InternalParse would return early is if it encountered
         # an end-group tag.
         raise _DecodeError('Unexpected end-group tag.')
+    else:
+      if not message._unknown_fields:
+        message._unknown_fields = []
+      message._unknown_fields.append((MESSAGE_SET_ITEM_TAG,
+                                      buffer[message_set_item_start:pos]))
 
     return pos
 

+ 63 - 0
python/google/protobuf/internal/descriptor_database_test.py

@@ -0,0 +1,63 @@
+#! /usr/bin/python
+#
+# Protocol Buffers - Google's data interchange format
+# Copyright 2008 Google Inc.  All rights reserved.
+# http://code.google.com/p/protobuf/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Tests for google.protobuf.descriptor_database."""
+
+__author__ = 'matthewtoia@google.com (Matt Toia)'
+
+import unittest
+from google.protobuf import descriptor_pb2
+from google.protobuf.internal import factory_test2_pb2
+from google.protobuf import descriptor_database
+
+
+class DescriptorDatabaseTest(unittest.TestCase):
+
+  def testAdd(self):
+    db = descriptor_database.DescriptorDatabase()
+    file_desc_proto = descriptor_pb2.FileDescriptorProto.FromString(
+        factory_test2_pb2.DESCRIPTOR.serialized_pb)
+    db.Add(file_desc_proto)
+
+    self.assertEquals(file_desc_proto, db.FindFileByName(
+        'net/proto2/python/internal/factory_test2.proto'))
+    self.assertEquals(file_desc_proto, db.FindFileContainingSymbol(
+        'net.proto2.python.internal.Factory2Message'))
+    self.assertEquals(file_desc_proto, db.FindFileContainingSymbol(
+        'net.proto2.python.internal.Factory2Message.NestedFactory2Message'))
+    self.assertEquals(file_desc_proto, db.FindFileContainingSymbol(
+        'net.proto2.python.internal.Factory2Enum'))
+    self.assertEquals(file_desc_proto, db.FindFileContainingSymbol(
+        'net.proto2.python.internal.Factory2Message.NestedFactory2Enum'))
+
+if __name__ == '__main__':
+  unittest.main()

+ 220 - 0
python/google/protobuf/internal/descriptor_pool_test.py

@@ -0,0 +1,220 @@
+#! /usr/bin/python
+#
+# Protocol Buffers - Google's data interchange format
+# Copyright 2008 Google Inc.  All rights reserved.
+# http://code.google.com/p/protobuf/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Tests for google.protobuf.descriptor_pool."""
+
+__author__ = 'matthewtoia@google.com (Matt Toia)'
+
+import unittest
+from google.protobuf import descriptor_pb2
+from google.protobuf.internal import factory_test1_pb2
+from google.protobuf.internal import factory_test2_pb2
+from google.protobuf import descriptor
+from google.protobuf import descriptor_database
+from google.protobuf import descriptor_pool
+
+
+class DescriptorPoolTest(unittest.TestCase):
+
+  def setUp(self):
+    self.pool = descriptor_pool.DescriptorPool()
+    self.factory_test1_fd = descriptor_pb2.FileDescriptorProto.FromString(
+        factory_test1_pb2.DESCRIPTOR.serialized_pb)
+    self.factory_test2_fd = descriptor_pb2.FileDescriptorProto.FromString(
+        factory_test2_pb2.DESCRIPTOR.serialized_pb)
+    self.pool.Add(self.factory_test1_fd)
+    self.pool.Add(self.factory_test2_fd)
+
+  def testFindFileByName(self):
+    name1 = 'net/proto2/python/internal/factory_test1.proto'
+    file_desc1 = self.pool.FindFileByName(name1)
+    self.assertIsInstance(file_desc1, descriptor.FileDescriptor)
+    self.assertEquals(name1, file_desc1.name)
+    self.assertEquals('net.proto2.python.internal', file_desc1.package)
+    self.assertIn('Factory1Message', file_desc1.message_types_by_name)
+
+    name2 = 'net/proto2/python/internal/factory_test2.proto'
+    file_desc2 = self.pool.FindFileByName(name2)
+    self.assertIsInstance(file_desc2, descriptor.FileDescriptor)
+    self.assertEquals(name2, file_desc2.name)
+    self.assertEquals('net.proto2.python.internal', file_desc2.package)
+    self.assertIn('Factory2Message', file_desc2.message_types_by_name)
+
+  def testFindFileByNameFailure(self):
+    try:
+      self.pool.FindFileByName('Does not exist')
+      self.fail('Expected KeyError')
+    except KeyError:
+      pass
+
+  def testFindFileContainingSymbol(self):
+    file_desc1 = self.pool.FindFileContainingSymbol(
+        'net.proto2.python.internal.Factory1Message')
+    self.assertIsInstance(file_desc1, descriptor.FileDescriptor)
+    self.assertEquals('net/proto2/python/internal/factory_test1.proto',
+                      file_desc1.name)
+    self.assertEquals('net.proto2.python.internal', file_desc1.package)
+    self.assertIn('Factory1Message', file_desc1.message_types_by_name)
+
+    file_desc2 = self.pool.FindFileContainingSymbol(
+        'net.proto2.python.internal.Factory2Message')
+    self.assertIsInstance(file_desc2, descriptor.FileDescriptor)
+    self.assertEquals('net/proto2/python/internal/factory_test2.proto',
+                      file_desc2.name)
+    self.assertEquals('net.proto2.python.internal', file_desc2.package)
+    self.assertIn('Factory2Message', file_desc2.message_types_by_name)
+
+  def testFindFileContainingSymbolFailure(self):
+    try:
+      self.pool.FindFileContainingSymbol('Does not exist')
+      self.fail('Expected KeyError')
+    except KeyError:
+      pass
+
+  def testFindMessageTypeByName(self):
+    msg1 = self.pool.FindMessageTypeByName(
+        'net.proto2.python.internal.Factory1Message')
+    self.assertIsInstance(msg1, descriptor.Descriptor)
+    self.assertEquals('Factory1Message', msg1.name)
+    self.assertEquals('net.proto2.python.internal.Factory1Message',
+                      msg1.full_name)
+    self.assertEquals(None, msg1.containing_type)
+
+    nested_msg1 = msg1.nested_types[0]
+    self.assertEquals('NestedFactory1Message', nested_msg1.name)
+    self.assertEquals(msg1, nested_msg1.containing_type)
+
+    nested_enum1 = msg1.enum_types[0]
+    self.assertEquals('NestedFactory1Enum', nested_enum1.name)
+    self.assertEquals(msg1, nested_enum1.containing_type)
+
+    self.assertEquals(nested_msg1, msg1.fields_by_name[
+        'nested_factory_1_message'].message_type)
+    self.assertEquals(nested_enum1, msg1.fields_by_name[
+        'nested_factory_1_enum'].enum_type)
+
+    msg2 = self.pool.FindMessageTypeByName(
+        'net.proto2.python.internal.Factory2Message')
+    self.assertIsInstance(msg2, descriptor.Descriptor)
+    self.assertEquals('Factory2Message', msg2.name)
+    self.assertEquals('net.proto2.python.internal.Factory2Message',
+                      msg2.full_name)
+    self.assertIsNone(msg2.containing_type)
+
+    nested_msg2 = msg2.nested_types[0]
+    self.assertEquals('NestedFactory2Message', nested_msg2.name)
+    self.assertEquals(msg2, nested_msg2.containing_type)
+
+    nested_enum2 = msg2.enum_types[0]
+    self.assertEquals('NestedFactory2Enum', nested_enum2.name)
+    self.assertEquals(msg2, nested_enum2.containing_type)
+
+    self.assertEquals(nested_msg2, msg2.fields_by_name[
+        'nested_factory_2_message'].message_type)
+    self.assertEquals(nested_enum2, msg2.fields_by_name[
+        'nested_factory_2_enum'].enum_type)
+
+    self.assertTrue(msg2.fields_by_name['int_with_default'].has_default)
+    self.assertEquals(
+        1776, msg2.fields_by_name['int_with_default'].default_value)
+
+    self.assertTrue(msg2.fields_by_name['double_with_default'].has_default)
+    self.assertEquals(
+        9.99, msg2.fields_by_name['double_with_default'].default_value)
+
+    self.assertTrue(msg2.fields_by_name['string_with_default'].has_default)
+    self.assertEquals(
+        'hello world', msg2.fields_by_name['string_with_default'].default_value)
+
+    self.assertTrue(msg2.fields_by_name['bool_with_default'].has_default)
+    self.assertFalse(msg2.fields_by_name['bool_with_default'].default_value)
+
+    self.assertTrue(msg2.fields_by_name['enum_with_default'].has_default)
+    self.assertEquals(
+        1, msg2.fields_by_name['enum_with_default'].default_value)
+
+    msg3 = self.pool.FindMessageTypeByName(
+        'net.proto2.python.internal.Factory2Message.NestedFactory2Message')
+    self.assertEquals(nested_msg2, msg3)
+
+  def testFindMessageTypeByNameFailure(self):
+    try:
+      self.pool.FindMessageTypeByName('Does not exist')
+      self.fail('Expected KeyError')
+    except KeyError:
+      pass
+
+  def testFindEnumTypeByName(self):
+    enum1 = self.pool.FindEnumTypeByName(
+        'net.proto2.python.internal.Factory1Enum')
+    self.assertIsInstance(enum1, descriptor.EnumDescriptor)
+    self.assertEquals(0, enum1.values_by_name['FACTORY_1_VALUE_0'].number)
+    self.assertEquals(1, enum1.values_by_name['FACTORY_1_VALUE_1'].number)
+
+    nested_enum1 = self.pool.FindEnumTypeByName(
+        'net.proto2.python.internal.Factory1Message.NestedFactory1Enum')
+    self.assertIsInstance(nested_enum1, descriptor.EnumDescriptor)
+    self.assertEquals(
+        0, nested_enum1.values_by_name['NESTED_FACTORY_1_VALUE_0'].number)
+    self.assertEquals(
+        1, nested_enum1.values_by_name['NESTED_FACTORY_1_VALUE_1'].number)
+
+    enum2 = self.pool.FindEnumTypeByName(
+        'net.proto2.python.internal.Factory2Enum')
+    self.assertIsInstance(enum2, descriptor.EnumDescriptor)
+    self.assertEquals(0, enum2.values_by_name['FACTORY_2_VALUE_0'].number)
+    self.assertEquals(1, enum2.values_by_name['FACTORY_2_VALUE_1'].number)
+
+    nested_enum2 = self.pool.FindEnumTypeByName(
+        'net.proto2.python.internal.Factory2Message.NestedFactory2Enum')
+    self.assertIsInstance(nested_enum2, descriptor.EnumDescriptor)
+    self.assertEquals(
+        0, nested_enum2.values_by_name['NESTED_FACTORY_2_VALUE_0'].number)
+    self.assertEquals(
+        1, nested_enum2.values_by_name['NESTED_FACTORY_2_VALUE_1'].number)
+
+  def testFindEnumTypeByNameFailure(self):
+    try:
+      self.pool.FindEnumTypeByName('Does not exist')
+      self.fail('Expected KeyError')
+    except KeyError:
+      pass
+
+  def testUserDefinedDB(self):
+    db = descriptor_database.DescriptorDatabase()
+    self.pool = descriptor_pool.DescriptorPool(db)
+    db.Add(self.factory_test1_fd)
+    db.Add(self.factory_test2_fd)
+    self.testFindMessageTypeByName()
+
+if __name__ == '__main__':
+  unittest.main()

+ 279 - 0
python/google/protobuf/internal/descriptor_test.py

@@ -35,6 +35,7 @@
 __author__ = 'robinson@google.com (Will Robinson)'
 
 import unittest
+from google.protobuf import unittest_custom_options_pb2
 from google.protobuf import unittest_import_pb2
 from google.protobuf import unittest_pb2
 from google.protobuf import descriptor_pb2
@@ -101,6 +102,15 @@ class DescriptorTest(unittest.TestCase):
             self.my_method
         ])
 
+  def testEnumValueName(self):
+    self.assertEqual(self.my_message.EnumValueName('ForeignEnum', 4),
+                     'FOREIGN_FOO')
+
+    self.assertEqual(
+        self.my_message.enum_types_by_name[
+            'ForeignEnum'].values_by_number[4].name,
+        self.my_message.EnumValueName('ForeignEnum', 4))
+
   def testEnumFixups(self):
     self.assertEqual(self.my_enum, self.my_enum.values[0].type)
 
@@ -125,6 +135,257 @@ class DescriptorTest(unittest.TestCase):
     self.assertEqual(self.my_service.GetOptions(),
                      descriptor_pb2.ServiceOptions())
 
+  def testSimpleCustomOptions(self):
+    file_descriptor = unittest_custom_options_pb2.DESCRIPTOR
+    message_descriptor =\
+        unittest_custom_options_pb2.TestMessageWithCustomOptions.DESCRIPTOR
+    field_descriptor = message_descriptor.fields_by_name["field1"]
+    enum_descriptor = message_descriptor.enum_types_by_name["AnEnum"]
+    enum_value_descriptor =\
+        message_descriptor.enum_values_by_name["ANENUM_VAL2"]
+    service_descriptor =\
+        unittest_custom_options_pb2.TestServiceWithCustomOptions.DESCRIPTOR
+    method_descriptor = service_descriptor.FindMethodByName("Foo")
+
+    file_options = file_descriptor.GetOptions()
+    file_opt1 = unittest_custom_options_pb2.file_opt1
+    self.assertEqual(9876543210, file_options.Extensions[file_opt1])
+    message_options = message_descriptor.GetOptions()
+    message_opt1 = unittest_custom_options_pb2.message_opt1
+    self.assertEqual(-56, message_options.Extensions[message_opt1])
+    field_options = field_descriptor.GetOptions()
+    field_opt1 = unittest_custom_options_pb2.field_opt1
+    self.assertEqual(8765432109, field_options.Extensions[field_opt1])
+    field_opt2 = unittest_custom_options_pb2.field_opt2
+    self.assertEqual(42, field_options.Extensions[field_opt2])
+    enum_options = enum_descriptor.GetOptions()
+    enum_opt1 = unittest_custom_options_pb2.enum_opt1
+    self.assertEqual(-789, enum_options.Extensions[enum_opt1])
+    enum_value_options = enum_value_descriptor.GetOptions()
+    enum_value_opt1 = unittest_custom_options_pb2.enum_value_opt1
+    self.assertEqual(123, enum_value_options.Extensions[enum_value_opt1])
+
+    service_options = service_descriptor.GetOptions()
+    service_opt1 = unittest_custom_options_pb2.service_opt1
+    self.assertEqual(-9876543210, service_options.Extensions[service_opt1])
+    method_options = method_descriptor.GetOptions()
+    method_opt1 = unittest_custom_options_pb2.method_opt1
+    self.assertEqual(unittest_custom_options_pb2.METHODOPT1_VAL2,
+                     method_options.Extensions[method_opt1])
+
+  def testDifferentCustomOptionTypes(self):
+    kint32min = -2**31
+    kint64min = -2**63
+    kint32max = 2**31 - 1
+    kint64max = 2**63 - 1
+    kuint32max = 2**32 - 1
+    kuint64max = 2**64 - 1
+
+    message_descriptor =\
+        unittest_custom_options_pb2.CustomOptionMinIntegerValues.DESCRIPTOR
+    message_options = message_descriptor.GetOptions()
+    self.assertEqual(False, message_options.Extensions[
+        unittest_custom_options_pb2.bool_opt])
+    self.assertEqual(kint32min, message_options.Extensions[
+        unittest_custom_options_pb2.int32_opt])
+    self.assertEqual(kint64min, message_options.Extensions[
+        unittest_custom_options_pb2.int64_opt])
+    self.assertEqual(0, message_options.Extensions[
+        unittest_custom_options_pb2.uint32_opt])
+    self.assertEqual(0, message_options.Extensions[
+        unittest_custom_options_pb2.uint64_opt])
+    self.assertEqual(kint32min, message_options.Extensions[
+        unittest_custom_options_pb2.sint32_opt])
+    self.assertEqual(kint64min, message_options.Extensions[
+        unittest_custom_options_pb2.sint64_opt])
+    self.assertEqual(0, message_options.Extensions[
+        unittest_custom_options_pb2.fixed32_opt])
+    self.assertEqual(0, message_options.Extensions[
+        unittest_custom_options_pb2.fixed64_opt])
+    self.assertEqual(kint32min, message_options.Extensions[
+        unittest_custom_options_pb2.sfixed32_opt])
+    self.assertEqual(kint64min, message_options.Extensions[
+        unittest_custom_options_pb2.sfixed64_opt])
+
+    message_descriptor =\
+        unittest_custom_options_pb2.CustomOptionMaxIntegerValues.DESCRIPTOR
+    message_options = message_descriptor.GetOptions()
+    self.assertEqual(True, message_options.Extensions[
+        unittest_custom_options_pb2.bool_opt])
+    self.assertEqual(kint32max, message_options.Extensions[
+        unittest_custom_options_pb2.int32_opt])
+    self.assertEqual(kint64max, message_options.Extensions[
+        unittest_custom_options_pb2.int64_opt])
+    self.assertEqual(kuint32max, message_options.Extensions[
+        unittest_custom_options_pb2.uint32_opt])
+    self.assertEqual(kuint64max, message_options.Extensions[
+        unittest_custom_options_pb2.uint64_opt])
+    self.assertEqual(kint32max, message_options.Extensions[
+        unittest_custom_options_pb2.sint32_opt])
+    self.assertEqual(kint64max, message_options.Extensions[
+        unittest_custom_options_pb2.sint64_opt])
+    self.assertEqual(kuint32max, message_options.Extensions[
+        unittest_custom_options_pb2.fixed32_opt])
+    self.assertEqual(kuint64max, message_options.Extensions[
+        unittest_custom_options_pb2.fixed64_opt])
+    self.assertEqual(kint32max, message_options.Extensions[
+        unittest_custom_options_pb2.sfixed32_opt])
+    self.assertEqual(kint64max, message_options.Extensions[
+        unittest_custom_options_pb2.sfixed64_opt])
+
+    message_descriptor =\
+        unittest_custom_options_pb2.CustomOptionOtherValues.DESCRIPTOR
+    message_options = message_descriptor.GetOptions()
+    self.assertEqual(-100, message_options.Extensions[
+        unittest_custom_options_pb2.int32_opt])
+    self.assertAlmostEqual(12.3456789, message_options.Extensions[
+        unittest_custom_options_pb2.float_opt], 6)
+    self.assertAlmostEqual(1.234567890123456789, message_options.Extensions[
+        unittest_custom_options_pb2.double_opt])
+    self.assertEqual("Hello, \"World\"", message_options.Extensions[
+        unittest_custom_options_pb2.string_opt])
+    self.assertEqual("Hello\0World", message_options.Extensions[
+        unittest_custom_options_pb2.bytes_opt])
+    dummy_enum = unittest_custom_options_pb2.DummyMessageContainingEnum
+    self.assertEqual(
+        dummy_enum.TEST_OPTION_ENUM_TYPE2,
+        message_options.Extensions[unittest_custom_options_pb2.enum_opt])
+
+    message_descriptor =\
+        unittest_custom_options_pb2.SettingRealsFromPositiveInts.DESCRIPTOR
+    message_options = message_descriptor.GetOptions()
+    self.assertAlmostEqual(12, message_options.Extensions[
+        unittest_custom_options_pb2.float_opt], 6)
+    self.assertAlmostEqual(154, message_options.Extensions[
+        unittest_custom_options_pb2.double_opt])
+
+    message_descriptor =\
+        unittest_custom_options_pb2.SettingRealsFromNegativeInts.DESCRIPTOR
+    message_options = message_descriptor.GetOptions()
+    self.assertAlmostEqual(-12, message_options.Extensions[
+        unittest_custom_options_pb2.float_opt], 6)
+    self.assertAlmostEqual(-154, message_options.Extensions[
+        unittest_custom_options_pb2.double_opt])
+
+  def testComplexExtensionOptions(self):
+    descriptor =\
+        unittest_custom_options_pb2.VariousComplexOptions.DESCRIPTOR
+    options = descriptor.GetOptions()
+    self.assertEqual(42, options.Extensions[
+        unittest_custom_options_pb2.complex_opt1].foo)
+    self.assertEqual(324, options.Extensions[
+        unittest_custom_options_pb2.complex_opt1].Extensions[
+            unittest_custom_options_pb2.quux])
+    self.assertEqual(876, options.Extensions[
+        unittest_custom_options_pb2.complex_opt1].Extensions[
+            unittest_custom_options_pb2.corge].qux)
+    self.assertEqual(987, options.Extensions[
+        unittest_custom_options_pb2.complex_opt2].baz)
+    self.assertEqual(654, options.Extensions[
+        unittest_custom_options_pb2.complex_opt2].Extensions[
+            unittest_custom_options_pb2.grault])
+    self.assertEqual(743, options.Extensions[
+        unittest_custom_options_pb2.complex_opt2].bar.foo)
+    self.assertEqual(1999, options.Extensions[
+        unittest_custom_options_pb2.complex_opt2].bar.Extensions[
+            unittest_custom_options_pb2.quux])
+    self.assertEqual(2008, options.Extensions[
+        unittest_custom_options_pb2.complex_opt2].bar.Extensions[
+            unittest_custom_options_pb2.corge].qux)
+    self.assertEqual(741, options.Extensions[
+        unittest_custom_options_pb2.complex_opt2].Extensions[
+            unittest_custom_options_pb2.garply].foo)
+    self.assertEqual(1998, options.Extensions[
+        unittest_custom_options_pb2.complex_opt2].Extensions[
+            unittest_custom_options_pb2.garply].Extensions[
+                unittest_custom_options_pb2.quux])
+    self.assertEqual(2121, options.Extensions[
+        unittest_custom_options_pb2.complex_opt2].Extensions[
+            unittest_custom_options_pb2.garply].Extensions[
+                unittest_custom_options_pb2.corge].qux)
+    self.assertEqual(1971, options.Extensions[
+        unittest_custom_options_pb2.ComplexOptionType2
+        .ComplexOptionType4.complex_opt4].waldo)
+    self.assertEqual(321, options.Extensions[
+        unittest_custom_options_pb2.complex_opt2].fred.waldo)
+    self.assertEqual(9, options.Extensions[
+        unittest_custom_options_pb2.complex_opt3].qux)
+    self.assertEqual(22, options.Extensions[
+        unittest_custom_options_pb2.complex_opt3].complexoptiontype5.plugh)
+    self.assertEqual(24, options.Extensions[
+        unittest_custom_options_pb2.complexopt6].xyzzy)
+
+  # Check that aggregate options were parsed and saved correctly in
+  # the appropriate descriptors.
+  def testAggregateOptions(self):
+    file_descriptor = unittest_custom_options_pb2.DESCRIPTOR
+    message_descriptor =\
+        unittest_custom_options_pb2.AggregateMessage.DESCRIPTOR
+    field_descriptor = message_descriptor.fields_by_name["fieldname"]
+    enum_descriptor = unittest_custom_options_pb2.AggregateEnum.DESCRIPTOR
+    enum_value_descriptor = enum_descriptor.values_by_name["VALUE"]
+    service_descriptor =\
+        unittest_custom_options_pb2.AggregateService.DESCRIPTOR
+    method_descriptor = service_descriptor.FindMethodByName("Method")
+
+    # Tests for the different types of data embedded in fileopt
+    file_options = file_descriptor.GetOptions().Extensions[
+        unittest_custom_options_pb2.fileopt]
+    self.assertEqual(100, file_options.i)
+    self.assertEqual("FileAnnotation", file_options.s)
+    self.assertEqual("NestedFileAnnotation", file_options.sub.s)
+    self.assertEqual("FileExtensionAnnotation", file_options.file.Extensions[
+        unittest_custom_options_pb2.fileopt].s)
+    self.assertEqual("EmbeddedMessageSetElement", file_options.mset.Extensions[
+        unittest_custom_options_pb2.AggregateMessageSetElement
+        .message_set_extension].s)
+
+    # Simple tests for all the other types of annotations
+    self.assertEqual(
+        "MessageAnnotation",
+        message_descriptor.GetOptions().Extensions[
+            unittest_custom_options_pb2.msgopt].s)
+    self.assertEqual(
+        "FieldAnnotation",
+        field_descriptor.GetOptions().Extensions[
+            unittest_custom_options_pb2.fieldopt].s)
+    self.assertEqual(
+        "EnumAnnotation",
+        enum_descriptor.GetOptions().Extensions[
+            unittest_custom_options_pb2.enumopt].s)
+    self.assertEqual(
+        "EnumValueAnnotation",
+        enum_value_descriptor.GetOptions().Extensions[
+            unittest_custom_options_pb2.enumvalopt].s)
+    self.assertEqual(
+        "ServiceAnnotation",
+        service_descriptor.GetOptions().Extensions[
+            unittest_custom_options_pb2.serviceopt].s)
+    self.assertEqual(
+        "MethodAnnotation",
+        method_descriptor.GetOptions().Extensions[
+            unittest_custom_options_pb2.methodopt].s)
+
+  def testNestedOptions(self):
+    nested_message =\
+        unittest_custom_options_pb2.NestedOptionType.NestedMessage.DESCRIPTOR
+    self.assertEqual(1001, nested_message.GetOptions().Extensions[
+        unittest_custom_options_pb2.message_opt1])
+    nested_field = nested_message.fields_by_name["nested_field"]
+    self.assertEqual(1002, nested_field.GetOptions().Extensions[
+        unittest_custom_options_pb2.field_opt1])
+    outer_message =\
+        unittest_custom_options_pb2.NestedOptionType.DESCRIPTOR
+    nested_enum = outer_message.enum_types_by_name["NestedEnum"]
+    self.assertEqual(1003, nested_enum.GetOptions().Extensions[
+        unittest_custom_options_pb2.enum_opt1])
+    nested_enum_value = outer_message.enum_values_by_name["NESTED_ENUM_VALUE"]
+    self.assertEqual(1004, nested_enum_value.GetOptions().Extensions[
+        unittest_custom_options_pb2.enum_value_opt1])
+    nested_extension = outer_message.extensions_by_name["nested_extension"]
+    self.assertEqual(1005, nested_extension.GetOptions().Extensions[
+        unittest_custom_options_pb2.field_opt2])
+
   def testFileDescriptorReferences(self):
     self.assertEqual(self.my_enum.file, self.my_file)
     self.assertEqual(self.my_message.file, self.my_file)
@@ -273,6 +534,7 @@ class DescriptorCopyToProtoTest(unittest.TestCase):
     UNITTEST_IMPORT_FILE_DESCRIPTOR_ASCII = ("""
       name: 'google/protobuf/unittest_import.proto'
       package: 'protobuf_unittest_import'
+      dependency: 'google/protobuf/unittest_import_public.proto'
       message_type: <
         name: 'ImportMessage'
         field: <
@@ -302,6 +564,7 @@ class DescriptorCopyToProtoTest(unittest.TestCase):
         java_package: 'com.google.protobuf.test'
         optimize_for: 1  # SPEED
       >
+      public_dependency: 0
       """)
 
     self._InternalTestCopyToProto(
@@ -330,5 +593,21 @@ class DescriptorCopyToProtoTest(unittest.TestCase):
         TEST_SERVICE_ASCII)
 
 
+class MakeDescriptorTest(unittest.TestCase):
+  def testMakeDescriptorWithUnsignedIntField(self):
+    file_descriptor_proto = descriptor_pb2.FileDescriptorProto()
+    file_descriptor_proto.name = 'Foo'
+    message_type = file_descriptor_proto.message_type.add()
+    message_type.name = file_descriptor_proto.name
+    field = message_type.field.add()
+    field.number = 1
+    field.name = 'uint64_field'
+    field.label = descriptor.FieldDescriptor.LABEL_REQUIRED
+    field.type = descriptor.FieldDescriptor.TYPE_UINT64
+    result = descriptor.MakeDescriptor(message_type)
+    self.assertEqual(result.fields[0].cpp_type,
+                     descriptor.FieldDescriptor.CPPTYPE_UINT64)
+
+
 if __name__ == '__main__':
   unittest.main()

+ 89 - 0
python/google/protobuf/internal/enum_type_wrapper.py

@@ -0,0 +1,89 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2008 Google Inc.  All rights reserved.
+# http://code.google.com/p/protobuf/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""A simple wrapper around enum types to expose utility functions.
+
+Instances are created as properties with the same name as the enum they wrap
+on proto classes.  For usage, see:
+  reflection_test.py
+"""
+
+__author__ = 'rabsatt@google.com (Kevin Rabsatt)'
+
+
+class EnumTypeWrapper(object):
+  """A utility for finding the names of enum values."""
+
+  DESCRIPTOR = None
+
+  def __init__(self, enum_type):
+    """Inits EnumTypeWrapper with an EnumDescriptor."""
+    self._enum_type = enum_type
+    self.DESCRIPTOR = enum_type;
+
+  def Name(self, number):
+    """Returns a string containing the name of an enum value."""
+    if number in self._enum_type.values_by_number:
+      return self._enum_type.values_by_number[number].name
+    raise ValueError('Enum %s has no name defined for value %d' % (
+        self._enum_type.name, number))
+
+  def Value(self, name):
+    """Returns the value coresponding to the given enum name."""
+    if name in self._enum_type.values_by_name:
+      return self._enum_type.values_by_name[name].number
+    raise ValueError('Enum %s has no value defined for name %s' % (
+        self._enum_type.name, name))
+
+  def keys(self):
+    """Return a list of the string names in the enum.
+
+    These are returned in the order they were defined in the .proto file.
+    """
+
+    return [value_descriptor.name
+            for value_descriptor in self._enum_type.values]
+
+  def values(self):
+    """Return a list of the integer values in the enum.
+
+    These are returned in the order they were defined in the .proto file.
+    """
+
+    return [value_descriptor.number
+            for value_descriptor in self._enum_type.values]
+
+  def items(self):
+    """Return a list of the (name, value) pairs of the enum.
+
+    These are returned in the order they were defined in the .proto file.
+    """
+    return [(value_descriptor.name, value_descriptor.number)
+            for value_descriptor in self._enum_type.values]

+ 55 - 0
python/google/protobuf/internal/factory_test1.proto

@@ -0,0 +1,55 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: matthewtoia@google.com (Matt Toia)
+
+
+package google.protobuf.python.internal;
+
+
+enum Factory1Enum {
+  FACTORY_1_VALUE_0 = 0;
+  FACTORY_1_VALUE_1 = 1;
+}
+
+message Factory1Message {
+  optional Factory1Enum factory_1_enum = 1;
+  enum NestedFactory1Enum {
+    NESTED_FACTORY_1_VALUE_0 = 0;
+    NESTED_FACTORY_1_VALUE_1 = 1;
+  }
+  optional NestedFactory1Enum nested_factory_1_enum = 2;
+  message NestedFactory1Message {
+    optional string value = 1;
+  }
+  optional NestedFactory1Message nested_factory_1_message = 3;
+  optional int32 scalar_value = 4;
+  repeated string list_value = 5;
+}

+ 77 - 0
python/google/protobuf/internal/factory_test2.proto

@@ -0,0 +1,77 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: matthewtoia@google.com (Matt Toia)
+
+
+package google.protobuf.python.internal;
+
+import "google/protobuf/internal/factory_test1.proto";
+
+
+enum Factory2Enum {
+  FACTORY_2_VALUE_0 = 0;
+  FACTORY_2_VALUE_1 = 1;
+}
+
+message Factory2Message {
+  required int32 mandatory = 1;
+  optional Factory2Enum factory_2_enum = 2;
+  enum NestedFactory2Enum {
+    NESTED_FACTORY_2_VALUE_0 = 0;
+    NESTED_FACTORY_2_VALUE_1 = 1;
+  }
+  optional NestedFactory2Enum nested_factory_2_enum = 3;
+  message NestedFactory2Message {
+    optional string value = 1;
+  }
+  optional NestedFactory2Message nested_factory_2_message = 4;
+  optional Factory1Message factory_1_message = 5;
+  optional Factory1Enum factory_1_enum = 6;
+  optional Factory1Message.NestedFactory1Enum nested_factory_1_enum = 7;
+  optional Factory1Message.NestedFactory1Message nested_factory_1_message = 8;
+  optional Factory2Message circular_message = 9;
+  optional string scalar_value = 10;
+  repeated string list_value = 11;
+  repeated group Grouped = 12 {
+    optional string part_1 = 13;
+    optional string part_2 = 14;
+  }
+  optional LoopMessage loop = 15;
+  optional int32 int_with_default = 16 [default = 1776];
+  optional double double_with_default = 17 [default = 9.99];
+  optional string string_with_default = 18 [default = "hello world"];
+  optional bool bool_with_default = 19 [default = false];
+  optional Factory2Enum enum_with_default = 20 [default = FACTORY_2_VALUE_1];
+}
+
+message LoopMessage {
+  optional Factory2Message loop = 1;
+}

+ 25 - 0
python/google/protobuf/internal/generator_test.py

@@ -42,8 +42,10 @@ further ensures that we can use Python protocol message objects as we expect.
 __author__ = 'robinson@google.com (Will Robinson)'
 
 import unittest
+from google.protobuf.internal import test_bad_identifiers_pb2
 from google.protobuf import unittest_custom_options_pb2
 from google.protobuf import unittest_import_pb2
+from google.protobuf import unittest_import_public_pb2
 from google.protobuf import unittest_mset_pb2
 from google.protobuf import unittest_pb2
 from google.protobuf import unittest_no_generic_services_pb2
@@ -239,6 +241,29 @@ class GeneratorTest(unittest.TestCase):
         unittest_pb2._TESTALLTYPES_NESTEDMESSAGE.name in
         file_type.message_types_by_name)
 
+  def testPublicImports(self):
+    # Test public imports as embedded message.
+    all_type_proto = unittest_pb2.TestAllTypes()
+    self.assertEqual(0, all_type_proto.optional_public_import_message.e)
+
+    # PublicImportMessage is actually defined in unittest_import_public_pb2
+    # module, and is public imported by unittest_import_pb2 module.
+    public_import_proto = unittest_import_pb2.PublicImportMessage()
+    self.assertEqual(0, public_import_proto.e)
+    self.assertTrue(unittest_import_public_pb2.PublicImportMessage is
+                    unittest_import_pb2.PublicImportMessage)
+
+  def testBadIdentifiers(self):
+    # We're just testing that the code was imported without problems.
+    message = test_bad_identifiers_pb2.TestBadIdentifiers()
+    self.assertEqual(message.Extensions[test_bad_identifiers_pb2.message],
+                     "foo")
+    self.assertEqual(message.Extensions[test_bad_identifiers_pb2.descriptor],
+                     "bar")
+    self.assertEqual(message.Extensions[test_bad_identifiers_pb2.reflection],
+                     "baz")
+    self.assertEqual(message.Extensions[test_bad_identifiers_pb2.service],
+                     "qux")
 
 if __name__ == '__main__':
   unittest.main()

+ 45 - 0
python/google/protobuf/internal/message_cpp_test.py

@@ -0,0 +1,45 @@
+#! /usr/bin/python
+#
+# Protocol Buffers - Google's data interchange format
+# Copyright 2008 Google Inc.  All rights reserved.
+# http://code.google.com/p/protobuf/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Tests for google.protobuf.internal.message_cpp."""
+
+__author__ = 'shahms@google.com (Shahms King)'
+
+import os
+os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'cpp'
+
+import unittest
+from google.protobuf.internal.message_test import *
+
+
+if __name__ == '__main__':
+  unittest.main()

+ 113 - 0
python/google/protobuf/internal/message_factory_test.py

@@ -0,0 +1,113 @@
+#! /usr/bin/python
+#
+# Protocol Buffers - Google's data interchange format
+# Copyright 2008 Google Inc.  All rights reserved.
+# http://code.google.com/p/protobuf/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Tests for google.protobuf.message_factory."""
+
+__author__ = 'matthewtoia@google.com (Matt Toia)'
+
+import unittest
+from google.protobuf import descriptor_pb2
+from google.protobuf.internal import factory_test1_pb2
+from google.protobuf.internal import factory_test2_pb2
+from google.protobuf import descriptor_database
+from google.protobuf import descriptor_pool
+from google.protobuf import message_factory
+
+
+class MessageFactoryTest(unittest.TestCase):
+
+  def setUp(self):
+    self.factory_test1_fd = descriptor_pb2.FileDescriptorProto.FromString(
+        factory_test1_pb2.DESCRIPTOR.serialized_pb)
+    self.factory_test2_fd = descriptor_pb2.FileDescriptorProto.FromString(
+        factory_test2_pb2.DESCRIPTOR.serialized_pb)
+
+  def _ExerciseDynamicClass(self, cls):
+    msg = cls()
+    msg.mandatory = 42
+    msg.nested_factory_2_enum = 0
+    msg.nested_factory_2_message.value = 'nested message value'
+    msg.factory_1_message.factory_1_enum = 1
+    msg.factory_1_message.nested_factory_1_enum = 0
+    msg.factory_1_message.nested_factory_1_message.value = (
+        'nested message value')
+    msg.factory_1_message.scalar_value = 22
+    msg.factory_1_message.list_value.extend(['one', 'two', 'three'])
+    msg.factory_1_message.list_value.append('four')
+    msg.factory_1_enum = 1
+    msg.nested_factory_1_enum = 0
+    msg.nested_factory_1_message.value = 'nested message value'
+    msg.circular_message.mandatory = 1
+    msg.circular_message.circular_message.mandatory = 2
+    msg.circular_message.scalar_value = 'one deep'
+    msg.scalar_value = 'zero deep'
+    msg.list_value.extend(['four', 'three', 'two'])
+    msg.list_value.append('one')
+    msg.grouped.add()
+    msg.grouped[0].part_1 = 'hello'
+    msg.grouped[0].part_2 = 'world'
+    msg.grouped.add(part_1='testing', part_2='123')
+    msg.loop.loop.mandatory = 2
+    msg.loop.loop.loop.loop.mandatory = 4
+    serialized = msg.SerializeToString()
+    converted = factory_test2_pb2.Factory2Message.FromString(serialized)
+    reserialized = converted.SerializeToString()
+    self.assertEquals(serialized, reserialized)
+    result = cls.FromString(reserialized)
+    self.assertEquals(msg, result)
+
+  def testGetPrototype(self):
+    db = descriptor_database.DescriptorDatabase()
+    pool = descriptor_pool.DescriptorPool(db)
+    db.Add(self.factory_test1_fd)
+    db.Add(self.factory_test2_fd)
+    factory = message_factory.MessageFactory()
+    cls = factory.GetPrototype(pool.FindMessageTypeByName(
+        'net.proto2.python.internal.Factory2Message'))
+    self.assertIsNot(cls, factory_test2_pb2.Factory2Message)
+    self._ExerciseDynamicClass(cls)
+    cls2 = factory.GetPrototype(pool.FindMessageTypeByName(
+        'net.proto2.python.internal.Factory2Message'))
+    self.assertIs(cls, cls2)
+
+  def testGetMessages(self):
+    messages = message_factory.GetMessages([self.factory_test2_fd,
+                                            self.factory_test1_fd])
+    self.assertContainsSubset(
+        ['net.proto2.python.internal.Factory2Message',
+         'net.proto2.python.internal.Factory1Message'],
+        messages.keys())
+    self._ExerciseDynamicClass(
+        messages['net.proto2.python.internal.Factory2Message'])
+
+if __name__ == '__main__':
+  unittest.main()

+ 166 - 15
python/google/protobuf/internal/message_test.py

@@ -45,10 +45,15 @@ __author__ = 'gps@google.com (Gregory P. Smith)'
 
 import copy
 import math
+import operator
+import pickle
+
 import unittest
 from google.protobuf import unittest_import_pb2
 from google.protobuf import unittest_pb2
+from google.protobuf.internal import api_implementation
 from google.protobuf.internal import test_util
+from google.protobuf import message
 
 # Python pre-2.6 does not have isinf() or isnan() functions, so we have
 # to provide our own.
@@ -70,9 +75,9 @@ class MessageTest(unittest.TestCase):
     golden_message = unittest_pb2.TestAllTypes()
     golden_message.ParseFromString(golden_data)
     test_util.ExpectAllFieldsSet(self, golden_message)
-    self.assertTrue(golden_message.SerializeToString() == golden_data)
+    self.assertEqual(golden_data, golden_message.SerializeToString())
     golden_copy = copy.deepcopy(golden_message)
-    self.assertTrue(golden_copy.SerializeToString() == golden_data)
+    self.assertEqual(golden_data, golden_copy.SerializeToString())
 
   def testGoldenExtensions(self):
     golden_data = test_util.GoldenFile('golden_message').read()
@@ -81,9 +86,9 @@ class MessageTest(unittest.TestCase):
     all_set = unittest_pb2.TestAllExtensions()
     test_util.SetAllExtensions(all_set)
     self.assertEquals(all_set, golden_message)
-    self.assertTrue(golden_message.SerializeToString() == golden_data)
+    self.assertEqual(golden_data, golden_message.SerializeToString())
     golden_copy = copy.deepcopy(golden_message)
-    self.assertTrue(golden_copy.SerializeToString() == golden_data)
+    self.assertEqual(golden_data, golden_copy.SerializeToString())
 
   def testGoldenPackedMessage(self):
     golden_data = test_util.GoldenFile('golden_packed_fields_message').read()
@@ -92,9 +97,9 @@ class MessageTest(unittest.TestCase):
     all_set = unittest_pb2.TestPackedTypes()
     test_util.SetAllPackedFields(all_set)
     self.assertEquals(all_set, golden_message)
-    self.assertTrue(all_set.SerializeToString() == golden_data)
+    self.assertEqual(golden_data, all_set.SerializeToString())
     golden_copy = copy.deepcopy(golden_message)
-    self.assertTrue(golden_copy.SerializeToString() == golden_data)
+    self.assertEqual(golden_data, golden_copy.SerializeToString())
 
   def testGoldenPackedExtensions(self):
     golden_data = test_util.GoldenFile('golden_packed_fields_message').read()
@@ -103,9 +108,28 @@ class MessageTest(unittest.TestCase):
     all_set = unittest_pb2.TestPackedExtensions()
     test_util.SetAllPackedExtensions(all_set)
     self.assertEquals(all_set, golden_message)
-    self.assertTrue(all_set.SerializeToString() == golden_data)
+    self.assertEqual(golden_data, all_set.SerializeToString())
     golden_copy = copy.deepcopy(golden_message)
-    self.assertTrue(golden_copy.SerializeToString() == golden_data)
+    self.assertEqual(golden_data, golden_copy.SerializeToString())
+
+  def testPickleSupport(self):
+    golden_data = test_util.GoldenFile('golden_message').read()
+    golden_message = unittest_pb2.TestAllTypes()
+    golden_message.ParseFromString(golden_data)
+    pickled_message = pickle.dumps(golden_message)
+
+    unpickled_message = pickle.loads(pickled_message)
+    self.assertEquals(unpickled_message, golden_message)
+
+  def testPickleIncompleteProto(self):
+    golden_message = unittest_pb2.TestRequired(a=1)
+    pickled_message = pickle.dumps(golden_message)
+
+    unpickled_message = pickle.loads(pickled_message)
+    self.assertEquals(unpickled_message, golden_message)
+    self.assertEquals(unpickled_message.a, 1)
+    # This is still an incomplete proto - so serializing should fail
+    self.assertRaises(message.EncodeError, unpickled_message.SerializeToString)
 
   def testPositiveInfinity(self):
     golden_data = ('\x5D\x00\x00\x80\x7F'
@@ -118,7 +142,7 @@ class MessageTest(unittest.TestCase):
     self.assertTrue(IsPosInf(golden_message.optional_double))
     self.assertTrue(IsPosInf(golden_message.repeated_float[0]))
     self.assertTrue(IsPosInf(golden_message.repeated_double[0]))
-    self.assertTrue(golden_message.SerializeToString() == golden_data)
+    self.assertEqual(golden_data, golden_message.SerializeToString())
 
   def testNegativeInfinity(self):
     golden_data = ('\x5D\x00\x00\x80\xFF'
@@ -131,7 +155,7 @@ class MessageTest(unittest.TestCase):
     self.assertTrue(IsNegInf(golden_message.optional_double))
     self.assertTrue(IsNegInf(golden_message.repeated_float[0]))
     self.assertTrue(IsNegInf(golden_message.repeated_double[0]))
-    self.assertTrue(golden_message.SerializeToString() == golden_data)
+    self.assertEqual(golden_data, golden_message.SerializeToString())
 
   def testNotANumber(self):
     golden_data = ('\x5D\x00\x00\xC0\x7F'
@@ -144,7 +168,18 @@ class MessageTest(unittest.TestCase):
     self.assertTrue(isnan(golden_message.optional_double))
     self.assertTrue(isnan(golden_message.repeated_float[0]))
     self.assertTrue(isnan(golden_message.repeated_double[0]))
-    self.assertTrue(golden_message.SerializeToString() == golden_data)
+
+    # The protocol buffer may serialize to any one of multiple different
+    # representations of a NaN.  Rather than verify a specific representation,
+    # verify the serialized string can be converted into a correctly
+    # behaving protocol buffer.
+    serialized = golden_message.SerializeToString()
+    message = unittest_pb2.TestAllTypes()
+    message.ParseFromString(serialized)
+    self.assertTrue(isnan(message.optional_float))
+    self.assertTrue(isnan(message.optional_double))
+    self.assertTrue(isnan(message.repeated_float[0]))
+    self.assertTrue(isnan(message.repeated_double[0]))
 
   def testPositiveInfinityPacked(self):
     golden_data = ('\xA2\x06\x04\x00\x00\x80\x7F'
@@ -153,7 +188,7 @@ class MessageTest(unittest.TestCase):
     golden_message.ParseFromString(golden_data)
     self.assertTrue(IsPosInf(golden_message.packed_float[0]))
     self.assertTrue(IsPosInf(golden_message.packed_double[0]))
-    self.assertTrue(golden_message.SerializeToString() == golden_data)
+    self.assertEqual(golden_data, golden_message.SerializeToString())
 
   def testNegativeInfinityPacked(self):
     golden_data = ('\xA2\x06\x04\x00\x00\x80\xFF'
@@ -162,7 +197,7 @@ class MessageTest(unittest.TestCase):
     golden_message.ParseFromString(golden_data)
     self.assertTrue(IsNegInf(golden_message.packed_float[0]))
     self.assertTrue(IsNegInf(golden_message.packed_double[0]))
-    self.assertTrue(golden_message.SerializeToString() == golden_data)
+    self.assertEqual(golden_data, golden_message.SerializeToString())
 
   def testNotANumberPacked(self):
     golden_data = ('\xA2\x06\x04\x00\x00\xC0\x7F'
@@ -171,7 +206,12 @@ class MessageTest(unittest.TestCase):
     golden_message.ParseFromString(golden_data)
     self.assertTrue(isnan(golden_message.packed_float[0]))
     self.assertTrue(isnan(golden_message.packed_double[0]))
-    self.assertTrue(golden_message.SerializeToString() == golden_data)
+
+    serialized = golden_message.SerializeToString()
+    message = unittest_pb2.TestPackedTypes()
+    message.ParseFromString(serialized)
+    self.assertTrue(isnan(message.packed_float[0]))
+    self.assertTrue(isnan(message.packed_double[0]))
 
   def testExtremeFloatValues(self):
     message = unittest_pb2.TestAllTypes()
@@ -218,7 +258,7 @@ class MessageTest(unittest.TestCase):
     message.ParseFromString(message.SerializeToString())
     self.assertTrue(message.optional_float == -kMostNegExponentOneSigBit)
 
-  def testExtremeFloatValues(self):
+  def testExtremeDoubleValues(self):
     message = unittest_pb2.TestAllTypes()
 
     # Most positive exponent, no significand bits set.
@@ -338,6 +378,117 @@ class MessageTest(unittest.TestCase):
     self.assertEqual(message.repeated_nested_message[4].bb, 5)
     self.assertEqual(message.repeated_nested_message[5].bb, 6)
 
+  def testRepeatedCompositeFieldSortArguments(self):
+    """Check sorting a repeated composite field using list.sort() arguments."""
+    message = unittest_pb2.TestAllTypes()
+
+    get_bb = operator.attrgetter('bb')
+    cmp_bb = lambda a, b: cmp(a.bb, b.bb)
+    message.repeated_nested_message.add().bb = 1
+    message.repeated_nested_message.add().bb = 3
+    message.repeated_nested_message.add().bb = 2
+    message.repeated_nested_message.add().bb = 6
+    message.repeated_nested_message.add().bb = 5
+    message.repeated_nested_message.add().bb = 4
+    message.repeated_nested_message.sort(key=get_bb)
+    self.assertEqual([k.bb for k in message.repeated_nested_message],
+                     [1, 2, 3, 4, 5, 6])
+    message.repeated_nested_message.sort(key=get_bb, reverse=True)
+    self.assertEqual([k.bb for k in message.repeated_nested_message],
+                     [6, 5, 4, 3, 2, 1])
+    message.repeated_nested_message.sort(sort_function=cmp_bb)
+    self.assertEqual([k.bb for k in message.repeated_nested_message],
+                     [1, 2, 3, 4, 5, 6])
+    message.repeated_nested_message.sort(cmp=cmp_bb, reverse=True)
+    self.assertEqual([k.bb for k in message.repeated_nested_message],
+                     [6, 5, 4, 3, 2, 1])
+
+  def testRepeatedScalarFieldSortArguments(self):
+    """Check sorting a scalar field using list.sort() arguments."""
+    message = unittest_pb2.TestAllTypes()
+
+    abs_cmp = lambda a, b: cmp(abs(a), abs(b))
+    message.repeated_int32.append(-3)
+    message.repeated_int32.append(-2)
+    message.repeated_int32.append(-1)
+    message.repeated_int32.sort(key=abs)
+    self.assertEqual(list(message.repeated_int32), [-1, -2, -3])
+    message.repeated_int32.sort(key=abs, reverse=True)
+    self.assertEqual(list(message.repeated_int32), [-3, -2, -1])
+    message.repeated_int32.sort(sort_function=abs_cmp)
+    self.assertEqual(list(message.repeated_int32), [-1, -2, -3])
+    message.repeated_int32.sort(cmp=abs_cmp, reverse=True)
+    self.assertEqual(list(message.repeated_int32), [-3, -2, -1])
+
+    len_cmp = lambda a, b: cmp(len(a), len(b))
+    message.repeated_string.append('aaa')
+    message.repeated_string.append('bb')
+    message.repeated_string.append('c')
+    message.repeated_string.sort(key=len)
+    self.assertEqual(list(message.repeated_string), ['c', 'bb', 'aaa'])
+    message.repeated_string.sort(key=len, reverse=True)
+    self.assertEqual(list(message.repeated_string), ['aaa', 'bb', 'c'])
+    message.repeated_string.sort(sort_function=len_cmp)
+    self.assertEqual(list(message.repeated_string), ['c', 'bb', 'aaa'])
+    message.repeated_string.sort(cmp=len_cmp, reverse=True)
+    self.assertEqual(list(message.repeated_string), ['aaa', 'bb', 'c'])
+
+  def testParsingMerge(self):
+    """Check the merge behavior when a required or optional field appears
+    multiple times in the input."""
+    messages = [
+        unittest_pb2.TestAllTypes(),
+        unittest_pb2.TestAllTypes(),
+        unittest_pb2.TestAllTypes() ]
+    messages[0].optional_int32 = 1
+    messages[1].optional_int64 = 2
+    messages[2].optional_int32 = 3
+    messages[2].optional_string = 'hello'
+
+    merged_message = unittest_pb2.TestAllTypes()
+    merged_message.optional_int32 = 3
+    merged_message.optional_int64 = 2
+    merged_message.optional_string = 'hello'
+
+    generator = unittest_pb2.TestParsingMerge.RepeatedFieldsGenerator()
+    generator.field1.extend(messages)
+    generator.field2.extend(messages)
+    generator.field3.extend(messages)
+    generator.ext1.extend(messages)
+    generator.ext2.extend(messages)
+    generator.group1.add().field1.MergeFrom(messages[0])
+    generator.group1.add().field1.MergeFrom(messages[1])
+    generator.group1.add().field1.MergeFrom(messages[2])
+    generator.group2.add().field1.MergeFrom(messages[0])
+    generator.group2.add().field1.MergeFrom(messages[1])
+    generator.group2.add().field1.MergeFrom(messages[2])
+
+    data = generator.SerializeToString()
+    parsing_merge = unittest_pb2.TestParsingMerge()
+    parsing_merge.ParseFromString(data)
+
+    # Required and optional fields should be merged.
+    self.assertEqual(parsing_merge.required_all_types, merged_message)
+    self.assertEqual(parsing_merge.optional_all_types, merged_message)
+    self.assertEqual(parsing_merge.optionalgroup.optional_group_all_types,
+                     merged_message)
+    self.assertEqual(parsing_merge.Extensions[
+                     unittest_pb2.TestParsingMerge.optional_ext],
+                     merged_message)
+
+    # Repeated fields should not be merged.
+    self.assertEqual(len(parsing_merge.repeated_all_types), 3)
+    self.assertEqual(len(parsing_merge.repeatedgroup), 3)
+    self.assertEqual(len(parsing_merge.Extensions[
+        unittest_pb2.TestParsingMerge.repeated_ext]), 3)
+
+
+  def testSortEmptyRepeatedCompositeContainer(self):
+    """Exercise a scenario that has led to segfaults in the past.
+    """
+    m = unittest_pb2.TestAllTypes()
+    m.repeated_nested_message.sort()
+
 
 if __name__ == '__main__':
   unittest.main()

+ 49 - 0
python/google/protobuf/internal/more_extensions_dynamic.proto

@@ -0,0 +1,49 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: jasonh@google.com (Jason Hsueh)
+//
+// This file is used to test a corner case in the CPP implementation where the
+// generated C++ type is available for the extendee, but the extension is
+// defined in a file whose C++ type is not in the binary.
+
+
+import "google/protobuf/internal/more_extensions.proto";
+
+package google.protobuf.internal;
+
+message DynamicMessageType {
+  optional int32 a = 1;
+}
+
+extend ExtendedMessage {
+  optional int32 dynamic_int32_extension = 100;
+  optional DynamicMessageType dynamic_message_extension = 101;
+}

+ 59 - 7
python/google/protobuf/internal/python_message.py

@@ -54,6 +54,7 @@ try:
   from cStringIO import StringIO
 except ImportError:
   from StringIO import StringIO
+import copy_reg
 import struct
 import weakref
 
@@ -61,6 +62,7 @@ import weakref
 from google.protobuf.internal import containers
 from google.protobuf.internal import decoder
 from google.protobuf.internal import encoder
+from google.protobuf.internal import enum_type_wrapper
 from google.protobuf.internal import message_listener as message_listener_mod
 from google.protobuf.internal import type_checkers
 from google.protobuf.internal import wire_format
@@ -71,9 +73,10 @@ from google.protobuf import text_format
 _FieldDescriptor = descriptor_mod.FieldDescriptor
 
 
-def NewMessage(descriptor, dictionary):
+def NewMessage(bases, descriptor, dictionary):
   _AddClassAttributesForNestedExtensions(descriptor, dictionary)
   _AddSlots(descriptor, dictionary)
+  return bases
 
 
 def InitMessage(descriptor, cls):
@@ -96,6 +99,7 @@ def InitMessage(descriptor, cls):
   _AddStaticMethods(cls)
   _AddMessageMethods(descriptor, cls)
   _AddPrivateHelperMethods(cls)
+  copy_reg.pickle(cls, lambda obj: (cls, (), obj.__getstate__()))
 
 
 # Stateless helpers for GeneratedProtocolMessageType below.
@@ -145,6 +149,10 @@ def _VerifyExtensionHandle(message, extension_handle):
   if not extension_handle.is_extension:
     raise KeyError('"%s" is not an extension.' % extension_handle.full_name)
 
+  if not extension_handle.containing_type:
+    raise KeyError('"%s" is missing a containing_type.'
+                   % extension_handle.full_name)
+
   if extension_handle.containing_type is not message.DESCRIPTOR:
     raise KeyError('Extension "%s" extends message type "%s", but this '
                    'message is of type "%s".' %
@@ -164,6 +172,7 @@ def _AddSlots(message_descriptor, dictionary):
   dictionary['__slots__'] = ['_cached_byte_size',
                              '_cached_byte_size_dirty',
                              '_fields',
+                             '_unknown_fields',
                              '_is_present_in_parent',
                              '_listener',
                              '_listener_for_children',
@@ -224,11 +233,14 @@ def _AddClassAttributesForNestedExtensions(descriptor, dictionary):
 def _AddEnumValues(descriptor, cls):
   """Sets class-level attributes for all enum fields defined in this message.
 
+  Also exporting a class-level object that can name enum values.
+
   Args:
     descriptor: Descriptor object for this message type.
     cls: Class we're constructing for this message type.
   """
   for enum_type in descriptor.enum_types:
+    setattr(cls, enum_type.name, enum_type_wrapper.EnumTypeWrapper(enum_type))
     for enum_value in enum_type.values:
       setattr(cls, enum_value.name, enum_value.number)
 
@@ -248,7 +260,7 @@ def _DefaultValueConstructorForField(field):
   """
 
   if field.label == _FieldDescriptor.LABEL_REPEATED:
-    if field.default_value != []:
+    if field.has_default_value and field.default_value != []:
       raise ValueError('Repeated field default value not empty list: %s' % (
           field.default_value))
     if field.cpp_type == _FieldDescriptor.CPPTYPE_MESSAGE:
@@ -276,6 +288,8 @@ def _DefaultValueConstructorForField(field):
     return MakeSubMessageDefault
 
   def MakeScalarDefault(message):
+    # TODO(protobuf-team): This may be broken since there may not be
+    # default_value.  Combine with has_default_value somehow.
     return field.default_value
   return MakeScalarDefault
 
@@ -287,6 +301,9 @@ def _AddInitMethod(message_descriptor, cls):
     self._cached_byte_size = 0
     self._cached_byte_size_dirty = len(kwargs) > 0
     self._fields = {}
+    # _unknown_fields is () when empty for efficiency, and will be turned into
+    # a list if fields are added.
+    self._unknown_fields = ()
     self._is_present_in_parent = False
     self._listener = message_listener_mod.NullMessageListener()
     self._listener_for_children = _Listener(self)
@@ -428,6 +445,8 @@ def _AddPropertiesForNonRepeatedScalarField(field, cls):
   valid_values = set()
 
   def getter(self):
+    # TODO(protobuf-team): This may be broken since there may not be
+    # default_value.  Combine with has_default_value somehow.
     return self._fields.get(field, default_value)
   getter.__module__ = None
   getter.__doc__ = 'Getter for %s.' % proto_field_name
@@ -462,13 +481,18 @@ def _AddPropertiesForNonRepeatedCompositeField(field, cls):
   # for non-repeated scalars.
   proto_field_name = field.name
   property_name = _PropertyName(proto_field_name)
+
+  # TODO(komarek): Can anyone explain to me why we cache the message_type this
+  # way, instead of referring to field.message_type inside of getter(self)?
+  # What if someone sets message_type later on (which makes for simpler
+  # dyanmic proto descriptor and class creation code).
   message_type = field.message_type
 
   def getter(self):
     field_value = self._fields.get(field)
     if field_value is None:
       # Construct a new object to represent this field.
-      field_value = message_type._concrete_class()
+      field_value = message_type._concrete_class()  # use field.message_type?
       field_value._SetListener(self._listener_for_children)
 
       # Atomically check if another thread has preempted us and, if not, swap
@@ -620,6 +644,7 @@ def _AddClearMethod(message_descriptor, cls):
   def Clear(self):
     # Clear fields.
     self._fields = {}
+    self._unknown_fields = ()
     self._Modified()
   cls.Clear = Clear
 
@@ -649,7 +674,16 @@ def _AddEqualsMethod(message_descriptor, cls):
     if self is other:
       return True
 
-    return self.ListFields() == other.ListFields()
+    if not self.ListFields() == other.ListFields():
+      return False
+
+    # Sort unknown fields because their order shouldn't affect equality test.
+    unknown_fields = list(self._unknown_fields)
+    unknown_fields.sort()
+    other_unknown_fields = list(other._unknown_fields)
+    other_unknown_fields.sort()
+
+    return unknown_fields == other_unknown_fields
 
   cls.__eq__ = __eq__
 
@@ -710,6 +744,9 @@ def _AddByteSizeMethod(message_descriptor, cls):
     for field_descriptor, field_value in self.ListFields():
       size += field_descriptor._sizer(field_value)
 
+    for tag_bytes, value_bytes in self._unknown_fields:
+      size += len(tag_bytes) + len(value_bytes)
+
     self._cached_byte_size = size
     self._cached_byte_size_dirty = False
     self._listener_for_children.dirty = False
@@ -726,8 +763,8 @@ def _AddSerializeToStringMethod(message_descriptor, cls):
     errors = []
     if not self.IsInitialized():
       raise message_mod.EncodeError(
-          'Message is missing required fields: ' +
-          ','.join(self.FindInitializationErrors()))
+          'Message %s is missing required fields: %s' % (
+          self.DESCRIPTOR.full_name, ','.join(self.FindInitializationErrors())))
     return self.SerializePartialToString()
   cls.SerializeToString = SerializeToString
 
@@ -744,6 +781,9 @@ def _AddSerializePartialToStringMethod(message_descriptor, cls):
   def InternalSerialize(self, write_bytes):
     for field_descriptor, field_value in self.ListFields():
       field_descriptor._encoder(write_bytes, field_value)
+    for tag_bytes, value_bytes in self._unknown_fields:
+      write_bytes(tag_bytes)
+      write_bytes(value_bytes)
   cls._InternalSerialize = InternalSerialize
 
 
@@ -770,13 +810,18 @@ def _AddMergeFromStringMethod(message_descriptor, cls):
   def InternalParse(self, buffer, pos, end):
     self._Modified()
     field_dict = self._fields
+    unknown_field_list = self._unknown_fields
     while pos != end:
       (tag_bytes, new_pos) = local_ReadTag(buffer, pos)
       field_decoder = decoders_by_tag.get(tag_bytes)
       if field_decoder is None:
+        value_start_pos = new_pos
         new_pos = local_SkipField(buffer, new_pos, end, tag_bytes)
         if new_pos == -1:
           return pos
+        if not unknown_field_list:
+          unknown_field_list = self._unknown_fields = []
+        unknown_field_list.append((tag_bytes, buffer[value_start_pos:new_pos]))
         pos = new_pos
       else:
         pos = field_decoder(buffer, new_pos, end, self, field_dict)
@@ -873,7 +918,8 @@ def _AddMergeFromMethod(cls):
   def MergeFrom(self, msg):
     if not isinstance(msg, cls):
       raise TypeError(
-          "Parameter to MergeFrom() must be instance of same class.")
+          "Parameter to MergeFrom() must be instance of same class: "
+          "expected %s got %s." % (cls.__name__, type(msg).__name__))
 
     assert msg is not self
     self._Modified()
@@ -898,6 +944,12 @@ def _AddMergeFromMethod(cls):
           field_value.MergeFrom(value)
       else:
         self._fields[field] = value
+
+    if msg._unknown_fields:
+      if not self._unknown_fields:
+        self._unknown_fields = []
+      self._unknown_fields.extend(msg._unknown_fields)
+
   cls.MergeFrom = MergeFrom
 
 

+ 91 - 0
python/google/protobuf/internal/reflection_cpp_generated_test.py

@@ -0,0 +1,91 @@
+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Protocol Buffers - Google's data interchange format
+# Copyright 2008 Google Inc.  All rights reserved.
+# http://code.google.com/p/protobuf/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Unittest for reflection.py, which tests the generated C++ implementation."""
+
+__author__ = 'jasonh@google.com (Jason Hsueh)'
+
+import os
+os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'cpp'
+
+import unittest
+from google.protobuf.internal import api_implementation
+from google.protobuf.internal import more_extensions_dynamic_pb2
+from google.protobuf.internal import more_extensions_pb2
+from google.protobuf.internal.reflection_test import *
+
+
+class ReflectionCppTest(unittest.TestCase):
+  def testImplementationSetting(self):
+    self.assertEqual('cpp', api_implementation.Type())
+
+  def testExtensionOfGeneratedTypeInDynamicFile(self):
+    """Tests that a file built dynamically can extend a generated C++ type.
+
+    The C++ implementation uses a DescriptorPool that has the generated
+    DescriptorPool as an underlay. Typically, a type can only find
+    extensions in its own pool. With the python C-extension, the generated C++
+    extendee may be available, but not the extension. This tests that the
+    C-extension implements the correct special handling to make such extensions
+    available.
+    """
+    pb1 = more_extensions_pb2.ExtendedMessage()
+    # Test that basic accessors work.
+    self.assertFalse(
+        pb1.HasExtension(more_extensions_dynamic_pb2.dynamic_int32_extension))
+    self.assertFalse(
+        pb1.HasExtension(more_extensions_dynamic_pb2.dynamic_message_extension))
+    pb1.Extensions[more_extensions_dynamic_pb2.dynamic_int32_extension] = 17
+    pb1.Extensions[more_extensions_dynamic_pb2.dynamic_message_extension].a = 24
+    self.assertTrue(
+        pb1.HasExtension(more_extensions_dynamic_pb2.dynamic_int32_extension))
+    self.assertTrue(
+        pb1.HasExtension(more_extensions_dynamic_pb2.dynamic_message_extension))
+
+    # Now serialize the data and parse to a new message.
+    pb2 = more_extensions_pb2.ExtendedMessage()
+    pb2.MergeFromString(pb1.SerializeToString())
+
+    self.assertTrue(
+        pb2.HasExtension(more_extensions_dynamic_pb2.dynamic_int32_extension))
+    self.assertTrue(
+        pb2.HasExtension(more_extensions_dynamic_pb2.dynamic_message_extension))
+    self.assertEqual(
+        17, pb2.Extensions[more_extensions_dynamic_pb2.dynamic_int32_extension])
+    self.assertEqual(
+        24,
+        pb2.Extensions[more_extensions_dynamic_pb2.dynamic_message_extension].a)
+
+
+if __name__ == '__main__':
+  unittest.main()

+ 269 - 19
python/google/protobuf/internal/reflection_test.py

@@ -37,6 +37,7 @@ pure-Python protocol compiler.
 
 __author__ = 'robinson@google.com (Will Robinson)'
 
+import gc
 import operator
 import struct
 
@@ -318,15 +319,6 @@ class ReflectionTest(unittest.TestCase):
       # ...and ensure that the scalar field has returned to its default.
       self.assertEqual(0, getattr(composite_field, scalar_field_name))
 
-      # Finally, ensure that modifications to the old composite field object
-      # don't have any effect on the parent. Possible only with the pure-python
-      # implementation of the API.
-      #
-      # (NOTE that when we clear the composite field in the parent, we actually
-      # don't recursively clear down the tree.  Instead, we just disconnect the
-      # cleared composite from the tree.)
-      if api_implementation.Type() != 'python':
-        return
       self.assertTrue(old_composite_field is not composite_field)
       setattr(old_composite_field, scalar_field_name, new_val)
       self.assertTrue(not composite_field.HasField(scalar_field_name))
@@ -348,8 +340,6 @@ class ReflectionTest(unittest.TestCase):
     nested.bb = 23
 
   def testDisconnectingNestedMessageBeforeSettingField(self):
-    if api_implementation.Type() != 'python':
-      return
     proto = unittest_pb2.TestAllTypes()
     nested = proto.optional_nested_message
     proto.ClearField('optional_nested_message')  # Should disconnect from parent
@@ -358,6 +348,64 @@ class ReflectionTest(unittest.TestCase):
     self.assertTrue(not proto.HasField('optional_nested_message'))
     self.assertEqual(0, proto.optional_nested_message.bb)
 
+  def testGetDefaultMessageAfterDisconnectingDefaultMessage(self):
+    proto = unittest_pb2.TestAllTypes()
+    nested = proto.optional_nested_message
+    proto.ClearField('optional_nested_message')
+    del proto
+    del nested
+    # Force a garbage collect so that the underlying CMessages are freed along
+    # with the Messages they point to. This is to make sure we're not deleting
+    # default message instances.
+    gc.collect()
+    proto = unittest_pb2.TestAllTypes()
+    nested = proto.optional_nested_message
+
+  def testDisconnectingNestedMessageAfterSettingField(self):
+    proto = unittest_pb2.TestAllTypes()
+    nested = proto.optional_nested_message
+    nested.bb = 5
+    self.assertTrue(proto.HasField('optional_nested_message'))
+    proto.ClearField('optional_nested_message')  # Should disconnect from parent
+    self.assertEqual(5, nested.bb)
+    self.assertEqual(0, proto.optional_nested_message.bb)
+    self.assertTrue(nested is not proto.optional_nested_message)
+    nested.bb = 23
+    self.assertTrue(not proto.HasField('optional_nested_message'))
+    self.assertEqual(0, proto.optional_nested_message.bb)
+
+  def testDisconnectingNestedMessageBeforeGettingField(self):
+    proto = unittest_pb2.TestAllTypes()
+    self.assertTrue(not proto.HasField('optional_nested_message'))
+    proto.ClearField('optional_nested_message')
+    self.assertTrue(not proto.HasField('optional_nested_message'))
+
+  def testDisconnectingNestedMessageAfterMerge(self):
+    # This test exercises the code path that does not use ReleaseMessage().
+    # The underlying fear is that if we use ReleaseMessage() incorrectly,
+    # we will have memory leaks.  It's hard to check that that doesn't happen,
+    # but at least we can exercise that code path to make sure it works.
+    proto1 = unittest_pb2.TestAllTypes()
+    proto2 = unittest_pb2.TestAllTypes()
+    proto2.optional_nested_message.bb = 5
+    proto1.MergeFrom(proto2)
+    self.assertTrue(proto1.HasField('optional_nested_message'))
+    proto1.ClearField('optional_nested_message')
+    self.assertTrue(not proto1.HasField('optional_nested_message'))
+
+  def testDisconnectingLazyNestedMessage(self):
+    # This test exercises releasing a nested message that is lazy. This test
+    # only exercises real code in the C++ implementation as Python does not
+    # support lazy parsing, but the current C++ implementation results in
+    # memory corruption and a crash.
+    if api_implementation.Type() != 'python':
+      return
+    proto = unittest_pb2.TestAllTypes()
+    proto.optional_lazy_message.bb = 5
+    proto.ClearField('optional_lazy_message')
+    del proto
+    gc.collect()
+
   def testHasBitsWhenModifyingRepeatedFields(self):
     # Test nesting when we add an element to a repeated field in a submessage.
     proto = unittest_pb2.TestNestedMessageHasBits()
@@ -635,6 +683,77 @@ class ReflectionTest(unittest.TestCase):
     self.assertEqual(3, proto.BAZ)
     self.assertEqual(3, unittest_pb2.TestAllTypes.BAZ)
 
+  def testEnum_Name(self):
+    self.assertEqual('FOREIGN_FOO',
+                     unittest_pb2.ForeignEnum.Name(unittest_pb2.FOREIGN_FOO))
+    self.assertEqual('FOREIGN_BAR',
+                     unittest_pb2.ForeignEnum.Name(unittest_pb2.FOREIGN_BAR))
+    self.assertEqual('FOREIGN_BAZ',
+                     unittest_pb2.ForeignEnum.Name(unittest_pb2.FOREIGN_BAZ))
+    self.assertRaises(ValueError,
+                      unittest_pb2.ForeignEnum.Name, 11312)
+
+    proto = unittest_pb2.TestAllTypes()
+    self.assertEqual('FOO',
+                     proto.NestedEnum.Name(proto.FOO))
+    self.assertEqual('FOO',
+                     unittest_pb2.TestAllTypes.NestedEnum.Name(proto.FOO))
+    self.assertEqual('BAR',
+                     proto.NestedEnum.Name(proto.BAR))
+    self.assertEqual('BAR',
+                     unittest_pb2.TestAllTypes.NestedEnum.Name(proto.BAR))
+    self.assertEqual('BAZ',
+                     proto.NestedEnum.Name(proto.BAZ))
+    self.assertEqual('BAZ',
+                     unittest_pb2.TestAllTypes.NestedEnum.Name(proto.BAZ))
+    self.assertRaises(ValueError,
+                      proto.NestedEnum.Name, 11312)
+    self.assertRaises(ValueError,
+                      unittest_pb2.TestAllTypes.NestedEnum.Name, 11312)
+
+  def testEnum_Value(self):
+    self.assertEqual(unittest_pb2.FOREIGN_FOO,
+                     unittest_pb2.ForeignEnum.Value('FOREIGN_FOO'))
+    self.assertEqual(unittest_pb2.FOREIGN_BAR,
+                     unittest_pb2.ForeignEnum.Value('FOREIGN_BAR'))
+    self.assertEqual(unittest_pb2.FOREIGN_BAZ,
+                     unittest_pb2.ForeignEnum.Value('FOREIGN_BAZ'))
+    self.assertRaises(ValueError,
+                      unittest_pb2.ForeignEnum.Value, 'FO')
+
+    proto = unittest_pb2.TestAllTypes()
+    self.assertEqual(proto.FOO,
+                     proto.NestedEnum.Value('FOO'))
+    self.assertEqual(proto.FOO,
+                     unittest_pb2.TestAllTypes.NestedEnum.Value('FOO'))
+    self.assertEqual(proto.BAR,
+                     proto.NestedEnum.Value('BAR'))
+    self.assertEqual(proto.BAR,
+                     unittest_pb2.TestAllTypes.NestedEnum.Value('BAR'))
+    self.assertEqual(proto.BAZ,
+                     proto.NestedEnum.Value('BAZ'))
+    self.assertEqual(proto.BAZ,
+                     unittest_pb2.TestAllTypes.NestedEnum.Value('BAZ'))
+    self.assertRaises(ValueError,
+                      proto.NestedEnum.Value, 'Foo')
+    self.assertRaises(ValueError,
+                      unittest_pb2.TestAllTypes.NestedEnum.Value, 'Foo')
+
+  def testEnum_KeysAndValues(self):
+    self.assertEqual(['FOREIGN_FOO', 'FOREIGN_BAR', 'FOREIGN_BAZ'],
+                     unittest_pb2.ForeignEnum.keys())
+    self.assertEqual([4, 5, 6],
+                     unittest_pb2.ForeignEnum.values())
+    self.assertEqual([('FOREIGN_FOO', 4), ('FOREIGN_BAR', 5),
+                      ('FOREIGN_BAZ', 6)],
+                     unittest_pb2.ForeignEnum.items())
+
+    proto = unittest_pb2.TestAllTypes()
+    self.assertEqual(['FOO', 'BAR', 'BAZ'], proto.NestedEnum.keys())
+    self.assertEqual([1, 2, 3], proto.NestedEnum.values())
+    self.assertEqual([('FOO', 1), ('BAR', 2), ('BAZ', 3)],
+                     proto.NestedEnum.items())
+
   def testRepeatedScalars(self):
     proto = unittest_pb2.TestAllTypes()
 
@@ -826,6 +945,35 @@ class ReflectionTest(unittest.TestCase):
     self.assertEqual(1, len(proto.repeated_nested_message))
     self.assertEqual(23, proto.repeated_nested_message[0].bb)
 
+  def testRepeatedCompositeRemove(self):
+    proto = unittest_pb2.TestAllTypes()
+
+    self.assertEqual(0, len(proto.repeated_nested_message))
+    m0 = proto.repeated_nested_message.add()
+    # Need to set some differentiating variable so m0 != m1 != m2:
+    m0.bb = len(proto.repeated_nested_message)
+    m1 = proto.repeated_nested_message.add()
+    m1.bb = len(proto.repeated_nested_message)
+    self.assertTrue(m0 != m1)
+    m2 = proto.repeated_nested_message.add()
+    m2.bb = len(proto.repeated_nested_message)
+    self.assertListsEqual([m0, m1, m2], proto.repeated_nested_message)
+
+    self.assertEqual(3, len(proto.repeated_nested_message))
+    proto.repeated_nested_message.remove(m0)
+    self.assertEqual(2, len(proto.repeated_nested_message))
+    self.assertEqual(m1, proto.repeated_nested_message[0])
+    self.assertEqual(m2, proto.repeated_nested_message[1])
+
+    # Removing m0 again or removing None should raise error
+    self.assertRaises(ValueError, proto.repeated_nested_message.remove, m0)
+    self.assertRaises(ValueError, proto.repeated_nested_message.remove, None)
+    self.assertEqual(2, len(proto.repeated_nested_message))
+
+    proto.repeated_nested_message.remove(m2)
+    self.assertEqual(1, len(proto.repeated_nested_message))
+    self.assertEqual(m1, proto.repeated_nested_message[0])
+
   def testHandWrittenReflection(self):
     # Hand written extensions are only supported by the pure-Python
     # implementation of the API.
@@ -856,6 +1004,68 @@ class ReflectionTest(unittest.TestCase):
     self.assertEqual(23, myproto_instance.foo_field)
     self.assertTrue(myproto_instance.HasField('foo_field'))
 
+  def testDescriptorProtoSupport(self):
+    # Hand written descriptors/reflection are only supported by the pure-Python
+    # implementation of the API.
+    if api_implementation.Type() != 'python':
+      return
+
+    def AddDescriptorField(proto, field_name, field_type):
+      AddDescriptorField.field_index += 1
+      new_field = proto.field.add()
+      new_field.name = field_name
+      new_field.type = field_type
+      new_field.number = AddDescriptorField.field_index
+      new_field.label = descriptor_pb2.FieldDescriptorProto.LABEL_OPTIONAL
+
+    AddDescriptorField.field_index = 0
+
+    desc_proto = descriptor_pb2.DescriptorProto()
+    desc_proto.name = 'Car'
+    fdp = descriptor_pb2.FieldDescriptorProto
+    AddDescriptorField(desc_proto, 'name', fdp.TYPE_STRING)
+    AddDescriptorField(desc_proto, 'year', fdp.TYPE_INT64)
+    AddDescriptorField(desc_proto, 'automatic', fdp.TYPE_BOOL)
+    AddDescriptorField(desc_proto, 'price', fdp.TYPE_DOUBLE)
+    # Add a repeated field
+    AddDescriptorField.field_index += 1
+    new_field = desc_proto.field.add()
+    new_field.name = 'owners'
+    new_field.type = fdp.TYPE_STRING
+    new_field.number = AddDescriptorField.field_index
+    new_field.label = descriptor_pb2.FieldDescriptorProto.LABEL_REPEATED
+
+    desc = descriptor.MakeDescriptor(desc_proto)
+    self.assertTrue(desc.fields_by_name.has_key('name'))
+    self.assertTrue(desc.fields_by_name.has_key('year'))
+    self.assertTrue(desc.fields_by_name.has_key('automatic'))
+    self.assertTrue(desc.fields_by_name.has_key('price'))
+    self.assertTrue(desc.fields_by_name.has_key('owners'))
+
+    class CarMessage(message.Message):
+      __metaclass__ = reflection.GeneratedProtocolMessageType
+      DESCRIPTOR = desc
+
+    prius = CarMessage()
+    prius.name = 'prius'
+    prius.year = 2010
+    prius.automatic = True
+    prius.price = 25134.75
+    prius.owners.extend(['bob', 'susan'])
+
+    serialized_prius = prius.SerializeToString()
+    new_prius = reflection.ParseMessage(desc, serialized_prius)
+    self.assertTrue(new_prius is not prius)
+    self.assertEqual(prius, new_prius)
+
+    # these are unnecessary assuming message equality works as advertised but
+    # explicitly check to be safe since we're mucking about in metaclass foo
+    self.assertEqual(prius.name, new_prius.name)
+    self.assertEqual(prius.year, new_prius.year)
+    self.assertEqual(prius.automatic, new_prius.automatic)
+    self.assertEqual(prius.price, new_prius.price)
+    self.assertEqual(prius.owners, new_prius.owners)
+
   def testTopLevelExtensionsForOptionalScalar(self):
     extendee_proto = unittest_pb2.TestAllExtensions()
     extension = unittest_pb2.optional_int32_extension
@@ -1243,7 +1453,12 @@ class ReflectionTest(unittest.TestCase):
 
   def testClear(self):
     proto = unittest_pb2.TestAllTypes()
-    test_util.SetAllFields(proto)
+    # C++ implementation does not support lazy fields right now so leave it
+    # out for now.
+    if api_implementation.Type() == 'python':
+      test_util.SetAllFields(proto)
+    else:
+      test_util.SetAllNonLazyFields(proto)
     # Clear the message.
     proto.Clear()
     self.assertEquals(proto.ByteSize(), 0)
@@ -1259,6 +1474,33 @@ class ReflectionTest(unittest.TestCase):
     empty_proto = unittest_pb2.TestAllExtensions()
     self.assertEquals(proto, empty_proto)
 
+  def testDisconnectingBeforeClear(self):
+    proto = unittest_pb2.TestAllTypes()
+    nested = proto.optional_nested_message
+    proto.Clear()
+    self.assertTrue(nested is not proto.optional_nested_message)
+    nested.bb = 23
+    self.assertTrue(not proto.HasField('optional_nested_message'))
+    self.assertEqual(0, proto.optional_nested_message.bb)
+
+    proto = unittest_pb2.TestAllTypes()
+    nested = proto.optional_nested_message
+    nested.bb = 5
+    foreign = proto.optional_foreign_message
+    foreign.c = 6
+
+    proto.Clear()
+    self.assertTrue(nested is not proto.optional_nested_message)
+    self.assertTrue(foreign is not proto.optional_foreign_message)
+    self.assertEqual(5, nested.bb)
+    self.assertEqual(6, foreign.c)
+    nested.bb = 15
+    foreign.c = 16
+    self.assertTrue(not proto.HasField('optional_nested_message'))
+    self.assertEqual(0, proto.optional_nested_message.bb)
+    self.assertTrue(not proto.HasField('optional_foreign_message'))
+    self.assertEqual(0, proto.optional_foreign_message.c)
+
   def assertInitialized(self, proto):
     self.assertTrue(proto.IsInitialized())
     # Neither method should raise an exception.
@@ -1408,7 +1650,7 @@ class ReflectionTest(unittest.TestCase):
     unicode_decode_failed = False
     try:
       message2.MergeFromString(bytes)
-    except UnicodeDecodeError, e:
+    except UnicodeDecodeError as e:
       unicode_decode_failed = True
     string_field = message2.str
     self.assertTrue(unicode_decode_failed or type(string_field) == str)
@@ -2119,7 +2361,7 @@ class SerializationTest(unittest.TestCase):
     """This method checks if the excpetion type and message are as expected."""
     try:
       callable_obj()
-    except exc_class, ex:
+    except exc_class as ex:
       # Check if the exception message is the right one.
       self.assertEqual(exception, str(ex))
       return
@@ -2131,15 +2373,22 @@ class SerializationTest(unittest.TestCase):
     self._CheckRaises(
         message.EncodeError,
         proto.SerializeToString,
-        'Message is missing required fields: a,b,c')
+        'Message protobuf_unittest.TestRequired is missing required fields: '
+        'a,b,c')
     # Shouldn't raise exceptions.
     partial = proto.SerializePartialToString()
 
+    proto2 = unittest_pb2.TestRequired()
+    self.assertFalse(proto2.HasField('a'))
+    # proto2 ParseFromString does not check that required fields are set.
+    proto2.ParseFromString(partial)
+    self.assertFalse(proto2.HasField('a'))
+
     proto.a = 1
     self._CheckRaises(
         message.EncodeError,
         proto.SerializeToString,
-        'Message is missing required fields: b,c')
+        'Message protobuf_unittest.TestRequired is missing required fields: b,c')
     # Shouldn't raise exceptions.
     partial = proto.SerializePartialToString()
 
@@ -2147,7 +2396,7 @@ class SerializationTest(unittest.TestCase):
     self._CheckRaises(
         message.EncodeError,
         proto.SerializeToString,
-        'Message is missing required fields: c')
+        'Message protobuf_unittest.TestRequired is missing required fields: c')
     # Shouldn't raise exceptions.
     partial = proto.SerializePartialToString()
 
@@ -2176,7 +2425,8 @@ class SerializationTest(unittest.TestCase):
     self._CheckRaises(
         message.EncodeError,
         proto.SerializeToString,
-        'Message is missing required fields: '
+        'Message protobuf_unittest.TestRequiredForeign '
+        'is missing required fields: '
         'optional_message.b,optional_message.c')
 
     proto.optional_message.b = 2
@@ -2188,7 +2438,7 @@ class SerializationTest(unittest.TestCase):
     self._CheckRaises(
         message.EncodeError,
         proto.SerializeToString,
-        'Message is missing required fields: '
+        'Message protobuf_unittest.TestRequiredForeign is missing required fields: '
         'repeated_message[0].b,repeated_message[0].c,'
         'repeated_message[1].a,repeated_message[1].c')
 

+ 52 - 0
python/google/protobuf/internal/test_bad_identifiers.proto

@@ -0,0 +1,52 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+
+
+package protobuf_unittest;
+
+option py_generic_services = true;
+
+message TestBadIdentifiers {
+  extensions 100 to max;
+}
+
+// Make sure these reasonable extension names don't conflict with internal
+// variables.
+extend TestBadIdentifiers {
+  optional string message = 100 [default="foo"];
+  optional string descriptor = 101 [default="bar"];
+  optional string reflection = 102 [default="baz"];
+  optional string service = 103 [default="qux"];
+}
+
+message AnotherMessage {}
+service AnotherService {}

+ 18 - 2
python/google/protobuf/internal/test_util.py

@@ -42,8 +42,8 @@ from google.protobuf import unittest_import_pb2
 from google.protobuf import unittest_pb2
 
 
-def SetAllFields(message):
-  """Sets every field in the message to a unique value.
+def SetAllNonLazyFields(message):
+  """Sets every non-lazy field in the message to a unique value.
 
   Args:
     message: A unittest_pb2.TestAllTypes instance.
@@ -79,6 +79,7 @@ def SetAllFields(message):
   message.optional_nested_message.bb = 118
   message.optional_foreign_message.c = 119
   message.optional_import_message.d = 120
+  message.optional_public_import_message.e = 126
 
   message.optional_nested_enum = unittest_pb2.TestAllTypes.BAZ
   message.optional_foreign_enum = unittest_pb2.FOREIGN_BAZ
@@ -111,6 +112,7 @@ def SetAllFields(message):
   message.repeated_nested_message.add().bb = 218
   message.repeated_foreign_message.add().c = 219
   message.repeated_import_message.add().d = 220
+  message.repeated_lazy_message.add().bb = 227
 
   message.repeated_nested_enum.append(unittest_pb2.TestAllTypes.BAR)
   message.repeated_foreign_enum.append(unittest_pb2.FOREIGN_BAR)
@@ -140,6 +142,7 @@ def SetAllFields(message):
   message.repeated_nested_message.add().bb = 318
   message.repeated_foreign_message.add().c = 319
   message.repeated_import_message.add().d = 320
+  message.repeated_lazy_message.add().bb = 327
 
   message.repeated_nested_enum.append(unittest_pb2.TestAllTypes.BAZ)
   message.repeated_foreign_enum.append(unittest_pb2.FOREIGN_BAZ)
@@ -176,6 +179,11 @@ def SetAllFields(message):
   message.default_cord = '425'
 
 
+def SetAllFields(message):
+  SetAllNonLazyFields(message)
+  message.optional_lazy_message.bb = 127
+
+
 def SetAllExtensions(message):
   """Sets every extension in the message to a unique value.
 
@@ -211,6 +219,8 @@ def SetAllExtensions(message):
   extensions[pb2.optional_nested_message_extension].bb = 118
   extensions[pb2.optional_foreign_message_extension].c = 119
   extensions[pb2.optional_import_message_extension].d = 120
+  extensions[pb2.optional_public_import_message_extension].e = 126
+  extensions[pb2.optional_lazy_message_extension].bb = 127
 
   extensions[pb2.optional_nested_enum_extension] = pb2.TestAllTypes.BAZ
   extensions[pb2.optional_nested_enum_extension] = pb2.TestAllTypes.BAZ
@@ -244,6 +254,7 @@ def SetAllExtensions(message):
   extensions[pb2.repeated_nested_message_extension].add().bb = 218
   extensions[pb2.repeated_foreign_message_extension].add().c = 219
   extensions[pb2.repeated_import_message_extension].add().d = 220
+  extensions[pb2.repeated_lazy_message_extension].add().bb = 227
 
   extensions[pb2.repeated_nested_enum_extension].append(pb2.TestAllTypes.BAR)
   extensions[pb2.repeated_foreign_enum_extension].append(pb2.FOREIGN_BAR)
@@ -273,6 +284,7 @@ def SetAllExtensions(message):
   extensions[pb2.repeated_nested_message_extension].add().bb = 318
   extensions[pb2.repeated_foreign_message_extension].add().c = 319
   extensions[pb2.repeated_import_message_extension].add().d = 320
+  extensions[pb2.repeated_lazy_message_extension].add().bb = 327
 
   extensions[pb2.repeated_nested_enum_extension].append(pb2.TestAllTypes.BAZ)
   extensions[pb2.repeated_foreign_enum_extension].append(pb2.FOREIGN_BAZ)
@@ -407,6 +419,8 @@ def ExpectAllFieldsSet(test_case, message):
   test_case.assertEqual(118, message.optional_nested_message.bb)
   test_case.assertEqual(119, message.optional_foreign_message.c)
   test_case.assertEqual(120, message.optional_import_message.d)
+  test_case.assertEqual(126, message.optional_public_import_message.e)
+  test_case.assertEqual(127, message.optional_lazy_message.bb)
 
   test_case.assertEqual(unittest_pb2.TestAllTypes.BAZ,
                         message.optional_nested_enum)
@@ -464,6 +478,7 @@ def ExpectAllFieldsSet(test_case, message):
   test_case.assertEqual(218, message.repeated_nested_message[0].bb)
   test_case.assertEqual(219, message.repeated_foreign_message[0].c)
   test_case.assertEqual(220, message.repeated_import_message[0].d)
+  test_case.assertEqual(227, message.repeated_lazy_message[0].bb)
 
   test_case.assertEqual(unittest_pb2.TestAllTypes.BAR,
                         message.repeated_nested_enum[0])
@@ -492,6 +507,7 @@ def ExpectAllFieldsSet(test_case, message):
   test_case.assertEqual(318, message.repeated_nested_message[1].bb)
   test_case.assertEqual(319, message.repeated_foreign_message[1].c)
   test_case.assertEqual(320, message.repeated_import_message[1].d)
+  test_case.assertEqual(327, message.repeated_lazy_message[1].bb)
 
   test_case.assertEqual(unittest_pb2.TestAllTypes.BAZ,
                         message.repeated_nested_enum[1])

+ 40 - 14
python/google/protobuf/internal/text_format_test.py

@@ -94,6 +94,28 @@ class TextFormatTest(unittest.TestCase):
       '  }\n'
       '}\n')
 
+  def testPrintBadEnumValue(self):
+    message = unittest_pb2.TestAllTypes()
+    message.optional_nested_enum = 100
+    message.optional_foreign_enum = 101
+    message.optional_import_enum = 102
+    self.CompareToGoldenText(
+        text_format.MessageToString(message),
+        'optional_nested_enum: 100\n'
+        'optional_foreign_enum: 101\n'
+        'optional_import_enum: 102\n')
+
+  def testPrintBadEnumValueExtensions(self):
+    message = unittest_pb2.TestAllExtensions()
+    message.Extensions[unittest_pb2.optional_nested_enum_extension] = 100
+    message.Extensions[unittest_pb2.optional_foreign_enum_extension] = 101
+    message.Extensions[unittest_pb2.optional_import_enum_extension] = 102
+    self.CompareToGoldenText(
+        text_format.MessageToString(message),
+        '[protobuf_unittest.optional_nested_enum_extension]: 100\n'
+        '[protobuf_unittest.optional_foreign_enum_extension]: 101\n'
+        '[protobuf_unittest.optional_import_enum_extension]: 102\n')
+
   def testPrintExotic(self):
     message = unittest_pb2.TestAllTypes()
     message.repeated_int64.append(-9223372036854775808)
@@ -399,6 +421,14 @@ class TextFormatTest(unittest.TestCase):
          'has no value with number 100.'),
         text_format.Merge, text, message)
 
+  def testMergeBadIntValue(self):
+    message = unittest_pb2.TestAllTypes()
+    text = 'optional_int32: bork'
+    self.assertRaisesWithMessage(
+        text_format.ParseError,
+        ('1:17 : Couldn\'t parse integer: bork'),
+        text_format.Merge, text, message)
+
   def assertRaisesWithMessage(self, e_class, e, func, *args, **kwargs):
     """Same as assertRaises, but also compares the exception message."""
     if hasattr(e_class, '__name__'):
@@ -408,7 +438,7 @@ class TextFormatTest(unittest.TestCase):
 
     try:
       func(*args, **kwargs)
-    except e_class, expr:
+    except e_class as expr:
       if str(expr) != e:
         msg = '%s raised, but with wrong message: "%s" instead of "%s"'
         raise self.failureException(msg % (exc_name,
@@ -427,7 +457,7 @@ class TokenizerTest(unittest.TestCase):
             'identifiER_4 : 1.1e+2 ID5:-0.23 ID6:\'aaaa\\\'bbbb\'\n'
             'ID7 : "aa\\"bb"\n\n\n\n ID8: {A:inf B:-inf C:true D:false}\n'
             'ID9: 22 ID10: -111111111111111111 ID11: -22\n'
-            'ID12: 2222222222222222222 '
+            'ID12: 2222222222222222222 ID13: 1.23456f ID14: 1.2e+2f '
             'false_bool:  0 true_BOOL:t \n true_bool1:  1 false_BOOL1:f ' )
     tokenizer = text_format._Tokenizer(text)
     methods = [(tokenizer.ConsumeIdentifier, 'identifier1'),
@@ -456,10 +486,10 @@ class TokenizerTest(unittest.TestCase):
                '{',
                (tokenizer.ConsumeIdentifier, 'A'),
                ':',
-               (tokenizer.ConsumeFloat, text_format._INFINITY),
+               (tokenizer.ConsumeFloat, float('inf')),
                (tokenizer.ConsumeIdentifier, 'B'),
                ':',
-               (tokenizer.ConsumeFloat, -text_format._INFINITY),
+               (tokenizer.ConsumeFloat, -float('inf')),
                (tokenizer.ConsumeIdentifier, 'C'),
                ':',
                (tokenizer.ConsumeBool, True),
@@ -479,6 +509,12 @@ class TokenizerTest(unittest.TestCase):
                (tokenizer.ConsumeIdentifier, 'ID12'),
                ':',
                (tokenizer.ConsumeUint64, 2222222222222222222),
+               (tokenizer.ConsumeIdentifier, 'ID13'),
+               ':',
+               (tokenizer.ConsumeFloat, 1.23456),
+               (tokenizer.ConsumeIdentifier, 'ID14'),
+               ':',
+               (tokenizer.ConsumeFloat, 1.2e+2),
                (tokenizer.ConsumeIdentifier, 'false_bool'),
                ':',
                (tokenizer.ConsumeBool, False),
@@ -556,16 +592,6 @@ class TokenizerTest(unittest.TestCase):
     tokenizer = text_format._Tokenizer(text)
     self.assertRaises(text_format.ParseError, tokenizer.ConsumeBool)
 
-  def testInfNan(self):
-    # Make sure our infinity and NaN definitions are sound.
-    self.assertEquals(float, type(text_format._INFINITY))
-    self.assertEquals(float, type(text_format._NAN))
-    self.assertTrue(text_format._NAN != text_format._NAN)
-
-    inf_times_zero = text_format._INFINITY * 0
-    self.assertTrue(inf_times_zero != inf_times_zero)
-    self.assertTrue(text_format._INFINITY > 0)
-
 
 if __name__ == '__main__':
   unittest.main()

+ 170 - 0
python/google/protobuf/internal/unknown_fields_test.py

@@ -0,0 +1,170 @@
+#! /usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Protocol Buffers - Google's data interchange format
+# Copyright 2008 Google Inc.  All rights reserved.
+# http://code.google.com/p/protobuf/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Test for preservation of unknown fields in the pure Python implementation."""
+
+__author__ = 'bohdank@google.com (Bohdan Koval)'
+
+import unittest
+from google.protobuf import unittest_mset_pb2
+from google.protobuf import unittest_pb2
+from google.protobuf.internal import encoder
+from google.protobuf.internal import test_util
+from google.protobuf.internal import type_checkers
+
+
+class UnknownFieldsTest(unittest.TestCase):
+
+  def setUp(self):
+    self.descriptor = unittest_pb2.TestAllTypes.DESCRIPTOR
+    self.all_fields = unittest_pb2.TestAllTypes()
+    test_util.SetAllFields(self.all_fields)
+    self.all_fields_data = self.all_fields.SerializeToString()
+    self.empty_message = unittest_pb2.TestEmptyMessage()
+    self.empty_message.ParseFromString(self.all_fields_data)
+    self.unknown_fields = self.empty_message._unknown_fields
+
+  def GetField(self, name):
+    field_descriptor = self.descriptor.fields_by_name[name]
+    wire_type = type_checkers.FIELD_TYPE_TO_WIRE_TYPE[field_descriptor.type]
+    field_tag = encoder.TagBytes(field_descriptor.number, wire_type)
+    for tag_bytes, value in self.unknown_fields:
+      if tag_bytes == field_tag:
+        decoder = unittest_pb2.TestAllTypes._decoders_by_tag[tag_bytes]
+        result_dict = {}
+        decoder(value, 0, len(value), self.all_fields, result_dict)
+        return result_dict[field_descriptor]
+
+  def testVarint(self):
+    value = self.GetField('optional_int32')
+    self.assertEqual(self.all_fields.optional_int32, value)
+
+  def testFixed32(self):
+    value = self.GetField('optional_fixed32')
+    self.assertEqual(self.all_fields.optional_fixed32, value)
+
+  def testFixed64(self):
+    value = self.GetField('optional_fixed64')
+    self.assertEqual(self.all_fields.optional_fixed64, value)
+
+  def testLengthDelimited(self):
+    value = self.GetField('optional_string')
+    self.assertEqual(self.all_fields.optional_string, value)
+
+  def testGroup(self):
+    value = self.GetField('optionalgroup')
+    self.assertEqual(self.all_fields.optionalgroup, value)
+
+  def testSerialize(self):
+    data = self.empty_message.SerializeToString()
+
+    # Don't use assertEqual because we don't want to dump raw binary data to
+    # stdout.
+    self.assertTrue(data == self.all_fields_data)
+
+  def testCopyFrom(self):
+    message = unittest_pb2.TestEmptyMessage()
+    message.CopyFrom(self.empty_message)
+    self.assertEqual(self.unknown_fields, message._unknown_fields)
+
+  def testMergeFrom(self):
+    message = unittest_pb2.TestAllTypes()
+    message.optional_int32 = 1
+    message.optional_uint32 = 2
+    source = unittest_pb2.TestEmptyMessage()
+    source.ParseFromString(message.SerializeToString())
+
+    message.ClearField('optional_int32')
+    message.optional_int64 = 3
+    message.optional_uint32 = 4
+    destination = unittest_pb2.TestEmptyMessage()
+    destination.ParseFromString(message.SerializeToString())
+    unknown_fields = destination._unknown_fields[:]
+
+    destination.MergeFrom(source)
+    self.assertEqual(unknown_fields + source._unknown_fields,
+                     destination._unknown_fields)
+
+  def testClear(self):
+    self.empty_message.Clear()
+    self.assertEqual(0, len(self.empty_message._unknown_fields))
+
+  def testByteSize(self):
+    self.assertEqual(self.all_fields.ByteSize(), self.empty_message.ByteSize())
+
+  def testUnknownExtensions(self):
+    message = unittest_pb2.TestEmptyMessageWithExtensions()
+    message.ParseFromString(self.all_fields_data)
+    self.assertEqual(self.empty_message._unknown_fields,
+                     message._unknown_fields)
+
+  def testListFields(self):
+    # Make sure ListFields doesn't return unknown fields.
+    self.assertEqual(0, len(self.empty_message.ListFields()))
+
+  def testSerializeMessageSetWireFormatUnknownExtension(self):
+    # Create a message using the message set wire format with an unknown
+    # message.
+    raw = unittest_mset_pb2.RawMessageSet()
+
+    # Add an unknown extension.
+    item = raw.item.add()
+    item.type_id = 1545009
+    message1 = unittest_mset_pb2.TestMessageSetExtension1()
+    message1.i = 12345
+    item.message = message1.SerializeToString()
+
+    serialized = raw.SerializeToString()
+
+    # Parse message using the message set wire format.
+    proto = unittest_mset_pb2.TestMessageSet()
+    proto.MergeFromString(serialized)
+
+    # Verify that the unknown extension is serialized unchanged
+    reserialized = proto.SerializeToString()
+    new_raw = unittest_mset_pb2.RawMessageSet()
+    new_raw.MergeFromString(reserialized)
+    self.assertEqual(raw, new_raw)
+
+  def testEquals(self):
+    message = unittest_pb2.TestEmptyMessage()
+    message.ParseFromString(self.all_fields_data)
+    self.assertEqual(self.empty_message, message)
+
+    self.all_fields.ClearField('optional_string')
+    message.ParseFromString(self.all_fields.SerializeToString())
+    self.assertNotEqual(self.empty_message, message)
+
+
+if __name__ == '__main__':
+  unittest.main()

+ 12 - 0
python/google/protobuf/message.py

@@ -73,6 +73,7 @@ class Message(object):
     return clone
 
   def __eq__(self, other_msg):
+    """Recursively compares two messages by value and structure."""
     raise NotImplementedError
 
   def __ne__(self, other_msg):
@@ -83,9 +84,11 @@ class Message(object):
     raise TypeError('unhashable object')
 
   def __str__(self):
+    """Outputs a human-readable representation of the message."""
     raise NotImplementedError
 
   def __unicode__(self):
+    """Outputs a human-readable representation of the message."""
     raise NotImplementedError
 
   def MergeFrom(self, other_msg):
@@ -266,3 +269,12 @@ class Message(object):
     via a previous _SetListener() call.
     """
     raise NotImplementedError
+
+  def __getstate__(self):
+    """Support the pickle protocol."""
+    return dict(serialized=self.SerializePartialToString())
+
+  def __setstate__(self, state):
+    """Support the pickle protocol."""
+    self.__init__()
+    self.ParseFromString(state['serialized'])

+ 113 - 0
python/google/protobuf/message_factory.py

@@ -0,0 +1,113 @@
+# Protocol Buffers - Google's data interchange format
+# Copyright 2008 Google Inc.  All rights reserved.
+# http://code.google.com/p/protobuf/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Provides a factory class for generating dynamic messages."""
+
+__author__ = 'matthewtoia@google.com (Matt Toia)'
+
+from google.protobuf import descriptor_database
+from google.protobuf import descriptor_pool
+from google.protobuf import message
+from google.protobuf import reflection
+
+
+class MessageFactory(object):
+  """Factory for creating Proto2 messages from descriptors in a pool."""
+
+  def __init__(self):
+    """Initializes a new factory."""
+    self._classes = {}
+
+  def GetPrototype(self, descriptor):
+    """Builds a proto2 message class based on the passed in descriptor.
+
+    Passing a descriptor with a fully qualified name matching a previous
+    invocation will cause the same class to be returned.
+
+    Args:
+      descriptor: The descriptor to build from.
+
+    Returns:
+      A class describing the passed in descriptor.
+    """
+
+    if descriptor.full_name not in self._classes:
+      result_class = reflection.GeneratedProtocolMessageType(
+          descriptor.name.encode('ascii', 'ignore'),
+          (message.Message,),
+          {'DESCRIPTOR': descriptor})
+      self._classes[descriptor.full_name] = result_class
+      for field in descriptor.fields:
+        if field.message_type:
+          self.GetPrototype(field.message_type)
+    return self._classes[descriptor.full_name]
+
+
+_DB = descriptor_database.DescriptorDatabase()
+_POOL = descriptor_pool.DescriptorPool(_DB)
+_FACTORY = MessageFactory()
+
+
+def GetMessages(file_protos):
+  """Builds a dictionary of all the messages available in a set of files.
+
+  Args:
+    file_protos: A sequence of file protos to build messages out of.
+
+  Returns:
+    A dictionary containing all the message types in the files mapping the
+    fully qualified name to a Message subclass for the descriptor.
+  """
+
+  result = {}
+  for file_proto in file_protos:
+    _DB.Add(file_proto)
+  for file_proto in file_protos:
+    for desc in _GetAllDescriptors(file_proto.message_type, file_proto.package):
+      result[desc.full_name] = _FACTORY.GetPrototype(desc)
+  return result
+
+
+def _GetAllDescriptors(desc_protos, package):
+  """Gets all levels of nested message types as a flattened list of descriptors.
+
+  Args:
+    desc_protos: The descriptor protos to process.
+    package: The package where the protos are defined.
+
+  Yields:
+    Each message descriptor for each nested type.
+  """
+
+  for desc_proto in desc_protos:
+    name = '.'.join((package, desc_proto.name))
+    yield _POOL.FindMessageTypeByName(name)
+    for nested_desc in _GetAllDescriptors(desc_proto.nested_type, name):
+      yield nested_desc

+ 77 - 14
python/google/protobuf/pyext/python-proto2.cc

@@ -207,9 +207,9 @@ static PyMethodDef CMessageMethods[] = {
           "Clears and sets the values of a repeated scalar field."),
   CMETHOD(ByteSize, METH_NOARGS,
           "Returns the size of the message in bytes."),
-  CMETHOD(Clear, METH_NOARGS,
+  CMETHOD(Clear, METH_O,
           "Clears a protocol message."),
-  CMETHOD(ClearField, METH_O,
+  CMETHOD(ClearField, METH_VARARGS,
           "Clears a protocol message field by name."),
   CMETHOD(ClearFieldByDescriptor, METH_O,
           "Clears a protocol message field by descriptor."),
@@ -274,7 +274,7 @@ static PyMemberDef CMessageMembers[] = {
 PyTypeObject CMessage_Type = {
   PyObject_HEAD_INIT(&PyType_Type)
   0,
-  C("google3.net.google.protobuf.python.internal."
+  C("google.protobuf.internal."
     "_net_proto2___python."
     "CMessage"),                       // tp_name
   sizeof(CMessage),                    //  tp_basicsize
@@ -319,14 +319,12 @@ PyTypeObject CMessage_Type = {
 // ------ Helper Functions:
 
 static void FormatTypeError(PyObject* arg, char* expected_types) {
-  PyObject* s = PyObject_Str(PyObject_Type(arg));
-  PyObject* repr = PyObject_Repr(PyObject_Type(arg));
+  PyObject* repr = PyObject_Repr(arg);
   PyErr_Format(PyExc_TypeError,
                "%.100s has type %.100s, but expected one of: %s",
                PyString_AS_STRING(repr),
-               PyString_AS_STRING(s),
+               arg->ob_type->tp_name,
                expected_types);
-  Py_DECREF(s);
   Py_DECREF(repr);
 }
 
@@ -398,6 +396,28 @@ static const google::protobuf::Message* CreateMessage(const char* message_type)
   return global_message_factory->GetPrototype(descriptor);
 }
 
+static void ReleaseSubMessage(google::protobuf::Message* message,
+                           const google::protobuf::FieldDescriptor* field_descriptor,
+                           CMessage* child_cmessage) {
+  Message* released_message = message->GetReflection()->ReleaseMessage(
+      message, field_descriptor, global_message_factory);
+  GOOGLE_DCHECK(child_cmessage->message != NULL);
+  // ReleaseMessage will return NULL which differs from
+  // child_cmessage->message, if the field does not exist.  In this case,
+  // the latter points to the default instance via a const_cast<>, so we
+  // have to reset it to a new mutable object since we are taking ownership.
+  if (released_message == NULL) {
+    const Message* prototype = global_message_factory->GetPrototype(
+        child_cmessage->message->GetDescriptor());
+    GOOGLE_DCHECK(prototype != NULL);
+    child_cmessage->message = prototype->New();
+  }
+  child_cmessage->parent = NULL;
+  child_cmessage->parent_field = NULL;
+  child_cmessage->free_message = true;
+  child_cmessage->read_only = false;
+}
+
 static bool CheckAndSetString(
     PyObject* arg, google::protobuf::Message* message,
     const google::protobuf::FieldDescriptor* descriptor,
@@ -407,6 +427,9 @@ static bool CheckAndSetString(
   GOOGLE_DCHECK(descriptor->type() == google::protobuf::FieldDescriptor::TYPE_STRING ||
          descriptor->type() == google::protobuf::FieldDescriptor::TYPE_BYTES);
   if (descriptor->type() == google::protobuf::FieldDescriptor::TYPE_STRING) {
+#else
+  if (descriptor->file()->options().cc_api_version() == 2 &&
+      descriptor->type() == google::protobuf::FieldDescriptor::TYPE_STRING) {
     if (!PyString_Check(arg) && !PyUnicode_Check(arg)) {
       FormatTypeError(arg, "str, unicode");
       return false;
@@ -434,6 +457,9 @@ static bool CheckAndSetString(
 
   PyObject* encoded_string = NULL;
   if (descriptor->type() == google::protobuf::FieldDescriptor::TYPE_STRING) {
+#else
+  if (descriptor->file()->options().cc_api_version() == 2 &&
+      descriptor->type() == google::protobuf::FieldDescriptor::TYPE_STRING) {
     if (PyString_Check(arg)) {
       encoded_string = PyString_AsEncodedObject(arg, "utf-8", NULL);
     } else {
@@ -504,8 +530,6 @@ static void AssureWritable(CMessage* self) {
   self->message = reflection->MutableMessage(
       message, self->parent_field->descriptor, global_message_factory);
   self->read_only = false;
-  self->parent = NULL;
-  self->parent_field = NULL;
 }
 
 static PyObject* InternalGetScalar(
@@ -955,9 +979,41 @@ static void CMessageDealloc(CMessage* self) {
 
 // ------ Methods:
 
-static PyObject* CMessage_Clear(CMessage* self, PyObject* args) {
+static PyObject* CMessage_Clear(CMessage* self, PyObject* arg) {
   AssureWritable(self);
-  self->message->Clear();
+  google::protobuf::Message* message = self->message;
+
+  // This block of code is equivalent to the following:
+  // for cfield_descriptor, child_cmessage in arg:
+  //   ReleaseSubMessage(cfield_descriptor, child_cmessage)
+  if (!PyList_Check(arg)) {
+    PyErr_SetString(PyExc_TypeError, "Must be a list");
+    return NULL;
+  }
+  PyObject* messages_to_clear = arg;
+  Py_ssize_t num_messages_to_clear = PyList_GET_SIZE(messages_to_clear);
+  for(int i = 0; i < num_messages_to_clear; ++i) {
+    PyObject* message_tuple = PyList_GET_ITEM(messages_to_clear, i);
+    if (!PyTuple_Check(message_tuple) || PyTuple_GET_SIZE(message_tuple) != 2) {
+      PyErr_SetString(PyExc_TypeError, "Must be a tuple of size 2");
+      return NULL;
+    }
+
+    PyObject* py_cfield_descriptor = PyTuple_GET_ITEM(message_tuple, 0);
+    PyObject* py_child_cmessage = PyTuple_GET_ITEM(message_tuple, 1);
+    if (!PyObject_TypeCheck(py_cfield_descriptor, &CFieldDescriptor_Type) ||
+        !PyObject_TypeCheck(py_child_cmessage, &CMessage_Type)) {
+      PyErr_SetString(PyExc_ValueError, "Invalid Tuple");
+      return NULL;
+    }
+
+    CFieldDescriptor* cfield_descriptor = reinterpret_cast<CFieldDescriptor *>(
+        py_cfield_descriptor);
+    CMessage* child_cmessage = reinterpret_cast<CMessage *>(py_child_cmessage);
+    ReleaseSubMessage(message, cfield_descriptor->descriptor, child_cmessage);
+  }
+
+  message->Clear();
   Py_RETURN_NONE;
 }
 
@@ -1039,9 +1095,11 @@ static PyObject* CMessage_ClearFieldByDescriptor(
   Py_RETURN_NONE;
 }
 
-static PyObject* CMessage_ClearField(CMessage* self, PyObject* arg) {
+static PyObject* CMessage_ClearField(CMessage* self, PyObject* args) {
   char* field_name;
-  if (PyString_AsStringAndSize(arg, &field_name, NULL) < 0) {
+  CMessage* child_cmessage = NULL;
+  if (!PyArg_ParseTuple(args, C("s|O!:ClearField"), &field_name,
+                        &CMessage_Type, &child_cmessage)) {
     return NULL;
   }
 
@@ -1054,7 +1112,11 @@ static PyObject* CMessage_ClearField(CMessage* self, PyObject* arg) {
     return NULL;
   }
 
-  message->GetReflection()->ClearField(message, field_descriptor);
+  if (child_cmessage != NULL && !FIELD_IS_REPEATED(field_descriptor)) {
+    ReleaseSubMessage(message, field_descriptor, child_cmessage);
+  } else {
+    message->GetReflection()->ClearField(message, field_descriptor);
+  }
   Py_RETURN_NONE;
 }
 
@@ -1313,6 +1375,7 @@ static PyObject* CMessage_MergeFromString(CMessage* self, PyObject* arg) {
   AssureWritable(self);
   google::protobuf::io::CodedInputStream input(
       reinterpret_cast<const uint8*>(data), data_length);
+  input.SetExtensionRegistry(GetDescriptorPool(), global_message_factory);
   bool success = self->message->MergePartialFromCodedStream(&input);
   if (success) {
     return PyInt_FromLong(self->message->ByteSize());

+ 6 - 3
python/google/protobuf/pyext/python_descriptor.cc

@@ -31,6 +31,7 @@
 // Author: petar@google.com (Petar Petrov)
 
 #include <Python.h>
+#include <string>
 
 #include <google/protobuf/pyext/python_descriptor.h>
 #include <google/protobuf/descriptor.pb.h>
@@ -41,6 +42,7 @@ namespace google {
 namespace protobuf {
 namespace python {
 
+
 static void CFieldDescriptorDealloc(CFieldDescriptor* self);
 
 static google::protobuf::DescriptorPool* g_descriptor_pool = NULL;
@@ -93,7 +95,7 @@ static PyGetSetDef CFieldDescriptorGetters[] = {
 PyTypeObject CFieldDescriptor_Type = {
   PyObject_HEAD_INIT(&PyType_Type)
   0,
-  C("google3.net.google.protobuf.python.internal."
+  C("google.protobuf.internal."
     "_net_proto2___python."
     "CFieldDescriptor"),                // tp_name
   sizeof(CFieldDescriptor),             // tp_basicsize
@@ -181,6 +183,8 @@ static PyObject* CDescriptorPool_FindFieldByName(
   const google::protobuf::FieldDescriptor* field_descriptor = NULL;
 
   field_descriptor = self->pool->FindFieldByName(full_field_name);
+
+
   if (field_descriptor == NULL) {
     PyErr_Format(PyExc_TypeError, "Couldn't find field %.200s",
                  full_field_name);
@@ -223,7 +227,7 @@ static PyMethodDef CDescriptorPoolMethods[] = {
 PyTypeObject CDescriptorPool_Type = {
   PyObject_HEAD_INIT(&PyType_Type)
   0,
-  C("google3.net.google.protobuf.python.internal."
+  C("google.protobuf.internal."
     "_net_proto2___python."
     "CFieldDescriptor"),               // tp_name
   sizeof(CDescriptorPool),             // tp_basicsize
@@ -301,7 +305,6 @@ PyObject* Python_BuildFile(PyObject* ignored, PyObject* arg) {
     return NULL;
   }
 
-  // If this file is already in the generated pool, don't add it again.
   if (google::protobuf::DescriptorPool::generated_pool()->FindFileByName(
       file_proto.name()) != NULL) {
     Py_RETURN_NONE;

+ 31 - 4
python/google/protobuf/reflection.py

@@ -50,13 +50,20 @@ __author__ = 'robinson@google.com (Will Robinson)'
 
 from google.protobuf.internal import api_implementation
 from google.protobuf import descriptor as descriptor_mod
+from google.protobuf import message
+
 _FieldDescriptor = descriptor_mod.FieldDescriptor
 
 
 if api_implementation.Type() == 'cpp':
-  from google.protobuf.internal import cpp_message
-  _NewMessage = cpp_message.NewMessage
-  _InitMessage = cpp_message.InitMessage
+  if api_implementation.Version() == 2:
+    from google.protobuf.internal.cpp import cpp_message
+    _NewMessage = cpp_message.NewMessage
+    _InitMessage = cpp_message.InitMessage
+  else:
+    from google.protobuf.internal import cpp_message
+    _NewMessage = cpp_message.NewMessage
+    _InitMessage = cpp_message.InitMessage
 else:
   from google.protobuf.internal import python_message
   _NewMessage = python_message.NewMessage
@@ -112,7 +119,7 @@ class GeneratedProtocolMessageType(type):
       Newly-allocated class.
     """
     descriptor = dictionary[GeneratedProtocolMessageType._DESCRIPTOR_KEY]
-    _NewMessage(descriptor, dictionary)
+    bases = _NewMessage(bases, descriptor, dictionary)
     superclass = super(GeneratedProtocolMessageType, cls)
 
     new_class = superclass.__new__(cls, name, bases, dictionary)
@@ -140,3 +147,23 @@ class GeneratedProtocolMessageType(type):
     _InitMessage(descriptor, cls)
     superclass = super(GeneratedProtocolMessageType, cls)
     superclass.__init__(name, bases, dictionary)
+
+
+def ParseMessage(descriptor, byte_str):
+  """Generate a new Message instance from this Descriptor and a byte string.
+
+  Args:
+    descriptor: Protobuf Descriptor object
+    byte_str: Serialized protocol buffer byte string
+
+  Returns:
+    Newly created protobuf Message object.
+  """
+
+  class _ResultClass(message.Message):
+    __metaclass__ = GeneratedProtocolMessageType
+    DESCRIPTOR = descriptor
+
+  new_msg = _ResultClass()
+  new_msg.ParseFromString(byte_str)
+  return new_msg

+ 150 - 107
python/google/protobuf/text_format.py

@@ -43,10 +43,12 @@ __all__ = [ 'MessageToString', 'PrintMessage', 'PrintField',
             'PrintFieldValue', 'Merge' ]
 
 
-# Infinity and NaN are not explicitly supported by Python pre-2.6, and
-# float('inf') does not work on Windows (pre-2.6).
-_INFINITY = 1e10000    # overflows, thus will actually be infinity.
-_NAN = _INFINITY * 0
+_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
+                     type_checkers.Int32ValueChecker(),
+                     type_checkers.Uint64ValueChecker(),
+                     type_checkers.Int64ValueChecker())
+_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
+_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
 
 
 class ParseError(Exception):
@@ -120,7 +122,11 @@ def PrintFieldValue(field, value, out, indent=0,
       PrintMessage(value, out, indent + 2, as_utf8, as_one_line)
       out.write(' ' * indent + '}')
   elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
-    out.write(field.enum_type.values_by_number[value].name)
+    enum_value = field.enum_type.values_by_number.get(value, None)
+    if enum_value is not None:
+      out.write(enum_value.name)
+    else:
+      out.write(str(value))
   elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
     out.write('\"')
     if type(value) is unicode:
@@ -271,24 +277,7 @@ def _MergeScalarField(tokenizer, message, field):
   elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
     value = tokenizer.ConsumeByteString()
   elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
-    # Enum can be specified by a number (the enum value), or by
-    # a string literal (the enum name).
-    enum_descriptor = field.enum_type
-    if tokenizer.LookingAtInteger():
-      number = tokenizer.ConsumeInt32()
-      enum_value = enum_descriptor.values_by_number.get(number, None)
-      if enum_value is None:
-        raise tokenizer.ParseErrorPreviousToken(
-            'Enum type "%s" has no value with number %d.' % (
-                enum_descriptor.full_name, number))
-    else:
-      identifier = tokenizer.ConsumeIdentifier()
-      enum_value = enum_descriptor.values_by_name.get(identifier, None)
-      if enum_value is None:
-        raise tokenizer.ParseErrorPreviousToken(
-            'Enum type "%s" has no value named %s.' % (
-                enum_descriptor.full_name, identifier))
-    value = enum_value.number
+    value = tokenizer.ConsumeEnum(field)
   else:
     raise RuntimeError('Unknown field type %d' % field.type)
 
@@ -320,12 +309,6 @@ class _Tokenizer(object):
       '\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|'  # a double-quoted string
       '\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)')  # a single-quoted string
   _IDENTIFIER = re.compile('\w+')
-  _INTEGER_CHECKERS = [type_checkers.Uint32ValueChecker(),
-                       type_checkers.Int32ValueChecker(),
-                       type_checkers.Uint64ValueChecker(),
-                       type_checkers.Int64ValueChecker()]
-  _FLOAT_INFINITY = re.compile('-?inf(inity)?f?', re.IGNORECASE)
-  _FLOAT_NAN = re.compile("nanf?", re.IGNORECASE)
 
   def __init__(self, text_message):
     self._text_message = text_message
@@ -394,17 +377,6 @@ class _Tokenizer(object):
     if not self.TryConsume(token):
       raise self._ParseError('Expected "%s".' % token)
 
-  def LookingAtInteger(self):
-    """Checks if the current token is an integer.
-
-    Returns:
-      True iff the current token is an integer.
-    """
-    if not self.token:
-      return False
-    c = self.token[0]
-    return (c >= '0' and c <= '9') or c == '-' or c == '+'
-
   def ConsumeIdentifier(self):
     """Consumes protocol message field identifier.
 
@@ -430,9 +402,9 @@ class _Tokenizer(object):
       ParseError: If a signed 32bit integer couldn't be consumed.
     """
     try:
-      result = self._ParseInteger(self.token, is_signed=True, is_long=False)
+      result = ParseInteger(self.token, is_signed=True, is_long=False)
     except ValueError, e:
-      raise self._IntegerParseError(e)
+      raise self._ParseError(str(e))
     self.NextToken()
     return result
 
@@ -446,9 +418,9 @@ class _Tokenizer(object):
       ParseError: If an unsigned 32bit integer couldn't be consumed.
     """
     try:
-      result = self._ParseInteger(self.token, is_signed=False, is_long=False)
+      result = ParseInteger(self.token, is_signed=False, is_long=False)
     except ValueError, e:
-      raise self._IntegerParseError(e)
+      raise self._ParseError(str(e))
     self.NextToken()
     return result
 
@@ -462,9 +434,9 @@ class _Tokenizer(object):
       ParseError: If a signed 64bit integer couldn't be consumed.
     """
     try:
-      result = self._ParseInteger(self.token, is_signed=True, is_long=True)
+      result = ParseInteger(self.token, is_signed=True, is_long=True)
     except ValueError, e:
-      raise self._IntegerParseError(e)
+      raise self._ParseError(str(e))
     self.NextToken()
     return result
 
@@ -478,9 +450,9 @@ class _Tokenizer(object):
       ParseError: If an unsigned 64bit integer couldn't be consumed.
     """
     try:
-      result = self._ParseInteger(self.token, is_signed=False, is_long=True)
+      result = ParseInteger(self.token, is_signed=False, is_long=True)
     except ValueError, e:
-      raise self._IntegerParseError(e)
+      raise self._ParseError(str(e))
     self.NextToken()
     return result
 
@@ -493,21 +465,10 @@ class _Tokenizer(object):
     Raises:
       ParseError: If a floating point number couldn't be consumed.
     """
-    text = self.token
-    if self._FLOAT_INFINITY.match(text):
-      self.NextToken()
-      if text.startswith('-'):
-        return -_INFINITY
-      return _INFINITY
-
-    if self._FLOAT_NAN.match(text):
-      self.NextToken()
-      return _NAN
-
     try:
-      result = float(text)
+      result = ParseFloat(self.token)
     except ValueError, e:
-      raise self._FloatParseError(e)
+      raise self._ParseError(str(e))
     self.NextToken()
     return result
 
@@ -520,14 +481,12 @@ class _Tokenizer(object):
     Raises:
       ParseError: If a boolean value couldn't be consumed.
     """
-    if self.token in ('true', 't', '1'):
-      self.NextToken()
-      return True
-    elif self.token in ('false', 'f', '0'):
-      self.NextToken()
-      return False
-    else:
-      raise self._ParseError('Expected "true" or "false".')
+    try:
+      result = ParseBool(self.token)
+    except ValueError, e:
+      raise self._ParseError(str(e))
+    self.NextToken()
+    return result
 
   def ConsumeString(self):
     """Consumes a string value.
@@ -567,7 +526,7 @@ class _Tokenizer(object):
     """
     text = self.token
     if len(text) < 1 or text[0] not in ('\'', '"'):
-      raise self._ParseError('Exptected string.')
+      raise self._ParseError('Expected string.')
 
     if len(text) < 2 or text[-1] != text[0]:
       raise self._ParseError('String missing ending quote.')
@@ -579,36 +538,12 @@ class _Tokenizer(object):
     self.NextToken()
     return result
 
-  def _ParseInteger(self, text, is_signed=False, is_long=False):
-    """Parses an integer.
-
-    Args:
-      text: The text to parse.
-      is_signed: True if a signed integer must be parsed.
-      is_long: True if a long integer must be parsed.
-
-    Returns:
-      The integer value.
-
-    Raises:
-      ValueError: Thrown Iff the text is not a valid integer.
-    """
-    pos = 0
-    if text.startswith('-'):
-      pos += 1
-
-    base = 10
-    if text.startswith('0x', pos) or text.startswith('0X', pos):
-      base = 16
-    elif text.startswith('0', pos):
-      base = 8
-
-    # Do the actual parsing. Exception handling is propagated to caller.
-    result = int(text, base)
-
-    # Check if the integer is sane. Exceptions handled by callers.
-    checker = self._INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
-    checker.CheckValue(result)
+  def ConsumeEnum(self, field):
+    try:
+      result = ParseEnum(field, self.token)
+    except ValueError, e:
+      raise self._ParseError(str(e))
+    self.NextToken()
     return result
 
   def ParseErrorPreviousToken(self, message):
@@ -626,13 +561,7 @@ class _Tokenizer(object):
   def _ParseError(self, message):
     """Creates and *returns* a ParseError for the current token."""
     return ParseError('%d:%d : %s' % (
-        self._line + 1, self._column - len(self.token) + 1, message))
-
-  def _IntegerParseError(self, e):
-    return self._ParseError('Couldn\'t parse integer: ' + str(e))
-
-  def _FloatParseError(self, e):
-    return self._ParseError('Couldn\'t parse number: ' + str(e))
+        self._line + 1, self._column + 1, message))
 
   def _StringParseError(self, e):
     return self._ParseError('Couldn\'t parse string: ' + str(e))
@@ -689,3 +618,117 @@ def _CUnescape(text):
   # allow single-digit hex escapes (like '\xf').
   result = _CUNESCAPE_HEX.sub(ReplaceHex, text)
   return result.decode('string_escape')
+
+
+def ParseInteger(text, is_signed=False, is_long=False):
+  """Parses an integer.
+
+  Args:
+    text: The text to parse.
+    is_signed: True if a signed integer must be parsed.
+    is_long: True if a long integer must be parsed.
+
+  Returns:
+    The integer value.
+
+  Raises:
+    ValueError: Thrown Iff the text is not a valid integer.
+  """
+  # Do the actual parsing. Exception handling is propagated to caller.
+  try:
+    result = int(text, 0)
+  except ValueError:
+    raise ValueError('Couldn\'t parse integer: %s' % text)
+
+  # Check if the integer is sane. Exceptions handled by callers.
+  checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
+  checker.CheckValue(result)
+  return result
+
+
+def ParseFloat(text):
+  """Parse a floating point number.
+
+  Args:
+    text: Text to parse.
+
+  Returns:
+    The number parsed.
+
+  Raises:
+    ValueError: If a floating point number couldn't be parsed.
+  """
+  try:
+    # Assume Python compatible syntax.
+    return float(text)
+  except ValueError:
+    # Check alternative spellings.
+    if _FLOAT_INFINITY.match(text):
+      if text[0] == '-':
+        return float('-inf')
+      else:
+        return float('inf')
+    elif _FLOAT_NAN.match(text):
+      return float('nan')
+    else:
+      # assume '1.0f' format
+      try:
+        return float(text.rstrip('f'))
+      except ValueError:
+        raise ValueError('Couldn\'t parse float: %s' % text)
+
+
+def ParseBool(text):
+  """Parse a boolean value.
+
+  Args:
+    text: Text to parse.
+
+  Returns:
+    Boolean values parsed
+
+  Raises:
+    ValueError: If text is not a valid boolean.
+  """
+  if text in ('true', 't', '1'):
+    return True
+  elif text in ('false', 'f', '0'):
+    return False
+  else:
+    raise ValueError('Expected "true" or "false".')
+
+
+def ParseEnum(field, value):
+  """Parse an enum value.
+
+  The value can be specified by a number (the enum value), or by
+  a string literal (the enum name).
+
+  Args:
+    field: Enum field descriptor.
+    value: String value.
+
+  Returns:
+    Enum value number.
+
+  Raises:
+    ValueError: If the enum value could not be parsed.
+  """
+  enum_descriptor = field.enum_type
+  try:
+    number = int(value, 0)
+  except ValueError:
+    # Identifier.
+    enum_value = enum_descriptor.values_by_name.get(value, None)
+    if enum_value is None:
+      raise ValueError(
+          'Enum type "%s" has no value named %s.' % (
+              enum_descriptor.full_name, value))
+  else:
+    # Numeric value.
+    enum_value = enum_descriptor.values_by_number.get(number, None)
+    if enum_value is None:
+      raise ValueError(
+          'Enum type "%s" has no value with number %d.' % (
+              enum_descriptor.full_name, number))
+  return enum_value.number

+ 26 - 6
python/setup.py

@@ -63,19 +63,26 @@ def generate_proto(source):
     if subprocess.call(protoc_command) != 0:
       sys.exit(-1)
 
-def MakeTestSuite():
-  # This is apparently needed on some systems to make sure that the tests
-  # work even if a previous version is already installed.
-  if 'google' in sys.modules:
-    del sys.modules['google']
-
+def GenerateUnittestProtos():
   generate_proto("../src/google/protobuf/unittest.proto")
   generate_proto("../src/google/protobuf/unittest_custom_options.proto")
   generate_proto("../src/google/protobuf/unittest_import.proto")
+  generate_proto("../src/google/protobuf/unittest_import_public.proto")
   generate_proto("../src/google/protobuf/unittest_mset.proto")
   generate_proto("../src/google/protobuf/unittest_no_generic_services.proto")
+  generate_proto("google/protobuf/internal/test_bad_identifiers.proto")
   generate_proto("google/protobuf/internal/more_extensions.proto")
+  generate_proto("google/protobuf/internal/more_extensions_dynamic.proto")
   generate_proto("google/protobuf/internal/more_messages.proto")
+  generate_proto("google/protobuf/internal/factory_test1.proto")
+  generate_proto("google/protobuf/internal/factory_test2.proto")
+
+def MakeTestSuite():
+  # This is apparently needed on some systems to make sure that the tests
+  # work even if a previous version is already installed.
+  if 'google' in sys.modules:
+    del sys.modules['google']
+  GenerateUnittestProtos()
 
   import unittest
   import google.protobuf.internal.generator_test     as generator_test
@@ -85,6 +92,14 @@ def MakeTestSuite():
     as service_reflection_test
   import google.protobuf.internal.text_format_test   as text_format_test
   import google.protobuf.internal.wire_format_test   as wire_format_test
+  import google.protobuf.internal.unknown_fields_test as unknown_fields_test
+  import google.protobuf.internal.descriptor_database_test \
+      as descriptor_database_test
+  import google.protobuf.internal.descriptor_pool_test as descriptor_pool_test
+  import google.protobuf.internal.message_factory_test as message_factory_test
+  import google.protobuf.internal.message_cpp_test as message_cpp_test
+  import google.protobuf.internal.reflection_cpp_generated_test \
+      as reflection_cpp_generated_test
 
   loader = unittest.defaultTestLoader
   suite = unittest.TestSuite()
@@ -117,6 +132,8 @@ class build_py(_build_py):
     # Generate necessary .proto file if it doesn't exist.
     generate_proto("../src/google/protobuf/descriptor.proto")
     generate_proto("../src/google/protobuf/compiler/plugin.proto")
+
+    GenerateUnittestProtos()
     # Make sure google.protobuf.compiler is a valid package.
     open('google/protobuf/compiler/__init__.py', 'a').close()
     # _build_py is an old-style class, so super() doesn't work.
@@ -156,6 +173,9 @@ if __name__ == '__main__':
           'google.protobuf.descriptor_pb2',
           'google.protobuf.compiler.plugin_pb2',
           'google.protobuf.message',
+          'google.protobuf.descriptor_database',
+          'google.protobuf.descriptor_pool',
+          'google.protobuf.message_factory',
           'google.protobuf.reflection',
           'google.protobuf.service',
           'google.protobuf.service_reflection',

+ 22 - 2
src/Makefile.am

@@ -52,6 +52,7 @@ nobase_include_HEADERS =                                       \
   google/protobuf/descriptor_database.h                        \
   google/protobuf/dynamic_message.h                            \
   google/protobuf/extension_set.h                              \
+  google/protobuf/generated_enum_reflection.h                  \
   google/protobuf/generated_message_util.h                     \
   google/protobuf/generated_message_reflection.h               \
   google/protobuf/message.h                                    \
@@ -91,7 +92,11 @@ libprotobuf_lite_la_SOURCES =                                  \
   google/protobuf/stubs/once.cc                                \
   google/protobuf/stubs/hash.h                                 \
   google/protobuf/stubs/map-util.h                             \
-  google/protobuf/stubs/stl_util-inl.h                         \
+  google/protobuf/stubs/stl_util.h                             \
+  google/protobuf/stubs/stringprintf.cc                        \
+  google/protobuf/stubs/stringprintf.h                         \
+  google/protobuf/stubs/template_util.h                        \
+  google/protobuf/stubs/type_traits.h                          \
   google/protobuf/extension_set.cc                             \
   google/protobuf/generated_message_util.cc                    \
   google/protobuf/message_lite.cc                              \
@@ -158,6 +163,7 @@ libprotoc_la_SOURCES =                                         \
   google/protobuf/compiler/cpp/cpp_message.h                   \
   google/protobuf/compiler/cpp/cpp_message_field.cc            \
   google/protobuf/compiler/cpp/cpp_message_field.h             \
+  google/protobuf/compiler/cpp/cpp_options.h                   \
   google/protobuf/compiler/cpp/cpp_primitive_field.cc          \
   google/protobuf/compiler/cpp/cpp_primitive_field.h           \
   google/protobuf/compiler/cpp/cpp_service.cc                  \
@@ -187,6 +193,8 @@ libprotoc_la_SOURCES =                                         \
   google/protobuf/compiler/java/java_service.h                 \
   google/protobuf/compiler/java/java_string_field.cc           \
   google/protobuf/compiler/java/java_string_field.h            \
+  google/protobuf/compiler/java/java_doc_comment.cc            \
+  google/protobuf/compiler/java/java_doc_comment.h             \
   google/protobuf/compiler/python/python_generator.cc
 
 bin_PROGRAMS = protoc
@@ -199,12 +207,14 @@ protoc_inputs =                                                \
   google/protobuf/unittest.proto                               \
   google/protobuf/unittest_empty.proto                         \
   google/protobuf/unittest_import.proto                        \
+  google/protobuf/unittest_import_public.proto                 \
   google/protobuf/unittest_mset.proto                          \
   google/protobuf/unittest_optimize_for.proto                  \
   google/protobuf/unittest_embed_optimize_for.proto            \
   google/protobuf/unittest_custom_options.proto                \
   google/protobuf/unittest_lite.proto                          \
   google/protobuf/unittest_import_lite.proto                   \
+  google/protobuf/unittest_import_public_lite.proto            \
   google/protobuf/unittest_lite_imports_nonlite.proto          \
   google/protobuf/unittest_no_generic_services.proto           \
   google/protobuf/compiler/cpp/cpp_test_bad_identifiers.proto
@@ -228,7 +238,9 @@ protoc_lite_outputs =                                          \
   google/protobuf/unittest_lite.pb.cc                          \
   google/protobuf/unittest_lite.pb.h                           \
   google/protobuf/unittest_import_lite.pb.cc                   \
-  google/protobuf/unittest_import_lite.pb.h
+  google/protobuf/unittest_import_lite.pb.h                    \
+  google/protobuf/unittest_import_public_lite.pb.cc            \
+  google/protobuf/unittest_import_public_lite.pb.h
 
 protoc_outputs =                                               \
   $(protoc_lite_outputs)                                       \
@@ -238,6 +250,8 @@ protoc_outputs =                                               \
   google/protobuf/unittest_empty.pb.h                          \
   google/protobuf/unittest_import.pb.cc                        \
   google/protobuf/unittest_import.pb.h                         \
+  google/protobuf/unittest_import_public.pb.cc                 \
+  google/protobuf/unittest_import_public.pb.h                  \
   google/protobuf/unittest_mset.pb.cc                          \
   google/protobuf/unittest_mset.pb.h                           \
   google/protobuf/unittest_optimize_for.pb.cc                  \
@@ -298,6 +312,9 @@ protobuf_test_SOURCES =                                        \
   google/protobuf/stubs/once_unittest.cc                       \
   google/protobuf/stubs/strutil_unittest.cc                    \
   google/protobuf/stubs/structurally_valid_unittest.cc         \
+  google/protobuf/stubs/stringprintf_unittest.cc               \
+  google/protobuf/stubs/template_util_unittest.cc              \
+  google/protobuf/stubs/type_traits_unittest.cc                \
   google/protobuf/descriptor_database_unittest.cc              \
   google/protobuf/descriptor_unittest.cc                       \
   google/protobuf/dynamic_message_unittest.cc                  \
@@ -306,6 +323,7 @@ protobuf_test_SOURCES =                                        \
   google/protobuf/message_unittest.cc                          \
   google/protobuf/reflection_ops_unittest.cc                   \
   google/protobuf/repeated_field_unittest.cc                   \
+  google/protobuf/repeated_field_reflection_unittest.cc        \
   google/protobuf/text_format_unittest.cc                      \
   google/protobuf/unknown_field_set_unittest.cc                \
   google/protobuf/wire_format_unittest.cc                      \
@@ -319,9 +337,11 @@ protobuf_test_SOURCES =                                        \
   google/protobuf/compiler/mock_code_generator.h               \
   google/protobuf/compiler/parser_unittest.cc                  \
   google/protobuf/compiler/cpp/cpp_bootstrap_unittest.cc       \
+  google/protobuf/compiler/cpp/cpp_unittest.h                  \
   google/protobuf/compiler/cpp/cpp_unittest.cc                 \
   google/protobuf/compiler/cpp/cpp_plugin_unittest.cc          \
   google/protobuf/compiler/java/java_plugin_unittest.cc        \
+  google/protobuf/compiler/java/java_doc_comment_unittest.cc   \
   google/protobuf/compiler/python/python_plugin_unittest.cc    \
   $(COMMON_TEST_SOURCES)
 nodist_protobuf_test_SOURCES = $(protoc_outputs)

+ 156 - 76
src/google/protobuf/compiler/command_line_interface.cc

@@ -59,12 +59,13 @@
 #include <google/protobuf/descriptor.h>
 #include <google/protobuf/text_format.h>
 #include <google/protobuf/dynamic_message.h>
+#include <google/protobuf/io/coded_stream.h>
 #include <google/protobuf/io/zero_copy_stream_impl.h>
 #include <google/protobuf/io/printer.h>
 #include <google/protobuf/stubs/strutil.h>
 #include <google/protobuf/stubs/substitute.h>
 #include <google/protobuf/stubs/map-util.h>
-#include <google/protobuf/stubs/stl_util-inl.h>
+#include <google/protobuf/stubs/stl_util.h>
 
 
 namespace google {
@@ -145,7 +146,7 @@ void AddTrailingSlash(string* path) {
 bool VerifyDirectoryExists(const string& path) {
   if (path.empty()) return true;
 
-  if (access(path.c_str(), W_OK) == -1) {
+  if (access(path.c_str(), F_OK) == -1) {
     cerr << path << ": " << strerror(errno) << endl;
     return false;
   } else {
@@ -566,6 +567,7 @@ CommandLineInterface::CommandLineInterface()
   : mode_(MODE_COMPILE),
     error_format_(ERROR_FORMAT_GCC),
     imports_in_descriptor_set_(false),
+    source_info_in_descriptor_set_(false),
     disallow_services_(false),
     inputs_are_proto_path_relative_(false) {}
 CommandLineInterface::~CommandLineInterface() {}
@@ -574,9 +576,23 @@ void CommandLineInterface::RegisterGenerator(const string& flag_name,
                                              CodeGenerator* generator,
                                              const string& help_text) {
   GeneratorInfo info;
+  info.flag_name = flag_name;
   info.generator = generator;
   info.help_text = help_text;
-  generators_[flag_name] = info;
+  generators_by_flag_name_[flag_name] = info;
+}
+
+void CommandLineInterface::RegisterGenerator(const string& flag_name,
+                                             const string& option_flag_name,
+                                             CodeGenerator* generator,
+                                             const string& help_text) {
+  GeneratorInfo info;
+  info.flag_name = flag_name;
+  info.option_flag_name = option_flag_name;
+  info.generator = generator;
+  info.help_text = help_text;
+  generators_by_flag_name_[flag_name] = info;
+  generators_by_option_name_[option_flag_name] = info;
 }
 
 void CommandLineInterface::AllowPlugins(const string& exe_name_prefix) {
@@ -585,7 +601,14 @@ void CommandLineInterface::AllowPlugins(const string& exe_name_prefix) {
 
 int CommandLineInterface::Run(int argc, const char* const argv[]) {
   Clear();
-  if (!ParseArguments(argc, argv)) return 1;
+  switch (ParseArguments(argc, argv)) {
+    case PARSE_ARGUMENT_DONE_AND_EXIT:
+      return 0;
+    case PARSE_ARGUMENT_FAIL:
+      return 1;
+    case PARSE_ARGUMENT_DONE_AND_CONTINUE:
+      break;
+  }
 
   // Set up the source tree.
   DiskSourceTree source_tree;
@@ -713,6 +736,7 @@ void CommandLineInterface::Clear() {
 
   mode_ = MODE_COMPILE;
   imports_in_descriptor_set_ = false;
+  source_info_in_descriptor_set_ = false;
   disallow_services_ = false;
 }
 
@@ -755,7 +779,8 @@ bool CommandLineInterface::MakeInputsBeProtoPathRelative(
   return true;
 }
 
-bool CommandLineInterface::ParseArguments(int argc, const char* const argv[]) {
+CommandLineInterface::ParseArgumentStatus
+CommandLineInterface::ParseArguments(int argc, const char* const argv[]) {
   executable_name_ = argv[0];
 
   // Iterate through all arguments and parse them.
@@ -769,41 +794,50 @@ bool CommandLineInterface::ParseArguments(int argc, const char* const argv[]) {
         if (name == "--decode") {
           cerr << "To decode an unknown message, use --decode_raw." << endl;
         }
-        return false;
+        return PARSE_ARGUMENT_FAIL;
       } else {
         ++i;
         value = argv[i];
       }
     }
 
-    if (!InterpretArgument(name, value)) return false;
+    ParseArgumentStatus status = InterpretArgument(name, value);
+    if (status != PARSE_ARGUMENT_DONE_AND_CONTINUE)
+      return status;
   }
 
   // If no --proto_path was given, use the current working directory.
   if (proto_path_.empty()) {
-    proto_path_.push_back(make_pair<string, string>("", "."));
+    // Don't use make_pair as the old/default standard library on Solaris
+    // doesn't support it without explicit template parameters, which are
+    // incompatible with C++0x's make_pair.
+    proto_path_.push_back(pair<string, string>("", "."));
   }
 
   // Check some errror cases.
   bool decoding_raw = (mode_ == MODE_DECODE) && codec_type_.empty();
   if (decoding_raw && !input_files_.empty()) {
     cerr << "When using --decode_raw, no input files should be given." << endl;
-    return false;
+    return PARSE_ARGUMENT_FAIL;
   } else if (!decoding_raw && input_files_.empty()) {
     cerr << "Missing input file." << endl;
-    return false;
+    return PARSE_ARGUMENT_FAIL;
   }
   if (mode_ == MODE_COMPILE && output_directives_.empty() &&
       descriptor_set_name_.empty()) {
     cerr << "Missing output directives." << endl;
-    return false;
+    return PARSE_ARGUMENT_FAIL;
   }
   if (imports_in_descriptor_set_ && descriptor_set_name_.empty()) {
     cerr << "--include_imports only makes sense when combined with "
             "--descriptor_set_out." << endl;
   }
+  if (source_info_in_descriptor_set_ && descriptor_set_name_.empty()) {
+    cerr << "--include_source_info only makes sense when combined with "
+            "--descriptor_set_out." << endl;
+  }
 
-  return true;
+  return PARSE_ARGUMENT_DONE_AND_CONTINUE;
 }
 
 bool CommandLineInterface::ParseArgument(const char* arg,
@@ -853,6 +887,7 @@ bool CommandLineInterface::ParseArgument(const char* arg,
   if (*name == "-h" || *name == "--help" ||
       *name == "--disallow_services" ||
       *name == "--include_imports" ||
+      *name == "--include_source_info" ||
       *name == "--version" ||
       *name == "--decode_raw") {
     // HACK:  These are the only flags that don't take a value.
@@ -865,8 +900,9 @@ bool CommandLineInterface::ParseArgument(const char* arg,
   return true;
 }
 
-bool CommandLineInterface::InterpretArgument(const string& name,
-                                             const string& value) {
+CommandLineInterface::ParseArgumentStatus
+CommandLineInterface::InterpretArgument(const string& name,
+                                        const string& value) {
   if (name.empty()) {
     // Not a flag.  Just a filename.
     if (value.empty()) {
@@ -874,7 +910,7 @@ bool CommandLineInterface::InterpretArgument(const string& name,
               "arguments to " << executable_name_ << ".  This is actually "
               "sort of hard to do.  Congrats.  Unfortunately it is not valid "
               "input so the program is going to die now." << endl;
-      return false;
+      return PARSE_ARGUMENT_FAIL;
     }
 
     input_files_.push_back(value);
@@ -902,7 +938,7 @@ bool CommandLineInterface::InterpretArgument(const string& name,
       if (disk_path.empty()) {
         cerr << "--proto_path passed empty directory name.  (Use \".\" for "
                 "current directory.)" << endl;
-        return false;
+        return PARSE_ARGUMENT_FAIL;
       }
 
       // Make sure disk path exists, warn otherwise.
@@ -910,35 +946,45 @@ bool CommandLineInterface::InterpretArgument(const string& name,
         cerr << disk_path << ": warning: directory does not exist." << endl;
       }
 
-      proto_path_.push_back(make_pair<string, string>(virtual_path, disk_path));
+      // Don't use make_pair as the old/default standard library on Solaris
+      // doesn't support it without explicit template parameters, which are
+      // incompatible with C++0x's make_pair.
+      proto_path_.push_back(pair<string, string>(virtual_path, disk_path));
     }
 
   } else if (name == "-o" || name == "--descriptor_set_out") {
     if (!descriptor_set_name_.empty()) {
       cerr << name << " may only be passed once." << endl;
-      return false;
+      return PARSE_ARGUMENT_FAIL;
     }
     if (value.empty()) {
       cerr << name << " requires a non-empty value." << endl;
-      return false;
+      return PARSE_ARGUMENT_FAIL;
     }
     if (mode_ != MODE_COMPILE) {
       cerr << "Cannot use --encode or --decode and generate descriptors at the "
               "same time." << endl;
-      return false;
+      return PARSE_ARGUMENT_FAIL;
     }
     descriptor_set_name_ = value;
 
   } else if (name == "--include_imports") {
     if (imports_in_descriptor_set_) {
       cerr << name << " may only be passed once." << endl;
-      return false;
+      return PARSE_ARGUMENT_FAIL;
     }
     imports_in_descriptor_set_ = true;
 
+  } else if (name == "--include_source_info") {
+    if (source_info_in_descriptor_set_) {
+      cerr << name << " may only be passed once." << endl;
+      return PARSE_ARGUMENT_FAIL;
+    }
+    source_info_in_descriptor_set_ = true;
+
   } else if (name == "-h" || name == "--help") {
     PrintHelpText();
-    return false;  // Exit without running compiler.
+    return PARSE_ARGUMENT_DONE_AND_EXIT;  // Exit without running compiler.
 
   } else if (name == "--version") {
     if (!version_info_.empty()) {
@@ -947,7 +993,7 @@ bool CommandLineInterface::InterpretArgument(const string& name,
     cout << "libprotoc "
          << protobuf::internal::VersionString(GOOGLE_PROTOBUF_VERSION)
          << endl;
-    return false;  // Exit without running compiler.
+    return PARSE_ARGUMENT_DONE_AND_EXIT;  // Exit without running compiler.
 
   } else if (name == "--disallow_services") {
     disallow_services_ = true;
@@ -956,12 +1002,12 @@ bool CommandLineInterface::InterpretArgument(const string& name,
              name == "--decode_raw") {
     if (mode_ != MODE_COMPILE) {
       cerr << "Only one of --encode and --decode can be specified." << endl;
-      return false;
+      return PARSE_ARGUMENT_FAIL;
     }
     if (!output_directives_.empty() || !descriptor_set_name_.empty()) {
       cerr << "Cannot use " << name
            << " and generate code or descriptors at the same time." << endl;
-      return false;
+      return PARSE_ARGUMENT_FAIL;
     }
 
     mode_ = (name == "--encode") ? MODE_ENCODE : MODE_DECODE;
@@ -971,10 +1017,10 @@ bool CommandLineInterface::InterpretArgument(const string& name,
       if (name == "--decode") {
         cerr << "To decode an unknown message, use --decode_raw." << endl;
       }
-      return false;
+      return PARSE_ARGUMENT_FAIL;
     } else if (!value.empty() && name == "--decode_raw") {
       cerr << "--decode_raw does not take a parameter." << endl;
-      return false;
+      return PARSE_ARGUMENT_FAIL;
     }
 
     codec_type_ = value;
@@ -986,16 +1032,16 @@ bool CommandLineInterface::InterpretArgument(const string& name,
       error_format_ = ERROR_FORMAT_MSVS;
     } else {
       cerr << "Unknown error format: " << value << endl;
-      return false;
+      return PARSE_ARGUMENT_FAIL;
     }
 
   } else if (name == "--plugin") {
     if (plugin_prefix_.empty()) {
       cerr << "This compiler does not support plugins." << endl;
-      return false;
+      return PARSE_ARGUMENT_FAIL;
     }
 
-    string name;
+    string plugin_name;
     string path;
 
     string::size_type equals_pos = value.find_first_of('=');
@@ -1003,57 +1049,68 @@ bool CommandLineInterface::InterpretArgument(const string& name,
       // Use the basename of the file.
       string::size_type slash_pos = value.find_last_of('/');
       if (slash_pos == string::npos) {
-        name = value;
+        plugin_name = value;
       } else {
-        name = value.substr(slash_pos + 1);
+        plugin_name = value.substr(slash_pos + 1);
       }
       path = value;
     } else {
-      name = value.substr(0, equals_pos);
+      plugin_name = value.substr(0, equals_pos);
       path = value.substr(equals_pos + 1);
     }
 
-    plugins_[name] = path;
+    plugins_[plugin_name] = path;
 
   } else {
     // Some other flag.  Look it up in the generators list.
-    const GeneratorInfo* generator_info = FindOrNull(generators_, name);
+    const GeneratorInfo* generator_info =
+        FindOrNull(generators_by_flag_name_, name);
     if (generator_info == NULL &&
         (plugin_prefix_.empty() || !HasSuffixString(name, "_out"))) {
-      cerr << "Unknown flag: " << name << endl;
-      return false;
-    }
+      // Check if it's a generator option flag.
+      generator_info = FindOrNull(generators_by_option_name_, name);
+      if (generator_info == NULL) {
+        cerr << "Unknown flag: " << name << endl;
+        return PARSE_ARGUMENT_FAIL;
+      } else {
+        string* parameters = &generator_parameters_[generator_info->flag_name];
+        if (!parameters->empty()) {
+          parameters->append(",");
+        }
+        parameters->append(value);
+      }
+    } else {
+      // It's an output flag.  Add it to the output directives.
+      if (mode_ != MODE_COMPILE) {
+        cerr << "Cannot use --encode or --decode and generate code at the "
+                "same time." << endl;
+        return PARSE_ARGUMENT_FAIL;
+      }
 
-    // It's an output flag.  Add it to the output directives.
-    if (mode_ != MODE_COMPILE) {
-      cerr << "Cannot use --encode or --decode and generate code at the "
-              "same time." << endl;
-      return false;
-    }
+      OutputDirective directive;
+      directive.name = name;
+      if (generator_info == NULL) {
+        directive.generator = NULL;
+      } else {
+        directive.generator = generator_info->generator;
+      }
 
-    OutputDirective directive;
-    directive.name = name;
-    if (generator_info == NULL) {
-      directive.generator = NULL;
-    } else {
-      directive.generator = generator_info->generator;
-    }
+      // Split value at ':' to separate the generator parameter from the
+      // filename.  However, avoid doing this if the colon is part of a valid
+      // Windows-style absolute path.
+      string::size_type colon_pos = value.find_first_of(':');
+      if (colon_pos == string::npos || IsWindowsAbsolutePath(value)) {
+        directive.output_location = value;
+      } else {
+        directive.parameter = value.substr(0, colon_pos);
+        directive.output_location = value.substr(colon_pos + 1);
+      }
 
-    // Split value at ':' to separate the generator parameter from the
-    // filename.  However, avoid doing this if the colon is part of a valid
-    // Windows-style absolute path.
-    string::size_type colon_pos = value.find_first_of(':');
-    if (colon_pos == string::npos || IsWindowsAbsolutePath(value)) {
-      directive.output_location = value;
-    } else {
-      directive.parameter = value.substr(0, colon_pos);
-      directive.output_location = value.substr(colon_pos + 1);
+      output_directives_.push_back(directive);
     }
-
-    output_directives_.push_back(directive);
   }
 
-  return true;
+  return PARSE_ARGUMENT_DONE_AND_CONTINUE;
 }
 
 void CommandLineInterface::PrintHelpText() {
@@ -1086,6 +1143,12 @@ void CommandLineInterface::PrintHelpText() {
 "  --include_imports           When using --descriptor_set_out, also include\n"
 "                              all dependencies of the input files in the\n"
 "                              set, so that the set is self-contained.\n"
+"  --include_source_info       When using --descriptor_set_out, do not strip\n"
+"                              SourceCodeInfo from the FileDescriptorProto.\n"
+"                              This results in vastly larger descriptors that\n"
+"                              include information about the original\n"
+"                              location of each decl in the source file as\n"
+"                              well as surrounding comments.\n"
 "  --error_format=FORMAT       Set the format in which to print errors.\n"
 "                              FORMAT may be 'gcc' (the default) or 'msvs'\n"
 "                              (Microsoft Visual Studio format)." << endl;
@@ -1101,8 +1164,8 @@ void CommandLineInterface::PrintHelpText() {
 "                              the executable's own name differs." << endl;
   }
 
-  for (GeneratorMap::iterator iter = generators_.begin();
-       iter != generators_.end(); ++iter) {
+  for (GeneratorMap::iterator iter = generators_by_flag_name_.begin();
+       iter != generators_by_flag_name_.end(); ++iter) {
     // FIXME(kenton):  If the text is long enough it will wrap, which is ugly,
     //   but fixing this nicely (e.g. splitting on spaces) is probably more
     //   trouble than it's worth.
@@ -1136,10 +1199,16 @@ bool CommandLineInterface::GenerateOutput(
     }
   } else {
     // Regular generator.
+    string parameters = output_directive.parameter;
+    if (!generator_parameters_[output_directive.name].empty()) {
+      if (!parameters.empty()) {
+        parameters.append(",");
+      }
+      parameters.append(generator_parameters_[output_directive.name]);
+    }
     for (int i = 0; i < parsed_files.size(); i++) {
-      if (!output_directive.generator->Generate(
-          parsed_files[i], output_directive.parameter,
-          generator_context, &error)) {
+      if (!output_directive.generator->Generate(parsed_files[i], parameters,
+                                                generator_context, &error)) {
         // Generator returned an error.
         cerr << output_directive.name << ": " << parsed_files[i]->name() << ": "
              << error << endl;
@@ -1168,8 +1237,9 @@ bool CommandLineInterface::GeneratePluginOutput(
   set<const FileDescriptor*> already_seen;
   for (int i = 0; i < parsed_files.size(); i++) {
     request.add_file_to_generate(parsed_files[i]->name());
-    GetTransitiveDependencies(parsed_files[i], &already_seen,
-                              request.mutable_proto_file());
+    GetTransitiveDependencies(parsed_files[i],
+                              true,  // Include source code info.
+                              &already_seen, request.mutable_proto_file());
   }
 
   // Invoke the plugin.
@@ -1299,12 +1369,17 @@ bool CommandLineInterface::WriteDescriptorSet(
   if (imports_in_descriptor_set_) {
     set<const FileDescriptor*> already_seen;
     for (int i = 0; i < parsed_files.size(); i++) {
-      GetTransitiveDependencies(
-          parsed_files[i], &already_seen, file_set.mutable_file());
+      GetTransitiveDependencies(parsed_files[i],
+                                source_info_in_descriptor_set_,
+                                &already_seen, file_set.mutable_file());
     }
   } else {
     for (int i = 0; i < parsed_files.size(); i++) {
-      parsed_files[i]->CopyTo(file_set.add_file());
+      FileDescriptorProto* file_proto = file_set.add_file();
+      parsed_files[i]->CopyTo(file_proto);
+      if (source_info_in_descriptor_set_) {
+        parsed_files[i]->CopySourceCodeInfoTo(file_proto);
+      }
     }
   }
 
@@ -1334,7 +1409,7 @@ bool CommandLineInterface::WriteDescriptorSet(
 }
 
 void CommandLineInterface::GetTransitiveDependencies(
-    const FileDescriptor* file,
+    const FileDescriptor* file, bool include_source_code_info,
     set<const FileDescriptor*>* already_seen,
     RepeatedPtrField<FileDescriptorProto>* output) {
   if (!already_seen->insert(file).second) {
@@ -1344,11 +1419,16 @@ void CommandLineInterface::GetTransitiveDependencies(
 
   // Add all dependencies.
   for (int i = 0; i < file->dependency_count(); i++) {
-    GetTransitiveDependencies(file->dependency(i), already_seen, output);
+    GetTransitiveDependencies(file->dependency(i), include_source_code_info,
+                              already_seen, output);
   }
 
   // Add this file.
-  file->CopyTo(output->Add());
+  FileDescriptorProto* new_descriptor = output->Add();
+  file->CopyTo(new_descriptor);
+  if (include_source_code_info) {
+    file->CopySourceCodeInfoTo(new_descriptor);
+  }
 }
 
 

+ 40 - 5
src/google/protobuf/compiler/command_line_interface.h

@@ -112,6 +112,19 @@ class LIBPROTOC_EXPORT CommandLineInterface {
                          CodeGenerator* generator,
                          const string& help_text);
 
+  // Register a code generator for a language.
+  // Besides flag_name you can specify another option_flag_name that could be
+  // used to pass extra parameters to the registered code generator.
+  // Suppose you have registered a generator by calling:
+  //   command_line_interface.RegisterGenerator("--foo_out", "--foo_opt", ...)
+  // Then you could invoke the compiler with a command like:
+  //   protoc --foo_out=enable_bar:outdir --foo_opt=enable_baz
+  // This will pass "enable_bar,enable_baz" as the parameter to the generator.
+  void RegisterGenerator(const string& flag_name,
+                         const string& option_flag_name,
+                         CodeGenerator* generator,
+                         const string& help_text);
+
   // Enables "plugins".  In this mode, if a command-line flag ends with "_out"
   // but does not match any registered generator, the compiler will attempt to
   // find a "plugin" to implement the generator.  Plugins are just executables.
@@ -186,8 +199,15 @@ class LIBPROTOC_EXPORT CommandLineInterface {
   bool MakeInputsBeProtoPathRelative(
     DiskSourceTree* source_tree);
 
+  // Return status for ParseArguments() and InterpretArgument().
+  enum ParseArgumentStatus {
+    PARSE_ARGUMENT_DONE_AND_CONTINUE,
+    PARSE_ARGUMENT_DONE_AND_EXIT,
+    PARSE_ARGUMENT_FAIL
+  };
+
   // Parse all command-line arguments.
-  bool ParseArguments(int argc, const char* const argv[]);
+  ParseArgumentStatus ParseArguments(int argc, const char* const argv[]);
 
   // Parses a command-line argument into a name/value pair.  Returns
   // true if the next argument in the argv should be used as the value,
@@ -203,7 +223,8 @@ class LIBPROTOC_EXPORT CommandLineInterface {
   bool ParseArgument(const char* arg, string* name, string* value);
 
   // Interprets arguments parsed with ParseArgument.
-  bool InterpretArgument(const string& name, const string& value);
+  ParseArgumentStatus InterpretArgument(const string& name,
+                                        const string& value);
 
   // Print the --help text to stderr.
   void PrintHelpText();
@@ -230,9 +251,11 @@ class LIBPROTOC_EXPORT CommandLineInterface {
   // protos will be ordered such that every file is listed before any file that
   // depends on it, so that you can call DescriptorPool::BuildFile() on them
   // in order.  Any files in *already_seen will not be added, and each file
-  // added will be inserted into *already_seen.
+  // added will be inserted into *already_seen.  If include_source_code_info is
+  // true then include the source code information in the FileDescriptorProtos.
   static void GetTransitiveDependencies(
       const FileDescriptor* file,
+      bool include_source_code_info,
       set<const FileDescriptor*>* already_seen,
       RepeatedPtrField<FileDescriptorProto>* output);
 
@@ -244,13 +267,21 @@ class LIBPROTOC_EXPORT CommandLineInterface {
   // Version info set with SetVersionInfo().
   string version_info_;
 
-  // Map from flag names to registered generators.
+  // Registered generators.
   struct GeneratorInfo {
+    string flag_name;
+    string option_flag_name;
     CodeGenerator* generator;
     string help_text;
   };
   typedef map<string, GeneratorInfo> GeneratorMap;
-  GeneratorMap generators_;
+  GeneratorMap generators_by_flag_name_;
+  GeneratorMap generators_by_option_name_;
+  // A map from generator names to the parameters specified using the option
+  // flag. For example, if the user invokes the compiler with:
+  //   protoc --foo_out=outputdir --foo_opt=enable_bar ...
+  // Then there will be an entry ("--foo_out", "enable_bar") in this map.
+  map<string, string> generator_parameters_;
 
   // See AllowPlugins().  If this is empty, plugins aren't allowed.
   string plugin_prefix_;
@@ -302,6 +333,10 @@ class LIBPROTOC_EXPORT CommandLineInterface {
   // the .proto files listed on the command-line are added.
   bool imports_in_descriptor_set_;
 
+  // True if --include_source_info was given, meaning that we should not strip
+  // SourceCodeInfo from the DescriptorSet.
+  bool source_info_in_descriptor_set_;
+
   // Was the --disallow_services flag used?
   bool disallow_services_;
 

+ 114 - 6
src/google/protobuf/compiler/command_line_interface_unittest.cc

@@ -122,6 +122,10 @@ class CommandLineInterfaceTest : public testing::Test {
   // substring.
   void ExpectErrorSubstring(const string& expected_substring);
 
+  // Like ExpectErrorSubstring, but checks that Run() returned zero.
+  void ExpectErrorSubstringWithZeroReturnCode(
+      const string& expected_substring);
+
   // Returns true if ExpectErrorSubstring(expected_substring) would pass, but
   // does not fail otherwise.
   bool HasAlternateErrorSubstring(const string& expected_substring);
@@ -225,7 +229,7 @@ void CommandLineInterfaceTest::SetUp() {
   // Register generators.
   CodeGenerator* generator = new MockCodeGenerator("test_generator");
   mock_generators_to_delete_.push_back(generator);
-  cli_.RegisterGenerator("--test_out", generator, "Test output.");
+  cli_.RegisterGenerator("--test_out", "--test_opt", generator, "Test output.");
   cli_.RegisterGenerator("-t", generator, "Test output.");
 
   generator = new MockCodeGenerator("alt_generator");
@@ -345,6 +349,12 @@ void CommandLineInterfaceTest::ExpectErrorSubstring(
   EXPECT_PRED_FORMAT2(testing::IsSubstring, expected_substring, error_text_);
 }
 
+void CommandLineInterfaceTest::ExpectErrorSubstringWithZeroReturnCode(
+    const string& expected_substring) {
+  EXPECT_EQ(0, return_code_);
+  EXPECT_PRED_FORMAT2(testing::IsSubstring, expected_substring, error_text_);
+}
+
 bool CommandLineInterfaceTest::HasAlternateErrorSubstring(
     const string& expected_substring) {
   EXPECT_NE(0, return_code_);
@@ -544,6 +554,32 @@ TEST_F(CommandLineInterfaceTest, GeneratorParameters) {
   ExpectGenerated("test_plugin", "TestPluginParameter", "foo.proto", "Foo");
 }
 
+TEST_F(CommandLineInterfaceTest, ExtraGeneratorParameters) {
+  // Test that generator parameters specified with the option flag are
+  // correctly passed to the code generator.
+
+  CreateTempFile("foo.proto",
+    "syntax = \"proto2\";\n"
+    "message Foo {}\n");
+  // Create the "a" and "b" sub-directories.
+  CreateTempDir("a");
+  CreateTempDir("b");
+
+  Run("protocol_compiler "
+      "--test_opt=foo1 "
+      "--test_out=bar:$tmpdir/a "
+      "--test_opt=foo2 "
+      "--test_out=baz:$tmpdir/b "
+      "--test_opt=foo3 "
+      "--proto_path=$tmpdir foo.proto");
+
+  ExpectNoErrors();
+  ExpectGenerated(
+      "test_generator", "bar,foo1,foo2,foo3", "foo.proto", "Foo", "a");
+  ExpectGenerated(
+      "test_generator", "baz,foo1,foo2,foo3", "foo.proto", "Foo", "b");
+}
+
 TEST_F(CommandLineInterfaceTest, Insert) {
   // Test running a generator that inserts code into another's output.
 
@@ -779,6 +815,33 @@ TEST_F(CommandLineInterfaceTest, WriteDescriptorSet) {
   if (HasFatalFailure()) return;
   ASSERT_EQ(1, descriptor_set.file_size());
   EXPECT_EQ("bar.proto", descriptor_set.file(0).name());
+  // Descriptor set should not have source code info.
+  EXPECT_FALSE(descriptor_set.file(0).has_source_code_info());
+}
+
+TEST_F(CommandLineInterfaceTest, WriteDescriptorSetWithSourceInfo) {
+  CreateTempFile("foo.proto",
+    "syntax = \"proto2\";\n"
+    "message Foo {}\n");
+  CreateTempFile("bar.proto",
+    "syntax = \"proto2\";\n"
+    "import \"foo.proto\";\n"
+    "message Bar {\n"
+    "  optional Foo foo = 1;\n"
+    "}\n");
+
+  Run("protocol_compiler --descriptor_set_out=$tmpdir/descriptor_set "
+      "--include_source_info --proto_path=$tmpdir bar.proto");
+
+  ExpectNoErrors();
+
+  FileDescriptorSet descriptor_set;
+  ReadDescriptorSet("descriptor_set", &descriptor_set);
+  if (HasFatalFailure()) return;
+  ASSERT_EQ(1, descriptor_set.file_size());
+  EXPECT_EQ("bar.proto", descriptor_set.file(0).name());
+  // Source code info included.
+  EXPECT_TRUE(descriptor_set.file(0).has_source_code_info());
 }
 
 TEST_F(CommandLineInterfaceTest, WriteTransitiveDescriptorSet) {
@@ -807,6 +870,40 @@ TEST_F(CommandLineInterfaceTest, WriteTransitiveDescriptorSet) {
   }
   EXPECT_EQ("foo.proto", descriptor_set.file(0).name());
   EXPECT_EQ("bar.proto", descriptor_set.file(1).name());
+  // Descriptor set should not have source code info.
+  EXPECT_FALSE(descriptor_set.file(0).has_source_code_info());
+  EXPECT_FALSE(descriptor_set.file(1).has_source_code_info());
+}
+
+TEST_F(CommandLineInterfaceTest, WriteTransitiveDescriptorSetWithSourceInfo) {
+  CreateTempFile("foo.proto",
+    "syntax = \"proto2\";\n"
+    "message Foo {}\n");
+  CreateTempFile("bar.proto",
+    "syntax = \"proto2\";\n"
+    "import \"foo.proto\";\n"
+    "message Bar {\n"
+    "  optional Foo foo = 1;\n"
+    "}\n");
+
+  Run("protocol_compiler --descriptor_set_out=$tmpdir/descriptor_set "
+      "--include_imports --include_source_info --proto_path=$tmpdir bar.proto");
+
+  ExpectNoErrors();
+
+  FileDescriptorSet descriptor_set;
+  ReadDescriptorSet("descriptor_set", &descriptor_set);
+  if (HasFatalFailure()) return;
+  ASSERT_EQ(2, descriptor_set.file_size());
+  if (descriptor_set.file(0).name() == "bar.proto") {
+    std::swap(descriptor_set.mutable_file()->mutable_data()[0],
+              descriptor_set.mutable_file()->mutable_data()[1]);
+  }
+  EXPECT_EQ("foo.proto", descriptor_set.file(0).name());
+  EXPECT_EQ("bar.proto", descriptor_set.file(1).name());
+  // Source code info included.
+  EXPECT_TRUE(descriptor_set.file(0).has_source_code_info());
+  EXPECT_TRUE(descriptor_set.file(1).has_source_code_info());
 }
 
 // -------------------------------------------------------------------
@@ -1129,6 +1226,17 @@ TEST_F(CommandLineInterfaceTest, GeneratorPluginCrash) {
 #endif
 }
 
+TEST_F(CommandLineInterfaceTest, PluginReceivesSourceCodeInfo) {
+  CreateTempFile("foo.proto",
+    "syntax = \"proto2\";\n"
+    "message MockCodeGenerator_HasSourceCodeInfo {}\n");
+
+  Run("protocol_compiler --plug_out=$tmpdir --proto_path=$tmpdir foo.proto");
+
+  ExpectErrorSubstring(
+      "Saw message type MockCodeGenerator_HasSourceCodeInfo: 1.");
+}
+
 TEST_F(CommandLineInterfaceTest, GeneratorPluginNotFound) {
   // Test what happens if the plugin isn't found.
 
@@ -1171,11 +1279,11 @@ TEST_F(CommandLineInterfaceTest, GeneratorPluginNotAllowed) {
 TEST_F(CommandLineInterfaceTest, HelpText) {
   Run("test_exec_name --help");
 
-  ExpectErrorSubstring("Usage: test_exec_name ");
-  ExpectErrorSubstring("--test_out=OUT_DIR");
-  ExpectErrorSubstring("Test output.");
-  ExpectErrorSubstring("--alt_out=OUT_DIR");
-  ExpectErrorSubstring("Alt output.");
+  ExpectErrorSubstringWithZeroReturnCode("Usage: test_exec_name ");
+  ExpectErrorSubstringWithZeroReturnCode("--test_out=OUT_DIR");
+  ExpectErrorSubstringWithZeroReturnCode("Test output.");
+  ExpectErrorSubstringWithZeroReturnCode("--alt_out=OUT_DIR");
+  ExpectErrorSubstringWithZeroReturnCode("Alt output.");
 }
 
 TEST_F(CommandLineInterfaceTest, GccFormatErrors) {

+ 1 - 1
src/google/protobuf/compiler/cpp/cpp_bootstrap_unittest.cc

@@ -48,8 +48,8 @@
 #include <google/protobuf/compiler/importer.h>
 #include <google/protobuf/descriptor.h>
 #include <google/protobuf/io/zero_copy_stream_impl.h>
-#include <google/protobuf/stubs/stl_util-inl.h>
 #include <google/protobuf/stubs/map-util.h>
+#include <google/protobuf/stubs/stl_util.h>
 #include <google/protobuf/stubs/strutil.h>
 #include <google/protobuf/stubs/substitute.h>
 

+ 4 - 4
src/google/protobuf/compiler/cpp/cpp_enum.cc

@@ -46,10 +46,10 @@ namespace compiler {
 namespace cpp {
 
 EnumGenerator::EnumGenerator(const EnumDescriptor* descriptor,
-                             const string& dllexport_decl)
+                             const Options& options)
   : descriptor_(descriptor),
     classname_(ClassName(descriptor, false)),
-    dllexport_decl_(dllexport_decl) {
+    options_(options) {
 }
 
 EnumGenerator::~EnumGenerator() {}
@@ -88,10 +88,10 @@ void EnumGenerator::GenerateDefinition(io::Printer* printer) {
   vars["min_name"] = min_value->name();
   vars["max_name"] = max_value->name();
 
-  if (dllexport_decl_.empty()) {
+  if (options_.dllexport_decl.empty()) {
     vars["dllexport"] = "";
   } else {
-    vars["dllexport"] = dllexport_decl_ + " ";
+    vars["dllexport"] = options_.dllexport_decl + " ";
   }
 
   printer->Print(vars,

+ 4 - 2
src/google/protobuf/compiler/cpp/cpp_enum.h

@@ -36,8 +36,10 @@
 #define GOOGLE_PROTOBUF_COMPILER_CPP_ENUM_H__
 
 #include <string>
+#include <google/protobuf/compiler/cpp/cpp_options.h>
 #include <google/protobuf/descriptor.h>
 
+
 namespace google {
 namespace protobuf {
   namespace io {
@@ -53,7 +55,7 @@ class EnumGenerator {
  public:
   // See generator.cc for the meaning of dllexport_decl.
   explicit EnumGenerator(const EnumDescriptor* descriptor,
-                         const string& dllexport_decl);
+                         const Options& options);
   ~EnumGenerator();
 
   // Header stuff.
@@ -86,7 +88,7 @@ class EnumGenerator {
  private:
   const EnumDescriptor* descriptor_;
   string classname_;
-  string dllexport_decl_;
+  Options options_;
 
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(EnumGenerator);
 };

+ 14 - 9
src/google/protobuf/compiler/cpp/cpp_enum_field.cc

@@ -46,8 +46,9 @@ namespace cpp {
 namespace {
 
 void SetEnumVariables(const FieldDescriptor* descriptor,
-                      map<string, string>* variables) {
-  SetCommonFieldVariables(descriptor, variables);
+                      map<string, string>* variables,
+                      const Options& options) {
+  SetCommonFieldVariables(descriptor, variables, options);
   const EnumValueDescriptor* default_value = descriptor->default_value_enum();
   (*variables)["type"] = ClassName(descriptor->enum_type(), true);
   (*variables)["default"] = SimpleItoa(default_value->number());
@@ -58,9 +59,10 @@ void SetEnumVariables(const FieldDescriptor* descriptor,
 // ===================================================================
 
 EnumFieldGenerator::
-EnumFieldGenerator(const FieldDescriptor* descriptor)
+EnumFieldGenerator(const FieldDescriptor* descriptor,
+                   const Options& options)
   : descriptor_(descriptor) {
-  SetEnumVariables(descriptor, &variables_);
+  SetEnumVariables(descriptor, &variables_, options);
 }
 
 EnumFieldGenerator::~EnumFieldGenerator() {}
@@ -84,7 +86,7 @@ GenerateInlineAccessorDefinitions(io::Printer* printer) const {
     "  return static_cast< $type$ >($name$_);\n"
     "}\n"
     "inline void $classname$::set_$name$($type$ value) {\n"
-    "  GOOGLE_DCHECK($type$_IsValid(value));\n"
+    "  assert($type$_IsValid(value));\n"
     "  set_has_$name$();\n"
     "  $name$_ = value;\n"
     "}\n");
@@ -152,9 +154,10 @@ GenerateByteSize(io::Printer* printer) const {
 // ===================================================================
 
 RepeatedEnumFieldGenerator::
-RepeatedEnumFieldGenerator(const FieldDescriptor* descriptor)
+RepeatedEnumFieldGenerator(const FieldDescriptor* descriptor,
+                           const Options& options)
   : descriptor_(descriptor) {
-  SetEnumVariables(descriptor, &variables_);
+  SetEnumVariables(descriptor, &variables_, options);
 }
 
 RepeatedEnumFieldGenerator::~RepeatedEnumFieldGenerator() {}
@@ -187,11 +190,11 @@ GenerateInlineAccessorDefinitions(io::Printer* printer) const {
     "  return static_cast< $type$ >($name$_.Get(index));\n"
     "}\n"
     "inline void $classname$::set_$name$(int index, $type$ value) {\n"
-    "  GOOGLE_DCHECK($type$_IsValid(value));\n"
+    "  assert($type$_IsValid(value));\n"
     "  $name$_.Set(index, value);\n"
     "}\n"
     "inline void $classname$::add_$name$($type$ value) {\n"
-    "  GOOGLE_DCHECK($type$_IsValid(value));\n"
+    "  assert($type$_IsValid(value));\n"
     "  $name$_.Add(value);\n"
     "}\n");
   printer->Print(variables_,
@@ -345,7 +348,9 @@ GenerateByteSize(io::Printer* printer) const {
       "  total_size += $tag_size$ +\n"
       "    ::google::protobuf::internal::WireFormatLite::Int32Size(data_size);\n"
       "}\n"
+      "GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN();\n"
       "_$name$_cached_byte_size_ = data_size;\n"
+      "GOOGLE_SAFE_CONCURRENT_WRITES_END();\n"
       "total_size += data_size;\n");
   } else {
     printer->Print(variables_,

+ 4 - 2
src/google/protobuf/compiler/cpp/cpp_enum_field.h

@@ -46,7 +46,8 @@ namespace cpp {
 
 class EnumFieldGenerator : public FieldGenerator {
  public:
-  explicit EnumFieldGenerator(const FieldDescriptor* descriptor);
+  explicit EnumFieldGenerator(const FieldDescriptor* descriptor,
+                              const Options& options);
   ~EnumFieldGenerator();
 
   // implements FieldGenerator ---------------------------------------
@@ -71,7 +72,8 @@ class EnumFieldGenerator : public FieldGenerator {
 
 class RepeatedEnumFieldGenerator : public FieldGenerator {
  public:
-  explicit RepeatedEnumFieldGenerator(const FieldDescriptor* descriptor);
+  explicit RepeatedEnumFieldGenerator(const FieldDescriptor* descriptor,
+                                      const Options& options);
   ~RepeatedEnumFieldGenerator();
 
   // implements FieldGenerator ---------------------------------------

+ 4 - 4
src/google/protobuf/compiler/cpp/cpp_extension.cc

@@ -57,9 +57,9 @@ string ExtendeeClassName(const FieldDescriptor* descriptor) {
 }  // anonymous namespace
 
 ExtensionGenerator::ExtensionGenerator(const FieldDescriptor* descriptor,
-                                       const string& dllexport_decl)
+                                       const Options& options)
   : descriptor_(descriptor),
-    dllexport_decl_(dllexport_decl) {
+    options_(options) {
   // Construct type_traits_.
   if (descriptor_->is_repeated()) {
     type_traits_ = "Repeated";
@@ -106,8 +106,8 @@ void ExtensionGenerator::GenerateDeclaration(io::Printer* printer) {
   // export/import specifier.
   if (descriptor_->extension_scope() == NULL) {
     vars["qualifier"] = "extern";
-    if (!dllexport_decl_.empty()) {
-      vars["qualifier"] = dllexport_decl_ + " " + vars["qualifier"];
+    if (!options_.dllexport_decl.empty()) {
+      vars["qualifier"] = options_.dllexport_decl + " " + vars["qualifier"];
     }
   } else {
     vars["qualifier"] = "static";

+ 4 - 3
src/google/protobuf/compiler/cpp/cpp_extension.h

@@ -37,6 +37,7 @@
 
 #include <string>
 #include <google/protobuf/stubs/common.h>
+#include <google/protobuf/compiler/cpp/cpp_options.h>
 
 namespace google {
 namespace protobuf {
@@ -56,8 +57,8 @@ namespace cpp {
 class ExtensionGenerator {
  public:
   // See generator.cc for the meaning of dllexport_decl.
-  explicit ExtensionGenerator(const FieldDescriptor* descriptor,
-                              const string& dllexport_decl);
+  explicit ExtensionGenerator(const FieldDescriptor* desycriptor,
+                              const Options& options);
   ~ExtensionGenerator();
 
   // Header stuff.
@@ -72,7 +73,7 @@ class ExtensionGenerator {
  private:
   const FieldDescriptor* descriptor_;
   string type_traits_;
-  string dllexport_decl_;
+  Options options_;
 
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ExtensionGenerator);
 };

+ 17 - 14
src/google/protobuf/compiler/cpp/cpp_field.cc

@@ -52,7 +52,8 @@ namespace cpp {
 using internal::WireFormat;
 
 void SetCommonFieldVariables(const FieldDescriptor* descriptor,
-                             map<string, string>* variables) {
+                             map<string, string>* variables,
+                             const Options& options) {
   (*variables)["name"] = FieldName(descriptor);
   (*variables)["index"] = SimpleItoa(descriptor->index());
   (*variables)["number"] = SimpleItoa(descriptor->number());
@@ -64,6 +65,7 @@ void SetCommonFieldVariables(const FieldDescriptor* descriptor,
   (*variables)["deprecation"] = descriptor->options().deprecated()
       ? " PROTOBUF_DEPRECATED" : "";
 
+  (*variables)["cppget"] = "Get";
 }
 
 FieldGenerator::~FieldGenerator() {}
@@ -80,46 +82,47 @@ GenerateMergeFromCodedStreamWithPacking(io::Printer* printer) const {
 
 }
 
-FieldGeneratorMap::FieldGeneratorMap(const Descriptor* descriptor)
+FieldGeneratorMap::FieldGeneratorMap(const Descriptor* descriptor,
+                                     const Options& options)
   : descriptor_(descriptor),
-    field_generators_(
-      new scoped_ptr<FieldGenerator>[descriptor->field_count()]) {
+    field_generators_(new scoped_ptr<FieldGenerator>[descriptor->field_count()]) {
   // Construct all the FieldGenerators.
   for (int i = 0; i < descriptor->field_count(); i++) {
-    field_generators_[i].reset(MakeGenerator(descriptor->field(i)));
+    field_generators_[i].reset(MakeGenerator(descriptor->field(i), options));
   }
 }
 
-FieldGenerator* FieldGeneratorMap::MakeGenerator(const FieldDescriptor* field) {
+FieldGenerator* FieldGeneratorMap::MakeGenerator(const FieldDescriptor* field,
+                                                 const Options& options) {
   if (field->is_repeated()) {
     switch (field->cpp_type()) {
       case FieldDescriptor::CPPTYPE_MESSAGE:
-        return new RepeatedMessageFieldGenerator(field);
+        return new RepeatedMessageFieldGenerator(field, options);
       case FieldDescriptor::CPPTYPE_STRING:
         switch (field->options().ctype()) {
           default:  // RepeatedStringFieldGenerator handles unknown ctypes.
           case FieldOptions::STRING:
-            return new RepeatedStringFieldGenerator(field);
+            return new RepeatedStringFieldGenerator(field, options);
         }
       case FieldDescriptor::CPPTYPE_ENUM:
-        return new RepeatedEnumFieldGenerator(field);
+        return new RepeatedEnumFieldGenerator(field, options);
       default:
-        return new RepeatedPrimitiveFieldGenerator(field);
+        return new RepeatedPrimitiveFieldGenerator(field, options);
     }
   } else {
     switch (field->cpp_type()) {
       case FieldDescriptor::CPPTYPE_MESSAGE:
-        return new MessageFieldGenerator(field);
+        return new MessageFieldGenerator(field, options);
       case FieldDescriptor::CPPTYPE_STRING:
         switch (field->options().ctype()) {
           default:  // StringFieldGenerator handles unknown ctypes.
           case FieldOptions::STRING:
-            return new StringFieldGenerator(field);
+            return new StringFieldGenerator(field, options);
         }
       case FieldDescriptor::CPPTYPE_ENUM:
-        return new EnumFieldGenerator(field);
+        return new EnumFieldGenerator(field, options);
       default:
-        return new PrimitiveFieldGenerator(field);
+        return new PrimitiveFieldGenerator(field, options);
     }
   }
 }

Некоторые файлы не были показаны из-за большого количества измененных файлов