浏览代码

Optimize Java string serialization. Patch from Evan Jones.

kenton@google.com 15 年之前
父节点
当前提交
daee05168e

+ 5 - 0
CHANGES.txt

@@ -1,3 +1,8 @@
+????-??-?? version 2.3.1:
+
+  Java
+  * Improved performance of string serialization.
+
 2010-01-08 version 2.3.0:
 2010-01-08 version 2.3.0:
 
 
   General
   General

+ 2 - 0
CONTRIBUTORS.txt

@@ -80,6 +80,8 @@ Patch contributors:
     * Fixes for Solaris 10 32/64-bit confusion.
     * Fixes for Solaris 10 32/64-bit confusion.
   Evan Jones <evanj@mit.edu>
   Evan Jones <evanj@mit.edu>
     * Optimize Java serialization code when writing a small message to a stream.
     * Optimize Java serialization code when writing a small message to a stream.
+    * Optimize Java serialization of strings so that UTF-8 encoding happens only
+      once per string per serialization call.
     * Clean up some Java warnings.
     * Clean up some Java warnings.
   Michael Kucharski <m.kucharski@gmail.com>
   Michael Kucharski <m.kucharski@gmail.com>
     * Added CodedInputStream.getTotalBytesRead().
     * Added CodedInputStream.getTotalBytesRead().

+ 17 - 0
java/src/main/java/com/google/protobuf/CodedOutputStream.java

@@ -193,6 +193,23 @@ public final class CodedOutputStream {
     writeStringNoTag(value);
     writeStringNoTag(value);
   }
   }
 
 
+  /**
+   * Write a {@code string} field, including tag, to the stream, where bytes
+   * is the encoded version of value. Used by the SPEED version of messages
+   * to avoid performing the UTF-8 conversion twice. bytes is simply a hint
+   * and may be null. If it is null, value will be converted as usual.
+   */
+  public void writeStringCached(final int fieldNumber, final String value,
+                                ByteString bytes)
+                                throws IOException {
+    // The cache can be null if serializing without getting the size first, or
+    // if there are multiple threads.
+    if (bytes == null) {
+      bytes = ByteString.copyFromUtf8(value);
+    }
+    writeBytes(fieldNumber, bytes);
+  }
+
   /** Write a {@code group} field, including tag, to the stream. */
   /** Write a {@code group} field, including tag, to the stream. */
   public void writeGroup(final int fieldNumber, final MessageLite value)
   public void writeGroup(final int fieldNumber, final MessageLite value)
                          throws IOException {
                          throws IOException {

+ 24 - 0
java/src/test/java/com/google/protobuf/CodedOutputStreamTest.java

@@ -36,6 +36,7 @@ import protobuf_unittest.UnittestProto.TestPackedTypes;
 import junit.framework.TestCase;
 import junit.framework.TestCase;
 
 
 import java.io.ByteArrayOutputStream;
 import java.io.ByteArrayOutputStream;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.List;
 
 
@@ -211,6 +212,29 @@ public class CodedOutputStreamTest extends TestCase {
       0x9abcdef012345678L);
       0x9abcdef012345678L);
   }
   }
 
 
+  /** Test writing cached strings. */
+  public void testWriteStringCached() throws IOException {
+    final ByteArrayOutputStream output = new ByteArrayOutputStream();
+    final CodedOutputStream stream = CodedOutputStream.newInstance(output);
+
+    // Test writing a string that is not cached
+    stream.writeStringCached(5, "hello", null);
+    stream.flush();
+    CodedInputStream in = CodedInputStream.newInstance(output.toByteArray());
+    assertEquals(WireFormat.makeTag(5, WireFormat.WIRETYPE_LENGTH_DELIMITED),
+                 in.readTag());
+    assertEquals("hello", in.readString());
+
+    // Write a cached string: the real string is ignored
+    output.reset();
+    stream.writeStringCached(5, "ignored", ByteString.copyFromUtf8("hello"));
+    stream.flush();
+    in = CodedInputStream.newInstance(output.toByteArray());
+    assertEquals(WireFormat.makeTag(5, WireFormat.WIRETYPE_LENGTH_DELIMITED),
+                 in.readTag());
+    assertEquals("hello", in.readString());
+  }
+
   /** Test encodeZigZag32() and encodeZigZag64(). */
   /** Test encodeZigZag32() and encodeZigZag64(). */
   public void testEncodeZigZag() throws Exception {
   public void testEncodeZigZag() throws Exception {
     assertEquals(0, CodedOutputStream.encodeZigZag32( 0));
     assertEquals(0, CodedOutputStream.encodeZigZag32( 0));

+ 49 - 9
src/google/protobuf/compiler/java/java_primitive_field.cc

@@ -199,6 +199,14 @@ GenerateMembers(io::Printer* printer) const {
     "private $type$ $name$_ = $default$;\n"
     "private $type$ $name$_ = $default$;\n"
     "public boolean has$capitalized_name$() { return has$capitalized_name$; }\n"
     "public boolean has$capitalized_name$() { return has$capitalized_name$; }\n"
     "public $type$ get$capitalized_name$() { return $name$_; }\n");
     "public $type$ get$capitalized_name$() { return $name$_; }\n");
+  // Avoid double encoding for Java strings
+  // This field does not need to be volatile because ByteString is immutable.
+  // http://www.cs.umd.edu/~pugh/java/memoryModel/jsr-133-faq.html#finalRight
+  // However, it seems better to be safe than sorry.
+  if (ShouldUseStringEncodingCache()) {
+    printer->Print(variables_,
+      "private volatile com.google.protobuf.ByteString $name$EncodedCache_;\n");
+  }
 }
 }
 
 
 void PrimitiveFieldGenerator::
 void PrimitiveFieldGenerator::
@@ -259,25 +267,57 @@ GenerateParsingCode(io::Printer* printer) const {
 
 
 void PrimitiveFieldGenerator::
 void PrimitiveFieldGenerator::
 GenerateSerializationCode(io::Printer* printer) const {
 GenerateSerializationCode(io::Printer* printer) const {
-  printer->Print(variables_,
-    "if (has$capitalized_name$()) {\n"
-    "  output.write$capitalized_type$($number$, get$capitalized_name$());\n"
-    "}\n");
+  if (ShouldUseStringEncodingCache()) {
+    // Pass the cached serialized version, then forget it.
+    // The cached version could be null if we didn't compute the size first,
+    // or if there are two threads attempting to serialize simultaneously.
+    // CodedOutputStream.writeStringCached handles this for us.
+    printer->Print(variables_,
+      "if (has$capitalized_name$()) {\n"
+      "  output.write$capitalized_type$Cached($number$,\n"
+      "                                       get$capitalized_name$(),\n"
+      "                                       $name$EncodedCache_);\n"
+      "  $name$EncodedCache_ = null;\n"
+      "}\n");
+  } else {
+    printer->Print(variables_,
+      "if (has$capitalized_name$()) {\n"
+      "  output.write$capitalized_type$($number$, get$capitalized_name$());\n"
+      "}\n");
+  }
 }
 }
 
 
 void PrimitiveFieldGenerator::
 void PrimitiveFieldGenerator::
 GenerateSerializedSizeCode(io::Printer* printer) const {
 GenerateSerializedSizeCode(io::Printer* printer) const {
-  printer->Print(variables_,
-    "if (has$capitalized_name$()) {\n"
-    "  size += com.google.protobuf.CodedOutputStream\n"
-    "    .compute$capitalized_type$Size($number$, get$capitalized_name$());\n"
-    "}\n");
+  // Avoid double encoding for strings: serialize the string here
+  if (ShouldUseStringEncodingCache()) {
+    printer->Print(variables_,
+      "if (has$capitalized_name$()) {\n"
+      "  com.google.protobuf.ByteString serialized = \n"
+      "    com.google.protobuf.ByteString.copyFromUtf8(\n"
+      "      get$capitalized_name$());\n"
+      "  $name$EncodedCache_ = serialized;\n"
+      "  size += com.google.protobuf.CodedOutputStream\n"
+      "    .computeBytesSize($number$, serialized);\n"
+      "}\n");
+  } else {
+    printer->Print(variables_,
+      "if (has$capitalized_name$()) {\n"
+      "  size += com.google.protobuf.CodedOutputStream\n"
+      "    .compute$capitalized_type$Size($number$, get$capitalized_name$());\n"
+      "}\n");
+  }
 }
 }
 
 
 string PrimitiveFieldGenerator::GetBoxedType() const {
 string PrimitiveFieldGenerator::GetBoxedType() const {
   return BoxedPrimitiveTypeName(GetJavaType(descriptor_));
   return BoxedPrimitiveTypeName(GetJavaType(descriptor_));
 }
 }
 
 
+bool PrimitiveFieldGenerator::ShouldUseStringEncodingCache() const {
+  return GetType(descriptor_) == FieldDescriptor::TYPE_STRING &&
+      descriptor_->file()->options().optimize_for() == FileOptions::SPEED;
+}
+
 // ===================================================================
 // ===================================================================
 
 
 RepeatedPrimitiveFieldGenerator::
 RepeatedPrimitiveFieldGenerator::

+ 2 - 0
src/google/protobuf/compiler/java/java_primitive_field.h

@@ -62,6 +62,8 @@ class PrimitiveFieldGenerator : public FieldGenerator {
   string GetBoxedType() const;
   string GetBoxedType() const;
 
 
  private:
  private:
+  bool ShouldUseStringEncodingCache() const;
+
   const FieldDescriptor* descriptor_;
   const FieldDescriptor* descriptor_;
   map<string, string> variables_;
   map<string, string> variables_;