Explorar el Código

Merge pull request #264 from tamird/getbytes

perf: String#getBytes(Charset) vs getBytes(String)
Feng Xiao hace 10 años
padre
commit
4990875f00
Se han modificado 25 ficheros con 305 adiciones y 263 borrados
  1. 10 2
      .travis.yml
  2. 51 20
      java/src/main/java/com/google/protobuf/ByteString.java
  3. 3 3
      java/src/main/java/com/google/protobuf/CodedInputStream.java
  4. 6 11
      java/src/main/java/com/google/protobuf/CodedOutputStream.java
  5. 27 33
      java/src/main/java/com/google/protobuf/Descriptors.java
  6. 35 61
      java/src/main/java/com/google/protobuf/Internal.java
  7. 9 14
      java/src/main/java/com/google/protobuf/LiteralByteString.java
  8. 4 9
      java/src/main/java/com/google/protobuf/RopeByteString.java
  9. 1 1
      java/src/main/java/com/google/protobuf/Utf8.java
  10. 14 1
      java/src/test/java/com/google/protobuf/BoundedByteStringTest.java
  11. 10 9
      java/src/test/java/com/google/protobuf/ByteStringTest.java
  12. 7 7
      java/src/test/java/com/google/protobuf/CodedOutputStreamTest.java
  13. 16 20
      java/src/test/java/com/google/protobuf/DescriptorsTest.java
  14. 5 2
      java/src/test/java/com/google/protobuf/IsValidUtf8Test.java
  15. 10 11
      java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
  16. 10 3
      java/src/test/java/com/google/protobuf/LiteralByteStringTest.java
  17. 31 1
      java/src/test/java/com/google/protobuf/RopeByteStringSubstringTest.java
  18. 28 2
      java/src/test/java/com/google/protobuf/RopeByteStringTest.java
  19. 1 5
      java/src/test/java/com/google/protobuf/TestUtil.java
  20. 3 3
      java/src/test/java/com/google/protobuf/TextFormatTest.java
  21. 1 1
      java/src/test/java/com/google/protobuf/UnknownFieldSetLiteTest.java
  22. 2 2
      javanano/src/main/java/com/google/protobuf/nano/CodedInputByteBufferNano.java
  23. 4 9
      javanano/src/main/java/com/google/protobuf/nano/CodedOutputByteBufferNano.java
  24. 6 22
      javanano/src/main/java/com/google/protobuf/nano/InternalNano.java
  25. 11 11
      javanano/src/test/java/com/google/protobuf/nano/NanoTest.java

+ 10 - 2
.travis.yml

@@ -1,8 +1,16 @@
 sudo: false
-language: cpp
+language: java
+jdk:
+  - openjdk6
+  - openjdk7
+  - oraclejdk7
 os:
   - linux
   - osx
-script: ./autogen.sh && ./configure && make distcheck -j2
+script:
+  - ./autogen.sh && ./configure && make -j2
+  - cd java && mvn test && cd ..
+  - cd javanano && mvn test && cd ..
+  - make distcheck -j2
 notifications:
   email: false

+ 51 - 20
java/src/main/java/com/google/protobuf/ByteString.java

@@ -37,6 +37,8 @@ import java.io.OutputStream;
 import java.io.Serializable;
 import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.UnsupportedCharsetException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Iterator;
@@ -76,9 +78,6 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
   static final int MIN_READ_FROM_CHUNK_SIZE = 0x100;  // 256b
   static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000;  // 8k
 
-  // Defined by java.nio.charset.Charset
-  protected static final String UTF_8 = "UTF-8";
-
   /**
    * Empty {@code ByteString}.
    */
@@ -261,6 +260,18 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
     return new LiteralByteString(text.getBytes(charsetName));
   }
 
+  /**
+   * Encodes {@code text} into a sequence of bytes using the given charset
+   * and returns the result as a {@code ByteString}.
+   *
+   * @param text source string
+   * @param charset encode using this charset
+   * @return new {@code ByteString}
+   */
+  public static ByteString copyFrom(String text, Charset charset) {
+    return new LiteralByteString(text.getBytes(charset));
+  }
+
   /**
    * Encodes {@code text} into a sequence of UTF-8 bytes and returns the
    * result as a {@code ByteString}.
@@ -269,11 +280,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
    * @return new {@code ByteString}
    */
   public static ByteString copyFromUtf8(String text) {
-    try {
-      return new LiteralByteString(text.getBytes(UTF_8));
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException("UTF-8 not supported?", e);
-    }
+    return new LiteralByteString(text.getBytes(Internal.UTF_8));
   }
 
   // =================================================================
@@ -507,7 +514,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
   /**
    * Internal (package private) implementation of
    * {@link #copyTo(byte[],int,int,int)}.
-   * It assumes that all error checking has already been performed and that 
+   * It assumes that all error checking has already been performed and that
    * {@code numberToCopy > 0}.
    */
   protected abstract void copyToInternal(byte[] target, int sourceOffset,
@@ -546,7 +553,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
    * @throws IOException  if an I/O error occurs.
    */
   public abstract void writeTo(OutputStream out) throws IOException;
-  
+
   /**
    * Writes a specified part of this byte string to an output stream.
    *
@@ -572,7 +579,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
     if (numberToWrite > 0) {
       writeToInternal(out, sourceOffset, numberToWrite);
     }
-    
+
   }
 
   /**
@@ -599,7 +606,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
    * <p>
    * By returning a list, implementations of this method may be able to avoid
    * copying even when there are multiple backing arrays.
-   * 
+   *
    * @return a list of wrapped bytes
    */
   public abstract List<ByteBuffer> asReadOnlyByteBufferList();
@@ -612,8 +619,36 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
    * @return new string
    * @throws UnsupportedEncodingException if charset isn't recognized
    */
-  public abstract String toString(String charsetName)
-      throws UnsupportedEncodingException;
+  public String toString(String charsetName)
+      throws UnsupportedEncodingException {
+    try {
+      return toString(Charset.forName(charsetName));
+    } catch (UnsupportedCharsetException e) {
+      UnsupportedEncodingException exception = new UnsupportedEncodingException(charsetName);
+      exception.initCause(e);
+      throw exception;
+    }
+  }
+
+  /**
+   * Constructs a new {@code String} by decoding the bytes using the
+   * specified charset. Returns the empty string if this byte string is empty.
+   *
+   * @param charset decode using this charset
+   * @return new string
+   */
+  public String toString(Charset charset) {
+    return size() == 0 ? "" : toStringInternal(charset);
+  }
+
+  /**
+   * Constructs a new {@code String} by decoding the bytes using the
+   * specified charset.
+   *
+   * @param charset decode using this charset
+   * @return new string
+   */
+  protected abstract String toStringInternal(Charset charset);
 
   // =================================================================
   // UTF-8 decoding
@@ -624,11 +659,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
    * @return new string using UTF-8 encoding
    */
   public String toStringUtf8() {
-    try {
-      return toString(UTF_8);
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException("UTF-8 not supported?", e);
-    }
+    return toString(Internal.UTF_8);
   }
 
   /**
@@ -831,7 +862,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
       flushLastBuffer();
       return ByteString.copyFrom(flushedBuffers);
     }
-    
+
     /**
      * Implement java.util.Arrays.copyOf() for jdk 1.5.
      */

+ 3 - 3
java/src/main/java/com/google/protobuf/CodedInputStream.java

@@ -373,14 +373,14 @@ public final class CodedInputStream {
     if (size <= (bufferSize - bufferPos) && size > 0) {
       // Fast path:  We already have the bytes in a contiguous buffer, so
       //   just copy directly from it.
-      final String result = new String(buffer, bufferPos, size, "UTF-8");
+      final String result = new String(buffer, bufferPos, size, Internal.UTF_8);
       bufferPos += size;
       return result;
     } else if (size == 0) {
       return "";
     } else {
       // Slow path:  Build a byte array first then copy it.
-      return new String(readRawBytesSlowPath(size), "UTF-8");
+      return new String(readRawBytesSlowPath(size), Internal.UTF_8);
     }
   }
 
@@ -409,7 +409,7 @@ public final class CodedInputStream {
     if (!Utf8.isValidUtf8(bytes, pos, pos + size)) {
       throw InvalidProtocolBufferException.invalidUtf8();
     }
-    return new String(bytes, pos, size, "UTF-8");
+    return new String(bytes, pos, size, Internal.UTF_8);
   }
 
   /** Read a {@code group} field value from the stream. */

+ 6 - 11
java/src/main/java/com/google/protobuf/CodedOutputStream.java

@@ -32,7 +32,6 @@ package com.google.protobuf;
 
 import java.io.IOException;
 import java.io.OutputStream;
-import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
 
 /**
@@ -144,7 +143,7 @@ public final class CodedOutputStream {
       int bufferSize) {
     return newInstance(new ByteBufferOutputStream(byteBuffer), bufferSize);
   }
-  
+
   private static class ByteBufferOutputStream extends OutputStream {
     private final ByteBuffer byteBuffer;
     public ByteBufferOutputStream(ByteBuffer byteBuffer) {
@@ -420,7 +419,7 @@ public final class CodedOutputStream {
     // Unfortunately there does not appear to be any way to tell Java to encode
     // UTF-8 directly into our buffer, so we have to let it create its own byte
     // array and then copy.
-    final byte[] bytes = value.getBytes("UTF-8");
+    final byte[] bytes = value.getBytes(Internal.UTF_8);
     writeRawVarint32(bytes.length);
     writeRawBytes(bytes);
   }
@@ -750,7 +749,7 @@ public final class CodedOutputStream {
            computeUInt32Size(WireFormat.MESSAGE_SET_TYPE_ID, fieldNumber) +
            computeLazyFieldSize(WireFormat.MESSAGE_SET_MESSAGE, value);
   }
-  
+
   // -----------------------------------------------------------------
 
   /**
@@ -827,13 +826,9 @@ public final class CodedOutputStream {
    * {@code string} field.
    */
   public static int computeStringSizeNoTag(final String value) {
-    try {
-      final byte[] bytes = value.getBytes("UTF-8");
-      return computeRawVarint32Size(bytes.length) +
-             bytes.length;
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException("UTF-8 not supported.", e);
-    }
+    final byte[] bytes = value.getBytes(Internal.UTF_8);
+    return computeRawVarint32Size(bytes.length) +
+           bytes.length;
   }
 
   /**

+ 27 - 33
java/src/main/java/com/google/protobuf/Descriptors.java

@@ -43,7 +43,6 @@ import java.util.Map;
 import java.util.Set;
 import java.util.WeakHashMap;
 import java.util.logging.Logger;
-import java.io.UnsupportedEncodingException;
 
 /**
  * Contains a collection of classes which describe protocol message types.
@@ -256,7 +255,7 @@ public final class Descriptors {
                                     throws DescriptorValidationException {
       return buildFrom(proto, dependencies, false);
     }
-    
+
 
     /**
      * Construct a {@code FileDescriptor}.
@@ -319,12 +318,7 @@ public final class Descriptors {
       }
 
       final byte[] descriptorBytes;
-      try {
-        descriptorBytes = descriptorData.toString().getBytes("ISO-8859-1");
-      } catch (UnsupportedEncodingException e) {
-        throw new RuntimeException(
-          "Standard encoding ISO-8859-1 not supported by JVM.", e);
-      }
+      descriptorBytes = descriptorData.toString().getBytes(Internal.ISO_8859_1);
 
       FileDescriptorProto proto;
       try {
@@ -495,9 +489,9 @@ public final class Descriptors {
           proto.getExtension(i), this, null, i, true);
       }
     }
-    
+
     /**
-     * Create a placeholder FileDescriptor for a message Descriptor. 
+     * Create a placeholder FileDescriptor for a message Descriptor.
      */
     FileDescriptor(String packageName, Descriptor message)
         throws DescriptorValidationException {
@@ -561,7 +555,7 @@ public final class Descriptors {
         extensions[i].setProto(proto.getExtension(i));
       }
     }
-    
+
     boolean supportsUnknownEnumValue() {
       return getSyntax() == Syntax.PROTO3;
     }
@@ -745,7 +739,7 @@ public final class Descriptors {
       this.fields = new FieldDescriptor[0];
       this.extensions = new FieldDescriptor[0];
       this.oneofs = new OneofDescriptor[0];
-      
+
       // Create a placeholder FileDescriptor to hold this message.
       this.file = new FileDescriptor(packageName, this);
     }
@@ -1333,8 +1327,8 @@ public final class Descriptors {
                 "Message type had default value.");
           }
         } catch (NumberFormatException e) {
-          throw new DescriptorValidationException(this, 
-              "Could not parse default value: \"" + 
+          throw new DescriptorValidationException(this,
+              "Could not parse default value: \"" +
               proto.getDefaultValue() + '\"', e);
         }
       } else {
@@ -1456,7 +1450,7 @@ public final class Descriptors {
       return file.pool.enumValuesByNumber.get(
         new DescriptorPool.DescriptorIntPair(this, number));
     }
-    
+
     /**
      * Get the enum value for a number. If no enum value has this number,
      * construct an EnumValueDescriptor for it.
@@ -1494,7 +1488,7 @@ public final class Descriptors {
         //     then remove the whole entry. This way unknown descriptors will
         //     be freed automatically and we don't need to do anything to
         //     clean-up unused map entries.
-        
+
         // Note: We must use "new Integer(number)" here because we don't want
         // these Integer objects to be cached.
         Integer key = new Integer(number);
@@ -1509,7 +1503,7 @@ public final class Descriptors {
       }
       return result;
     }
-    
+
     // Used in tests only.
     int getUnknownEnumValueDescriptorCount() {
       return unknownValues.size();
@@ -1585,7 +1579,7 @@ public final class Descriptors {
 
     /** Get the value's number. */
     public int getNumber() { return proto.getNumber(); }
-    
+
     @Override
     public String toString() { return proto.getName(); }
 
@@ -1627,7 +1621,7 @@ public final class Descriptors {
       file.pool.addSymbol(this);
       file.pool.addEnumValueByNumber(this);
     }
-    
+
     private Integer number;
     // Create an unknown enum value.
     private EnumValueDescriptor(
@@ -1643,7 +1637,7 @@ public final class Descriptors {
       this.type = parent;
       this.fullName = parent.getFullName() + '.' + proto.getName();
       this.number = number;
-      
+
       // Don't add this descriptor into pool.
     }
 
@@ -1930,13 +1924,13 @@ public final class Descriptors {
    * descriptors defined in a particular file.
    */
   private static final class DescriptorPool {
-    
-    /** Defines what subclass of descriptors to search in the descriptor pool. 
+
+    /** Defines what subclass of descriptors to search in the descriptor pool.
      */
     enum SearchFilter {
       TYPES_ONLY, AGGREGATES_ONLY, ALL_SYMBOLS
     }
-    
+
     DescriptorPool(final FileDescriptor[] dependencies,
         boolean allowUnknownDependencies) {
       this.dependencies = new HashSet<FileDescriptor>();
@@ -1982,9 +1976,9 @@ public final class Descriptors {
     GenericDescriptor findSymbol(final String fullName) {
       return findSymbol(fullName, SearchFilter.ALL_SYMBOLS);
     }
-    
-    /** Find a descriptor by fully-qualified name and given option to only 
-     * search valid field type descriptors. 
+
+    /** Find a descriptor by fully-qualified name and given option to only
+     * search valid field type descriptors.
      */
     GenericDescriptor findSymbol(final String fullName,
                                  final SearchFilter filter) {
@@ -2013,18 +2007,18 @@ public final class Descriptors {
 
     /** Checks if the descriptor is a valid type for a message field. */
     boolean isType(GenericDescriptor descriptor) {
-      return (descriptor instanceof Descriptor) || 
+      return (descriptor instanceof Descriptor) ||
         (descriptor instanceof EnumDescriptor);
     }
-    
+
     /** Checks if the descriptor is a valid namespace type. */
     boolean isAggregate(GenericDescriptor descriptor) {
-      return (descriptor instanceof Descriptor) || 
-        (descriptor instanceof EnumDescriptor) || 
-        (descriptor instanceof PackageDescriptor) || 
+      return (descriptor instanceof Descriptor) ||
+        (descriptor instanceof EnumDescriptor) ||
+        (descriptor instanceof PackageDescriptor) ||
         (descriptor instanceof ServiceDescriptor);
     }
-       
+
     /**
      * Look up a type descriptor by name, relative to some other descriptor.
      * The name may be fully-qualified (with a leading '.'),
@@ -2082,7 +2076,7 @@ public final class Descriptors {
 
             // Append firstPart and try to find
             scopeToTry.append(firstPart);
-            result = findSymbol(scopeToTry.toString(), 
+            result = findSymbol(scopeToTry.toString(),
                 DescriptorPool.SearchFilter.AGGREGATES_ONLY);
 
             if (result != null) {

+ 35 - 61
java/src/main/java/com/google/protobuf/Internal.java

@@ -30,9 +30,8 @@
 
 package com.google.protobuf;
 
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
 import java.util.AbstractList;
 import java.util.AbstractMap;
 import java.util.AbstractSet;
@@ -51,6 +50,10 @@ import java.util.Set;
  * @author kenton@google.com (Kenton Varda)
  */
 public class Internal {
+
+  protected static final Charset UTF_8 = Charset.forName("UTF-8");
+  protected static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
+
   /**
    * Helper called by generated code to construct default values for string
    * fields.
@@ -80,14 +83,7 @@ public class Internal {
    * generated code calls this automatically.
    */
   public static String stringDefaultValue(String bytes) {
-    try {
-      return new String(bytes.getBytes("ISO-8859-1"), "UTF-8");
-    } catch (UnsupportedEncodingException e) {
-      // This should never happen since all JVMs are required to implement
-      // both of the above character sets.
-      throw new IllegalStateException(
-          "Java VM does not support a standard character set.", e);
-    }
+    return new String(bytes.getBytes(ISO_8859_1), UTF_8);
   }
 
   /**
@@ -99,37 +95,23 @@ public class Internal {
    * embed raw bytes as a string literal with ISO-8859-1 encoding.
    */
   public static ByteString bytesDefaultValue(String bytes) {
-    try {
-      return ByteString.copyFrom(bytes.getBytes("ISO-8859-1"));
-    } catch (UnsupportedEncodingException e) {
-      // This should never happen since all JVMs are required to implement
-      // ISO-8859-1.
-      throw new IllegalStateException(
-          "Java VM does not support a standard character set.", e);
-    }
+    return ByteString.copyFrom(bytes.getBytes(ISO_8859_1));
   }
   /**
    * Helper called by generated code to construct default values for bytes
    * fields.
    * <p>
-   * This is like {@link #bytesDefaultValue}, but returns a byte array. 
+   * This is like {@link #bytesDefaultValue}, but returns a byte array.
    */
   public static byte[] byteArrayDefaultValue(String bytes) {
-    try {
-      return bytes.getBytes("ISO-8859-1");
-    } catch (UnsupportedEncodingException e) {
-      // This should never happen since all JVMs are required to implement
-      // ISO-8859-1.
-      throw new IllegalStateException(
-          "Java VM does not support a standard character set.", e);
-    }
+    return bytes.getBytes(ISO_8859_1);
   }
 
   /**
    * Helper called by generated code to construct default values for bytes
    * fields.
    * <p>
-   * This is like {@link #bytesDefaultValue}, but returns a ByteBuffer. 
+   * This is like {@link #bytesDefaultValue}, but returns a ByteBuffer.
    */
   public static ByteBuffer byteBufferDefaultValue(String bytes) {
     return ByteBuffer.wrap(byteArrayDefaultValue(bytes));
@@ -185,7 +167,7 @@ public class Internal {
   public static boolean isValidUtf8(ByteString byteString) {
     return byteString.isValidUtf8();
   }
-  
+
   /**
    * Like {@link #isValidUtf8(ByteString)} but for byte arrays.
    */
@@ -197,22 +179,14 @@ public class Internal {
    * Helper method to get the UTF-8 bytes of a string.
    */
   public static byte[] toByteArray(String value) {
-    try {
-      return value.getBytes("UTF-8");
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException("UTF-8 not supported?", e);
-    }
+    return value.getBytes(UTF_8);
   }
-  
+
   /**
    * Helper method to convert a byte array to a string using UTF-8 encoding.
    */
   public static String toStringUtf8(byte[] bytes) {
-    try {
-      return new String(bytes, "UTF-8");
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException("UTF-8 not supported?", e);
-    }
+    return new String(bytes, UTF_8);
   }
 
   /**
@@ -274,7 +248,7 @@ public class Internal {
     }
     return hash;
   }
-  
+
   /**
    * Helper method for implementing {@link Message#equals(Object)} for bytes field.
    */
@@ -298,7 +272,7 @@ public class Internal {
     }
     return hash;
   }
-  
+
   /**
    * Helper method for implementing {@link Message#hashCode()} for bytes field.
    */
@@ -309,7 +283,7 @@ public class Internal {
     // based hashCode() method.
     return LiteralByteString.hashCode(bytes);
   }
-  
+
   /**
    * Helper method for implementing {@link Message#equals(Object)} for bytes
    * field.
@@ -322,7 +296,7 @@ public class Internal {
     // compare all the content.
     return a.duplicate().clear().equals(b.duplicate().clear());
   }
-  
+
   /**
    * Helper method for implementing {@link Message#equals(Object)} for bytes
    * field.
@@ -351,9 +325,9 @@ public class Internal {
     }
     return hash;
   }
-  
+
   private static final int DEFAULT_BUFFER_SIZE = 4096;
-  
+
   /**
    * Helper method for implementing {@link Message#hashCode()} for bytes
    * field.
@@ -382,18 +356,18 @@ public class Internal {
       return h == 0 ? 1 : h;
     }
   }
-  
+
   /**
    * An empty byte array constant used in generated code.
    */
   public static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
-  
+
   /**
    * An empty byte array constant used in generated code.
    */
   public static final ByteBuffer EMPTY_BYTE_BUFFER =
       ByteBuffer.wrap(EMPTY_BYTE_ARRAY);
-  
+
   /** An empty coded input stream constant used in generated code. */
   public static final CodedInputStream EMPTY_CODED_INPUT_STREAM =
       CodedInputStream.newInstance(EMPTY_BYTE_ARRAY);
@@ -458,13 +432,13 @@ public class Internal {
 
     private final Map<K, RealValue> realMap;
     private final Converter<RealValue, V> valueConverter;
-    
+
     public MapAdapter(Map<K, RealValue> realMap,
         Converter<RealValue, V> valueConverter) {
       this.realMap = realMap;
       this.valueConverter = valueConverter;
     }
-    
+
     @SuppressWarnings("unchecked")
     @Override
     public V get(Object key) {
@@ -474,7 +448,7 @@ public class Internal {
       }
       return valueConverter.doForward(result);
     }
-    
+
     @Override
     public V put(K key, V value) {
       RealValue oldValue = realMap.put(key, valueConverter.doBackward(value));
@@ -488,13 +462,13 @@ public class Internal {
     public Set<java.util.Map.Entry<K, V>> entrySet() {
       return new SetAdapter(realMap.entrySet());
     }
-    
+
     private class SetAdapter extends AbstractSet<Map.Entry<K, V>> {
       private final Set<Map.Entry<K, RealValue>> realSet;
       public SetAdapter(Set<Map.Entry<K, RealValue>> realSet) {
         this.realSet = realSet;
       }
-      
+
       @Override
       public Iterator<java.util.Map.Entry<K, V>> iterator() {
         return new IteratorAdapter(realSet.iterator());
@@ -503,17 +477,17 @@ public class Internal {
       @Override
       public int size() {
         return realSet.size();
-      } 
+      }
     }
-    
+
     private class IteratorAdapter implements Iterator<Map.Entry<K, V>> {
       private final Iterator<Map.Entry<K, RealValue>> realIterator;
-      
+
       public IteratorAdapter(
           Iterator<Map.Entry<K, RealValue>> realIterator) {
         this.realIterator = realIterator;
       }
-      
+
       @Override
       public boolean hasNext() {
         return realIterator.hasNext();
@@ -529,14 +503,14 @@ public class Internal {
         realIterator.remove();
       }
     }
-    
+
     private class EntryAdapter implements Map.Entry<K, V> {
       private final Map.Entry<K, RealValue> realEntry;
-      
+
       public EntryAdapter(Map.Entry<K, RealValue> realEntry) {
         this.realEntry = realEntry;
       }
-      
+
       @Override
       public K getKey() {
         return realEntry.getKey();

+ 9 - 14
java/src/main/java/com/google/protobuf/LiteralByteString.java

@@ -34,8 +34,8 @@ import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
-import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.NoSuchElementException;
@@ -112,7 +112,7 @@ class LiteralByteString extends ByteString {
   // ByteString -> byte[]
 
   @Override
-  protected void copyToInternal(byte[] target, int sourceOffset, 
+  protected void copyToInternal(byte[] target, int sourceOffset,
       int targetOffset, int numberToCopy) {
     // Optimized form, not for subclasses, since we don't call
     // getOffsetIntoBytes() or check the 'numberToCopy' parameter.
@@ -152,13 +152,8 @@ class LiteralByteString extends ByteString {
   }
 
   @Override
-  public String toString(String charsetName)
-      throws UnsupportedEncodingException {
-    // Optimize for empty strings, but ensure we don't silently ignore invalid
-    // encodings.
-    return size() == 0 && UTF_8.equals(charsetName)
-        ? ""
-        : new String(bytes, getOffsetIntoBytes(), size(), charsetName);
+  protected String toStringInternal(Charset charset) {
+    return new String(bytes, getOffsetIntoBytes(), size(), charset);
   }
 
   // =================================================================
@@ -199,12 +194,12 @@ class LiteralByteString extends ByteString {
       LiteralByteString otherAsLiteral = (LiteralByteString) other;
       // If we know the hash codes and they are not equal, we know the byte
       // strings are not equal.
-      if (hash != 0 
-          && otherAsLiteral.hash != 0 
+      if (hash != 0
+          && otherAsLiteral.hash != 0
           && hash != otherAsLiteral.hash) {
         return false;
       }
-      
+
       return equalsRange((LiteralByteString) other, 0, size());
     } else if (other instanceof RopeByteString) {
       return other.equals(this);
@@ -285,14 +280,14 @@ class LiteralByteString extends ByteString {
   protected int partialHash(int h, int offset, int length) {
     return hashCode(h, bytes, getOffsetIntoBytes() + offset, length);
   }
-  
+
   static int hashCode(int h, byte[] bytes, int offset, int length) {
     for (int i = offset; i < offset + length; i++) {
       h = h * 31 + bytes[i];
     }
     return h;
   }
-  
+
   static int hashCode(byte[] bytes) {
     int h = hashCode(bytes.length, bytes, 0, bytes.length);
     return h == 0 ? 1 : h;

+ 4 - 9
java/src/main/java/com/google/protobuf/RopeByteString.java

@@ -30,14 +30,14 @@
 
 package com.google.protobuf;
 
+import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InvalidObjectException;
 import java.io.ObjectInputStream;
 import java.io.OutputStream;
-import java.io.UnsupportedEncodingException;
-import java.io.ByteArrayInputStream;
 import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Iterator;
@@ -418,13 +418,8 @@ class RopeByteString extends ByteString {
   }
 
   @Override
-  public String toString(String charsetName)
-      throws UnsupportedEncodingException {
-    // Optimize for empty strings, but ensure we don't silently ignore invalid
-    // encodings.
-    return size() == 0 && UTF_8.equals(charsetName)
-        ? ""
-        : new String(toByteArray(), charsetName);
+  protected String toStringInternal(Charset charset) {
+    return new String(toByteArray(), charset);
   }
 
   // =================================================================

+ 1 - 1
java/src/main/java/com/google/protobuf/Utf8.java

@@ -46,7 +46,7 @@ package com.google.protobuf;
  * <p>The byte sequences considered valid by this class are exactly
  * those that can be roundtrip converted to Strings and back to bytes
  * using the UTF-8 charset, without loss: <pre> {@code
- * Arrays.equals(bytes, new String(bytes, "UTF-8").getBytes("UTF-8"))
+ * Arrays.equals(bytes, new String(bytes, Internal.UTF_8).getBytes(Internal.UTF_8))
  * }</pre>
  *
  * <p>See the Unicode Standard,</br>

+ 14 - 1
java/src/test/java/com/google/protobuf/BoundedByteStringTest.java

@@ -62,7 +62,7 @@ public class BoundedByteStringTest extends LiteralByteStringTest {
   @Override
   public void testToString() throws UnsupportedEncodingException {
     String testString = "I love unicode \u1234\u5678 characters";
-    LiteralByteString unicode = new LiteralByteString(testString.getBytes(UTF_8));
+    LiteralByteString unicode = new LiteralByteString(testString.getBytes(Internal.UTF_8));
     ByteString chopped = unicode.substring(2, unicode.size() - 6);
     assertEquals(classUnderTest + ".substring() must have the expected type",
         classUnderTest, getActualClassName(chopped));
@@ -72,6 +72,19 @@ public class BoundedByteStringTest extends LiteralByteStringTest {
         testString.substring(2, testString.length() - 6), roundTripString);
   }
 
+  @Override
+  public void testCharsetToString() throws UnsupportedEncodingException {
+    String testString = "I love unicode \u1234\u5678 characters";
+    LiteralByteString unicode = new LiteralByteString(testString.getBytes(Internal.UTF_8));
+    ByteString chopped = unicode.substring(2, unicode.size() - 6);
+    assertEquals(classUnderTest + ".substring() must have the expected type",
+        classUnderTest, getActualClassName(chopped));
+
+    String roundTripString = chopped.toString(Internal.UTF_8);
+    assertEquals(classUnderTest + " unicode bytes must match",
+        testString.substring(2, testString.length() - 6), roundTripString);
+  }
+
   public void testJavaSerialization() throws Exception {
     ByteArrayOutputStream out = new ByteArrayOutputStream();
     ObjectOutputStream oos = new ObjectOutputStream(out);

+ 10 - 9
java/src/test/java/com/google/protobuf/ByteStringTest.java

@@ -41,6 +41,7 @@ import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Iterator;
@@ -56,7 +57,7 @@ import java.util.Random;
  */
 public class ByteStringTest extends TestCase {
 
-  private static final String UTF_16 = "UTF-16";
+  private static final Charset UTF_16 = Charset.forName("UTF-16");
 
   static byte[] getTestBytes(int size, long seed) {
     Random random = new Random(seed);
@@ -139,7 +140,7 @@ public class ByteStringTest extends TestCase {
   public void testCopyFrom_Utf8() throws UnsupportedEncodingException {
     String testString = "I love unicode \u1234\u5678 characters";
     ByteString byteString = ByteString.copyFromUtf8(testString);
-    byte[] testBytes = testString.getBytes("UTF-8");
+    byte[] testBytes = testString.getBytes(Internal.UTF_8);
     assertTrue("copyFromUtf8 string must respect the charset",
         isArrayRange(byteString.toByteArray(), testBytes, 0, testBytes.length));
   }
@@ -380,7 +381,7 @@ public class ByteStringTest extends TestCase {
       return -1;
     }
   }
-  
+
   // A stream which exposes the byte array passed into write(byte[], int, int).
   private static class EvilOutputStream extends OutputStream {
     public byte[] capturedArray = null;
@@ -400,7 +401,7 @@ public class ByteStringTest extends TestCase {
 
   public void testToStringUtf8() throws UnsupportedEncodingException {
     String testString = "I love unicode \u1234\u5678 characters";
-    byte[] testBytes = testString.getBytes("UTF-8");
+    byte[] testBytes = testString.getBytes(Internal.UTF_8);
     ByteString byteString = ByteString.copyFrom(testBytes);
     assertEquals("copyToStringUtf8 must respect the charset",
         testString, byteString.toStringUtf8());
@@ -492,13 +493,13 @@ public class ByteStringTest extends TestCase {
           isArrayRange(bytes, byteString.toByteArray(), 0, bytes.length));
     }
   }
-  
+
   public void testNewOutputEmpty() throws IOException {
     // Make sure newOutput() correctly builds empty byte strings
     ByteString byteString = ByteString.newOutput().toByteString();
     assertEquals(ByteString.EMPTY, byteString);
   }
-  
+
   public void testNewOutput_Mutating() throws IOException {
     Output os = ByteString.newOutput(5);
     os.write(new byte[] {1, 2, 3, 4, 5});
@@ -705,14 +706,14 @@ public class ByteStringTest extends TestCase {
     }
     return pieces;
   }
-  
+
   private byte[] substringUsingWriteTo(
       ByteString data, int offset, int length) throws IOException {
     ByteArrayOutputStream output = new ByteArrayOutputStream();
     data.writeTo(output, offset, length);
     return output.toByteArray();
   }
-  
+
   public void testWriteToOutputStream() throws Exception {
     // Choose a size large enough so when two ByteStrings are concatenated they
     // won't be merged into one byte array due to some optimizations.
@@ -727,7 +728,7 @@ public class ByteStringTest extends TestCase {
     byte[] result = substringUsingWriteTo(left, 1, 1);
     assertEquals(1, result.length);
     assertEquals((byte) 11, result[0]);
-    
+
     byte[] data2 = new byte[dataSize];
     for (int i = 0; i < data1.length; i++) {
       data2[i] = (byte) 2;

+ 7 - 7
java/src/test/java/com/google/protobuf/CodedOutputStreamTest.java

@@ -321,7 +321,7 @@ public class CodedOutputStreamTest extends TestCase {
     final int BUFFER_SIZE = 4 * 1024;
     ByteArrayOutputStream outputStream = new ByteArrayOutputStream(BUFFER_SIZE);
     CodedOutputStream codedStream = CodedOutputStream.newInstance(outputStream);
-    byte[] value = "abcde".getBytes("UTF-8");
+    byte[] value = "abcde".getBytes(Internal.UTF_8);
     for (int i = 0; i < 1024; ++i) {
       codedStream.writeRawBytes(value, 0, value.length);
     }
@@ -330,7 +330,7 @@ public class CodedOutputStreamTest extends TestCase {
     assertTrue(codedStream.getTotalBytesWritten() > BUFFER_SIZE);
     assertEquals(value.length * 1024, codedStream.getTotalBytesWritten());
   }
-  
+
   public void testWriteToByteBuffer() throws Exception {
     final int bufferSize = 16 * 1024;
     ByteBuffer buffer = ByteBuffer.allocate(bufferSize);
@@ -351,7 +351,7 @@ public class CodedOutputStreamTest extends TestCase {
       codedStream.writeRawByte((byte) 3);
     }
     codedStream.flush();
-    
+
     // Check that data is correctly written to the ByteBuffer.
     assertEquals(0, buffer.remaining());
     buffer.flip();
@@ -365,9 +365,9 @@ public class CodedOutputStreamTest extends TestCase {
       assertEquals((byte) 3, buffer.get());
     }
   }
-  
+
   public void testWriteByteBuffer() throws Exception {
-    byte[] value = "abcde".getBytes("UTF-8");
+    byte[] value = "abcde".getBytes(Internal.UTF_8);
     ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
     CodedOutputStream codedStream = CodedOutputStream.newInstance(outputStream);
     ByteBuffer byteBuffer = ByteBuffer.wrap(value, 0, 1);
@@ -377,10 +377,10 @@ public class CodedOutputStreamTest extends TestCase {
     // The above call shouldn't affect the ByteBuffer's state.
     assertEquals(0, byteBuffer.position());
     assertEquals(1, byteBuffer.limit());
-    
+
     // The correct way to write part of an array using ByteBuffer.
     codedStream.writeRawBytes(ByteBuffer.wrap(value, 2, 1).slice());
-    
+
     codedStream.flush();
     byte[] result = outputStream.toByteArray();
     assertEquals(6, result.length);

+ 16 - 20
java/src/test/java/com/google/protobuf/DescriptorsTest.java

@@ -35,32 +35,28 @@ import com.google.protobuf.DescriptorProtos.EnumDescriptorProto;
 import com.google.protobuf.DescriptorProtos.EnumValueDescriptorProto;
 import com.google.protobuf.DescriptorProtos.FieldDescriptorProto;
 import com.google.protobuf.DescriptorProtos.FileDescriptorProto;
-import com.google.protobuf.Descriptors.DescriptorValidationException;
-import com.google.protobuf.Descriptors.FileDescriptor;
 import com.google.protobuf.Descriptors.Descriptor;
-import com.google.protobuf.Descriptors.FieldDescriptor;
-import com.google.protobuf.Descriptors.OneofDescriptor;
+import com.google.protobuf.Descriptors.DescriptorValidationException;
 import com.google.protobuf.Descriptors.EnumDescriptor;
 import com.google.protobuf.Descriptors.EnumValueDescriptor;
-import com.google.protobuf.Descriptors.ServiceDescriptor;
+import com.google.protobuf.Descriptors.FieldDescriptor;
+import com.google.protobuf.Descriptors.FileDescriptor;
 import com.google.protobuf.Descriptors.MethodDescriptor;
-
+import com.google.protobuf.Descriptors.OneofDescriptor;
+import com.google.protobuf.Descriptors.ServiceDescriptor;
 import com.google.protobuf.test.UnittestImport;
 import com.google.protobuf.test.UnittestImport.ImportEnum;
-import com.google.protobuf.test.UnittestImport.ImportMessage;
+import protobuf_unittest.TestCustomOptions;
+import protobuf_unittest.UnittestCustomOptions;
 import protobuf_unittest.UnittestProto;
 import protobuf_unittest.UnittestProto.ForeignEnum;
 import protobuf_unittest.UnittestProto.ForeignMessage;
-import protobuf_unittest.UnittestProto.TestAllTypes;
 import protobuf_unittest.UnittestProto.TestAllExtensions;
+import protobuf_unittest.UnittestProto.TestAllTypes;
 import protobuf_unittest.UnittestProto.TestExtremeDefaultValues;
 import protobuf_unittest.UnittestProto.TestMultipleExtensionRanges;
 import protobuf_unittest.UnittestProto.TestRequired;
 import protobuf_unittest.UnittestProto.TestService;
-import protobuf_unittest.UnittestCustomOptions;
-
-import protobuf_unittest.TestCustomOptions;
-
 
 import junit.framework.TestCase;
 
@@ -286,7 +282,7 @@ public class DescriptorsTest extends TestCase {
     d = TestExtremeDefaultValues.getDescriptor();
     assertEquals(
       ByteString.copyFrom(
-        "\0\001\007\b\f\n\r\t\013\\\'\"\u00fe".getBytes("ISO-8859-1")),
+        "\0\001\007\b\f\n\r\t\013\\\'\"\u00fe".getBytes(Internal.ISO_8859_1)),
       d.findFieldByName("escaped_bytes").getDefaultValue());
     assertEquals(-1, d.findFieldByName("large_uint32").getDefaultValue());
     assertEquals(-1L, d.findFieldByName("large_uint64").getDefaultValue());
@@ -497,7 +493,7 @@ public class DescriptorsTest extends TestCase {
       .build();
     // translate and crosslink
     FileDescriptor file =
-      Descriptors.FileDescriptor.buildFrom(fileDescriptorProto, 
+      Descriptors.FileDescriptor.buildFrom(fileDescriptorProto,
           new FileDescriptor[0]);
     // verify resulting descriptors
     assertNotNull(file);
@@ -518,7 +514,7 @@ public class DescriptorsTest extends TestCase {
     }
     assertTrue(barFound);
   }
-  
+
   public void testDependencyOrder() throws Exception {
     FileDescriptorProto fooProto = FileDescriptorProto.newBuilder()
         .setName("foo.proto").build();
@@ -537,14 +533,14 @@ public class DescriptorsTest extends TestCase {
         new FileDescriptor[0]);
     FileDescriptor barFile = Descriptors.FileDescriptor.buildFrom(barProto,
         new FileDescriptor[] {fooFile});
-    
-    // Items in the FileDescriptor array can be in any order. 
+
+    // Items in the FileDescriptor array can be in any order.
     Descriptors.FileDescriptor.buildFrom(bazProto,
         new FileDescriptor[] {fooFile, barFile});
     Descriptors.FileDescriptor.buildFrom(bazProto,
         new FileDescriptor[] {barFile, fooFile});
   }
-  
+
   public void testInvalidPublicDependency() throws Exception {
     FileDescriptorProto fooProto = FileDescriptorProto.newBuilder()
         .setName("foo.proto").build();
@@ -628,7 +624,7 @@ public class DescriptorsTest extends TestCase {
     Descriptors.FileDescriptor.buildFrom(
         fooProto, new FileDescriptor[] {forwardFile});
   }
-  
+
   /**
    * Tests the translate/crosslink for an example with a more complex namespace
    * referencing.
@@ -677,7 +673,7 @@ public class DescriptorsTest extends TestCase {
       assertTrue(field.getEnumType().getFile().getName().equals("bar.proto"));
       assertTrue(field.getEnumType().getFile().getPackage().equals(
           "a.b.c.d.bar.shared"));
-    }   
+    }
   }
 
   public void testOneofDescriptor() throws Exception {

+ 5 - 2
java/src/test/java/com/google/protobuf/IsValidUtf8Test.java

@@ -72,8 +72,11 @@ public class IsValidUtf8Test extends TestCase {
    * Tests that round tripping of all three byte permutations work.
    */
   public void testIsValidUtf8_3Bytes() throws UnsupportedEncodingException {
-    IsValidUtf8TestUtil.testBytes(3,
-        IsValidUtf8TestUtil.EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT);
+    // Travis' OOM killer doesn't like this test
+    if (System.getenv("TRAVIS") == null) {
+      IsValidUtf8TestUtil.testBytes(3,
+          IsValidUtf8TestUtil.EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT);
+    }
   }
 
   /**

+ 10 - 11
java/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java

@@ -33,18 +33,17 @@ package com.google.protobuf;
 import static junit.framework.Assert.*;
 
 import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Random;
 import java.util.logging.Logger;
-import java.nio.charset.CharsetDecoder;
-import java.nio.charset.Charset;
-import java.nio.charset.CodingErrorAction;
-import java.nio.charset.CharsetEncoder;
-import java.nio.charset.CoderResult;
-import java.nio.ByteBuffer;
-import java.nio.CharBuffer;
 
 /**
  * Shared testing code for {@link IsValidUtf8Test} and
@@ -220,8 +219,8 @@ class IsValidUtf8TestUtil {
       }
       ByteString bs = ByteString.copyFrom(bytes);
       boolean isRoundTrippable = bs.isValidUtf8();
-      String s = new String(bytes, "UTF-8");
-      byte[] bytesReencoded = s.getBytes("UTF-8");
+      String s = new String(bytes, Internal.UTF_8);
+      byte[] bytesReencoded = s.getBytes(Internal.UTF_8);
       boolean bytesEqual = Arrays.equals(bytes, bytesReencoded);
 
       if (bytesEqual != isRoundTrippable) {
@@ -313,10 +312,10 @@ class IsValidUtf8TestUtil {
   void testBytesUsingByteBuffers(
       int numBytes, long expectedCount, long start, long lim)
       throws UnsupportedEncodingException {
-    CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
+    CharsetDecoder decoder = Internal.UTF_8.newDecoder()
         .onMalformedInput(CodingErrorAction.REPLACE)
         .onUnmappableCharacter(CodingErrorAction.REPLACE);
-    CharsetEncoder encoder = Charset.forName("UTF-8").newEncoder()
+    CharsetEncoder encoder = Internal.UTF_8.newEncoder()
         .onMalformedInput(CodingErrorAction.REPLACE)
         .onUnmappableCharacter(CodingErrorAction.REPLACE);
     byte[] bytes = new byte[numBytes];

+ 10 - 3
java/src/test/java/com/google/protobuf/LiteralByteStringTest.java

@@ -248,7 +248,7 @@ public class LiteralByteStringTest extends TestCase {
     assertTrue(classUnderTest + ".writeTo() must give back the same bytes",
         Arrays.equals(referenceBytes, roundTripBytes));
   }
-  
+
   public void testWriteTo_mutating() throws IOException {
     OutputStream os = new OutputStream() {
       @Override
@@ -293,14 +293,21 @@ public class LiteralByteStringTest extends TestCase {
 
   public void testToString() throws UnsupportedEncodingException {
     String testString = "I love unicode \u1234\u5678 characters";
-    LiteralByteString unicode = new LiteralByteString(testString.getBytes(UTF_8));
+    LiteralByteString unicode = new LiteralByteString(testString.getBytes(Internal.UTF_8));
     String roundTripString = unicode.toString(UTF_8);
     assertEquals(classUnderTest + " unicode must match", testString, roundTripString);
   }
 
+  public void testCharsetToString() throws UnsupportedEncodingException {
+    String testString = "I love unicode \u1234\u5678 characters";
+    LiteralByteString unicode = new LiteralByteString(testString.getBytes(Internal.UTF_8));
+    String roundTripString = unicode.toString(Internal.UTF_8);
+    assertEquals(classUnderTest + " unicode must match", testString, roundTripString);
+  }
+
   public void testToString_returnsCanonicalEmptyString() throws UnsupportedEncodingException{
     assertSame(classUnderTest + " must be the same string references",
-        ByteString.EMPTY.toString(UTF_8), new LiteralByteString(new byte[]{}).toString(UTF_8));
+        ByteString.EMPTY.toString(Internal.UTF_8), new LiteralByteString(new byte[]{}).toString(Internal.UTF_8));
   }
 
   public void testToString_raisesException() throws UnsupportedEncodingException{

+ 31 - 1
java/src/test/java/com/google/protobuf/RopeByteStringSubstringTest.java

@@ -35,7 +35,7 @@ import java.util.Iterator;
 
 /**
  * This class tests {@link RopeByteString#substring(int, int)} by inheriting the tests from
- * {@link LiteralByteStringTest}.  Only a couple of methods are overridden.  
+ * {@link LiteralByteStringTest}.  Only a couple of methods are overridden.
  *
  * @author carlanton@google.com (Carl Haverl)
  */
@@ -94,4 +94,34 @@ public class RopeByteStringSubstringTest extends LiteralByteStringTest {
     assertEquals(classUnderTest + " string must must have same hashCode as the flat string",
         flatString.hashCode(), unicode.hashCode());
   }
+
+  @Override
+  public void testCharsetToString() throws UnsupportedEncodingException {
+    String sourceString = "I love unicode \u1234\u5678 characters";
+    ByteString sourceByteString = ByteString.copyFromUtf8(sourceString);
+    int copies = 250;
+
+    // By building the RopeByteString by concatenating, this is actually a fairly strenuous test.
+    StringBuilder builder = new StringBuilder(copies * sourceString.length());
+    ByteString unicode = ByteString.EMPTY;
+    for (int i = 0; i < copies; ++i) {
+      builder.append(sourceString);
+      unicode = RopeByteString.concatenate(unicode, sourceByteString);
+    }
+    String testString = builder.toString();
+
+    // Do the substring part
+    testString = testString.substring(2, testString.length() - 6);
+    unicode = unicode.substring(2, unicode.size() - 6);
+
+    assertEquals(classUnderTest + " from string must have the expected type",
+        classUnderTest, getActualClassName(unicode));
+    String roundTripString = unicode.toString(Internal.UTF_8);
+    assertEquals(classUnderTest + " unicode bytes must match",
+        testString, roundTripString);
+    ByteString flatString = ByteString.copyFromUtf8(testString);
+    assertEquals(classUnderTest + " string must equal the flat string", flatString, unicode);
+    assertEquals(classUnderTest + " string must must have same hashCode as the flat string",
+        flatString.hashCode(), unicode.hashCode());
+  }
 }

+ 28 - 2
java/src/test/java/com/google/protobuf/RopeByteStringTest.java

@@ -42,7 +42,7 @@ import java.util.Iterator;
 /**
  * This class tests {@link RopeByteString} by inheriting the tests from
  * {@link LiteralByteStringTest}.  Only a couple of methods are overridden.
- * 
+ *
  * <p>A full test of the result of {@link RopeByteString#substring(int, int)} is found in the
  * separate class {@link RopeByteStringSubstringTest}.
  *
@@ -118,12 +118,38 @@ public class RopeByteStringTest extends LiteralByteStringTest {
         flatString.hashCode(), unicode.hashCode());
   }
 
+  @Override
+  public void testCharsetToString() throws UnsupportedEncodingException {
+    String sourceString = "I love unicode \u1234\u5678 characters";
+    ByteString sourceByteString = ByteString.copyFromUtf8(sourceString);
+    int copies = 250;
+
+    // By building the RopeByteString by concatenating, this is actually a fairly strenuous test.
+    StringBuilder builder = new StringBuilder(copies * sourceString.length());
+    ByteString unicode = ByteString.EMPTY;
+    for (int i = 0; i < copies; ++i) {
+      builder.append(sourceString);
+      unicode = RopeByteString.concatenate(unicode, sourceByteString);
+    }
+    String testString = builder.toString();
+
+    assertEquals(classUnderTest + " from string must have the expected type",
+        classUnderTest, getActualClassName(unicode));
+    String roundTripString = unicode.toString(Internal.UTF_8);
+    assertEquals(classUnderTest + " unicode bytes must match",
+        testString, roundTripString);
+    ByteString flatString = ByteString.copyFromUtf8(testString);
+    assertEquals(classUnderTest + " string must equal the flat string", flatString, unicode);
+    assertEquals(classUnderTest + " string must must have same hashCode as the flat string",
+        flatString.hashCode(), unicode.hashCode());
+  }
+
   @Override
   public void testToString_returnsCanonicalEmptyString() throws UnsupportedEncodingException {
     RopeByteString ropeByteString =
         RopeByteString.newInstanceForTest(ByteString.EMPTY, ByteString.EMPTY);
     assertSame(classUnderTest + " must be the same string references",
-        ByteString.EMPTY.toString(UTF_8), ropeByteString.toString(UTF_8));
+        ByteString.EMPTY.toString(Internal.UTF_8), ropeByteString.toString(Internal.UTF_8));
   }
 
   public void testToString_raisesException() throws UnsupportedEncodingException{

+ 1 - 5
java/src/test/java/com/google/protobuf/TestUtil.java

@@ -276,11 +276,7 @@ public final class TestUtil {
 
   /** Helper to convert a String to ByteString. */
   static ByteString toBytes(String str) {
-    try {
-      return ByteString.copyFrom(str.getBytes("UTF-8"));
-    } catch(java.io.UnsupportedEncodingException e) {
-      throw new RuntimeException("UTF-8 not supported.", e);
-    }
+    return ByteString.copyFrom(str.getBytes(Internal.UTF_8));
   }
 
   /**

+ 3 - 3
java/src/test/java/com/google/protobuf/TextFormatTest.java

@@ -243,8 +243,8 @@ public class TextFormatTest extends TestCase {
    * characters.  The characters are converted directly to bytes, *not*
    * encoded using UTF-8.
    */
-  private ByteString bytes(String str) throws Exception {
-    return ByteString.copyFrom(str.getBytes("ISO-8859-1"));
+  private ByteString bytes(String str) {
+    return ByteString.copyFrom(str.getBytes(Internal.ISO_8859_1));
   }
 
   /**
@@ -863,7 +863,7 @@ public class TextFormatTest extends TestCase {
     TextFormat.merge(TextFormat.printToUnicodeString(message), builder);
     assertEquals(message.getOptionalString(), builder.getOptionalString());
   }
-  
+
   public void testPrintToUnicodeStringWithNewlines() throws Exception {
     // No newlines at start and end
     assertEquals("optional_string: \"test newlines\\n\\nin\\nstring\"\n",

+ 1 - 1
java/src/test/java/com/google/protobuf/UnknownFieldSetLiteTest.java

@@ -229,7 +229,7 @@ public class UnknownFieldSetLiteTest extends TestCase {
 
   public void testMalformedBytes() throws Exception {
     try {
-      Foo.parseFrom("this is a malformed protocol buffer".getBytes("UTF-8"));
+      Foo.parseFrom("this is a malformed protocol buffer".getBytes(Internal.UTF_8));
       fail();
     } catch (InvalidProtocolBufferException e) {
       // Expected.

+ 2 - 2
javanano/src/main/java/com/google/protobuf/nano/CodedInputByteBufferNano.java

@@ -190,12 +190,12 @@ public final class CodedInputByteBufferNano {
     if (size <= (bufferSize - bufferPos) && size > 0) {
       // Fast path:  We already have the bytes in a contiguous buffer, so
       //   just copy directly from it.
-      final String result = new String(buffer, bufferPos, size, "UTF-8");
+      final String result = new String(buffer, bufferPos, size, InternalNano.UTF_8);
       bufferPos += size;
       return result;
     } else {
       // Slow path:  Build a byte array first then copy it.
-      return new String(readRawBytes(size), "UTF-8");
+      return new String(readRawBytes(size), InternalNano.UTF_8);
     }
   }
 

+ 4 - 9
javanano/src/main/java/com/google/protobuf/nano/CodedOutputByteBufferNano.java

@@ -31,7 +31,6 @@
 package com.google.protobuf.nano;
 
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
 
 /**
  * Encodes and writes protocol message fields.
@@ -291,7 +290,7 @@ public final class CodedOutputByteBufferNano {
     // Unfortunately there does not appear to be any way to tell Java to encode
     // UTF-8 directly into our buffer, so we have to let it create its own byte
     // array and then copy.
-    final byte[] bytes = value.getBytes("UTF-8");
+    final byte[] bytes = value.getBytes(InternalNano.UTF_8);
     writeRawVarint32(bytes.length);
     writeRawBytes(bytes);
   }
@@ -603,13 +602,9 @@ public final class CodedOutputByteBufferNano {
    * {@code string} field.
    */
   public static int computeStringSizeNoTag(final String value) {
-    try {
-      final byte[] bytes = value.getBytes("UTF-8");
-      return computeRawVarint32Size(bytes.length) +
-             bytes.length;
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException("UTF-8 not supported.");
-    }
+    final byte[] bytes = value.getBytes(InternalNano.UTF_8);
+    return computeRawVarint32Size(bytes.length) +
+           bytes.length;
   }
 
   /**

+ 6 - 22
javanano/src/main/java/com/google/protobuf/nano/InternalNano.java

@@ -33,7 +33,7 @@ package com.google.protobuf.nano;
 import com.google.protobuf.nano.MapFactories.MapFactory;
 
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
 import java.util.Arrays;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -67,6 +67,8 @@ public final class InternalNano {
   public static final int TYPE_SINT32   = 17;
   public static final int TYPE_SINT64   = 18;
 
+  protected static final Charset UTF_8 = Charset.forName("UTF-8");
+  protected static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
 
   private InternalNano() {}
 
@@ -111,14 +113,7 @@ public final class InternalNano {
    * generated code calls this automatically.
    */
   public static String stringDefaultValue(String bytes) {
-    try {
-      return new String(bytes.getBytes("ISO-8859-1"), "UTF-8");
-    } catch (UnsupportedEncodingException e) {
-      // This should never happen since all JVMs are required to implement
-      // both of the above character sets.
-      throw new IllegalStateException(
-          "Java VM does not support a standard character set.", e);
-    }
+    return new String(bytes.getBytes(ISO_8859_1), InternalNano.UTF_8);
   }
 
   /**
@@ -130,14 +125,7 @@ public final class InternalNano {
    * embed raw bytes as a string literal with ISO-8859-1 encoding.
    */
   public static byte[] bytesDefaultValue(String bytes) {
-    try {
-      return bytes.getBytes("ISO-8859-1");
-    } catch (UnsupportedEncodingException e) {
-      // This should never happen since all JVMs are required to implement
-      // ISO-8859-1.
-      throw new IllegalStateException(
-          "Java VM does not support a standard character set.", e);
-    }
+    return bytes.getBytes(ISO_8859_1);
   }
 
   /**
@@ -145,11 +133,7 @@ public final class InternalNano {
    * UnsupportedEncodingException to a RuntimeException.
    */
   public static byte[] copyFromUtf8(final String text) {
-    try {
-      return text.getBytes("UTF-8");
-    } catch (UnsupportedEncodingException e) {
-      throw new RuntimeException("UTF-8 not supported?");
-    }
+    return text.getBytes(InternalNano.UTF_8);
   }
 
   /**

+ 11 - 11
javanano/src/test/java/com/google/protobuf/nano/NanoTest.java

@@ -458,7 +458,7 @@ public class NanoTest extends TestCase {
     assertFalse(msg.optionalBytes.length > 0);
     msg.optionalBytes = InternalNano.copyFromUtf8("hello");
     assertTrue(msg.optionalBytes.length > 0);
-    assertEquals("hello", new String(msg.optionalBytes, "UTF-8"));
+    assertEquals("hello", new String(msg.optionalBytes, InternalNano.UTF_8));
     msg.clear();
     assertFalse(msg.optionalBytes.length > 0);
     msg.clear()
@@ -476,7 +476,7 @@ public class NanoTest extends TestCase {
 
     TestAllTypesNano newMsg = TestAllTypesNano.parseFrom(result);
     assertTrue(newMsg.optionalBytes.length > 0);
-    assertEquals("bye", new String(newMsg.optionalBytes, "UTF-8"));
+    assertEquals("bye", new String(newMsg.optionalBytes, InternalNano.UTF_8));
   }
 
   public void testNanoOptionalGroup() throws Exception {
@@ -1346,14 +1346,14 @@ public class NanoTest extends TestCase {
         InternalNano.copyFromUtf8("bye"),
         InternalNano.copyFromUtf8("boo")
     };
-    assertEquals("bye", new String(msg.repeatedBytes[1], "UTF-8"));
-    assertEquals("boo", new String(msg.repeatedBytes[2], "UTF-8"));
+    assertEquals("bye", new String(msg.repeatedBytes[1], InternalNano.UTF_8));
+    assertEquals("boo", new String(msg.repeatedBytes[2], InternalNano.UTF_8));
     msg.clear();
     assertEquals(0, msg.repeatedBytes.length);
     msg.clear()
        .repeatedBytes = new byte[][] { InternalNano.copyFromUtf8("boo") };
     assertEquals(1, msg.repeatedBytes.length);
-    assertEquals("boo", new String(msg.repeatedBytes[0], "UTF-8"));
+    assertEquals("boo", new String(msg.repeatedBytes[0], InternalNano.UTF_8));
     msg.clear();
     assertEquals(0, msg.repeatedBytes.length);
 
@@ -1385,8 +1385,8 @@ public class NanoTest extends TestCase {
 
     newMsg = TestAllTypesNano.parseFrom(result);
     assertEquals(2, newMsg.repeatedBytes.length);
-    assertEquals("hello", new String(newMsg.repeatedBytes[0], "UTF-8"));
-    assertEquals("world", new String(newMsg.repeatedBytes[1], "UTF-8"));
+    assertEquals("hello", new String(newMsg.repeatedBytes[0], InternalNano.UTF_8));
+    assertEquals("world", new String(newMsg.repeatedBytes[1], InternalNano.UTF_8));
   }
 
   public void testNanoRepeatedGroup() throws Exception {
@@ -2277,9 +2277,9 @@ public class NanoTest extends TestCase {
       assertTrue(52.0e3 == msg.defaultDouble);
       assertEquals(true, msg.defaultBool);
       assertEquals("hello", msg.defaultString);
-      assertEquals("world", new String(msg.defaultBytes, "UTF-8"));
+      assertEquals("world", new String(msg.defaultBytes, InternalNano.UTF_8));
       assertEquals("dünya", msg.defaultStringNonascii);
-      assertEquals("dünyab", new String(msg.defaultBytesNonascii, "UTF-8"));
+      assertEquals("dünyab", new String(msg.defaultBytesNonascii, InternalNano.UTF_8));
       assertEquals(TestAllTypesNano.BAR, msg.defaultNestedEnum);
       assertEquals(NanoOuterClass.FOREIGN_NANO_BAR, msg.defaultForeignEnum);
       assertEquals(UnittestImportNano.IMPORT_NANO_BAR, msg.defaultImportEnum);
@@ -2385,7 +2385,7 @@ public class NanoTest extends TestCase {
     assertEquals(TestAllTypesNanoHas.FOO, newMsg.optionalNestedEnum);
     assertEquals(41, newMsg.defaultInt32);
     assertEquals("hello", newMsg.defaultString);
-    assertEquals("world", new String(newMsg.defaultBytes, "UTF-8"));
+    assertEquals("world", new String(newMsg.defaultBytes, InternalNano.UTF_8));
     assertEquals(TestAllTypesNanoHas.BAR, newMsg.defaultNestedEnum);
     assertEquals(Float.NaN, newMsg.defaultFloatNan);
     assertEquals(0, newMsg.id);
@@ -2567,7 +2567,7 @@ public class NanoTest extends TestCase {
     assertEquals(TestNanoAccessors.FOO, newMsg.getOptionalNestedEnum());
     assertEquals(41, newMsg.getDefaultInt32());
     assertEquals("hello", newMsg.getDefaultString());
-    assertEquals("world", new String(newMsg.getDefaultBytes(), "UTF-8"));
+    assertEquals("world", new String(newMsg.getDefaultBytes(), InternalNano.UTF_8));
     assertEquals(TestNanoAccessors.BAR, newMsg.getDefaultNestedEnum());
     assertEquals(Float.NaN, newMsg.getDefaultFloatNan());
     assertEquals(0, newMsg.id);