Procházet zdrojové kódy

Merge branch 'master' into internal-merge

Temporarily merges google internal changes. The javascript tests are
still failing and require additional investigation.
Jisi Liu před 10 roky
rodič
revize
7630a83767
100 změnil soubory, kde provedl 28636 přidání a 2714 odebrání
  1. 8 0
      Makefile.am
  2. 2 0
      cmake/CMakeLists.txt
  3. 1 1
      conformance/ConformanceJava.java
  4. 125 0
      conformance/ConformanceJavaLite.java
  5. 19 1
      conformance/Makefile.am
  6. 8 5
      generate_descriptor_proto.sh
  7. 145 0
      java/core/src/main/java/com/google/protobuf/ByteBufferWriter.java
  8. 116 0
      java/core/src/main/java/com/google/protobuf/ByteOutput.java
  9. 104 16
      java/core/src/main/java/com/google/protobuf/ByteString.java
  10. 31 11
      java/core/src/main/java/com/google/protobuf/CodedInputStream.java
  11. 341 367
      java/core/src/main/java/com/google/protobuf/CodedOutputStream.java
  12. 2 2
      java/core/src/main/java/com/google/protobuf/Descriptors.java
  13. 30 0
      java/core/src/main/java/com/google/protobuf/ExperimentalApi.java
  14. 3 1
      java/core/src/main/java/com/google/protobuf/GeneratedMessage.java
  15. 241 60
      java/core/src/main/java/com/google/protobuf/GeneratedMessageLite.java
  16. 6 3
      java/core/src/main/java/com/google/protobuf/Internal.java
  17. 2 2
      java/core/src/main/java/com/google/protobuf/LazyField.java
  18. 95 17
      java/core/src/main/java/com/google/protobuf/LazyFieldLite.java
  19. 2 2
      java/core/src/main/java/com/google/protobuf/LazyStringArrayList.java
  20. 200 0
      java/core/src/main/java/com/google/protobuf/MessageLiteToString.java
  21. 18 37
      java/core/src/main/java/com/google/protobuf/NioByteString.java
  22. 0 1
      java/core/src/main/java/com/google/protobuf/Parser.java
  23. 17 9
      java/core/src/main/java/com/google/protobuf/RopeByteString.java
  24. 1 1
      java/core/src/main/java/com/google/protobuf/SmallSortedMap.java
  25. 90 86
      java/core/src/main/java/com/google/protobuf/TextFormat.java
  26. 137 0
      java/core/src/main/java/com/google/protobuf/TextFormatEscaper.java
  27. 225 0
      java/core/src/main/java/com/google/protobuf/TextFormatParseInfoTree.java
  28. 104 0
      java/core/src/main/java/com/google/protobuf/TextFormatParseLocation.java
  29. 15 95
      java/core/src/main/java/com/google/protobuf/UnknownFieldSetLite.java
  30. 23 2
      java/core/src/main/java/com/google/protobuf/UnsafeByteOperations.java
  31. 1433 231
      java/core/src/main/java/com/google/protobuf/Utf8.java
  32. 0 1
      java/core/src/test/java/com/google/protobuf/AbstractMessageTest.java
  33. 45 0
      java/core/src/test/java/com/google/protobuf/AnyTest.java
  34. 0 1
      java/core/src/test/java/com/google/protobuf/BoundedByteStringTest.java
  35. 81 0
      java/core/src/test/java/com/google/protobuf/ByteBufferWriterTest.java
  36. 14 0
      java/core/src/test/java/com/google/protobuf/ByteStringTest.java
  37. 1 0
      java/core/src/test/java/com/google/protobuf/CheckUtf8Test.java
  38. 50 0
      java/core/src/test/java/com/google/protobuf/CodedInputStreamTest.java
  39. 1 0
      java/core/src/test/java/com/google/protobuf/DeprecatedFieldTest.java
  40. 36 1
      java/core/src/test/java/com/google/protobuf/DescriptorsTest.java
  41. 1 1
      java/core/src/test/java/com/google/protobuf/DynamicMessageTest.java
  42. 76 0
      java/core/src/test/java/com/google/protobuf/EnumTest.java
  43. 49 49
      java/core/src/test/java/com/google/protobuf/FieldPresenceTest.java
  44. 15 0
      java/core/src/test/java/com/google/protobuf/GeneratedMessageTest.java
  45. 38 35
      java/core/src/test/java/com/google/protobuf/IsValidUtf8Test.java
  46. 115 87
      java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java
  47. 2 1
      java/core/src/test/java/com/google/protobuf/LazyFieldLiteTest.java
  48. 2 1
      java/core/src/test/java/com/google/protobuf/LazyFieldTest.java
  49. 17 0
      java/core/src/test/java/com/google/protobuf/LiteEqualsAndHashTest.java
  50. 166 27
      java/core/src/test/java/com/google/protobuf/LiteTest.java
  51. 60 10
      java/core/src/test/java/com/google/protobuf/LiteralByteStringTest.java
  52. 1 1
      java/core/src/test/java/com/google/protobuf/MapForProto2LiteTest.java
  53. 0 1
      java/core/src/test/java/com/google/protobuf/MapForProto2Test.java
  54. 0 1
      java/core/src/test/java/com/google/protobuf/MapTest.java
  55. 2 2
      java/core/src/test/java/com/google/protobuf/MessageTest.java
  56. 1 1
      java/core/src/test/java/com/google/protobuf/NestedBuildersTest.java
  57. 145 70
      java/core/src/test/java/com/google/protobuf/NioByteStringTest.java
  58. 134 72
      java/core/src/test/java/com/google/protobuf/ParseExceptionsTest.java
  59. 2 2
      java/core/src/test/java/com/google/protobuf/ParserTest.java
  60. 7 7
      java/core/src/test/java/com/google/protobuf/ServiceTest.java
  61. 171 184
      java/core/src/test/java/com/google/protobuf/TestUtil.java
  62. 182 0
      java/core/src/test/java/com/google/protobuf/TextFormatParseInfoTreeTest.java
  63. 86 0
      java/core/src/test/java/com/google/protobuf/TextFormatParseLocationTest.java
  64. 106 1
      java/core/src/test/java/com/google/protobuf/TextFormatTest.java
  65. 0 40
      java/core/src/test/java/com/google/protobuf/UnknownFieldSetLiteTest.java
  66. 11 11
      java/core/src/test/java/com/google/protobuf/WireFormatTest.java
  67. 12 0
      java/core/src/test/proto/com/google/protobuf/lite_equals_and_hash.proto
  68. 1 1
      java/pom.xml
  69. 44 31
      java/util/src/main/java/com/google/protobuf/util/FieldMaskTree.java
  70. 22 4
      java/util/src/main/java/com/google/protobuf/util/JsonFormat.java
  71. 68 56
      java/util/src/test/java/com/google/protobuf/util/FieldMaskTreeTest.java
  72. 129 111
      java/util/src/test/java/com/google/protobuf/util/JsonFormatTest.java
  73. 31 0
      java/util/src/test/proto/com/google/protobuf/util/json_test.proto
  74. 43 3
      js/binary/constants.js
  75. 7 5
      js/binary/decoder.js
  76. 64 56
      js/binary/decoder_test.js
  77. 430 0
      js/binary/encoder.js
  78. 227 88
      js/binary/proto_test.js
  79. 4 4
      js/binary/reader.js
  80. 29 45
      js/binary/reader_test.js
  81. 5 59
      js/binary/utils.js
  82. 14 13
      js/binary/utils_test.js
  83. 185 592
      js/binary/writer.js
  84. 1 1
      js/debug.js
  85. 1 0
      js/debug_test.js
  86. 248 36
      js/message.js
  87. 34 16
      js/message_test.js
  88. 60 29
      js/proto3_test.js
  89. 12 0
      js/test.proto
  90. 10 0
      php/ext/google/protobuf/config.m4
  91. 381 0
      php/ext/google/protobuf/def.c
  92. 273 0
      php/ext/google/protobuf/message.c
  93. 89 0
      php/ext/google/protobuf/protobuf.c
  94. 281 0
      php/ext/google/protobuf/protobuf.h
  95. 539 0
      php/ext/google/protobuf/storage.c
  96. 15 0
      php/ext/google/protobuf/test.php
  97. 11990 0
      php/ext/google/protobuf/upb.c
  98. 8217 0
      php/ext/google/protobuf/upb.h
  99. 2 1
      python/google/protobuf/internal/descriptor_database_test.py
  100. 7 6
      python/google/protobuf/internal/descriptor_pool_test.py

+ 8 - 0
Makefile.am

@@ -193,6 +193,8 @@ java_EXTRA_DIST=
   java/core/src/main/java/com/google/protobuf/BlockingRpcChannel.java              \
   java/core/src/main/java/com/google/protobuf/BlockingService.java                 \
   java/core/src/main/java/com/google/protobuf/BooleanArrayList.java                \
+  java/core/src/main/java/com/google/protobuf/ByteBufferWriter.java                \
+  java/core/src/main/java/com/google/protobuf/ByteOutput.java                      \
   java/core/src/main/java/com/google/protobuf/ByteString.java                      \
   java/core/src/main/java/com/google/protobuf/CodedInputStream.java                \
   java/core/src/main/java/com/google/protobuf/CodedOutputStream.java               \
@@ -243,6 +245,8 @@ java_EXTRA_DIST=
   java/core/src/main/java/com/google/protobuf/SmallSortedMap.java                  \
   java/core/src/main/java/com/google/protobuf/TextFormat.java                      \
   java/core/src/main/java/com/google/protobuf/TextFormatEscaper.java               \
+  java/core/src/main/java/com/google/protobuf/TextFormatParseInfoTree.java         \
+  java/core/src/main/java/com/google/protobuf/TextFormatParseLocation.java         \
   java/core/src/main/java/com/google/protobuf/UninitializedMessageException.java   \
   java/core/src/main/java/com/google/protobuf/UnknownFieldSet.java                 \
   java/core/src/main/java/com/google/protobuf/UnknownFieldSetLite.java             \
@@ -254,6 +258,7 @@ java_EXTRA_DIST=
   java/core/src/test/java/com/google/protobuf/AnyTest.java                         \
   java/core/src/test/java/com/google/protobuf/BooleanArrayListTest.java            \
   java/core/src/test/java/com/google/protobuf/BoundedByteStringTest.java           \
+  java/core/src/test/java/com/google/protobuf/ByteBufferWriterTest.java            \
   java/core/src/test/java/com/google/protobuf/ByteStringTest.java                  \
   java/core/src/test/java/com/google/protobuf/CheckUtf8Test.java                   \
   java/core/src/test/java/com/google/protobuf/CodedInputStreamTest.java            \
@@ -262,6 +267,7 @@ java_EXTRA_DIST=
   java/core/src/test/java/com/google/protobuf/DescriptorsTest.java                 \
   java/core/src/test/java/com/google/protobuf/DoubleArrayListTest.java             \
   java/core/src/test/java/com/google/protobuf/DynamicMessageTest.java              \
+  java/core/src/test/java/com/google/protobuf/EnumTest.java                        \
   java/core/src/test/java/com/google/protobuf/FieldPresenceTest.java               \
   java/core/src/test/java/com/google/protobuf/FloatArrayListTest.java              \
   java/core/src/test/java/com/google/protobuf/ForceFieldBuildersPreRun.java        \
@@ -294,6 +300,8 @@ java_EXTRA_DIST=
   java/core/src/test/java/com/google/protobuf/SmallSortedMapTest.java              \
   java/core/src/test/java/com/google/protobuf/TestBadIdentifiers.java              \
   java/core/src/test/java/com/google/protobuf/TestUtil.java                        \
+  java/core/src/test/java/com/google/protobuf/TextFormatParseInfoTreeTest.java     \
+  java/core/src/test/java/com/google/protobuf/TextFormatParseLocationTest.java     \
   java/core/src/test/java/com/google/protobuf/TextFormatTest.java                  \
   java/core/src/test/java/com/google/protobuf/UnknownEnumValueTest.java            \
   java/core/src/test/java/com/google/protobuf/UnknownFieldSetLiteTest.java         \

+ 2 - 0
cmake/CMakeLists.txt

@@ -118,6 +118,8 @@ if (MSVC)
   # Build with multiple processes
   add_definitions(/MP)
   add_definitions(/wd4244 /wd4267 /wd4018 /wd4355 /wd4800 /wd4251 /wd4996 /wd4146 /wd4305)
+  # Allow large object files (/bigobj raises the section limit of a .obj file)
+  add_definitions(/bigobj)
   string(REPLACE "/" "\\" PROTOBUF_SOURCE_WIN32_PATH ${protobuf_SOURCE_DIR})
   string(REPLACE "/" "\\" PROTOBUF_BINARY_WIN32_PATH ${protobuf_BINARY_DIR})
   configure_file(extract_includes.bat.in extract_includes.bat)

+ 1 - 1
conformance/ConformanceJava.java

@@ -129,7 +129,7 @@ class ConformanceJava {
     typeRegistry = TypeRegistry.newBuilder().add(
         Conformance.TestAllTypes.getDescriptor()).build();
     while (doTestIo()) {
-      // Empty.
+      this.testCount++;
     }
 
     System.err.println("ConformanceJava: received EOF from test runner after " +

+ 125 - 0
conformance/ConformanceJavaLite.java

@@ -0,0 +1,125 @@
+
+import com.google.protobuf.conformance.Conformance;
+import com.google.protobuf.InvalidProtocolBufferException;
+
+class ConformanceJavaLite {
+  private int testCount = 0;
+
+  private boolean readFromStdin(byte[] buf, int len) throws Exception {
+    int ofs = 0;
+    while (len > 0) {
+      int read = System.in.read(buf, ofs, len);
+      if (read == -1) {
+        return false;  // EOF
+      }
+      ofs += read;
+      len -= read;
+    }
+
+    return true;
+  }
+
+  private void writeToStdout(byte[] buf) throws Exception {
+    System.out.write(buf);
+  }
+
+  // Returns -1 on EOF (an actual length value is never negative).
+  private int readLittleEndianIntFromStdin() throws Exception {
+    byte[] buf = new byte[4];
+    if (!readFromStdin(buf, 4)) {
+      return -1;
+    }
+    return (buf[0] & 0xff)
+        | ((buf[1] & 0xff) << 8)
+        | ((buf[2] & 0xff) << 16)
+        | ((buf[3] & 0xff) << 24);
+  }
+
+  private void writeLittleEndianIntToStdout(int val) throws Exception {
+    byte[] buf = new byte[4];
+    buf[0] = (byte)val;
+    buf[1] = (byte)(val >> 8);
+    buf[2] = (byte)(val >> 16);
+    buf[3] = (byte)(val >> 24);
+    writeToStdout(buf);
+  }
+
+  private Conformance.ConformanceResponse doTest(Conformance.ConformanceRequest request) {
+    Conformance.TestAllTypes testMessage;
+
+    switch (request.getPayloadCase()) {
+      case PROTOBUF_PAYLOAD: {
+        try {
+          testMessage = Conformance.TestAllTypes.parseFrom(request.getProtobufPayload());
+        } catch (InvalidProtocolBufferException e) {
+          return Conformance.ConformanceResponse.newBuilder().setParseError(e.getMessage()).build();
+        }
+        break;
+      }
+      case JSON_PAYLOAD: {
+        return Conformance.ConformanceResponse.newBuilder().setSkipped(
+            "Lite runtime does not suport Json Formant.").build();
+      }
+      case PAYLOAD_NOT_SET: {
+        throw new RuntimeException("Request didn't have payload.");
+      }
+
+      default: {
+        throw new RuntimeException("Unexpected payload case.");
+      }
+    }
+
+    switch (request.getRequestedOutputFormat()) {
+      case UNSPECIFIED:
+        throw new RuntimeException("Unspecified output format.");
+
+      case PROTOBUF:
+        return Conformance.ConformanceResponse.newBuilder().setProtobufPayload(testMessage.toByteString()).build();
+
+      case JSON:
+        return Conformance.ConformanceResponse.newBuilder().setSkipped(
+            "Lite runtime does not suport Json Formant.").build();
+
+      default: {
+        throw new RuntimeException("Unexpected request output.");
+      }
+    }
+  }
+
+  private boolean doTestIo() throws Exception {
+    int bytes = readLittleEndianIntFromStdin();
+
+    if (bytes == -1) {
+      return false;  // EOF
+    }
+
+    byte[] serializedInput = new byte[bytes];
+
+    if (!readFromStdin(serializedInput, bytes)) {
+      throw new RuntimeException("Unexpected EOF from test program.");
+    }
+
+    Conformance.ConformanceRequest request =
+        Conformance.ConformanceRequest.parseFrom(serializedInput);
+    Conformance.ConformanceResponse response = doTest(request);
+    byte[] serializedOutput = response.toByteArray();
+
+    writeLittleEndianIntToStdout(serializedOutput.length);
+    writeToStdout(serializedOutput);
+
+    return true;
+  }
+
+  public void run() throws Exception {
+    while (doTestIo()) {
+      this.testCount++;
+    }
+
+    System.err.println("ConformanceJavaLite: received EOF from test runner after " +
+        this.testCount + " tests");
+  }
+
+  public static void main(String[] args) throws Exception {
+    new ConformanceJavaLite().run();
+  }
+}

+ 19 - 1
conformance/Makefile.am

@@ -19,6 +19,7 @@ protoc_outputs =                                               \
 other_language_protoc_outputs =                                \
   conformance.rb                                               \
   com/google/protobuf/conformance/Conformance.java             \
+  lite/com/google/protobuf/conformance/Conformance.java        \
   conformance_pb2.py                                           \
   Conformance.pbobjc.h                                         \
   Conformance.pbobjc.m
@@ -30,6 +31,7 @@ bin_PROGRAMS = conformance-test-runner conformance-cpp
 # automatically.
 EXTRA_DIST =                  \
   ConformanceJava.java        \
+  ConformanceJavaLite.java    \
   README.md                   \
   conformance.proto           \
   conformance_python.py       \
@@ -88,6 +90,7 @@ if USE_EXTERNAL_PROTOC
 protoc_middleman: $(conformance_protoc_inputs) $(well_known_type_protoc_inputs)
 	$(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. --java_out=. --ruby_out=. --objc_out=. --python_out=. $(conformance_protoc_inputs)
 	$(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. --java_out=. --ruby_out=. --python_out=. $(well_known_type_protoc_inputs)
+	$(PROTOC) -I$(srcdir) -I$(top_srcdir) --java_out=lite:lite $(conformance_protoc_inputs) $(well_known_type_protoc_inputs)
 	touch protoc_middleman
 
 else
@@ -98,6 +101,8 @@ else
 protoc_middleman: $(top_srcdir)/src/protoc$(EXEEXT) $(conformance_protoc_inputs) $(well_known_type_protoc_inputs)
 	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd --ruby_out=$$oldpwd --objc_out=$$oldpwd --python_out=$$oldpwd $(conformance_protoc_inputs) )
 	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd --ruby_out=$$oldpwd --python_out=$$oldpwd $(well_known_type_protoc_inputs) )
+	@mkdir -p lite
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --java_out=lite:$$oldpwd/lite $(conformance_protoc_inputs) $(well_known_type_protoc_inputs) )
 	touch protoc_middleman
 
 endif
@@ -108,7 +113,7 @@ $(other_language_protoc_outputs): protoc_middleman
 
 BUILT_SOURCES = $(protoc_outputs) $(other_language_protoc_outputs)
 
-CLEANFILES = $(protoc_outputs) protoc_middleman javac_middleman conformance-java conformance-csharp $(other_language_protoc_outputs)
+CLEANFILES = $(protoc_outputs) protoc_middleman javac_middleman conformance-java javac_middleman_lite conformance-java-lite conformance-csharp $(other_language_protoc_outputs)
 
 MAINTAINERCLEANFILES =   \
   Makefile.in
@@ -123,6 +128,16 @@ conformance-java: javac_middleman
 	@jar=`ls ../java/util/target/*jar-with-dependencies.jar` && echo java -classpath .:../java/target/classes:$$jar ConformanceJava '$$@' >> conformance-java
 	@chmod +x conformance-java
 
+javac_middleman_lite: ConformanceJavaLite.java protoc_middleman $(other_language_protoc_outputs)
+	javac -classpath ../java/lite/target/classes:lite ConformanceJavaLite.java lite/com/google/protobuf/conformance/Conformance.java
+	@touch javac_middleman_lite
+
+conformance-java-lite: javac_middleman_lite
+	@echo "Writing shortcut script conformance-java-lite..."
+	@echo '#! /bin/sh' > conformance-java-lite
+	@echo java -classpath .:../java/lite/target/classes:lite ConformanceJavaLite '$$@' >> conformance-java-lite
+	@chmod +x conformance-java-lite
+
 # Currently the conformance code is alongside the rest of the C#
 # source, as it's easier to maintain there. We assume we've already
 # built that, so we just need a script to run it.
@@ -139,6 +154,9 @@ test_cpp: protoc_middleman conformance-test-runner conformance-cpp
 test_java: protoc_middleman conformance-test-runner conformance-java
 	./conformance-test-runner --failure_list failure_list_java.txt ./conformance-java
 
+test_java_lite: protoc_middleman conformance-test-runner conformance-java-lite
+	./conformance-test-runner ./conformance-java-lite
+
 test_csharp: protoc_middleman conformance-test-runner conformance-csharp
 	./conformance-test-runner --failure_list failure_list_csharp.txt ./conformance-csharp
 

+ 8 - 5
generate_descriptor_proto.sh

@@ -10,8 +10,6 @@
 #   to make when building protoc.  This is particularly useful for passing
 #   -j4 to run 4 jobs simultaneously.
 
-set -e
-
 if test ! -e src/google/protobuf/stubs/common.h; then
   cat >&2 << __EOF__
 Could not find source code.  Make sure you are running this script from the
@@ -52,9 +50,14 @@ do
   echo "Round $PROCESS_ROUND"
   CORE_PROTO_IS_CORRECT=1
 
-  make $@ protoc &&
-    ./protoc --cpp_out=dllexport_decl=LIBPROTOBUF_EXPORT:$TMP ${RUNTIME_PROTO_FILES[@]} && \
-    ./protoc --cpp_out=dllexport_decl=LIBPROTOC_EXPORT:$TMP google/protobuf/compiler/plugin.proto
+  make $@ protoc
+  if test $? -ne 0; then
+    echo "Failed to build protoc."
+    exit 1
+  fi
+
+  ./protoc --cpp_out=dllexport_decl=LIBPROTOBUF_EXPORT:$TMP ${RUNTIME_PROTO_FILES[@]} && \
+  ./protoc --cpp_out=dllexport_decl=LIBPROTOC_EXPORT:$TMP google/protobuf/compiler/plugin.proto
 
   for PROTO_FILE in ${RUNTIME_PROTO_FILES[@]}; do
     BASE_NAME=${PROTO_FILE%.*}

+ 145 - 0
java/core/src/main/java/com/google/protobuf/ByteBufferWriter.java

@@ -0,0 +1,145 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import static java.lang.Math.max;
+import static java.lang.Math.min;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.lang.ref.SoftReference;
+import java.nio.ByteBuffer;
+
+/**
+ * Utility class to provide efficient writing of {@link ByteBuffer}s to {@link OutputStream}s.
+ */
+final class ByteBufferWriter {
+  private ByteBufferWriter() {}
+
+  /**
+   * Minimum size for a cached buffer. This prevents us from allocating buffers that are too
+   * small to be easily reused.
+   */
+  // TODO(nathanmittler): tune this property or allow configuration?
+  private static final int MIN_CACHED_BUFFER_SIZE = 1024;
+
+  /**
+   * Maximum size for a cached buffer. If a larger buffer is required, it will be allocated
+   * but not cached.
+   */
+  // TODO(nathanmittler): tune this property or allow configuration?
+  private static final int MAX_CACHED_BUFFER_SIZE = 16 * 1024;
+
+  /**
+   * The fraction of the requested buffer size under which the buffer will be reallocated.
+   */
+  // TODO(nathanmittler): tune this property or allow configuration?
+  private static final float BUFFER_REALLOCATION_THRESHOLD = 0.5f;
+
+  /**
+   * Keeping a soft reference to a thread-local buffer. This buffer is used for writing a
+   * {@link ByteBuffer} to an {@link OutputStream} when no zero-copy alternative was available.
+   * Using a "soft" reference since VMs may keep this reference around longer than "weak"
+   * (e.g. HotSpot will maintain soft references until memory pressure warrants collection).
+   */
+  private static final ThreadLocal<SoftReference<byte[]>> BUFFER =
+      new ThreadLocal<SoftReference<byte[]>>();
+
+  /**
+   * For testing purposes only. Clears the cached buffer to force a new allocation on the next
+   * invocation.
+   */
+  static void clearCachedBuffer() {
+    BUFFER.set(null);
+  }
+
+  /**
+   * Writes the remaining content of the buffer to the given stream. The buffer {@code position}
+   * will remain unchanged by this method.
+   */
+  static void write(ByteBuffer buffer, OutputStream output) throws IOException {
+    final int initialPos = buffer.position();
+    try {
+      if (buffer.hasArray()) {
+        // Optimized write for array-backed buffers.
+        // Note that we're taking the risk that a malicious OutputStream could modify the array.
+        output.write(buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
+      } else if (output instanceof FileOutputStream) {
+        // Use a channel to write out the ByteBuffer. This will automatically empty the buffer.
+        ((FileOutputStream) output).getChannel().write(buffer);
+      } else {
+        // Read all of the data from the buffer to an array.
+        // TODO(nathanmittler): Consider performance improvements for other "known" stream types.
+        final byte[] array = getOrCreateBuffer(buffer.remaining());
+        while (buffer.hasRemaining()) {
+          int length = min(buffer.remaining(), array.length);
+          buffer.get(array, 0, length);
+          output.write(array, 0, length);
+        }
+      }
+    } finally {
+      // Restore the initial position.
+      buffer.position(initialPos);
+    }
+  }
+
+  private static byte[] getOrCreateBuffer(int requestedSize) {
+    requestedSize = max(requestedSize, MIN_CACHED_BUFFER_SIZE);
+
+    byte[] buffer = getBuffer();
+    // Only allocate if we need to.
+    if (buffer == null || needToReallocate(requestedSize, buffer.length)) {
+      buffer = new byte[requestedSize];
+
+      // Only cache the buffer if it's not too big.
+      if (requestedSize <= MAX_CACHED_BUFFER_SIZE) {
+        setBuffer(buffer);
+      }
+    }
+    return buffer;
+  }
+
+  private static boolean needToReallocate(int requestedSize, int bufferLength) {
+    // First check against just the requested length to avoid the multiply.
+    return bufferLength < requestedSize
+        && bufferLength < requestedSize * BUFFER_REALLOCATION_THRESHOLD;
+  }
+
+  private static byte[] getBuffer() {
+    SoftReference<byte[]> sr = BUFFER.get();
+    return sr == null ? null : sr.get();
+  }
+
+  private static void setBuffer(byte[] value) {
+    BUFFER.set(new SoftReference<byte[]>(value));
+  }
+}

+ 116 - 0
java/core/src/main/java/com/google/protobuf/ByteOutput.java

@@ -0,0 +1,116 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/**
+ * An output target for raw bytes. This interface provides semantics that support two types of
+ * writing:
+ *
+ * <p><b>Traditional write operations:</b>
+ * (as defined by {@link java.io.OutputStream}) where the target method is responsible for either
+ * copying the data or completing the write before returning from the method call.
+ *
+ * <p><b>Lazy write operations:</b> where the caller guarantees that it will never modify the
+ * provided buffer and it can therefore be considered immutable. The target method is free to
+ * maintain a reference to the buffer beyond the scope of the method call (e.g. until the write
+ * operation completes).
+ */
+@ExperimentalApi
+public abstract class ByteOutput {
+  /**
+   * Writes a single byte.
+   *
+   * @param value the byte to be written
+   * @throws IOException thrown if an error occurred while writing
+   */
+  public abstract void write(byte value) throws IOException;
+
+  /**
+   * Writes a sequence of bytes. The {@link ByteOutput} must copy {@code value} if it will
+   * not be processed prior to the return of this method call, since {@code value} may be
+   * reused/altered by the caller.
+   *
+   * <p>NOTE: This method <strong>MUST NOT</strong> modify the {@code value}. Doing so is a
+   * programming error and will lead to data corruption which will be difficult to debug.
+   *
+   * @param value the bytes to be written
+   * @param offset the offset in {@code value} of the first byte to write
+   * @param length the number of bytes to write starting from {@code offset}
+   * @throws IOException thrown if an error occurred while writing
+   */
+  public abstract void write(byte[] value, int offset, int length) throws IOException;
+
+  /**
+   * Writes a sequence of bytes. The {@link ByteOutput} is free to retain a reference to the value
+   * beyond the scope of this method call (e.g. write later) since it is considered immutable and is
+   * guaranteed not to change by the caller.
+   *
+   * <p>NOTE: This method <strong>MUST NOT</strong> modify the {@code value}. Doing so is a
+   * programming error and will lead to data corruption which will be difficult to debug.
+   *
+   * @param value the bytes to be written
+   * @param offset the offset in {@code value} of the first byte to write
+   * @param length the number of bytes to write starting from {@code offset}
+   * @throws IOException thrown if an error occurred while writing
+   */
+  public abstract void writeLazy(byte[] value, int offset, int length) throws IOException;
+
+  /**
+   * Writes a sequence of bytes. The {@link ByteOutput} must copy {@code value} if it will
+   * not be processed prior to the return of this method call, since {@code value} may be
+   * reused/altered by the caller.
+   *
+   * <p>NOTE: This method <strong>MUST NOT</strong> modify the {@code value}. Doing so is a
+   * programming error and will lead to data corruption which will be difficult to debug.
+   *
+   * @param value the bytes to be written. Upon returning from this call, the {@code position} of
+   * this buffer will be set to the {@code limit}
+   * @throws IOException thrown if an error occurred while writing
+   */
+  public abstract void write(ByteBuffer value) throws IOException;
+
+  /**
+   * Writes a sequence of bytes. The {@link ByteOutput} is free to retain a reference to the value
+   * beyond the scope of this method call (e.g. write later) since it is considered immutable and is
+   * guaranteed not to change by the caller.
+   *
+   * <p>NOTE: This method <strong>MUST NOT</strong> modify the {@code value}. Doing so is a
+   * programming error and will lead to data corruption which will be difficult to debug.
+   *
+   * @param value the bytes to be written. Upon returning from this call, the {@code position} of
+   * this buffer will be set to the {@code limit}
+   * @throws IOException thrown if an error occurred while writing
+   */
+  public abstract void writeLazy(ByteBuffer value) throws IOException;
+}

+ 104 - 16
java/core/src/main/java/com/google/protobuf/ByteString.java

@@ -1,4 +1,32 @@
-// Copyright 2007 Google Inc.  All rights reserved.
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 package com.google.protobuf;
 
@@ -15,6 +43,7 @@ import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
 import java.nio.charset.UnsupportedCharsetException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Iterator;
@@ -58,6 +87,54 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
    * Empty {@code ByteString}.
    */
   public static final ByteString EMPTY = new LiteralByteString(Internal.EMPTY_BYTE_ARRAY);
+  
+  /** 
+   * An interface to efficiently copy {@code byte[]}.
+   * 
+   * <p>One of the noticeable costs of copying a byte[] into a new array using
+   * {@code System.arraycopy} is nullification of the new buffer before the copy. It has been shown
+   * that the Hotspot VM is capable of intrinsifying the {@code Arrays.copyOfRange} operation to
+   * avoid this expensive nullification and provide a substantial performance gain. Unfortunately
+   * this does not hold on Android runtimes and could make the copy slightly slower due to
+   * additional code in {@code Arrays.copyOfRange}. Thus we provide two different array copier
+   * implementations, one for Hotspot and one for Android runtimes.
+   */
+  private interface ByteArrayCopier {
+    /**
+     * Copies the specified range of the specified array into a new array
+     */
+    byte[] copyFrom(byte[] bytes, int offset, int size);
+  }
+  
+  /** Implementation of {@code ByteArrayCopier} which uses {@link System#arraycopy}. */
+  private static final class SystemByteArrayCopier implements ByteArrayCopier {
+    @Override
+    public byte[] copyFrom(byte[] bytes, int offset, int size) {
+      byte[] copy = new byte[size];
+      System.arraycopy(bytes, offset, copy, 0, size);
+      return copy;
+    }
+  }
+  
+  /** Implementation of {@code ByteArrayCopier} which uses {@link Arrays#copyOfRange}. */
+  private static final class ArraysByteArrayCopier implements ByteArrayCopier {
+    @Override
+    public byte[] copyFrom(byte[] bytes, int offset, int size) {
+      return Arrays.copyOfRange(bytes, offset, offset + size);
+    }
+  }
+  
+  /**
+   * Copier used by {@link #copyFrom(byte[], int, int)}; chosen once, when this class is
+   * initialized.
+   */
+  private static final ByteArrayCopier byteArrayCopier;
+  static {
+    // Being able to load android.content.Context is treated as the signal that we are running on
+    // Android, in which case the System.arraycopy-based copier is selected.
+    ByteArrayCopier copier;
+    try {
+      Class.forName("android.content.Context");
+      copier = new SystemByteArrayCopier();
+    } catch (ClassNotFoundException e) {
+      copier = new ArraysByteArrayCopier();
+    }
+    byteArrayCopier = copier;
+  }
 
   /**
    * Cached hash value. Intentionally accessed via a data race, which
@@ -77,7 +154,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
    *
    * @param index index of byte
    * @return the value
-   * @throws ArrayIndexOutOfBoundsException {@code index < 0 or index >= size}
+   * @throws IndexOutOfBoundsException {@code index < 0 or index >= size}
    */
   public abstract byte byteAt(int index);
 
@@ -109,7 +186,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
       public byte nextByte() {
         try {
           return byteAt(position++);
-        } catch (ArrayIndexOutOfBoundsException e) {
+        } catch (IndexOutOfBoundsException e) {
           throw new NoSuchElementException(e.getMessage());
         }
       }
@@ -220,9 +297,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
    * @return new {@code ByteString}
    */
   public static ByteString copyFrom(byte[] bytes, int offset, int size) {
-    byte[] copy = new byte[size];
-    System.arraycopy(bytes, offset, copy, 0, size);
-    return new LiteralByteString(copy);
+    return new LiteralByteString(byteArrayCopier.copyFrom(bytes, offset, size));
   }
 
   /**
@@ -559,12 +634,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
   }
 
   /**
-   * Writes the complete contents of this byte string to
-   * the specified output stream argument.
-   *
-   * <p>It is assumed that the {@link OutputStream} will not modify the contents passed it
-   * it. It may be possible for a malicious {@link OutputStream} to corrupt
-   * the data underlying the {@link ByteString}.
+   * Writes a copy of the contents of this byte string to the specified output stream argument.
    *
    * @param  out  the output stream to which to write the data.
    * @throws IOException  if an I/O error occurs.
@@ -578,8 +648,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
    * @param  sourceOffset offset within these bytes
    * @param  numberToWrite number of bytes to write
    * @throws IOException  if an I/O error occurs.
-   * @throws IndexOutOfBoundsException if an offset or size is negative or too
-   *     large
+   * @throws IndexOutOfBoundsException if an offset or size is negative or too large
    */
   final void writeTo(OutputStream out, int sourceOffset, int numberToWrite)
       throws IOException {
@@ -596,6 +665,20 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
   abstract void writeToInternal(OutputStream out, int sourceOffset, int numberToWrite)
       throws IOException;
 
+  /**
+   * Writes this {@link ByteString} to the provided {@link ByteOutput}. Calling
+   * this method may result in multiple operations on the target {@link ByteOutput}.
+   *
+   * <p>This method may expose internal backing buffers of the {@link ByteString} to the {@link
+   * ByteOutput} in order to avoid additional copying overhead. It would be possible for a malicious
+   * {@link ByteOutput} to corrupt the {@link ByteString}. Use with caution!
+   *
+   * @param  byteOutput  the output target to receive the bytes
+   * @throws IOException  if an I/O error occurs
+   * @see UnsafeByteOperations#unsafeWriteTo(ByteString, ByteOutput)
+   */
+  abstract void writeTo(ByteOutput byteOutput) throws IOException;
+
   /**
    * Constructs a read-only {@code java.nio.ByteBuffer} whose content
    * is equal to the contents of this byte string.
@@ -1102,7 +1185,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
    *
    * @param index the index position to be tested
    * @param size the length of the array
-   * @throws ArrayIndexOutOfBoundsException if the index does not fall within the array.
+   * @throws IndexOutOfBoundsException if the index does not fall within the array.
    */
   static void checkIndex(int index, int size) {
     if ((index | (size - (index + 1))) < 0) {
@@ -1120,7 +1203,7 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
    * @param endIndex the end index of the range (exclusive)
    * @param size the size of the array.
    * @return the length of the range.
-   * @throws ArrayIndexOutOfBoundsException some or all of the range falls outside of the array.
+   * @throws IndexOutOfBoundsException some or all of the range falls outside of the array.
    */
   static int checkRange(int startIndex, int endIndex, int size) {
     final int length = endIndex - startIndex;
@@ -1235,6 +1318,11 @@ public abstract class ByteString implements Iterable<Byte>, Serializable {
       outputStream.write(bytes, getOffsetIntoBytes() + sourceOffset, numberToWrite);
     }
 
+    @Override
+    final void writeTo(ByteOutput output) throws IOException {
+      output.writeLazy(bytes, getOffsetIntoBytes(), size());
+    }
+
     @Override
     protected final String toStringInternal(Charset charset) {
       return new String(bytes, getOffsetIntoBytes(), size(), charset);

+ 31 - 11
java/core/src/main/java/com/google/protobuf/CodedInputStream.java

@@ -55,7 +55,14 @@ public final class CodedInputStream {
    * Create a new CodedInputStream wrapping the given InputStream.
    */
   public static CodedInputStream newInstance(final InputStream input) {
-    return new CodedInputStream(input);
+    return new CodedInputStream(input, BUFFER_SIZE);
+  }
+  
+  /**
+   * Create a new CodedInputStream wrapping the given InputStream, using an internal buffer of
+   * {@code bufferSize} bytes.
+   */
+  static CodedInputStream newInstance(final InputStream input, int bufferSize) {
+    return new CodedInputStream(input, bufferSize);
   }
 
   /**
@@ -70,14 +77,14 @@ public final class CodedInputStream {
    */
   public static CodedInputStream newInstance(final byte[] buf, final int off,
                                              final int len) {
-    return newInstance(buf, off, len, false);
+    return newInstance(buf, off, len, false /* bufferIsImmutable */);
   }
-
+  
   /**
    * Create a new CodedInputStream wrapping the given byte array slice.
    */
-  public static CodedInputStream newInstance(final byte[] buf, final int off,
-                                             final int len, boolean bufferIsImmutable) {
+  static CodedInputStream newInstance(
+      final byte[] buf, final int off, final int len, final boolean bufferIsImmutable) {
     CodedInputStream result = new CodedInputStream(buf, off, len, bufferIsImmutable);
     try {
       // Some uses of CodedInputStream can be more efficient if they know
@@ -361,6 +368,11 @@ public final class CodedInputStream {
       return result;
     } else if (size == 0) {
       return "";
+    } else if (size <= bufferSize) {
+      refillBuffer(size);
+      String result = new String(buffer, bufferPos, size, Internal.UTF_8);
+      bufferPos += size;
+      return result;
     } else {
       // Slow path:  Build a byte array first then copy it.
       return new String(readRawBytesSlowPath(size), Internal.UTF_8);
@@ -375,14 +387,21 @@ public final class CodedInputStream {
   public String readStringRequireUtf8() throws IOException {
     final int size = readRawVarint32();
     final byte[] bytes;
-    int pos = bufferPos;
-    if (size <= (bufferSize - pos) && size > 0) {
+    final int oldPos = bufferPos;
+    final int pos;
+    if (size <= (bufferSize - oldPos) && size > 0) {
       // Fast path:  We already have the bytes in a contiguous buffer, so
       //   just copy directly from it.
       bytes = buffer;
-      bufferPos = pos + size;
+      bufferPos = oldPos + size;
+      pos = oldPos;
     } else if (size == 0) {
       return "";
+    } else if (size <= bufferSize) {
+      refillBuffer(size);
+      bytes = buffer;
+      pos = 0;
+      bufferPos = pos + size;
     } else {
       // Slow path:  Build a byte array first then copy it.
       bytes = readRawBytesSlowPath(size);
@@ -869,7 +888,8 @@ public final class CodedInputStream {
   private static final int DEFAULT_SIZE_LIMIT = 64 << 20;  // 64MB
   private static final int BUFFER_SIZE = 4096;
 
-  private CodedInputStream(final byte[] buffer, final int off, final int len, boolean bufferIsImmutable) {
+  private CodedInputStream(
+      final byte[] buffer, final int off, final int len, boolean bufferIsImmutable) {
     this.buffer = buffer;
     bufferSize = off + len;
     bufferPos = off;
@@ -878,8 +898,8 @@ public final class CodedInputStream {
     this.bufferIsImmutable = bufferIsImmutable;
   }
 
-  private CodedInputStream(final InputStream input) {
-    buffer = new byte[BUFFER_SIZE];
+  private CodedInputStream(final InputStream input, int bufferSize) {
+    buffer = new byte[bufferSize];
     bufferSize = 0;
     bufferPos = 0;
     totalBytesRetired = 0;

Rozdílová data souboru nebyla zobrazena, protože soubor je příliš velký
+ 341 - 367
java/core/src/main/java/com/google/protobuf/CodedOutputStream.java


+ 2 - 2
java/core/src/main/java/com/google/protobuf/Descriptors.java

@@ -272,7 +272,7 @@ public final class Descriptors {
      *           because a field has an undefined type or because two messages
      *           were defined with the same name.
      */
-    private static FileDescriptor buildFrom(
+    public static FileDescriptor buildFrom(
         final FileDescriptorProto proto, final FileDescriptor[] dependencies,
         final boolean allowUnknownDependencies)
         throws DescriptorValidationException {
@@ -1123,7 +1123,7 @@ public final class Descriptors {
       private JavaType javaType;
 
       public FieldDescriptorProto.Type toProto() {
-        return FieldDescriptorProto.Type.valueOf(ordinal() + 1);
+        return FieldDescriptorProto.Type.forNumber(ordinal() + 1);
       }
       public JavaType getJavaType() { return javaType; }
 

+ 30 - 0
java/core/src/main/java/com/google/protobuf/ExperimentalApi.java

@@ -1,3 +1,33 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 package com.google.protobuf;
 
 import java.lang.annotation.Documented;

+ 3 - 1
java/core/src/main/java/com/google/protobuf/GeneratedMessage.java

@@ -1019,7 +1019,9 @@ public abstract class GeneratedMessage extends AbstractMessage
         verifyContainingType(field);
         final Object value = extensions.getField(field);
         if (value == null) {
-          if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
+          if (field.isRepeated()) {
+            return Collections.emptyList();
+          } else if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
             // Lacking an ExtensionRegistry, we have no way to determine the
             // extension's real type, so we return a DynamicMessage.
             return DynamicMessage.getDefaultInstance(field.getMessageType());

+ 241 - 60
java/core/src/main/java/com/google/protobuf/GeneratedMessageLite.java

@@ -30,6 +30,7 @@
 
 package com.google.protobuf;
 
+import com.google.protobuf.AbstractMessageLite.Builder.LimitedInputStream;
 import com.google.protobuf.Internal.BooleanList;
 import com.google.protobuf.Internal.DoubleList;
 import com.google.protobuf.Internal.FloatList;
@@ -39,6 +40,7 @@ import com.google.protobuf.Internal.ProtobufList;
 import com.google.protobuf.WireFormat.FieldType;
 
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.ObjectStreamException;
 import java.io.Serializable;
 import java.lang.reflect.InvocationTargetException;
@@ -57,10 +59,7 @@ import java.util.Map;
 public abstract class GeneratedMessageLite<
     MessageType extends GeneratedMessageLite<MessageType, BuilderType>,
     BuilderType extends GeneratedMessageLite.Builder<MessageType, BuilderType>> 
-        extends AbstractMessageLite
-        implements Serializable {
-
-  private static final long serialVersionUID = 1L;
+        extends AbstractMessageLite {
 
   /** For use by generated code only. Lazily initialized to reduce allocations. */
   protected UnknownFieldSetLite unknownFields = null;
@@ -83,6 +82,24 @@ public abstract class GeneratedMessageLite<
     return (BuilderType) dynamicMethod(MethodToInvoke.NEW_BUILDER);
   }
 
+  /**
+   * A reflective toString function. This is primarily intended as a developer aid, while keeping
+   * binary size down. The first line of the {@code toString()} representation includes a commented
+   * version of {@code super.toString()} to act as an indicator that this should not be relied on
+   * for comparisons.
+   * <p>
+   * NOTE: This method relies on the field getter methods not being stripped or renamed by proguard.
+   * If they are, the fields will not be included in the returned string representation.
+   * <p>
+   * NOTE: This implementation is liable to change in the future, and should not be relied on in
+   * code.
+   */
+  @Override
+  public String toString() {
+    return MessageLiteToString.toString(this, super.toString());
+  }
+
+
   // The general strategy for unknown fields is to use an UnknownFieldSetLite that is treated as
   // mutable during the parsing constructor and immutable after. This allows us to avoid
   // any unnecessary intermediary allocations while reducing the generated code size.
@@ -303,10 +320,9 @@ public abstract class GeneratedMessageLite<
         throws java.io.IOException {
       MessageType parsedMessage = null;
       try {
-        parsedMessage =
-            (MessageType) getDefaultInstanceForType().getParserForType().parsePartialFrom(
-                input, extensionRegistry);
-      } catch (com.google.protobuf.InvalidProtocolBufferException e) {
+        parsedMessage = parsePartialFrom(
+            (MessageType) getDefaultInstanceForType(), input, extensionRegistry);
+      } catch (InvalidProtocolBufferException e) {
         parsedMessage = (MessageType) e.getUnfinishedMessage();
         throw e;
       } finally {
@@ -562,7 +578,6 @@ public abstract class GeneratedMessageLite<
       return extensions.isInitialized();
     }
 
-
     @Override
     protected final void doneParsing() {
       super.doneParsing();
@@ -1049,7 +1064,12 @@ public abstract class GeneratedMessageLite<
    * A serialized (serializable) form of the generated message.  Stores the
    * message as a class name and a byte array.
    */
-  static final class SerializedForm implements Serializable {
+  protected static final class SerializedForm implements Serializable {
+
+    public static SerializedForm of(MessageLite message) {
+      return new SerializedForm(message);
+    }
+    
     private static final long serialVersionUID = 0L;
 
     private final String messageClassName;
@@ -1093,16 +1113,6 @@ public abstract class GeneratedMessageLite<
       }
     }
   }
-
-  /**
-   * Replaces this object in the output stream with a serialized form.
-   * Part of Java's serialization magic.  Generated sub-classes must override
-   * this method by calling {@code return super.writeReplace();}
-   * @return a SerializedForm of this message
-   */
-  protected Object writeReplace() throws ObjectStreamException {
-    return new SerializedForm(this);
-  }
   
   /**
    * Checks that the {@link Extension} is Lite and returns it as a
@@ -1135,45 +1145,6 @@ public abstract class GeneratedMessageLite<
     message.dynamicMethod(MethodToInvoke.MAKE_IMMUTABLE);
   }
   
-  /**
-   * A static helper method for parsing a partial from input using the extension registry and the
-   * instance.
-   */
-  static <T extends GeneratedMessageLite<T, ?>> T parsePartialFrom(
-      T instance, CodedInputStream input, ExtensionRegistryLite extensionRegistry)
-          throws InvalidProtocolBufferException {
-    try {
-      return (T) instance.dynamicMethod(
-          MethodToInvoke.PARSE_PARTIAL_FROM, input, extensionRegistry);
-    } catch (RuntimeException e) {
-      if (e.getCause() instanceof InvalidProtocolBufferException) {
-        throw (InvalidProtocolBufferException) e.getCause();
-      }
-      throw e;
-    }
-  }
-  
-  /**
-   * A {@link Parser} implementation that delegates to the default instance.
-   * <p>
-   * For use by generated code only.
-   */
-  protected static class DefaultInstanceBasedParser<T extends GeneratedMessageLite<T, ?>>
-      extends AbstractParser<T> {
-    
-    private T defaultInstance;
-    
-    public DefaultInstanceBasedParser(T defaultInstance) {
-      this.defaultInstance = defaultInstance;
-    }
-    
-    @Override
-    public T parsePartialFrom(CodedInputStream input, ExtensionRegistryLite extensionRegistry)
-        throws InvalidProtocolBufferException {
-      return GeneratedMessageLite.parsePartialFrom(defaultInstance, input, extensionRegistry);
-    }
-  }
-  
   protected static IntList newIntList() {
     return new IntArrayList();
   }
@@ -1269,8 +1240,218 @@ public abstract class GeneratedMessageLite<
   protected static <E> ProtobufList<E> emptyProtobufList() {
     return ProtobufArrayList.emptyList();
   }
-  
+
   protected static LazyStringArrayList emptyLazyStringArrayList() {
     return LazyStringArrayList.emptyList();
   }
+  
+  /**
+   * A {@link Parser} implementation that delegates to the default instance.
+   * <p>
+   * For use by generated code only.
+   */
+  protected static class DefaultInstanceBasedParser<T extends GeneratedMessageLite<T, ?>>
+      extends AbstractParser<T> {
+
+    /** Instance that every parse call is delegated to; never reassigned after construction. */
+    private final T defaultInstance;
+
+    /**
+     * @param defaultInstance the message instance that parsing is delegated to
+     */
+    public DefaultInstanceBasedParser(T defaultInstance) {
+      this.defaultInstance = defaultInstance;
+    }
+
+    /**
+     * Parses a partial message by forwarding to
+     * {@code GeneratedMessageLite.parsePartialFrom} with the stored default instance.
+     */
+    @Override
+    public T parsePartialFrom(CodedInputStream input, ExtensionRegistryLite extensionRegistry)
+        throws InvalidProtocolBufferException {
+      return GeneratedMessageLite.parsePartialFrom(defaultInstance, input, extensionRegistry);
+    }
+  }
+  
+  /**
+   * A static helper method for parsing a partial from input using the extension registry and the
+   * instance.
+   */
+  // TODO(dweis): Should this verify that the last tag was 0?
+  static <T extends GeneratedMessageLite<T, ?>> T parsePartialFrom(
+      T instance, CodedInputStream input, ExtensionRegistryLite extensionRegistry)
+          throws InvalidProtocolBufferException {
+    T result;
+    try {
+      result = (T) instance.dynamicMethod(
+          MethodToInvoke.PARSE_PARTIAL_FROM, input, extensionRegistry);
+    } catch (RuntimeException e) {
+      if (e.getCause() instanceof InvalidProtocolBufferException) {
+        throw (InvalidProtocolBufferException) e.getCause();
+      }
+      throw e;
+    }
+    return result;
+  }
+  
+  /**
+   * Parses a partial message from {@code input} using the empty extension registry.
+   */
+  protected static <T extends GeneratedMessageLite<T, ?>> T parsePartialFrom(
+      T defaultInstance,
+      CodedInputStream input)
+      throws InvalidProtocolBufferException {
+    return parsePartialFrom(defaultInstance, input, ExtensionRegistryLite.getEmptyRegistry());
+  }
+  
+  /**
+   * Helper method to check if message is initialized. A {@code null} message is passed through
+   * without being checked.
+   *
+   * @param message the message to check; may be {@code null}
+   * @throws InvalidProtocolBufferException if it is not initialized; the exception carries
+   *     {@code message} as its unfinished message.
+   * @return the same {@code message} that was passed in.
+   */
+  private static <T extends GeneratedMessageLite<T, ?>> T checkMessageInitialized(T message)
+      throws InvalidProtocolBufferException {
+    if (message != null && !message.isInitialized()) {
+      throw message.newUninitializedMessageException()
+          .asInvalidProtocolBufferException()
+          .setUnfinishedMessage(message);
+    }
+    return message;
+  }
+
+  /**
+   * Parses {@code data} into a message using the empty extension registry, verifying that the
+   * last tag read was 0 and that the resulting message is initialized.
+   *
+   * @throws InvalidProtocolBufferException if parsing fails, the last tag is not 0, or the parsed
+   *     message is not initialized
+   */
+  protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom(
+      T defaultInstance, ByteString data)
+      throws InvalidProtocolBufferException {
+    // Delegate straight to parsePartialFrom so the initialization check runs exactly once; going
+    // through the three-argument parseFrom would run checkMessageInitialized a second time.
+    return checkMessageInitialized(
+        parsePartialFrom(defaultInstance, data, ExtensionRegistryLite.getEmptyRegistry()));
+  }
+
+  /**
+   * Parses {@code data} into a message using {@code extensionRegistry}. Validates the last tag
+   * and checks that the resulting message is initialized.
+   */
+  protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom(
+      T defaultInstance, ByteString data, ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    return checkMessageInitialized(parsePartialFrom(defaultInstance, data, extensionRegistry));
+  }
+  
+  // This is a special case since we want to verify that the last tag is 0. We assume we exhaust the
+  // ByteString.
+  private static <T extends GeneratedMessageLite<T, ?>> T parsePartialFrom(
+      T defaultInstance, ByteString data, ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    T message;
+    try {
+      CodedInputStream input = data.newCodedInput();
+      message = parsePartialFrom(defaultInstance, input, extensionRegistry);
+      try {
+        input.checkLastTagWas(0);
+      } catch (InvalidProtocolBufferException e) {
+        throw e.setUnfinishedMessage(message);
+      }
+      return message;
+    } catch (InvalidProtocolBufferException e) {
+      throw e;
+    }
+  }
+  
+  // This is a special case since we want to verify that the last tag is 0. We assume we exhaust the
+  // ByteString.
+  private static <T extends GeneratedMessageLite<T, ?>> T parsePartialFrom(
+      T defaultInstance, byte[] data, ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    T message;
+    try {
+      CodedInputStream input = CodedInputStream.newInstance(data);
+      message = parsePartialFrom(defaultInstance, input, extensionRegistry);
+      try {
+        input.checkLastTagWas(0);
+      } catch (InvalidProtocolBufferException e) {
+        throw e.setUnfinishedMessage(message);
+      }
+      return message;
+    } catch (InvalidProtocolBufferException e) {
+      throw e;
+    }
+  }
+
+  /**
+   * Parses {@code data} into a message using the empty extension registry. Validates the last
+   * tag and checks that the resulting message is initialized.
+   */
+  protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom(
+      T defaultInstance, byte[] data)
+      throws InvalidProtocolBufferException {
+    return checkMessageInitialized(
+        parsePartialFrom(defaultInstance, data, ExtensionRegistryLite.getEmptyRegistry()));
+  }
+
+  /**
+   * Parses {@code data} into a message using {@code extensionRegistry}. Validates the last tag
+   * and checks that the resulting message is initialized.
+   */
+  protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom(
+      T defaultInstance, byte[] data, ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    return checkMessageInitialized(parsePartialFrom(defaultInstance, data, extensionRegistry));
+  }
+
+  /**
+   * Parses a message from {@code input}, wrapped in a new {@link CodedInputStream}, using the
+   * empty extension registry, and checks that the resulting message is initialized. Does not
+   * validate the last tag.
+   */
+  protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom(
+      T defaultInstance, InputStream input)
+      throws InvalidProtocolBufferException {
+    return checkMessageInitialized(
+        parsePartialFrom(defaultInstance, CodedInputStream.newInstance(input),
+            ExtensionRegistryLite.getEmptyRegistry()));
+  }
+
+  /**
+   * Parses a message from {@code input}, wrapped in a new {@link CodedInputStream}, using
+   * {@code extensionRegistry}, and checks that the resulting message is initialized. Does not
+   * validate the last tag.
+   */
+  protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom(
+      T defaultInstance, InputStream input, ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    return checkMessageInitialized(
+        parsePartialFrom(defaultInstance, CodedInputStream.newInstance(input), extensionRegistry));
+  }
+
+  /**
+   * Parses a message from {@code input} using the empty extension registry by delegating to the
+   * three-argument overload. Does not validate the last tag.
+   */
+  protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom(
+      T defaultInstance, CodedInputStream input)
+      throws InvalidProtocolBufferException {
+    return parseFrom(defaultInstance, input, ExtensionRegistryLite.getEmptyRegistry());
+  }
+
+  /**
+   * Parses a message from {@code input} using {@code extensionRegistry} and checks that the
+   * resulting message is initialized. Does not validate the last tag.
+   */
+  protected static <T extends GeneratedMessageLite<T, ?>> T parseFrom(
+      T defaultInstance, CodedInputStream input, ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    return checkMessageInitialized(
+        parsePartialFrom(defaultInstance, input, extensionRegistry));
+  }
+
+  /**
+   * Parses a length-delimited message from {@code input} using the empty extension registry.
+   * Validates the last tag and checks that the resulting message is initialized. The result is
+   * {@code null} when the delimited parse returns {@code null} (stream already at its end).
+   */
+  protected static <T extends GeneratedMessageLite<T, ?>> T parseDelimitedFrom(
+      T defaultInstance, InputStream input)
+      throws InvalidProtocolBufferException {
+    return checkMessageInitialized(
+        parsePartialDelimitedFrom(defaultInstance, input,
+            ExtensionRegistryLite.getEmptyRegistry()));
+  }
+
+  /**
+   * Parses a length-delimited message from {@code input} using {@code extensionRegistry}.
+   * Validates the last tag and checks that the resulting message is initialized. The result is
+   * {@code null} when the delimited parse returns {@code null} (stream already at its end).
+   */
+  protected static <T extends GeneratedMessageLite<T, ?>> T parseDelimitedFrom(
+      T defaultInstance, InputStream input, ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    return checkMessageInitialized(
+        parsePartialDelimitedFrom(defaultInstance, input, extensionRegistry));
+  }
+  
+  private static <T extends GeneratedMessageLite<T, ?>> T parsePartialDelimitedFrom(
+      T defaultInstance,
+      InputStream input,
+      ExtensionRegistryLite extensionRegistry)
+      throws InvalidProtocolBufferException {
+    int size;
+    try {
+      int firstByte = input.read();
+      if (firstByte == -1) {
+        return null;
+      }
+      size = CodedInputStream.readRawVarint32(firstByte, input);
+    } catch (IOException e) {
+      throw new InvalidProtocolBufferException(e.getMessage());
+    }
+    InputStream limitedInput = new LimitedInputStream(input, size);
+    CodedInputStream codedInput = CodedInputStream.newInstance(limitedInput);
+    T message = parsePartialFrom(defaultInstance, codedInput, extensionRegistry);
+    try {
+      codedInput.checkLastTagWas(0);
+    } catch (InvalidProtocolBufferException e) {
+      throw e.setUnfinishedMessage(message);
+    }
+    return message;
+  }
 }

+ 6 - 3
java/core/src/main/java/com/google/protobuf/Internal.java

@@ -51,10 +51,12 @@ import java.util.Set;
  *
  * @author kenton@google.com (Kenton Varda)
  */
-public class Internal {
+public final class Internal {
 
-  protected static final Charset UTF_8 = Charset.forName("UTF-8");
-  protected static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
+  private Internal() {}
+
+  static final Charset UTF_8 = Charset.forName("UTF-8");
+  static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
 
   /**
    * Helper called by generated code to construct default values for string
@@ -406,6 +408,7 @@ public class Internal {
   public static final CodedInputStream EMPTY_CODED_INPUT_STREAM =
       CodedInputStream.newInstance(EMPTY_BYTE_ARRAY);
 
+
   /**
    * Provides an immutable view of {@code List<T>} around a {@code List<F>}.
    *

+ 2 - 2
java/core/src/main/java/com/google/protobuf/LazyField.java

@@ -39,14 +39,14 @@ import java.util.Map.Entry;
  *
  * Most of key methods are implemented in {@link LazyFieldLite} but this class
  * can contain default instance of the message to provide {@code hashCode()},
- * {@code equals()} and {@code toString()}.
+ * {@code equals()} and {@code toString()}.
  *
  * @author xiangl@google.com (Xiang Li)
  */
 public class LazyField extends LazyFieldLite {
 
   /**
-   * Carry a message's default instance which is used by {@code hashCode()}, {@code equals()} and
+   * Carry a message's default instance which is used by {@code hashCode()}, {@code equals()} and
    * {@code toString()}.
    */
   private final MessageLite defaultInstance;

+ 95 - 17
java/core/src/main/java/com/google/protobuf/LazyFieldLite.java

@@ -30,14 +30,26 @@
 
 package com.google.protobuf;
 
+import java.io.IOException;
+
 /**
  * LazyFieldLite encapsulates the logic of lazily parsing message fields. It stores
- * the message in a ByteString initially and then parse it on-demand.
+ * the message in a ByteString initially and then parses it on-demand.
+ *
+ * LazyFieldLite is thread-compatible: concurrent reads are safe once the proto that this
+ * LazyFieldLite is a part of is no longer being mutated by its Builder. However, explicit
+ * synchronization is needed under read/write situations.
  *
- * LazyField is thread-compatible e.g. concurrent read are safe, however,
- * synchronizations are needed under read/write situations.
+ * When a LazyFieldLite is used in the context of a MessageLite object, its behavior is considered
+ * to be immutable and none of the setter methods in its API are expected to be invoked. All of the
+ * getters are expected to be thread-safe. When used in the context of a MessageLite.Builder,
+ * setters can be invoked, but there is no guarantee of thread safety.
+ * 
+ * TODO(yatin,dweis): Consider splitting this class's functionality and put the mutable methods
+ * into a separate builder class to allow us to give stronger compile-time guarantees.
  *
- * This class is internal implementation detail, so you don't need to use it directly.
+ * This class is internal implementation detail of the protobuf library, so you don't need to use it
+ * directly.
  *
  * @author xiangl@google.com (Xiang Li)
  */
@@ -46,8 +58,34 @@ public class LazyFieldLite {
       ExtensionRegistryLite.getEmptyRegistry();
 
   /**
-   * A delayed-parsed version of the bytes. When this is non-null then {@code extensionRegistry } is
-   * also non-null and {@code value} and {@code memoizedBytes} are null.
+   * The value associated with the LazyFieldLite object is stored in one or more of the following
+   * three fields (delayedBytes, value, memoizedBytes). They should together be interpreted as
+   * follows.
+   * 1) delayedBytes can be non-null, while value and memoizedBytes are null. The object will be in
+   *    this state while the value for the object has not yet been parsed.
+   * 2) Both delayedBytes and value are non-null. The object transitions to this state as soon as
+   *    some caller needs to access the value (by invoking getValue()).
+   * 3) memoizedBytes is merely an optimization for calls to LazyFieldLite.toByteString() to avoid
+   *    recomputing the ByteString representation on each call. Instead, when the value is parsed
+   *    from delayedBytes, we will also assign the contents of delayedBytes to memoizedBytes (since
+   *    that is the ByteString representation of value).
+   * 4) Finally, if the LazyFieldLite was created directly with a parsed MessageLite value, then
+   *    delayedBytes will be null, and memoizedBytes will be initialized only upon the first call to
+   *    LazyFieldLite.toByteString().
+   *
+   * Given the above conditions, any caller that needs a serialized representation of this object
+   * must first check if the memoizedBytes or delayedBytes ByteString is non-null and use it
+   * directly; if both of those are null, it can look at the parsed value field. Similarly, any
+   * caller that needs a parsed value must first check if the value field is already non-null, if
+   * not it must parse the value from delayedBytes.
+   */
+
+  /**
+   * A delayed-parsed version of the contents of this field. When this field is non-null, then the
+   * "value" field is allowed to be null until the time that the value needs to be read.
+   *
+   * When delayedBytes is non-null then {@code extensionRegistry} is required to also be non-null.
+   * {@code value} and {@code memoizedBytes} will be initialized lazily.
    */
   private ByteString delayedBytes;
 
@@ -60,12 +98,15 @@ public class LazyFieldLite {
   private ExtensionRegistryLite extensionRegistry;
 
   /**
-   * The parsed value. When this is non-null then {@code delayedBytes} will be null.
+   * The parsed value. When this is null and a caller needs access to the MessageLite value, then
+   * {@code delayedBytes} will be parsed lazily at that time.
    */
   protected volatile MessageLite value;
 
   /**
-   * The memoized bytes for {@code value}. Will be null when {@code value} is null.
+   * The memoized bytes for {@code value}. This is an optimization for the toByteString() method to
+   * not have to recompute its return-value on each invocation.
+   * TODO(yatin): Figure out whether this optimization is actually necessary.
    */
   private volatile ByteString memoizedBytes;
 
@@ -230,6 +271,46 @@ public class LazyFieldLite {
       return;
     }
   }
+  
+  /**
+   * Merges another instance's contents from a stream.
+   *
+   * <p>LazyField is not thread-safe for write access. Synchronizations are needed
+   * under read/write situations.
+   */
+  public void mergeFrom(CodedInputStream input, ExtensionRegistryLite extensionRegistry)
+      throws IOException {
+    if (this.containsDefaultInstance()) {
+      setByteString(input.readBytes(), extensionRegistry);
+      return;
+    }
+
+    // If the other field has an extension registry but this does not, copy over the other extension
+    // registry.
+    if (this.extensionRegistry == null) {
+      this.extensionRegistry = extensionRegistry;
+    }
+
+    // In the case that both of them are not parsed we simply concatenate the bytes to save time. In
+    // the (probably rare) case that they have different extension registries there is a chance that
+    // some of the extensions may be dropped, but the tradeoff of making this operation fast seems
+    // to outweigh the benefits of combining the extension registries, which is not normally done
+    // for lite protos anyways.
+    if (this.delayedBytes != null) {
+      setByteString(this.delayedBytes.concat(input.readBytes()), this.extensionRegistry);
+      return;
+    }
+
+    // We are parsed and both contain data. We won't drop any extensions here directly, but in the
+    // case that the extension registries are not the same then we might in the future if we
+    // need to serialize and parse a message again.
+    try {
+      setValue(value.toBuilder().mergeFrom(input, extensionRegistry).build());
+    } catch (InvalidProtocolBufferException e) {
+      // Nothing is logged and no exceptions are thrown. Clients will be unaware that a proto
+      // was invalid.
+    }
+  }
 
   private static MessageLite mergeValueAndBytes(
       MessageLite value, ByteString otherBytes, ExtensionRegistryLite extensionRegistry) {
@@ -259,10 +340,10 @@ public class LazyFieldLite {
    * parsed. Be careful when using this method.
    */
   public int getSerializedSize() {
-    if (delayedBytes != null) {
-      return delayedBytes.size();
-    } else if (memoizedBytes != null) {
+    if (memoizedBytes != null) {
       return memoizedBytes.size();
+    } else if (delayedBytes != null) {
+      return delayedBytes.size();
     } else if (value != null) {
       return value.getSerializedSize();
     } else {
@@ -274,12 +355,12 @@ public class LazyFieldLite {
    * Returns a BytesString for this field in a thread-safe way.
    */
   public ByteString toByteString() {
-    if (delayedBytes != null) {
-      return delayedBytes;
-    }
     if (memoizedBytes != null) {
       return memoizedBytes;
     }
+    if (delayedBytes != null) {
+      return delayedBytes;
+    }
     synchronized (this) {
       if (memoizedBytes != null) {
         return memoizedBytes;
@@ -311,18 +392,15 @@ public class LazyFieldLite {
               .parseFrom(delayedBytes, extensionRegistry);
           this.value = parsedValue;
           this.memoizedBytes = delayedBytes;
-          this.delayedBytes = null;
         } else {
           this.value = defaultInstance;
           this.memoizedBytes = ByteString.EMPTY;
-          this.delayedBytes = null;
         }
       } catch (InvalidProtocolBufferException e) {
         // Nothing is logged and no exceptions are thrown. Clients will be unaware that this proto
         // was invalid.
         this.value = defaultInstance;
         this.memoizedBytes = ByteString.EMPTY;
-        this.delayedBytes = null;
       }
     }
   }

+ 2 - 2
java/core/src/main/java/com/google/protobuf/LazyStringArrayList.java

@@ -30,12 +30,12 @@
 
 package com.google.protobuf;
 
-import java.util.Arrays;
-import java.util.List;
 import java.util.AbstractList;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.List;
 import java.util.RandomAccess;
 
 /**

+ 200 - 0
java/core/src/main/java/com/google/protobuf/MessageLiteToString.java

@@ -0,0 +1,200 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Helps generate {@link String} representations of {@link MessageLite} protos.
+ */
+final class MessageLiteToString {
+  /**
+   * Suffix for *_FIELD_NUMBER fields. This is used to reflectively detect proto fields that should
+   * be toString()ed.
+   */
+  private static final String FIELD_NUMBER_NAME_SUFFIX = "_FIELD_NUMBER";
+
+  /**
+   * Returns a {@link String} representation of the {@link MessageLite} object.  The first line of
+   * the {@code String} representation includes a comment string to uniquely identify
+   * the object instance. This acts as an indicator that this should not be relied on for
+   * comparisons.
+   *
+   * <p>For use by generated code only.
+   */
+  static String toString(MessageLite messageLite, String commentString) {
+    StringBuilder buffer = new StringBuilder();
+    buffer.append("# ").append(commentString);
+    reflectivePrintWithIndent(messageLite, buffer, 0);
+    return buffer.toString();
+  }
+
+  /**
+   * Reflectively prints the {@link MessageLite} to the buffer at given {@code indent} level.
+   *
+   * @param buffer the buffer to write to
+   * @param indent the number of spaces to indent the proto by
+   */
+  private static void reflectivePrintWithIndent(
+      MessageLite messageLite, StringBuilder buffer, int indent) {
+    // Build a map of method name to method. We're looking for methods like getFoo(), hasFoo(), and
+    // getFooList() which might be useful for building an object's string representation.
+    Map<String, Method> nameToNoArgMethod = new HashMap<String, Method>();
+    for (Method method : messageLite.getClass().getDeclaredMethods()) {
+      if (method.getParameterTypes().length == 0) {
+        nameToNoArgMethod.put(method.getName(), method);
+      }
+    }
+
+    for (Field field : messageLite.getClass().getDeclaredFields()) {
+      String fieldName = field.getName();
+      // Skip all fields that aren't in a format like "FOO_BAR_FIELD_NUMBER"
+      if (!fieldName.endsWith(FIELD_NUMBER_NAME_SUFFIX)) {
+        continue;
+      }
+
+      // For "FOO_BAR_FIELD_NUMBER" this would be "FOO_BAR"
+      String upperUnderscore =
+          fieldName.substring(0, fieldName.length() - FIELD_NUMBER_NAME_SUFFIX.length());
+
+      // For "FOO_BAR_FIELD_NUMBER" this would be "FooBar"
+      String upperCamelCaseName = upperUnderscoreToUpperCamel(upperUnderscore);
+
+      // Try to reflectively get the value and toString() the field as if it were optional. This
+      // only works if the method names have not been proguarded out or renamed.
+      Method getMethod = nameToNoArgMethod.get("get" + upperCamelCaseName);
+      Method hasMethod = nameToNoArgMethod.get("has" + upperCamelCaseName);
+      if (getMethod != null && hasMethod != null) {
+        if ((Boolean) GeneratedMessageLite.invokeOrDie(hasMethod, messageLite)) {
+          printField(
+              buffer,
+              indent,
+              upperUnderscore.toLowerCase(),
+              GeneratedMessageLite.invokeOrDie(getMethod, messageLite));
+        }
+        continue;
+      }
+
+      // Try to reflectively get the value and toString() the field as if it were repeated. This
+      // only works if the method names have not been proguarded out or renamed.
+      Method listMethod = nameToNoArgMethod.get("get" + upperCamelCaseName + "List");
+      if (listMethod != null) {
+        printField(
+            buffer,
+            indent,
+            upperUnderscore.toLowerCase(),
+            GeneratedMessageLite.invokeOrDie(listMethod, messageLite));
+        continue;
+      }
+    }
+
+    if (messageLite instanceof GeneratedMessageLite.ExtendableMessage) {
+      Iterator<Map.Entry<GeneratedMessageLite.ExtensionDescriptor, Object>> iter =
+          ((GeneratedMessageLite.ExtendableMessage<?, ?>) messageLite).extensions.iterator();
+      while (iter.hasNext()) {
+        Map.Entry<GeneratedMessageLite.ExtensionDescriptor, Object> entry = iter.next();
+        printField(buffer, indent, "[" + entry.getKey().getNumber() + "]", entry.getValue());
+      }
+    }
+
+    if (((GeneratedMessageLite) messageLite).unknownFields != null) {
+      ((GeneratedMessageLite) messageLite).unknownFields.printWithIndent(buffer, indent);
+    }
+  }
+
+  /**
+   * Formats a text proto field.
+   *
+   * <p>For use by generated code only.
+   *
+   * @param buffer the buffer to write to
+   * @param indent the number of spaces the proto should be indented by
+   * @param name the field name (in lower underscore case)
+   * @param object the object value of the field
+   */
+  static final void printField(StringBuilder buffer, int indent, String name, Object object) {
+    if (object instanceof List<?>) {
+      List<?> list = (List<?>) object;
+      for (Object entry : list) {
+        printField(buffer, indent, name, entry);
+      }
+      return;
+    }
+
+    buffer.append('\n');
+    for (int i = 0; i < indent; i++) {
+      buffer.append(' ');
+    }
+    buffer.append(name);
+
+    if (object instanceof String) {
+      buffer.append(": \"").append(TextFormatEscaper.escapeText((String) object)).append('"');
+    } else if (object instanceof ByteString) {
+      buffer.append(": \"").append(TextFormatEscaper.escapeBytes((ByteString) object)).append('"');
+    } else if (object instanceof GeneratedMessageLite) {
+      buffer.append(" {");
+      reflectivePrintWithIndent((GeneratedMessageLite) object, buffer, indent + 2);
+      buffer.append("\n");
+      for (int i = 0; i < indent; i++) {
+        buffer.append(' ');
+      }
+      buffer.append("}");
+    } else {
+      buffer.append(": ").append(object.toString());
+    }
+  }
+
+  /**
+   * A Guava-less implementation of:
+   * {@code CaseFormat.UPPER_UNDERSCORE.to(CaseFormat.UPPER_CAMEL, upperUnderscore)}
+   */
+  private static String upperUnderscoreToUpperCamel(String upperUnderscore) {
+    String upperCamelCaseName = "";
+    boolean nextCharacterShouldBeUpper = true;
+    for (int i = 0; i < upperUnderscore.length(); i++) {
+      char ch = upperUnderscore.charAt(i);
+      if (ch == '_') {
+        nextCharacterShouldBeUpper = true;
+      } else if (nextCharacterShouldBeUpper){
+        upperCamelCaseName += Character.toUpperCase(ch);
+        nextCharacterShouldBeUpper = false;
+      } else {
+        upperCamelCaseName += Character.toLowerCase(ch);
+      }
+    }
+    return upperCamelCaseName;
+  }
+}

+ 18 - 37
java/core/src/main/java/com/google/protobuf/NioByteString.java

@@ -30,15 +30,14 @@
 
 package com.google.protobuf;
 
-import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InvalidObjectException;
 import java.io.ObjectInputStream;
 import java.io.OutputStream;
 import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
 import java.nio.InvalidMarkException;
-import java.nio.channels.Channels;
 import java.nio.charset.Charset;
 import java.util.Collections;
 import java.util.List;
@@ -54,7 +53,7 @@ final class NioByteString extends ByteString.LeafByteString {
       throw new NullPointerException("buffer");
     }
 
-    this.buffer = buffer.slice();
+    this.buffer = buffer.slice().order(ByteOrder.nativeOrder());
   }
 
   // =================================================================
@@ -119,7 +118,7 @@ final class NioByteString extends ByteString.LeafByteString {
 
   @Override
   public void writeTo(OutputStream out) throws IOException {
-    writeToInternal(out, buffer.position(), buffer.remaining());
+    out.write(toByteArray());
   }
 
   @Override
@@ -137,14 +136,12 @@ final class NioByteString extends ByteString.LeafByteString {
       return;
     }
 
-    // Slow path
-    if (out instanceof FileOutputStream || numberToWrite >= 8192) {
-      // Use a channel to write out the ByteBuffer.
-      Channels.newChannel(out).write(slice(sourceOffset, sourceOffset + numberToWrite));
-    } else {
-      // Just copy the data to an array and write it.
-      out.write(toByteArray());
-    }
+    ByteBufferWriter.write(slice(sourceOffset, sourceOffset + numberToWrite), out);
+  }
+
+  @Override
+  void writeTo(ByteOutput output) throws IOException {
+    output.writeLazy(buffer.slice());
   }
 
   @Override
@@ -159,46 +156,30 @@ final class NioByteString extends ByteString.LeafByteString {
 
   @Override
   protected String toStringInternal(Charset charset) {
-    byte[] bytes;
-    int offset;
+    final byte[] bytes;
+    final int offset;
+    final int length;
     if (buffer.hasArray()) {
       bytes = buffer.array();
       offset = buffer.arrayOffset() + buffer.position();
+      length = buffer.remaining();
     } else {
+      // TODO(nathanmittler): Can we optimize this?
       bytes = toByteArray();
       offset = 0;
+      length = bytes.length;
     }
-    return new String(bytes, offset, size(), charset);
+    return new String(bytes, offset, length, charset);
   }
 
   @Override
   public boolean isValidUtf8() {
-    // TODO(nathanmittler): add a ByteBuffer fork for Utf8.isValidUtf8 to avoid the copy
-    byte[] bytes;
-    int startIndex;
-    if (buffer.hasArray()) {
-      bytes = buffer.array();
-      startIndex = buffer.arrayOffset() + buffer.position();
-    } else {
-      bytes = toByteArray();
-      startIndex = 0;
-    }
-    return Utf8.isValidUtf8(bytes, startIndex, startIndex + size());
+    return Utf8.isValidUtf8(buffer);
   }
 
   @Override
   protected int partialIsValidUtf8(int state, int offset, int length) {
-    // TODO(nathanmittler): TODO add a ByteBuffer fork for Utf8.partialIsValidUtf8 to avoid the copy
-    byte[] bytes;
-    int startIndex;
-    if (buffer.hasArray()) {
-      bytes = buffer.array();
-      startIndex = buffer.arrayOffset() + buffer.position();
-    } else {
-      bytes = toByteArray();
-      startIndex = 0;
-    }
-    return Utf8.partialIsValidUtf8(state, bytes, startIndex, startIndex + size());
+    return Utf8.partialIsValidUtf8(state, buffer, offset, offset + length);
   }
 
   @Override

+ 0 - 1
java/core/src/main/java/com/google/protobuf/Parser.java

@@ -30,7 +30,6 @@
 
 package com.google.protobuf;
 
-import java.io.IOException;
 import java.io.InputStream;
 
 /**

+ 17 - 9
java/core/src/main/java/com/google/protobuf/RopeByteString.java

@@ -48,10 +48,11 @@ import java.util.Stack;
 /**
  * Class to represent {@code ByteStrings} formed by concatenation of other
  * ByteStrings, without copying the data in the pieces. The concatenation is
- * represented as a tree whose leaf nodes are each a {@link LiteralByteString}.
+ * represented as a tree whose leaf nodes are each a
+ * {@link com.google.protobuf.ByteString.LeafByteString}.
  *
  * <p>Most of the operation here is inspired by the now-famous paper <a
- * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
+ * href="https://web.archive.org/web/20060202015456/http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
  * BAP95 </a> Ropes: an Alternative to Strings hans-j. boehm, russ atkinson and
  * michael plass
  *
@@ -139,8 +140,9 @@ final class RopeByteString extends ByteString {
   /**
    * Concatenate the given strings while performing various optimizations to
    * slow the growth rate of tree depth and tree node count. The result is
-   * either a {@link LiteralByteString} or a {@link RopeByteString}
-   * depending on which optimizations, if any, were applied.
+   * either a {@link com.google.protobuf.ByteString.LeafByteString} or a
+   * {@link RopeByteString} depending on which optimizations, if any, were
+   * applied.
    *
    * <p>Small pieces of length less than {@link
    * ByteString#CONCATENATE_BY_COPY_SIZE} may be copied by value here, as in
@@ -294,8 +296,7 @@ final class RopeByteString extends ByteString {
    *
    * <p>Substrings of {@code length < 2} should result in at most a single
    * recursive call chain, terminating at a leaf node. Thus the result will be a
-   * {@link LiteralByteString}. {@link #RopeByteString(ByteString,
-   * ByteString)}.
+   * {@link com.google.protobuf.ByteString.LeafByteString}.
    *
    * @param beginIndex start at this index
    * @param endIndex   the last character is the one before this index
@@ -368,7 +369,7 @@ final class RopeByteString extends ByteString {
 
   @Override
   public List<ByteBuffer> asReadOnlyByteBufferList() {
-    // Walk through the list of LiteralByteString's that make up this
+    // Walk through the list of LeafByteString's that make up this
     // rope, and add each one as a read-only ByteBuffer.
     List<ByteBuffer> result = new ArrayList<ByteBuffer>();
     PieceIterator pieces = new PieceIterator(this);
@@ -399,6 +400,12 @@ final class RopeByteString extends ByteString {
     }
   }
 
+  @Override
+  void writeTo(ByteOutput output) throws IOException {
+    left.writeTo(output);
+    right.writeTo(output);
+  }
+
   @Override
   protected String toStringInternal(Charset charset) {
     return new String(toByteArray(), charset);
@@ -709,9 +716,10 @@ final class RopeByteString extends ByteString {
     }
 
     /**
-     * Returns the next item and advances one {@code LiteralByteString}.
+     * Returns the next item and advances one
+     * {@link com.google.protobuf.ByteString.LeafByteString}.
      *
-     * @return next non-empty LiteralByteString or {@code null}
+     * @return next non-empty LeafByteString or {@code null}
      */
     @Override
     public LeafByteString next() {

+ 1 - 1
java/core/src/main/java/com/google/protobuf/SmallSortedMap.java

@@ -35,12 +35,12 @@ import java.util.AbstractSet;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Iterator;
-import java.util.TreeMap;
 import java.util.List;
 import java.util.Map;
 import java.util.NoSuchElementException;
 import java.util.Set;
 import java.util.SortedMap;
+import java.util.TreeMap;
 
 /**
  * A custom map implementation from FieldDescriptor to Object optimized to

+ 90 - 86
java/core/src/main/java/com/google/protobuf/TextFormat.java

@@ -425,7 +425,7 @@ public final class TextFormat {
         case STRING:
           generator.print("\"");
           generator.print(escapeNonAscii
-              ? escapeText((String) value)
+              ? TextFormatEscaper.escapeText((String) value)
               : escapeDoubleQuotesAndBackslashes((String) value)
                   .replace("\n", "\\n"));
           generator.print("\"");
@@ -661,6 +661,14 @@ public final class TextFormat {
       nextToken();
     }
 
+    int getLine() {
+      return line;
+    }
+
+    int getColumn() {
+      return column;
+    }
+
     /** Are we at the end of the input? */
     public boolean atEnd() {
       return currentToken.length() == 0;
@@ -1074,7 +1082,7 @@ public final class TextFormat {
     private ParseException floatParseException(final NumberFormatException e) {
       return parseException("Couldn't parse number: " + e.getMessage());
     }
-    
+
     /**
      * Returns a {@link UnknownFieldParseException} with the line and column
      * numbers of the previous token in the description, and the unknown field
@@ -1133,7 +1141,7 @@ public final class TextFormat {
       return column;
     }
   }
-  
+
   /**
    * Thrown when encountering an unknown field while parsing
    * a text format message.
@@ -1257,11 +1265,14 @@ public final class TextFormat {
 
     private final boolean allowUnknownFields;
     private final SingularOverwritePolicy singularOverwritePolicy;
+    private TextFormatParseInfoTree.Builder parseInfoTreeBuilder;
 
-    private Parser(boolean allowUnknownFields,
-        SingularOverwritePolicy singularOverwritePolicy) {
+    private Parser(
+        boolean allowUnknownFields, SingularOverwritePolicy singularOverwritePolicy,
+        TextFormatParseInfoTree.Builder parseInfoTreeBuilder) {
       this.allowUnknownFields = allowUnknownFields;
       this.singularOverwritePolicy = singularOverwritePolicy;
+      this.parseInfoTreeBuilder = parseInfoTreeBuilder;
     }
 
     /**
@@ -1278,6 +1289,7 @@ public final class TextFormat {
       private boolean allowUnknownFields = false;
       private SingularOverwritePolicy singularOverwritePolicy =
           SingularOverwritePolicy.ALLOW_SINGULAR_OVERWRITES;
+      private TextFormatParseInfoTree.Builder parseInfoTreeBuilder = null;
 
 
       /**
@@ -1288,8 +1300,15 @@ public final class TextFormat {
         return this;
       }
 
+      public Builder setParseInfoTreeBuilder(
+          TextFormatParseInfoTree.Builder parseInfoTreeBuilder) {
+        this.parseInfoTreeBuilder = parseInfoTreeBuilder;
+        return this;
+      }
+
       public Parser build() {
-        return new Parser(allowUnknownFields, singularOverwritePolicy);
+        return new Parser(
+            allowUnknownFields, singularOverwritePolicy, parseInfoTreeBuilder);
       }
     }
 
@@ -1380,7 +1399,21 @@ public final class TextFormat {
                             final ExtensionRegistry extensionRegistry,
                             final MessageReflection.MergeTarget target)
                             throws ParseException {
+      mergeField(tokenizer, extensionRegistry, target, parseInfoTreeBuilder);
+    }
+
+    /**
+     * Parse a single field from {@code tokenizer} and merge it into
+     * {@code builder}.
+     */
+    private void mergeField(final Tokenizer tokenizer,
+                            final ExtensionRegistry extensionRegistry,
+                            final MessageReflection.MergeTarget target,
+                            TextFormatParseInfoTree.Builder parseTreeBuilder)
+                            throws ParseException {
       FieldDescriptor field = null;
+      int startLine = tokenizer.getLine();
+      int startColumn = tokenizer.getColumn();
       final Descriptor type = target.getDescriptorForType();
       ExtensionRegistry.ExtensionInfo extension = null;
 
@@ -1472,14 +1505,51 @@ public final class TextFormat {
       // Handle potential ':'.
       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
         tokenizer.tryConsume(":");  // optional
+        if (parseTreeBuilder != null) {
+          TextFormatParseInfoTree.Builder childParseTreeBuilder =
+              parseTreeBuilder.getBuilderForSubMessageField(field);
+          consumeFieldValues(tokenizer, extensionRegistry, target, field, extension,
+              childParseTreeBuilder);
+        } else {
+          consumeFieldValues(tokenizer, extensionRegistry, target, field, extension,
+              parseTreeBuilder);
+        }
       } else {
         tokenizer.consume(":");  // required
+        consumeFieldValues(
+            tokenizer, extensionRegistry, target, field, extension, parseTreeBuilder);
       }
+
+      if (parseTreeBuilder != null) {
+        parseTreeBuilder.setLocation(
+            field, TextFormatParseLocation.create(startLine, startColumn));
+      }
+
+      // For historical reasons, fields may optionally be separated by commas or
+      // semicolons.
+      if (!tokenizer.tryConsume(";")) {
+        tokenizer.tryConsume(",");
+      }
+    }
+
+    /**
+     * Parse one or more field values from {@code tokenizer} and merge them into
+     * {@code builder}.
+     */
+    private void consumeFieldValues(
+        final Tokenizer tokenizer,
+        final ExtensionRegistry extensionRegistry,
+        final MessageReflection.MergeTarget target,
+        final FieldDescriptor field,
+        final ExtensionRegistry.ExtensionInfo extension,
+        final TextFormatParseInfoTree.Builder parseTreeBuilder)
+        throws ParseException {
       // Support specifying repeated field values as a comma-separated list.
       // Ex."foo: [1, 2, 3]"
       if (field.isRepeated() && tokenizer.tryConsume("[")) {
         while (true) {
-          consumeFieldValue(tokenizer, extensionRegistry, target, field, extension);
+          consumeFieldValue(tokenizer, extensionRegistry, target, field, extension,
+              parseTreeBuilder);
           if (tokenizer.tryConsume("]")) {
             // End of list.
             break;
@@ -1487,13 +1557,8 @@ public final class TextFormat {
           tokenizer.consume(",");
         }
       } else {
-        consumeFieldValue(tokenizer, extensionRegistry, target, field, extension);
-      }
-
-      // For historical reasons, fields may optionally be separated by commas or
-      // semicolons.
-      if (!tokenizer.tryConsume(";")) {
-        tokenizer.tryConsume(",");
+        consumeFieldValue(
+            tokenizer, extensionRegistry, target, field, extension, parseTreeBuilder);
       }
     }
 
@@ -1506,7 +1571,8 @@ public final class TextFormat {
         final ExtensionRegistry extensionRegistry,
         final MessageReflection.MergeTarget target,
         final FieldDescriptor field,
-        final ExtensionRegistry.ExtensionInfo extension)
+        final ExtensionRegistry.ExtensionInfo extension,
+        final TextFormatParseInfoTree.Builder parseTreeBuilder)
         throws ParseException {
       Object value = null;
 
@@ -1528,7 +1594,7 @@ public final class TextFormat {
             throw tokenizer.parseException(
               "Expected \"" + endToken + "\".");
           }
-          mergeField(tokenizer, extensionRegistry, subField);
+          mergeField(tokenizer, extensionRegistry, subField, parseTreeBuilder);
         }
 
         value = subField.finish();
@@ -1704,52 +1770,6 @@ public final class TextFormat {
   // Some of these methods are package-private because Descriptors.java uses
   // them.
 
-  private interface ByteSequence {
-    int size();
-    byte byteAt(int offset);
-  }
-
-  /**
-   * Escapes bytes in the format used in protocol buffer text format, which
-   * is the same as the format used for C string literals.  All bytes
-   * that are not printable 7-bit ASCII characters are escaped, as well as
-   * backslash, single-quote, and double-quote characters.  Characters for
-   * which no defined short-hand escape sequence is defined will be escaped
-   * using 3-digit octal sequences.
-   */
-  public static String escapeBytes(final ByteSequence input) {
-    final StringBuilder builder = new StringBuilder(input.size());
-    for (int i = 0; i < input.size(); i++) {
-      final byte b = input.byteAt(i);
-      switch (b) {
-        // Java does not recognize \a or \v, apparently.
-        case 0x07: builder.append("\\a"); break;
-        case '\b': builder.append("\\b"); break;
-        case '\f': builder.append("\\f"); break;
-        case '\n': builder.append("\\n"); break;
-        case '\r': builder.append("\\r"); break;
-        case '\t': builder.append("\\t"); break;
-        case 0x0b: builder.append("\\v"); break;
-        case '\\': builder.append("\\\\"); break;
-        case '\'': builder.append("\\\'"); break;
-        case '"' : builder.append("\\\""); break;
-        default:
-          // Only ASCII characters between 0x20 (space) and 0x7e (tilde) are
-          // printable.  Other byte values must be escaped.
-          if (b >= 0x20 && b <= 0x7e) {
-            builder.append((char) b);
-          } else {
-            builder.append('\\');
-            builder.append((char) ('0' + ((b >>> 6) & 3)));
-            builder.append((char) ('0' + ((b >>> 3) & 7)));
-            builder.append((char) ('0' + (b & 7)));
-          }
-          break;
-      }
-    }
-    return builder.toString();
-  }
-
   /**
    * Escapes bytes in the format used in protocol buffer text format, which
    * is the same as the format used for C string literals.  All bytes
@@ -1758,33 +1778,15 @@ public final class TextFormat {
    * which no defined short-hand escape sequence is defined will be escaped
    * using 3-digit octal sequences.
    */
-  public static String escapeBytes(final ByteString input) {
-    return escapeBytes(new ByteSequence() {
-      @Override
-      public int size() {
-        return input.size();
-      }
-      @Override
-      public byte byteAt(int offset) {
-        return input.byteAt(offset);
-      }
-    });
+  public static String escapeBytes(ByteString input) {
+    return TextFormatEscaper.escapeBytes(input);
   }
 
   /**
    * Like {@link #escapeBytes(ByteString)}, but used for byte array.
    */
-  public static String escapeBytes(final byte[] input) {
-    return escapeBytes(new ByteSequence() {
-      @Override
-      public int size() {
-        return input.length;
-      }
-      @Override
-      public byte byteAt(int offset) {
-        return input[offset];
-      }
-    });
+  public static String escapeBytes(byte[] input) {
+    return TextFormatEscaper.escapeBytes(input);
   }
 
   /**
@@ -1868,7 +1870,9 @@ public final class TextFormat {
       }
     }
 
-    return ByteString.copyFrom(result, 0, pos);
+    return result.length == pos
+        ? ByteString.wrap(result)  // This reference has not been out of our control.
+        : ByteString.copyFrom(result, 0, pos);
   }
 
   /**
@@ -1896,7 +1900,7 @@ public final class TextFormat {
    * Escape double quotes and backslashes in a String for unicode output of a message.
    */
   public static String escapeDoubleQuotesAndBackslashes(final String input) {
-    return input.replace("\\", "\\\\").replace("\"", "\\\"");
+    return TextFormatEscaper.escapeDoubleQuotesAndBackslashes(input);
   }
 
   /**

+ 137 - 0
java/core/src/main/java/com/google/protobuf/TextFormatEscaper.java

@@ -0,0 +1,137 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+/**
+ * Provide text format escaping support for proto2 instances.
+ */
+final class TextFormatEscaper {
+  private TextFormatEscaper() {}
+
+  private interface ByteSequence {
+    int size();
+    byte byteAt(int offset);
+  }
+
+  /**
+   * Escapes bytes in the format used in protocol buffer text format, which
+   * is the same as the format used for C string literals.  All bytes
+   * that are not printable 7-bit ASCII characters are escaped, as well as
+   * backslash, single-quote, and double-quote characters.  Characters for
+   * which no defined short-hand escape sequence is defined will be escaped
+   * using 3-digit octal sequences.
+   */
+  static String escapeBytes(final ByteSequence input) {
+    final StringBuilder builder = new StringBuilder(input.size());
+    for (int i = 0; i < input.size(); i++) {
+      final byte b = input.byteAt(i);
+      switch (b) {
+        // Java does not recognize \a or \v, apparently.
+        case 0x07: builder.append("\\a"); break;
+        case '\b': builder.append("\\b"); break;
+        case '\f': builder.append("\\f"); break;
+        case '\n': builder.append("\\n"); break;
+        case '\r': builder.append("\\r"); break;
+        case '\t': builder.append("\\t"); break;
+        case 0x0b: builder.append("\\v"); break;
+        case '\\': builder.append("\\\\"); break;
+        case '\'': builder.append("\\\'"); break;
+        case '"' : builder.append("\\\""); break;
+        default:
+          // Only ASCII characters between 0x20 (space) and 0x7e (tilde) are
+          // printable.  Other byte values must be escaped.
+          if (b >= 0x20 && b <= 0x7e) {
+            builder.append((char) b);
+          } else {
+            builder.append('\\');
+            builder.append((char) ('0' + ((b >>> 6) & 3)));
+            builder.append((char) ('0' + ((b >>> 3) & 7)));
+            builder.append((char) ('0' + (b & 7)));
+          }
+          break;
+      }
+    }
+    return builder.toString();
+  }
+
+  /**
+   * Escapes bytes in the format used in protocol buffer text format, which
+   * is the same as the format used for C string literals.  All bytes
+   * that are not printable 7-bit ASCII characters are escaped, as well as
+   * backslash, single-quote, and double-quote characters.  Characters for
+   * which no defined short-hand escape sequence is defined will be escaped
+   * using 3-digit octal sequences.
+   */
+  static String escapeBytes(final ByteString input) {
+    return escapeBytes(new ByteSequence() {
+      @Override
+      public int size() {
+        return input.size();
+      }
+      @Override
+      public byte byteAt(int offset) {
+        return input.byteAt(offset);
+      }
+    });
+  }
+
+  /**
+   * Like {@link #escapeBytes(ByteString)}, but used for byte array.
+   */
+  static String escapeBytes(final byte[] input) {
+    return escapeBytes(new ByteSequence() {
+      @Override
+      public int size() {
+        return input.length;
+      }
+      @Override
+      public byte byteAt(int offset) {
+        return input[offset];
+      }
+    });
+  }
+
+  /**
+   * Like {@link #escapeBytes(ByteString)}, but escapes a text string.
+   * Non-ASCII characters are first encoded as UTF-8, then each byte is escaped
+   * individually as a 3-digit octal escape.  Yes, it's weird.
+   */
+  static String escapeText(final String input) {
+    return escapeBytes(ByteString.copyFromUtf8(input));
+  }
+
+  /**
+   * Escape double quotes and backslashes in a String for unicode output of a message.
+   */
+  static String escapeDoubleQuotesAndBackslashes(final String input) {
+    return input.replace("\\", "\\\\").replace("\"", "\\\"");
+  }
+}

+ 225 - 0
java/core/src/main/java/com/google/protobuf/TextFormatParseInfoTree.java

@@ -0,0 +1,225 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import com.google.protobuf.Descriptors.FieldDescriptor;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+
+/**
+ * Data structure which is populated with the locations of each field value parsed from the text.
+ *
+ * <p>The locations of primary field values are retrieved by {@code getLocation} or
+ * {@code getLocations}.  The locations of sub message values are within nested
+ * {@code TextFormatParseInfoTree}s, retrieved by {@code getNestedTree} or {@code getNestedTrees}.
+ *
+ * <p>The {@code TextFormatParseInfoTree} is created by a Builder.
+ */
+public class TextFormatParseInfoTree {
+
+  // Defines a mapping from each field's descriptor to the list of locations where
+  // its value(s) were encountered.
+  private Map<FieldDescriptor, List<TextFormatParseLocation>> locationsFromField;
+
+  // Defines a mapping between a field's descriptor to a list of TextFormatParseInfoTrees for
+  // sub message location information.
+  Map<FieldDescriptor, List<TextFormatParseInfoTree>> subtreesFromField;
+
+  /**
+   * Construct a {@code TextFormatParseInfoTree}.
+   *
+   * @param locationsFromField a map of fields to location in the source code
+   * @param subtreeBuildersFromField a map of fields to parse tree location information builders
+   */
+  private TextFormatParseInfoTree(
+      Map<FieldDescriptor, List<TextFormatParseLocation>> locationsFromField,
+      Map<FieldDescriptor, List<TextFormatParseInfoTree.Builder>> subtreeBuildersFromField) {
+
+    // The maps are unmodifiable.  The values in the maps are unmodifiable.
+    Map<FieldDescriptor, List<TextFormatParseLocation>> locs =
+        new HashMap<FieldDescriptor, List<TextFormatParseLocation>>();
+    for (Entry<FieldDescriptor, List<TextFormatParseLocation>> kv : locationsFromField.entrySet()) {
+      locs.put(kv.getKey(), Collections.unmodifiableList(kv.getValue()));
+    }
+    this.locationsFromField = Collections.unmodifiableMap(locs);
+
+    Map<FieldDescriptor, List<TextFormatParseInfoTree>> subs =
+        new HashMap<FieldDescriptor, List<TextFormatParseInfoTree>>();
+    for (Entry<FieldDescriptor, List<Builder>> kv : subtreeBuildersFromField.entrySet()) {
+      List<TextFormatParseInfoTree> submessagesOfField = new ArrayList<TextFormatParseInfoTree>();
+      for (Builder subBuilder : kv.getValue()) {
+        submessagesOfField.add(subBuilder.build());
+      }
+      subs.put(kv.getKey(), Collections.unmodifiableList(submessagesOfField));
+    }
+    this.subtreesFromField = Collections.unmodifiableMap(subs);
+  }
+
+  /**
+   * Retrieve all the locations of a field.
+   *
+   * @param fieldDescriptor the {@link FieldDescriptor} of the desired field
+   * @return a list of the locations of values of the field.  If there are no values
+   *         or the field doesn't exist, an empty list is returned.
+   */
+  public List<TextFormatParseLocation> getLocations(final FieldDescriptor fieldDescriptor) {
+    List<TextFormatParseLocation> result = locationsFromField.get(fieldDescriptor);
+    return (result == null) ? Collections.<TextFormatParseLocation>emptyList() : result;
+  }
+
+  /**
+   * Get the location in the source of a field's value.
+   *
+   * <p>Returns the {@link TextFormatParseLocation} for index-th value of the field in the parsed
+   * text.
+   *
+   * @param fieldDescriptor the {@link FieldDescriptor} of the desired field
+   * @param index the index of the value.
+   * @return the {@link TextFormatParseLocation} of the value
+   * @throws IllegalArgumentException if index is out of range
+   */
+  public TextFormatParseLocation getLocation(final FieldDescriptor fieldDescriptor, int index) {
+    return getFromList(getLocations(fieldDescriptor), index, fieldDescriptor);
+  }
+
+  /**
+   * Retrieve a list of all the location information trees for a sub message field.
+   *
+   * @param fieldDescriptor the {@link FieldDescriptor} of the desired field
+   * @return A list of {@link TextFormatParseInfoTree}
+   */
+  public List<TextFormatParseInfoTree> getNestedTrees(final FieldDescriptor fieldDescriptor) {
+    List<TextFormatParseInfoTree> result = subtreesFromField.get(fieldDescriptor);
+    return result == null ? Collections.<TextFormatParseInfoTree>emptyList() : result;
+  }
+
+  /**
+   * Returns the parse info tree for the given field, which must be a message type.
+   *
+   * @param fieldDescriptor the {@link FieldDescriptor} of the desired sub message
+   * @param index the index of message value.
+   * @return the {@code ParseInfoTree} of the message value
+   * @throws IllegalArgumentException if the field has no nested trees or the index is out of
+   *         range
+   */
+  public TextFormatParseInfoTree getNestedTree(final FieldDescriptor fieldDescriptor, int index) {
+    return getFromList(getNestedTrees(fieldDescriptor), index, fieldDescriptor);
+  }
+
+  /**
+   * Create a builder for a {@code ParseInfoTree}.
+   *
+   * @return the builder
+   */
+  public static Builder builder() {
+    return new Builder();
+  }
+
+  private static <T> T getFromList(List<T> list, int index, FieldDescriptor fieldDescriptor) {
+    if (index >= list.size() || index < 0)  {
+      throw new IllegalArgumentException(String.format("Illegal index field: %s, index %d",
+          fieldDescriptor == null ? "<null>" : fieldDescriptor.getName(), index));
+    }
+    return list.get(index);
+  }
+
+  /**
+   * Builder for a {@link TextFormatParseInfoTree}.
+   */
+  public static class Builder {
+
+    private Map<FieldDescriptor, List<TextFormatParseLocation>> locationsFromField;
+
+    // Defines a mapping between a field's descriptor to a list of ParseInfoTrees builders for
+    // sub message location information.
+    private Map<FieldDescriptor, List<Builder>> subtreeBuildersFromField;
+
+    /**
+     * Create a root level {@code ParseInfoTree} builder.
+     */
+    private Builder() {
+      locationsFromField = new HashMap<FieldDescriptor, List<TextFormatParseLocation>>();
+      subtreeBuildersFromField = new HashMap<FieldDescriptor, List<Builder>>();
+    }
+
+    /**
+     * Record the starting location of a single value for a field.
+     *
+     * @param fieldDescriptor the field
+     * @param location source code location information
+     */
+    public Builder setLocation(
+        final FieldDescriptor fieldDescriptor, TextFormatParseLocation location) {
+      List<TextFormatParseLocation> fieldLocations = locationsFromField.get(fieldDescriptor);
+      if (fieldLocations == null) {
+        fieldLocations = new ArrayList<TextFormatParseLocation>();
+        locationsFromField.put(fieldDescriptor, fieldLocations);
+      }
+      fieldLocations.add(location);
+      return this;
+    }
+
+    /**
+     * Create and register a builder for a sub message value of the given field.
+     *
+     * <p>A new builder is created for the sub message on every call. The returned builder is
+     * <em>not</em> the builder on which {@code getBuilderForSubMessageField} was invoked.
+     *
+     * @param fieldDescriptor the field whose value is the submessage
+     * @return a new Builder for the sub message
+     */
+    public Builder getBuilderForSubMessageField(final FieldDescriptor fieldDescriptor) {
+      List<Builder> submessageBuilders = subtreeBuildersFromField.get(fieldDescriptor);
+      if (submessageBuilders == null) {
+        submessageBuilders = new ArrayList<Builder>();
+        subtreeBuildersFromField.put(fieldDescriptor, submessageBuilders);
+      }
+      Builder subtreeBuilder = new Builder();
+      submessageBuilders.add(subtreeBuilder);
+      return subtreeBuilder;
+    }
+
+    /**
+     * Build the {@code TextFormatParseInfoTree}.
+     *
+     * @return the {@code TextFormatParseInfoTree}
+     */
+    public TextFormatParseInfoTree build() {
+      return new TextFormatParseInfoTree(locationsFromField, subtreeBuildersFromField);
+    }
+  }
+}

+ 104 - 0
java/core/src/main/java/com/google/protobuf/TextFormatParseLocation.java

@@ -0,0 +1,104 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import java.util.Arrays;
+
+/**
+ * A location in the source code.
+ *
+ * <p>A location is the starting line number and starting column number.
+ */
+public final class TextFormatParseLocation {
+
+  /**
+   * The empty location.
+   */
+  public static final TextFormatParseLocation EMPTY = new TextFormatParseLocation(-1, -1);
+
+  /**
+   * Create a location.
+   *
+   * @param line the starting line number
+   * @param column the starting column number
+   * @return a {@code ParseLocation}
+   */
+  static TextFormatParseLocation create(int line, int column) {
+    if (line == -1 && column == -1) {
+      return EMPTY;
+    }
+    if (line < 0 || column < 0) {
+      throw new IllegalArgumentException(
+          String.format("line and column values must be >= 0: line %d, column: %d", line, column));
+    }
+    return new TextFormatParseLocation(line, column);
+  }
+
+  private final int line;
+  private final int column;
+
+  private TextFormatParseLocation(int line, int column) {
+    this.line = line;
+    this.column = column;
+  }
+
+  public int getLine() {
+    return line;
+  }
+
+  public int getColumn() {
+    return column;
+  }
+
+  @Override
+  public String toString() {
+    return String.format("ParseLocation{line=%d, column=%d}", line, column);
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (o == this) {
+      return true;
+    }
+    if (!(o instanceof TextFormatParseLocation)) {
+      return false;
+    }
+    TextFormatParseLocation that = (TextFormatParseLocation) o;
+    return (this.line == that.getLine())
+         && (this.column == that.getColumn());
+  }
+
+  @Override
+  public int hashCode() {
+    int[] values = {line, column};
+    return Arrays.hashCode(values);
+  }
+}

+ 15 - 95
java/core/src/main/java/com/google/protobuf/UnknownFieldSetLite.java

@@ -61,15 +61,6 @@ public final class UnknownFieldSetLite {
   public static UnknownFieldSetLite getDefaultInstance() {
     return DEFAULT_INSTANCE;
   }
-
-  /**
-   * Returns an empty {@code UnknownFieldSetLite.Builder}.
-   *
-   * <p>For use by generated code only.
-   */
-  public static Builder newBuilder() {
-    return new Builder();
-  }
   
   /**
    * Returns a new mutable instance.
@@ -262,6 +253,21 @@ public final class UnknownFieldSetLite {
     return hashCode;
   }
 
+  /**
+   * Prints a String representation of the unknown field set.
+   *
+   * <p>For use by generated code only.
+   *
+   * @param buffer the buffer to write to
+   * @param indent the number of spaces the fields should be indented by
+   */
+  final void printWithIndent(StringBuilder buffer, int indent) {
+    for (int i = 0; i < count; i++) {
+      int fieldNumber = WireFormat.getTagFieldNumber(tags[i]);
+      MessageLiteToString.printField(buffer, indent, String.valueOf(fieldNumber), objects[i]);
+    }
+  }
+
   private void storeField(int tag, Object value) {
     ensureCapacity();
     
@@ -369,90 +375,4 @@ public final class UnknownFieldSetLite {
     }
     return this;
   }
-  
-  /**
-   * Builder for {@link UnknownFieldSetLite}s.
-   *
-   * <p>Use {@link UnknownFieldSet#newBuilder()} to construct a {@code Builder}.
-   *
-   * <p>For use by generated code only.
-   */
-  // TODO(dweis): Update the mutable API to no longer need this builder and delete.
-  public static final class Builder {
-
-    private UnknownFieldSetLite set;
-    
-    private Builder() {
-      this.set = null;
-    }
-
-    /**
-     * Ensures internal state is initialized for use.
-     */
-    private void ensureNotBuilt() {
-      if (set == null) {
-        set = new UnknownFieldSetLite();
-      }
-      
-      set.checkMutable();
-    }
-    
-    /**
-     * Parse a single field from {@code input} and merge it into this set.
-     *
-     * <p>For use by generated code only.
-     *
-     * @param tag The field's tag number, which was already parsed.
-     * @return {@code false} if the tag is an end group tag.
-     */
-    boolean mergeFieldFrom(final int tag, final CodedInputStream input) throws IOException {
-      ensureNotBuilt();
-      return set.mergeFieldFrom(tag, input);
-    }
-
-    /**
-     * Convenience method for merging a new field containing a single varint
-     * value. This is used in particular when an unknown enum value is
-     * encountered.
-     *
-     * <p>For use by generated code only.
-     */
-    Builder mergeVarintField(int fieldNumber, int value) {
-      ensureNotBuilt();
-      set.mergeVarintField(fieldNumber, value);
-      return this;
-    }
-    
-    /**
-     * Convenience method for merging a length-delimited field.
-     *
-     * <p>For use by generated code only.
-     */
-    public Builder mergeLengthDelimitedField(final int fieldNumber, final ByteString value) {
-      ensureNotBuilt();  
-      set.mergeLengthDelimitedField(fieldNumber, value);
-      return this;
-    }
-    
-    /**
-     * Build the {@link UnknownFieldSetLite} and return it.
-     *
-     * <p>Once {@code build()} has been called, the {@code Builder} will no
-     * longer be usable.  Calling any method after {@code build()} will result
-     * in undefined behavior and can cause an
-     * {@code UnsupportedOperationException} to be thrown.
-     *
-     * <p>For use by generated code only.
-     */
-    public UnknownFieldSetLite build() {
-      if (set == null) {
-        return DEFAULT_INSTANCE;
-      }
-      
-      set.checkMutable();
-      set.makeImmutable();
-
-      return set;
-    }
-  }
 }

+ 23 - 2
java/core/src/main/java/com/google/protobuf/UnsafeByteOperations.java

@@ -30,6 +30,7 @@
 
 package com.google.protobuf;
 
+import java.io.IOException;
 import java.nio.ByteBuffer;
 
 /**
@@ -49,8 +50,8 @@ public final class UnsafeByteOperations {
   /**
    * An unsafe operation that returns a {@link ByteString} that is backed by the provided buffer.
    *
-   * @param buffer the Java NIO buffer to be wrapped.
-   * @return a {@link ByteString} backed by the provided buffer.
+   * @param buffer the Java NIO buffer to be wrapped
+   * @return a {@link ByteString} backed by the provided buffer
    */
   public static ByteString unsafeWrap(ByteBuffer buffer) {
     if (buffer.hasArray()) {
@@ -60,4 +61,24 @@ public final class UnsafeByteOperations {
       return new NioByteString(buffer);
     }
   }
+
+  /**
+   * Writes the given {@link ByteString} to the provided {@link ByteOutput}. Calling this method may
+   * result in multiple operations on the target {@link ByteOutput}
+   * (i.e. for roped {@link ByteString}s).
+   *
+   * <p>This method exposes the internal backing buffer(s) of the {@link ByteString} to the {@link
+   * ByteOutput} in order to avoid additional copying overhead. It would be possible for a malicious
+   * {@link ByteOutput} to corrupt the {@link ByteString}. Use with caution!
+   *
+   * <p> NOTE: The {@link ByteOutput} <strong>MUST NOT</strong> modify the provided buffers. Doing
+   * so may result in corrupted data, which would be difficult to debug.
+   *
+   * @param bytes the {@link ByteString} to be written
+   * @param  output  the output to receive the bytes
+   * @throws IOException  if an I/O error occurs
+   */
+  public static void unsafeWriteTo(ByteString bytes, ByteOutput output) throws IOException {
+    bytes.writeTo(output);
+  }
 }

+ 1433 - 231
java/core/src/main/java/com/google/protobuf/Utf8.java

@@ -30,6 +30,19 @@
 
 package com.google.protobuf;
 
+import static java.lang.Character.MAX_SURROGATE;
+import static java.lang.Character.MIN_SURROGATE;
+import static java.lang.Character.isSurrogatePair;
+import static java.lang.Character.toCodePoint;
+
+import java.lang.reflect.Field;
+import java.nio.Buffer;
+import java.nio.ByteBuffer;
+import java.security.AccessController;
+import java.security.PrivilegedExceptionAction;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
 /**
  * A set of low-level, high-performance static utility methods related
  * to the UTF-8 character encoding.  This class has no dependencies
@@ -64,9 +77,24 @@ package com.google.protobuf;
  *
  * @author martinrb@google.com (Martin Buchholz)
  */
+// TODO(nathanmittler): Copy changes in this class back to Guava
 final class Utf8 {
-  private Utf8() {}
-  
+  private static final Logger logger = Logger.getLogger(Utf8.class.getName());
+
+  /**
+   * UTF-8 is a runtime hot spot so we attempt to provide heavily optimized implementations
+   * depending on what is available on the platform. The processor is the platform-optimized
+   * delegate to which all methods are delegated directly.
+   */
+  private static final Processor processor =
+      UnsafeProcessor.isAvailable() ? new UnsafeProcessor() : new SafeProcessor();
+
+  /**
+   * A mask used when performing unsafe reads to determine if a long value contains any non-ASCII
+   * characters (i.e. any byte >= 0x80).
+   */
+  private static final long ASCII_MASK_LONG = 0x8080808080808080L;
+
   /**
    * Maximum number of bytes per Java UTF-16 char in UTF-8.
    * @see java.nio.charset.CharsetEncoder#maxBytesPerChar()
@@ -85,6 +113,18 @@ final class Utf8 {
    */
   public static final int MALFORMED = -1;
 
+  /**
+   * Used by {@code Unsafe} UTF-8 string validation logic to determine the minimum string length
+   * above which to employ an optimized algorithm for counting ASCII characters. The reason for this
+   * threshold is that for small strings, the optimization may not be beneficial or may even
+   * negatively impact performance since it requires additional logic to avoid unaligned reads
+   * (when calling {@code Unsafe.getLong}). This threshold guarantees that even if the initial
+   * offset is unaligned, we're guaranteed to make at least one call to {@code Unsafe.getLong()}
+   * which provides a performance improvement that entirely subsumes the cost of the additional
+   * logic.
+   */
+  private static final int UNSAFE_COUNT_ASCII_THRESHOLD = 16;
+
   // Other state values include the partial bytes of the incomplete
   // character to be decoded in the simplest way: we pack the bytes
   // into the state int in little-endian order.  For example:
@@ -112,7 +152,7 @@ final class Utf8 {
    * isValidUtf8(bytes, 0, bytes.length)}.
    */
   public static boolean isValidUtf8(byte[] bytes) {
-    return isValidUtf8(bytes, 0, bytes.length);
+    return processor.isValidUtf8(bytes, 0, bytes.length);
   }
 
   /**
@@ -125,7 +165,7 @@ final class Utf8 {
    * partialIsValidUtf8(bytes, index, limit) == Utf8.COMPLETE}.
    */
   public static boolean isValidUtf8(byte[] bytes, int index, int limit) {
-    return partialIsValidUtf8(bytes, index, limit) == COMPLETE;
+    return processor.isValidUtf8(bytes, index, limit);
   }
 
   /**
@@ -146,183 +186,8 @@ final class Utf8 {
    * decode the character when passed to a subsequent invocation of a
    * partial decoding method.
    */
-  public static int partialIsValidUtf8(
-      int state, byte[] bytes, int index, int limit) {
-    if (state != COMPLETE) {
-      // The previous decoding operation was incomplete (or malformed).
-      // We look for a well-formed sequence consisting of bytes from
-      // the previous decoding operation (stored in state) together
-      // with bytes from the array slice.
-      //
-      // We expect such "straddler characters" to be rare.
-
-      if (index >= limit) {  // No bytes? No progress.
-        return state;
-      }
-      int byte1 = (byte) state;
-      // byte1 is never ASCII.
-      if (byte1 < (byte) 0xE0) {
-        // two-byte form
-
-        // Simultaneously checks for illegal trailing-byte in
-        // leading position and overlong 2-byte form.
-        if (byte1 < (byte) 0xC2 ||
-            // byte2 trailing-byte test
-            bytes[index++] > (byte) 0xBF) {
-          return MALFORMED;
-        }
-      } else if (byte1 < (byte) 0xF0) {
-        // three-byte form
-
-        // Get byte2 from saved state or array
-        int byte2 = (byte) ~(state >> 8);
-        if (byte2 == 0) {
-          byte2 = bytes[index++];
-          if (index >= limit) {
-            return incompleteStateFor(byte1, byte2);
-          }
-        }
-        if (byte2 > (byte) 0xBF ||
-            // overlong? 5 most significant bits must not all be zero
-            (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) ||
-            // illegal surrogate codepoint?
-            (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) ||
-            // byte3 trailing-byte test
-            bytes[index++] > (byte) 0xBF) {
-          return MALFORMED;
-        }
-      } else {
-        // four-byte form
-
-        // Get byte2 and byte3 from saved state or array
-        int byte2 = (byte) ~(state >> 8);
-        int byte3 = 0;
-        if (byte2 == 0) {
-          byte2 = bytes[index++];
-          if (index >= limit) {
-            return incompleteStateFor(byte1, byte2);
-          }
-        } else {
-          byte3 = (byte) (state >> 16);
-        }
-        if (byte3 == 0) {
-          byte3 = bytes[index++];
-          if (index >= limit) {
-            return incompleteStateFor(byte1, byte2, byte3);
-          }
-        }
-
-        // If we were called with state == MALFORMED, then byte1 is 0xFF,
-        // which never occurs in well-formed UTF-8, and so we will return
-        // MALFORMED again below.
-
-        if (byte2 > (byte) 0xBF ||
-            // Check that 1 <= plane <= 16.  Tricky optimized form of:
-            // if (byte1 > (byte) 0xF4 ||
-            //     byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
-            //     byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
-            (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 ||
-            // byte3 trailing-byte test
-            byte3 > (byte) 0xBF ||
-            // byte4 trailing-byte test
-             bytes[index++] > (byte) 0xBF) {
-          return MALFORMED;
-        }
-      }
-    }
-
-    return partialIsValidUtf8(bytes, index, limit);
-  }
-
-  /**
-   * Tells whether the given byte array slice is a well-formed,
-   * malformed, or incomplete UTF-8 byte sequence.  The range of bytes
-   * to be checked extends from index {@code index}, inclusive, to
-   * {@code limit}, exclusive.
-   *
-   * <p>This is a convenience method, equivalent to a call to {@code
-   * partialIsValidUtf8(Utf8.COMPLETE, bytes, index, limit)}.
-   *
-   * @return {@link #MALFORMED} if the partial byte sequence is
-   * definitely not well-formed, {@link #COMPLETE} if it is well-formed
-   * (no additional input needed), or if the byte sequence is
-   * "incomplete", i.e. apparently terminated in the middle of a character,
-   * an opaque integer "state" value containing enough information to
-   * decode the character when passed to a subsequent invocation of a
-   * partial decoding method.
-   */
-  public static int partialIsValidUtf8(
-      byte[] bytes, int index, int limit) {
-    // Optimize for 100% ASCII.
-    // Hotspot loves small simple top-level loops like this.
-    while (index < limit && bytes[index] >= 0) {
-      index++;
-    }
-
-    return (index >= limit) ? COMPLETE :
-        partialIsValidUtf8NonAscii(bytes, index, limit);
-  }
-
-  private static int partialIsValidUtf8NonAscii(
-      byte[] bytes, int index, int limit) {
-    for (;;) {
-      int byte1, byte2;
-
-      // Optimize for interior runs of ASCII bytes.
-      do {
-        if (index >= limit) {
-          return COMPLETE;
-        }
-      } while ((byte1 = bytes[index++]) >= 0);
-
-      if (byte1 < (byte) 0xE0) {
-        // two-byte form
-
-        if (index >= limit) {
-          return byte1;
-        }
-
-        // Simultaneously checks for illegal trailing-byte in
-        // leading position and overlong 2-byte form.
-        if (byte1 < (byte) 0xC2 ||
-            bytes[index++] > (byte) 0xBF) {
-          return MALFORMED;
-        }
-      } else if (byte1 < (byte) 0xF0) {
-        // three-byte form
-
-        if (index >= limit - 1) { // incomplete sequence
-          return incompleteStateFor(bytes, index, limit);
-        }
-        if ((byte2 = bytes[index++]) > (byte) 0xBF ||
-            // overlong? 5 most significant bits must not all be zero
-            (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0) ||
-            // check for illegal surrogate codepoints
-            (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0) ||
-            // byte3 trailing-byte test
-            bytes[index++] > (byte) 0xBF) {
-          return MALFORMED;
-        }
-      } else {
-        // four-byte form
-
-        if (index >= limit - 2) {  // incomplete sequence
-          return incompleteStateFor(bytes, index, limit);
-        }
-        if ((byte2 = bytes[index++]) > (byte) 0xBF ||
-            // Check that 1 <= plane <= 16.  Tricky optimized form of:
-            // if (byte1 > (byte) 0xF4 ||
-            //     byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
-            //     byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
-            (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0 ||
-            // byte3 trailing-byte test
-            bytes[index++] > (byte) 0xBF ||
-            // byte4 trailing-byte test
-            bytes[index++] > (byte) 0xBF) {
-          return MALFORMED;
-        }
-      }
-    }
+  public static int partialIsValidUtf8(int state, byte[] bytes, int index, int limit) {
+    return processor.partialIsValidUtf8(state, bytes, index, limit);
   }
 
   private static int incompleteStateFor(int byte1) {
@@ -352,19 +217,31 @@ final class Utf8 {
       default: throw new AssertionError();
     }
   }
-  
+
+  /**
+   * Builds the "incomplete" state for a multi-byte character whose lead byte has been read but
+   * whose trailing bytes are cut off by the end of the input.
+   *
+   * @param buffer the buffer holding any trailing bytes that are available
+   * @param byte1 the lead byte of the truncated character
+   * @param index absolute buffer index of the first available trailing byte
+   * @param remaining number of trailing bytes available; must be 0, 1 or 2
+   * @return the state produced by the matching {@code incompleteStateFor} overload
+   * @throws AssertionError if {@code remaining} is not 0, 1 or 2
+   */
+  private static int incompleteStateFor(
+      final ByteBuffer buffer, final int byte1, final int index, final int remaining) {
+    if (remaining == 0) {
+      return incompleteStateFor(byte1);
+    }
+    if (remaining == 1) {
+      return incompleteStateFor(byte1, buffer.get(index));
+    }
+    if (remaining == 2) {
+      return incompleteStateFor(byte1, buffer.get(index), buffer.get(index + 1));
+    }
+    // Callers only reach this helper with fewer than three bytes left.
+    throw new AssertionError();
+  }
 
   // These UTF-8 handling methods are copied from Guava's Utf8 class with a modification to throw
   // a protocol buffer local exception. This exception is then caught in CodedOutputStream so it can
   // fallback to more lenient behavior.
   
   static class UnpairedSurrogateException extends IllegalArgumentException {
-    
     private UnpairedSurrogateException(int index, int length) {
       super("Unpaired surrogate at index " + index + " of " + length);
     }
   }
-  
+
   /**
    * Returns the number of bytes in the UTF-8-encoded form of {@code sequence}. For a string,
    * this method is equivalent to {@code string.getBytes(UTF_8).length}, but is more efficient in
@@ -426,56 +303,1381 @@ final class Utf8 {
     return utf8Length;
   }
 
-  static int encode(CharSequence sequence, byte[] bytes, int offset, int length) {
-    int utf16Length = sequence.length();
-    int j = offset;
-    int i = 0;
-    int limit = offset + length;
-    // Designed to take advantage of
-    // https://wikis.oracle.com/display/HotSpotInternals/RangeCheckElimination
-    for (char c; i < utf16Length && i + j < limit && (c = sequence.charAt(i)) < 0x80; i++) {
-      bytes[j + i] = (byte) c;
-    }
-    if (i == utf16Length) {
-      return j + utf16Length;
-    }
-    j += i;
-    for (char c; i < utf16Length; i++) {
-      c = sequence.charAt(i);
-      if (c < 0x80 && j < limit) {
-        bytes[j++] = (byte) c;
-      } else if (c < 0x800 && j <= limit - 2) { // 11 bits, two UTF-8 bytes
-        bytes[j++] = (byte) ((0xF << 6) | (c >>> 6));
-        bytes[j++] = (byte) (0x80 | (0x3F & c));
-      } else if ((c < Character.MIN_SURROGATE || Character.MAX_SURROGATE < c) && j <= limit - 3) {
-        // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes
-        bytes[j++] = (byte) ((0xF << 5) | (c >>> 12));
-        bytes[j++] = (byte) (0x80 | (0x3F & (c >>> 6)));
-        bytes[j++] = (byte) (0x80 | (0x3F & c));
-      } else if (j <= limit - 4) {
-        // Minimum code point represented by a surrogate pair is 0x10000, 17 bits, four UTF-8 bytes
-        final char low;
-        if (i + 1 == sequence.length()
-                || !Character.isSurrogatePair(c, (low = sequence.charAt(++i)))) {
-          throw new UnpairedSurrogateException((i - 1), utf16Length);
-        }
-        int codePoint = Character.toCodePoint(c, low);
-        bytes[j++] = (byte) ((0xF << 4) | (codePoint >>> 18));
-        bytes[j++] = (byte) (0x80 | (0x3F & (codePoint >>> 12)));
-        bytes[j++] = (byte) (0x80 | (0x3F & (codePoint >>> 6)));
-        bytes[j++] = (byte) (0x80 | (0x3F & codePoint));
+  static int encode(CharSequence in, byte[] out, int offset, int length) {
+    return processor.encodeUtf8(in, out, offset, length);
+  }
+  // End Guava UTF-8 methods.
+
+  /**
+   * Determines if the given {@link ByteBuffer} is a valid UTF-8 string.
+   *
+   * <p>The bytes checked are those from the buffer's current position (inclusive) up to its
+   * limit (exclusive).
+   *
+   * <p>Selects an optimal algorithm based on the type of {@link ByteBuffer} (i.e. heap or direct)
+   * and the capabilities of the platform.
+   *
+   * @param buffer the buffer to check.
+   * @return {@code true} if the checked bytes form a complete, well-formed UTF-8 sequence
+   * @see Utf8#isValidUtf8(byte[], int, int)
+   */
+  static boolean isValidUtf8(ByteBuffer buffer) {
+    // The processor takes an absolute, exclusive end index rather than a byte count. Passing
+    // remaining() here would cut the checked range short whenever position() > 0.
+    return processor.isValidUtf8(buffer, buffer.position(), buffer.limit());
+  }
+
+  /**
+   * Determines if the given {@link ByteBuffer} is a partially valid UTF-8 string.
+   *
+   * <p>Selects an optimal algorithm based on the type of {@link ByteBuffer} (i.e. heap or direct)
+   * and the capabilities of the platform.
+   *
+   * @param state either {@link Utf8#COMPLETE} (for the first call) or the value returned by a
+   *     previous partial validation call covering the preceding bytes
+   * @param buffer the buffer to check.
+   * @param index the index of the first byte to check, inclusive
+   * @param limit the index at which checking stops, exclusive
+   * @return {@link Utf8#MALFORMED}, {@link Utf8#COMPLETE}, or an opaque "incomplete" state to
+   *     pass to the next partial validation call
+   * @see Utf8#partialIsValidUtf8(int, byte[], int, int)
+   */
+  static int partialIsValidUtf8(int state, ByteBuffer buffer, int index, int limit) {
+    return processor.partialIsValidUtf8(state, buffer, index, limit);
+  }
+
+  /**
+   * Encodes the given characters to the target {@link ByteBuffer} using UTF-8 encoding.
+   *
+   * <p>Selects an optimal algorithm based on the type of {@link ByteBuffer} (i.e. heap or direct)
+   * and the capabilities of the platform.
+   *
+   * <p>This is a thin wrapper: all work is forwarded to the platform-selected processor.
+   *
+   * @param in the source string to be encoded
+   * @param out the target buffer to receive the encoded string.
+   * @see Utf8#encode(CharSequence, byte[], int, int)
+   */
+  static void encodeUtf8(CharSequence in, ByteBuffer out) {
+    processor.encodeUtf8(in, out);
+  }
+
+  /**
+   * Gives a lower-bound estimate of how many leading bytes of the given range are ASCII, by
+   * testing eight bytes at a time. The scan stops at the first 8-byte word that contains a byte
+   * with its high bit set, or when fewer than eight bytes remain, so the result is always a
+   * multiple of eight.
+   *
+   * <p>The buffer's byte order has no effect on the result. Using native byte order may be
+   * faster, because {@link ByteBuffer#getLong(int)} then needs no byte-swapping.
+   *
+   * @param buffer the buffer to be scanned for ASCII chars
+   * @param index the starting index of the scan
+   * @param limit the limit within buffer for the scan
+   * @return the number of bytes known to be ASCII; {@code index} plus this value is at or before
+   *     the first non-ASCII byte
+   */
+  private static int estimateConsecutiveAscii(ByteBuffer buffer, int index, int limit) {
+    // A full 8-byte read is only possible from positions strictly below this bound.
+    final int lastWordBound = limit - 7;
+    int pos = index;
+    // A set high bit anywhere in the word means at least one byte is >= 0x80 (non-ASCII).
+    while (pos < lastWordBound && (buffer.getLong(pos) & ASCII_MASK_LONG) == 0) {
+      pos += 8;
+    }
+    return pos - index;
+  }
+
+  /**
+   * A processor of UTF-8 strings, providing methods for checking validity and encoding.
+   */
+  // TODO(nathanmittler): Add support for Memory/MemoryBlock on Android.
+  abstract static class Processor {
+    /**
+     * Returns {@code true} if the given byte array slice is a
+     * well-formed UTF-8 byte sequence.  The range of bytes to be
+     * checked extends from index {@code index}, inclusive, to {@code
+     * limit}, exclusive.
+     *
+     * <p>This is a convenience method, equivalent to {@code
+     * partialIsValidUtf8(Utf8.COMPLETE, bytes, index, limit) == Utf8.COMPLETE}.
+     *
+     * @param bytes the array containing the bytes to check
+     * @param index the index of the first byte to check, inclusive
+     * @param limit the index at which checking stops, exclusive
+     */
+    final boolean isValidUtf8(byte[] bytes, int index, int limit) {
+      return partialIsValidUtf8(COMPLETE, bytes, index, limit) == COMPLETE;
+    }
+
+    /**
+     * Tells whether the given byte array slice is a well-formed,
+     * malformed, or incomplete UTF-8 byte sequence.  The range of bytes
+     * to be checked extends from index {@code index}, inclusive, to
+     * {@code limit}, exclusive.
+     *
+     * @param state either {@link Utf8#COMPLETE} (if this is the initial decoding
+     * operation) or the value returned from a call to a partial decoding method
+     * for the previous bytes
+     *
+     * @return {@link #MALFORMED} if the partial byte sequence is
+     * definitely not well-formed, {@link #COMPLETE} if it is well-formed
+     * (no additional input needed), or if the byte sequence is
+     * "incomplete", i.e. apparently terminated in the middle of a character,
+     * an opaque integer "state" value containing enough information to
+     * decode the character when passed to a subsequent invocation of a
+     * partial decoding method.
+     */
+    abstract int partialIsValidUtf8(int state, byte[] bytes, int index, int limit);
+
+    /**
+     * Returns {@code true} if the given portion of the {@link ByteBuffer} is a
+     * well-formed UTF-8 byte sequence.  The range of bytes to be
+     * checked extends from index {@code index}, inclusive, to {@code
+     * limit}, exclusive.
+     *
+     * <p>This is a convenience method, equivalent to {@code
+     * partialIsValidUtf8(Utf8.COMPLETE, buffer, index, limit) == Utf8.COMPLETE}.
+     *
+     * @param buffer the buffer containing the bytes to check
+     * @param index the index of the first byte to check, inclusive
+     * @param limit the index at which checking stops, exclusive
+     */
+    final boolean isValidUtf8(ByteBuffer buffer, int index, int limit) {
+      return partialIsValidUtf8(COMPLETE, buffer, index, limit) == COMPLETE;
+    }
+
+    /**
+     * Tells whether the given portion of the {@link ByteBuffer} is a well-formed, malformed, or
+     * incomplete UTF-8 byte sequence, dispatching on the kind of buffer: array-backed buffers are
+     * validated through their backing array, direct buffers through
+     * {@link #partialIsValidUtf8Direct}, and everything else through
+     * {@link #partialIsValidUtf8Default}.
+     *
+     * @param state either {@link Utf8#COMPLETE} or the value returned by a previous partial
+     *     validation call covering the preceding bytes
+     * @param buffer the buffer to check.
+     * @param index the index of the first byte to check, inclusive
+     * @param limit the index at which checking stops, exclusive
+     * @return {@link Utf8#MALFORMED}, {@link Utf8#COMPLETE}, or an opaque "incomplete" state
+     */
+    final int partialIsValidUtf8(
+        final int state, final ByteBuffer buffer, int index, final int limit) {
+      if (!buffer.hasArray()) {
+        return buffer.isDirect()
+            ? partialIsValidUtf8Direct(state, buffer, index, limit)
+            : partialIsValidUtf8Default(state, buffer, index, limit);
+      }
+      // Buffer indices map onto the backing array shifted by arrayOffset().
+      final int arrayBase = buffer.arrayOffset();
+      return partialIsValidUtf8(state, buffer.array(), arrayBase + index, arrayBase + limit);
+    }
+
+    /**
+     * Performs validation for direct {@link ByteBuffer} instances.
+     */
+    abstract int partialIsValidUtf8Direct(
+        final int state, final ByteBuffer buffer, int index, final int limit);
+
+    /**
+     * Performs validation for {@link ByteBuffer} instances using the {@link ByteBuffer} API rather
+     * than potentially faster approaches. This first completes validation for the current
+     * character (provided by {@code state}) and then finishes validation for the sequence.
+     *
+     * <p>All reads are absolute ({@code buffer.get(int)}). Bytes are compared as signed values:
+     * every non-ASCII byte is negative, so {@code b > (byte) 0xBF} means "b is not a trailing
+     * byte (0x80..0xBF)".
+     *
+     * @param state either {@link Utf8#COMPLETE} or the value returned by a previous partial
+     *     validation call covering the preceding bytes
+     * @param buffer the buffer to check
+     * @param index the index of the first byte to check, inclusive
+     * @param limit the index at which checking stops, exclusive
+     * @return {@link Utf8#MALFORMED}, {@link Utf8#COMPLETE}, or an opaque "incomplete" state
+     */
+    final int partialIsValidUtf8Default(
+        final int state, final ByteBuffer buffer, int index, final int limit) {
+      if (state != COMPLETE) {
+        // The previous decoding operation was incomplete (or malformed).
+        // We look for a well-formed sequence consisting of bytes from
+        // the previous decoding operation (stored in state) together
+        // with bytes from the buffer.
+        //
+        // We expect such "straddler characters" to be rare.
+
+        if (index >= limit) { // No bytes? No progress.
+          return state;
+        }
+
+        // The low 8 bits of the state hold the lead byte of the unfinished character.
+        byte byte1 = (byte) state;
+        // byte1 is never ASCII.
+        if (byte1 < (byte) 0xE0) {
+          // two-byte form
+
+          // Simultaneously checks for illegal trailing-byte in
+          // leading position and overlong 2-byte form.
+          if (byte1 < (byte) 0xC2
+              // byte2 trailing-byte test
+              || buffer.get(index++) > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        } else if (byte1 < (byte) 0xF0) {
+          // three-byte form
+
+          // Get byte2 from saved state or buffer. Bits 8-15 of the state are read back
+          // complemented; a result of 0 is treated as "byte2 was not seen yet".
+          byte byte2 = (byte) ~(state >> 8);
+          if (byte2 == 0) {
+            byte2 = buffer.get(index++);
+            if (index >= limit) {
+              return incompleteStateFor(byte1, byte2);
+            }
+          }
+          if (byte2 > (byte) 0xBF
+              // overlong? 5 most significant bits must not all be zero
+              || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
+              // illegal surrogate codepoint?
+              || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
+              // byte3 trailing-byte test
+              || buffer.get(index++) > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        } else {
+          // four-byte form
+
+          // Get byte2 and byte3 from saved state or buffer. byte3 is only looked up in the
+          // state (bits 16-23) when byte2 was already stored there.
+          byte byte2 = (byte) ~(state >> 8);
+          byte byte3 = 0;
+          if (byte2 == 0) {
+            byte2 = buffer.get(index++);
+            if (index >= limit) {
+              return incompleteStateFor(byte1, byte2);
+            }
+          } else {
+            byte3 = (byte) (state >> 16);
+          }
+          if (byte3 == 0) {
+            byte3 = buffer.get(index++);
+            if (index >= limit) {
+              return incompleteStateFor(byte1, byte2, byte3);
+            }
+          }
+
+          // If we were called with state == MALFORMED, then byte1 is 0xFF,
+          // which never occurs in well-formed UTF-8, and so we will return
+          // MALFORMED again below.
+
+          if (byte2 > (byte) 0xBF
+              // Check that 1 <= plane <= 16.  Tricky optimized form of:
+              // if (byte1 > (byte) 0xF4 ||
+              //     byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+              //     byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+              || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
+              // byte3 trailing-byte test
+              || byte3 > (byte) 0xBF
+              // byte4 trailing-byte test
+              || buffer.get(index++) > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        }
+      }
+
+      // Finish validation for the sequence.
+      return partialIsValidUtf8(buffer, index, limit);
+    }
+
+    /**
+     * Performs validation for {@link ByteBuffer} instances using the {@link ByteBuffer} API rather
+     * than potentially faster approaches.
+     *
+     * <p>Validation starts from a clean state (no pending partial character). A leading run of
+     * ASCII is skipped eight bytes at a time, then the rest is checked byte by byte. Bytes are
+     * compared as signed values, so {@code b > (byte) 0xBF} means "b is not a trailing byte".
+     *
+     * @param buffer the buffer to check
+     * @param index the index of the first byte to check, inclusive
+     * @param limit the index at which checking stops, exclusive
+     * @return {@link Utf8#MALFORMED}, {@link Utf8#COMPLETE}, or an opaque "incomplete" state if
+     *     the range ends in the middle of a character
+     */
+    private static int partialIsValidUtf8(final ByteBuffer buffer, int index, final int limit) {
+      // Fast path: skip whole 8-byte words that contain only ASCII.
+      index += estimateConsecutiveAscii(buffer, index, limit);
+
+      for (;;) {
+        // Optimize for interior runs of ASCII bytes.
+        // TODO(nathanmittler): Consider checking 8 bytes at a time after some threshold?
+        // Maybe after seeing a few in a row that are ASCII, go back to fast mode?
+        int byte1;
+        do {
+          if (index >= limit) {
+            return COMPLETE;
+          }
+        } while ((byte1 = buffer.get(index++)) >= 0);
+
+        // If we're here byte1 is not ASCII. Only need to handle 2-4 byte forms.
+        if (byte1 < (byte) 0xE0) {
+          // Two-byte form (110xxxxx 10xxxxxx)
+          if (index >= limit) {
+            // Incomplete sequence: the lead byte itself is returned as the state.
+            return byte1;
+          }
+
+          // Simultaneously checks for illegal trailing-byte in
+          // leading position and overlong 2-byte form.
+          if (byte1 < (byte) 0xC2 || buffer.get(index) > (byte) 0xBF) {
+            return MALFORMED;
+          }
+          index++;
+        } else if (byte1 < (byte) 0xF0) {
+          // Three-byte form (1110xxxx 10xxxxxx 10xxxxxx)
+          if (index >= limit - 1) {
+            // Incomplete sequence
+            return incompleteStateFor(buffer, byte1, index, limit - index);
+          }
+
+          final byte byte2 = buffer.get(index++);
+          if (byte2 > (byte) 0xBF
+              // overlong? 5 most significant bits must not all be zero
+              || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
+              // check for illegal surrogate codepoints
+              || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
+              // byte3 trailing-byte test
+              || buffer.get(index) > (byte) 0xBF) {
+            return MALFORMED;
+          }
+          index++;
+        } else {
+          // Four-byte form (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
+          if (index >= limit - 2) {
+            // Incomplete sequence
+            return incompleteStateFor(buffer, byte1, index, limit - index);
+          }
+
+          // TODO(nathanmittler): Consider using getInt() to improve performance.
+          final int byte2 = buffer.get(index++);
+          if (byte2 > (byte) 0xBF
+              // Check that 1 <= plane <= 16.  Tricky optimized form of:
+              // if (byte1 > (byte) 0xF4 ||
+              //     byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+              //     byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+              || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
+              // byte3 trailing-byte test
+              || buffer.get(index++) > (byte) 0xBF
+              // byte4 trailing-byte test
+              || buffer.get(index++) > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        }
+      }
+    }
+
+    /**
+     * Encodes an input character sequence ({@code in}) to UTF-8 in the target array ({@code out}).
+     * For a string, this method is similar to
+     * <pre>{@code
+     * byte[] a = string.getBytes(UTF_8);
+     * System.arraycopy(a, 0, bytes, offset, a.length);
+     * return offset + a.length;
+     * }</pre>
+     *
+     * but is more efficient in both time and space. One key difference is that this method
+     * requires paired surrogates, and therefore does not support chunking.
+     * While {@code String.getBytes(UTF_8)} replaces unpaired surrogates with the default
+     * replacement character, this method throws {@link UnpairedSurrogateException}.
+     *
+     * <p>To ensure sufficient space in the output buffer, either call {@link #encodedLength} to
+     * compute the exact amount needed, or leave room for 
+     * {@code Utf8.MAX_BYTES_PER_CHAR * sequence.length()}, which is the largest possible number
+     * of bytes that any input can be encoded to.
+     *
+     * @param in the input character sequence to be encoded
+     * @param out the target array
+     * @param offset the starting offset in {@code out} to start writing at
+     * @param length the number of bytes of {@code out} available, starting from {@code offset}
+     * @throws UnpairedSurrogateException if {@code in} contains ill-formed UTF-16 (unpaired
+     *     surrogates)
+     * @throws ArrayIndexOutOfBoundsException if {@code in} encoded in UTF-8 is longer than
+     *     {@code out.length - offset}
+     * @return the new offset, equivalent to {@code offset + Utf8.encodedLength(in)}
+     */
+    abstract int encodeUtf8(CharSequence in, byte[] out, int offset, int length);
+
+    /**
+     * Encodes an input character sequence ({@code in}) to UTF-8 in the target buffer ({@code out}).
+     * Upon returning from this method, the {@code out} position will point to the position after
+     * the last encoded byte. This method requires paired surrogates, and therefore does not
+     * support chunking.
+     *
+     * <p>To ensure sufficient space in the output buffer, either call {@link #encodedLength} to
+     * compute the exact amount needed, or leave room for
+     * {@code Utf8.MAX_BYTES_PER_CHAR * in.length()}, which is the largest possible number
+     * of bytes that any input can be encoded to.
+     *
+     * @param in the source character sequence to be encoded
+     * @param out the target buffer
+     * @throws UnpairedSurrogateException if {@code in} contains ill-formed UTF-16 (unpaired
+     *     surrogates)
+     * @throws ArrayIndexOutOfBoundsException if {@code in} encoded in UTF-8 is longer than
+     *     {@code out.remaining()}
+     */
+    final void encodeUtf8(CharSequence in, ByteBuffer out) {
+      if (out.hasArray()) {
+        // Array-backed buffer: encode straight into the backing array. Array indices are buffer
+        // indices shifted by arrayOffset(), so translate on the way in and on the way out.
+        final int offset = out.arrayOffset();
+        int endIndex =
+            Utf8.encode(in, out.array(), offset + out.position(), out.remaining());
+        out.position(endIndex - offset);
+      } else if (out.isDirect()) {
+        encodeUtf8Direct(in, out);
       } else {
-        // If we are surrogates and we're not a surrogate pair, always throw an
-        // IllegalArgumentException instead of an ArrayOutOfBoundsException.
-        if ((Character.MIN_SURROGATE <= c && c <= Character.MAX_SURROGATE)
-            && (i + 1 == sequence.length()
-                || !Character.isSurrogatePair(c, sequence.charAt(i + 1)))) {
-          throw new UnpairedSurrogateException(i, utf16Length);
+        // Neither array-backed nor direct: fall back to the plain ByteBuffer API.
+        encodeUtf8Default(in, out);
+      }
+    }
+
+    /**
+     * Encodes the input character sequence to a direct {@link ByteBuffer} instance.
+     */
+    abstract void encodeUtf8Direct(CharSequence in, ByteBuffer out);
+
+    /**
+     * Encodes the input character sequence to a {@link ByteBuffer} instance using the {@link
+     * ByteBuffer} API, rather than potentially faster approaches.
+     *
+     * <p>Bytes are written with absolute puts starting at {@code out.position()}. The position is
+     * moved past the last encoded byte only after the whole sequence has been encoded.
+     *
+     * @param in the character sequence to encode
+     * @param out the target buffer
+     * @throws UnpairedSurrogateException if {@code in} contains an unpaired surrogate; the
+     *     reported index is that of the offending surrogate itself
+     * @throws ArrayIndexOutOfBoundsException if a write is rejected by the buffer with an
+     *     {@link IndexOutOfBoundsException} (i.e. {@code out} is too small)
+     */
+    final void encodeUtf8Default(CharSequence in, ByteBuffer out) {
+      final int inLength = in.length();
+      int outIx = out.position();
+      int inIx = 0;
+
+      // Since ByteBuffer.putXXX() already checks boundaries for us, no need to explicitly check
+      // access. Assume the buffer is big enough and let it handle the out of bounds exception
+      // if it occurs.
+      try {
+        // Designed to take advantage of
+        // https://wikis.oracle.com/display/HotSpotInternals/RangeCheckElimination
+        for (char c; inIx < inLength && (c = in.charAt(inIx)) < 0x80; ++inIx) {
+          out.put(outIx + inIx, (byte) c);
         }
-        throw new ArrayIndexOutOfBoundsException("Failed writing " + c + " at index " + j);
+        if (inIx == inLength) {
+          // Successfully encoded the entire string.
+          out.position(outIx + inIx);
+          return;
+        }
+
+        // From here on outIx is the absolute index of the next byte to write. Each branch leaves
+        // it on the last byte it wrote; the loop increment then steps past that byte.
+        outIx += inIx;
+        for (char c; inIx < inLength; ++inIx, ++outIx) {
+          c = in.charAt(inIx);
+          if (c < 0x80) {
+            // One byte (0xxx xxxx)
+            out.put(outIx, (byte) c);
+          } else if (c < 0x800) {
+            // Two bytes (110x xxxx 10xx xxxx)
+
+            // Benchmarks show put performs better than putShort here (for HotSpot).
+            out.put(outIx++, (byte) (0xC0 | (c >>> 6)));
+            out.put(outIx, (byte) (0x80 | (0x3F & c)));
+          } else if (c < MIN_SURROGATE || MAX_SURROGATE < c) {
+            // Three bytes (1110 xxxx 10xx xxxx 10xx xxxx)
+            // Maximum single-char code point is 0xFFFF, 16 bits.
+
+            // Benchmarks show put performs better than putShort here (for HotSpot).
+            out.put(outIx++, (byte) (0xE0 | (c >>> 12)));
+            out.put(outIx++, (byte) (0x80 | (0x3F & (c >>> 6))));
+            out.put(outIx, (byte) (0x80 | (0x3F & c)));
+          } else {
+            // Four bytes (1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx)
+
+            // Minimum code point represented by a surrogate pair is 0x10000, 17 bits, four UTF-8
+            // bytes
+            final char low;
+            // Remember where the surrogate sits: inIx is advanced while reading its partner, and
+            // the exception must report the surrogate's own index, not the following char's.
+            final int surrogateIx = inIx;
+            if (surrogateIx + 1 == inLength || !isSurrogatePair(c, (low = in.charAt(++inIx)))) {
+              throw new UnpairedSurrogateException(surrogateIx, inLength);
+            }
+            // TODO(nathanmittler): Consider using putInt() to improve performance.
+            int codePoint = toCodePoint(c, low);
+            out.put(outIx++, (byte) ((0xF << 4) | (codePoint >>> 18)));
+            out.put(outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 12))));
+            out.put(outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 6))));
+            out.put(outIx, (byte) (0x80 | (0x3F & codePoint)));
+          }
+        }
+
+        // Successfully encoded the entire string.
+        out.position(outIx);
+      } catch (IndexOutOfBoundsException e) {
+        // TODO(nathanmittler): Consider making the API throw IndexOutOfBoundsException instead.
+
+        // If we failed in the outer ASCII loop, outIx will not have been updated. In this case,
+        // use inIx to determine the bad write index.
+        int badWriteIndex = out.position() + Math.max(inIx, outIx - out.position() + 1);
+        throw new ArrayIndexOutOfBoundsException(
+            "Failed writing " + in.charAt(inIx) + " at index " + badWriteIndex);
       }
     }
-    return j;
   }
-  // End Guava UTF-8 methods.
+
+  /**
+   * {@link Processor} implementation that does not use any {@code sun.misc.Unsafe} methods.
+   */
+  static final class SafeProcessor extends Processor {
+    /**
+     * Continues UTF-8 validation of {@code bytes[index..limit)} given the {@code state} left over
+     * from a previous partial validation.
+     *
+     * <p>When {@code state} is not {@code COMPLETE}, its low 8 bits are taken as the lead byte of
+     * an unfinished sequence, bits 8-15 (stored inverted) as an already-seen second byte and bits
+     * 16-23 as an already-seen third byte. The sequence is finished with bytes from the array
+     * before the remainder of the slice is validated.
+     *
+     * @param state {@code COMPLETE}, or the state returned by a previous call
+     * @param bytes the array holding the bytes to validate
+     * @param index the first position to read
+     * @param limit the position just past the last byte to read
+     * @return {@code MALFORMED} if the straddling sequence is invalid, an incomplete state if the
+     *     slice ends before that sequence does, {@code state} unchanged if the slice is empty,
+     *     and otherwise the result of validating the remaining bytes
+     */
+    @Override
+    int partialIsValidUtf8(int state, byte[] bytes, int index, int limit) {
+      if (state != COMPLETE) {
+        // The previous decoding operation was incomplete (or malformed).
+        // We look for a well-formed sequence consisting of bytes from
+        // the previous decoding operation (stored in state) together
+        // with bytes from the array slice.
+        //
+        // We expect such "straddler characters" to be rare.
+
+        if (index >= limit) {  // No bytes? No progress.
+          return state;
+        }
+        int byte1 = (byte) state;
+        // byte1 is never ASCII.
+        // All comparisons below are on signed bytes: trailing bytes 0x80..0xBF are the most
+        // negative values, so "> (byte) 0xBF" is true for anything that is not a trailing byte.
+        if (byte1 < (byte) 0xE0) {
+          // two-byte form
+
+          // Simultaneously checks for illegal trailing-byte in
+          // leading position and overlong 2-byte form.
+          if (byte1 < (byte) 0xC2
+              // byte2 trailing-byte test
+              || bytes[index++] > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        } else if (byte1 < (byte) 0xF0) {
+          // three-byte form
+
+          // Get byte2 from saved state or array
+          int byte2 = (byte) ~(state >> 8);
+          if (byte2 == 0) {
+            // No second byte was saved; take it from the array.
+            byte2 = bytes[index++];
+            if (index >= limit) {
+              // Slice exhausted before byte3: hand back lead byte + byte2 as the new state.
+              return incompleteStateFor(byte1, byte2);
+            }
+          }
+          if (byte2 > (byte) 0xBF
+              // overlong? 5 most significant bits must not all be zero
+              || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
+              // illegal surrogate codepoint?
+              || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
+              // byte3 trailing-byte test
+              || bytes[index++] > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        } else {
+          // four-byte form
+
+          // Get byte2 and byte3 from saved state or array
+          int byte2 = (byte) ~(state >> 8);
+          int byte3 = 0;
+          if (byte2 == 0) {
+            byte2 = bytes[index++];
+            if (index >= limit) {
+              return incompleteStateFor(byte1, byte2);
+            }
+          } else {
+            // A saved byte2 may be accompanied by a saved byte3.
+            byte3 = (byte) (state >> 16);
+          }
+          if (byte3 == 0) {
+            byte3 = bytes[index++];
+            if (index >= limit) {
+              return incompleteStateFor(byte1, byte2, byte3);
+            }
+          }
+
+          // If we were called with state == MALFORMED, then byte1 is 0xFF,
+          // which never occurs in well-formed UTF-8, and so we will return
+          // MALFORMED again below.
+
+          if (byte2 > (byte) 0xBF
+              // Check that 1 <= plane <= 16.  Tricky optimized form of:
+              // if (byte1 > (byte) 0xF4 ||
+              //     byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+              //     byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+              || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
+              // byte3 trailing-byte test
+              || byte3 > (byte) 0xBF
+              // byte4 trailing-byte test
+              || bytes[index++] > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        }
+      }
+
+      // The straddling sequence (if any) was valid; validate the rest of the slice statelessly.
+      return partialIsValidUtf8(bytes, index, limit);
+    }
+
+    /**
+     * Validates the bytes of a direct buffer by delegating to
+     * {@code partialIsValidUtf8Default}, passing all arguments through unchanged.
+     */
+    @Override
+    int partialIsValidUtf8Direct(int state, ByteBuffer buffer, int index, int limit) {
+      // For safe processing, we have to use the ByteBuffer API.
+      return partialIsValidUtf8Default(state, buffer, index, limit);
+    }
+
+    /**
+     * Encodes {@code in} as UTF-8 into {@code out}, starting at {@code offset} and writing at
+     * most {@code length} bytes.
+     *
+     * @param in the UTF-16 character sequence to encode
+     * @param out the destination array
+     * @param offset the index in {@code out} of the first byte to write
+     * @param length the maximum number of bytes that may be written
+     * @return the index in {@code out} just past the last byte written
+     * @throws UnpairedSurrogateException if {@code in} contains a surrogate that is not part of
+     *     a valid surrogate pair; the reported index is that of the offending surrogate
+     * @throws ArrayIndexOutOfBoundsException if the encoded form does not fit in the given range
+     */
+    @Override
+    int encodeUtf8(CharSequence in, byte[] out, int offset, int length) {
+      int utf16Length = in.length();
+      int j = offset;
+      int i = 0;
+      int limit = offset + length;
+      // Designed to take advantage of
+      // https://wikis.oracle.com/display/HotSpotInternals/RangeCheckElimination
+      for (char c; i < utf16Length && i + j < limit && (c = in.charAt(i)) < 0x80; i++) {
+        out[j + i] = (byte) c;
+      }
+      if (i == utf16Length) {
+        // The whole input was ASCII.
+        return j + utf16Length;
+      }
+      j += i;
+      for (char c; i < utf16Length; i++) {
+        c = in.charAt(i);
+        if (c < 0x80 && j < limit) {
+          out[j++] = (byte) c;
+        } else if (c < 0x800 && j <= limit - 2) { // 11 bits, two UTF-8 bytes
+          out[j++] = (byte) ((0xF << 6) | (c >>> 6));
+          out[j++] = (byte) (0x80 | (0x3F & c));
+        } else if ((c < Character.MIN_SURROGATE || Character.MAX_SURROGATE < c) && j <= limit - 3) {
+          // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes
+          out[j++] = (byte) ((0xF << 5) | (c >>> 12));
+          out[j++] = (byte) (0x80 | (0x3F & (c >>> 6)));
+          out[j++] = (byte) (0x80 | (0x3F & c));
+        } else if (j <= limit - 4) {
+          // Minimum code point represented by a surrogate pair is 0x10000, 17 bits,
+          // four UTF-8 bytes
+          final char low;
+          // Peek at the next char without advancing so that the index reported on failure is
+          // always that of the offending surrogate, even when it is the last char of the input.
+          if (i + 1 == utf16Length
+                  || !Character.isSurrogatePair(c, (low = in.charAt(i + 1)))) {
+            throw new UnpairedSurrogateException(i, utf16Length);
+          }
+          // Consume the low surrogate; the loop increment then moves past the pair.
+          i++;
+          int codePoint = Character.toCodePoint(c, low);
+          out[j++] = (byte) ((0xF << 4) | (codePoint >>> 18));
+          out[j++] = (byte) (0x80 | (0x3F & (codePoint >>> 12)));
+          out[j++] = (byte) (0x80 | (0x3F & (codePoint >>> 6)));
+          out[j++] = (byte) (0x80 | (0x3F & codePoint));
+        } else {
+          // If we are surrogates and we're not a surrogate pair, always throw an
+          // UnpairedSurrogateException instead of an ArrayOutOfBoundsException.
+          if ((Character.MIN_SURROGATE <= c && c <= Character.MAX_SURROGATE)
+              && (i + 1 == utf16Length
+                  || !Character.isSurrogatePair(c, in.charAt(i + 1)))) {
+            throw new UnpairedSurrogateException(i, utf16Length);
+          }
+          throw new ArrayIndexOutOfBoundsException("Failed writing " + c + " at index " + j);
+        }
+      }
+      return j;
+    }
+
+    /**
+     * Encodes {@code in} into a direct buffer by delegating to {@code encodeUtf8Default}.
+     */
+    @Override
+    void encodeUtf8Direct(CharSequence in, ByteBuffer out) {
+      // For safe processing, we have to use the ByteBuffer API.
+      encodeUtf8Default(in, out);
+    }
+
+    /**
+     * Validates {@code bytes[index..limit)} starting from a clean (no pending sequence) state.
+     * Leading ASCII is skipped here; the first non-ASCII byte hands control to
+     * {@code partialIsValidUtf8NonAscii}.
+     */
+    private static int partialIsValidUtf8(byte[] bytes, int index, int limit) {
+      // Small, simple scan over the ASCII prefix; a negative (signed) byte is non-ASCII.
+      for (int pos = index; pos < limit; pos++) {
+        if (bytes[pos] < 0) {
+          return partialIsValidUtf8NonAscii(bytes, pos, limit);
+        }
+      }
+
+      // Every byte in the range (possibly none) was ASCII.
+      return COMPLETE;
+    }
+
+    /**
+     * Validates {@code bytes[index..limit)}, handling multi-byte sequences.
+     *
+     * @return {@code COMPLETE} if the range ends on a sequence boundary with no errors,
+     *     {@code MALFORMED} on the first invalid sequence, or an incomplete state when the range
+     *     ends in the middle of a sequence
+     */
+    private static int partialIsValidUtf8NonAscii(byte[] bytes, int index, int limit) {
+      for (;;) {
+        int byte1, byte2;
+
+        // Optimize for interior runs of ASCII bytes.
+        do {
+          if (index >= limit) {
+            return COMPLETE;
+          }
+        } while ((byte1 = bytes[index++]) >= 0);
+
+        // byte1 is now a non-ASCII byte and index points just past it. Comparisons are on
+        // signed bytes, so "> (byte) 0xBF" means "not a trailing byte (0x80..0xBF)".
+        if (byte1 < (byte) 0xE0) {
+          // two-byte form
+
+          if (index >= limit) {
+            // Incomplete sequence
+            return byte1;
+          }
+
+          // Simultaneously checks for illegal trailing-byte in
+          // leading position and overlong 2-byte form.
+          if (byte1 < (byte) 0xC2
+              || bytes[index++] > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        } else if (byte1 < (byte) 0xF0) {
+          // three-byte form
+
+          // Fewer than two bytes remain after the lead byte.
+          if (index >= limit - 1) { // incomplete sequence
+            return incompleteStateFor(bytes, index, limit);
+          }
+          if ((byte2 = bytes[index++]) > (byte) 0xBF
+              // overlong? 5 most significant bits must not all be zero
+              || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
+              // check for illegal surrogate codepoints
+              || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
+              // byte3 trailing-byte test
+              || bytes[index++] > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        } else {
+          // four-byte form
+
+          // Fewer than three bytes remain after the lead byte.
+          if (index >= limit - 2) {  // incomplete sequence
+            return incompleteStateFor(bytes, index, limit);
+          }
+          if ((byte2 = bytes[index++]) > (byte) 0xBF
+              // Check that 1 <= plane <= 16.  Tricky optimized form of:
+              // if (byte1 > (byte) 0xF4 ||
+              //     byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+              //     byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+              || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
+              // byte3 trailing-byte test
+              || bytes[index++] > (byte) 0xBF
+              // byte4 trailing-byte test
+              || bytes[index++] > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * {@link Processor} that uses {@code sun.misc.Unsafe} where possible to improve performance.
+   */
+  static final class UnsafeProcessor extends Processor {
+    // UNSAFE is declared first because the initializers below read it (via fieldOffset and
+    // byteArrayBaseOffset) during static initialization.
+    private static final sun.misc.Unsafe UNSAFE = getUnsafe();
+    // Offset of the "address" field of java.nio.Buffer, or -1 if the field or Unsafe is
+    // unavailable.
+    private static final long BUFFER_ADDRESS_OFFSET =
+        fieldOffset(field(Buffer.class, "address"));
+    // Base offset of byte[] contents as reported by Unsafe, or -1 if Unsafe is unavailable.
+    private static final int ARRAY_BASE_OFFSET = byteArrayBaseOffset();
+
+    /**
+     * We only use Unsafe operations if we have access to direct {@link ByteBuffer}'s address
+     * and the array base offset is a multiple of 8 (needed by Unsafe.getLong()).
+     *
+     * <p>Note that an array base offset of {@code -1} (Unsafe unavailable) also fails the
+     * multiple-of-8 test, because {@code -1 % 8} is {@code -1} in Java.
+     */
+    private static final boolean AVAILABLE =
+        BUFFER_ADDRESS_OFFSET != -1 && ARRAY_BASE_OFFSET % 8 == 0;
+
+    /**
+     * Indicates whether or not all required unsafe operations are supported on this platform.
+     *
+     * @return the value of {@code AVAILABLE}, computed once during static initialization
+     */
+    static boolean isAvailable() {
+      return AVAILABLE;
+    }
+
+    /**
+     * Continues UTF-8 validation of {@code bytes[index..limit)} given the {@code state} left over
+     * from a previous partial validation, reading the array through {@code sun.misc.Unsafe}.
+     *
+     * <p>When {@code state} is not {@code COMPLETE}, its low 8 bits are taken as the lead byte of
+     * an unfinished sequence, bits 8-15 (stored inverted) as an already-seen second byte and bits
+     * 16-23 as an already-seen third byte.
+     *
+     * @param state {@code COMPLETE}, or the state returned by a previous call
+     * @param bytes the array holding the bytes to validate
+     * @param index the first position to read
+     * @param limit the position just past the last byte to read
+     * @return {@code MALFORMED}, an incomplete state, {@code state} unchanged if the slice is
+     *     empty, or the result of validating the remaining bytes
+     * @throws ArrayIndexOutOfBoundsException if {@code index} or {@code limit} is negative or
+     *     {@code limit} exceeds the array length
+     */
+    @Override
+    int partialIsValidUtf8(int state, byte[] bytes, final int index, final int limit) {
+      // OR-ing the values folds three sign checks into one: the result is negative if any
+      // operand is negative.
+      // NOTE(review): index > limit is not rejected here -- confirm callers never pass it.
+      if ((index | limit | bytes.length - limit) < 0) {
+        throw new ArrayIndexOutOfBoundsException(
+            String.format("Array length=%d, index=%d, limit=%d", bytes.length, index, limit));
+      }
+      // From here on positions are Unsafe offsets (array base offset + array index).
+      long offset = ARRAY_BASE_OFFSET + index;
+      final long offsetLimit = ARRAY_BASE_OFFSET + limit;
+      if (state != COMPLETE) {
+        // The previous decoding operation was incomplete (or malformed).
+        // We look for a well-formed sequence consisting of bytes from
+        // the previous decoding operation (stored in state) together
+        // with bytes from the array slice.
+        //
+        // We expect such "straddler characters" to be rare.
+
+        if (offset >= offsetLimit) {  // No bytes? No progress.
+          return state;
+        }
+        int byte1 = (byte) state;
+        // byte1 is never ASCII.
+        // Comparisons below are on signed bytes, so "> (byte) 0xBF" means "not a trailing byte".
+        if (byte1 < (byte) 0xE0) {
+          // two-byte form
+
+          // Simultaneously checks for illegal trailing-byte in
+          // leading position and overlong 2-byte form.
+          if (byte1 < (byte) 0xC2
+              // byte2 trailing-byte test
+              || UNSAFE.getByte(bytes, offset++) > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        } else if (byte1 < (byte) 0xF0) {
+          // three-byte form
+
+          // Get byte2 from saved state or array
+          int byte2 = (byte) ~(state >> 8);
+          if (byte2 == 0) {
+            byte2 = UNSAFE.getByte(bytes, offset++);
+            if (offset >= offsetLimit) {
+              return incompleteStateFor(byte1, byte2);
+            }
+          }
+          if (byte2 > (byte) 0xBF
+              // overlong? 5 most significant bits must not all be zero
+              || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
+              // illegal surrogate codepoint?
+              || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
+              // byte3 trailing-byte test
+              || UNSAFE.getByte(bytes, offset++) > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        } else {
+          // four-byte form
+
+          // Get byte2 and byte3 from saved state or array
+          int byte2 = (byte) ~(state >> 8);
+          int byte3 = 0;
+          if (byte2 == 0) {
+            byte2 = UNSAFE.getByte(bytes, offset++);
+            if (offset >= offsetLimit) {
+              return incompleteStateFor(byte1, byte2);
+            }
+          } else {
+            byte3 = (byte) (state >> 16);
+          }
+          if (byte3 == 0) {
+            byte3 = UNSAFE.getByte(bytes, offset++);
+            if (offset >= offsetLimit) {
+              return incompleteStateFor(byte1, byte2, byte3);
+            }
+          }
+
+          // If we were called with state == MALFORMED, then byte1 is 0xFF,
+          // which never occurs in well-formed UTF-8, and so we will return
+          // MALFORMED again below.
+
+          if (byte2 > (byte) 0xBF
+              // Check that 1 <= plane <= 16.  Tricky optimized form of:
+              // if (byte1 > (byte) 0xF4 ||
+              //     byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+              //     byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+              || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
+              // byte3 trailing-byte test
+              || byte3 > (byte) 0xBF
+              // byte4 trailing-byte test
+              || UNSAFE.getByte(bytes, offset++) > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        }
+      }
+
+      // Validate whatever is left, expressed as a start offset plus a byte count.
+      return partialIsValidUtf8(bytes, offset, (int) (offsetLimit - offset));
+    }
+
+    /**
+     * Continues UTF-8 validation of positions {@code [index..limit)} of {@code buffer} given the
+     * {@code state} left over from a previous partial validation, reading memory through
+     * {@code sun.misc.Unsafe} at the address obtained from the buffer.
+     *
+     * <p>NOTE(review): presumably {@code buffer} must be a direct buffer for the address read
+     * from it to be meaningful -- confirm against callers.
+     *
+     * @param state {@code COMPLETE}, or the state returned by a previous call
+     * @param buffer the buffer holding the bytes to validate
+     * @param index the first buffer position to read
+     * @param limit the buffer position just past the last byte to read
+     * @return {@code MALFORMED}, an incomplete state, {@code state} unchanged if the range is
+     *     empty, or the result of validating the remaining bytes
+     * @throws ArrayIndexOutOfBoundsException if {@code index} or {@code limit} is negative or
+     *     {@code limit} exceeds {@code buffer.limit()}
+     */
+    @Override
+    int partialIsValidUtf8Direct(
+        final int state, ByteBuffer buffer, final int index, final int limit) {
+      // OR-ing the values folds three sign checks into one.
+      // NOTE(review): index > limit is not rejected here -- confirm callers never pass it.
+      if ((index | limit | buffer.limit() - limit) < 0) {
+        throw new ArrayIndexOutOfBoundsException(
+            String.format("buffer limit=%d, index=%d, limit=%d", buffer.limit(), index, limit));
+      }
+      // From here on positions are absolute memory addresses.
+      long address = addressOffset(buffer) + index;
+      final long addressLimit = address + (limit - index);
+      if (state != COMPLETE) {
+        // The previous decoding operation was incomplete (or malformed).
+        // We look for a well-formed sequence consisting of bytes from
+        // the previous decoding operation (stored in state) together
+        // with bytes from the buffer range.
+        //
+        // We expect such "straddler characters" to be rare.
+
+        if (address >= addressLimit) { // No bytes? No progress.
+          return state;
+        }
+
+        final int byte1 = (byte) state;
+        // byte1 is never ASCII.
+        // Comparisons below are on signed bytes, so "> (byte) 0xBF" means "not a trailing byte".
+        if (byte1 < (byte) 0xE0) {
+          // two-byte form
+
+          // Simultaneously checks for illegal trailing-byte in
+          // leading position and overlong 2-byte form.
+          if (byte1 < (byte) 0xC2
+              // byte2 trailing-byte test
+              || UNSAFE.getByte(address++) > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        } else if (byte1 < (byte) 0xF0) {
+          // three-byte form
+
+          // Get byte2 from saved state or buffer
+          int byte2 = (byte) ~(state >> 8);
+          if (byte2 == 0) {
+            byte2 = UNSAFE.getByte(address++);
+            if (address >= addressLimit) {
+              return incompleteStateFor(byte1, byte2);
+            }
+          }
+          if (byte2 > (byte) 0xBF
+              // overlong? 5 most significant bits must not all be zero
+              || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
+              // illegal surrogate codepoint?
+              || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
+              // byte3 trailing-byte test
+              || UNSAFE.getByte(address++) > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        } else {
+          // four-byte form
+
+          // Get byte2 and byte3 from saved state or buffer
+          int byte2 = (byte) ~(state >> 8);
+          int byte3 = 0;
+          if (byte2 == 0) {
+            byte2 = UNSAFE.getByte(address++);
+            if (address >= addressLimit) {
+              return incompleteStateFor(byte1, byte2);
+            }
+          } else {
+            byte3 = (byte) (state >> 16);
+          }
+          if (byte3 == 0) {
+            byte3 = UNSAFE.getByte(address++);
+            if (address >= addressLimit) {
+              return incompleteStateFor(byte1, byte2, byte3);
+            }
+          }
+
+          // If we were called with state == MALFORMED, then byte1 is 0xFF,
+          // which never occurs in well-formed UTF-8, and so we will return
+          // MALFORMED again below.
+
+          if (byte2 > (byte) 0xBF
+              // Check that 1 <= plane <= 16.  Tricky optimized form of:
+              // if (byte1 > (byte) 0xF4 ||
+              //     byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+              //     byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+              || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
+              // byte3 trailing-byte test
+              || byte3 > (byte) 0xBF
+              // byte4 trailing-byte test
+              || UNSAFE.getByte(address++) > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        }
+      }
+
+      // Validate whatever is left, expressed as a start address plus a byte count.
+      return partialIsValidUtf8(address, (int) (addressLimit - address));
+    }
+
+    /**
+     * Encodes {@code in} as UTF-8 into {@code out} using {@code sun.misc.Unsafe} writes,
+     * starting at {@code offset} and writing at most {@code length} bytes.
+     *
+     * <p>Because Unsafe writes are not bounds-checked, the {@code offset}/{@code length} range is
+     * validated against the array up front.
+     *
+     * @param in the UTF-16 character sequence to encode
+     * @param out the destination array
+     * @param offset the index in {@code out} of the first byte to write
+     * @param length the maximum number of bytes that may be written
+     * @return the index in {@code out} just past the last byte written
+     * @throws UnpairedSurrogateException if {@code in} contains a surrogate that is not part of
+     *     a valid surrogate pair; the reported index is that of the offending surrogate
+     * @throws ArrayIndexOutOfBoundsException if the range is invalid for {@code out} or the
+     *     encoded form does not fit in it
+     */
+    @Override
+    int encodeUtf8(final CharSequence in, final byte[] out, final int offset, final int length) {
+      // Reject ranges that do not lie inside the array before touching memory: a negative
+      // offset would otherwise make Unsafe write in front of the array contents.
+      if ((offset | length) < 0 || out.length - length < offset) {
+        throw new ArrayIndexOutOfBoundsException(
+            String.format("Array length=%d, offset=%d, length=%d", out.length, offset, length));
+      }
+      long outIx = ARRAY_BASE_OFFSET + offset;
+      final long outLimit = outIx + length;
+      final int inLimit = in.length();
+      if (inLimit > length) {
+        // Not even enough room for an ASCII-encoded string.
+        throw new ArrayIndexOutOfBoundsException(
+            "Failed writing " + in.charAt(inLimit - 1) + " at index " + (offset + length));
+      }
+
+      // Designed to take advantage of
+      // https://wikis.oracle.com/display/HotSpotInternals/RangeCheckElimination
+      // No output bound is needed in this loop: inLimit <= length was established above and
+      // each ASCII char produces exactly one byte.
+      int inIx = 0;
+      for (char c; inIx < inLimit && (c = in.charAt(inIx)) < 0x80; ++inIx) {
+        UNSAFE.putByte(out, outIx++, (byte) c);
+      }
+      if (inIx == inLimit) {
+        // We're done, it was ASCII encoded.
+        return (int) (outIx - ARRAY_BASE_OFFSET);
+      }
+
+      for (char c; inIx < inLimit; ++inIx) {
+        c = in.charAt(inIx);
+        if (c < 0x80 && outIx < outLimit) {
+          UNSAFE.putByte(out, outIx++, (byte) c);
+        } else if (c < 0x800 && outIx <= outLimit - 2L) { // 11 bits, two UTF-8 bytes
+          UNSAFE.putByte(out, outIx++, (byte) ((0xF << 6) | (c >>> 6)));
+          UNSAFE.putByte(out, outIx++, (byte) (0x80 | (0x3F & c)));
+        } else if ((c < MIN_SURROGATE || MAX_SURROGATE < c) && outIx <= outLimit - 3L) {
+          // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes
+          UNSAFE.putByte(out, outIx++, (byte) ((0xF << 5) | (c >>> 12)));
+          UNSAFE.putByte(out, outIx++, (byte) (0x80 | (0x3F & (c >>> 6))));
+          UNSAFE.putByte(out, outIx++, (byte) (0x80 | (0x3F & c)));
+        } else if (outIx <= outLimit - 4L) {
+          // Minimum code point represented by a surrogate pair is 0x10000, 17 bits, four UTF-8
+          // bytes
+          final char low;
+          // Peek at the next char without advancing so that the index reported on failure is
+          // always that of the offending surrogate, even when it is the last char of the input.
+          if (inIx + 1 == inLimit || !isSurrogatePair(c, (low = in.charAt(inIx + 1)))) {
+            throw new UnpairedSurrogateException(inIx, inLimit);
+          }
+          // Consume the low surrogate; the loop increment then moves past the pair.
+          ++inIx;
+          int codePoint = toCodePoint(c, low);
+          UNSAFE.putByte(out, outIx++, (byte) ((0xF << 4) | (codePoint >>> 18)));
+          UNSAFE.putByte(out, outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 12))));
+          UNSAFE.putByte(out, outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 6))));
+          UNSAFE.putByte(out, outIx++, (byte) (0x80 | (0x3F & codePoint)));
+        } else {
+          if ((MIN_SURROGATE <= c && c <= MAX_SURROGATE)
+              && (inIx + 1 == inLimit || !isSurrogatePair(c, in.charAt(inIx + 1)))) {
+            // We are surrogates and we're not a surrogate pair.
+            throw new UnpairedSurrogateException(inIx, inLimit);
+          }
+          // Not enough space in the output buffer. Report the array index, not the raw Unsafe
+          // offset (which includes the array base offset).
+          throw new ArrayIndexOutOfBoundsException(
+              "Failed writing " + c + " at index " + (outIx - ARRAY_BASE_OFFSET));
+        }
+      }
+
+      // All bytes have been encoded.
+      return (int) (outIx - ARRAY_BASE_OFFSET);
+    }
+
+    /**
+     * Encodes {@code in} as UTF-8 into {@code out} using {@code sun.misc.Unsafe} writes,
+     * starting at the buffer's current position and never writing at or beyond its limit. On
+     * success the buffer's position is advanced past the bytes written; on failure the position
+     * is left unchanged.
+     *
+     * <p>NOTE(review): presumably {@code out} must be a direct buffer for the address read from
+     * it to be meaningful -- confirm against callers.
+     *
+     * @param in the UTF-16 character sequence to encode
+     * @param out the destination buffer
+     * @throws UnpairedSurrogateException if {@code in} contains a surrogate that is not part of
+     *     a valid surrogate pair; the reported index is that of the offending surrogate
+     * @throws ArrayIndexOutOfBoundsException if the encoded form does not fit in the buffer's
+     *     remaining space
+     */
+    @Override
+    void encodeUtf8Direct(CharSequence in, ByteBuffer out) {
+      final long address = addressOffset(out);
+      long outIx = address + out.position();
+      final long outLimit = address + out.limit();
+      final int inLimit = in.length();
+      if (inLimit > outLimit - outIx) {
+        // Not even enough room for an ASCII-encoded string.
+        throw new ArrayIndexOutOfBoundsException(
+            "Failed writing " + in.charAt(inLimit - 1) + " at index " + out.limit());
+      }
+
+      // Designed to take advantage of
+      // https://wikis.oracle.com/display/HotSpotInternals/RangeCheckElimination
+      // No output bound is needed in this loop: the check above guarantees one byte of room
+      // per input char.
+      int inIx = 0;
+      for (char c; inIx < inLimit && (c = in.charAt(inIx)) < 0x80; ++inIx) {
+        UNSAFE.putByte(outIx++, (byte) c);
+      }
+      if (inIx == inLimit) {
+        // We're done, it was ASCII encoded.
+        out.position((int) (outIx - address));
+        return;
+      }
+
+      for (char c; inIx < inLimit; ++inIx) {
+        c = in.charAt(inIx);
+        if (c < 0x80 && outIx < outLimit) {
+          UNSAFE.putByte(outIx++, (byte) c);
+        } else if (c < 0x800 && outIx <= outLimit - 2L) { // 11 bits, two UTF-8 bytes
+          UNSAFE.putByte(outIx++, (byte) ((0xF << 6) | (c >>> 6)));
+          UNSAFE.putByte(outIx++, (byte) (0x80 | (0x3F & c)));
+        } else if ((c < MIN_SURROGATE || MAX_SURROGATE < c) && outIx <= outLimit - 3L) {
+          // Maximum single-char code point is 0xFFFF, 16 bits, three UTF-8 bytes
+          UNSAFE.putByte(outIx++, (byte) ((0xF << 5) | (c >>> 12)));
+          UNSAFE.putByte(outIx++, (byte) (0x80 | (0x3F & (c >>> 6))));
+          UNSAFE.putByte(outIx++, (byte) (0x80 | (0x3F & c)));
+        } else if (outIx <= outLimit - 4L) {
+          // Minimum code point represented by a surrogate pair is 0x10000, 17 bits, four UTF-8
+          // bytes
+          final char low;
+          // Peek at the next char without advancing so that the index reported on failure is
+          // always that of the offending surrogate, even when it is the last char of the input.
+          if (inIx + 1 == inLimit || !isSurrogatePair(c, (low = in.charAt(inIx + 1)))) {
+            throw new UnpairedSurrogateException(inIx, inLimit);
+          }
+          // Consume the low surrogate; the loop increment then moves past the pair.
+          ++inIx;
+          int codePoint = toCodePoint(c, low);
+          UNSAFE.putByte(outIx++, (byte) ((0xF << 4) | (codePoint >>> 18)));
+          UNSAFE.putByte(outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 12))));
+          UNSAFE.putByte(outIx++, (byte) (0x80 | (0x3F & (codePoint >>> 6))));
+          UNSAFE.putByte(outIx++, (byte) (0x80 | (0x3F & codePoint)));
+        } else {
+          if ((MIN_SURROGATE <= c && c <= MAX_SURROGATE)
+              && (inIx + 1 == inLimit || !isSurrogatePair(c, in.charAt(inIx + 1)))) {
+            // We are surrogates and we're not a surrogate pair.
+            throw new UnpairedSurrogateException(inIx, inLimit);
+          }
+          // Not enough space in the output buffer. Report the buffer index rather than the
+          // absolute memory address held in outIx.
+          throw new ArrayIndexOutOfBoundsException(
+              "Failed writing " + c + " at index " + (outIx - address));
+        }
+      }
+
+      // All bytes have been encoded.
+      out.position((int) (outIx - address));
+    }
+
+    /**
+     * Counts (approximately) the number of consecutive ASCII characters starting from the given
+     * position, using the most efficient method available to the platform.
+     *
+     * <p>The count is a lower bound: bytes are examined one at a time until the offset is
+     * 8-byte aligned and then eight at a time, so up to seven trailing ASCII bytes (and any
+     * ASCII bytes sharing a long with a non-ASCII byte) are not counted.
+     *
+     * @param bytes the array containing the character sequence
+     * @param offset the offset position of the index (same as index + arrayBaseOffset)
+     * @param maxChars the maximum number of characters to count
+     * @return the number of ASCII characters found. The stopping position will be at or
+     * before the first non-ASCII byte.
+     */
+    private static int unsafeEstimateConsecutiveAscii(
+        byte[] bytes, long offset, final int maxChars) {
+      int remaining = maxChars;
+      if (remaining < UNSAFE_COUNT_ASCII_THRESHOLD) {
+        // Don't bother with small strings.
+        return 0;
+      }
+
+      // Read bytes until 8-byte aligned so that we can read longs in the loop below.
+      // The number of bytes needed to reach the next multiple of 8 is (8 - offset % 8) % 8;
+      // (offset & 7) on its own is the distance *past* the previous boundary, which only
+      // coincides with the distance to the next one when it is 0 or 4.
+      final int unaligned = (8 - ((int) offset & 7)) & 7;
+      for (int j = unaligned; j > 0; j--) {
+        if (UNSAFE.getByte(bytes, offset++) < 0) {
+          return unaligned - j;
+        }
+      }
+
+      // This simple loop stops when we encounter a byte >= 0x80 (i.e. non-ASCII).
+      // To speed things up further, we're reading longs instead of bytes so we use a mask to
+      // determine if any byte in the current long is non-ASCII.
+      remaining -= unaligned;
+      for (; remaining >= 8 && (UNSAFE.getLong(bytes, offset) & ASCII_MASK_LONG) == 0;
+          offset += 8, remaining -= 8) {}
+      return maxChars - remaining;
+    }
+
+    /**
+     * Same as {@link Utf8#estimateConsecutiveAscii(ByteBuffer, int, int)} except that it uses the
+     * most efficient method available to the platform.
+     *
+     * <p>The count is a lower bound: bytes are examined one at a time until the address is
+     * 8-byte aligned and then eight at a time.
+     *
+     * @param address the memory address of the first byte to examine
+     * @param maxChars the maximum number of bytes to examine
+     * @return the number of ASCII bytes counted; the stopping position is at or before the
+     *     first non-ASCII byte
+     */
+    private static int unsafeEstimateConsecutiveAscii(long address, final int maxChars) {
+      int remaining = maxChars;
+      if (remaining < UNSAFE_COUNT_ASCII_THRESHOLD) {
+        // Don't bother with small strings.
+        return 0;
+      }
+
+      // Read bytes until 8-byte aligned so that we can read longs in the loop below.
+      // The number of bytes needed to reach the next multiple of 8 is (8 - address % 8) % 8;
+      // (address & 7) on its own is the distance *past* the previous boundary, which only
+      // coincides with the distance to the next one when it is 0 or 4.
+      final int unaligned = (8 - ((int) address & 7)) & 7;
+      for (int j = unaligned; j > 0; j--) {
+        if (UNSAFE.getByte(address++) < 0) {
+          return unaligned - j;
+        }
+      }
+
+      // This simple loop stops when we encounter a byte >= 0x80 (i.e. non-ASCII).
+      // To speed things up further, we're reading longs instead of bytes so we use a mask to
+      // determine if any byte in the current long is non-ASCII.
+      remaining -= unaligned;
+      for (; remaining >= 8 && (UNSAFE.getLong(address) & ASCII_MASK_LONG) == 0;
+          address += 8, remaining -= 8) {}
+      return maxChars - remaining;
+    }
+
+    /**
+     * Validates {@code remaining} bytes of {@code bytes} starting at Unsafe offset
+     * {@code offset}, beginning from a clean (no pending sequence) state.
+     *
+     * @param bytes the array holding the bytes to validate
+     * @param offset the Unsafe offset of the first byte to read
+     * @param remaining the number of bytes to validate
+     * @return {@code COMPLETE} if the range ends on a sequence boundary with no errors,
+     *     {@code MALFORMED} on the first invalid sequence, or an incomplete state when the range
+     *     ends in the middle of a sequence
+     */
+    private static int partialIsValidUtf8(final byte[] bytes, long offset, int remaining) {
+      // Skip past ASCII characters as quickly as possible.
+      final int skipped = unsafeEstimateConsecutiveAscii(bytes, offset, remaining);
+      remaining -= skipped;
+      offset += skipped;
+
+      for (;;) {
+        // Optimize for interior runs of ASCII bytes.
+        // TODO(nathanmittler): Consider checking 8 bytes at a time after some threshold?
+        // Maybe after seeing a few in a row that are ASCII, go back to fast mode?
+        int byte1 = 0;
+        for (; remaining > 0 && (byte1 = UNSAFE.getByte(bytes, offset++)) >= 0; --remaining) {
+        }
+        if (remaining == 0) {
+          return COMPLETE;
+        }
+        // The loop above consumed the non-ASCII lead byte without counting it; count it now.
+        remaining--;
+
+        // If we're here byte1 is not ASCII. Only need to handle 2-4 byte forms.
+        // Comparisons below are on signed bytes, so "> (byte) 0xBF" means "not a trailing byte".
+        if (byte1 < (byte) 0xE0) {
+          // Two-byte form (110xxxxx 10xxxxxx)
+          if (remaining == 0) {
+            // Incomplete sequence
+            return byte1;
+          }
+          remaining--;
+
+          // Simultaneously checks for illegal trailing-byte in
+          // leading position and overlong 2-byte form.
+          if (byte1 < (byte) 0xC2
+              || UNSAFE.getByte(bytes, offset++) > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        } else if (byte1 < (byte) 0xF0) {
+          // Three-byte form (1110xxxx 10xxxxxx 10xxxxxx)
+          if (remaining < 2) {
+            // Incomplete sequence
+            return unsafeIncompleteStateFor(bytes, byte1, offset, remaining);
+          }
+          remaining -= 2;
+
+          final int byte2;
+          if ((byte2 = UNSAFE.getByte(bytes, offset++)) > (byte) 0xBF
+              // overlong? 5 most significant bits must not all be zero
+              || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
+              // check for illegal surrogate codepoints
+              || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
+              // byte3 trailing-byte test
+              || UNSAFE.getByte(bytes, offset++) > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        } else {
+          // Four-byte form (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
+          if (remaining < 3) {
+            // Incomplete sequence
+            return unsafeIncompleteStateFor(bytes, byte1, offset, remaining);
+          }
+          remaining -= 3;
+
+          final int byte2;
+          if ((byte2 = UNSAFE.getByte(bytes, offset++)) > (byte) 0xBF
+              // Check that 1 <= plane <= 16.  Tricky optimized form of:
+              // if (byte1 > (byte) 0xF4 ||
+              //     byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+              //     byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+              || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
+              // byte3 trailing-byte test
+              || UNSAFE.getByte(bytes, offset++) > (byte) 0xBF
+              // byte4 trailing-byte test
+              || UNSAFE.getByte(bytes, offset++) > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        }
+      }
+    }
+
+    /**
+     * Validates {@code remaining} bytes of memory starting at {@code address}, beginning from a
+     * clean (no pending sequence) state.
+     *
+     * @param address the memory address of the first byte to read
+     * @param remaining the number of bytes to validate
+     * @return {@code COMPLETE} if the range ends on a sequence boundary with no errors,
+     *     {@code MALFORMED} on the first invalid sequence, or an incomplete state when the range
+     *     ends in the middle of a sequence
+     */
+    private static int partialIsValidUtf8(long address, int remaining) {
+      // Skip past ASCII characters as quickly as possible.
+      final int skipped = unsafeEstimateConsecutiveAscii(address, remaining);
+      address += skipped;
+      remaining -= skipped;
+
+      for (;;) {
+        // Optimize for interior runs of ASCII bytes.
+        // TODO(nathanmittler): Consider checking 8 bytes at a time after some threshold?
+        // Maybe after seeing a few in a row that are ASCII, go back to fast mode?
+        int byte1 = 0;
+        for (; remaining > 0 && (byte1 = UNSAFE.getByte(address++)) >= 0; --remaining) {
+        }
+        if (remaining == 0) {
+          return COMPLETE;
+        }
+        // The loop above consumed the non-ASCII lead byte without counting it; count it now.
+        remaining--;
+
+        // Comparisons below are on signed bytes, so "> (byte) 0xBF" means "not a trailing byte".
+        if (byte1 < (byte) 0xE0) {
+          // Two-byte form
+
+          if (remaining == 0) {
+            // Incomplete sequence
+            return byte1;
+          }
+          remaining--;
+
+          // Simultaneously checks for illegal trailing-byte in
+          // leading position and overlong 2-byte form.
+          if (byte1 < (byte) 0xC2 || UNSAFE.getByte(address++) > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        } else if (byte1 < (byte) 0xF0) {
+          // Three-byte form
+
+          if (remaining < 2) {
+            // Incomplete sequence
+            return unsafeIncompleteStateFor(address, byte1, remaining);
+          }
+          remaining -= 2;
+
+          final byte byte2 = UNSAFE.getByte(address++);
+          if (byte2 > (byte) 0xBF
+              // overlong? 5 most significant bits must not all be zero
+              || (byte1 == (byte) 0xE0 && byte2 < (byte) 0xA0)
+              // check for illegal surrogate codepoints
+              || (byte1 == (byte) 0xED && byte2 >= (byte) 0xA0)
+              // byte3 trailing-byte test
+              || UNSAFE.getByte(address++) > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        } else {
+          // Four-byte form
+
+          if (remaining < 3) {
+            // Incomplete sequence
+            return unsafeIncompleteStateFor(address, byte1, remaining);
+          }
+          remaining -= 3;
+
+          final byte byte2 = UNSAFE.getByte(address++);
+          if (byte2 > (byte) 0xBF
+              // Check that 1 <= plane <= 16.  Tricky optimized form of:
+              // if (byte1 > (byte) 0xF4 ||
+              //     byte1 == (byte) 0xF0 && byte2 < (byte) 0x90 ||
+              //     byte1 == (byte) 0xF4 && byte2 > (byte) 0x8F)
+              || (((byte1 << 28) + (byte2 - (byte) 0x90)) >> 30) != 0
+              // byte3 trailing-byte test
+              || UNSAFE.getByte(address++) > (byte) 0xBF
+              // byte4 trailing-byte test
+              || UNSAFE.getByte(address++) > (byte) 0xBF) {
+            return MALFORMED;
+          }
+        }
+      }
+    }
+
+    /**
+     * Builds the incomplete-sequence state for a lead byte followed by {@code remaining}
+     * (0, 1 or 2) trailing bytes read from {@code bytes} at Unsafe offset {@code offset}.
+     *
+     * @throws AssertionError if {@code remaining} is not 0, 1 or 2
+     */
+    private static int unsafeIncompleteStateFor(byte[] bytes, int byte1, long offset,
+        int remaining) {
+      if (remaining == 0) {
+        // Only the lead byte has been seen.
+        return incompleteStateFor(byte1);
+      }
+      if (remaining == 1) {
+        return incompleteStateFor(byte1, UNSAFE.getByte(bytes, offset));
+      }
+      if (remaining == 2) {
+        return incompleteStateFor(byte1, UNSAFE.getByte(bytes, offset),
+            UNSAFE.getByte(bytes, offset + 1));
+      }
+      // Callers only get here with fewer than three trailing bytes available.
+      throw new AssertionError();
+    }
+
+    /**
+     * Builds the incomplete-sequence state for a lead byte followed by {@code remaining}
+     * (0, 1 or 2) trailing bytes read from memory at {@code address}.
+     *
+     * @throws AssertionError if {@code remaining} is not 0, 1 or 2
+     */
+    private static int unsafeIncompleteStateFor(long address, final int byte1, int remaining) {
+      if (remaining == 0) {
+        // Only the lead byte has been seen.
+        return incompleteStateFor(byte1);
+      }
+      if (remaining == 1) {
+        return incompleteStateFor(byte1, UNSAFE.getByte(address));
+      }
+      if (remaining == 2) {
+        return incompleteStateFor(byte1, UNSAFE.getByte(address), UNSAFE.getByte(address + 1));
+      }
+      // Callers only get here with fewer than three trailing bytes available.
+      throw new AssertionError();
+    }
+
+    /**
+     * Gets the field with the given name within the class, or {@code null} if not found. If
+     * found, the field is made accessible.
+     */
+    private static Field field(Class<?> clazz, String fieldName) {
+      Field field;
+      try {
+        field = clazz.getDeclaredField(fieldName);
+        field.setAccessible(true);
+      } catch (Throwable t) {
+        // Failed to access the fields.
+        field = null;
+      }
+      logger.log(Level.FINEST, "{0}.{1}: {2}",
+          new Object[] {clazz.getName(), fieldName, (field != null ? "available" : "unavailable")});
+      return field;
+    }
+
+    /**
+     * Returns the {@code sun.misc.Unsafe} object-field offset of the provided field, or
+     * {@code -1} if either the field is {@code null} or {@code sun.misc.Unsafe} is not available.
+     */
+    private static long fieldOffset(Field field) {
+      if (field == null || UNSAFE == null) {
+        return -1;
+      }
+      return UNSAFE.objectFieldOffset(field);
+    }
+
+    /**
+     * Get the base offset for byte arrays, or {@code -1} if {@code sun.misc.Unsafe} is not
+     * available.
+     *
+     * <p>The method previously declared a type parameter that was never used; it has been
+     * dropped. Existing call sites are unaffected.
+     */
+    private static int byteArrayBaseOffset() {
+      return UNSAFE == null ? -1 : UNSAFE.arrayBaseOffset(byte[].class);
+    }
+
+    /**
+     * Gets the offset of the {@code address} field of the given direct {@link ByteBuffer}.
+     */
+    private static long addressOffset(ByteBuffer buffer) {
+      return UNSAFE.getLong(buffer, BUFFER_ADDRESS_OFFSET);
+    }
+
+    /**
+     * Gets the {@code sun.misc.Unsafe} instance, or {@code null} if not available on this
+     * platform.
+     *
+     * <p>The lookup runs inside a privileged action. It first verifies that every required
+     * method exists and then scans the declared fields of {@code sun.misc.Unsafe} for one that
+     * holds an instance of the class. Any failure is swallowed and results in {@code null}. The
+     * outcome is logged at {@code FINEST} level.
+     */
+    private static sun.misc.Unsafe getUnsafe() {
+      sun.misc.Unsafe unsafe = null;
+      try {
+        unsafe = AccessController.doPrivileged(new PrivilegedExceptionAction<sun.misc.Unsafe>() {
+          @Override
+          public sun.misc.Unsafe run() throws Exception {
+            Class<sun.misc.Unsafe> k = sun.misc.Unsafe.class;
+
+            // Check that this platform supports all of the required unsafe methods.
+            checkRequiredMethods(k);
+
+            for (Field f : k.getDeclaredFields()) {
+              f.setAccessible(true);
+              Object x = f.get(null);
+              if (k.isInstance(x)) {
+                return k.cast(x);
+              }
+            }
+            // The sun.misc.Unsafe field does not exist.
+            return null;
+          }
+        });
+      } catch (Throwable e) {
+        // Catching Throwable here due to the fact that Google AppEngine raises NoClassDefFoundError
+        // for Unsafe.
+      }
+
+      // java.util.logging substitutes parameters using MessageFormat-style "{0}" placeholders;
+      // a bare "{}" is emitted verbatim and the availability would never appear in the log.
+      logger.log(Level.FINEST, "sun.misc.Unsafe: {0}",
+          unsafe != null ? "available" : "unavailable");
+      return unsafe;
+    }
+
+    /**
+     * Verifies that all required methods of {@code sun.misc.Unsafe} are available on this platform.
+     *
+     * @param clazz the {@code sun.misc.Unsafe} class to inspect
+     * @throws NoSuchMethodException if any of the required public methods is missing
+     * @throws SecurityException if reflective access to the methods is denied
+     */
+    private static void checkRequiredMethods(Class<sun.misc.Unsafe> clazz)
+        throws NoSuchMethodException, SecurityException {
+      // Needed for Unsafe byte[] access
+      clazz.getMethod("arrayBaseOffset", Class.class);
+      clazz.getMethod("getByte", Object.class, long.class);
+      clazz.getMethod("putByte", Object.class, long.class, byte.class);
+      // Also needed for direct ByteBuffer access; one lookup covers both uses.
+      clazz.getMethod("getLong", Object.class, long.class);
+
+      // Needed for Unsafe Direct ByteBuffer access
+      clazz.getMethod("objectFieldOffset", Field.class);
+      clazz.getMethod("getByte", long.class);
+      clazz.getMethod("putByte", long.class, byte.class);
+      clazz.getMethod("getLong", long.class);
+    }
+  }
+
+  // Private no-arg constructor: code outside this class cannot invoke it.
+  private Utf8() {}
 }

+ 0 - 1
java/core/src/test/java/com/google/protobuf/AbstractMessageTest.java

@@ -40,7 +40,6 @@ import protobuf_unittest.UnittestProto.TestPackedTypes;
 import protobuf_unittest.UnittestProto.TestRequired;
 import protobuf_unittest.UnittestProto.TestRequiredForeign;
 import protobuf_unittest.UnittestProto.TestUnpackedTypes;
-
 import junit.framework.TestCase;
 
 import java.util.Map;

+ 45 - 0
java/core/src/test/java/com/google/protobuf/AnyTest.java

@@ -75,6 +75,51 @@ public class AnyTest extends TestCase {
     }
   }
 
+  public void testCustomTypeUrls() throws Exception {
+    TestAllTypes.Builder builder = TestAllTypes.newBuilder();
+    TestUtil.setAllFields(builder);
+    TestAllTypes message = builder.build();
+
+    TestAny container = TestAny.newBuilder()
+        .setValue(Any.pack(message, "xxx.com")).build();
+
+    assertEquals(
+        "xxx.com/" + TestAllTypes.getDescriptor().getFullName(),
+        container.getValue().getTypeUrl());
+
+    assertTrue(container.getValue().is(TestAllTypes.class));
+    assertFalse(container.getValue().is(TestAny.class));
+
+    TestAllTypes result = container.getValue().unpack(TestAllTypes.class);
+    TestUtil.assertAllFieldsSet(result);
+
+    container = TestAny.newBuilder()
+        .setValue(Any.pack(message, "yyy.com/")).build();
+
+    assertEquals(
+        "yyy.com/" + TestAllTypes.getDescriptor().getFullName(),
+        container.getValue().getTypeUrl());
+
+    assertTrue(container.getValue().is(TestAllTypes.class));
+    assertFalse(container.getValue().is(TestAny.class));
+
+    result = container.getValue().unpack(TestAllTypes.class);
+    TestUtil.assertAllFieldsSet(result);
+
+    container = TestAny.newBuilder()
+        .setValue(Any.pack(message, "")).build();
+
+    assertEquals(
+        "/" + TestAllTypes.getDescriptor().getFullName(),
+        container.getValue().getTypeUrl());
+
+    assertTrue(container.getValue().is(TestAllTypes.class));
+    assertFalse(container.getValue().is(TestAny.class));
+
+    result = container.getValue().unpack(TestAllTypes.class);
+    TestUtil.assertAllFieldsSet(result);
+  }
+
   public void testCachedUnpackResult() throws Exception {
     TestAllTypes.Builder builder = TestAllTypes.newBuilder();
     TestUtil.setAllFields(builder);

+ 0 - 1
java/core/src/test/java/com/google/protobuf/BoundedByteStringTest.java

@@ -37,7 +37,6 @@ import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
 import java.io.UnsupportedEncodingException;
 
-
 /**
  * This class tests {@link BoundedByteString}, which extends {@link LiteralByteString},
  * by inheriting the tests from {@link LiteralByteStringTest}.  The only method which

+ 81 - 0
java/core/src/test/java/com/google/protobuf/ByteBufferWriterTest.java

@@ -0,0 +1,81 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import junit.framework.TestCase;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.Random;
+
+/**
+ * Tests for {@link ByteBufferWriter}.
+ *
+ * <p>Each test fills a buffer with pseudo-random bytes, writes it to a
+ * {@link ByteArrayOutputStream} and checks that the stream received the same bytes and that the
+ * buffer position is still zero.
+ */
+public class ByteBufferWriterTest extends TestCase {
+
+  /** Fixed seed so that a failing run can be reproduced with identical buffer contents. */
+  private static final long RANDOM_SEED = 20160101L;
+
+  public void testHeapBuffer() throws IOException {
+    // Test a small and large buffer.
+    testWrite(ByteBuffer.allocate(100));
+    testWrite(ByteBuffer.allocate(1024 * 100));
+  }
+
+  public void testDirectBuffer() throws IOException {
+    // Test a small and large buffer.
+    testWrite(ByteBuffer.allocateDirect(100));
+    testWrite(ByteBuffer.allocateDirect(1024 * 100));
+  }
+
+  /** Fills {@code buffer}, writes it out and verifies both the output and the position. */
+  private void testWrite(ByteBuffer buffer) throws IOException {
+    fillRandom(buffer);
+    ByteArrayOutputStream os = new ByteArrayOutputStream(buffer.remaining());
+    ByteBufferWriter.write(buffer, os);
+    assertEquals(0, buffer.position());
+    assertTrue(Arrays.equals(toArray(buffer), os.toByteArray()));
+  }
+
+  /** Fills the remaining space of {@code buf} with pseudo-random bytes and flips it. */
+  private void fillRandom(ByteBuffer buf) {
+    byte[] bytes = new byte[buf.remaining()];
+    new Random(RANDOM_SEED).nextBytes(bytes);
+    buf.put(bytes);
+    buf.flip();
+  }
+
+  /** Copies the remaining bytes of {@code buf} into a new array and restores its position. */
+  private byte[] toArray(ByteBuffer buf) {
+    int originalPosition = buf.position();
+    byte[] bytes = new byte[buf.remaining()];
+    buf.get(bytes);
+    buf.position(originalPosition);
+    return bytes;
+  }
+}

+ 14 - 0
java/core/src/test/java/com/google/protobuf/ByteStringTest.java

@@ -39,6 +39,7 @@ import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.lang.reflect.Field;
 import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
@@ -757,4 +758,17 @@ public class ByteStringTest extends TestCase {
     assertEquals((byte) 2, result[dataSize - dataSize / 2]);
     assertEquals((byte) 2, result[dataSize - 1]);
   }
+  
+  /**
+   * Tests that ByteString uses the Arrays-based byte copier when running under the HotSpot VM.
+   */
+  public void testByteArrayCopier() throws Exception {
+    Field copierField = ByteString.class.getDeclaredField("byteArrayCopier");
+    copierField.setAccessible(true);
+    // The field is read with a null receiver, i.e. as a static field.
+    Object copier = copierField.get(null);
+    assertNotNull(copier);
+    String simpleName = copier.getClass().getSimpleName();
+    assertTrue(copier.toString(), simpleName.endsWith("ArraysByteArrayCopier"));
+  }
 }

+ 1 - 0
java/core/src/test/java/com/google/protobuf/CheckUtf8Test.java

@@ -34,6 +34,7 @@ import proto2_test_check_utf8.TestCheckUtf8.BytesWrapper;
 import proto2_test_check_utf8.TestCheckUtf8.StringWrapper;
 import proto2_test_check_utf8_size.TestCheckUtf8Size.BytesWrapperSize;
 import proto2_test_check_utf8_size.TestCheckUtf8Size.StringWrapperSize;
+
 import junit.framework.TestCase;
 
 /**

+ 50 - 0
java/core/src/test/java/com/google/protobuf/CodedInputStreamTest.java

@@ -547,6 +547,56 @@ public class CodedInputStreamTest extends TestCase {
     }
   }
 
+  /**
+   * Writes a 4096-character string as a length-delimited field and checks that
+   * {@code readString()} on a stream-backed input returns it unchanged.
+   */
+  public void testReadString() throws Exception {
+    // Build exactly 4096 characters by repeating a short phrase and truncating.
+    final String phrase = "Lorem ipsum dolor sit amet ";
+    StringBuilder repeated = new StringBuilder();
+    while (repeated.length() < 4096) {
+      repeated.append(phrase);
+    }
+    final String expected = repeated.substring(0, 4096);
+    final byte[] payload = expected.getBytes("UTF-8");
+
+    // Hand-encode the tag, the length and the raw bytes.
+    ByteString.Output sink = ByteString.newOutput();
+    CodedOutputStream encoder = CodedOutputStream.newInstance(sink, payload.length);
+    final int tag = WireFormat.makeTag(1, WireFormat.WIRETYPE_LENGTH_DELIMITED);
+    encoder.writeRawVarint32(tag);
+    encoder.writeRawVarint32(payload.length);
+    encoder.writeRawBytes(payload);
+    encoder.flush();
+
+    byte[] encoded = sink.toByteString().toByteArray();
+    CodedInputStream input = CodedInputStream.newInstance(new ByteArrayInputStream(encoded));
+    assertEquals(tag, input.readTag());
+    assertEquals(expected, input.readString());
+  }
+
+  /**
+   * Writes a 4096-character string as a length-delimited field and checks that
+   * {@code readStringRequireUtf8()} on a stream-backed input returns it unchanged.
+   */
+  public void testReadStringRequireUtf8() throws Exception {
+    // Build exactly 4096 characters by repeating a short phrase and truncating.
+    final String phrase = "Lorem ipsum dolor sit amet ";
+    StringBuilder repeated = new StringBuilder();
+    while (repeated.length() < 4096) {
+      repeated.append(phrase);
+    }
+    final String expected = repeated.substring(0, 4096);
+    final byte[] payload = expected.getBytes("UTF-8");
+
+    // Hand-encode the tag, the length and the raw bytes.
+    ByteString.Output sink = ByteString.newOutput();
+    CodedOutputStream encoder = CodedOutputStream.newInstance(sink, payload.length);
+    final int tag = WireFormat.makeTag(1, WireFormat.WIRETYPE_LENGTH_DELIMITED);
+    encoder.writeRawVarint32(tag);
+    encoder.writeRawVarint32(payload.length);
+    encoder.writeRawBytes(payload);
+    encoder.flush();
+
+    byte[] encoded = sink.toByteString().toByteArray();
+    CodedInputStream input = CodedInputStream.newInstance(new ByteArrayInputStream(encoded));
+    assertEquals(tag, input.readTag());
+    assertEquals(expected, input.readStringRequireUtf8());
+  }
+
   /**
    * Tests that if we readString invalid UTF-8 bytes, no exception
    * is thrown.  Instead, the invalid bytes are replaced with the Unicode

+ 1 - 0
java/core/src/test/java/com/google/protobuf/DeprecatedFieldTest.java

@@ -36,6 +36,7 @@ import junit.framework.TestCase;
 
 import java.lang.reflect.AnnotatedElement;
 import java.lang.reflect.Method;
+
 /**
  * Test field deprecation
  * 

+ 36 - 1
java/core/src/test/java/com/google/protobuf/DescriptorsTest.java

@@ -59,7 +59,6 @@ import protobuf_unittest.UnittestProto.TestMultipleExtensionRanges;
 import protobuf_unittest.UnittestProto.TestRequired;
 import protobuf_unittest.UnittestProto.TestReservedFields;
 import protobuf_unittest.UnittestProto.TestService;
-
 import junit.framework.TestCase;
 
 import java.util.Arrays;
@@ -573,6 +572,42 @@ public class DescriptorsTest extends TestCase {
     }
   }
 
+  /**
+   * Building a file whose field refers to the undefined type "Bar" must fail with a
+   * {@link DescriptorValidationException} that names the type and says it is not defined.
+   */
+  public void testUnknownFieldsDenied() throws Exception {
+    FieldDescriptorProto.Builder barField = FieldDescriptorProto.newBuilder()
+        .setLabel(FieldDescriptorProto.Label.LABEL_OPTIONAL)
+        .setTypeName("Bar")
+        .setName("bar")
+        .setNumber(1);
+    DescriptorProto.Builder fooMessage = DescriptorProto.newBuilder()
+        .setName("Foo")
+        .addField(barField);
+    FileDescriptorProto fooProto = FileDescriptorProto.newBuilder()
+        .setName("foo.proto")
+        .addMessageType(fooMessage)
+        .build();
+
+    try {
+      Descriptors.FileDescriptor.buildFrom(fooProto, new FileDescriptor[0]);
+      fail("DescriptorValidationException expected");
+    } catch (DescriptorValidationException e) {
+      String detail = e.getMessage();
+      assertTrue(detail.contains("Bar"));
+      assertTrue(detail.contains("is not defined"));
+    }
+  }
+
+  /**
+   * Building the same kind of file with the three-argument {@code buildFrom} overload (third
+   * argument {@code true}) and a declared but unsupplied dependency must not throw.
+   */
+  public void testUnknownFieldsAllowed() throws Exception {
+    FieldDescriptorProto.Builder barField = FieldDescriptorProto.newBuilder()
+        .setLabel(FieldDescriptorProto.Label.LABEL_OPTIONAL)
+        .setTypeName("Bar")
+        .setName("bar")
+        .setNumber(1);
+    DescriptorProto.Builder fooMessage = DescriptorProto.newBuilder()
+        .setName("Foo")
+        .addField(barField);
+    FileDescriptorProto fooProto = FileDescriptorProto.newBuilder()
+        .setName("foo.proto")
+        .addDependency("bar.proto")
+        .addMessageType(fooMessage)
+        .build();
+
+    // No assertion: the test passes as long as no exception is raised.
+    Descriptors.FileDescriptor.buildFrom(fooProto, new FileDescriptor[0], true);
+  }
+
   public void testHiddenDependency() throws Exception {
     FileDescriptorProto barProto = FileDescriptorProto.newBuilder()
         .setName("bar.proto")

+ 1 - 1
java/core/src/test/java/com/google/protobuf/DynamicMessageTest.java

@@ -33,13 +33,13 @@ package com.google.protobuf;
 import com.google.protobuf.Descriptors.EnumDescriptor;
 import com.google.protobuf.Descriptors.FieldDescriptor;
 import com.google.protobuf.Descriptors.OneofDescriptor;
-
 import protobuf_unittest.UnittestProto.TestAllExtensions;
 import protobuf_unittest.UnittestProto.TestAllTypes;
 import protobuf_unittest.UnittestProto.TestEmptyMessage;
 import protobuf_unittest.UnittestProto.TestPackedTypes;
 
 import junit.framework.TestCase;
+
 import java.util.Arrays;
 
 /**

+ 76 - 0
java/core/src/test/java/com/google/protobuf/EnumTest.java

@@ -0,0 +1,76 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import com.google.protobuf.UnittestLite.ForeignEnumLite;
+import com.google.protobuf.UnittestLite.TestAllTypesLite;
+import protobuf_unittest.UnittestProto.ForeignEnum;
+import protobuf_unittest.UnittestProto.TestAllTypes;
+
+import junit.framework.TestCase;
+
+public class EnumTest extends TestCase {
+  
+  public void testForNumber() {
+    ForeignEnum e = ForeignEnum.forNumber(ForeignEnum.FOREIGN_BAR.getNumber());
+    assertEquals(ForeignEnum.FOREIGN_BAR, e);
+
+    e = ForeignEnum.forNumber(1000);
+    assertEquals(null, e);
+  }
+  
+  public void testForNumber_oneof() {
+    TestAllTypes.OneofFieldCase e = TestAllTypes.OneofFieldCase.forNumber(
+        TestAllTypes.OneofFieldCase.ONEOF_NESTED_MESSAGE.getNumber());
+    assertEquals(TestAllTypes.OneofFieldCase.ONEOF_NESTED_MESSAGE, e);
+
+    e = TestAllTypes.OneofFieldCase.forNumber(1000);
+    assertEquals(null, e);
+  }
+  
+  public void testForNumberLite() {
+    ForeignEnumLite e = ForeignEnumLite.forNumber(ForeignEnumLite.FOREIGN_LITE_BAR.getNumber());
+    assertEquals(ForeignEnumLite.FOREIGN_LITE_BAR, e);
+
+    e = ForeignEnumLite.forNumber(1000);
+    assertEquals(null, e);
+  }
+  
+  public void testForNumberLite_oneof() {
+    TestAllTypesLite.OneofFieldCase e = TestAllTypesLite.OneofFieldCase.forNumber(
+        TestAllTypesLite.OneofFieldCase.ONEOF_NESTED_MESSAGE.getNumber());
+    assertEquals(TestAllTypesLite.OneofFieldCase.ONEOF_NESTED_MESSAGE, e);
+
+    e = TestAllTypesLite.OneofFieldCase.forNumber(1000);
+    assertEquals(null, e);
+  }
+}
+

+ 49 - 49
java/core/src/test/java/com/google/protobuf/FieldPresenceTest.java

@@ -44,9 +44,9 @@ import junit.framework.TestCase;
  * non-message fields.
  */
 public class FieldPresenceTest extends TestCase {
-  private static boolean hasMethod(Class clazz, String name) {
+  private static boolean hasMethod(Class<?> clazz, String name) {
     try {
-      if (clazz.getMethod(name, new Class[]{}) != null) {
+      if (clazz.getMethod(name) != null) {
         return true;
       } else {
         return false;
@@ -56,90 +56,90 @@ public class FieldPresenceTest extends TestCase {
     }
   }
 
-  private static boolean isHasMethodRemoved(
-      Class classWithFieldPresence,
-      Class classWithoutFieldPresence,
+  private static void assertHasMethodRemoved(
+      Class<?> classWithFieldPresence,
+      Class<?> classWithoutFieldPresence,
       String camelName) {
-    return hasMethod(classWithFieldPresence, "get" + camelName)
-        && hasMethod(classWithFieldPresence, "has" + camelName)
-        && hasMethod(classWithoutFieldPresence, "get" + camelName)
-        && !hasMethod(classWithoutFieldPresence, "has" + camelName);
+    assertTrue(hasMethod(classWithFieldPresence, "get" + camelName));
+    assertTrue(hasMethod(classWithFieldPresence, "has" + camelName));
+    assertTrue(hasMethod(classWithoutFieldPresence, "get" + camelName));
+    assertFalse(hasMethod(classWithoutFieldPresence, "has" + camelName));
   }
 
   public void testHasMethod() {
     // Optional non-message fields don't have a hasFoo() method generated.
-    assertTrue(isHasMethodRemoved(
+    assertHasMethodRemoved(
         UnittestProto.TestAllTypes.class,
         TestAllTypes.class,
-        "OptionalInt32"));
-    assertTrue(isHasMethodRemoved(
+        "OptionalInt32");
+    assertHasMethodRemoved(
         UnittestProto.TestAllTypes.class,
         TestAllTypes.class,
-        "OptionalString"));
-    assertTrue(isHasMethodRemoved(
+        "OptionalString");
+    assertHasMethodRemoved(
         UnittestProto.TestAllTypes.class,
         TestAllTypes.class,
-        "OptionalBytes"));
-    assertTrue(isHasMethodRemoved(
+        "OptionalBytes");
+    assertHasMethodRemoved(
         UnittestProto.TestAllTypes.class,
         TestAllTypes.class,
-        "OptionalNestedEnum"));
+        "OptionalNestedEnum");
 
-    assertTrue(isHasMethodRemoved(
+    assertHasMethodRemoved(
         UnittestProto.TestAllTypes.Builder.class,
         TestAllTypes.Builder.class,
-        "OptionalInt32"));
-    assertTrue(isHasMethodRemoved(
+        "OptionalInt32");
+    assertHasMethodRemoved(
         UnittestProto.TestAllTypes.Builder.class,
         TestAllTypes.Builder.class,
-        "OptionalString"));
-    assertTrue(isHasMethodRemoved(
+        "OptionalString");
+    assertHasMethodRemoved(
         UnittestProto.TestAllTypes.Builder.class,
         TestAllTypes.Builder.class,
-        "OptionalBytes"));
-    assertTrue(isHasMethodRemoved(
+        "OptionalBytes");
+    assertHasMethodRemoved(
         UnittestProto.TestAllTypes.Builder.class,
         TestAllTypes.Builder.class,
-        "OptionalNestedEnum"));
+        "OptionalNestedEnum");
 
     // message fields still have the hasFoo() method generated.
     assertFalse(TestAllTypes.newBuilder().build().hasOptionalNestedMessage());
     assertFalse(TestAllTypes.newBuilder().hasOptionalNestedMessage());
 
     // oneof fields don't have hasFoo() methods (even for message types).
-    assertTrue(isHasMethodRemoved(
+    assertHasMethodRemoved(
         UnittestProto.TestAllTypes.class,
         TestAllTypes.class,
-        "OneofUint32"));
-    assertTrue(isHasMethodRemoved(
+        "OneofUint32");
+    assertHasMethodRemoved(
         UnittestProto.TestAllTypes.class,
         TestAllTypes.class,
-        "OneofString"));
-    assertTrue(isHasMethodRemoved(
+        "OneofString");
+    assertHasMethodRemoved(
         UnittestProto.TestAllTypes.class,
         TestAllTypes.class,
-        "OneofBytes"));
-    assertTrue(isHasMethodRemoved(
+        "OneofBytes");
+    assertHasMethodRemoved(
         UnittestProto.TestAllTypes.class,
         TestAllTypes.class,
-        "OneofNestedMessage"));
+        "OneofNestedMessage");
 
-    assertTrue(isHasMethodRemoved(
+    assertHasMethodRemoved(
         UnittestProto.TestAllTypes.Builder.class,
         TestAllTypes.Builder.class,
-        "OneofUint32"));
-    assertTrue(isHasMethodRemoved(
+        "OneofUint32");
+    assertHasMethodRemoved(
         UnittestProto.TestAllTypes.Builder.class,
         TestAllTypes.Builder.class,
-        "OneofString"));
-    assertTrue(isHasMethodRemoved(
+        "OneofString");
+    assertHasMethodRemoved(
         UnittestProto.TestAllTypes.Builder.class,
         TestAllTypes.Builder.class,
-        "OneofBytes"));
-    assertTrue(isHasMethodRemoved(
+        "OneofBytes");
+    assertHasMethodRemoved(
         UnittestProto.TestAllTypes.Builder.class,
         TestAllTypes.Builder.class,
-        "OneofNestedMessage"));
+        "OneofNestedMessage");
   }
 
   public void testOneofEquals() throws Exception {
@@ -232,24 +232,24 @@ public class FieldPresenceTest extends TestCase {
     assertTrue(message.hasField(optionalNestedEnumField));
     assertEquals(4, message.getAllFields().size());
   }
-  
+
   public void testMessageField() {
     TestAllTypes.Builder builder = TestAllTypes.newBuilder();
     assertFalse(builder.hasOptionalNestedMessage());
     assertFalse(builder.build().hasOptionalNestedMessage());
-    
+
     TestAllTypes.NestedMessage.Builder nestedBuilder =
         builder.getOptionalNestedMessageBuilder();
     assertTrue(builder.hasOptionalNestedMessage());
     assertTrue(builder.build().hasOptionalNestedMessage());
-    
+
     nestedBuilder.setValue(1);
     assertEquals(1, builder.build().getOptionalNestedMessage().getValue());
-    
+
     builder.clearOptionalNestedMessage();
     assertFalse(builder.hasOptionalNestedMessage());
     assertFalse(builder.build().hasOptionalNestedMessage());
-    
+
     // Unlike non-message fields, if we set a message field to its default value (i.e.,
     // default instance), the field should be seen as present.
     builder.setOptionalNestedMessage(TestAllTypes.NestedMessage.getDefaultInstance());
@@ -340,7 +340,7 @@ public class FieldPresenceTest extends TestCase {
     assertTrue(builder.buildPartial().isInitialized());
   }
 
-  
+
   // Test that unknown fields are dropped.
   public void testUnknownFields() throws Exception {
     TestAllTypes.Builder builder = TestAllTypes.newBuilder();
@@ -348,7 +348,7 @@ public class FieldPresenceTest extends TestCase {
     builder.addRepeatedInt32(5678);
     TestAllTypes message = builder.build();
     ByteString data = message.toByteString();
-    
+
     TestOptionalFieldsOnly optionalOnlyMessage =
         TestOptionalFieldsOnly.parseFrom(data);
     // UnknownFieldSet should be empty.
@@ -360,7 +360,7 @@ public class FieldPresenceTest extends TestCase {
     // The repeated field is discarded because it's unknown to the optional-only
     // message.
     assertEquals(0, message.getRepeatedInt32Count());
-    
+
     DynamicMessage dynamicOptionalOnlyMessage =
         DynamicMessage.getDefaultInstance(
             TestOptionalFieldsOnly.getDescriptor())

+ 15 - 0
java/core/src/test/java/com/google/protobuf/GeneratedMessageTest.java

@@ -620,6 +620,21 @@ public class GeneratedMessageTest extends TestCase {
     TestUtil.assertExtensionsClear(TestAllExtensions.newBuilder().build());
   }
 
+  public void testUnsetRepeatedExtensionGetField() {
+    TestAllExtensions message = TestAllExtensions.getDefaultInstance();
+    Object value;
+
+    value = message.getField(UnittestProto.repeatedStringExtension.getDescriptor());
+    assertTrue(value instanceof List);
+    assertTrue(((List<?>) value).isEmpty());
+    assertIsUnmodifiable((List<?>) value);
+
+    value = message.getField(UnittestProto.repeatedNestedMessageExtension.getDescriptor());
+    assertTrue(value instanceof List);
+    assertTrue(((List<?>) value).isEmpty());
+    assertIsUnmodifiable((List<?>) value);
+  }
+
   public void testExtensionReflectionGetters() throws Exception {
     TestAllExtensions.Builder builder = TestAllExtensions.newBuilder();
     TestUtil.setAllExtensions(builder);

+ 38 - 35
java/core/src/test/java/com/google/protobuf/IsValidUtf8Test.java

@@ -30,12 +30,18 @@
 
 package com.google.protobuf;
 
+import static com.google.protobuf.IsValidUtf8TestUtil.DIRECT_NIO_FACTORY;
+import static com.google.protobuf.IsValidUtf8TestUtil.EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT;
+import static com.google.protobuf.IsValidUtf8TestUtil.EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT;
+import static com.google.protobuf.IsValidUtf8TestUtil.HEAP_NIO_FACTORY;
+import static com.google.protobuf.IsValidUtf8TestUtil.LITERAL_FACTORY;
+import static com.google.protobuf.IsValidUtf8TestUtil.testBytes;
+
+import com.google.protobuf.IsValidUtf8TestUtil.ByteStringFactory;
 import com.google.protobuf.IsValidUtf8TestUtil.Shard;
 
 import junit.framework.TestCase;
 
-import java.io.UnsupportedEncodingException;
-
 /**
  * Tests cases for {@link ByteString#isValidUtf8()}. This includes three
  * brute force tests that actually test every permutation of one byte, two byte,
@@ -51,31 +57,33 @@ import java.io.UnsupportedEncodingException;
  * @author martinrb@google.com (Martin Buchholz)
  */
 public class IsValidUtf8Test extends TestCase {
-
   /**
    * Tests that round tripping of all one byte permutations works.
    */
-  public void testIsValidUtf8_1Byte() throws UnsupportedEncodingException {
-    IsValidUtf8TestUtil.testBytes(1,
-        IsValidUtf8TestUtil.EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT);
+  public void testIsValidUtf8_1Byte() {
+    testBytes(LITERAL_FACTORY, 1, EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT);
+    testBytes(HEAP_NIO_FACTORY, 1, EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT);
+    testBytes(DIRECT_NIO_FACTORY, 1, EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT);
   }
 
   /**
    * Tests that round tripping of all two byte permutations work.
    */
-  public void testIsValidUtf8_2Bytes() throws UnsupportedEncodingException {
-    IsValidUtf8TestUtil.testBytes(2,
-        IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT);
+  public void testIsValidUtf8_2Bytes() {
+    testBytes(LITERAL_FACTORY, 2, IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT);
+    testBytes(HEAP_NIO_FACTORY, 2, IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT);
+    testBytes(DIRECT_NIO_FACTORY, 2, IsValidUtf8TestUtil.EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT);
   }
 
   /**
    * Tests that round tripping of all three byte permutations work.
    */
-  public void testIsValidUtf8_3Bytes() throws UnsupportedEncodingException {
+  public void testIsValidUtf8_3Bytes() {
     // Travis' OOM killer doesn't like this test
     if (System.getenv("TRAVIS") == null) {
-      IsValidUtf8TestUtil.testBytes(3,
-          IsValidUtf8TestUtil.EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT);
+      testBytes(LITERAL_FACTORY, 3, EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT);
+      testBytes(HEAP_NIO_FACTORY, 3, EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT);
+      testBytes(DIRECT_NIO_FACTORY, 3, EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT);
     }
   }
 
@@ -85,8 +93,7 @@ public class IsValidUtf8Test extends TestCase {
    * {@link IsValidUtf8FourByteTest} is used for full coverage. This method
    * tests specific four-byte cases.
    */
-  public void testIsValidUtf8_4BytesSamples()
-      throws UnsupportedEncodingException {
+  public void testIsValidUtf8_4BytesSamples() {
     // Valid 4 byte.
     assertValidUtf8(0xF0, 0xA4, 0xAD, 0xA2);
 
@@ -119,9 +126,7 @@ public class IsValidUtf8Test extends TestCase {
     assertTrue(asBytes("\u024B62\u024B62").isValidUtf8());
 
     // Mixed string
-    assertTrue(
-        asBytes("a\u020ac\u00a2b\\u024B62u020acc\u00a2de\u024B62")
-        .isValidUtf8());
+    assertTrue(asBytes("a\u020ac\u00a2b\\u024B62u020acc\u00a2de\u024B62").isValidUtf8());
 
     // Not a valid string
     assertInvalidUtf8(-1, 0, -1, 0);
@@ -135,36 +140,35 @@ public class IsValidUtf8Test extends TestCase {
     return realBytes;
   }
 
-  private ByteString toByteString(int... bytes) {
-    return ByteString.copyFrom(toByteArray(bytes));
-  }
-
-  private void assertValidUtf8(int[] bytes, boolean not) {
+  private void assertValidUtf8(ByteStringFactory factory, int[] bytes, boolean not) {
     byte[] realBytes = toByteArray(bytes);
     assertTrue(not ^ Utf8.isValidUtf8(realBytes));
     assertTrue(not ^ Utf8.isValidUtf8(realBytes, 0, bytes.length));
-    ByteString lit = ByteString.copyFrom(realBytes);
-    ByteString sub = lit.substring(0, bytes.length);
-    assertTrue(not ^ lit.isValidUtf8());
+    ByteString leaf = factory.newByteString(realBytes);
+    ByteString sub = leaf.substring(0, bytes.length);
+    assertTrue(not ^ leaf.isValidUtf8());
     assertTrue(not ^ sub.isValidUtf8());
     ByteString[] ropes = {
-      RopeByteString.newInstanceForTest(ByteString.EMPTY, lit),
-      RopeByteString.newInstanceForTest(ByteString.EMPTY, sub),
-      RopeByteString.newInstanceForTest(lit, ByteString.EMPTY),
-      RopeByteString.newInstanceForTest(sub, ByteString.EMPTY),
-      RopeByteString.newInstanceForTest(sub, lit)
-    };
+        RopeByteString.newInstanceForTest(ByteString.EMPTY, leaf),
+        RopeByteString.newInstanceForTest(ByteString.EMPTY, sub),
+        RopeByteString.newInstanceForTest(leaf, ByteString.EMPTY),
+        RopeByteString.newInstanceForTest(sub, ByteString.EMPTY),
+        RopeByteString.newInstanceForTest(sub, leaf)};
     for (ByteString rope : ropes) {
       assertTrue(not ^ rope.isValidUtf8());
     }
   }
 
   private void assertValidUtf8(int... bytes) {
-    assertValidUtf8(bytes, false);
+    assertValidUtf8(LITERAL_FACTORY, bytes, false);
+    assertValidUtf8(HEAP_NIO_FACTORY, bytes, false);
+    assertValidUtf8(DIRECT_NIO_FACTORY, bytes, false);
   }
 
   private void assertInvalidUtf8(int... bytes) {
-    assertValidUtf8(bytes, true);
+    assertValidUtf8(LITERAL_FACTORY, bytes, true);
+    assertValidUtf8(HEAP_NIO_FACTORY, bytes, true);
+    assertValidUtf8(DIRECT_NIO_FACTORY, bytes, true);
   }
 
   private static ByteString asBytes(String s) {
@@ -177,7 +181,6 @@ public class IsValidUtf8Test extends TestCase {
     for (Shard shard : IsValidUtf8TestUtil.FOUR_BYTE_SHARDS) {
       actual += shard.expected;
     }
-    assertEquals(IsValidUtf8TestUtil.EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT,
-        actual);
+    assertEquals(IsValidUtf8TestUtil.EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT, actual);
   }
 }

+ 115 - 87
java/core/src/test/java/com/google/protobuf/IsValidUtf8TestUtil.java

@@ -30,9 +30,13 @@
 
 package com.google.protobuf;
 
-import static junit.framework.Assert.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
-import java.io.UnsupportedEncodingException;
+import java.lang.ref.SoftReference;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.CharsetDecoder;
@@ -52,64 +56,105 @@ import java.util.logging.Logger;
  * @author jonp@google.com (Jon Perlow)
  * @author martinrb@google.com (Martin Buchholz)
  */
-class IsValidUtf8TestUtil {
-  private static Logger logger = Logger.getLogger(
-      IsValidUtf8TestUtil.class.getName());
+final class IsValidUtf8TestUtil {
+  private static Logger logger = Logger.getLogger(IsValidUtf8TestUtil.class.getName());
+
+  private IsValidUtf8TestUtil() {}
+
+  static interface ByteStringFactory {
+    ByteString newByteString(byte[] bytes);
+  }
+
+  static final ByteStringFactory LITERAL_FACTORY = new ByteStringFactory() {
+    @Override
+    public ByteString newByteString(byte[] bytes) {
+      return ByteString.wrap(bytes);
+    }
+  };
+
+  static final ByteStringFactory HEAP_NIO_FACTORY = new ByteStringFactory() {
+    @Override
+    public ByteString newByteString(byte[] bytes) {
+      return new NioByteString(ByteBuffer.wrap(bytes));
+    }
+  };
+
+  private static ThreadLocal<SoftReference<ByteBuffer>> directBuffer =
+      new ThreadLocal<SoftReference<ByteBuffer>>();
+
+  /**
+   * Factory for {@link ByteString} instances backed by a direct {@link ByteBuffer}. To reduce
+   * direct memory usage, this uses a thread local direct buffer. This means that each call will
+   * overwrite the buffer's contents from the previous call, so the calling code must be careful
+   * not to continue using a {@link ByteString} returned from a previous invocation.
+   */
+  static final ByteStringFactory DIRECT_NIO_FACTORY = new ByteStringFactory() {
+    @Override
+    public ByteString newByteString(byte[] bytes) {
+      SoftReference<ByteBuffer> ref = directBuffer.get();
+      ByteBuffer buffer = ref == null ? null : ref.get();
+      if (buffer == null || buffer.capacity() < bytes.length) {
+        buffer = ByteBuffer.allocateDirect(bytes.length);
+        directBuffer.set(new SoftReference<ByteBuffer>(buffer));
+      }
+      buffer.clear();
+      buffer.put(bytes);
+      buffer.flip();
+      return new NioByteString(buffer);
+    }
+  };
 
   // 128 - [chars 0x0000 to 0x007f]
-  static long ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x007f - 0x0000 + 1;
+  static final long ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x007f - 0x0000 + 1;
 
   // 128
-  static long EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT =
-      ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS;
+  static final long EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT = ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS;
 
   // 1920 [chars 0x0080 to 0x07FF]
-  static long TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x07FF - 0x0080 + 1;
+  static final long TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x07FF - 0x0080 + 1;
 
   // 18,304
-  static long EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT =
+  static final long EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT =
       // Both bytes are one byte characters
       (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 2) +
       // The possible number of two byte characters
       TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS;
 
   // 2048
-  static long THREE_BYTE_SURROGATES = 2 * 1024;
+  static final long THREE_BYTE_SURROGATES = 2 * 1024;
 
   // 61,440 [chars 0x0800 to 0xFFFF, minus surrogates]
-  static long THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS =
+  static final long THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS =
       0xFFFF - 0x0800 + 1 - THREE_BYTE_SURROGATES;
 
   // 2,650,112
-  static long EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT =
+  static final long EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT =
       // All one byte characters
       (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 3) +
       // One two byte character and a one byte character
-      2 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS *
-          ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
-       // Three byte characters
+      2 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
+      // Three byte characters
       THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS;
 
   // 1,048,576 [chars 0x10000L to 0x10FFFF]
-  static long FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x10FFFF - 0x10000L + 1;
+  static final long FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS = 0x10FFFF - 0x10000L + 1;
 
   // 289,571,839
-  static long EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT =
+  static final long EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT =
       // All one byte characters
       (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 4) +
       // One and three byte characters
-      2 * THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS *
-          ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
+      2 * THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
       // Two two byte characters
       TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS +
       // Permutations of one and two byte characters
-      3 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS *
-          ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS *
-          ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
+      3 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS
+      * ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS
+      +
       // Four byte characters
       FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS;
 
-  static class Shard {
+  static final class Shard {
     final long index;
     final long start;
     final long lim;
@@ -138,7 +183,7 @@ class IsValidUtf8TestUtil {
 
     // 97-111 are all 2342912
     for (int i = 97; i <= 111; i++) {
-     expected[i] = 2342912;
+      expected[i] = 2342912;
     }
 
     // 113-117 are all 1048576
@@ -158,22 +203,18 @@ class IsValidUtf8TestUtil {
     return expected;
   }
 
-  static final List<Shard> FOUR_BYTE_SHARDS = generateFourByteShards(
-      128, FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES);
+  static final List<Shard> FOUR_BYTE_SHARDS =
+      generateFourByteShards(128, FOUR_BYTE_SHARDS_EXPECTED_ROUNTRIPPABLES);
 
 
-  private static List<Shard> generateFourByteShards(
-      int numShards, long[] expected) {
+  private static List<Shard> generateFourByteShards(int numShards, long[] expected) {
     assertEquals(numShards, expected.length);
     List<Shard> shards = new ArrayList<Shard>(numShards);
     long LIM = 1L << 32;
     long increment = LIM / numShards;
     assertTrue(LIM % numShards == 0);
     for (int i = 0; i < numShards; i++) {
-      shards.add(new Shard(i,
-          increment * i,
-          increment * (i + 1),
-          expected[i]));
+      shards.add(new Shard(i, increment * i, increment * (i + 1), expected[i]));
     }
     return shards;
   }
@@ -182,12 +223,12 @@ class IsValidUtf8TestUtil {
    * Helper to run the loop to test all the permutations for the number of bytes
    * specified.
    *
+   * @param factory the factory for {@link ByteString} instances.
    * @param numBytes the number of bytes in the byte array
    * @param expectedCount the expected number of roundtrippable permutations
    */
-  static void testBytes(int numBytes, long expectedCount)
-      throws UnsupportedEncodingException {
-    testBytes(numBytes, expectedCount, 0, -1);
+  static void testBytes(ByteStringFactory factory, int numBytes, long expectedCount) {
+    testBytes(factory, numBytes, expectedCount, 0, -1);
   }
 
   /**
@@ -195,14 +236,15 @@ class IsValidUtf8TestUtil {
    * specified. This overload is useful for debugging to get the loop to start
    * at a certain character.
    *
+   * @param factory the factory for {@link ByteString} instances.
    * @param numBytes the number of bytes in the byte array
    * @param expectedCount the expected number of roundtrippable permutations
    * @param start the starting bytes encoded as a long as big-endian
    * @param lim the limit of bytes to process encoded as a long as big-endian,
    *     or -1 to mean the max limit for numBytes
    */
-  static void testBytes(int numBytes, long expectedCount, long start, long lim)
-      throws UnsupportedEncodingException {
+  static void testBytes(
+      ByteStringFactory factory, int numBytes, long expectedCount, long start, long lim) {
     Random rnd = new Random();
     byte[] bytes = new byte[numBytes];
 
@@ -217,7 +259,7 @@ class IsValidUtf8TestUtil {
         bytes[bytes.length - i - 1] = (byte) tmpByteChar;
         tmpByteChar = tmpByteChar >> 8;
       }
-      ByteString bs = ByteString.copyFrom(bytes);
+      ByteString bs = factory.newByteString(bytes);
       boolean isRoundTrippable = bs.isValidUtf8();
       String s = new String(bytes, Internal.UTF_8);
       byte[] bytesReencoded = s.getBytes(Internal.UTF_8);
@@ -236,14 +278,15 @@ class IsValidUtf8TestUtil {
       int i = rnd.nextInt(numBytes);
       int j = rnd.nextInt(numBytes);
       if (j < i) {
-        int tmp = i; i = j; j = tmp;
+        int tmp = i;
+        i = j;
+        j = tmp;
       }
       int state1 = Utf8.partialIsValidUtf8(Utf8.COMPLETE, bytes, 0, i);
       int state2 = Utf8.partialIsValidUtf8(state1, bytes, i, j);
       int state3 = Utf8.partialIsValidUtf8(state2, bytes, j, numBytes);
       if (isRoundTrippable != (state3 == Utf8.COMPLETE)) {
-        System.out.printf("state=%04x %04x %04x i=%d j=%d%n",
-                          state1, state2, state3, i, j);
+        System.out.printf("state=%04x %04x %04x i=%d j=%d%n", state1, state2, state3, i, j);
         outputFailure(byteChar, bytes, bytesReencoded);
       }
       assertEquals(isRoundTrippable, (state3 == Utf8.COMPLETE));
@@ -251,36 +294,24 @@ class IsValidUtf8TestUtil {
       // Test ropes built out of small partial sequences
       ByteString rope = RopeByteString.newInstanceForTest(
           bs.substring(0, i),
-          RopeByteString.newInstanceForTest(
-              bs.substring(i, j),
-              bs.substring(j, numBytes)));
+          RopeByteString.newInstanceForTest(bs.substring(i, j), bs.substring(j, numBytes)));
       assertSame(RopeByteString.class, rope.getClass());
 
-      ByteString[] byteStrings = { bs, bs.substring(0, numBytes), rope };
+      ByteString[] byteStrings = {bs, bs.substring(0, numBytes), rope};
       for (ByteString x : byteStrings) {
-        assertEquals(isRoundTrippable,
-                     x.isValidUtf8());
-        assertEquals(state3,
-                     x.partialIsValidUtf8(Utf8.COMPLETE, 0, numBytes));
-
-        assertEquals(state1,
-                     x.partialIsValidUtf8(Utf8.COMPLETE, 0, i));
-        assertEquals(state1,
-                     x.substring(0, i).partialIsValidUtf8(Utf8.COMPLETE, 0, i));
-        assertEquals(state2,
-                     x.partialIsValidUtf8(state1, i, j - i));
-        assertEquals(state2,
-                     x.substring(i, j).partialIsValidUtf8(state1, 0, j - i));
-        assertEquals(state3,
-                     x.partialIsValidUtf8(state2, j, numBytes - j));
-        assertEquals(state3,
-                     x.substring(j, numBytes)
-                     .partialIsValidUtf8(state2, 0, numBytes - j));
+        assertEquals(isRoundTrippable, x.isValidUtf8());
+        assertEquals(state3, x.partialIsValidUtf8(Utf8.COMPLETE, 0, numBytes));
+
+        assertEquals(state1, x.partialIsValidUtf8(Utf8.COMPLETE, 0, i));
+        assertEquals(state1, x.substring(0, i).partialIsValidUtf8(Utf8.COMPLETE, 0, i));
+        assertEquals(state2, x.partialIsValidUtf8(state1, i, j - i));
+        assertEquals(state2, x.substring(i, j).partialIsValidUtf8(state1, 0, j - i));
+        assertEquals(state3, x.partialIsValidUtf8(state2, j, numBytes - j));
+        assertEquals(state3, x.substring(j, numBytes).partialIsValidUtf8(state2, 0, numBytes - j));
       }
 
       // ByteString reduplication should not affect its UTF-8 validity.
-      ByteString ropeADope =
-          RopeByteString.newInstanceForTest(bs, bs.substring(0, numBytes));
+      ByteString ropeADope = RopeByteString.newInstanceForTest(bs, bs.substring(0, numBytes));
       assertEquals(isRoundTrippable, ropeADope.isValidUtf8());
 
       if (isRoundTrippable) {
@@ -288,8 +319,7 @@ class IsValidUtf8TestUtil {
       }
       count++;
       if (byteChar != 0 && byteChar % 1000000L == 0) {
-        logger.info("Processed " + (byteChar / 1000000L) +
-            " million characters");
+        logger.info("Processed " + (byteChar / 1000000L) + " million characters");
       }
     }
     logger.info("Round tripped " + countRoundTripped + " of " + count);
@@ -303,25 +333,26 @@ class IsValidUtf8TestUtil {
    * actual String class, it's possible for incompatibilities to develop
    * (although unlikely).
    *
+   * @param factory the factory for {@link ByteString} instances.
    * @param numBytes the number of bytes in the byte array
    * @param expectedCount the expected number of roundtrippable permutations
    * @param start the starting bytes encoded as a long as big-endian
    * @param lim the limit of bytes to process encoded as a long as big-endian,
    *     or -1 to mean the max limit for numBytes
    */
-  void testBytesUsingByteBuffers(
-      int numBytes, long expectedCount, long start, long lim)
-      throws UnsupportedEncodingException {
-    CharsetDecoder decoder = Internal.UTF_8.newDecoder()
-        .onMalformedInput(CodingErrorAction.REPLACE)
-        .onUnmappableCharacter(CodingErrorAction.REPLACE);
-    CharsetEncoder encoder = Internal.UTF_8.newEncoder()
-        .onMalformedInput(CodingErrorAction.REPLACE)
-        .onUnmappableCharacter(CodingErrorAction.REPLACE);
+  static void testBytesUsingByteBuffers(
+      ByteStringFactory factory, int numBytes, long expectedCount, long start, long lim) {
+    CharsetDecoder decoder =
+        Internal.UTF_8.newDecoder()
+            .onMalformedInput(CodingErrorAction.REPLACE)
+            .onUnmappableCharacter(CodingErrorAction.REPLACE);
+    CharsetEncoder encoder =
+        Internal.UTF_8.newEncoder()
+            .onMalformedInput(CodingErrorAction.REPLACE)
+            .onUnmappableCharacter(CodingErrorAction.REPLACE);
     byte[] bytes = new byte[numBytes];
     int maxChars = (int) (decoder.maxCharsPerByte() * numBytes) + 1;
-    char[] charsDecoded =
-        new char[(int) (decoder.maxCharsPerByte() * numBytes) + 1];
+    char[] charsDecoded = new char[(int) (decoder.maxCharsPerByte() * numBytes) + 1];
     int maxBytes = (int) (encoder.maxBytesPerChar() * maxChars) + 1;
     byte[] bytesReencoded = new byte[maxBytes];
 
@@ -347,7 +378,7 @@ class IsValidUtf8TestUtil {
         bytes[bytes.length - i - 1] = (byte) tmpByteChar;
         tmpByteChar = tmpByteChar >> 8;
       }
-      boolean isRoundTrippable = ByteString.copyFrom(bytes).isValidUtf8();
+      boolean isRoundTrippable = factory.newByteString(bytes).isValidUtf8();
       CoderResult result = decoder.decode(bb, cb, true);
       assertFalse(result.isError());
       result = decoder.flush(cb);
@@ -382,8 +413,7 @@ class IsValidUtf8TestUtil {
         countRoundTripped++;
       }
       if (byteChar != 0 && byteChar % 1000000 == 0) {
-        logger.info("Processed " + (byteChar / 1000000) +
-            " million characters");
+        logger.info("Processed " + (byteChar / 1000000) + " million characters");
       }
     }
     logger.info("Round tripped " + countRoundTripped + " of " + count);
@@ -394,10 +424,9 @@ class IsValidUtf8TestUtil {
     outputFailure(byteChar, bytes, after, after.length);
   }
 
-  private static void outputFailure(long byteChar, byte[] bytes, byte[] after,
-      int len) {
-    fail("Failure: (" + Long.toHexString(byteChar) + ") " +
-        toHexString(bytes) + " => " + toHexString(after, len));
+  private static void outputFailure(long byteChar, byte[] bytes, byte[] after, int len) {
+    fail("Failure: (" + Long.toHexString(byteChar) + ") " + toHexString(bytes) + " => "
+        + toHexString(after, len));
   }
 
   private static String toHexString(byte[] b) {
@@ -416,5 +445,4 @@ class IsValidUtf8TestUtil {
     s.append("\"");
     return s.toString();
   }
-
 }

+ 2 - 1
java/core/src/test/java/com/google/protobuf/LazyFieldLiteTest.java

@@ -36,9 +36,10 @@ import static protobuf_unittest.UnittestProto.optionalInt64Extension;
 import protobuf_unittest.UnittestProto.TestAllExtensions;
 import protobuf_unittest.UnittestProto.TestAllTypes;
 
-import java.io.IOException;
 import junit.framework.TestCase;
 
+import java.io.IOException;
+
 /**
  * Unit test for {@link LazyFieldLite}.
  *

+ 2 - 1
java/core/src/test/java/com/google/protobuf/LazyFieldTest.java

@@ -33,9 +33,10 @@ package com.google.protobuf;
 import protobuf_unittest.UnittestProto.TestAllExtensions;
 import protobuf_unittest.UnittestProto.TestAllTypes;
 
-import java.io.IOException;
 import junit.framework.TestCase;
 
+import java.io.IOException;
+
 /**
  * Unit test for {@link LazyField}.
  *

+ 17 - 0
java/core/src/test/java/com/google/protobuf/LiteEqualsAndHashTest.java

@@ -33,6 +33,8 @@ package com.google.protobuf;
 import protobuf_unittest.lite_equals_and_hash.LiteEqualsAndHash.Bar;
 import protobuf_unittest.lite_equals_and_hash.LiteEqualsAndHash.BarPrime;
 import protobuf_unittest.lite_equals_and_hash.LiteEqualsAndHash.Foo;
+import protobuf_unittest.lite_equals_and_hash.LiteEqualsAndHash.TestOneofEquals;
+import protobuf_unittest.lite_equals_and_hash.LiteEqualsAndHash.TestRecursiveOneof;
 
 import junit.framework.TestCase;
 
@@ -83,6 +85,16 @@ public class LiteEqualsAndHashTest extends TestCase {
     assertFalse(bar.equals(barPrime));
   }
 
+  public void testOneofEquals() throws Exception {
+    TestOneofEquals.Builder builder = TestOneofEquals.newBuilder();
+    TestOneofEquals message1 = builder.build();
+    // Set message2's name field to its default value. The two messages should still differ
+    // because the comparison takes the oneof case into account.
+    builder.setName("");
+    TestOneofEquals message2 = builder.build();
+    assertFalse(message1.equals(message2));
+  }
+
   public void testEqualsAndHashCodeWithUnknownFields() throws InvalidProtocolBufferException {
     Foo fooWithOnlyValue = Foo.newBuilder()
         .setValue(1)
@@ -105,4 +117,9 @@ public class LiteEqualsAndHashTest extends TestCase {
     assertFalse(o1.equals(o2));
     assertFalse(o1.hashCode() == o2.hashCode());
   }
+
+  public void testRecursiveHashcode() {
+    // This tests that hashCode() does not loop infinitely.
+    TestRecursiveOneof.getDefaultInstance().hashCode();
+  }
 }

+ 166 - 27
java/core/src/test/java/com/google/protobuf/LiteTest.java

@@ -49,6 +49,7 @@ import junit.framework.TestCase;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
+import java.io.NotSerializableException;
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
 
@@ -128,33 +129,7 @@ public class LiteTest extends TestCase {
     assertEquals(7, message2.getExtension(
         UnittestLite.optionalNestedMessageExtensionLite).getBb());
   }
-
-  public void testSerialize() throws Exception {
-    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    TestAllTypesLite expected =
-      TestAllTypesLite.newBuilder()
-                      .setOptionalInt32(123)
-                      .addRepeatedString("hello")
-                      .setOptionalNestedMessage(
-                          TestAllTypesLite.NestedMessage.newBuilder().setBb(7))
-                      .build();
-    ObjectOutputStream out = new ObjectOutputStream(baos);
-    try {
-      out.writeObject(expected);
-    } finally {
-      out.close();
-    }
-    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
-    ObjectInputStream in = new ObjectInputStream(bais);
-    TestAllTypesLite actual = (TestAllTypesLite) in.readObject();
-    assertEquals(expected.getOptionalInt32(), actual.getOptionalInt32());
-    assertEquals(expected.getRepeatedStringCount(),
-        actual.getRepeatedStringCount());
-    assertEquals(expected.getRepeatedString(0),
-        actual.getRepeatedString(0));
-    assertEquals(expected.getOptionalNestedMessage().getBb(),
-        actual.getOptionalNestedMessage().getBb());
-  }
+  
   
   public void testClone() {
     TestAllTypesLite.Builder expected = TestAllTypesLite.newBuilder()
@@ -1459,4 +1434,168 @@ public class LiteTest extends TestCase {
         11, (int) extendableMessage.getExtension(
             UnittestLite.optionalFixed32ExtensionLite));
   }
+
+  public void testToStringDefaultInstance() throws Exception {
+    assertToStringEquals("", TestAllTypesLite.getDefaultInstance());
+  }
+
+  public void testToStringPrimitives() throws Exception {
+    TestAllTypesLite proto = TestAllTypesLite.newBuilder()
+        .setOptionalInt32(1)
+        .setOptionalInt64(9223372036854775807L)
+        .build();
+    assertToStringEquals("optional_int32: 1\noptional_int64: 9223372036854775807", proto);
+
+    proto = TestAllTypesLite.newBuilder()
+        .setOptionalBool(true)
+        .setOptionalNestedEnum(TestAllTypesLite.NestedEnum.BAZ)
+        .build();
+    assertToStringEquals("optional_bool: true\noptional_nested_enum: BAZ", proto);
+
+    proto = TestAllTypesLite.newBuilder()
+        .setOptionalFloat(2.72f)
+        .setOptionalDouble(3.14)
+        .build();
+    assertToStringEquals("optional_float: 2.72\noptional_double: 3.14", proto);
+  }
+
+  public void testToStringStringFields() throws Exception {
+    TestAllTypesLite proto = TestAllTypesLite.newBuilder()
+        .setOptionalString("foo\"bar\nbaz\\")
+        .build();
+    assertToStringEquals("optional_string: \"foo\\\"bar\\nbaz\\\\\"", proto);
+
+    proto = TestAllTypesLite.newBuilder()
+        .setOptionalString("\u6587")
+        .build();
+    assertToStringEquals("optional_string: \"\\346\\226\\207\"", proto);
+  }
+
+  public void testToStringNestedMessage() throws Exception {
+    TestAllTypesLite proto = TestAllTypesLite.newBuilder()
+        .setOptionalNestedMessage(TestAllTypesLite.NestedMessage.getDefaultInstance())
+        .build();
+    assertToStringEquals("optional_nested_message {\n}", proto);
+
+    proto = TestAllTypesLite.newBuilder()
+        .setOptionalNestedMessage(
+            TestAllTypesLite.NestedMessage.newBuilder().setBb(7))
+        .build();
+    assertToStringEquals("optional_nested_message {\n  bb: 7\n}", proto);
+  }
+
+  public void testToStringRepeatedFields() throws Exception {
+    TestAllTypesLite proto = TestAllTypesLite.newBuilder()
+        .addRepeatedInt32(32)
+        .addRepeatedInt32(32)
+        .addRepeatedInt64(64)
+        .build();
+    assertToStringEquals("repeated_int32: 32\nrepeated_int32: 32\nrepeated_int64: 64", proto);
+
+    proto = TestAllTypesLite.newBuilder()
+        .addRepeatedLazyMessage(
+            TestAllTypesLite.NestedMessage.newBuilder().setBb(7))
+        .addRepeatedLazyMessage(
+            TestAllTypesLite.NestedMessage.newBuilder().setBb(8))
+        .build();
+    assertToStringEquals(
+        "repeated_lazy_message {\n  bb: 7\n}\nrepeated_lazy_message {\n  bb: 8\n}",
+        proto);
+  }
+
+  public void testToStringForeignFields() throws Exception {
+    TestAllTypesLite proto = TestAllTypesLite.newBuilder()
+        .setOptionalForeignEnum(ForeignEnumLite.FOREIGN_LITE_BAR)
+        .setOptionalForeignMessage(
+            ForeignMessageLite.newBuilder()
+            .setC(3))
+        .build();
+    assertToStringEquals(
+        "optional_foreign_message {\n  c: 3\n}\noptional_foreign_enum: FOREIGN_LITE_BAR",
+        proto);
+  }
+
+  public void testToStringExtensions() throws Exception {
+    TestAllExtensionsLite message = TestAllExtensionsLite.newBuilder()
+        .setExtension(UnittestLite.optionalInt32ExtensionLite, 123)
+        .addExtension(UnittestLite.repeatedStringExtensionLite, "spam")
+        .addExtension(UnittestLite.repeatedStringExtensionLite, "eggs")
+        .setExtension(UnittestLite.optionalNestedEnumExtensionLite,
+            TestAllTypesLite.NestedEnum.BAZ)
+        .setExtension(UnittestLite.optionalNestedMessageExtensionLite,
+            TestAllTypesLite.NestedMessage.newBuilder().setBb(7).build())
+        .build();
+    assertToStringEquals(
+        "[1]: 123\n[18] {\n  bb: 7\n}\n[21]: 3\n[44]: \"spam\"\n[44]: \"eggs\"",
+        message);
+  }
+
+  public void testToStringUnknownFields() throws Exception {
+    TestAllExtensionsLite messageWithExtensions = TestAllExtensionsLite.newBuilder()
+        .setExtension(UnittestLite.optionalInt32ExtensionLite, 123)
+        .addExtension(UnittestLite.repeatedStringExtensionLite, "spam")
+        .addExtension(UnittestLite.repeatedStringExtensionLite, "eggs")
+        .setExtension(UnittestLite.optionalNestedEnumExtensionLite,
+            TestAllTypesLite.NestedEnum.BAZ)
+        .setExtension(UnittestLite.optionalNestedMessageExtensionLite,
+            TestAllTypesLite.NestedMessage.newBuilder().setBb(7).build())
+        .build();
+    TestAllExtensionsLite messageWithUnknownFields = TestAllExtensionsLite.parseFrom(
+        messageWithExtensions.toByteArray());
+    assertToStringEquals(
+        "1: 123\n18: \"\\b\\a\"\n21: 3\n44: \"spam\"\n44: \"eggs\"",
+        messageWithUnknownFields);
+  }
+
+  // Asserts that the toString() representation of the message matches the expected value. This
+  // verifies that the first line starts with a comment but excludes that comment from the
+  // comparison because it contains unstable addresses.
+  private static void assertToStringEquals(String expected, MessageLite message) {
+    String toString = message.toString();
+    assertEquals('#', toString.charAt(0));
+    if (toString.indexOf("\n") >= 0) {
+      toString = toString.substring(toString.indexOf("\n") + 1);
+    } else {
+      toString = "";
+    }
+    assertEquals(expected, toString);
+  }
+  
+  public void testParseLazy() throws Exception {
+    ByteString bb = TestAllTypesLite.newBuilder()
+        .setOptionalLazyMessage(NestedMessage.newBuilder()
+            .setBb(11)
+            .build())
+        .build().toByteString();
+    ByteString cc = TestAllTypesLite.newBuilder()
+        .setOptionalLazyMessage(NestedMessage.newBuilder()
+            .setCc(22)
+            .build())
+        .build().toByteString();
+    
+    ByteString concat = bb.concat(cc);
+    TestAllTypesLite message = TestAllTypesLite.parseFrom(concat);
+
+    assertEquals(11, message.getOptionalLazyMessage().getBb());
+    assertEquals(22L, message.getOptionalLazyMessage().getCc());
+  }
+  
+  public void testParseLazy_oneOf() throws Exception {
+    ByteString bb = TestAllTypesLite.newBuilder()
+        .setOneofLazyNestedMessage(NestedMessage.newBuilder()
+            .setBb(11)
+            .build())
+        .build().toByteString();
+    ByteString cc = TestAllTypesLite.newBuilder()
+        .setOneofLazyNestedMessage(NestedMessage.newBuilder()
+            .setCc(22)
+            .build())
+        .build().toByteString();
+    
+    ByteString concat = bb.concat(cc);
+    TestAllTypesLite message = TestAllTypesLite.parseFrom(concat);
+
+    assertEquals(11, message.getOneofLazyNestedMessage().getBb());
+    assertEquals(22L, message.getOneofLazyNestedMessage().getCc());
+  }
 }

+ 60 - 10
java/core/src/test/java/com/google/protobuf/LiteralByteStringTest.java

@@ -47,9 +47,9 @@ import java.util.List;
 import java.util.NoSuchElementException;
 
 /**
- * Test {@link LiteralByteString} by setting up a reference string in {@link #setUp()}.
- * This class is designed to be extended for testing extensions of {@link LiteralByteString}
- * such as {@link BoundedByteString}, see {@link BoundedByteStringTest}.
+ * Test {@code LiteralByteString} by setting up a reference string in {@link #setUp()}.
+ * This class is designed to be extended for testing extensions of {@code LiteralByteString}
+ * such as {@code BoundedByteString}, see {@link BoundedByteStringTest}.
  *
  * @author carlanton@google.com (Carl Haverl)
  */
@@ -304,25 +304,75 @@ public class LiteralByteStringTest extends TestCase {
         Arrays.equals(referenceBytes, roundTripBytes));
   }
 
-  public void testWriteTo_mutating() throws IOException {
+  public void testWriteToShouldNotExposeInternalBufferToOutputStream() throws IOException {
     OutputStream os = new OutputStream() {
       @Override
       public void write(byte[] b, int off, int len) {
-        for (int x = 0; x < len; ++x) {
-          b[off + x] = (byte) 0;
-        }
+        Arrays.fill(b, off, off + len, (byte) 0);
       }
 
       @Override
       public void write(int b) {
-        // Purposefully left blank.
+        throw new UnsupportedOperationException();
       }
     };
 
     stringUnderTest.writeTo(os);
-    byte[] newBytes = stringUnderTest.toByteArray();
     assertTrue(classUnderTest + ".writeTo() must not grant access to underlying array",
-        Arrays.equals(referenceBytes, newBytes));
+        Arrays.equals(referenceBytes, stringUnderTest.toByteArray()));
+  }
+
+  /**
+   * Passes a stream that zeroes every array it is handed to {@code writeToInternal()} and asserts
+   * that the string under test afterwards reads back as all zeros, i.e. that the stream was given
+   * the string's own storage rather than a copy.
+   */
+  public void testWriteToInternalShouldExposeInternalBufferToOutputStream() throws IOException {
+    OutputStream os = new OutputStream() {
+      @Override
+      public void write(byte[] b, int off, int len) {
+        Arrays.fill(b, off, off + len, (byte) 0);
+      }
+
+      @Override
+      public void write(int b) {
+        // The test expects only the bulk write to be used.
+        throw new UnsupportedOperationException();
+      }
+    };
+
+    stringUnderTest.writeToInternal(os, 0, stringUnderTest.size());
+    byte[] allZeros = new byte[stringUnderTest.size()];
+    assertTrue(classUnderTest + ".writeToInternal() must grant access to underlying array",
+        Arrays.equals(allZeros, stringUnderTest.toByteArray()));
+  }
+
+  /**
+   * Passes a {@code ByteOutput} whose {@code writeLazy(byte[], int, int)} zeroes the array it is
+   * handed to {@code writeTo(ByteOutput)} and asserts that the string under test afterwards reads
+   * back as all zeros. Every other {@code ByteOutput} method throws, so the test also fails if a
+   * different write path is taken.
+   */
+  public void testWriteToShouldExposeInternalBufferToByteOutput() throws IOException {
+    ByteOutput out = new ByteOutput() {
+      @Override
+      public void write(byte value) throws IOException {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public void write(byte[] value, int offset, int length) throws IOException {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public void writeLazy(byte[] value, int offset, int length) throws IOException {
+        Arrays.fill(value, offset, offset + length, (byte) 0);
+      }
+
+      @Override
+      public void write(ByteBuffer value) throws IOException {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public void writeLazy(ByteBuffer value) throws IOException {
+        throw new UnsupportedOperationException();
+      }
+    };
+
+    stringUnderTest.writeTo(out);
+    byte[] allZeros = new byte[stringUnderTest.size()];
+    // The method exercised here is writeTo(ByteOutput), so the failure message names writeTo().
+    assertTrue(classUnderTest + ".writeTo() must grant access to underlying array",
+        Arrays.equals(allZeros, stringUnderTest.toByteArray()));
   }
 
   public void testNewOutput() throws IOException {

+ 1 - 1
java/core/src/test/java/com/google/protobuf/MapForProto2LiteTest.java

@@ -432,7 +432,7 @@ public class MapForProto2LiteTest extends TestCase {
     assertEquals(1, messageWithUnknownEnums.getInt32ToInt32Field().get(1).intValue());
     assertEquals(54321, messageWithUnknownEnums.getInt32ToInt32Field().get(2).intValue());
   }
-  
+
 
   public void testIterationOrder() throws Exception {
     TestMap.Builder builder = TestMap.newBuilder();

+ 0 - 1
java/core/src/test/java/com/google/protobuf/MapForProto2Test.java

@@ -36,7 +36,6 @@ import map_test.MapForProto2TestProto.TestMap.MessageValue;
 import map_test.MapForProto2TestProto.TestMap.MessageWithRequiredFields;
 import map_test.MapForProto2TestProto.TestRecursiveMap;
 import map_test.MapForProto2TestProto.TestUnknownEnumValue;
-
 import junit.framework.TestCase;
 
 import java.util.ArrayList;

+ 0 - 1
java/core/src/test/java/com/google/protobuf/MapTest.java

@@ -37,7 +37,6 @@ import com.google.protobuf.Descriptors.FieldDescriptor;
 import map_test.MapTestProto.TestMap;
 import map_test.MapTestProto.TestMap.MessageValue;
 import map_test.MapTestProto.TestOnChangeEventPropagation;
-
 import junit.framework.TestCase;
 
 import java.util.ArrayList;

+ 2 - 2
java/core/src/test/java/com/google/protobuf/MessageTest.java

@@ -30,11 +30,11 @@
 
 package com.google.protobuf;
 
-import protobuf_unittest.UnittestProto.TestAllTypes;
+import protobuf_unittest.UnittestProto.ForeignMessage;
 import protobuf_unittest.UnittestProto.TestAllExtensions;
+import protobuf_unittest.UnittestProto.TestAllTypes;
 import protobuf_unittest.UnittestProto.TestRequired;
 import protobuf_unittest.UnittestProto.TestRequiredForeign;
-import protobuf_unittest.UnittestProto.ForeignMessage;
 
 import junit.framework.TestCase;
 

+ 1 - 1
java/core/src/test/java/com/google/protobuf/NestedBuildersTest.java

@@ -35,8 +35,8 @@ import protobuf_unittest.Wheel;
 
 import junit.framework.TestCase;
 
-import java.util.List;
 import java.util.ArrayList;
+import java.util.List;
 
 /**
  * Test cases that exercise end-to-end use cases involving

+ 145 - 70
java/core/src/test/java/com/google/protobuf/NioByteStringTest.java

@@ -41,6 +41,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
+import java.io.OutputStream;
 import java.io.UnsupportedEncodingException;
 import java.nio.BufferOverflowException;
 import java.nio.ByteBuffer;
@@ -56,11 +57,12 @@ public class NioByteStringTest extends TestCase {
   private static final String CLASSNAME = NioByteString.class.getSimpleName();
   private static final byte[] BYTES = ByteStringTest.getTestBytes(1234, 11337766L);
   private static final int EXPECTED_HASH = ByteString.wrap(BYTES).hashCode();
-  private static final ByteBuffer BUFFER = ByteBuffer.wrap(BYTES.clone());
-  private static final ByteString TEST_STRING = new NioByteString(BUFFER);
+
+  private final ByteBuffer backingBuffer = ByteBuffer.wrap(BYTES.clone());
+  private final ByteString testString = new NioByteString(backingBuffer);
 
   public void testExpectedType() {
-    String actualClassName = getActualClassName(TEST_STRING);
+    String actualClassName = getActualClassName(testString);
     assertEquals(CLASSNAME + " should match type exactly", CLASSNAME, actualClassName);
   }
 
@@ -73,14 +75,14 @@ public class NioByteStringTest extends TestCase {
   public void testByteAt() {
     boolean stillEqual = true;
     for (int i = 0; stillEqual && i < BYTES.length; ++i) {
-      stillEqual = (BYTES[i] == TEST_STRING.byteAt(i));
+      stillEqual = (BYTES[i] == testString.byteAt(i));
     }
     assertTrue(CLASSNAME + " must capture the right bytes", stillEqual);
   }
 
   public void testByteIterator() {
     boolean stillEqual = true;
-    ByteString.ByteIterator iter = TEST_STRING.iterator();
+    ByteString.ByteIterator iter = testString.iterator();
     for (int i = 0; stillEqual && i < BYTES.length; ++i) {
       stillEqual = (iter.hasNext() && BYTES[i] == iter.nextByte());
     }
@@ -98,7 +100,7 @@ public class NioByteStringTest extends TestCase {
   public void testByteIterable() {
     boolean stillEqual = true;
     int j = 0;
-    for (byte quantum : TEST_STRING) {
+    for (byte quantum : testString) {
       stillEqual = (BYTES[j] == quantum);
       ++j;
     }
@@ -108,15 +110,15 @@ public class NioByteStringTest extends TestCase {
 
   public void testSize() {
     assertEquals(CLASSNAME + " must have the expected size", BYTES.length,
-        TEST_STRING.size());
+        testString.size());
   }
 
   public void testGetTreeDepth() {
-    assertEquals(CLASSNAME + " must have depth 0", 0, TEST_STRING.getTreeDepth());
+    assertEquals(CLASSNAME + " must have depth 0", 0, testString.getTreeDepth());
   }
 
   public void testIsBalanced() {
-    assertTrue(CLASSNAME + " is technically balanced", TEST_STRING.isBalanced());
+    assertTrue(CLASSNAME + " is technically balanced", testString.isBalanced());
   }
 
   public void testCopyTo_ByteArrayOffsetLength() {
@@ -124,7 +126,7 @@ public class NioByteStringTest extends TestCase {
     int length = 100;
     byte[] destination = new byte[destinationOffset + length];
     int sourceOffset = 213;
-    TEST_STRING.copyTo(destination, sourceOffset, destinationOffset, length);
+    testString.copyTo(destination, sourceOffset, destinationOffset, length);
     boolean stillEqual = true;
     for (int i = 0; stillEqual && i < length; ++i) {
       stillEqual = BYTES[i + sourceOffset] == destination[i + destinationOffset];
@@ -139,7 +141,7 @@ public class NioByteStringTest extends TestCase {
 
     try {
       // Copy one too many bytes
-      TEST_STRING.copyTo(destination, TEST_STRING.size() + 1 - length,
+      testString.copyTo(destination, testString.size() + 1 - length,
           destinationOffset, length);
       fail("Should have thrown an exception when copying too many bytes of a "
           + CLASSNAME);
@@ -149,7 +151,7 @@ public class NioByteStringTest extends TestCase {
 
     try {
       // Copy with illegal negative sourceOffset
-      TEST_STRING.copyTo(destination, -1, destinationOffset, length);
+      testString.copyTo(destination, -1, destinationOffset, length);
       fail("Should have thrown an exception when given a negative sourceOffset in "
           + CLASSNAME);
     } catch (IndexOutOfBoundsException expected) {
@@ -158,7 +160,7 @@ public class NioByteStringTest extends TestCase {
 
     try {
       // Copy with illegal negative destinationOffset
-      TEST_STRING.copyTo(destination, 0, -1, length);
+      testString.copyTo(destination, 0, -1, length);
       fail("Should have thrown an exception when given a negative destinationOffset in "
           + CLASSNAME);
     } catch (IndexOutOfBoundsException expected) {
@@ -167,7 +169,7 @@ public class NioByteStringTest extends TestCase {
 
     try {
       // Copy with illegal negative size
-      TEST_STRING.copyTo(destination, 0, 0, -1);
+      testString.copyTo(destination, 0, 0, -1);
       fail("Should have thrown an exception when given a negative size in "
           + CLASSNAME);
     } catch (IndexOutOfBoundsException expected) {
@@ -176,7 +178,7 @@ public class NioByteStringTest extends TestCase {
 
     try {
       // Copy with illegal too-large sourceOffset
-      TEST_STRING.copyTo(destination, 2 * TEST_STRING.size(), 0, length);
+      testString.copyTo(destination, 2 * testString.size(), 0, length);
       fail("Should have thrown an exception when the destinationOffset is too large in "
           + CLASSNAME);
     } catch (IndexOutOfBoundsException expected) {
@@ -185,7 +187,7 @@ public class NioByteStringTest extends TestCase {
 
     try {
       // Copy with illegal too-large destinationOffset
-      TEST_STRING.copyTo(destination, 0, 2 * destination.length, length);
+      testString.copyTo(destination, 0, 2 * destination.length, length);
       fail("Should have thrown an exception when the destinationOffset is too large in "
           + CLASSNAME);
     } catch (IndexOutOfBoundsException expected) {
@@ -196,21 +198,21 @@ public class NioByteStringTest extends TestCase {
   public void testCopyTo_ByteBuffer() {
     // Same length.
     ByteBuffer myBuffer = ByteBuffer.allocate(BYTES.length);
-    TEST_STRING.copyTo(myBuffer);
+    testString.copyTo(myBuffer);
     myBuffer.flip();
     assertEquals(CLASSNAME + ".copyTo(ByteBuffer) must give back the same bytes",
-        BUFFER, myBuffer);
+        backingBuffer, myBuffer);
 
     // Target buffer bigger than required.
-    myBuffer = ByteBuffer.allocate(TEST_STRING.size() + 1);
-    TEST_STRING.copyTo(myBuffer);
+    myBuffer = ByteBuffer.allocate(testString.size() + 1);
+    testString.copyTo(myBuffer);
     myBuffer.flip();
-    assertEquals(BUFFER, myBuffer);
+    assertEquals(backingBuffer, myBuffer);
 
     // Target buffer has no space.
     myBuffer = ByteBuffer.allocate(0);
     try {
-      TEST_STRING.copyTo(myBuffer);
+      testString.copyTo(myBuffer);
       fail("Should have thrown an exception when target ByteBuffer has insufficient capacity");
     } catch (BufferOverflowException e) {
       // Expected.
@@ -219,7 +221,7 @@ public class NioByteStringTest extends TestCase {
     // Target buffer too small.
     myBuffer = ByteBuffer.allocate(1);
     try {
-      TEST_STRING.copyTo(myBuffer);
+      testString.copyTo(myBuffer);
       fail("Should have thrown an exception when target ByteBuffer has insufficient capacity");
     } catch (BufferOverflowException e) {
       // Expected.
@@ -227,26 +229,26 @@ public class NioByteStringTest extends TestCase {
   }
 
   public void testMarkSupported() {
-    InputStream stream = TEST_STRING.newInput();
+    InputStream stream = testString.newInput();
     assertTrue(CLASSNAME + ".newInput() must support marking", stream.markSupported());
   }
 
   public void testMarkAndReset() throws IOException {
-    int fraction = TEST_STRING.size() / 3;
+    int fraction = testString.size() / 3;
 
-    InputStream stream = TEST_STRING.newInput();
-    stream.mark(TEST_STRING.size()); // First, mark() the end.
+    InputStream stream = testString.newInput();
+    stream.mark(testString.size()); // First, mark() the end.
 
     skipFully(stream, fraction); // Skip a large fraction, but not all.
     assertEquals(
         CLASSNAME + ": after skipping to the 'middle', half the bytes are available",
-        (TEST_STRING.size() - fraction), stream.available());
+        (testString.size() - fraction), stream.available());
     stream.reset();
     assertEquals(
         CLASSNAME + ": after resetting, all bytes are available",
-        TEST_STRING.size(), stream.available());
+        testString.size(), stream.available());
 
-    skipFully(stream, TEST_STRING.size()); // Skip to the end.
+    skipFully(stream, testString.size()); // Skip to the end.
     assertEquals(
         CLASSNAME + ": after skipping to the end, no more bytes are available",
         0, stream.available());
@@ -284,7 +286,7 @@ public class NioByteStringTest extends TestCase {
   }
 
   public void testAsReadOnlyByteBuffer() {
-    ByteBuffer byteBuffer = TEST_STRING.asReadOnlyByteBuffer();
+    ByteBuffer byteBuffer = testString.asReadOnlyByteBuffer();
     byte[] roundTripBytes = new byte[BYTES.length];
     assertTrue(byteBuffer.remaining() == BYTES.length);
     assertTrue(byteBuffer.isReadOnly());
@@ -294,7 +296,7 @@ public class NioByteStringTest extends TestCase {
   }
 
   public void testAsReadOnlyByteBufferList() {
-    List<ByteBuffer> byteBuffers = TEST_STRING.asReadOnlyByteBufferList();
+    List<ByteBuffer> byteBuffers = testString.asReadOnlyByteBufferList();
     int bytesSeen = 0;
     byte[] roundTripBytes = new byte[BYTES.length];
     for (ByteBuffer byteBuffer : byteBuffers) {
@@ -310,25 +312,98 @@ public class NioByteStringTest extends TestCase {
   }
 
   public void testToByteArray() {
-    byte[] roundTripBytes = TEST_STRING.toByteArray();
+    byte[] roundTripBytes = testString.toByteArray();
     assertTrue(CLASSNAME + ".toByteArray() must give back the same bytes",
         Arrays.equals(BYTES, roundTripBytes));
   }
 
   public void testWriteTo() throws IOException {
     ByteArrayOutputStream bos = new ByteArrayOutputStream();
-    TEST_STRING.writeTo(bos);
+    testString.writeTo(bos);
     byte[] roundTripBytes = bos.toByteArray();
     assertTrue(CLASSNAME + ".writeTo() must give back the same bytes",
         Arrays.equals(BYTES, roundTripBytes));
   }
 
+  /**
+   * Passes a stream that zeroes every array it is handed to {@code writeTo(OutputStream)} and
+   * asserts that the buffer backing the string under test still holds its original contents.
+   */
+  public void testWriteToShouldNotExposeInternalBufferToOutputStream() throws IOException {
+    OutputStream os = new OutputStream() {
+      @Override
+      public void write(byte[] b, int off, int len) {
+        Arrays.fill(b, off, off + len, (byte) 0);
+      }
+
+      @Override
+      public void write(int b) {
+        // The test expects only the bulk write to be used.
+        throw new UnsupportedOperationException();
+      }
+    };
+
+    byte[] original = Arrays.copyOf(BYTES, BYTES.length);
+    testString.writeTo(os);
+    // backingBuffer wraps a clone of BYTES, so BYTES itself can never be modified through
+    // testString; the check must look at the backing buffer's array to be meaningful.
+    assertTrue(CLASSNAME + ".writeTo() must NOT grant access to underlying buffer",
+        Arrays.equals(original, backingBuffer.array()));
+  }
+
+  /**
+   * Passes a stream that zeroes every array it is handed to {@code writeToInternal()} and asserts
+   * that the array behind {@code backingBuffer} is all zeros afterwards, i.e. that the stream was
+   * given the buffer's own storage rather than a copy.
+   */
+  public void testWriteToInternalShouldExposeInternalBufferToOutputStream() throws IOException {
+    OutputStream os = new OutputStream() {
+      @Override
+      public void write(byte[] b, int off, int len) {
+        Arrays.fill(b, off, off + len, (byte) 0);
+      }
+
+      @Override
+      public void write(int b) {
+        // The test expects only the bulk write to be used.
+        throw new UnsupportedOperationException();
+      }
+    };
+
+    testString.writeToInternal(os, 0, testString.size());
+    byte[] allZeros = new byte[testString.size()];
+    assertTrue(CLASSNAME + ".writeToInternal() must grant access to underlying buffer",
+        Arrays.equals(allZeros, backingBuffer.array()));
+  }
+
+  /**
+   * Passes a {@code ByteOutput} whose {@code writeLazy(ByteBuffer)} zeroes the readable region of
+   * the buffer it is handed to {@code writeTo(ByteOutput)} and asserts that the array behind
+   * {@code backingBuffer} is all zeros afterwards. Every other {@code ByteOutput} method throws,
+   * so the test also fails if a different write path is taken.
+   */
+  public void testWriteToShouldExposeInternalBufferToByteOutput() throws IOException {
+    ByteOutput out = new ByteOutput() {
+      @Override
+      public void write(byte value) throws IOException {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public void write(byte[] value, int offset, int length) throws IOException {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public void writeLazy(byte[] value, int offset, int length) throws IOException {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public void write(ByteBuffer value) throws IOException {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public void writeLazy(ByteBuffer value) throws IOException {
+        // Buffer index i lives at array()[arrayOffset() + i]; the readable region is
+        // [position, limit), so both bounds are translated into array coordinates.
+        Arrays.fill(value.array(), value.arrayOffset() + value.position(),
+            value.arrayOffset() + value.limit(), (byte) 0);
+      }
+    };
+
+    testString.writeTo(out);
+    byte[] allZeros = new byte[testString.size()];
+    assertTrue(CLASSNAME + ".writeTo() must grant access to underlying buffer",
+        Arrays.equals(allZeros, backingBuffer.array()));
+  }
+
   public void testNewOutput() throws IOException {
     ByteArrayOutputStream bos = new ByteArrayOutputStream();
     ByteString.Output output = ByteString.newOutput();
-    TEST_STRING.writeTo(output);
+    testString.writeTo(output);
     assertEquals("Output Size returns correct result",
-        output.size(), TEST_STRING.size());
+        output.size(), testString.size());
     output.writeTo(bos);
     assertTrue("Output.writeTo() must give back the same bytes",
         Arrays.equals(BYTES, bos.toByteArray()));
@@ -336,7 +411,7 @@ public class NioByteStringTest extends TestCase {
     // write the output stream to itself! This should cause it to double
     output.writeTo(output);
     assertEquals("Writing an output stream to itself is successful",
-        TEST_STRING.concat(TEST_STRING), output.toByteString());
+        testString.concat(testString), output.toByteString());
 
     output.reset();
     assertEquals("Output.reset() resets the output", 0, output.size());
@@ -373,7 +448,7 @@ public class NioByteStringTest extends TestCase {
     }
 
     try {
-      TEST_STRING.toString("invalid");
+      testString.toString("invalid");
       fail("Should have thrown an exception.");
     } catch (UnsupportedEncodingException expected) {
       // This is success
@@ -381,36 +456,36 @@ public class NioByteStringTest extends TestCase {
   }
 
   public void testEquals() {
-    assertEquals(CLASSNAME + " must not equal null", false, TEST_STRING.equals(null));
-    assertEquals(CLASSNAME + " must equal self", TEST_STRING, TEST_STRING);
+    assertEquals(CLASSNAME + " must not equal null", false, testString.equals(null));
+    assertEquals(CLASSNAME + " must equal self", testString, testString);
     assertFalse(CLASSNAME + " must not equal the empty string",
-        TEST_STRING.equals(EMPTY));
+        testString.equals(EMPTY));
     assertEquals(CLASSNAME + " empty strings must be equal",
-        EMPTY, TEST_STRING.substring(55, 55));
+        EMPTY, testString.substring(55, 55));
     assertEquals(CLASSNAME + " must equal another string with the same value",
-        TEST_STRING, new NioByteString(BUFFER));
+        testString, new NioByteString(backingBuffer));
 
     byte[] mungedBytes = mungedBytes();
     assertFalse(CLASSNAME + " must not equal every string with the same length",
-        TEST_STRING.equals(new NioByteString(ByteBuffer.wrap(mungedBytes))));
+        testString.equals(new NioByteString(ByteBuffer.wrap(mungedBytes))));
   }
 
   public void testEqualsLiteralByteString() {
     ByteString literal = ByteString.copyFrom(BYTES);
     assertEquals(CLASSNAME + " must equal LiteralByteString with same value", literal,
-        TEST_STRING);
-    assertEquals(CLASSNAME + " must equal LiteralByteString with same value", TEST_STRING,
+        testString);
+    assertEquals(CLASSNAME + " must equal LiteralByteString with same value", testString,
         literal);
     assertFalse(CLASSNAME + " must not equal the empty string",
-        TEST_STRING.equals(ByteString.EMPTY));
+        testString.equals(ByteString.EMPTY));
     assertEquals(CLASSNAME + " empty strings must be equal",
-        ByteString.EMPTY, TEST_STRING.substring(55, 55));
+        ByteString.EMPTY, testString.substring(55, 55));
 
     literal = ByteString.copyFrom(mungedBytes());
     assertFalse(CLASSNAME + " must not equal every LiteralByteString with the same length",
-        TEST_STRING.equals(literal));
+        testString.equals(literal));
     assertFalse(CLASSNAME + " must not equal every LiteralByteString with the same length",
-        literal.equals(TEST_STRING));
+        literal.equals(testString));
   }
 
   public void testEqualsRopeByteString() {
@@ -419,22 +494,22 @@ public class NioByteStringTest extends TestCase {
     ByteString rope = p1.concat(p2);
 
     assertEquals(CLASSNAME + " must equal RopeByteString with same value", rope,
-        TEST_STRING);
-    assertEquals(CLASSNAME + " must equal RopeByteString with same value", TEST_STRING,
+        testString);
+    assertEquals(CLASSNAME + " must equal RopeByteString with same value", testString,
         rope);
     assertFalse(CLASSNAME + " must not equal the empty string",
-        TEST_STRING.equals(ByteString.EMPTY.concat(ByteString.EMPTY)));
+        testString.equals(ByteString.EMPTY.concat(ByteString.EMPTY)));
     assertEquals(CLASSNAME + " empty strings must be equal",
-        ByteString.EMPTY.concat(ByteString.EMPTY), TEST_STRING.substring(55, 55));
+        ByteString.EMPTY.concat(ByteString.EMPTY), testString.substring(55, 55));
 
     byte[] mungedBytes = mungedBytes();
     p1 = ByteString.copyFrom(mungedBytes, 0, 5);
     p2 = ByteString.copyFrom(mungedBytes, 5, mungedBytes.length - 5);
     rope = p1.concat(p2);
     assertFalse(CLASSNAME + " must not equal every RopeByteString with the same length",
-        TEST_STRING.equals(rope));
+        testString.equals(rope));
     assertFalse(CLASSNAME + " must not equal every RopeByteString with the same length",
-        rope.equals(TEST_STRING));
+        rope.equals(testString));
   }
 
   private byte[] mungedBytes() {
@@ -445,12 +520,12 @@ public class NioByteStringTest extends TestCase {
   }
 
   public void testHashCode() {
-    int hash = TEST_STRING.hashCode();
+    int hash = testString.hashCode();
     assertEquals(CLASSNAME + " must have expected hashCode", EXPECTED_HASH, hash);
   }
 
   public void testPeekCachedHashCode() {
-    ByteString newString = new NioByteString(BUFFER);
+    ByteString newString = new NioByteString(backingBuffer);
     assertEquals(CLASSNAME + ".peekCachedHashCode() should return zero at first", 0,
         newString.peekCachedHashCode());
     newString.hashCode();
@@ -461,15 +536,15 @@ public class NioByteStringTest extends TestCase {
   public void testPartialHash() {
     // partialHash() is more strenuously tested elsewhere by testing hashes of substrings.
     // This test would fail if the expected hash were 1.  It's not.
-    int hash = TEST_STRING.partialHash(TEST_STRING.size(), 0, TEST_STRING.size());
+    int hash = testString.partialHash(testString.size(), 0, testString.size());
     assertEquals(CLASSNAME + ".partialHash() must yield expected hashCode",
         EXPECTED_HASH, hash);
   }
 
   public void testNewInput() throws IOException {
-    InputStream input = TEST_STRING.newInput();
+    InputStream input = testString.newInput();
     assertEquals("InputStream.available() returns correct value",
-        TEST_STRING.size(), input.available());
+        testString.size(), input.available());
     boolean stillEqual = true;
     for (byte referenceByte : BYTES) {
       int expectedInt = (referenceByte & 0xFF);
@@ -482,8 +557,8 @@ public class NioByteStringTest extends TestCase {
   }
 
   public void testNewInput_skip() throws IOException {
-    InputStream input = TEST_STRING.newInput();
-    int stringSize = TEST_STRING.size();
+    InputStream input = testString.newInput();
+    int stringSize = testString.size();
     int nearEndIndex = stringSize * 2 / 3;
     long skipped1 = input.skip(nearEndIndex);
     assertEquals("InputStream.skip()", skipped1, nearEndIndex);
@@ -492,7 +567,7 @@ public class NioByteStringTest extends TestCase {
     assertTrue("InputStream.mark() is available", input.markSupported());
     input.mark(0);
     assertEquals("InputStream.skip(), read()",
-        TEST_STRING.byteAt(nearEndIndex) & 0xFF, input.read());
+        testString.byteAt(nearEndIndex) & 0xFF, input.read());
     assertEquals("InputStream.available()",
         stringSize - skipped1 - 1, input.available());
     long skipped2 = input.skip(stringSize);
@@ -504,11 +579,11 @@ public class NioByteStringTest extends TestCase {
     assertEquals("InputStream.reset() succeded",
         stringSize - skipped1, input.available());
     assertEquals("InputStream.reset(), read()",
-        TEST_STRING.byteAt(nearEndIndex) & 0xFF, input.read());
+        testString.byteAt(nearEndIndex) & 0xFF, input.read());
   }
 
   public void testNewCodedInput() throws IOException {
-    CodedInputStream cis = TEST_STRING.newCodedInput();
+    CodedInputStream cis = testString.newCodedInput();
     byte[] roundTripBytes = cis.readRawBytes(BYTES.length);
     assertTrue(CLASSNAME + " must give the same bytes back from the CodedInputStream",
         Arrays.equals(BYTES, roundTripBytes));
@@ -521,22 +596,22 @@ public class NioByteStringTest extends TestCase {
    */
   public void testConcat_empty() {
     assertSame(CLASSNAME + " concatenated with empty must give " + CLASSNAME,
-        TEST_STRING.concat(EMPTY), TEST_STRING);
+        testString.concat(EMPTY), testString);
     assertSame("empty concatenated with " + CLASSNAME + " must give " + CLASSNAME,
-        EMPTY.concat(TEST_STRING), TEST_STRING);
+        EMPTY.concat(testString), testString);
   }
 
   public void testJavaSerialization() throws Exception {
     ByteArrayOutputStream out = new ByteArrayOutputStream();
     ObjectOutputStream oos = new ObjectOutputStream(out);
-    oos.writeObject(TEST_STRING);
+    oos.writeObject(testString);
     oos.close();
     byte[] pickled = out.toByteArray();
     InputStream in = new ByteArrayInputStream(pickled);
     ObjectInputStream ois = new ObjectInputStream(in);
     Object o = ois.readObject();
     assertTrue("Didn't get a ByteString back", o instanceof ByteString);
-    assertEquals("Should get an equal ByteString back", TEST_STRING, o);
+    assertEquals("Should get an equal ByteString back", testString, o);
   }
 
   private static ByteString forString(String str) {

+ 134 - 72
java/core/src/test/java/com/google/protobuf/ParseExceptionsTest.java

@@ -1,17 +1,51 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 package com.google.protobuf;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
 import com.google.protobuf.DescriptorProtos.DescriptorProto;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.FilterInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import org.junit.Test;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
 
 /**
  * Tests the exceptions thrown when parsing from a stream. The methods on the {@link Parser}
@@ -22,6 +56,7 @@ import static org.junit.Assert.fail;
  *
  * @author jh@squareup.com (Joshua Humphries)
  */
+@RunWith(JUnit4.class)
 public class ParseExceptionsTest {
 
   private interface ParseTester {
@@ -46,116 +81,143 @@ public class ParseExceptionsTest {
 
   @Test public void message_parseFrom_InputStream() {
     setup();
-    verifyExceptions(new ParseTester() {
-      public DescriptorProto parse(InputStream in) throws IOException {
-        return DescriptorProto.parseFrom(in);
-      }
-    });
+    verifyExceptions(
+        new ParseTester() {
+          @Override
+          public DescriptorProto parse(InputStream in) throws IOException {
+            return DescriptorProto.parseFrom(in);
+          }
+        });
   }
 
   @Test public void message_parseFrom_InputStreamAndExtensionRegistry() {
     setup();
-    verifyExceptions(new ParseTester() {
-      public DescriptorProto parse(InputStream in) throws IOException {
-        return DescriptorProto.parseFrom(in, ExtensionRegistry.newInstance());
-      }
-    });
+    verifyExceptions(
+        new ParseTester() {
+          @Override
+          public DescriptorProto parse(InputStream in) throws IOException {
+            return DescriptorProto.parseFrom(in, ExtensionRegistry.newInstance());
+          }
+        });
   }
 
   @Test public void message_parseFrom_CodedInputStream() {
     setup();
-    verifyExceptions(new ParseTester() {
-      public DescriptorProto parse(InputStream in) throws IOException {
-        return DescriptorProto.parseFrom(CodedInputStream.newInstance(in));
-      }
-    });
+    verifyExceptions(
+        new ParseTester() {
+          @Override
+          public DescriptorProto parse(InputStream in) throws IOException {
+            return DescriptorProto.parseFrom(CodedInputStream.newInstance(in));
+          }
+        });
   }
 
   @Test public void message_parseFrom_CodedInputStreamAndExtensionRegistry() {
     setup();
-    verifyExceptions(new ParseTester() {
-      public DescriptorProto parse(InputStream in) throws IOException {
-        return DescriptorProto.parseFrom(CodedInputStream.newInstance(in),
-            ExtensionRegistry.newInstance());
-      }
-    });
+    verifyExceptions(
+        new ParseTester() {
+          @Override
+          public DescriptorProto parse(InputStream in) throws IOException {
+            return DescriptorProto.parseFrom(
+                CodedInputStream.newInstance(in), ExtensionRegistry.newInstance());
+          }
+        });
   }
 
   @Test public void message_parseDelimitedFrom_InputStream() {
     setupDelimited();
-    verifyExceptions(new ParseTester() {
-      public DescriptorProto parse(InputStream in) throws IOException {
-        return DescriptorProto.parseDelimitedFrom(in);
-      }
-    });
+    verifyExceptions(
+        new ParseTester() {
+          @Override
+          public DescriptorProto parse(InputStream in) throws IOException {
+            return DescriptorProto.parseDelimitedFrom(in);
+          }
+        });
   }
 
   @Test public void message_parseDelimitedFrom_InputStreamAndExtensionRegistry() {
     setupDelimited();
-    verifyExceptions(new ParseTester() {
-      public DescriptorProto parse(InputStream in) throws IOException {
-        return DescriptorProto.parseDelimitedFrom(in, ExtensionRegistry.newInstance());
-      }
-    });
+    verifyExceptions(
+        new ParseTester() {
+          @Override
+          public DescriptorProto parse(InputStream in) throws IOException {
+            return DescriptorProto.parseDelimitedFrom(in, ExtensionRegistry.newInstance());
+          }
+        });
   }
 
   @Test public void messageBuilder_mergeFrom_InputStream() {
     setup();
-    verifyExceptions(new ParseTester() {
-      public DescriptorProto parse(InputStream in) throws IOException {
-        return DescriptorProto.newBuilder().mergeFrom(in).build();
-      }
-    });
+    verifyExceptions(
+        new ParseTester() {
+          @Override
+          public DescriptorProto parse(InputStream in) throws IOException {
+            return DescriptorProto.newBuilder().mergeFrom(in).build();
+          }
+        });
   }
 
   @Test public void messageBuilder_mergeFrom_InputStreamAndExtensionRegistry() {
     setup();
-    verifyExceptions(new ParseTester() {
-      public DescriptorProto parse(InputStream in) throws IOException {
-        return DescriptorProto.newBuilder().mergeFrom(in, ExtensionRegistry.newInstance()).build();
-      }
-    });
+    verifyExceptions(
+        new ParseTester() {
+          @Override
+          public DescriptorProto parse(InputStream in) throws IOException {
+            return DescriptorProto.newBuilder()
+                .mergeFrom(in, ExtensionRegistry.newInstance())
+                .build();
+          }
+        });
   }
 
   @Test public void messageBuilder_mergeFrom_CodedInputStream() {
     setup();
-    verifyExceptions(new ParseTester() {
-      public DescriptorProto parse(InputStream in) throws IOException {
-        return DescriptorProto.newBuilder().mergeFrom(CodedInputStream.newInstance(in)).build();
-      }
-    });
+    verifyExceptions(
+        new ParseTester() {
+          @Override
+          public DescriptorProto parse(InputStream in) throws IOException {
+            return DescriptorProto.newBuilder().mergeFrom(CodedInputStream.newInstance(in)).build();
+          }
+        });
   }
 
   @Test public void messageBuilder_mergeFrom_CodedInputStreamAndExtensionRegistry() {
     setup();
-    verifyExceptions(new ParseTester() {
-      public DescriptorProto parse(InputStream in) throws IOException {
-        return DescriptorProto.newBuilder()
-            .mergeFrom(CodedInputStream.newInstance(in), ExtensionRegistry.newInstance()).build();
-      }
-    });
+    verifyExceptions(
+        new ParseTester() {
+          @Override
+          public DescriptorProto parse(InputStream in) throws IOException {
+            return DescriptorProto.newBuilder()
+                .mergeFrom(CodedInputStream.newInstance(in), ExtensionRegistry.newInstance())
+                .build();
+          }
+        });
   }
 
   @Test public void messageBuilder_mergeDelimitedFrom_InputStream() {
     setupDelimited();
-    verifyExceptions(new ParseTester() {
-      public DescriptorProto parse(InputStream in) throws IOException {
-        DescriptorProto.Builder builder = DescriptorProto.newBuilder();
-        builder.mergeDelimitedFrom(in);
-        return builder.build();
-      }
-    });
+    verifyExceptions(
+        new ParseTester() {
+          @Override
+          public DescriptorProto parse(InputStream in) throws IOException {
+            DescriptorProto.Builder builder = DescriptorProto.newBuilder();
+            builder.mergeDelimitedFrom(in);
+            return builder.build();
+          }
+        });
   }
 
   @Test public void messageBuilder_mergeDelimitedFrom_InputStreamAndExtensionRegistry() {
     setupDelimited();
-    verifyExceptions(new ParseTester() {
-      public DescriptorProto parse(InputStream in) throws IOException {
-        DescriptorProto.Builder builder = DescriptorProto.newBuilder();
-        builder.mergeDelimitedFrom(in, ExtensionRegistry.newInstance());
-        return builder.build();
-      }
-    });
+    verifyExceptions(
+        new ParseTester() {
+          @Override
+          public DescriptorProto parse(InputStream in) throws IOException {
+            DescriptorProto.Builder builder = DescriptorProto.newBuilder();
+            builder.mergeDelimitedFrom(in, ExtensionRegistry.newInstance());
+            return builder.build();
+          }
+        });
   }
 
   private void verifyExceptions(ParseTester parseTester) {

+ 2 - 2
java/core/src/test/java/com/google/protobuf/ParserTest.java

@@ -33,15 +33,15 @@ package com.google.protobuf;
 import com.google.protobuf.UnittestLite.TestAllTypesLite;
 import com.google.protobuf.UnittestLite.TestPackedExtensionsLite;
 import com.google.protobuf.UnittestLite.TestParsingMergeLite;
+import protobuf_unittest.UnittestOptimizeFor;
 import protobuf_unittest.UnittestOptimizeFor.TestOptimizedForSize;
 import protobuf_unittest.UnittestOptimizeFor.TestRequiredOptimizedForSize;
-import protobuf_unittest.UnittestOptimizeFor;
+import protobuf_unittest.UnittestProto;
 import protobuf_unittest.UnittestProto.ForeignMessage;
 import protobuf_unittest.UnittestProto.TestAllTypes;
 import protobuf_unittest.UnittestProto.TestEmptyMessage;
 import protobuf_unittest.UnittestProto.TestParsingMerge;
 import protobuf_unittest.UnittestProto.TestRequired;
-import protobuf_unittest.UnittestProto;
 
 import junit.framework.TestCase;
 

+ 7 - 7
java/core/src/test/java/com/google/protobuf/ServiceTest.java

@@ -35,22 +35,22 @@ import com.google.protobuf.Descriptors.MethodDescriptor;
 import google.protobuf.no_generic_services_test.UnittestNoGenericServices;
 import protobuf_unittest.MessageWithNoOuter;
 import protobuf_unittest.ServiceWithNoOuter;
-import protobuf_unittest.UnittestProto.TestAllTypes;
-import protobuf_unittest.UnittestProto.TestService;
-import protobuf_unittest.UnittestProto.FooRequest;
-import protobuf_unittest.UnittestProto.FooResponse;
 import protobuf_unittest.UnittestProto.BarRequest;
 import protobuf_unittest.UnittestProto.BarResponse;
+import protobuf_unittest.UnittestProto.FooRequest;
+import protobuf_unittest.UnittestProto.FooResponse;
+import protobuf_unittest.UnittestProto.TestAllTypes;
+import protobuf_unittest.UnittestProto.TestService;
+
+import junit.framework.TestCase;
 
 import org.easymock.classextension.EasyMock;
-import org.easymock.classextension.IMocksControl;
 import org.easymock.IArgumentMatcher;
+import org.easymock.classextension.IMocksControl;
 
 import java.util.HashSet;
 import java.util.Set;
 
-import junit.framework.TestCase;
-
 /**
  * Tests services and stubs.
  *

+ 171 - 184
java/core/src/test/java/com/google/protobuf/TestUtil.java

@@ -30,205 +30,207 @@
 
 package com.google.protobuf;
 
-import protobuf_unittest.UnittestProto;
-import com.google.protobuf.UnittestLite;
-
+import static com.google.protobuf.UnittestLite.OptionalGroup_extension_lite;
+import static com.google.protobuf.UnittestLite.RepeatedGroup_extension_lite;
+import static com.google.protobuf.UnittestLite.defaultBoolExtensionLite;
+import static com.google.protobuf.UnittestLite.defaultBytesExtensionLite;
+import static com.google.protobuf.UnittestLite.defaultCordExtensionLite;
+import static com.google.protobuf.UnittestLite.defaultDoubleExtensionLite;
+import static com.google.protobuf.UnittestLite.defaultFixed32ExtensionLite;
+import static com.google.protobuf.UnittestLite.defaultFixed64ExtensionLite;
+import static com.google.protobuf.UnittestLite.defaultFloatExtensionLite;
+import static com.google.protobuf.UnittestLite.defaultForeignEnumExtensionLite;
+import static com.google.protobuf.UnittestLite.defaultImportEnumExtensionLite;
+import static com.google.protobuf.UnittestLite.defaultInt32ExtensionLite;
+import static com.google.protobuf.UnittestLite.defaultInt64ExtensionLite;
+import static com.google.protobuf.UnittestLite.defaultNestedEnumExtensionLite;
+import static com.google.protobuf.UnittestLite.defaultSfixed32ExtensionLite;
+import static com.google.protobuf.UnittestLite.defaultSfixed64ExtensionLite;
+import static com.google.protobuf.UnittestLite.defaultSint32ExtensionLite;
+import static com.google.protobuf.UnittestLite.defaultSint64ExtensionLite;
+import static com.google.protobuf.UnittestLite.defaultStringExtensionLite;
+import static com.google.protobuf.UnittestLite.defaultStringPieceExtensionLite;
+import static com.google.protobuf.UnittestLite.defaultUint32ExtensionLite;
+import static com.google.protobuf.UnittestLite.defaultUint64ExtensionLite;
+import static com.google.protobuf.UnittestLite.oneofBytesExtensionLite;
+import static com.google.protobuf.UnittestLite.oneofNestedMessageExtensionLite;
+import static com.google.protobuf.UnittestLite.oneofStringExtensionLite;
+import static com.google.protobuf.UnittestLite.oneofUint32ExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalBoolExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalBytesExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalCordExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalDoubleExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalFixed32ExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalFixed64ExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalFloatExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalForeignEnumExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalForeignMessageExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalGroupExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalImportEnumExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalImportMessageExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalInt32ExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalInt64ExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalLazyMessageExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalNestedEnumExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalNestedMessageExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalPublicImportMessageExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalSfixed32ExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalSfixed64ExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalSint32ExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalSint64ExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalStringExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalStringPieceExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalUint32ExtensionLite;
+import static com.google.protobuf.UnittestLite.optionalUint64ExtensionLite;
+import static com.google.protobuf.UnittestLite.packedBoolExtensionLite;
+import static com.google.protobuf.UnittestLite.packedDoubleExtensionLite;
+import static com.google.protobuf.UnittestLite.packedEnumExtensionLite;
+import static com.google.protobuf.UnittestLite.packedFixed32ExtensionLite;
+import static com.google.protobuf.UnittestLite.packedFixed64ExtensionLite;
+import static com.google.protobuf.UnittestLite.packedFloatExtensionLite;
+import static com.google.protobuf.UnittestLite.packedInt32ExtensionLite;
+import static com.google.protobuf.UnittestLite.packedInt64ExtensionLite;
+import static com.google.protobuf.UnittestLite.packedSfixed32ExtensionLite;
+import static com.google.protobuf.UnittestLite.packedSfixed64ExtensionLite;
+import static com.google.protobuf.UnittestLite.packedSint32ExtensionLite;
+import static com.google.protobuf.UnittestLite.packedSint64ExtensionLite;
+import static com.google.protobuf.UnittestLite.packedUint32ExtensionLite;
+import static com.google.protobuf.UnittestLite.packedUint64ExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedBoolExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedBytesExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedCordExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedDoubleExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedFixed32ExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedFixed64ExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedFloatExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedForeignEnumExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedForeignMessageExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedGroupExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedImportEnumExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedImportMessageExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedInt32ExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedInt64ExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedLazyMessageExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedNestedEnumExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedNestedMessageExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedSfixed32ExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedSfixed64ExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedSint32ExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedSint64ExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedStringExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedStringPieceExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedUint32ExtensionLite;
+import static com.google.protobuf.UnittestLite.repeatedUint64ExtensionLite;
+import static protobuf_unittest.UnittestProto.OptionalGroup_extension;
+import static protobuf_unittest.UnittestProto.RepeatedGroup_extension;
+import static protobuf_unittest.UnittestProto.defaultBoolExtension;
+import static protobuf_unittest.UnittestProto.defaultBytesExtension;
+import static protobuf_unittest.UnittestProto.defaultCordExtension;
+import static protobuf_unittest.UnittestProto.defaultDoubleExtension;
+import static protobuf_unittest.UnittestProto.defaultFixed32Extension;
+import static protobuf_unittest.UnittestProto.defaultFixed64Extension;
+import static protobuf_unittest.UnittestProto.defaultFloatExtension;
+import static protobuf_unittest.UnittestProto.defaultForeignEnumExtension;
+import static protobuf_unittest.UnittestProto.defaultImportEnumExtension;
 // The static imports are to avoid 100+ char lines.  The following is roughly equivalent to
 // import static protobuf_unittest.UnittestProto.*;
 import static protobuf_unittest.UnittestProto.defaultInt32Extension;
 import static protobuf_unittest.UnittestProto.defaultInt64Extension;
-import static protobuf_unittest.UnittestProto.defaultUint32Extension;
-import static protobuf_unittest.UnittestProto.defaultUint64Extension;
-import static protobuf_unittest.UnittestProto.defaultSint32Extension;
-import static protobuf_unittest.UnittestProto.defaultSint64Extension;
-import static protobuf_unittest.UnittestProto.defaultFixed32Extension;
-import static protobuf_unittest.UnittestProto.defaultFixed64Extension;
+import static protobuf_unittest.UnittestProto.defaultNestedEnumExtension;
 import static protobuf_unittest.UnittestProto.defaultSfixed32Extension;
 import static protobuf_unittest.UnittestProto.defaultSfixed64Extension;
-import static protobuf_unittest.UnittestProto.defaultFloatExtension;
-import static protobuf_unittest.UnittestProto.defaultDoubleExtension;
-import static protobuf_unittest.UnittestProto.defaultBoolExtension;
+import static protobuf_unittest.UnittestProto.defaultSint32Extension;
+import static protobuf_unittest.UnittestProto.defaultSint64Extension;
 import static protobuf_unittest.UnittestProto.defaultStringExtension;
-import static protobuf_unittest.UnittestProto.defaultBytesExtension;
-import static protobuf_unittest.UnittestProto.defaultNestedEnumExtension;
-import static protobuf_unittest.UnittestProto.defaultForeignEnumExtension;
-import static protobuf_unittest.UnittestProto.defaultImportEnumExtension;
 import static protobuf_unittest.UnittestProto.defaultStringPieceExtension;
-import static protobuf_unittest.UnittestProto.defaultCordExtension;
-
-import static protobuf_unittest.UnittestProto.oneofUint32Extension;
+import static protobuf_unittest.UnittestProto.defaultUint32Extension;
+import static protobuf_unittest.UnittestProto.defaultUint64Extension;
+import static protobuf_unittest.UnittestProto.oneofBytesExtension;
 import static protobuf_unittest.UnittestProto.oneofNestedMessageExtension;
 import static protobuf_unittest.UnittestProto.oneofStringExtension;
-import static protobuf_unittest.UnittestProto.oneofBytesExtension;
-
-import static protobuf_unittest.UnittestProto.optionalInt32Extension;
-import static protobuf_unittest.UnittestProto.optionalInt64Extension;
-import static protobuf_unittest.UnittestProto.optionalUint32Extension;
-import static protobuf_unittest.UnittestProto.optionalUint64Extension;
-import static protobuf_unittest.UnittestProto.optionalSint32Extension;
-import static protobuf_unittest.UnittestProto.optionalSint64Extension;
-import static protobuf_unittest.UnittestProto.optionalFixed32Extension;
-import static protobuf_unittest.UnittestProto.optionalFixed64Extension;
-import static protobuf_unittest.UnittestProto.optionalSfixed32Extension;
-import static protobuf_unittest.UnittestProto.optionalSfixed64Extension;
-import static protobuf_unittest.UnittestProto.optionalFloatExtension;
-import static protobuf_unittest.UnittestProto.optionalDoubleExtension;
+import static protobuf_unittest.UnittestProto.oneofUint32Extension;
 import static protobuf_unittest.UnittestProto.optionalBoolExtension;
-import static protobuf_unittest.UnittestProto.optionalStringExtension;
 import static protobuf_unittest.UnittestProto.optionalBytesExtension;
-import static protobuf_unittest.UnittestProto.optionalGroupExtension;
 import static protobuf_unittest.UnittestProto.optionalCordExtension;
+import static protobuf_unittest.UnittestProto.optionalDoubleExtension;
+import static protobuf_unittest.UnittestProto.optionalFixed32Extension;
+import static protobuf_unittest.UnittestProto.optionalFixed64Extension;
+import static protobuf_unittest.UnittestProto.optionalFloatExtension;
 import static protobuf_unittest.UnittestProto.optionalForeignEnumExtension;
 import static protobuf_unittest.UnittestProto.optionalForeignMessageExtension;
+import static protobuf_unittest.UnittestProto.optionalGroupExtension;
 import static protobuf_unittest.UnittestProto.optionalImportEnumExtension;
 import static protobuf_unittest.UnittestProto.optionalImportMessageExtension;
+import static protobuf_unittest.UnittestProto.optionalInt32Extension;
+import static protobuf_unittest.UnittestProto.optionalInt64Extension;
+import static protobuf_unittest.UnittestProto.optionalLazyMessageExtension;
 import static protobuf_unittest.UnittestProto.optionalNestedEnumExtension;
 import static protobuf_unittest.UnittestProto.optionalNestedMessageExtension;
 import static protobuf_unittest.UnittestProto.optionalPublicImportMessageExtension;
-import static protobuf_unittest.UnittestProto.optionalLazyMessageExtension;
+import static protobuf_unittest.UnittestProto.optionalSfixed32Extension;
+import static protobuf_unittest.UnittestProto.optionalSfixed64Extension;
+import static protobuf_unittest.UnittestProto.optionalSint32Extension;
+import static protobuf_unittest.UnittestProto.optionalSint64Extension;
+import static protobuf_unittest.UnittestProto.optionalStringExtension;
 import static protobuf_unittest.UnittestProto.optionalStringPieceExtension;
-
-import static protobuf_unittest.UnittestProto.repeatedInt32Extension;
-import static protobuf_unittest.UnittestProto.repeatedInt64Extension;
-import static protobuf_unittest.UnittestProto.repeatedUint32Extension;
-import static protobuf_unittest.UnittestProto.repeatedUint64Extension;
-import static protobuf_unittest.UnittestProto.repeatedSint32Extension;
-import static protobuf_unittest.UnittestProto.repeatedSint64Extension;
+import static protobuf_unittest.UnittestProto.optionalUint32Extension;
+import static protobuf_unittest.UnittestProto.optionalUint64Extension;
+import static protobuf_unittest.UnittestProto.packedBoolExtension;
+import static protobuf_unittest.UnittestProto.packedDoubleExtension;
+import static protobuf_unittest.UnittestProto.packedEnumExtension;
+import static protobuf_unittest.UnittestProto.packedFixed32Extension;
+import static protobuf_unittest.UnittestProto.packedFixed64Extension;
+import static protobuf_unittest.UnittestProto.packedFloatExtension;
+import static protobuf_unittest.UnittestProto.packedInt32Extension;
+import static protobuf_unittest.UnittestProto.packedInt64Extension;
+import static protobuf_unittest.UnittestProto.packedSfixed32Extension;
+import static protobuf_unittest.UnittestProto.packedSfixed64Extension;
+import static protobuf_unittest.UnittestProto.packedSint32Extension;
+import static protobuf_unittest.UnittestProto.packedSint64Extension;
+import static protobuf_unittest.UnittestProto.packedUint32Extension;
+import static protobuf_unittest.UnittestProto.packedUint64Extension;
+import static protobuf_unittest.UnittestProto.repeatedBoolExtension;
+import static protobuf_unittest.UnittestProto.repeatedBytesExtension;
+import static protobuf_unittest.UnittestProto.repeatedCordExtension;
+import static protobuf_unittest.UnittestProto.repeatedDoubleExtension;
 import static protobuf_unittest.UnittestProto.repeatedFixed32Extension;
 import static protobuf_unittest.UnittestProto.repeatedFixed64Extension;
-import static protobuf_unittest.UnittestProto.repeatedSfixed32Extension;
-import static protobuf_unittest.UnittestProto.repeatedSfixed64Extension;
 import static protobuf_unittest.UnittestProto.repeatedFloatExtension;
-import static protobuf_unittest.UnittestProto.repeatedDoubleExtension;
-import static protobuf_unittest.UnittestProto.repeatedBoolExtension;
-import static protobuf_unittest.UnittestProto.repeatedStringExtension;
-import static protobuf_unittest.UnittestProto.repeatedBytesExtension;
-import static protobuf_unittest.UnittestProto.repeatedGroupExtension;
-import static protobuf_unittest.UnittestProto.repeatedNestedMessageExtension;
+import static protobuf_unittest.UnittestProto.repeatedForeignEnumExtension;
 import static protobuf_unittest.UnittestProto.repeatedForeignMessageExtension;
+import static protobuf_unittest.UnittestProto.repeatedGroupExtension;
+import static protobuf_unittest.UnittestProto.repeatedImportEnumExtension;
 import static protobuf_unittest.UnittestProto.repeatedImportMessageExtension;
+import static protobuf_unittest.UnittestProto.repeatedInt32Extension;
+import static protobuf_unittest.UnittestProto.repeatedInt64Extension;
 import static protobuf_unittest.UnittestProto.repeatedLazyMessageExtension;
 import static protobuf_unittest.UnittestProto.repeatedNestedEnumExtension;
-import static protobuf_unittest.UnittestProto.repeatedForeignEnumExtension;
-import static protobuf_unittest.UnittestProto.repeatedImportEnumExtension;
+import static protobuf_unittest.UnittestProto.repeatedNestedMessageExtension;
+import static protobuf_unittest.UnittestProto.repeatedSfixed32Extension;
+import static protobuf_unittest.UnittestProto.repeatedSfixed64Extension;
+import static protobuf_unittest.UnittestProto.repeatedSint32Extension;
+import static protobuf_unittest.UnittestProto.repeatedSint64Extension;
+import static protobuf_unittest.UnittestProto.repeatedStringExtension;
 import static protobuf_unittest.UnittestProto.repeatedStringPieceExtension;
-import static protobuf_unittest.UnittestProto.repeatedCordExtension;
-
-import static protobuf_unittest.UnittestProto.OptionalGroup_extension;
-import static protobuf_unittest.UnittestProto.RepeatedGroup_extension;
-
-import static protobuf_unittest.UnittestProto.packedInt32Extension;
-import static protobuf_unittest.UnittestProto.packedInt64Extension;
-import static protobuf_unittest.UnittestProto.packedUint32Extension;
-import static protobuf_unittest.UnittestProto.packedUint64Extension;
-import static protobuf_unittest.UnittestProto.packedSint32Extension;
-import static protobuf_unittest.UnittestProto.packedSint64Extension;
-import static protobuf_unittest.UnittestProto.packedFixed32Extension;
-import static protobuf_unittest.UnittestProto.packedFixed64Extension;
-import static protobuf_unittest.UnittestProto.packedSfixed32Extension;
-import static protobuf_unittest.UnittestProto.packedSfixed64Extension;
-import static protobuf_unittest.UnittestProto.packedFloatExtension;
-import static protobuf_unittest.UnittestProto.packedDoubleExtension;
-import static protobuf_unittest.UnittestProto.packedBoolExtension;
-import static protobuf_unittest.UnittestProto.packedEnumExtension;
-
-import static com.google.protobuf.UnittestLite.defaultInt32ExtensionLite;
-import static com.google.protobuf.UnittestLite.defaultInt64ExtensionLite;
-import static com.google.protobuf.UnittestLite.defaultUint32ExtensionLite;
-import static com.google.protobuf.UnittestLite.defaultUint64ExtensionLite;
-import static com.google.protobuf.UnittestLite.defaultSint32ExtensionLite;
-import static com.google.protobuf.UnittestLite.defaultSint64ExtensionLite;
-import static com.google.protobuf.UnittestLite.defaultFixed32ExtensionLite;
-import static com.google.protobuf.UnittestLite.defaultFixed64ExtensionLite;
-import static com.google.protobuf.UnittestLite.defaultSfixed32ExtensionLite;
-import static com.google.protobuf.UnittestLite.defaultSfixed64ExtensionLite;
-import static com.google.protobuf.UnittestLite.defaultFloatExtensionLite;
-import static com.google.protobuf.UnittestLite.defaultDoubleExtensionLite;
-import static com.google.protobuf.UnittestLite.defaultBoolExtensionLite;
-import static com.google.protobuf.UnittestLite.defaultStringExtensionLite;
-import static com.google.protobuf.UnittestLite.defaultBytesExtensionLite;
-import static com.google.protobuf.UnittestLite.defaultNestedEnumExtensionLite;
-import static com.google.protobuf.UnittestLite.defaultForeignEnumExtensionLite;
-import static com.google.protobuf.UnittestLite.defaultImportEnumExtensionLite;
-import static com.google.protobuf.UnittestLite.defaultStringPieceExtensionLite;
-import static com.google.protobuf.UnittestLite.defaultCordExtensionLite;
-
-import static com.google.protobuf.UnittestLite.oneofUint32ExtensionLite;
-import static com.google.protobuf.UnittestLite.oneofNestedMessageExtensionLite;
-import static com.google.protobuf.UnittestLite.oneofStringExtensionLite;
-import static com.google.protobuf.UnittestLite.oneofBytesExtensionLite;
-
-import static com.google.protobuf.UnittestLite.optionalInt32ExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalInt64ExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalUint32ExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalUint64ExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalSint32ExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalSint64ExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalFixed32ExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalFixed64ExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalSfixed32ExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalSfixed64ExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalFloatExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalDoubleExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalBoolExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalStringExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalBytesExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalGroupExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalNestedMessageExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalForeignEnumExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalForeignMessageExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalImportEnumExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalImportMessageExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalNestedEnumExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalPublicImportMessageExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalLazyMessageExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalStringPieceExtensionLite;
-import static com.google.protobuf.UnittestLite.optionalCordExtensionLite;
-
-import static com.google.protobuf.UnittestLite.repeatedInt32ExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedInt64ExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedUint32ExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedUint64ExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedSint32ExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedSint64ExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedFixed32ExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedFixed64ExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedSfixed32ExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedSfixed64ExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedFloatExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedDoubleExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedBoolExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedStringExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedBytesExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedGroupExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedNestedMessageExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedForeignMessageExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedImportMessageExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedLazyMessageExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedNestedEnumExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedForeignEnumExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedImportEnumExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedStringPieceExtensionLite;
-import static com.google.protobuf.UnittestLite.repeatedCordExtensionLite;
-
-import static com.google.protobuf.UnittestLite.OptionalGroup_extension_lite;
-import static com.google.protobuf.UnittestLite.RepeatedGroup_extension_lite;
-
-import static com.google.protobuf.UnittestLite.packedInt32ExtensionLite;
-import static com.google.protobuf.UnittestLite.packedInt64ExtensionLite;
-import static com.google.protobuf.UnittestLite.packedUint32ExtensionLite;
-import static com.google.protobuf.UnittestLite.packedUint64ExtensionLite;
-import static com.google.protobuf.UnittestLite.packedSint32ExtensionLite;
-import static com.google.protobuf.UnittestLite.packedSint64ExtensionLite;
-import static com.google.protobuf.UnittestLite.packedFixed32ExtensionLite;
-import static com.google.protobuf.UnittestLite.packedFixed64ExtensionLite;
-import static com.google.protobuf.UnittestLite.packedSfixed32ExtensionLite;
-import static com.google.protobuf.UnittestLite.packedSfixed64ExtensionLite;
-import static com.google.protobuf.UnittestLite.packedFloatExtensionLite;
-import static com.google.protobuf.UnittestLite.packedDoubleExtensionLite;
-import static com.google.protobuf.UnittestLite.packedBoolExtensionLite;
-import static com.google.protobuf.UnittestLite.packedEnumExtensionLite;
+import static protobuf_unittest.UnittestProto.repeatedUint32Extension;
+import static protobuf_unittest.UnittestProto.repeatedUint64Extension;
 
+import com.google.protobuf.UnittestImportLite.ImportEnumLite;
+import com.google.protobuf.UnittestImportLite.ImportMessageLite;
+import com.google.protobuf.UnittestImportPublicLite.PublicImportMessageLite;
+import com.google.protobuf.UnittestLite;
+import com.google.protobuf.UnittestLite.ForeignEnumLite;
+import com.google.protobuf.UnittestLite.ForeignMessageLite;
+import com.google.protobuf.UnittestLite.TestAllExtensionsLite;
+import com.google.protobuf.UnittestLite.TestAllExtensionsLiteOrBuilder;
+import com.google.protobuf.UnittestLite.TestAllTypesLite;
+import com.google.protobuf.UnittestLite.TestPackedExtensionsLite;
+import com.google.protobuf.test.UnittestImport.ImportEnum;
+import com.google.protobuf.test.UnittestImport.ImportMessage;
+import com.google.protobuf.test.UnittestImportPublic.PublicImportMessage;
+import protobuf_unittest.UnittestProto;
+import protobuf_unittest.UnittestProto.ForeignEnum;
+import protobuf_unittest.UnittestProto.ForeignMessage;
 import protobuf_unittest.UnittestProto.TestAllExtensions;
 import protobuf_unittest.UnittestProto.TestAllExtensionsOrBuilder;
 import protobuf_unittest.UnittestProto.TestAllTypes;
@@ -237,21 +239,6 @@ import protobuf_unittest.UnittestProto.TestOneof2;
 import protobuf_unittest.UnittestProto.TestPackedExtensions;
 import protobuf_unittest.UnittestProto.TestPackedTypes;
 import protobuf_unittest.UnittestProto.TestUnpackedTypes;
-import protobuf_unittest.UnittestProto.ForeignMessage;
-import protobuf_unittest.UnittestProto.ForeignEnum;
-import com.google.protobuf.test.UnittestImport.ImportEnum;
-import com.google.protobuf.test.UnittestImport.ImportMessage;
-import com.google.protobuf.test.UnittestImportPublic.PublicImportMessage;
-
-import com.google.protobuf.UnittestLite.TestAllTypesLite;
-import com.google.protobuf.UnittestLite.TestAllExtensionsLite;
-import com.google.protobuf.UnittestLite.TestAllExtensionsLiteOrBuilder;
-import com.google.protobuf.UnittestLite.TestPackedExtensionsLite;
-import com.google.protobuf.UnittestLite.ForeignMessageLite;
-import com.google.protobuf.UnittestLite.ForeignEnumLite;
-import com.google.protobuf.UnittestImportLite.ImportEnumLite;
-import com.google.protobuf.UnittestImportLite.ImportMessageLite;
-import com.google.protobuf.UnittestImportPublicLite.PublicImportMessageLite;
 
 import junit.framework.Assert;
 

+ 182 - 0
java/core/src/test/java/com/google/protobuf/TextFormatParseInfoTreeTest.java

@@ -0,0 +1,182 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import com.google.protobuf.Descriptors.Descriptor;
+import com.google.protobuf.Descriptors.FieldDescriptor;
+import protobuf_unittest.UnittestProto.TestAllTypes;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests for {@link TextFormatParseInfoTree}.
+ *
+ * <p>Each test populates {@code rootBuilder}, builds a tree from it, and checks the location and
+ * nested-tree accessors of the result.
+ */
+public class TextFormatParseInfoTreeTest extends TestCase {
+
+  // Field descriptors used as lookup keys into the tree.
+  private static final Descriptor DESCRIPTOR = TestAllTypes.getDescriptor();
+  private static final FieldDescriptor OPTIONAL_INT32 =
+      DESCRIPTOR.findFieldByName("optional_int32");
+  // NOTE(review): the sibling lookups embed the proto type name (e.g. "optional_int32"); confirm
+  // that "optional_boolean" (rather than "optional_bool") resolves to a real field, otherwise
+  // this constant is null and the test using it only exercises a null key.
+  private static final FieldDescriptor OPTIONAL_BOOLEAN =
+      DESCRIPTOR.findFieldByName("optional_boolean");
+  private static final FieldDescriptor REPEATED_INT32 =
+      DESCRIPTOR.findFieldByName("repeated_int32");
+  private static final FieldDescriptor OPTIONAL_NESTED_MESSAGE =
+      DESCRIPTOR.findFieldByName("optional_nested_message");
+  private static final FieldDescriptor REPEATED_NESTED_MESSAGE =
+      DESCRIPTOR.findFieldByName("repeated_nested_message");
+  private static final FieldDescriptor FIELD_BB =
+      TestAllTypes.NestedMessage.getDescriptor().findFieldByName("bb");
+
+  // Two distinct (line, column) locations to store in the tree.
+  private static final TextFormatParseLocation LOC0 = TextFormatParseLocation.create(1, 2);
+  private static final TextFormatParseLocation LOC1 = TextFormatParseLocation.create(2, 3);
+
+  // Fresh builder for the root of the tree; recreated before every test by setUp().
+  private TextFormatParseInfoTree.Builder rootBuilder;
+
+  @Override
+  public void setUp() {
+    rootBuilder = TextFormatParseInfoTree.builder();
+  }
+
+  /** A tree built from an untouched builder reports no locations. */
+  public void testBuildEmptyParseTree() {
+    TextFormatParseInfoTree tree = rootBuilder.build();
+    assertTrue(tree.getLocations(null).isEmpty());
+  }
+
+  /** A single recorded location is returned by both the indexed and the list accessor. */
+  public void testGetLocationReturnsSingleLocation() {
+    rootBuilder.setLocation(OPTIONAL_INT32, LOC0);
+    TextFormatParseInfoTree root = rootBuilder.build();
+    assertEquals(LOC0, root.getLocation(OPTIONAL_INT32, 0));
+    assertEquals(1, root.getLocations(OPTIONAL_INT32).size());
+  }
+
+  /** getLocations() yields an empty list for a field that has no recorded location. */
+  public void testGetLocationsReturnsNoParseLocationsForUnknownField() {
+    assertTrue(rootBuilder.build().getLocations(OPTIONAL_INT32).isEmpty());
+    rootBuilder.setLocation(OPTIONAL_BOOLEAN, LOC0);
+    TextFormatParseInfoTree root = rootBuilder.build();
+    assertTrue(root.getLocations(OPTIONAL_INT32).isEmpty());
+    assertEquals(LOC0, root.getLocations(OPTIONAL_BOOLEAN).get(0));
+  }
+
+  /** getLocation() rejects a field for which no location was recorded. */
+  public void testGetLocationThrowsIllegalArgumentExceptionForUnknownField() {
+    rootBuilder.setLocation(REPEATED_INT32, LOC0);
+    TextFormatParseInfoTree root = rootBuilder.build();
+    try {
+      // Exercise getLocation() here; getNestedTree() has its own unknown-field test below.
+      root.getLocation(OPTIONAL_INT32, 0);
+      fail("Did not detect unknown field");
+    } catch (IllegalArgumentException expected) {
+      // pass
+    }
+  }
+
+  /** getLocation() rejects indexes past the end of the list as well as negative indexes. */
+  public void testGetLocationThrowsIllegalArgumentExceptionForInvalidIndex() {
+    TextFormatParseInfoTree root = rootBuilder.setLocation(OPTIONAL_INT32, LOC0).build();
+    try {
+      root.getLocation(OPTIONAL_INT32, 1);
+      fail("Invalid index not detected");
+    } catch (IllegalArgumentException expected) {
+      // pass
+    }
+    try {
+      root.getLocation(OPTIONAL_INT32, -1);
+      fail("Negative index not detected");
+    } catch (IllegalArgumentException expected) {
+      // pass
+    }
+  }
+
+  /** Locations set repeatedly for the same field are kept in insertion order. */
+  public void testGetLocationsReturnsMultipleLocations() {
+    rootBuilder.setLocation(REPEATED_INT32, LOC0);
+    rootBuilder.setLocation(REPEATED_INT32, LOC1);
+    TextFormatParseInfoTree root = rootBuilder.build();
+    assertEquals(LOC0, root.getLocation(REPEATED_INT32, 0));
+    assertEquals(LOC1, root.getLocation(REPEATED_INT32, 1));
+    assertEquals(2, root.getLocations(REPEATED_INT32).size());
+  }
+
+  /** getNestedTree() rejects a field for which no sub-message builder was requested. */
+  public void testGetNestedTreeThrowsIllegalArgumentExceptionForUnknownField() {
+    rootBuilder.setLocation(REPEATED_INT32, LOC0);
+    TextFormatParseInfoTree root = rootBuilder.build();
+    try {
+      root.getNestedTree(OPTIONAL_NESTED_MESSAGE, 0);
+      fail("Did not detect unknown field");
+    } catch (IllegalArgumentException expected) {
+      // pass
+    }
+  }
+
+  /** getNestedTrees() yields an empty list for a field without nested trees. */
+  public void testGetNestedTreesReturnsNoParseInfoTreesForUnknownField() {
+    rootBuilder.setLocation(REPEATED_INT32, LOC0);
+    TextFormatParseInfoTree root = rootBuilder.build();
+    assertTrue(root.getNestedTrees(OPTIONAL_NESTED_MESSAGE).isEmpty());
+  }
+
+  /** getNestedTree() rejects indexes past the end of the list as well as negative indexes. */
+  public void testGetNestedTreeThrowsIllegalArgumentExceptionForInvalidIndex() {
+    rootBuilder.setLocation(REPEATED_INT32, LOC0);
+    rootBuilder.getBuilderForSubMessageField(OPTIONAL_NESTED_MESSAGE);
+    TextFormatParseInfoTree root = rootBuilder.build();
+    try {
+      root.getNestedTree(OPTIONAL_NESTED_MESSAGE, 1);
+      fail("Submessage index that is too large not detected");
+    } catch (IllegalArgumentException expected) {
+      // pass
+    }
+    try {
+      // Reuse the tree built above so both checks run against the same instance.
+      root.getNestedTree(OPTIONAL_NESTED_MESSAGE, -1);
+      fail("Invalid submessage index (-1) not detected");
+    } catch (IllegalArgumentException expected) {
+      // pass
+    }
+  }
+
+  /** Requesting one sub-message builder produces exactly one non-null nested tree. */
+  public void testGetNestedTreesReturnsSingleTree() {
+    rootBuilder.getBuilderForSubMessageField(OPTIONAL_NESTED_MESSAGE);
+    TextFormatParseInfoTree root = rootBuilder.build();
+    assertEquals(1, root.getNestedTrees(OPTIONAL_NESTED_MESSAGE).size());
+    TextFormatParseInfoTree subtree = root.getNestedTrees(OPTIONAL_NESTED_MESSAGE).get(0);
+    assertNotNull(subtree);
+  }
+
+  /** Each sub-message builder request adds its own nested tree, at every nesting level. */
+  public void testGetNestedTreesReturnsMultipleTrees() {
+    // First repeated entry gets two nested "bb" trees, the second entry gets one.
+    TextFormatParseInfoTree.Builder subtree1Builder =
+        rootBuilder.getBuilderForSubMessageField(REPEATED_NESTED_MESSAGE);
+    subtree1Builder.getBuilderForSubMessageField(FIELD_BB);
+    subtree1Builder.getBuilderForSubMessageField(FIELD_BB);
+    TextFormatParseInfoTree.Builder subtree2Builder =
+        rootBuilder.getBuilderForSubMessageField(REPEATED_NESTED_MESSAGE);
+    subtree2Builder.getBuilderForSubMessageField(FIELD_BB);
+    TextFormatParseInfoTree root = rootBuilder.build();
+    assertEquals(2, root.getNestedTrees(REPEATED_NESTED_MESSAGE).size());
+    assertEquals(
+        2, root.getNestedTrees(REPEATED_NESTED_MESSAGE).get(0).getNestedTrees(FIELD_BB).size());
+    assertEquals(
+        1, root.getNestedTrees(REPEATED_NESTED_MESSAGE).get(1).getNestedTrees(FIELD_BB).size());
+  }
+}

+ 86 - 0
java/core/src/test/java/com/google/protobuf/TextFormatParseLocationTest.java

@@ -0,0 +1,86 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests for {@link TextFormatParseLocation}.
+ */
+public class TextFormatParseLocationTest extends TestCase {
+
+  /** Creating a location from (-1, -1) yields a value equal to the EMPTY constant. */
+  public void testCreateEmpty() {
+    TextFormatParseLocation location = TextFormatParseLocation.create(-1, -1);
+    assertEquals(TextFormatParseLocation.EMPTY, location);
+  }
+
+  /** The line and column passed to create() are returned by the getters. */
+  public void testCreate() {
+    TextFormatParseLocation location = TextFormatParseLocation.create(2, 1);
+    assertEquals(2, location.getLine());
+    assertEquals(1, location.getColumn());
+  }
+
+  /** A negative line or column paired with a non-negative counterpart is rejected. */
+  public void testCreateThrowsIllegalArgumentExceptionForInvalidIndex() {
+    try {
+      TextFormatParseLocation.create(-1, 0);
+      fail("Should throw IllegalArgumentException if line is less than 0");
+    } catch (IllegalArgumentException unused) {
+      // pass
+    }
+    try {
+      TextFormatParseLocation.create(0, -1);
+      fail("Should throw, column < 0");
+    } catch (IllegalArgumentException unused) {
+      // pass
+    }
+  }
+
+  /** Locations created from the same line and column have the same hash code. */
+  public void testHashCode() {
+    TextFormatParseLocation loc0 = TextFormatParseLocation.create(2, 1);
+    TextFormatParseLocation loc1 = TextFormatParseLocation.create(2, 1);
+
+    assertEquals(loc0.hashCode(), loc1.hashCode());
+    assertEquals(
+        TextFormatParseLocation.EMPTY.hashCode(), TextFormatParseLocation.EMPTY.hashCode());
+  }
+
+  /** equals() compares by value: same line/column are equal, any difference is unequal. */
+  public void testEquals() {
+    TextFormatParseLocation loc0 = TextFormatParseLocation.create(2, 1);
+    TextFormatParseLocation loc1 = TextFormatParseLocation.create(1, 2);
+    TextFormatParseLocation loc2 = TextFormatParseLocation.create(2, 2);
+    TextFormatParseLocation loc3 = TextFormatParseLocation.create(2, 1);
+
+    assertEquals(loc0, loc3);
+    // assertNotSame only compares references and would pass for any two distinct objects,
+    // so check equals() directly to verify that differing values are reported as unequal.
+    assertFalse(loc0.equals(loc1));
+    assertFalse(loc0.equals(loc2));
+    assertFalse(loc1.equals(loc2));
+  }
+}

+ 106 - 1
java/core/src/test/java/com/google/protobuf/TextFormatTest.java

@@ -30,6 +30,7 @@
 
 package com.google.protobuf;
 
+import com.google.protobuf.Descriptors.Descriptor;
 import com.google.protobuf.Descriptors.FieldDescriptor;
 import com.google.protobuf.TextFormat.Parser.SingularOverwritePolicy;
 import protobuf_unittest.UnittestMset.TestMessageSetExtension1;
@@ -45,6 +46,7 @@ import proto2_wireformat_unittest.UnittestMsetWireFormat.TestMessageSet;
 import junit.framework.TestCase;
 
 import java.io.StringReader;
+import java.util.List;
 
 /**
  * Test case for {@link TextFormat}.
@@ -568,6 +570,16 @@ public class TextFormatTest extends TestCase {
     assertEquals(kEscapeTestString,
       TextFormat.unescapeText(kEscapeTestStringEscaped));
 
+    // Invariant
+    assertEquals("hello",
+        TextFormat.escapeBytes(bytes("hello")));
+    assertEquals("hello",
+        TextFormat.escapeText("hello"));
+    assertEquals(bytes("hello"),
+        TextFormat.unescapeBytes("hello"));
+    assertEquals("hello",
+        TextFormat.unescapeText("hello"));
+
     // Unicode handling.
     assertEquals("\\341\\210\\264", TextFormat.escapeText("\u1234"));
     assertEquals("\\341\\210\\264",
@@ -811,7 +823,6 @@ public class TextFormatTest extends TestCase {
 
   private void assertPrintFieldValue(String expect, Object value,
       String fieldName) throws Exception {
-    TestAllTypes.Builder builder = TestAllTypes.newBuilder();
     StringBuilder sb = new StringBuilder();
     TextFormat.printFieldValue(
         TestAllTypes.getDescriptor().findFieldByName(fieldName),
@@ -1018,4 +1029,98 @@ public class TextFormatTest extends TestCase {
     assertFalse(oneof.hasFooString());
     assertTrue(oneof.hasFooInt());
   }
+
+  // =======================================================================
+  // test location information
+
+  /**
+   * Parses a text-format message with a parse-info tree builder attached and checks that the
+   * resulting tree records the expected zero-based line and column of every field, including
+   * fields inside nested messages.
+   */
+  public void testParseInfoTreeBuilding() throws Exception {
+    TestAllTypes.Builder builder = TestAllTypes.newBuilder();
+
+    Descriptor descriptor = TestAllTypes.getDescriptor();
+    TextFormatParseInfoTree.Builder treeBuilder = TextFormatParseInfoTree.builder();
+    // Attach the tree builder so that the parse below records field locations into it.
+    TextFormat.Parser parser =
+        TextFormat.Parser.newBuilder()
+            .setParseInfoTreeBuilder(treeBuilder)
+            .build();
+
+    // Input text; the expectations below refer to its lines and columns counting from 0.
+    final String stringData =
+        "optional_int32: 1\n"
+        + "optional_int64: 2\n"
+        + "  optional_double: 2.4\n"
+        + "repeated_int32: 5\n"
+        + "repeated_int32: 10\n"
+        + "optional_nested_message <\n"
+        + "  bb: 78\n"
+        + ">\n"
+        + "repeated_nested_message <\n"
+        + "  bb: 79\n"
+        + ">\n"
+        + "repeated_nested_message <\n"
+        + "  bb: 80\n"
+        + ">";
+
+    parser.merge(stringData, builder);
+    TextFormatParseInfoTree tree = treeBuilder.build();
+
+    // Verify that the tree has the correct positions (arguments: index, line, column).
+    assertLocation(tree, descriptor, "optional_int32", 0, 0, 0);
+    assertLocation(tree, descriptor, "optional_int64", 0, 1, 0);
+    assertLocation(tree, descriptor, "optional_double", 0, 2, 2);
+
+    assertLocation(tree, descriptor, "repeated_int32", 0, 3, 0);
+    assertLocation(tree, descriptor, "repeated_int32", 1, 4, 0);
+
+    assertLocation(tree, descriptor, "optional_nested_message", 0, 5, 0);
+    assertLocation(tree, descriptor, "repeated_nested_message", 0, 8, 0);
+    assertLocation(tree, descriptor, "repeated_nested_message", 1, 11, 0);
+
+    // Check a field that was never set and an index past the recorded entries. Passing -1, -1
+    // tells assertLocation that no location is expected at that index.
+    assertLocation(tree, descriptor, "repeated_int64", 0, -1, -1);
+    assertLocation(tree, descriptor, "repeated_int32", 6, -1, -1);
+
+    // Verify inside the nested message.
+    FieldDescriptor nestedField = descriptor.findFieldByName("optional_nested_message");
+
+    TextFormatParseInfoTree nestedTree = tree.getNestedTrees(nestedField).get(0);
+    assertLocation(nestedTree, nestedField.getMessageType(), "bb", 0, 6, 2);
+
+    // Verify inside another nested message.
+    nestedField = descriptor.findFieldByName("repeated_nested_message");
+    nestedTree = tree.getNestedTrees(nestedField).get(0);
+    assertLocation(nestedTree, nestedField.getMessageType(), "bb", 0, 9, 2);
+
+    nestedTree = tree.getNestedTrees(nestedField).get(1);
+    assertLocation(nestedTree, nestedField.getMessageType(), "bb", 0, 12, 2);
+
+    // Only two repeated_nested_message entries were parsed, so nested-tree index 2 must be
+    // rejected with an IllegalArgumentException.
+    try {
+      tree.getNestedTree(nestedField, 2);
+      fail("unknown nested field should throw");
+    } catch (IllegalArgumentException unused) {
+      // pass
+    }
+  }
+
+  /**
+   * Asserts that the {@code index}-th location recorded in {@code tree} for the field named
+   * {@code fieldName} equals the location created from {@code line} and {@code column}.
+   *
+   * <p>If the tree holds no location at {@code index}, the check passes only when the caller
+   * expected none, which is signalled by passing -1 for both {@code line} and {@code column}.
+   */
+  private void assertLocation(
+      TextFormatParseInfoTree tree,
+      final Descriptor descriptor,
+      final String fieldName,
+      int index,
+      int line,
+      int column) {
+    List<TextFormatParseLocation> locs = tree.getLocations(descriptor.findFieldByName(fieldName));
+    if (index < locs.size()) {
+      TextFormatParseLocation location = locs.get(index);
+      TextFormatParseLocation expected = TextFormatParseLocation.create(line, column);
+      assertEquals(expected, location);
+    } else if (line != -1 || column != -1) {
+      // A missing entry is acceptable only for the (-1, -1) sentinel; using || instead of &&
+      // keeps a half-specified expectation such as (-1, 0) from passing silently.
+      fail(
+          String.format(
+              "Tree/descriptor/fieldname did not contain index %d, line %d column %d expected",
+              index,
+              line,
+              column));
+    }
+  }
 }

+ 0 - 40
java/core/src/test/java/com/google/protobuf/UnknownFieldSetLiteTest.java

@@ -47,46 +47,6 @@ import java.io.IOException;
  * @author dweis@google.com (Daniel Weis)
  */
 public class UnknownFieldSetLiteTest extends TestCase {
-
-  public void testNoDataIsDefaultInstance() {
-    assertSame(
-        UnknownFieldSetLite.getDefaultInstance(),
-        UnknownFieldSetLite.newBuilder()
-            .build());
-  }
-  
-  public void testBuilderReuse() throws IOException {
-    UnknownFieldSetLite.Builder builder = UnknownFieldSetLite.newBuilder();
-    builder.mergeVarintField(10, 2);
-    builder.build();
-
-    try {
-      builder.build();
-      fail();
-    } catch (UnsupportedOperationException e) {
-      // Expected.
-    }
-
-    try {
-      builder.mergeFieldFrom(0, CodedInputStream.newInstance(new byte[0]));
-      fail();
-    } catch (UnsupportedOperationException e) {
-      // Expected.
-    }
-
-    try {
-      builder.mergeVarintField(5, 1);
-      fail();
-    } catch (UnsupportedOperationException e) {
-      // Expected.
-    }
-  }
-
-  public void testBuilderReuse_empty() {
-    UnknownFieldSetLite.Builder builder = UnknownFieldSetLite.newBuilder();
-    builder.build();
-    builder.build();
-  }
   
   public void testDefaultInstance() {
     UnknownFieldSetLite unknownFields = UnknownFieldSetLite.getDefaultInstance();

+ 11 - 11
java/core/src/test/java/com/google/protobuf/WireFormatTest.java

@@ -30,12 +30,11 @@
 
 package com.google.protobuf;
 
-import junit.framework.TestCase;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.util.List;
-
+import com.google.protobuf.UnittestLite.TestAllExtensionsLite;
+import com.google.protobuf.UnittestLite.TestPackedExtensionsLite;
+import protobuf_unittest.UnittestMset.RawMessageSet;
+import protobuf_unittest.UnittestMset.TestMessageSetExtension1;
+import protobuf_unittest.UnittestMset.TestMessageSetExtension2;
 import protobuf_unittest.UnittestProto;
 import protobuf_unittest.UnittestProto.TestAllExtensions;
 import protobuf_unittest.UnittestProto.TestAllTypes;
@@ -44,12 +43,13 @@ import protobuf_unittest.UnittestProto.TestOneof2;
 import protobuf_unittest.UnittestProto.TestOneofBackwardsCompatible;
 import protobuf_unittest.UnittestProto.TestPackedExtensions;
 import protobuf_unittest.UnittestProto.TestPackedTypes;
-import protobuf_unittest.UnittestMset.RawMessageSet;
-import protobuf_unittest.UnittestMset.TestMessageSetExtension1;
-import protobuf_unittest.UnittestMset.TestMessageSetExtension2;
 import proto2_wireformat_unittest.UnittestMsetWireFormat.TestMessageSet;
-import com.google.protobuf.UnittestLite.TestAllExtensionsLite;
-import com.google.protobuf.UnittestLite.TestPackedExtensionsLite;
+
+import junit.framework.TestCase;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.util.List;
 
 /**
  * Tests related to parsing and serialization.

+ 12 - 0
java/core/src/test/proto/com/google/protobuf/lite_equals_and_hash.proto

@@ -39,6 +39,13 @@ package protobuf_unittest.lite_equals_and_hash;
 option java_generate_equals_and_hash = true;
 option optimize_for = LITE_RUNTIME;
 
+// Message whose only content is a oneof holding either a string or an int32.
+// NOTE(review): presumably used to exercise the generated equals()/hashCode()
+// for oneof fields (this file sets java_generate_equals_and_hash) - confirm
+// against the tests that reference it.
+message TestOneofEquals {
+  oneof oneof_field {
+    string name = 1;
+    int32 value = 2;
+  }
+}
+
 message Foo {
   optional int32 value = 1;
   repeated Bar bar = 2;
@@ -70,3 +77,8 @@ extend Foo {
   }
 }
 
+// Message whose oneof has a single member of the enclosing message type itself,
+// i.e. a self-referential oneof.
+message TestRecursiveOneof {
+  oneof Foo {
+    TestRecursiveOneof r = 1;
+  }
+}

+ 1 - 1
java/pom.xml

@@ -64,7 +64,7 @@
       <dependency>
         <groupId>junit</groupId>
         <artifactId>junit</artifactId>
-        <version>4.4</version>
+        <version>4.12</version>
         <scope>test</scope>
       </dependency>
       <dependency>

+ 44 - 31
java/util/src/main/java/com/google/protobuf/util/FieldMaskTree.java

@@ -60,10 +60,9 @@ import java.util.logging.Logger;
  * FieldMask in a message tree.
  */
 class FieldMaskTree {
-  private static final Logger logger =
-      Logger.getLogger(FieldMaskTree.class.getName());
-  
-  private static final String FIELD_PATH_SEPARATOR_REGEX = "\\."; 
+  private static final Logger logger = Logger.getLogger(FieldMaskTree.class.getName());
+
+  private static final String FIELD_PATH_SEPARATOR_REGEX = "\\.";
 
   private static class Node {
     public TreeMap<String, Node> children = new TreeMap<String, Node>();
@@ -73,12 +72,12 @@ class FieldMaskTree {
 
   /** Creates an empty FieldMaskTree. */
   public FieldMaskTree() {}
-  
+
   /** Creates a FieldMaskTree for a given FieldMask. */
   public FieldMaskTree(FieldMask mask) {
     mergeFromFieldMask(mask);
   }
-  
+
   @Override
   public String toString() {
     return FieldMaskUtil.toString(toFieldMask());
@@ -121,7 +120,7 @@ class FieldMaskTree {
     node.children.clear();
     return this;
   }
-  
+
   /**
    * Merges all field paths in a FieldMask into this tree.
    */
@@ -149,8 +148,7 @@ class FieldMaskTree {
       return;
     }
     for (Entry<String, Node> entry : node.children.entrySet()) {
-      String childPath = path.isEmpty()
-          ? entry.getKey() : path + "." + entry.getKey();
+      String childPath = path.isEmpty() ? entry.getKey() : path + "." + entry.getKey();
       getFieldPaths(entry.getValue(), childPath, paths);
     }
   }
@@ -193,11 +191,10 @@ class FieldMaskTree {
    * Merges all fields specified by this FieldMaskTree from {@code source} to
    * {@code destination}.
    */
-  public void merge(Message source, Message.Builder destination,
-      FieldMaskUtil.MergeOptions options) {
+  public void merge(
+      Message source, Message.Builder destination, FieldMaskUtil.MergeOptions options) {
     if (source.getDescriptorForType() != destination.getDescriptorForType()) {
-      throw new IllegalArgumentException(
-          "Cannot merge messages of different types.");
+      throw new IllegalArgumentException("Cannot merge messages of different types.");
     }
     if (root.children.isEmpty()) {
       return;
@@ -208,30 +205,41 @@ class FieldMaskTree {
   /** Merges all fields specified by a sub-tree from {@code source} to
    * {@code destination}.
    */
-  private void merge(Node node, String path, Message source,
-      Message.Builder destination, FieldMaskUtil.MergeOptions options) {
+  private void merge(
+      Node node,
+      String path,
+      Message source,
+      Message.Builder destination,
+      FieldMaskUtil.MergeOptions options) {
     assert source.getDescriptorForType() == destination.getDescriptorForType();
-    
+
     Descriptor descriptor = source.getDescriptorForType();
     for (Entry<String, Node> entry : node.children.entrySet()) {
-      FieldDescriptor field =
-          descriptor.findFieldByName(entry.getKey());
+      FieldDescriptor field = descriptor.findFieldByName(entry.getKey());
       if (field == null) {
-        logger.warning("Cannot find field \"" + entry.getKey()
-            + "\" in message type " + descriptor.getFullName());
+        logger.warning(
+            "Cannot find field \""
+                + entry.getKey()
+                + "\" in message type "
+                + descriptor.getFullName());
         continue;
       }
       if (!entry.getValue().children.isEmpty()) {
-        if (field.isRepeated()
-            || field.getJavaType() != FieldDescriptor.JavaType.MESSAGE) {
-          logger.warning("Field \"" + field.getFullName() + "\" is not a "
-              + "singluar message field and cannot have sub-fields.");
+        if (field.isRepeated() || field.getJavaType() != FieldDescriptor.JavaType.MESSAGE) {
+          logger.warning(
+              "Field \""
+                  + field.getFullName()
+                  + "\" is not a "
+                  + "singluar message field and cannot have sub-fields.");
           continue;
         }
-        String childPath = path.isEmpty()
-            ? entry.getKey() : path + "." + entry.getKey();
-        merge(entry.getValue(), childPath, (Message) source.getField(field),
-            destination.getFieldBuilder(field), options);
+        String childPath = path.isEmpty() ? entry.getKey() : path + "." + entry.getKey();
+        merge(
+            entry.getValue(),
+            childPath,
+            (Message) source.getField(field),
+            destination.getFieldBuilder(field),
+            options);
         continue;
       }
       if (field.isRepeated()) {
@@ -245,10 +253,15 @@ class FieldMaskTree {
       } else {
         if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
           if (options.replaceMessageFields()) {
-            destination.setField(field, source.getField(field));
+            if (!source.hasField(field)) {
+              destination.clearField(field);
+            } else {
+              destination.setField(field, source.getField(field));
+            }
           } else {
-            destination.getFieldBuilder(field).mergeFrom(
-                (Message) source.getField(field));
+            if (source.hasField(field)) {
+              destination.getFieldBuilder(field).mergeFrom((Message) source.getField(field));
+            }
           }
         } else {
           destination.setField(field, source.getField(field));

+ 22 - 4
java/util/src/main/java/com/google/protobuf/util/JsonFormat.java

@@ -32,6 +32,7 @@ package com.google.protobuf.util;
 
 import com.google.common.io.BaseEncoding;
 import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
 import com.google.gson.JsonArray;
 import com.google.gson.JsonElement;
 import com.google.gson.JsonNull;
@@ -433,7 +434,7 @@ public class JsonFormat {
     private final Gson gson;
 
     private static class GsonHolder {
-      private static final Gson DEFAULT_GSON = new Gson();
+      private static final Gson DEFAULT_GSON = new GsonBuilder().disableHtmlEscaping().create();
     }
 
     PrinterImpl(
@@ -1065,6 +1066,15 @@ public class JsonFormat {
           parser.mergeStruct(json, builder);
         }
       });
+      // Special-case ListValue.
+      parsers.put(ListValue.getDescriptor().getFullName(),
+          new WellKnownTypeParser() {
+        @Override
+        public void merge(ParserImpl parser, JsonElement json,
+            Message.Builder builder) throws InvalidProtocolBufferException {
+          parser.mergeListValue(json, builder);
+        }
+      });
       // Special-case Value.
       parsers.put(Value.getDescriptor().getFullName(),
           new WellKnownTypeParser() {
@@ -1212,6 +1222,16 @@ public class JsonFormat {
       }
       mergeMapField(field, json, builder);
     }
+
+    private void mergeListValue(JsonElement json, Message.Builder builder)
+        throws InvalidProtocolBufferException {
+      Descriptor descriptor = builder.getDescriptorForType();
+      FieldDescriptor field = descriptor.findFieldByName("values");
+      if (field == null) {
+        throw new InvalidProtocolBufferException("Invalid ListValue type.");
+      }
+      mergeRepeatedField(field, json, builder);
+    }
     
     private void mergeValue(JsonElement json, Message.Builder builder)
         throws InvalidProtocolBufferException {
@@ -1236,9 +1256,7 @@ public class JsonFormat {
       } else if (json instanceof JsonArray) {
         FieldDescriptor field = type.findFieldByName("list_value");
         Message.Builder listBuilder = builder.newBuilderForField(field);
-        FieldDescriptor listField =
-            listBuilder.getDescriptorForType().findFieldByName("values");
-        mergeRepeatedField(listField, json, listBuilder);
+        merge(json, listBuilder);
         builder.setField(field, listBuilder.build());
       } else {
         throw new IllegalStateException("Unexpected json data: " + json);

+ 68 - 56
java/util/src/test/java/com/google/protobuf/util/FieldMaskTreeTest.java

@@ -61,19 +61,16 @@ public class FieldMaskTreeTest extends TestCase {
     tree.addFieldPath("bar");
     assertEquals("bar,foo", tree.toString());
   }
-  
+
   public void testMergeFromFieldMask() throws Exception {
-    FieldMaskTree tree = new FieldMaskTree(
-      FieldMaskUtil.fromString("foo,bar.baz,bar.quz"));
+    FieldMaskTree tree = new FieldMaskTree(FieldMaskUtil.fromString("foo,bar.baz,bar.quz"));
     assertEquals("bar.baz,bar.quz,foo", tree.toString());
-    tree.mergeFromFieldMask(
-      FieldMaskUtil.fromString("foo.bar,bar"));
+    tree.mergeFromFieldMask(FieldMaskUtil.fromString("foo.bar,bar"));
     assertEquals("bar,foo", tree.toString());
   }
-  
+
   public void testIntersectFieldPath() throws Exception {
-    FieldMaskTree tree = new FieldMaskTree(
-      FieldMaskUtil.fromString("foo,bar.baz,bar.quz"));
+    FieldMaskTree tree = new FieldMaskTree(FieldMaskUtil.fromString("foo,bar.baz,bar.quz"));
     FieldMaskTree result = new FieldMaskTree();
     // Empty path.
     tree.intersectFieldPath("", result);
@@ -96,16 +93,18 @@ public class FieldMaskTreeTest extends TestCase {
   }
 
   public void testMerge() throws Exception {
-    TestAllTypes value = TestAllTypes.newBuilder()
-        .setOptionalInt32(1234)
-        .setOptionalNestedMessage(NestedMessage.newBuilder().setBb(5678))
-        .addRepeatedInt32(4321)
-        .addRepeatedNestedMessage(NestedMessage.newBuilder().setBb(8765))
-        .build();
-    NestedTestAllTypes source = NestedTestAllTypes.newBuilder()
-        .setPayload(value)
-        .setChild(NestedTestAllTypes.newBuilder().setPayload(value))
-        .build();
+    TestAllTypes value =
+        TestAllTypes.newBuilder()
+            .setOptionalInt32(1234)
+            .setOptionalNestedMessage(NestedMessage.newBuilder().setBb(5678))
+            .addRepeatedInt32(4321)
+            .addRepeatedNestedMessage(NestedMessage.newBuilder().setBb(8765))
+            .build();
+    NestedTestAllTypes source =
+        NestedTestAllTypes.newBuilder()
+            .setPayload(value)
+            .setChild(NestedTestAllTypes.newBuilder().setPayload(value))
+            .build();
     // Now we have a message source with the following structure:
     //   [root] -+- payload -+- optional_int32
     //           |           +- optional_nested_message
@@ -116,114 +115,127 @@ public class FieldMaskTreeTest extends TestCase {
     //                                 +- optional_nested_message
     //                                 +- repeated_int32
     //                                 +- repeated_nested_message
-    
+
     FieldMaskUtil.MergeOptions options = new FieldMaskUtil.MergeOptions();
-    
+
     // Test merging each individual field.
     NestedTestAllTypes.Builder builder = NestedTestAllTypes.newBuilder();
-    new FieldMaskTree().addFieldPath("payload.optional_int32")
-        .merge(source, builder, options);
+    new FieldMaskTree().addFieldPath("payload.optional_int32").merge(source, builder, options);
     NestedTestAllTypes.Builder expected = NestedTestAllTypes.newBuilder();
     expected.getPayloadBuilder().setOptionalInt32(1234);
     assertEquals(expected.build(), builder.build());
 
     builder = NestedTestAllTypes.newBuilder();
-    new FieldMaskTree().addFieldPath("payload.optional_nested_message")
+    new FieldMaskTree()
+        .addFieldPath("payload.optional_nested_message")
         .merge(source, builder, options);
     expected = NestedTestAllTypes.newBuilder();
-    expected.getPayloadBuilder().setOptionalNestedMessage(
-        NestedMessage.newBuilder().setBb(5678));
+    expected.getPayloadBuilder().setOptionalNestedMessage(NestedMessage.newBuilder().setBb(5678));
     assertEquals(expected.build(), builder.build());
 
-
     builder = NestedTestAllTypes.newBuilder();
-    new FieldMaskTree().addFieldPath("payload.repeated_int32")
-        .merge(source, builder, options);
+    new FieldMaskTree().addFieldPath("payload.repeated_int32").merge(source, builder, options);
     expected = NestedTestAllTypes.newBuilder();
     expected.getPayloadBuilder().addRepeatedInt32(4321);
     assertEquals(expected.build(), builder.build());
 
     builder = NestedTestAllTypes.newBuilder();
-    new FieldMaskTree().addFieldPath("payload.repeated_nested_message")
+    new FieldMaskTree()
+        .addFieldPath("payload.repeated_nested_message")
         .merge(source, builder, options);
     expected = NestedTestAllTypes.newBuilder();
-    expected.getPayloadBuilder().addRepeatedNestedMessage(
-        NestedMessage.newBuilder().setBb(8765));
+    expected.getPayloadBuilder().addRepeatedNestedMessage(NestedMessage.newBuilder().setBb(8765));
     assertEquals(expected.build(), builder.build());
 
     builder = NestedTestAllTypes.newBuilder();
-    new FieldMaskTree().addFieldPath("child.payload.optional_int32")
+    new FieldMaskTree()
+        .addFieldPath("child.payload.optional_int32")
         .merge(source, builder, options);
     expected = NestedTestAllTypes.newBuilder();
     expected.getChildBuilder().getPayloadBuilder().setOptionalInt32(1234);
     assertEquals(expected.build(), builder.build());
 
     builder = NestedTestAllTypes.newBuilder();
-    new FieldMaskTree().addFieldPath("child.payload.optional_nested_message")
+    new FieldMaskTree()
+        .addFieldPath("child.payload.optional_nested_message")
         .merge(source, builder, options);
     expected = NestedTestAllTypes.newBuilder();
-    expected.getChildBuilder().getPayloadBuilder().setOptionalNestedMessage(
-        NestedMessage.newBuilder().setBb(5678));
+    expected
+        .getChildBuilder()
+        .getPayloadBuilder()
+        .setOptionalNestedMessage(NestedMessage.newBuilder().setBb(5678));
     assertEquals(expected.build(), builder.build());
 
-
     builder = NestedTestAllTypes.newBuilder();
-    new FieldMaskTree().addFieldPath("child.payload.repeated_int32")
+    new FieldMaskTree()
+        .addFieldPath("child.payload.repeated_int32")
         .merge(source, builder, options);
     expected = NestedTestAllTypes.newBuilder();
     expected.getChildBuilder().getPayloadBuilder().addRepeatedInt32(4321);
     assertEquals(expected.build(), builder.build());
 
-
     builder = NestedTestAllTypes.newBuilder();
-    new FieldMaskTree().addFieldPath("child.payload.repeated_nested_message")
+    new FieldMaskTree()
+        .addFieldPath("child.payload.repeated_nested_message")
         .merge(source, builder, options);
     expected = NestedTestAllTypes.newBuilder();
-    expected.getChildBuilder().getPayloadBuilder().addRepeatedNestedMessage(
-        NestedMessage.newBuilder().setBb(8765));
+    expected
+        .getChildBuilder()
+        .getPayloadBuilder()
+        .addRepeatedNestedMessage(NestedMessage.newBuilder().setBb(8765));
     assertEquals(expected.build(), builder.build());
-    
+
     // Test merging all fields.
     builder = NestedTestAllTypes.newBuilder();
-    new FieldMaskTree().addFieldPath("child").addFieldPath("payload")
-    .merge(source, builder, options);
+    new FieldMaskTree()
+        .addFieldPath("child")
+        .addFieldPath("payload")
+        .merge(source, builder, options);
     assertEquals(source, builder.build());
-    
+
     // Test repeated options.
     builder = NestedTestAllTypes.newBuilder();
     builder.getPayloadBuilder().addRepeatedInt32(1000);
-    new FieldMaskTree().addFieldPath("payload.repeated_int32")
-    .merge(source, builder, options);
+    new FieldMaskTree().addFieldPath("payload.repeated_int32").merge(source, builder, options);
     // Default behavior is to append repeated fields.
     assertEquals(2, builder.getPayload().getRepeatedInt32Count());
     assertEquals(1000, builder.getPayload().getRepeatedInt32(0));
     assertEquals(4321, builder.getPayload().getRepeatedInt32(1));
     // Change to replace repeated fields.
     options.setReplaceRepeatedFields(true);
-    new FieldMaskTree().addFieldPath("payload.repeated_int32")
-    .merge(source, builder, options);
+    new FieldMaskTree().addFieldPath("payload.repeated_int32").merge(source, builder, options);
     assertEquals(1, builder.getPayload().getRepeatedInt32Count());
     assertEquals(4321, builder.getPayload().getRepeatedInt32(0));
-    
+
     // Test message options.
     builder = NestedTestAllTypes.newBuilder();
     builder.getPayloadBuilder().setOptionalInt32(1000);
     builder.getPayloadBuilder().setOptionalUint32(2000);
-    new FieldMaskTree().addFieldPath("payload")
-      .merge(source, builder, options);
+    new FieldMaskTree().addFieldPath("payload").merge(source, builder, options);
     // Default behavior is to merge message fields.
     assertEquals(1234, builder.getPayload().getOptionalInt32());
     assertEquals(2000, builder.getPayload().getOptionalUint32());
-    
+
+    // Test merging unset message fields.
+    NestedTestAllTypes clearedSource = source.toBuilder().clearPayload().build();
+    builder = NestedTestAllTypes.newBuilder();
+    new FieldMaskTree().addFieldPath("payload").merge(clearedSource, builder, options);
+    assertEquals(false, builder.hasPayload());
+
     // Change to replace message fields.
     options.setReplaceMessageFields(true);
     builder = NestedTestAllTypes.newBuilder();
     builder.getPayloadBuilder().setOptionalInt32(1000);
     builder.getPayloadBuilder().setOptionalUint32(2000);
-    new FieldMaskTree().addFieldPath("payload")
-      .merge(source, builder, options);
+    new FieldMaskTree().addFieldPath("payload").merge(source, builder, options);
     assertEquals(1234, builder.getPayload().getOptionalInt32());
     assertEquals(0, builder.getPayload().getOptionalUint32());
+
+    // Test merging unset message fields.
+    builder = NestedTestAllTypes.newBuilder();
+    builder.getPayloadBuilder().setOptionalInt32(1000);
+    builder.getPayloadBuilder().setOptionalUint32(2000);
+    new FieldMaskTree().addFieldPath("payload").merge(clearedSource, builder, options);
+    assertEquals(false, builder.hasPayload());
   }
 }
-

+ 129 - 111
java/util/src/test/java/com/google/protobuf/util/JsonFormatTest.java

@@ -122,11 +122,11 @@ public class JsonFormatTest extends TestCase {
     builder.addRepeatedNestedEnum(NestedEnum.BAZ);
     builder.addRepeatedNestedMessageBuilder().setValue(200);
   }
-  
+
   private void assertRoundTripEquals(Message message) throws Exception {
     assertRoundTripEquals(message, TypeRegistry.getEmptyTypeRegistry());
   }
-  
+
   private void assertRoundTripEquals(Message message, TypeRegistry registry) throws Exception {
     JsonFormat.Printer printer = JsonFormat.printer().usingTypeRegistry(registry);
     JsonFormat.Parser parser = JsonFormat.parser().usingTypeRegistry(registry);
@@ -135,68 +135,68 @@ public class JsonFormatTest extends TestCase {
     Message parsedMessage = builder.build();
     assertEquals(message.toString(), parsedMessage.toString());
   }
-  
+
   private String toJsonString(Message message) throws IOException {
     return JsonFormat.printer().print(message);
   }
-  
+
   private void mergeFromJson(String json, Message.Builder builder) throws IOException {
     JsonFormat.parser().merge(json, builder);
   }
-  
+
   public void testAllFields() throws Exception {
     TestAllTypes.Builder builder = TestAllTypes.newBuilder();
     setAllFields(builder);
     TestAllTypes message = builder.build();
-    
-    assertEquals(        
+
+    assertEquals(
         "{\n"
-        + "  \"optionalInt32\": 1234,\n"
-        + "  \"optionalInt64\": \"1234567890123456789\",\n"
-        + "  \"optionalUint32\": 5678,\n"
-        + "  \"optionalUint64\": \"2345678901234567890\",\n"
-        + "  \"optionalSint32\": 9012,\n"
-        + "  \"optionalSint64\": \"3456789012345678901\",\n"
-        + "  \"optionalFixed32\": 3456,\n"
-        + "  \"optionalFixed64\": \"4567890123456789012\",\n"
-        + "  \"optionalSfixed32\": 7890,\n"
-        + "  \"optionalSfixed64\": \"5678901234567890123\",\n"
-        + "  \"optionalFloat\": 1.5,\n"
-        + "  \"optionalDouble\": 1.25,\n"
-        + "  \"optionalBool\": true,\n"
-        + "  \"optionalString\": \"Hello world!\",\n"
-        + "  \"optionalBytes\": \"AAEC\",\n"
-        + "  \"optionalNestedMessage\": {\n"
-        + "    \"value\": 100\n"
-        + "  },\n"
-        + "  \"optionalNestedEnum\": \"BAR\",\n"
-        + "  \"repeatedInt32\": [1234, 234],\n"
-        + "  \"repeatedInt64\": [\"1234567890123456789\", \"234567890123456789\"],\n"
-        + "  \"repeatedUint32\": [5678, 678],\n"
-        + "  \"repeatedUint64\": [\"2345678901234567890\", \"345678901234567890\"],\n"
-        + "  \"repeatedSint32\": [9012, 10],\n"
-        + "  \"repeatedSint64\": [\"3456789012345678901\", \"456789012345678901\"],\n"
-        + "  \"repeatedFixed32\": [3456, 456],\n"
-        + "  \"repeatedFixed64\": [\"4567890123456789012\", \"567890123456789012\"],\n"
-        + "  \"repeatedSfixed32\": [7890, 890],\n"
-        + "  \"repeatedSfixed64\": [\"5678901234567890123\", \"678901234567890123\"],\n"
-        + "  \"repeatedFloat\": [1.5, 11.5],\n"
-        + "  \"repeatedDouble\": [1.25, 11.25],\n"
-        + "  \"repeatedBool\": [true, true],\n"
-        + "  \"repeatedString\": [\"Hello world!\", \"ello world!\"],\n"
-        + "  \"repeatedBytes\": [\"AAEC\", \"AQI=\"],\n"
-        + "  \"repeatedNestedMessage\": [{\n"
-        + "    \"value\": 100\n"
-        + "  }, {\n"
-        + "    \"value\": 200\n"
-        + "  }],\n"
-        + "  \"repeatedNestedEnum\": [\"BAR\", \"BAZ\"]\n"
-        + "}",
+            + "  \"optionalInt32\": 1234,\n"
+            + "  \"optionalInt64\": \"1234567890123456789\",\n"
+            + "  \"optionalUint32\": 5678,\n"
+            + "  \"optionalUint64\": \"2345678901234567890\",\n"
+            + "  \"optionalSint32\": 9012,\n"
+            + "  \"optionalSint64\": \"3456789012345678901\",\n"
+            + "  \"optionalFixed32\": 3456,\n"
+            + "  \"optionalFixed64\": \"4567890123456789012\",\n"
+            + "  \"optionalSfixed32\": 7890,\n"
+            + "  \"optionalSfixed64\": \"5678901234567890123\",\n"
+            + "  \"optionalFloat\": 1.5,\n"
+            + "  \"optionalDouble\": 1.25,\n"
+            + "  \"optionalBool\": true,\n"
+            + "  \"optionalString\": \"Hello world!\",\n"
+            + "  \"optionalBytes\": \"AAEC\",\n"
+            + "  \"optionalNestedMessage\": {\n"
+            + "    \"value\": 100\n"
+            + "  },\n"
+            + "  \"optionalNestedEnum\": \"BAR\",\n"
+            + "  \"repeatedInt32\": [1234, 234],\n"
+            + "  \"repeatedInt64\": [\"1234567890123456789\", \"234567890123456789\"],\n"
+            + "  \"repeatedUint32\": [5678, 678],\n"
+            + "  \"repeatedUint64\": [\"2345678901234567890\", \"345678901234567890\"],\n"
+            + "  \"repeatedSint32\": [9012, 10],\n"
+            + "  \"repeatedSint64\": [\"3456789012345678901\", \"456789012345678901\"],\n"
+            + "  \"repeatedFixed32\": [3456, 456],\n"
+            + "  \"repeatedFixed64\": [\"4567890123456789012\", \"567890123456789012\"],\n"
+            + "  \"repeatedSfixed32\": [7890, 890],\n"
+            + "  \"repeatedSfixed64\": [\"5678901234567890123\", \"678901234567890123\"],\n"
+            + "  \"repeatedFloat\": [1.5, 11.5],\n"
+            + "  \"repeatedDouble\": [1.25, 11.25],\n"
+            + "  \"repeatedBool\": [true, true],\n"
+            + "  \"repeatedString\": [\"Hello world!\", \"ello world!\"],\n"
+            + "  \"repeatedBytes\": [\"AAEC\", \"AQI=\"],\n"
+            + "  \"repeatedNestedMessage\": [{\n"
+            + "    \"value\": 100\n"
+            + "  }, {\n"
+            + "    \"value\": 200\n"
+            + "  }],\n"
+            + "  \"repeatedNestedEnum\": [\"BAR\", \"BAZ\"]\n"
+            + "}",
         toJsonString(message));
-    
+
     assertRoundTripEquals(message);
   }
-  
+
   public void testUnknownEnumValues() throws Exception {
     TestAllTypes message = TestAllTypes.newBuilder()
         .setOptionalNestedEnumValue(12345)
@@ -209,21 +209,22 @@ public class JsonFormatTest extends TestCase {
         + "  \"repeatedNestedEnum\": [12345, \"FOO\"]\n"
         + "}", toJsonString(message));
     assertRoundTripEquals(message);
-    
+
     TestMap.Builder mapBuilder = TestMap.newBuilder();
     mapBuilder.getMutableInt32ToEnumMapValue().put(1, 0);
     mapBuilder.getMutableInt32ToEnumMapValue().put(2, 12345);
     TestMap mapMessage = mapBuilder.build();
     assertEquals(
-        "{\n" 
-        + "  \"int32ToEnumMap\": {\n" 
-        + "    \"1\": \"FOO\",\n"
-        + "    \"2\": 12345\n"
-        + "  }\n" 
-        + "}", toJsonString(mapMessage));
+        "{\n"
+            + "  \"int32ToEnumMap\": {\n"
+            + "    \"1\": \"FOO\",\n"
+            + "    \"2\": 12345\n"
+            + "  }\n"
+            + "}",
+        toJsonString(mapMessage));
     assertRoundTripEquals(mapMessage);
   }
-  
+
   public void testSpecialFloatValues() throws Exception {
     TestAllTypes message = TestAllTypes.newBuilder()
         .addRepeatedFloat(Float.NaN)
@@ -238,10 +239,10 @@ public class JsonFormatTest extends TestCase {
         + "  \"repeatedFloat\": [\"NaN\", \"Infinity\", \"-Infinity\"],\n"
         + "  \"repeatedDouble\": [\"NaN\", \"Infinity\", \"-Infinity\"]\n"
         + "}", toJsonString(message));
-    
+
     assertRoundTripEquals(message);
   }
-  
+
   public void testParserAcceptStringForNumbericField() throws Exception {
     TestAllTypes.Builder builder = TestAllTypes.newBuilder();
     mergeFromJson(
@@ -265,7 +266,7 @@ public class JsonFormatTest extends TestCase {
     assertEquals(1.25, message.getOptionalDouble());
     assertEquals(true, message.getOptionalBool());
   }
-  
+
   public void testParserAcceptFloatingPointValueForIntegerField() throws Exception {
     // Test that numeric values like "1.000", "1e5" will also be accepted.
     TestAllTypes.Builder builder = TestAllTypes.newBuilder();
@@ -287,14 +288,14 @@ public class JsonFormatTest extends TestCase {
       assertEquals(expectedValues[i], builder.getRepeatedInt64(i));
       assertEquals(expectedValues[i], builder.getRepeatedUint64(i));
     }
-    
+
     // Non-integers will still be rejected.
     assertRejects("optionalInt32", "1.5");
     assertRejects("optionalUint32", "1.5");
     assertRejects("optionalInt64", "1.5");
     assertRejects("optionalUint64", "1.5");
   }
-  
+
   private void assertRejects(String name, String value) {
     TestAllTypes.Builder builder = TestAllTypes.newBuilder();
     try {
@@ -312,7 +313,7 @@ public class JsonFormatTest extends TestCase {
       // Expected.
     }
   }
-  
+
   private void assertAccepts(String name, String value) throws IOException {
     TestAllTypes.Builder builder = TestAllTypes.newBuilder();
     // Both numeric form and string form are accepted.
@@ -320,17 +321,17 @@ public class JsonFormatTest extends TestCase {
     builder.clear();
     mergeFromJson("{\"" + name + "\":\"" + value + "\"}", builder);
   }
-  
+
   public void testParserRejectOutOfRangeNumericValues() throws Exception {
     assertAccepts("optionalInt32", String.valueOf(Integer.MAX_VALUE));
     assertAccepts("optionalInt32", String.valueOf(Integer.MIN_VALUE));
     assertRejects("optionalInt32", String.valueOf(Integer.MAX_VALUE + 1L));
     assertRejects("optionalInt32", String.valueOf(Integer.MIN_VALUE - 1L));
-    
+
     assertAccepts("optionalUint32", String.valueOf(Integer.MAX_VALUE + 1L));
     assertRejects("optionalUint32", "123456789012345");
     assertRejects("optionalUint32", "-1");
-    
+
     BigInteger one = new BigInteger("1");
     BigInteger maxLong = new BigInteger(String.valueOf(Long.MAX_VALUE));
     BigInteger minLong = new BigInteger(String.valueOf(Long.MIN_VALUE));
@@ -351,7 +352,7 @@ public class JsonFormatTest extends TestCase {
     assertAccepts("optionalFloat", String.valueOf(-Float.MAX_VALUE));
     assertRejects("optionalFloat", String.valueOf(Double.MAX_VALUE));
     assertRejects("optionalFloat", String.valueOf(-Double.MAX_VALUE));
-    
+
     BigDecimal moreThanOne = new BigDecimal("1.000001");
     BigDecimal maxDouble = new BigDecimal(Double.MAX_VALUE);
     BigDecimal minDouble = new BigDecimal(-Double.MAX_VALUE);
@@ -360,7 +361,7 @@ public class JsonFormatTest extends TestCase {
     assertRejects("optionalDouble", maxDouble.multiply(moreThanOne).toString());
     assertRejects("optionalDouble", minDouble.multiply(moreThanOne).toString());
   }
-  
+
   public void testParserAcceptNull() throws Exception {
     TestAllTypes.Builder builder = TestAllTypes.newBuilder();
     mergeFromJson(
@@ -402,7 +403,7 @@ public class JsonFormatTest extends TestCase {
         + "}", builder);
     TestAllTypes message = builder.build();
     assertEquals(TestAllTypes.getDefaultInstance(), message);
-    
+
     // Repeated field elements cannot be null.
     try {
       builder = TestAllTypes.newBuilder();
@@ -414,7 +415,7 @@ public class JsonFormatTest extends TestCase {
     } catch (InvalidProtocolBufferException e) {
       // Exception expected.
     }
-    
+
     try {
       builder = TestAllTypes.newBuilder();
       mergeFromJson(
@@ -426,14 +427,14 @@ public class JsonFormatTest extends TestCase {
       // Exception expected.
     }
   }
-  
+
   public void testParserRejectDuplicatedFields() throws Exception {
     // TODO(xiaofeng): The parser we are currently using (GSON) will accept and keep the last
     // one if multiple entries have the same name. This is not the desired behavior but it can
     // only be fixed by using our own parser. Here we only test the cases where the names are
     // different but still referring to the same field.
-    
-    // Duplicated optional fields. 
+
+    // Duplicated optional fields.
     try {
       TestAllTypes.Builder builder = TestAllTypes.newBuilder();
       mergeFromJson(
@@ -445,7 +446,7 @@ public class JsonFormatTest extends TestCase {
     } catch (InvalidProtocolBufferException e) {
       // Exception expected.
     }
-    
+
     // Duplicated repeated fields.
     try {
       TestAllTypes.Builder builder = TestAllTypes.newBuilder();
@@ -458,7 +459,7 @@ public class JsonFormatTest extends TestCase {
     } catch (InvalidProtocolBufferException e) {
       // Exception expected.
     }
-    
+
     // Duplicated oneof fields.
     try {
       TestOneof.Builder builder = TestOneof.newBuilder();
@@ -472,7 +473,7 @@ public class JsonFormatTest extends TestCase {
       // Exception expected.
     }
   }
-  
+
   public void testMapFields() throws Exception {
     TestMap.Builder builder = TestMap.newBuilder();
     builder.getMutableInt32ToInt32Map().put(1, 10);
@@ -507,7 +508,7 @@ public class JsonFormatTest extends TestCase {
         8, NestedMessage.newBuilder().setValue(1234).build());
     builder.getMutableInt32ToEnumMap().put(9, NestedEnum.BAR);
     TestMap message = builder.build();
-    
+
     assertEquals(
         "{\n"
         + "  \"int32ToInt32Map\": {\n"
@@ -598,13 +599,13 @@ public class JsonFormatTest extends TestCase {
         + "  }\n"
         + "}", toJsonString(message));
     assertRoundTripEquals(message);
-    
+
     // Test multiple entries.
     builder = TestMap.newBuilder();
     builder.getMutableInt32ToInt32Map().put(1, 2);
     builder.getMutableInt32ToInt32Map().put(3, 4);
     message = builder.build();
-    
+
     assertEquals(
         "{\n"
         + "  \"int32ToInt32Map\": {\n"
@@ -614,7 +615,7 @@ public class JsonFormatTest extends TestCase {
         + "}", toJsonString(message));
     assertRoundTripEquals(message);
   }
-  
+
   public void testMapNullValueIsRejected() throws Exception {
     try {
       TestMap.Builder builder = TestMap.newBuilder();
@@ -627,7 +628,7 @@ public class JsonFormatTest extends TestCase {
     } catch (InvalidProtocolBufferException e) {
       // Exception expected.
     }
-    
+
     try {
       TestMap.Builder builder = TestMap.newBuilder();
       mergeFromJson(
@@ -640,7 +641,7 @@ public class JsonFormatTest extends TestCase {
       // Exception expected.
     }
   }
-  
+
   public void testParserAcceptNonQuotedObjectKey() throws Exception {
     TestMap.Builder builder = TestMap.newBuilder();
     mergeFromJson(
@@ -652,7 +653,7 @@ public class JsonFormatTest extends TestCase {
     assertEquals(2, message.getInt32ToInt32Map().get(1).intValue());
     assertEquals(3, message.getStringToInt32Map().get("hello").intValue());
   }
-  
+
   public void testWrappers() throws Exception {
     TestWrappers.Builder builder = TestWrappers.newBuilder();
     builder.getBoolValueBuilder().setValue(false);
@@ -665,7 +666,7 @@ public class JsonFormatTest extends TestCase {
     builder.getStringValueBuilder().setValue("");
     builder.getBytesValueBuilder().setValue(ByteString.EMPTY);
     TestWrappers message = builder.build();
-    
+
     assertEquals(
         "{\n"
         + "  \"int32Value\": 0,\n"
@@ -691,7 +692,7 @@ public class JsonFormatTest extends TestCase {
     builder.getStringValueBuilder().setValue("7");
     builder.getBytesValueBuilder().setValue(ByteString.copyFrom(new byte[]{8}));
     message = builder.build();
-    
+
     assertEquals(
         "{\n"
         + "  \"int32Value\": 1,\n"
@@ -706,43 +707,43 @@ public class JsonFormatTest extends TestCase {
         + "}", toJsonString(message));
     assertRoundTripEquals(message);
   }
-  
+
   public void testTimestamp() throws Exception {
     TestTimestamp message = TestTimestamp.newBuilder()
         .setTimestampValue(TimeUtil.parseTimestamp("1970-01-01T00:00:00Z"))
         .build();
-    
+
     assertEquals(
         "{\n"
         + "  \"timestampValue\": \"1970-01-01T00:00:00Z\"\n"
         + "}", toJsonString(message));
     assertRoundTripEquals(message);
   }
-  
+
   public void testDuration() throws Exception {
     TestDuration message = TestDuration.newBuilder()
         .setDurationValue(TimeUtil.parseDuration("12345s"))
         .build();
-    
+
     assertEquals(
         "{\n"
         + "  \"durationValue\": \"12345s\"\n"
         + "}", toJsonString(message));
     assertRoundTripEquals(message);
   }
-  
+
   public void testFieldMask() throws Exception {
     TestFieldMask message = TestFieldMask.newBuilder()
         .setFieldMaskValue(FieldMaskUtil.fromString("foo.bar,baz"))
         .build();
-    
+
     assertEquals(
         "{\n"
         + "  \"fieldMaskValue\": \"foo.bar,baz\"\n"
         + "}", toJsonString(message));
     assertRoundTripEquals(message);
   }
-  
+
   public void testStruct() throws Exception {
     // Build a struct with all possible values.
     TestStruct.Builder builder = TestStruct.newBuilder();
@@ -764,7 +765,7 @@ public class JsonFormatTest extends TestCase {
     structBuilder.getMutableFields().put(
         "list_value", Value.newBuilder().setListValue(listBuilder.build()).build());
     TestStruct message = builder.build();
-    
+
     assertEquals(
         "{\n"
         + "  \"structValue\": {\n"
@@ -778,7 +779,7 @@ public class JsonFormatTest extends TestCase {
         + "  }\n"
         + "}", toJsonString(message));
     assertRoundTripEquals(message);
-    
+
     builder = TestStruct.newBuilder();
     builder.setValue(Value.newBuilder().setNullValueValue(0).build());
     message = builder.build();
@@ -787,12 +788,23 @@ public class JsonFormatTest extends TestCase {
         + "  \"value\": null\n"
         + "}", toJsonString(message));
     assertRoundTripEquals(message);
+
+    builder = TestStruct.newBuilder();
+    listBuilder = builder.getListValueBuilder();
+    listBuilder.addValues(Value.newBuilder().setNumberValue(31831.125).build());
+    listBuilder.addValues(Value.newBuilder().setNullValueValue(0).build());
+    message = builder.build();
+    assertEquals(
+        "{\n"
+        + "  \"listValue\": [31831.125, null]\n"
+        + "}", toJsonString(message));
+    assertRoundTripEquals(message);
   }
-  
+
   public void testAnyFields() throws Exception {
     TestAllTypes content = TestAllTypes.newBuilder().setOptionalInt32(1234).build();
     TestAny message = TestAny.newBuilder().setAnyValue(Any.pack(content)).build();
-    
+
     // A TypeRegistry must be provided in order to convert Any types.
     try {
       toJsonString(message);
@@ -800,11 +812,11 @@ public class JsonFormatTest extends TestCase {
     } catch (IOException e) {
       // Expected.
     }
-    
+
     JsonFormat.TypeRegistry registry = JsonFormat.TypeRegistry.newBuilder()
         .add(TestAllTypes.getDescriptor()).build();
     JsonFormat.Printer printer = JsonFormat.printer().usingTypeRegistry(registry);
-    
+
     assertEquals(
         "{\n"
         + "  \"anyValue\": {\n"
@@ -813,8 +825,8 @@ public class JsonFormatTest extends TestCase {
         + "  }\n"
         + "}" , printer.print(message));
     assertRoundTripEquals(message, registry);
-    
-    
+
+
     // Well-known types have a special formatting when embedded in Any.
     //
     // 1. Any in Any.
@@ -828,7 +840,7 @@ public class JsonFormatTest extends TestCase {
         + "  }\n"
         + "}", printer.print(anyMessage));
     assertRoundTripEquals(anyMessage, registry);
-    
+
     // 2. Wrappers in Any.
     anyMessage = Any.pack(Int32Value.newBuilder().setValue(12345).build());
     assertEquals(
@@ -894,7 +906,7 @@ public class JsonFormatTest extends TestCase {
         + "  \"value\": \"AQI=\"\n"
         + "}", printer.print(anyMessage));
     assertRoundTripEquals(anyMessage, registry);
-    
+
     // 3. Timestamp in Any.
     anyMessage = Any.pack(TimeUtil.parseTimestamp("1969-12-31T23:59:59Z"));
     assertEquals(
@@ -903,7 +915,7 @@ public class JsonFormatTest extends TestCase {
         + "  \"value\": \"1969-12-31T23:59:59Z\"\n"
         + "}", printer.print(anyMessage));
     assertRoundTripEquals(anyMessage, registry);
-    
+
     // 4. Duration in Any
     anyMessage = Any.pack(TimeUtil.parseDuration("12345.10s"));
     assertEquals(
@@ -945,7 +957,7 @@ public class JsonFormatTest extends TestCase {
         + "}", printer.print(anyMessage));
     assertRoundTripEquals(anyMessage, registry);
   }
-  
+
   public void testParserMissingTypeUrl() throws Exception {
     try {
       Any.Builder builder = Any.newBuilder();
@@ -958,7 +970,7 @@ public class JsonFormatTest extends TestCase {
       // Expected.
     }
   }
-  
+
   public void testParserUnexpectedTypeUrl() throws Exception {
     try {
       TestAllTypes.Builder builder = TestAllTypes.newBuilder();
@@ -970,9 +982,9 @@ public class JsonFormatTest extends TestCase {
       fail("Exception is expected.");
     } catch (IOException e) {
       // Expected.
-    } 
+    }
   }
-  
+
   public void testParserRejectTrailingComma() throws Exception {
     try {
       TestAllTypes.Builder builder = TestAllTypes.newBuilder();
@@ -1000,13 +1012,13 @@ public class JsonFormatTest extends TestCase {
     //   // Expected.
     // }
   }
-  
+
   public void testParserRejectInvalidBase64() throws Exception {
     assertRejects("optionalBytes", "!@#$");
     // We use standard BASE64 with paddings.
     assertRejects("optionalBytes", "AQI");
   }
-  
+
   public void testParserRejectInvalidEnumValue() throws Exception {
     try {
       TestAllTypes.Builder builder = TestAllTypes.newBuilder();
@@ -1017,7 +1029,7 @@ public class JsonFormatTest extends TestCase {
       fail("Exception is expected.");
     } catch (InvalidProtocolBufferException e) {
       // Expected.
-    } 
+    }
   }
 
   public void testCustomJsonName() throws Exception {
@@ -1026,6 +1038,12 @@ public class JsonFormatTest extends TestCase {
     assertRoundTripEquals(message);
   }
 
+  public void testDefaultGsonDoesNotHtmlEscape() throws Exception {
+    TestAllTypes message = TestAllTypes.newBuilder().setOptionalString("=").build();
+    assertEquals(
+        "{\n" + "  \"optionalString\": \"=\"" + "\n}", JsonFormat.printer().print(message));
+  }
+
   public void testIncludingDefaultValueFields() throws Exception {
     TestAllTypes message = TestAllTypes.getDefaultInstance();
     assertEquals("{\n}", JsonFormat.printer().print(message));

+ 31 - 0
java/util/src/test/proto/com/google/protobuf/util/json_test.proto

@@ -28,6 +28,36 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 syntax = "proto3";
 
 package json_test;
@@ -159,6 +189,7 @@ message TestFieldMask {
 message TestStruct {
   google.protobuf.Struct struct_value = 1;
   google.protobuf.Value value = 2;
+  google.protobuf.ListValue list_value = 3;
 }
 
 message TestAny {

+ 43 - 3
js/binary/constants.js

@@ -41,11 +41,16 @@ goog.provide('jspb.BinaryMessage');
 goog.provide('jspb.BuilderFunction');
 goog.provide('jspb.ByteSource');
 goog.provide('jspb.ClonerFunction');
+goog.provide('jspb.ComparerFunction');
 goog.provide('jspb.ConstBinaryMessage');
+goog.provide('jspb.PrunerFunction');
 goog.provide('jspb.ReaderFunction');
 goog.provide('jspb.RecyclerFunction');
+goog.provide('jspb.RepeatedFieldType');
+goog.provide('jspb.ScalarFieldType');
 goog.provide('jspb.WriterFunction');
 
+
 goog.forwardDeclare('jspb.Message');
 goog.forwardDeclare('jsproto.BinaryExtension');
 
@@ -78,12 +83,30 @@ jspb.BinaryMessage = function() {};
 jspb.ByteSource;
 
 
+/**
+ * A scalar field in jspb can be a boolean, number, or string.
+ * @typedef {boolean|number|string}
+ */
+jspb.ScalarFieldType;
+
+
+/**
+ * A repeated field in jspb is an array of scalars, blobs, or messages.
+ * @typedef {!Array<jspb.ScalarFieldType>|
+             !Array<!Uint8Array>|
+             !Array<!jspb.BinaryMessage>}
+ */
+jspb.RepeatedFieldType;
+
+
 /**
  * A field in jspb can be a scalar, a block of bytes, another proto, or an
  * array of any of the above.
- * @typedef {boolean|number|string|Uint8Array|
-             jspb.BinaryMessage|jsproto.BinaryExtension|
-             Array<jspb.AnyFieldType>}
+ * @typedef {jspb.ScalarFieldType|
+             jspb.RepeatedFieldType|
+             !Uint8Array|
+             !jspb.BinaryMessage|
+             !jsproto.BinaryExtension}
  */
 jspb.AnyFieldType;
 
@@ -124,6 +147,23 @@ jspb.ReaderFunction;
 jspb.WriterFunction;
 
 
+/**
+ * A pruner function removes default-valued fields and empty submessages from a
+ * message and returns either the pruned message or null if the entire message
+ * was pruned away.
+ * @typedef {function(?jspb.BinaryMessage):?jspb.BinaryMessage}
+ */
+jspb.PrunerFunction;
+
+
+/**
+ * A comparer function returns true if two protos are equal.
+ * @typedef {!function(?jspb.ConstBinaryMessage,
+ *                     ?jspb.ConstBinaryMessage):boolean}
+ */
+jspb.ComparerFunction;
+
+
 /**
  * Field type codes, taken from proto2/public/wire_format_lite.h.
  * @enum {number}

+ 7 - 5
js/binary/decoder.js

@@ -223,7 +223,7 @@ jspb.BinaryIterator.prototype.next = function() {
 jspb.BinaryDecoder = function(opt_bytes, opt_start, opt_length) {
   /**
    * Typed byte-wise view of the source buffer.
-   * @private {Uint8Array}
+   * @private {?Uint8Array}
    */
   this.bytes_ = null;
 
@@ -335,7 +335,7 @@ jspb.BinaryDecoder.prototype.clear = function() {
 
 /**
  * Returns the raw buffer.
- * @return {Uint8Array} The raw buffer.
+ * @return {?Uint8Array} The raw buffer.
  */
 jspb.BinaryDecoder.prototype.getBuffer = function() {
   return this.bytes_;
@@ -631,6 +631,7 @@ jspb.BinaryDecoder.prototype.readUnsignedVarint32String = function() {
   return value.toString();
 };
 
+
 /**
  * Reads a 32-bit signed variant and returns its value as a string.
  *
@@ -950,14 +951,15 @@ jspb.BinaryDecoder.prototype.readStringWithLength = function() {
  * Reads a block of raw bytes from the binary stream.
  *
  * @param {number} length The number of bytes to read.
- * @return {Uint8Array} The decoded block of bytes, or null if the length was
- *     invalid.
+ * @return {!Uint8Array} The decoded block of bytes, or an empty block if the
+ *     length was invalid.
  */
 jspb.BinaryDecoder.prototype.readBytes = function(length) {
   if (length < 0 ||
       this.cursor_ + length > this.bytes_.length) {
     this.error_ = true;
-    return null;
+    goog.asserts.fail('Invalid byte length!');
+    return new Uint8Array(0);
   }
 
   var result = this.bytes_.subarray(this.cursor_, this.cursor_ + length);

+ 64 - 56
js/binary/decoder_test.js

@@ -44,11 +44,11 @@
 goog.require('goog.testing.asserts');
 goog.require('jspb.BinaryConstants');
 goog.require('jspb.BinaryDecoder');
-goog.require('jspb.BinaryWriter');
+goog.require('jspb.BinaryEncoder');
 
 
 /**
- * Tests raw encoding and decoding of unsigned types.
+ * Tests encoding and decoding of unsigned types.
  * @param {Function} readValue
  * @param {Function} writeValue
  * @param {number} epsilon
@@ -58,34 +58,38 @@ goog.require('jspb.BinaryWriter');
  */
 function doTestUnsignedValue(readValue,
     writeValue, epsilon, upperLimit, filter) {
-  var writer = new jspb.BinaryWriter();
+  var encoder = new jspb.BinaryEncoder();
 
   // Encode zero and limits.
-  writeValue.call(writer, filter(0));
-  writeValue.call(writer, filter(epsilon));
-  writeValue.call(writer, filter(upperLimit));
+  writeValue.call(encoder, filter(0));
+  writeValue.call(encoder, filter(epsilon));
+  writeValue.call(encoder, filter(upperLimit));
 
   // Encode positive values.
   for (var cursor = epsilon; cursor < upperLimit; cursor *= 1.1) {
-    writeValue.call(writer, filter(cursor));
+    writeValue.call(encoder, filter(cursor));
   }
 
-  var reader = jspb.BinaryDecoder.alloc(writer.getResultBuffer());
+  var decoder = jspb.BinaryDecoder.alloc(encoder.end());
 
   // Check zero and limits.
-  assertEquals(filter(0), readValue.call(reader));
-  assertEquals(filter(epsilon), readValue.call(reader));
-  assertEquals(filter(upperLimit), readValue.call(reader));
+  assertEquals(filter(0), readValue.call(decoder));
+  assertEquals(filter(epsilon), readValue.call(decoder));
+  assertEquals(filter(upperLimit), readValue.call(decoder));
 
   // Check positive values.
   for (var cursor = epsilon; cursor < upperLimit; cursor *= 1.1) {
-    if (filter(cursor) != readValue.call(reader)) throw 'fail!';
+    if (filter(cursor) != readValue.call(decoder)) throw 'fail!';
   }
+
+  // Encoding values outside the valid range should assert.
+  assertThrows(function() {writeValue.call(encoder, -1);});
+  assertThrows(function() {writeValue.call(encoder, upperLimit * 1.1);});
 }
 
 
 /**
- * Tests raw encoding and decoding of signed types.
+ * Tests encoding and decoding of signed types.
  * @param {Function} readValue
  * @param {Function} writeValue
  * @param {number} epsilon
@@ -96,44 +100,48 @@ function doTestUnsignedValue(readValue,
  */
 function doTestSignedValue(readValue,
     writeValue, epsilon, lowerLimit, upperLimit, filter) {
-  var writer = new jspb.BinaryWriter();
+  var encoder = new jspb.BinaryEncoder();
 
   // Encode zero and limits.
-  writeValue.call(writer, filter(lowerLimit));
-  writeValue.call(writer, filter(-epsilon));
-  writeValue.call(writer, filter(0));
-  writeValue.call(writer, filter(epsilon));
-  writeValue.call(writer, filter(upperLimit));
+  writeValue.call(encoder, filter(lowerLimit));
+  writeValue.call(encoder, filter(-epsilon));
+  writeValue.call(encoder, filter(0));
+  writeValue.call(encoder, filter(epsilon));
+  writeValue.call(encoder, filter(upperLimit));
 
   var inputValues = [];
 
   // Encode negative values.
   for (var cursor = lowerLimit; cursor < -epsilon; cursor /= 1.1) {
     var val = filter(cursor);
-    writeValue.call(writer, val);
+    writeValue.call(encoder, val);
     inputValues.push(val);
   }
 
   // Encode positive values.
   for (var cursor = epsilon; cursor < upperLimit; cursor *= 1.1) {
     var val = filter(cursor);
-    writeValue.call(writer, val);
+    writeValue.call(encoder, val);
     inputValues.push(val);
   }
 
-  var reader = jspb.BinaryDecoder.alloc(writer.getResultBuffer());
+  var decoder = jspb.BinaryDecoder.alloc(encoder.end());
 
   // Check zero and limits.
-  assertEquals(filter(lowerLimit), readValue.call(reader));
-  assertEquals(filter(-epsilon), readValue.call(reader));
-  assertEquals(filter(0), readValue.call(reader));
-  assertEquals(filter(epsilon), readValue.call(reader));
-  assertEquals(filter(upperLimit), readValue.call(reader));
+  assertEquals(filter(lowerLimit), readValue.call(decoder));
+  assertEquals(filter(-epsilon), readValue.call(decoder));
+  assertEquals(filter(0), readValue.call(decoder));
+  assertEquals(filter(epsilon), readValue.call(decoder));
+  assertEquals(filter(upperLimit), readValue.call(decoder));
 
   // Verify decoded values.
   for (var i = 0; i < inputValues.length; i++) {
-    assertEquals(inputValues[i], readValue.call(reader));
+    assertEquals(inputValues[i], readValue.call(decoder));
   }
+
+  // Encoding values outside the valid range should assert.
+  assertThrows(function() {writeValue.call(encoder, lowerLimit * 1.1);});
+  assertThrows(function() {writeValue.call(encoder, upperLimit * 1.1);});
 }
 
 describe('binaryDecoderTest', function() {
@@ -169,7 +177,7 @@ describe('binaryDecoderTest', function() {
    * Tests reading 64-bit integers as hash strings.
    */
   it('testHashStrings', function() {
-    var writer = new jspb.BinaryWriter();
+    var encoder = new jspb.BinaryEncoder();
 
     var hashA = String.fromCharCode(0x00, 0x00, 0x00, 0x00,
                                     0x00, 0x00, 0x00, 0x00);
@@ -180,17 +188,17 @@ describe('binaryDecoderTest', function() {
     var hashD = String.fromCharCode(0xFF, 0xFF, 0xFF, 0xFF,
                                     0xFF, 0xFF, 0xFF, 0xFF);
 
-    writer.rawWriteVarintHash64(hashA);
-    writer.rawWriteVarintHash64(hashB);
-    writer.rawWriteVarintHash64(hashC);
-    writer.rawWriteVarintHash64(hashD);
+    encoder.writeVarintHash64(hashA);
+    encoder.writeVarintHash64(hashB);
+    encoder.writeVarintHash64(hashC);
+    encoder.writeVarintHash64(hashD);
 
-    writer.rawWriteFixedHash64(hashA);
-    writer.rawWriteFixedHash64(hashB);
-    writer.rawWriteFixedHash64(hashC);
-    writer.rawWriteFixedHash64(hashD);
+    encoder.writeFixedHash64(hashA);
+    encoder.writeFixedHash64(hashB);
+    encoder.writeFixedHash64(hashC);
+    encoder.writeFixedHash64(hashD);
 
-    var decoder = jspb.BinaryDecoder.alloc(writer.getResultBuffer());
+    var decoder = jspb.BinaryDecoder.alloc(encoder.end());
 
     assertEquals(hashA, decoder.readVarintHash64());
     assertEquals(hashB, decoder.readVarintHash64());
@@ -214,8 +222,8 @@ describe('binaryDecoderTest', function() {
     assertThrows(function() {decoder.readUint64()});
 
     // Overlong varints should trigger assertions.
-    decoder.setBlock(
-        [255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0]);
+    decoder.setBlock([255, 255, 255, 255, 255, 255,
+                      255, 255, 255, 255, 255, 0]);
     assertThrows(function() {decoder.readUnsignedVarint64()});
     decoder.reset();
     assertThrows(function() {decoder.readSignedVarint64()});
@@ -244,61 +252,61 @@ describe('binaryDecoderTest', function() {
 
 
   /**
-   * Tests raw encoding and decoding of unsigned integers.
+   * Tests encoding and decoding of unsigned integers.
    */
-  it('testRawUnsigned', function() {
+  it('testUnsignedIntegers', function() {
     doTestUnsignedValue(
         jspb.BinaryDecoder.prototype.readUint8,
-        jspb.BinaryWriter.prototype.rawWriteUint8,
+        jspb.BinaryEncoder.prototype.writeUint8,
         1, 0xFF, Math.round);
 
     doTestUnsignedValue(
         jspb.BinaryDecoder.prototype.readUint16,
-        jspb.BinaryWriter.prototype.rawWriteUint16,
+        jspb.BinaryEncoder.prototype.writeUint16,
         1, 0xFFFF, Math.round);
 
     doTestUnsignedValue(
         jspb.BinaryDecoder.prototype.readUint32,
-        jspb.BinaryWriter.prototype.rawWriteUint32,
+        jspb.BinaryEncoder.prototype.writeUint32,
         1, 0xFFFFFFFF, Math.round);
 
     doTestUnsignedValue(
         jspb.BinaryDecoder.prototype.readUint64,
-        jspb.BinaryWriter.prototype.rawWriteUint64,
+        jspb.BinaryEncoder.prototype.writeUint64,
         1, Math.pow(2, 64) - 1025, Math.round);
   });
 
 
   /**
-   * Tests raw encoding and decoding of signed integers.
+   * Tests encoding and decoding of signed integers.
    */
-  it('testRawSigned', function() {
+  it('testSignedIntegers', function() {
     doTestSignedValue(
         jspb.BinaryDecoder.prototype.readInt8,
-        jspb.BinaryWriter.prototype.rawWriteInt8,
+        jspb.BinaryEncoder.prototype.writeInt8,
         1, -0x80, 0x7F, Math.round);
 
     doTestSignedValue(
         jspb.BinaryDecoder.prototype.readInt16,
-        jspb.BinaryWriter.prototype.rawWriteInt16,
+        jspb.BinaryEncoder.prototype.writeInt16,
         1, -0x8000, 0x7FFF, Math.round);
 
     doTestSignedValue(
         jspb.BinaryDecoder.prototype.readInt32,
-        jspb.BinaryWriter.prototype.rawWriteInt32,
+        jspb.BinaryEncoder.prototype.writeInt32,
         1, -0x80000000, 0x7FFFFFFF, Math.round);
 
     doTestSignedValue(
         jspb.BinaryDecoder.prototype.readInt64,
-        jspb.BinaryWriter.prototype.rawWriteInt64,
+        jspb.BinaryEncoder.prototype.writeInt64,
         1, -Math.pow(2, 63), Math.pow(2, 63) - 513, Math.round);
   });
 
 
   /**
-   * Tests raw encoding and decoding of floats.
+   * Tests encoding and decoding of floats.
    */
-  it('testRawFloats', function() {
+  it('testFloats', function() {
     /**
      * @param {number} x
      * @return {number}
@@ -310,7 +318,7 @@ describe('binaryDecoderTest', function() {
     }
     doTestSignedValue(
         jspb.BinaryDecoder.prototype.readFloat,
-        jspb.BinaryWriter.prototype.rawWriteFloat,
+        jspb.BinaryEncoder.prototype.writeFloat,
         jspb.BinaryConstants.FLOAT32_EPS,
         -jspb.BinaryConstants.FLOAT32_MAX,
         jspb.BinaryConstants.FLOAT32_MAX,
@@ -318,7 +326,7 @@ describe('binaryDecoderTest', function() {
 
     doTestSignedValue(
         jspb.BinaryDecoder.prototype.readDouble,
-        jspb.BinaryWriter.prototype.rawWriteDouble,
+        jspb.BinaryEncoder.prototype.writeDouble,
         jspb.BinaryConstants.FLOAT64_EPS * 10,
         -jspb.BinaryConstants.FLOAT64_MAX,
         jspb.BinaryConstants.FLOAT64_MAX,

+ 430 - 0
js/binary/encoder.js

@@ -0,0 +1,430 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+/**
+ * @fileoverview BinaryEncoder defines methods for encoding Javascript values
+ * into arrays of bytes compatible with the Protocol Buffer wire format.
+ *
+ * @author aappleby@google.com (Austin Appleby)
+ */
+
+goog.provide('jspb.BinaryEncoder');
+
+goog.require('goog.asserts');
+goog.require('jspb.BinaryConstants');
+goog.require('jspb.utils');
+
+
+
+/**
+ * BinaryEncoder implements encoders for all the wire types specified in
+ * https://developers.google.com/protocol-buffers/docs/encoding.
+ *
+ * @constructor
+ * @struct
+ */
+jspb.BinaryEncoder = function() {
+  /** @private {!Array.<number>} */
+  this.buffer_ = [];
+};
+
+
+/**
+ * @return {number}
+ */
+jspb.BinaryEncoder.prototype.length = function() {
+  return this.buffer_.length;
+};
+
+
+/**
+ * @return {!Array.<number>}
+ */
+jspb.BinaryEncoder.prototype.end = function() {
+  var buffer = this.buffer_;
+  this.buffer_ = [];
+  return buffer;
+};
+
+
+/**
+ * Encodes a 64-bit integer in 32:32 split representation into its wire-format
+ * varint representation and stores it in the buffer.
+ * @param {number} lowBits The low 32 bits of the int.
+ * @param {number} highBits The high 32 bits of the int.
+ */
+jspb.BinaryEncoder.prototype.writeSplitVarint64 = function(lowBits, highBits) {
+  goog.asserts.assert(lowBits == Math.floor(lowBits));
+  goog.asserts.assert(highBits == Math.floor(highBits));
+  goog.asserts.assert((lowBits >= 0) &&
+                      (lowBits < jspb.BinaryConstants.TWO_TO_32));
+  goog.asserts.assert((highBits >= 0) &&
+                      (highBits < jspb.BinaryConstants.TWO_TO_32));
+
+  // Break the binary representation into chunks of 7 bits, set the 8th bit
+  // in each chunk if it's not the final chunk, and append to the result.
+  while (highBits > 0 || lowBits > 127) {
+    this.buffer_.push((lowBits & 0x7f) | 0x80);
+    lowBits = ((lowBits >>> 7) | (highBits << 25)) >>> 0;
+    highBits = highBits >>> 7;
+  }
+  this.buffer_.push(lowBits);
+};
+
+
+/**
+ * Encodes a 32-bit unsigned integer into its wire-format varint representation
+ * and stores it in the buffer.
+ * @param {number} value The integer to convert.
+ */
+jspb.BinaryEncoder.prototype.writeUnsignedVarint32 = function(value) {
+  goog.asserts.assert(value == Math.floor(value));
+  goog.asserts.assert((value >= 0) &&
+                      (value < jspb.BinaryConstants.TWO_TO_32));
+
+  while (value > 127) {
+    this.buffer_.push((value & 0x7f) | 0x80);
+    value = value >>> 7;
+  }
+
+  this.buffer_.push(value);
+};
+
+
+/**
+ * Encodes a 32-bit signed integer into its wire-format varint representation
+ * and stores it in the buffer.
+ * @param {number} value The integer to convert.
+ */
+jspb.BinaryEncoder.prototype.writeSignedVarint32 = function(value) {
+  goog.asserts.assert(value == Math.floor(value));
+  goog.asserts.assert((value >= -jspb.BinaryConstants.TWO_TO_31) &&
+                      (value < jspb.BinaryConstants.TWO_TO_31));
+
+  // Use the unsigned version if the value is not negative.
+  if (value >= 0) {
+    this.writeUnsignedVarint32(value);
+    return;
+  }
+
+  // Write nine bytes with a _signed_ right shift so we preserve the sign bit.
+  for (var i = 0; i < 9; i++) {
+    this.buffer_.push((value & 0x7f) | 0x80);
+    value = value >> 7;
+  }
+
+  // The above loop writes out 63 bits, so the last byte is always the sign bit
+  // which is always set for negative numbers.
+  this.buffer_.push(1);
+};
+
+
+/**
+ * Encodes a 64-bit unsigned integer into its wire-format varint representation
+ * and stores it in the buffer. Integers that are not representable in 64 bits
+ * will be truncated.
+ * @param {number} value The integer to convert.
+ */
+jspb.BinaryEncoder.prototype.writeUnsignedVarint64 = function(value) {
+  goog.asserts.assert(value == Math.floor(value));
+  goog.asserts.assert((value >= 0) &&
+                      (value < jspb.BinaryConstants.TWO_TO_64));
+  jspb.utils.splitInt64(value);
+  this.writeSplitVarint64(jspb.utils.split64Low,
+                          jspb.utils.split64High);
+};
+
+
+/**
+ * Encodes a 64-bit signed integer into its wire-format varint representation
+ * and stores it in the buffer. Integers that are not representable in 64 bits
+ * will be truncated.
+ * @param {number} value The integer to convert.
+ */
+jspb.BinaryEncoder.prototype.writeSignedVarint64 = function(value) {
+  goog.asserts.assert(value == Math.floor(value));
+  goog.asserts.assert((value >= -jspb.BinaryConstants.TWO_TO_63) &&
+                      (value < jspb.BinaryConstants.TWO_TO_63));
+  jspb.utils.splitInt64(value);
+  this.writeSplitVarint64(jspb.utils.split64Low,
+                          jspb.utils.split64High);
+};
+
+
+/**
+ * Encodes a JavaScript integer into its wire-format, zigzag-encoded varint
+ * representation and stores it in the buffer.
+ * @param {number} value The integer to convert.
+ */
+jspb.BinaryEncoder.prototype.writeZigzagVarint32 = function(value) {
+  goog.asserts.assert(value == Math.floor(value));
+  goog.asserts.assert((value >= -jspb.BinaryConstants.TWO_TO_31) &&
+                      (value < jspb.BinaryConstants.TWO_TO_31));
+  this.writeUnsignedVarint32(((value << 1) ^ (value >> 31)) >>> 0);
+};
+
+
+/**
+ * Encodes a JavaScript integer into its wire-format, zigzag-encoded varint
+ * representation and stores it in the buffer. Integers not representable in 64
+ * bits will be truncated.
+ * @param {number} value The integer to convert.
+ */
+jspb.BinaryEncoder.prototype.writeZigzagVarint64 = function(value) {
+  goog.asserts.assert(value == Math.floor(value));
+  goog.asserts.assert((value >= -jspb.BinaryConstants.TWO_TO_63) &&
+                      (value < jspb.BinaryConstants.TWO_TO_63));
+  jspb.utils.splitZigzag64(value);
+  this.writeSplitVarint64(jspb.utils.split64Low,
+                          jspb.utils.split64High);
+};
+
+
+/**
+ * Writes an 8-bit unsigned integer to the buffer. Numbers outside the range
+ * [0,2^8) will be truncated.
+ * @param {number} value The value to write.
+ */
+jspb.BinaryEncoder.prototype.writeUint8 = function(value) {
+  goog.asserts.assert(value == Math.floor(value));
+  goog.asserts.assert((value >= 0) && (value < 256));
+  this.buffer_.push((value >>> 0) & 0xFF);
+};
+
+
+/**
+ * Writes a 16-bit unsigned integer to the buffer. Numbers outside the
+ * range [0,2^16) will be truncated.
+ * @param {number} value The value to write.
+ */
+jspb.BinaryEncoder.prototype.writeUint16 = function(value) {
+  goog.asserts.assert(value == Math.floor(value));
+  goog.asserts.assert((value >= 0) && (value < 65536));
+  this.buffer_.push((value >>> 0) & 0xFF);
+  this.buffer_.push((value >>> 8) & 0xFF);
+};
+
+
+/**
+ * Writes a 32-bit unsigned integer to the buffer. Numbers outside the
+ * range [0,2^32) will be truncated.
+ * @param {number} value The value to write.
+ */
+jspb.BinaryEncoder.prototype.writeUint32 = function(value) {
+  goog.asserts.assert(value == Math.floor(value));
+  goog.asserts.assert((value >= 0) &&
+                      (value < jspb.BinaryConstants.TWO_TO_32));
+  this.buffer_.push((value >>> 0) & 0xFF);
+  this.buffer_.push((value >>> 8) & 0xFF);
+  this.buffer_.push((value >>> 16) & 0xFF);
+  this.buffer_.push((value >>> 24) & 0xFF);
+};
+
+
+/**
+ * Writes a 64-bit unsigned integer to the buffer. Numbers outside the
+ * range [0,2^64) will be truncated.
+ * @param {number} value The value to write.
+ */
+jspb.BinaryEncoder.prototype.writeUint64 = function(value) {
+  goog.asserts.assert(value == Math.floor(value));
+  goog.asserts.assert((value >= 0) &&
+                      (value < jspb.BinaryConstants.TWO_TO_64));
+  jspb.utils.splitUint64(value);
+  this.writeUint32(jspb.utils.split64Low);
+  this.writeUint32(jspb.utils.split64High);
+};
+
+
+/**
+ * Writes an 8-bit integer to the buffer. Numbers outside the range
+ * [-2^7,2^7) will be truncated.
+ * @param {number} value The value to write.
+ */
+jspb.BinaryEncoder.prototype.writeInt8 = function(value) {
+  goog.asserts.assert(value == Math.floor(value));
+  goog.asserts.assert((value >= -128) && (value < 128));
+  this.buffer_.push((value >>> 0) & 0xFF);
+};
+
+
+/**
+ * Writes a 16-bit integer to the buffer. Numbers outside the range
+ * [-2^15,2^15) will be truncated.
+ * @param {number} value The value to write.
+ */
+jspb.BinaryEncoder.prototype.writeInt16 = function(value) {
+  goog.asserts.assert(value == Math.floor(value));
+  goog.asserts.assert((value >= -32768) && (value < 32768));
+  this.buffer_.push((value >>> 0) & 0xFF);
+  this.buffer_.push((value >>> 8) & 0xFF);
+};
+
+
+/**
+ * Writes a 32-bit integer to the buffer. Numbers outside the range
+ * [-2^31,2^31) will be truncated.
+ * @param {number} value The value to write.
+ */
+jspb.BinaryEncoder.prototype.writeInt32 = function(value) {
+  goog.asserts.assert(value == Math.floor(value));
+  goog.asserts.assert((value >= -jspb.BinaryConstants.TWO_TO_31) &&
+                      (value < jspb.BinaryConstants.TWO_TO_31));
+  this.buffer_.push((value >>> 0) & 0xFF);
+  this.buffer_.push((value >>> 8) & 0xFF);
+  this.buffer_.push((value >>> 16) & 0xFF);
+  this.buffer_.push((value >>> 24) & 0xFF);
+};
+
+
+/**
+ * Writes a 64-bit integer to the buffer. Numbers outside the range
+ * [-2^63,2^63) will be truncated.
+ * @param {number} value The value to write.
+ */
+jspb.BinaryEncoder.prototype.writeInt64 = function(value) {
+  goog.asserts.assert(value == Math.floor(value));
+  goog.asserts.assert((value >= -jspb.BinaryConstants.TWO_TO_63) &&
+                      (value < jspb.BinaryConstants.TWO_TO_63));
+  jspb.utils.splitInt64(value);
+  this.writeUint32(jspb.utils.split64Low);
+  this.writeUint32(jspb.utils.split64High);
+};
+
+
+/**
+ * Writes a single-precision floating point value to the buffer. Numbers
+ * requiring more than 32 bits of precision will be truncated.
+ * @param {number} value The value to write.
+ */
+jspb.BinaryEncoder.prototype.writeFloat = function(value) {
+  goog.asserts.assert((value >= -jspb.BinaryConstants.FLOAT32_MAX) &&
+                      (value <= jspb.BinaryConstants.FLOAT32_MAX));
+  jspb.utils.splitFloat32(value);
+  this.writeUint32(jspb.utils.split64Low);
+};
+
+
+/**
+ * Writes a double-precision floating point value to the buffer. As this is
+ * the native format used by JavaScript, no precision will be lost.
+ * @param {number} value The value to write.
+ */
+jspb.BinaryEncoder.prototype.writeDouble = function(value) {
+  goog.asserts.assert((value >= -jspb.BinaryConstants.FLOAT64_MAX) &&
+                      (value <= jspb.BinaryConstants.FLOAT64_MAX));
+  jspb.utils.splitFloat64(value);
+  this.writeUint32(jspb.utils.split64Low);
+  this.writeUint32(jspb.utils.split64High);
+};
+
+
+/**
+ * Writes a boolean value to the buffer as a varint.
+ * @param {boolean} value The value to write.
+ */
+jspb.BinaryEncoder.prototype.writeBool = function(value) {
+  goog.asserts.assert(goog.isBoolean(value));
+  this.buffer_.push(value ? 1 : 0);
+};
+
+
+/**
+ * Writes an enum value to the buffer as a varint.
+ * @param {number} value The value to write.
+ */
+jspb.BinaryEncoder.prototype.writeEnum = function(value) {
+  goog.asserts.assert(value == Math.floor(value));
+  goog.asserts.assert((value >= -jspb.BinaryConstants.TWO_TO_31) &&
+                      (value < jspb.BinaryConstants.TWO_TO_31));
+  this.writeSignedVarint32(value);
+};
+
+
+/**
+ * Writes an arbitrary byte array to the buffer.
+ * @param {!Uint8Array} bytes The array of bytes to write.
+ */
+jspb.BinaryEncoder.prototype.writeBytes = function(bytes) {
+  this.buffer_.push.apply(this.buffer_, bytes);
+};
+
+
+/**
+ * Writes a 64-bit hash string (8 characters @ 8 bits of data each) to the
+ * buffer as a varint.
+ * @param {string} hash The hash to write.
+ */
+jspb.BinaryEncoder.prototype.writeVarintHash64 = function(hash) {
+  jspb.utils.splitHash64(hash);
+  this.writeSplitVarint64(jspb.utils.split64Low,
+                          jspb.utils.split64High);
+};
+
+
+/**
+ * Writes a 64-bit hash string (8 characters @ 8 bits of data each) to the
+ * buffer as a fixed64.
+ * @param {string} hash The hash to write.
+ */
+jspb.BinaryEncoder.prototype.writeFixedHash64 = function(hash) {
+  jspb.utils.splitHash64(hash);
+  this.writeUint32(jspb.utils.split64Low);
+  this.writeUint32(jspb.utils.split64High);
+};
+
+
+/**
+ * Writes a UTF16 Javascript string to the buffer encoded as UTF8.
+ * TODO(aappleby): Add support for surrogate pairs, reject unpaired surrogates.
+ * @param {string} value The string to write.
+ * @return {number} The number of bytes used to encode the string.
+ */
+jspb.BinaryEncoder.prototype.writeString = function(value) {
+  var oldLength = this.buffer_.length;
+
+  // UTF16 to UTF8 conversion loop swiped from goog.crypt.stringToUtf8ByteArray.
+  for (var i = 0; i < value.length; i++) {
+    var c = value.charCodeAt(i);
+    if (c < 128) {
+      this.buffer_.push(c);
+    } else if (c < 2048) {
+      this.buffer_.push((c >> 6) | 192);
+      this.buffer_.push((c & 63) | 128);
+    } else {
+      this.buffer_.push((c >> 12) | 224);
+      this.buffer_.push(((c >> 6) & 63) | 128);
+      this.buffer_.push((c & 63) | 128);
+    }
+  }
+
+  var length = this.buffer_.length - oldLength;
+  return length;
+};

+ 227 - 88
js/binary/proto_test.js

@@ -30,7 +30,9 @@
 
 // Test suite is written using Jasmine -- see http://jasmine.github.io/
 
+goog.require('goog.crypt.base64');
 goog.require('goog.testing.asserts');
+goog.require('jspb.Message');
 
 // CommonJS-LoadFromFile: ../testbinary_pb proto.jspb.test
 goog.require('proto.jspb.test.ExtendsWithMessage');
@@ -38,9 +40,61 @@ goog.require('proto.jspb.test.ForeignEnum');
 goog.require('proto.jspb.test.ForeignMessage');
 goog.require('proto.jspb.test.TestAllTypes');
 goog.require('proto.jspb.test.TestExtendable');
+goog.require('proto.jspb.test.extendOptionalBool');
+goog.require('proto.jspb.test.extendOptionalBytes');
+goog.require('proto.jspb.test.extendOptionalDouble');
+goog.require('proto.jspb.test.extendOptionalFixed32');
+goog.require('proto.jspb.test.extendOptionalFixed64');
+goog.require('proto.jspb.test.extendOptionalFloat');
+goog.require('proto.jspb.test.extendOptionalForeignEnum');
+goog.require('proto.jspb.test.extendOptionalInt32');
+goog.require('proto.jspb.test.extendOptionalInt64');
+goog.require('proto.jspb.test.extendOptionalSfixed32');
+goog.require('proto.jspb.test.extendOptionalSfixed64');
+goog.require('proto.jspb.test.extendOptionalSint32');
+goog.require('proto.jspb.test.extendOptionalSint64');
+goog.require('proto.jspb.test.extendOptionalString');
+goog.require('proto.jspb.test.extendOptionalUint32');
+goog.require('proto.jspb.test.extendOptionalUint64');
+goog.require('proto.jspb.test.extendPackedRepeatedBoolList');
+goog.require('proto.jspb.test.extendPackedRepeatedDoubleList');
+goog.require('proto.jspb.test.extendPackedRepeatedFixed32List');
+goog.require('proto.jspb.test.extendPackedRepeatedFixed64List');
+goog.require('proto.jspb.test.extendPackedRepeatedFloatList');
+goog.require('proto.jspb.test.extendPackedRepeatedForeignEnumList');
+goog.require('proto.jspb.test.extendPackedRepeatedInt32List');
+goog.require('proto.jspb.test.extendPackedRepeatedInt64List');
+goog.require('proto.jspb.test.extendPackedRepeatedSfixed32List');
+goog.require('proto.jspb.test.extendPackedRepeatedSfixed64List');
+goog.require('proto.jspb.test.extendPackedRepeatedSint32List');
+goog.require('proto.jspb.test.extendPackedRepeatedSint64List');
+goog.require('proto.jspb.test.extendPackedRepeatedUint32List');
+goog.require('proto.jspb.test.extendPackedRepeatedUint64List');
+goog.require('proto.jspb.test.extendRepeatedBoolList');
+goog.require('proto.jspb.test.extendRepeatedBytesList');
+goog.require('proto.jspb.test.extendRepeatedDoubleList');
+goog.require('proto.jspb.test.extendRepeatedFixed32List');
+goog.require('proto.jspb.test.extendRepeatedFixed64List');
+goog.require('proto.jspb.test.extendRepeatedFloatList');
+goog.require('proto.jspb.test.extendRepeatedForeignEnumList');
+goog.require('proto.jspb.test.extendRepeatedInt32List');
+goog.require('proto.jspb.test.extendRepeatedInt64List');
+goog.require('proto.jspb.test.extendRepeatedSfixed32List');
+goog.require('proto.jspb.test.extendRepeatedSfixed64List');
+goog.require('proto.jspb.test.extendRepeatedSint32List');
+goog.require('proto.jspb.test.extendRepeatedSint64List');
+goog.require('proto.jspb.test.extendRepeatedStringList');
+goog.require('proto.jspb.test.extendRepeatedUint32List');
+goog.require('proto.jspb.test.extendRepeatedUint64List');
+
 
 var suite = {};
 
+var BYTES = new Uint8Array([1, 2, 8, 9]);
+
+var BYTES_B64 = goog.crypt.base64.encodeByteArray(BYTES);
+
+
 /**
  * Helper: fill all fields on a TestAllTypes message.
  * @param {proto.jspb.test.TestAllTypes} msg
@@ -62,7 +116,7 @@ function fillAllFields(msg) {
   msg.setOptionalDouble(-1.5);
   msg.setOptionalBool(true);
   msg.setOptionalString('hello world');
-  msg.setOptionalBytes('bytes');
+  msg.setOptionalBytes(BYTES);
   msg.setOptionalGroup(new proto.jspb.test.TestAllTypes.OptionalGroup());
   msg.getOptionalGroup().setA(100);
   var submsg = new proto.jspb.test.ForeignMessage();
@@ -71,6 +125,7 @@ function fillAllFields(msg) {
   msg.setOptionalForeignEnum(proto.jspb.test.ForeignEnum.FOREIGN_FOO);
   msg.setOneofString('oneof');
 
+
   msg.setRepeatedInt32List([-42]);
   msg.setRepeatedInt64List([-0x7fffffff00000000]);
   msg.setRepeatedUint32List([0x80000000]);
@@ -85,7 +140,7 @@ function fillAllFields(msg) {
   msg.setRepeatedDoubleList([-1.5]);
   msg.setRepeatedBoolList([true]);
   msg.setRepeatedStringList(['hello world']);
-  msg.setRepeatedBytesList(['bytes']);
+  msg.setRepeatedBytesList([BYTES, BYTES]);
   msg.setRepeatedGroupList([new proto.jspb.test.TestAllTypes.RepeatedGroup()]);
   msg.getRepeatedGroupList()[0].setA(100);
   submsg = new proto.jspb.test.ForeignMessage();
@@ -106,106 +161,115 @@ function fillAllFields(msg) {
   msg.setPackedRepeatedFloatList([1.5]);
   msg.setPackedRepeatedDoubleList([-1.5]);
   msg.setPackedRepeatedBoolList([true]);
+
 }
 
 
 /**
- * Helper: compare a bytes field to a string with codepoints 0--255.
+ * Helper: compare a bytes field to an expected value
  * @param {Uint8Array|string} arr
- * @param {string} str
+ * @param {Uint8Array} expected
  * @return {boolean}
  */
-function bytesCompare(arr, str) {
-  if (arr.length != str.length) {
+function bytesCompare(arr, expected) {
+  if (goog.isString(arr)) {
+    arr = goog.crypt.base64.decodeStringToUint8Array(arr);
+  }
+  if (arr.length != expected.length) {
     return false;
   }
-  if (typeof arr == 'string') {
-    for (var i = 0; i < arr.length; i++) {
-      if (arr.charCodeAt(i) != str.charCodeAt(i)) {
-        return false;
-      }
+  for (var i = 0; i < arr.length; i++) {
+    if (arr[i] != expected[i]) {
+      return false;
     }
-    return true;
-  } else {
-    for (var i = 0; i < arr.length; i++) {
-      if (arr[i] != str.charCodeAt(i)) {
-        return false;
-      }
-    }
-    return true;
   }
+  return true;
 }
 
 
 /**
  * Helper: verify contents of given TestAllTypes message as set by
  * fillAllFields().
- * @param {proto.jspb.test.TestAllTypes} msg
+ * @param {proto.jspb.test.TestAllTypes} original
+ * @param {proto.jspb.test.TestAllTypes} copy
  */
-function checkAllFields(msg) {
-  assertEquals(msg.getOptionalInt32(), -42);
-  assertEquals(msg.getOptionalInt64(), -0x7fffffff00000000);
-  assertEquals(msg.getOptionalUint32(), 0x80000000);
-  assertEquals(msg.getOptionalUint64(), 0xf000000000000000);
-  assertEquals(msg.getOptionalSint32(), -100);
-  assertEquals(msg.getOptionalSint64(), -0x8000000000000000);
-  assertEquals(msg.getOptionalFixed32(), 1234);
-  assertEquals(msg.getOptionalFixed64(), 0x1234567800000000);
-  assertEquals(msg.getOptionalSfixed32(), -1234);
-  assertEquals(msg.getOptionalSfixed64(), -0x1234567800000000);
-  assertEquals(msg.getOptionalFloat(), 1.5);
-  assertEquals(msg.getOptionalDouble(), -1.5);
-  assertEquals(msg.getOptionalBool(), true);
-  assertEquals(msg.getOptionalString(), 'hello world');
-  assertEquals(true, bytesCompare(msg.getOptionalBytes(), 'bytes'));
-  assertEquals(msg.getOptionalGroup().getA(), 100);
-  assertEquals(msg.getOptionalForeignMessage().getC(), 16);
-  assertEquals(msg.getOptionalForeignEnum(),
+function checkAllFields(original, copy) {
+  assertTrue(jspb.Message.equals(original, copy));
+
+  assertEquals(copy.getOptionalInt32(), -42);
+  assertEquals(copy.getOptionalInt64(), -0x7fffffff00000000);
+  assertEquals(copy.getOptionalUint32(), 0x80000000);
+  assertEquals(copy.getOptionalUint64(), 0xf000000000000000);
+  assertEquals(copy.getOptionalSint32(), -100);
+  assertEquals(copy.getOptionalSint64(), -0x8000000000000000);
+  assertEquals(copy.getOptionalFixed32(), 1234);
+  assertEquals(copy.getOptionalFixed64(), 0x1234567800000000);
+  assertEquals(copy.getOptionalSfixed32(), -1234);
+  assertEquals(copy.getOptionalSfixed64(), -0x1234567800000000);
+  assertEquals(copy.getOptionalFloat(), 1.5);
+  assertEquals(copy.getOptionalDouble(), -1.5);
+  assertEquals(copy.getOptionalBool(), true);
+  assertEquals(copy.getOptionalString(), 'hello world');
+  assertEquals(true, bytesCompare(copy.getOptionalBytes(), BYTES));
+  assertEquals(true, bytesCompare(copy.getOptionalBytes_asU8(), BYTES));
+  assertEquals(
+      copy.getOptionalBytes_asB64(), goog.crypt.base64.encodeByteArray(BYTES));
+
+  assertEquals(copy.getOptionalGroup().getA(), 100);
+  assertEquals(copy.getOptionalForeignMessage().getC(), 16);
+  assertEquals(copy.getOptionalForeignEnum(),
       proto.jspb.test.ForeignEnum.FOREIGN_FOO);
-  assertEquals(msg.getOneofString(), 'oneof');
-  assertEquals(msg.getOneofFieldCase(),
+
+
+  assertEquals(copy.getOneofString(), 'oneof');
+  assertEquals(copy.getOneofFieldCase(),
       proto.jspb.test.TestAllTypes.OneofFieldCase.ONEOF_STRING);
 
-  assertElementsEquals(msg.getRepeatedInt32List(), [-42]);
-  assertElementsEquals(msg.getRepeatedInt64List(), [-0x7fffffff00000000]);
-  assertElementsEquals(msg.getRepeatedUint32List(), [0x80000000]);
-  assertElementsEquals(msg.getRepeatedUint64List(), [0xf000000000000000]);
-  assertElementsEquals(msg.getRepeatedSint32List(), [-100]);
-  assertElementsEquals(msg.getRepeatedSint64List(), [-0x8000000000000000]);
-  assertElementsEquals(msg.getRepeatedFixed32List(), [1234]);
-  assertElementsEquals(msg.getRepeatedFixed64List(), [0x1234567800000000]);
-  assertElementsEquals(msg.getRepeatedSfixed32List(), [-1234]);
-  assertElementsEquals(msg.getRepeatedSfixed64List(), [-0x1234567800000000]);
-  assertElementsEquals(msg.getRepeatedFloatList(), [1.5]);
-  assertElementsEquals(msg.getRepeatedDoubleList(), [-1.5]);
-  assertElementsEquals(msg.getRepeatedBoolList(), [true]);
-  assertElementsEquals(msg.getRepeatedStringList(), ['hello world']);
-  assertEquals(msg.getRepeatedBytesList().length, 1);
-  assertEquals(true, bytesCompare(msg.getRepeatedBytesList()[0], 'bytes'));
-  assertEquals(msg.getRepeatedGroupList().length, 1);
-  assertEquals(msg.getRepeatedGroupList()[0].getA(), 100);
-  assertEquals(msg.getRepeatedForeignMessageList().length, 1);
-  assertEquals(msg.getRepeatedForeignMessageList()[0].getC(), 1000);
-  assertElementsEquals(msg.getRepeatedForeignEnumList(),
+  assertElementsEquals(copy.getRepeatedInt32List(), [-42]);
+  assertElementsEquals(copy.getRepeatedInt64List(), [-0x7fffffff00000000]);
+  assertElementsEquals(copy.getRepeatedUint32List(), [0x80000000]);
+  assertElementsEquals(copy.getRepeatedUint64List(), [0xf000000000000000]);
+  assertElementsEquals(copy.getRepeatedSint32List(), [-100]);
+  assertElementsEquals(copy.getRepeatedSint64List(), [-0x8000000000000000]);
+  assertElementsEquals(copy.getRepeatedFixed32List(), [1234]);
+  assertElementsEquals(copy.getRepeatedFixed64List(), [0x1234567800000000]);
+  assertElementsEquals(copy.getRepeatedSfixed32List(), [-1234]);
+  assertElementsEquals(copy.getRepeatedSfixed64List(), [-0x1234567800000000]);
+  assertElementsEquals(copy.getRepeatedFloatList(), [1.5]);
+  assertElementsEquals(copy.getRepeatedDoubleList(), [-1.5]);
+  assertElementsEquals(copy.getRepeatedBoolList(), [true]);
+  assertElementsEquals(copy.getRepeatedStringList(), ['hello world']);
+  assertEquals(copy.getRepeatedBytesList().length, 2);
+  assertEquals(true, bytesCompare(copy.getRepeatedBytesList_asU8()[0], BYTES));
+  assertEquals(true, bytesCompare(copy.getRepeatedBytesList()[0], BYTES));
+  assertEquals(true, bytesCompare(copy.getRepeatedBytesList_asU8()[1], BYTES));
+  assertEquals(copy.getRepeatedBytesList_asB64()[0], BYTES_B64);
+  assertEquals(copy.getRepeatedBytesList_asB64()[1], BYTES_B64);
+  assertEquals(copy.getRepeatedGroupList().length, 1);
+  assertEquals(copy.getRepeatedGroupList()[0].getA(), 100);
+  assertEquals(copy.getRepeatedForeignMessageList().length, 1);
+  assertEquals(copy.getRepeatedForeignMessageList()[0].getC(), 1000);
+  assertElementsEquals(copy.getRepeatedForeignEnumList(),
       [proto.jspb.test.ForeignEnum.FOREIGN_FOO]);
 
-  assertElementsEquals(msg.getPackedRepeatedInt32List(), [-42]);
-  assertElementsEquals(msg.getPackedRepeatedInt64List(),
+  assertElementsEquals(copy.getPackedRepeatedInt32List(), [-42]);
+  assertElementsEquals(copy.getPackedRepeatedInt64List(),
       [-0x7fffffff00000000]);
-  assertElementsEquals(msg.getPackedRepeatedUint32List(), [0x80000000]);
-  assertElementsEquals(msg.getPackedRepeatedUint64List(), [0xf000000000000000]);
-  assertElementsEquals(msg.getPackedRepeatedSint32List(), [-100]);
-  assertElementsEquals(msg.getPackedRepeatedSint64List(),
+  assertElementsEquals(copy.getPackedRepeatedUint32List(), [0x80000000]);
+  assertElementsEquals(copy.getPackedRepeatedUint64List(),
+      [0xf000000000000000]);
+  assertElementsEquals(copy.getPackedRepeatedSint32List(), [-100]);
+  assertElementsEquals(copy.getPackedRepeatedSint64List(),
       [-0x8000000000000000]);
-  assertElementsEquals(msg.getPackedRepeatedFixed32List(), [1234]);
-  assertElementsEquals(msg.getPackedRepeatedFixed64List(),
+  assertElementsEquals(copy.getPackedRepeatedFixed32List(), [1234]);
+  assertElementsEquals(copy.getPackedRepeatedFixed64List(),
       [0x1234567800000000]);
-  assertElementsEquals(msg.getPackedRepeatedSfixed32List(), [-1234]);
-  assertElementsEquals(msg.getPackedRepeatedSfixed64List(),
+  assertElementsEquals(copy.getPackedRepeatedSfixed32List(), [-1234]);
+  assertElementsEquals(copy.getPackedRepeatedSfixed64List(),
       [-0x1234567800000000]);
-  assertElementsEquals(msg.getPackedRepeatedFloatList(), [1.5]);
-  assertElementsEquals(msg.getPackedRepeatedDoubleList(), [-1.5]);
-  assertElementsEquals(msg.getPackedRepeatedBoolList(), [true]);
+  assertElementsEquals(copy.getPackedRepeatedFloatList(), [1.5]);
+  assertElementsEquals(copy.getPackedRepeatedDoubleList(), [-1.5]);
+
 }
 
 
@@ -242,14 +306,13 @@ function checkExtensions(msg) {
       msg.getExtension(proto.jspb.test.extendOptionalBool));
   assertEquals('hello world',
       msg.getExtension(proto.jspb.test.extendOptionalString));
-  assertEquals(true,
-      bytesCompare(msg.getExtension(proto.jspb.test.extendOptionalBytes),
-        'bytes'));
+  assertEquals(
+      true, bytesCompare(
+                msg.getExtension(proto.jspb.test.extendOptionalBytes), BYTES));
   assertEquals(16,
       msg.getExtension(
           proto.jspb.test.ExtendsWithMessage.optionalExtension).getFoo());
-  assertEquals(proto.jspb.test.ForeignEnum.FOREIGN_FOO,
-      msg.getExtension(proto.jspb.test.extendOptionalForeignEnum));
+
 
   assertElementsEquals(
       msg.getExtension(proto.jspb.test.extendRepeatedInt32List),
@@ -293,10 +356,10 @@ function checkExtensions(msg) {
   assertElementsEquals(
       msg.getExtension(proto.jspb.test.extendRepeatedStringList),
       ['hello world']);
-  assertEquals(true,
+  assertEquals(
+      true,
       bytesCompare(
-          msg.getExtension(proto.jspb.test.extendRepeatedBytesList)[0],
-          'bytes'));
+          msg.getExtension(proto.jspb.test.extendRepeatedBytesList)[0], BYTES));
   assertEquals(1000,
       msg.getExtension(
           proto.jspb.test.ExtendsWithMessage.repeatedExtensionList)[0]
@@ -305,6 +368,7 @@ function checkExtensions(msg) {
       msg.getExtension(proto.jspb.test.extendRepeatedForeignEnumList),
       [proto.jspb.test.ForeignEnum.FOREIGN_FOO]);
 
+
   assertElementsEquals(
       msg.getExtension(proto.jspb.test.extendPackedRepeatedInt32List),
       [-42]);
@@ -347,6 +411,7 @@ function checkExtensions(msg) {
   assertElementsEquals(
       msg.getExtension(proto.jspb.test.extendPackedRepeatedForeignEnumList),
       [proto.jspb.test.ForeignEnum.FOREIGN_FOO]);
+
 }
 
 
@@ -360,9 +425,82 @@ describe('protoBinaryTest', function() {
     fillAllFields(msg);
     var encoded = msg.serializeBinary();
     var decoded = proto.jspb.test.TestAllTypes.deserializeBinary(encoded);
-    checkAllFields(decoded);
+    checkAllFields(msg, decoded);
   });
 
+  /**
+   * Test that base64 string and Uint8Array are interchangeable in bytes fields.
+   */
+  it('testBytesFieldsGettersInterop', function() {
+    var msg = new proto.jspb.test.TestAllTypes();
+    // Set from a base64 string and check all the getters work.
+    msg.setOptionalBytes(BYTES_B64);
+    assertTrue(bytesCompare(msg.getOptionalBytes_asU8(), BYTES));
+    assertTrue(bytesCompare(msg.getOptionalBytes_asB64(), BYTES));
+    assertTrue(bytesCompare(msg.getOptionalBytes(), BYTES));
+
+    // Test binary serialize round trip doesn't break it.
+    msg = proto.jspb.test.TestAllTypes.deserializeBinary(msg.serializeBinary());
+    assertTrue(bytesCompare(msg.getOptionalBytes_asU8(), BYTES));
+    assertTrue(bytesCompare(msg.getOptionalBytes_asB64(), BYTES));
+    assertTrue(bytesCompare(msg.getOptionalBytes(), BYTES));
+
+    msg = new proto.jspb.test.TestAllTypes();
+    // Set from a Uint8Array and check all the getters work.
+    msg.setOptionalBytes(BYTES);
+    assertTrue(bytesCompare(msg.getOptionalBytes_asU8(), BYTES));
+    assertTrue(bytesCompare(msg.getOptionalBytes_asB64(), BYTES));
+    assertTrue(bytesCompare(msg.getOptionalBytes(), BYTES));
+
+  });
+
+  /**
+   * Test that bytes setters will receive result of any of the getters.
+   */
+  it('testBytesFieldsSettersInterop', function() {
+    var msg = new proto.jspb.test.TestAllTypes();
+    msg.setOptionalBytes(BYTES);
+    assertTrue(bytesCompare(msg.getOptionalBytes(), BYTES));
+
+    msg.setOptionalBytes(msg.getOptionalBytes());
+    assertTrue(bytesCompare(msg.getOptionalBytes(), BYTES));
+    msg.setOptionalBytes(msg.getOptionalBytes_asB64());
+    assertTrue(bytesCompare(msg.getOptionalBytes(), BYTES));
+    msg.setOptionalBytes(msg.getOptionalBytes_asU8());
+    assertTrue(bytesCompare(msg.getOptionalBytes(), BYTES));
+  });
+
+  /**
+   * Test that repeated bytes getters return the expected types and values,
+   * whether the list was set from Uint8Arrays or from base64 strings.
+   */
+  it('testRepeatedBytesGetters', function() {
+    var msg = new proto.jspb.test.TestAllTypes();
+
+    function assertGetters() {
+      assertTrue(goog.isString(msg.getRepeatedBytesList_asB64()[0]));
+      assertTrue(goog.isString(msg.getRepeatedBytesList_asB64()[1]));
+      assertTrue(msg.getRepeatedBytesList_asU8()[0] instanceof Uint8Array);
+      assertTrue(msg.getRepeatedBytesList_asU8()[1] instanceof Uint8Array);
+
+      assertTrue(bytesCompare(msg.getRepeatedBytesList()[0], BYTES));
+      assertTrue(bytesCompare(msg.getRepeatedBytesList()[1], BYTES));
+      assertTrue(bytesCompare(msg.getRepeatedBytesList_asB64()[0], BYTES));
+      assertTrue(bytesCompare(msg.getRepeatedBytesList_asB64()[1], BYTES));
+      assertTrue(bytesCompare(msg.getRepeatedBytesList_asU8()[0], BYTES));
+      assertTrue(bytesCompare(msg.getRepeatedBytesList_asU8()[1], BYTES));
+    }
+
+    msg.setRepeatedBytesList([BYTES, BYTES]);
+    assertGetters();
+
+    msg.setRepeatedBytesList([BYTES_B64, BYTES_B64]);
+    assertGetters();
+
+    msg.setRepeatedBytesList(null);
+    assertEquals(0, msg.getRepeatedBytesList().length);
+    assertEquals(0, msg.getRepeatedBytesList_asB64().length);
+    assertEquals(0, msg.getRepeatedBytesList_asU8().length);
+  });
 
   /**
    * Helper: fill all extension values.
@@ -397,8 +535,7 @@ describe('protoBinaryTest', function() {
         proto.jspb.test.extendOptionalBool, true);
     msg.setExtension(
         proto.jspb.test.extendOptionalString, 'hello world');
-    msg.setExtension(
-        proto.jspb.test.extendOptionalBytes, 'bytes');
+    msg.setExtension(proto.jspb.test.extendOptionalBytes, BYTES);
     var submsg = new proto.jspb.test.ExtendsWithMessage();
     submsg.setFoo(16);
     msg.setExtension(
@@ -407,6 +544,7 @@ describe('protoBinaryTest', function() {
         proto.jspb.test.extendOptionalForeignEnum,
         proto.jspb.test.ForeignEnum.FOREIGN_FOO);
 
+
     msg.setExtension(
         proto.jspb.test.extendRepeatedInt32List, [-42]);
     msg.setExtension(
@@ -435,8 +573,7 @@ describe('protoBinaryTest', function() {
         proto.jspb.test.extendRepeatedBoolList, [true]);
     msg.setExtension(
         proto.jspb.test.extendRepeatedStringList, ['hello world']);
-    msg.setExtension(
-        proto.jspb.test.extendRepeatedBytesList, ['bytes']);
+    msg.setExtension(proto.jspb.test.extendRepeatedBytesList, [BYTES]);
     submsg = new proto.jspb.test.ExtendsWithMessage();
     submsg.setFoo(1000);
     msg.setExtension(
@@ -444,6 +581,7 @@ describe('protoBinaryTest', function() {
     msg.setExtension(proto.jspb.test.extendRepeatedForeignEnumList,
         [proto.jspb.test.ForeignEnum.FOREIGN_FOO]);
 
+
     msg.setExtension(
         proto.jspb.test.extendPackedRepeatedInt32List, [-42]);
     msg.setExtension(
@@ -473,6 +611,7 @@ describe('protoBinaryTest', function() {
         proto.jspb.test.extendPackedRepeatedBoolList, [true]);
     msg.setExtension(proto.jspb.test.extendPackedRepeatedForeignEnumList,
         [proto.jspb.test.ForeignEnum.FOREIGN_FOO]);
+
   }
 
 

+ 4 - 4
js/binary/reader.js

@@ -180,7 +180,7 @@ jspb.BinaryReader.prototype.getCursor = function() {
 
 /**
  * Returns the raw buffer.
- * @return {Uint8Array} The raw buffer.
+ * @return {?Uint8Array} The raw buffer.
  */
 jspb.BinaryReader.prototype.getBuffer = function() {
   return this.decoder_.getBuffer();
@@ -592,8 +592,8 @@ jspb.BinaryReader.prototype.getFieldDecoder = function() {
   var start = this.decoder_.getCursor();
   var end = start + length;
 
-  var innerDecoder = jspb.BinaryDecoder.alloc(this.decoder_.getBuffer(),
-                                                 start, length);
+  var innerDecoder =
+      jspb.BinaryDecoder.alloc(this.decoder_.getBuffer(), start, length);
   this.decoder_.setCursor(end);
   return innerDecoder;
 };
@@ -869,7 +869,7 @@ jspb.BinaryReader.prototype.readString = function() {
  * Reads a length-prefixed block of bytes from the binary stream, or returns
  * null if the next field in the stream has an invalid length value.
  *
- * @return {Uint8Array} The block of bytes.
+ * @return {!Uint8Array} The block of bytes.
  */
 jspb.BinaryReader.prototype.readBytes = function() {
   goog.asserts.assert(

+ 29 - 45
js/binary/reader_test.js

@@ -366,28 +366,28 @@ describe('binaryReaderTest', function() {
     var writer = new jspb.BinaryWriter();
 
     var testSignedData = [
-        '2730538252207801776',
-        '-2688470994844604560',
-        '3398529779486536359',
-        '3568577411627971000',
-        '272477188847484900',
-        '-6649058714086158188',
-        '-7695254765712060806',
-        '-4525541438037104029',
-        '-4993706538836508568',
-        '4990160321893729138'
+      '2730538252207801776',
+      '-2688470994844604560',
+      '3398529779486536359',
+      '3568577411627971000',
+      '272477188847484900',
+      '-6649058714086158188',
+      '-7695254765712060806',
+      '-4525541438037104029',
+      '-4993706538836508568',
+      '4990160321893729138'
     ];
     var testUnsignedData = [
-        '7822732630241694882',
-        '6753602971916687352',
-        '2399935075244442116',
-        '8724292567325338867',
-        '16948784802625696584',
-        '4136275908516066934',
-        '3575388346793700364',
-        '5167142028379259461',
-        '1557573948689737699',
-        '17100725280812548567'
+      '7822732630241694882',
+      '6753602971916687352',
+      '2399935075244442116',
+      '8724292567325338867',
+      '16948784802625696584',
+      '4136275908516066934',
+      '3575388346793700364',
+      '5167142028379259461',
+      '1557573948689737699',
+      '17100725280812548567'
     ];
 
     for (var i = 0; i < testSignedData.length; i++) {
@@ -535,7 +535,7 @@ describe('binaryReaderTest', function() {
    */
   it('testNesting', function() {
     var writer = new jspb.BinaryWriter();
-  var dummyMessage = /** @type {!jspb.BinaryMessage} */({});
+    var dummyMessage = /** @type {!jspb.BinaryMessage} */({});
 
     writer.writeInt32(1, 100);
 
@@ -626,31 +626,15 @@ describe('binaryReaderTest', function() {
     writer.writeBytes(4, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
     writer.writeString(4, 'The quick brown fox jumps over the lazy dog');
 
-    // Write a group with a nested group inside. We use the internal
-    // .rawWriteVarint() to ensure the tested wire data is what we want,
-    // independently of any serialization logic.
+    // Write a group with a nested group inside.
     writer.writeInt32(5, sentinel);
-    // Start group, field 5.
-    writer.rawWriteVarint(
-        (5 << 3) + jspb.BinaryConstants.WireType.START_GROUP);
-    // Varint, field 42.
-    writer.rawWriteVarint(
-        (42 << 3) + jspb.BinaryConstants.WireType.VARINT);
-    // Varint data.
-    writer.rawWriteVarint(42);
-    // Start group, field 6.
-    writer.rawWriteVarint(
-        (6 << 3) + jspb.BinaryConstants.WireType.START_GROUP);
-    // Varint, field 84.
-    writer.rawWriteVarint(
-        (84 << 3) + jspb.BinaryConstants.WireType.VARINT);
-    writer.rawWriteVarint(42);
-    // End group, field 6.
-    writer.rawWriteVarint(
-        (6 << 3) + jspb.BinaryConstants.WireType.END_GROUP);
-    // End group, field 5.
-    writer.rawWriteVarint(
-        (5 << 3) + jspb.BinaryConstants.WireType.END_GROUP);
+    var dummyMessage = /** @type {!jspb.BinaryMessage} */({});
+    writer.writeGroup(5, dummyMessage, function() {
+      writer.writeInt64(42, 42);
+      writer.writeGroup(6, dummyMessage, function() {
+        writer.writeInt64(84, 42);
+      });
+    });
 
     // Write final sentinel.
     writer.writeInt32(6, sentinel);

+ 5 - 59
js/binary/utils.js

@@ -838,63 +838,17 @@ jspb.utils.countDelimitedFields = function(buffer, start, end, field) {
 };
 
 
-/**
- * Clones a scalar field. Pulling this out to a helper method saves us a few
- * bytes of generated code.
- * @param {Array} array
- * @return {Array}
- */
-jspb.utils.cloneRepeatedScalarField = function(array) {
-  return array ? array.slice() : null;
-};
-
-
-/**
- * Clones an array of messages using the provided cloner function.
- * @param {Array.<jspb.BinaryMessage>} messages
- * @param {jspb.ClonerFunction} cloner
- * @return {Array.<jspb.BinaryMessage>}
- */
-jspb.utils.cloneRepeatedMessageField = function(messages, cloner) {
-  if (messages === null) return null;
-  var result = [];
-  for (var i = 0; i < messages.length; i++) {
-    result.push(cloner(messages[i]));
-  }
-  return result;
-};
-
-
-/**
- * Clones an array of byte blobs.
- * @param {Array.<Uint8Array>} blobs
- * @return {Array.<Uint8Array>}
- */
-jspb.utils.cloneRepeatedBlobField = function(blobs) {
-  if (blobs === null) return null;
-  var result = [];
-  for (var i = 0; i < blobs.length; i++) {
-    result.push(new Uint8Array(blobs[i]));
-  }
-  return result;
-};
-
-
 /**
  * String-ify bytes for text format. Should be optimized away in non-debug.
  * The returned string uses \xXX escapes for all values and is itself quoted.
  * [1, 31] serializes to '"\x01\x1f"'.
  * @param {jspb.ByteSource} byteSource The bytes to serialize.
- * @param {boolean=} opt_stringIsRawBytes The string is interpreted as a series
- * of raw bytes rather than base64 data.
  * @return {string} Stringified bytes for text format.
  */
-jspb.utils.debugBytesToTextFormat = function(byteSource,
-                                                opt_stringIsRawBytes) {
+jspb.utils.debugBytesToTextFormat = function(byteSource) {
   var s = '"';
   if (byteSource) {
-    var bytes =
-        jspb.utils.byteSourceToUint8Array(byteSource, opt_stringIsRawBytes);
+    var bytes = jspb.utils.byteSourceToUint8Array(byteSource);
     for (var i = 0; i < bytes.length; i++) {
       s += '\\x';
       if (bytes[i] < 16) s += '0';
@@ -925,9 +879,8 @@ jspb.utils.debugScalarToTextFormat = function(scalar) {
  * exception.
  * @param {string} str
  * @return {!Uint8Array}
- * @private
  */
-jspb.utils.stringToByteArray_ = function(str) {
+jspb.utils.stringToByteArray = function(str) {
   var arr = new Uint8Array(str.length);
   for (var i = 0; i < str.length; i++) {
     var codepoint = str.charCodeAt(i);
@@ -944,13 +897,10 @@ jspb.utils.stringToByteArray_ = function(str) {
 /**
  * Converts any type defined in jspb.ByteSource into a Uint8Array.
  * @param {!jspb.ByteSource} data
- * @param {boolean=} opt_stringIsRawBytes Interpret a string as a series of raw
- * bytes (encoded as codepoints 0--255 inclusive) rather than base64 data
- * (default behavior).
  * @return {!Uint8Array}
  * @suppress {invalidCasts}
  */
-jspb.utils.byteSourceToUint8Array = function(data, opt_stringIsRawBytes) {
+jspb.utils.byteSourceToUint8Array = function(data) {
   if (data.constructor === Uint8Array) {
     return /** @type {!Uint8Array} */(data);
   }
@@ -967,11 +917,7 @@ jspb.utils.byteSourceToUint8Array = function(data, opt_stringIsRawBytes) {
 
   if (data.constructor === String) {
     data = /** @type {string} */(data);
-    if (opt_stringIsRawBytes) {
-      return jspb.utils.stringToByteArray_(data);
-    } else {
-      return goog.crypt.base64.decodeStringToUint8Array(data);
-    }
+    return goog.crypt.base64.decodeStringToUint8Array(data);
   }
 
   goog.asserts.fail('Type not convertible to Uint8Array.');

+ 14 - 13
js/binary/utils_test.js

@@ -310,7 +310,7 @@ describe('binaryUtilsTest', function() {
     // NaN.
     jspb.utils.splitFloat32(NaN);
     if (!isNaN(jspb.utils.joinFloat32(jspb.utils.split64Low,
-                                         jspb.utils.split64High))) {
+                                      jspb.utils.split64High))) {
       throw 'fail!';
     }
 
@@ -324,7 +324,7 @@ describe('binaryUtilsTest', function() {
         if (opt_bits != jspb.utils.split64Low) throw 'fail!';
       }
       if (truncate(x) != jspb.utils.joinFloat32(jspb.utils.split64Low,
-                                                   jspb.utils.split64High)) {
+          jspb.utils.split64High)) {
         throw 'fail!';
       }
     }
@@ -376,7 +376,7 @@ describe('binaryUtilsTest', function() {
     // NaN.
     jspb.utils.splitFloat64(NaN);
     if (!isNaN(jspb.utils.joinFloat64(jspb.utils.split64Low,
-                                         jspb.utils.split64High))) {
+        jspb.utils.split64High))) {
       throw 'fail!';
     }
 
@@ -394,7 +394,7 @@ describe('binaryUtilsTest', function() {
         if (opt_lowBits != jspb.utils.split64Low) throw 'fail!';
       }
       if (x != jspb.utils.joinFloat64(jspb.utils.split64Low,
-                                         jspb.utils.split64High)) {
+          jspb.utils.split64High)) {
         throw 'fail!';
       }
     }
@@ -439,16 +439,20 @@ describe('binaryUtilsTest', function() {
    * Tests counting packed varints.
    */
   it('testCountVarints', function() {
-    var writer = new jspb.BinaryWriter();
-
-    var count = 0;
+    var values = [];
     for (var i = 1; i < 1000000000; i *= 1.1) {
-      writer.rawWriteVarint(Math.floor(i));
-      count++;
+      values.push(Math.floor(i));
     }
 
+    var writer = new jspb.BinaryWriter();
+    writer.writePackedUint64(1, values);
+
     var buffer = new Uint8Array(writer.getResultBuffer());
-    assertEquals(count, jspb.utils.countVarints(buffer, 0, buffer.length));
+
+    // We should have two more varints than we started with - one for the field
+    // tag, one for the packed length.
+    assertEquals(values.length + 2,
+                 jspb.utils.countVarints(buffer, 0, buffer.length));
   });
 
 
@@ -625,8 +629,5 @@ describe('binaryUtilsTest', function() {
 
     // Converting base64-encoded strings into Uint8Arrays should work.
     check(convert(sourceBase64));
-
-    // Converting binary-data strings into Uint8Arrays should work.
-    check(convert(sourceString, /* opt_stringIsRawBytes = */ true));
   });
 });

Rozdílová data souboru nebyla zobrazena, protože soubor je příliš velký
+ 185 - 592
js/binary/writer.js


+ 1 - 1
js/debug.js

@@ -68,7 +68,7 @@ jspb.debug.dump = function(message) {
  * Recursively introspects a message and the values its getters return to
  * make a best effort in creating a human readable representation of the
  * message.
- * @param {*} thing A jspb.Message, Array or primitive type to dump.
+ * @param {?} thing A jspb.Message, Array or primitive type to dump.
  * @return {*}
  * @private
  */

+ 1 - 0
js/debug_test.js

@@ -41,6 +41,7 @@ goog.require('proto.jspb.test.IsExtension');
 goog.require('proto.jspb.test.Simple1');
 
 
+
 describe('debugTest', function() {
   it('testSimple1', function() {
     if (COMPILED) {

+ 248 - 36
js/message.js

@@ -39,8 +39,8 @@ goog.provide('jspb.Message');
 
 goog.require('goog.array');
 goog.require('goog.asserts');
+goog.require('goog.crypt.base64');
 goog.require('goog.json');
-goog.require('goog.object');
 
 // Not needed in compilation units that have no protos with xids.
 goog.forwardDeclare('xid.String');
@@ -119,6 +119,14 @@ jspb.ExtensionFieldInfo = function(fieldNumber, fieldName, ctor, toObjectFn,
 };
 
 
+/**
+ * @return {boolean} Does this field represent a sub Message?
+ */
+jspb.ExtensionFieldInfo.prototype.isMessageType = function() {
+  return !!this.ctor;
+};
+
+
 /**
  * Base class for all JsPb messages.
  * @constructor
@@ -165,6 +173,14 @@ goog.define('jspb.Message.MINIMIZE_MEMORY_ALLOCATIONS', COMPILED);
 // TODO(b/19419436) Turn this on by default.
 
 
+/**
+ * Does this browser support Uint8Array typed arrays?
+ * @type {boolean}
+ * @private
+ */
+jspb.Message.SUPPORTS_UINT8ARRAY_ = (typeof Uint8Array == 'function');
+
+
 /**
  * The internal data array.
  * @type {!Array}
@@ -210,6 +226,14 @@ jspb.Message.prototype.pivot_;
 jspb.Message.prototype.messageId_;
 
 
+/**
+ * Repeated float or double fields which have been converted to include only
+ * numbers and not strings holding "NaN", "Infinity" and "-Infinity".
+ * @private {!Object<number,boolean>|undefined}
+ */
+jspb.Message.prototype.convertedFloatingPointFields_;
+
+
 /**
  * The xid of this proto type (The same for all instances of a proto). Provides
  * a way to identify a proto by stable obfuscated name.
@@ -284,6 +308,8 @@ jspb.Message.initialize = function(
   msg.arrayIndexOffset_ = messageId === 0 ? -1 : 0;
   msg.array = data;
   jspb.Message.materializeExtensionObject_(msg, suggestedPivot);
+  msg.convertedFloatingPointFields_ = {};
+
   if (repeatedFields) {
     for (var i = 0; i < repeatedFields.length; i++) {
       var fieldNumber = repeatedFields[i];
@@ -419,8 +445,9 @@ jspb.Message.toObjectList = function(field, toObjectFn, opt_includeInstance) {
  * @param {!jspb.Message} proto The proto whose extensions to convert.
  * @param {!Object} obj The Soy object to add converted extension data to.
  * @param {!Object} extensions The proto class' registered extensions.
- * @param {function(jspb.ExtensionFieldInfo) : *} getExtensionFn The proto
- *     class' getExtension function. Passed for effective dead code removal.
+ * @param {function(this:?, jspb.ExtensionFieldInfo) : *} getExtensionFn
+ *     The proto class' getExtension function. Passed for effective dead code
+ *     removal.
  * @param {boolean=} opt_includeInstance Whether to include the JSPB instance
  *     for transitional soy proto support: http://goto/soy-param-migration
  */
@@ -472,7 +499,7 @@ jspb.Message.serializeBinaryExtensions = function(proto, writer, extensions,
     }
     var value = getExtensionFn.call(proto, fieldInfo);
     if (value) {
-      if (fieldInfo.ctor) {  // is this a message type?
+      if (fieldInfo.isMessageType()) {
         // If the message type of the extension was generated without binary
         // support, there may not be a binary message serializer function, and
         // we can't know when we codegen the extending message that the extended
@@ -517,8 +544,7 @@ jspb.Message.readBinaryExtension = function(msg, reader, extensions,
   }
 
   var value;
-  if (fieldInfo.ctor) {
-    // Message type.
+  if (fieldInfo.isMessageType()) {
     value = new fieldInfo.ctor();
     fieldInfo.binaryReaderFn.call(
         reader, value, fieldInfo.binaryMessageDeserializeFn);
@@ -566,6 +592,130 @@ jspb.Message.getField = function(msg, fieldNumber) {
 };
 
 
+/**
+ * Gets the value of an optional float or double field.
+ * @param {!jspb.Message} msg A jspb proto.
+ * @param {number} fieldNumber The field number.
+ * @return {?number|undefined} The field's value.
+ * @protected
+ */
+jspb.Message.getOptionalFloatingPointField = function(msg, fieldNumber) {
+  var value = jspb.Message.getField(msg, fieldNumber);
+  // Converts "NaN", "Infinity" and "-Infinity" to their corresponding numbers.
+  return value == null ? value : +value;
+};
+
+
+/**
+ * Gets the value of a repeated float or double field.
+ * @param {!jspb.Message} msg A jspb proto.
+ * @param {number} fieldNumber The field number.
+ * @return {!Array<number>} The field's value.
+ * @protected
+ */
+jspb.Message.getRepeatedFloatingPointField = function(msg, fieldNumber) {
+  var values = jspb.Message.getField(msg, fieldNumber);
+  if (!msg.convertedFloatingPointFields_) {
+    msg.convertedFloatingPointFields_ = {};
+  }
+  if (!msg.convertedFloatingPointFields_[fieldNumber]) {
+    for (var i = 0; i < values.length; i++) {
+      // Converts "NaN", "Infinity" and "-Infinity" to their corresponding
+      // numbers.
+      values[i] = +values[i];
+    }
+    msg.convertedFloatingPointFields_[fieldNumber] = true;
+  }
+  return /** @type {!Array<number>} */ (values);
+};
+
+
+/**
+ * Coerce a 'bytes' field to a base 64 string.
+ * @param {string|Uint8Array|null} value
+ * @return {?string} The field's coerced value.
+ */
+jspb.Message.bytesAsB64 = function(value) {
+  if (value == null || goog.isString(value)) {
+    return value;
+  }
+  if (jspb.Message.SUPPORTS_UINT8ARRAY_ && value instanceof Uint8Array) {
+    return goog.crypt.base64.encodeByteArray(value);
+  }
+  goog.asserts.fail('Cannot coerce to b64 string: ' + goog.typeOf(value));
+  return null;
+};
+
+
+/**
+ * Coerce a 'bytes' field to a Uint8Array byte buffer.
+ * Note that Uint8Array is not supported on IE versions before 10 nor on Opera
+ * Mini. @see http://caniuse.com/Uint8Array
+ * @param {string|Uint8Array|null} value
+ * @return {?Uint8Array} The field's coerced value.
+ */
+jspb.Message.bytesAsU8 = function(value) {
+  // null/undefined and existing Uint8Arrays are handed back untouched.
+  // NOTE(review): unlike bytesAsB64, this instanceof test is not guarded by
+  // jspb.Message.SUPPORTS_UINT8ARRAY_; confirm that is acceptable in
+  // environments where the Uint8Array global is missing.
+  if (value == null || value instanceof Uint8Array) {
+    return value;
+  }
+  // Any string is treated as base64 text and decoded into a byte buffer.
+  if (goog.isString(value)) {
+    return goog.crypt.base64.decodeStringToUint8Array(value);
+  }
+  // Unsupported input type: report it through goog.asserts, then return null.
+  goog.asserts.fail('Cannot coerce to Uint8Array: ' + goog.typeOf(value));
+  return null;
+};
+
+
+/**
+ * Coerce a repeated 'bytes' field to an array of base 64 strings.
+ * Note: the returned array should be treated as immutable.
+ * @param {!Array<string>|!Array<!Uint8Array>} value
+ * @return {!Array<string?>} The field's coerced value.
+ */
+jspb.Message.bytesListAsB64 = function(value) {
+  jspb.Message.assertConsistentTypes_(value);
+  if (!value.length || goog.isString(value[0])) {
+    return /** @type {!Array<string>} */ (value);
+  }
+  return goog.array.map(value, jspb.Message.bytesAsB64);
+};
+
+
+/**
+ * Coerce a repeated 'bytes' field to an array of Uint8Array byte buffers.
+ * Note: the returned array should be treated as immutable.
+ * Note that Uint8Array is not supported on IE versions before 10 nor on Opera
+ * Mini. @see http://caniuse.com/Uint8Array
+ * @param {!Array<string>|!Array<!Uint8Array>} value
+ * @return {!Array<Uint8Array?>} The field's coerced value.
+ */
+jspb.Message.bytesListAsU8 = function(value) {
+  jspb.Message.assertConsistentTypes_(value);
+  // Only the first element is inspected. An empty list, or one that already
+  // starts with a Uint8Array, is returned as the same array (not a copy).
+  // NOTE(review): this instanceof test is not guarded by
+  // jspb.Message.SUPPORTS_UINT8ARRAY_ -- confirm that is intended.
+  if (!value.length || value[0] instanceof Uint8Array) {
+    return /** @type {!Array<!Uint8Array>} */ (value);
+  }
+  // Otherwise build a new array by coercing each element individually.
+  return goog.array.map(value, jspb.Message.bytesAsU8);
+};
+
+
+/**
+ * Asserts that all elements of an array are of the same type.
+ * @param {Array?} array The array to test.
+ * @private
+ */
+jspb.Message.assertConsistentTypes_ = function(array) {
+  if (goog.DEBUG && array && array.length > 1) {
+    var expected = goog.typeOf(array[0]);
+    goog.array.forEach(array, function(e) {
+      if (goog.typeOf(e) != expected) {
+        goog.asserts.fail('Inconsistent type in JSPB repeated field array. ' +
+            'Got ' + goog.typeOf(e) + ' expected ' + expected);
+      }
+    });
+  }
+};
+
+
 /**
  * Gets the value of a non-extension primitive field, with proto3 (non-nullable
  * primitives) semantics. Returns `defaultValue` if the field is not otherwise
@@ -841,7 +991,7 @@ jspb.Message.prototype.getExtension = function(fieldInfo) {
   }
   var fieldNumber = fieldInfo.fieldIndex;
   if (fieldInfo.isRepeated) {
-    if (fieldInfo.ctor) {
+    if (fieldInfo.isMessageType()) {
       if (!this.wrappers_[fieldNumber]) {
         this.wrappers_[fieldNumber] =
             goog.array.map(this.extensionObject_[fieldNumber] || [],
@@ -854,7 +1004,7 @@ jspb.Message.prototype.getExtension = function(fieldInfo) {
       return this.extensionObject_[fieldNumber];
     }
   } else {
-    if (fieldInfo.ctor) {
+    if (fieldInfo.isMessageType()) {
       if (!this.wrappers_[fieldNumber] && this.extensionObject_[fieldNumber]) {
         this.wrappers_[fieldNumber] = new fieldInfo.ctor(
             /** @type {Array|undefined} */ (
@@ -871,7 +1021,8 @@ jspb.Message.prototype.getExtension = function(fieldInfo) {
 /**
  * Sets the value of the extension field in the extended object.
  * @param {jspb.ExtensionFieldInfo} fieldInfo Specifies the field to set.
- * @param {jspb.Message|string|number|boolean|Array} value The value to set.
+ * @param {jspb.Message|string|Uint8Array|number|boolean|Array?} value The value
+ *     to set.
  */
 jspb.Message.prototype.setExtension = function(fieldInfo, value) {
   if (!this.wrappers_) {
@@ -881,7 +1032,7 @@ jspb.Message.prototype.setExtension = function(fieldInfo, value) {
   var fieldNumber = fieldInfo.fieldIndex;
   if (fieldInfo.isRepeated) {
     value = value || [];
-    if (fieldInfo.ctor) {
+    if (fieldInfo.isMessageType()) {
       this.wrappers_[fieldNumber] = value;
       this.extensionObject_[fieldNumber] = goog.array.map(
           /** @type {Array<jspb.Message>} */ (value), function(msg) {
@@ -891,7 +1042,7 @@ jspb.Message.prototype.setExtension = function(fieldInfo, value) {
       this.extensionObject_[fieldNumber] = value;
     }
   } else {
-    if (fieldInfo.ctor) {
+    if (fieldInfo.isMessageType()) {
       this.wrappers_[fieldNumber] = value;
       this.extensionObject_[fieldNumber] = value ? value.toArray() : value;
     } else {
@@ -957,6 +1108,33 @@ jspb.Message.equals = function(m1, m2) {
 };
 
 
+/**
+ * Compares two message extension fields recursively.
+ * @param {!Object} extension1 The first field.
+ * @param {!Object} extension2 The second field.
+ * @return {boolean} true if the extensions are null/undefined, or otherwise
+ *     equal.
+ */
+jspb.Message.compareExtensions = function(extension1, extension2) {
+  // A missing (falsy) extension object is treated as an empty one, so
+  // callers may pass null/undefined despite the !Object annotation.
+  extension1 = extension1 || {};
+  extension2 = extension2 || {};
+
+  // Collect the union of the keys of both objects; the stored value (0) is
+  // irrelevant, only the key set is used.
+  // NOTE(review): for...in is not filtered with hasOwnProperty here, so
+  // enumerable inherited properties would be compared as well.
+  var keys = {};
+  for (var name in extension1) {
+    keys[name] = 0;
+  }
+  for (var name in extension2) {
+    keys[name] = 0;
+  }
+  // Compare every key in the union; a key present on only one side is
+  // compared against undefined on the other.
+  for (name in keys) {
+    if (!jspb.Message.compareFields(extension1[name], extension2[name])) {
+      return false;
+    }
+  }
+  return true;
+};
+
+
 /**
  * Compares two message fields recursively.
  * @param {*} field1 The first field.
@@ -964,43 +1142,77 @@ jspb.Message.equals = function(m1, m2) {
  * @return {boolean} true if the fields are null/undefined, or otherwise equal.
  */
 jspb.Message.compareFields = function(field1, field2) {
-  if (goog.isObject(field1) && goog.isObject(field2)) {
-    var keys = {}, name, extensionObject1, extensionObject2;
-    for (name in field1) {
-      field1.hasOwnProperty(name) && (keys[name] = 0);
-    }
-    for (name in field2) {
-      field2.hasOwnProperty(name) && (keys[name] = 0);
+  // If the fields are trivially equal, they're equal.
+  if (field1 == field2) return true;
+
+  // If the fields aren't trivially equal and one of them isn't an object,
+  // they can't possibly be equal.
+  if (!goog.isObject(field1) || !goog.isObject(field2)) {
+    return false;
+  }
+
+  // We have two objects. If they're different types, they're not equal.
+  field1 = /** @type {!Object} */(field1);
+  field2 = /** @type {!Object} */(field2);
+  if (field1.constructor != field2.constructor) return false;
+
+  // If both are Uint8Arrays, compare them element-by-element.
+  if (jspb.Message.SUPPORTS_UINT8ARRAY_ && field1.constructor === Uint8Array) {
+    var bytes1 = /** @type {!Uint8Array} */(field1);
+    var bytes2 = /** @type {!Uint8Array} */(field2);
+    if (bytes1.length != bytes2.length) return false;
+    for (var i = 0; i < bytes1.length; i++) {
+      if (bytes1[i] != bytes2[i]) return false;
     }
-    for (name in keys) {
-      var val1 = field1[name], val2 = field2[name];
-      if (goog.isObject(val1) && !goog.isArray(val1)) {
-        if (extensionObject1 !== undefined) {
-          throw new Error('invalid jspb state');
-        }
-        extensionObject1 = goog.object.isEmpty(val1) ? undefined : val1;
+    return true;
+  }
+
+  // If they're both Arrays, compare them element by element except for the
+  // optional extension objects at the end, which we compare separately.
+  if (field1.constructor === Array) {
+    var extension1 = undefined;
+    var extension2 = undefined;
+
+    var length = Math.max(field1.length, field2.length);
+    for (var i = 0; i < length; i++) {
+      var val1 = field1[i];
+      var val2 = field2[i];
+
+      if (val1 && (val1.constructor == Object)) {
+        goog.asserts.assert(extension1 === undefined);
+        goog.asserts.assert(i === field1.length - 1);
+        extension1 = val1;
         val1 = undefined;
       }
-      if (goog.isObject(val2) && !goog.isArray(val2)) {
-        if (extensionObject2 !== undefined) {
-          throw new Error('invalid jspb state');
-        }
-        extensionObject2 = goog.object.isEmpty(val2) ? undefined : val2;
+
+      if (val2 && (val2.constructor == Object)) {
+        goog.asserts.assert(extension2 === undefined);
+        goog.asserts.assert(i === field2.length - 1);
+        extension2 = val2;
         val2 = undefined;
       }
+
       if (!jspb.Message.compareFields(val1, val2)) {
         return false;
       }
     }
-    if (extensionObject1 || extensionObject2) {
-      return jspb.Message.compareFields(extensionObject1, extensionObject2);
+
+    if (extension1 || extension2) {
+      extension1 = extension1 || {};
+      extension2 = extension2 || {};
+      return jspb.Message.compareExtensions(extension1, extension2);
     }
+
     return true;
   }
-  // Primitive fields, null and undefined compare as equal.
-  // This also forces booleans and 0/1 to compare as equal to ensure
-  // compatibility with the jspb serializer.
-  return field1 == field2;
+
+  // If they're both plain Objects (i.e. extensions), compare them as
+  // extensions.
+  if (field1.constructor === Object) {
+    return jspb.Message.compareExtensions(field1, field2);
+  }
+
+  throw new Error('Invalid type in JSPB array');
 };
 
 

+ 34 - 16
js/message_test.js

@@ -53,6 +53,8 @@ goog.require('proto.jspb.test.Complex');
 goog.require('proto.jspb.test.DefaultValues');
 goog.require('proto.jspb.test.Empty');
 goog.require('proto.jspb.test.EnumContainer');
+goog.require('proto.jspb.test.floatingMsgField');
+goog.require('proto.jspb.test.FloatingPointFields');
 goog.require('proto.jspb.test.floatingStrField');
 goog.require('proto.jspb.test.HasExtensions');
 goog.require('proto.jspb.test.IndirectExtension');
@@ -60,7 +62,6 @@ goog.require('proto.jspb.test.IsExtension');
 goog.require('proto.jspb.test.OptionalFields');
 goog.require('proto.jspb.test.OuterEnum');
 goog.require('proto.jspb.test.OuterMessage.Complex');
-goog.require('proto.jspb.test.simple1');
 goog.require('proto.jspb.test.Simple1');
 goog.require('proto.jspb.test.Simple2');
 goog.require('proto.jspb.test.SpecialCases');
@@ -74,7 +75,7 @@ goog.require('proto.jspb.test.TestReservedNamesExtension');
 // CommonJS-LoadFromFile: test2_pb proto.jspb.test
 goog.require('proto.jspb.test.ExtensionMessage');
 goog.require('proto.jspb.test.TestExtensionsMessage');
-goog.require('proto.jspb.test.floatingMsgField');
+
 
 
 
@@ -98,12 +99,6 @@ describe('Message test suite', function() {
     assertEquals('some_bytes', data.getBytesField());
   });
 
-  it('testNestedMessage', function() {
-    var msg = new proto.jspb.test.OuterMessage.Complex();
-    msg.setInnerComplexField(5);
-    assertObjectEquals({innerComplexField: 5}, msg.toObject());
-  });
-
   it('testComplexConversion', function() {
     var data1 = ['a',,, [, 11], [[, 22], [, 33]],, ['s1', 's2'],, 1];
     var data2 = ['a',,, [, 11], [[, 22], [, 33]],, ['s1', 's2'],, 1];
@@ -160,6 +155,13 @@ describe('Message test suite', function() {
 
   });
 
+  it('testNestedComplexMessage', function() {
+    // Instantiate the message and set a unique field, just to ensure that we
+    // are not getting jspb.test.Complex instead.
+    var msg = new proto.jspb.test.OuterMessage.Complex();
+    msg.setInnerComplexField(5);
+  });
+
   it('testSpecialCases', function() {
     // Note: Some property names are reserved in JavaScript.
     // These names are converted to the Js property named pb_<reserved_name>.
@@ -544,12 +546,6 @@ describe('Message test suite', function() {
     extendable.setExtension(proto.jspb.test.IndirectExtension.str, null);
     assertNull(extendable.getExtension(proto.jspb.test.IndirectExtension.str));
 
-    // These assertions will only work properly in uncompiled mode.
-    // Extension fields defined on proto2 Descriptor messages are filtered out.
-
-    // TODO(haberman): codegen changes to properly ignore descriptor.proto
-    // extensions need to be merged from google3.
-    // assertUndefined(proto.jspb.test.IsExtension['simpleOption']);
 
     // Extension fields with jspb.ignore = true are ignored.
     assertUndefined(proto.jspb.test.IndirectExtension['ignored']);
@@ -907,7 +903,7 @@ describe('Message test suite', function() {
         message.getDefaultOneofACase());
 
     message =
-        new proto.jspb.test.TestMessageWithOneof(new Array(9).concat(567,890));
+        new proto.jspb.test.TestMessageWithOneof(new Array(9).concat(567, 890));
     assertEquals(1234, message.getAone());
     assertEquals(890, message.getAtwo());
     assertEquals(
@@ -936,7 +932,7 @@ describe('Message test suite', function() {
             message.getDefaultOneofBCase());
 
         message = new proto.jspb.test.TestMessageWithOneof(
-            new Array(11).concat(567,890));
+            new Array(11).concat(567, 890));
         assertUndefined(message.getBone());
         assertEquals(890, message.getBtwo());
         assertEquals(
@@ -989,4 +985,26 @@ describe('Message test suite', function() {
     assertEquals('y', array[4]);
   });
 
+  it('testFloatingPointFieldsSupportNan', function() {
+    var assertNan = function(x) {
+      assertTrue('Expected ' + x + ' (' + goog.typeOf(x) + ') to be NaN.',
+          goog.isNumber(x) && isNaN(x));
+    };
+
+    var message = new proto.jspb.test.FloatingPointFields([
+      'NaN', 'NaN', ['NaN', 'NaN'], 'NaN',
+      'NaN', 'NaN', ['NaN', 'NaN'], 'NaN'
+    ]);
+    assertNan(message.getOptionalFloatField());
+    assertNan(message.getRequiredFloatField());
+    assertNan(message.getRepeatedFloatFieldList()[0]);
+    assertNan(message.getRepeatedFloatFieldList()[1]);
+    assertNan(message.getDefaultFloatField());
+    assertNan(message.getOptionalDoubleField());
+    assertNan(message.getRequiredDoubleField());
+    assertNan(message.getRepeatedDoubleFieldList()[0]);
+    assertNan(message.getRepeatedDoubleFieldList()[1]);
+    assertNan(message.getDefaultDoubleField());
+  });
+
 });

+ 60 - 29
js/proto3_test.js

@@ -28,6 +28,7 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+goog.require('goog.crypt.base64');
 goog.require('goog.testing.asserts');
 
 // CommonJS-LoadFromFile: testbinary_pb proto.jspb.test
@@ -37,31 +38,30 @@ goog.require('proto.jspb.test.ForeignMessage');
 goog.require('proto.jspb.test.Proto3Enum');
 goog.require('proto.jspb.test.TestProto3');
 
+
+var BYTES = new Uint8Array([1, 2, 8, 9]);
+var BYTES_B64 = goog.crypt.base64.encodeByteArray(BYTES);
+
+
 /**
- * Helper: compare a bytes field to a string with codepoints 0--255.
+ * Helper: compare a bytes field to an expected value
  * @param {Uint8Array|string} arr
- * @param {string} str
+ * @param {Uint8Array} expected
  * @return {boolean}
  */
-function bytesCompare(arr, str) {
-  if (arr.length != str.length) {
+function bytesCompare(arr, expected) {
+  if (goog.isString(arr)) {
+    arr = goog.crypt.base64.decodeStringToUint8Array(arr);
+  }
+  if (arr.length != expected.length) {
     return false;
   }
-  if (typeof arr == 'string') {
-    for (var i = 0; i < arr.length; i++) {
-      if (arr.charCodeAt(i) != str.charCodeAt(i)) {
-        return false;
-      }
+  for (var i = 0; i < arr.length; i++) {
+    if (arr[i] != expected[i]) {
+      return false;
     }
-    return true;
-  } else {
-    for (var i = 0; i < arr.length; i++) {
-      if (arr[i] != str.charCodeAt(i)) {
-        return false;
-      }
-    }
-    return true;
   }
+  return true;
 }
 
 
@@ -86,13 +86,17 @@ describe('proto3Test', function() {
     assertEquals(msg.getOptionalDouble(), 0);
     assertEquals(msg.getOptionalString(), '');
 
-    // If/when we change bytes fields to return Uint8Array, we'll want to switch
-    // to this assertion instead:
-    //assertEquals(msg.getOptionalBytes() instanceof Uint8Array, true);
+    // TODO(b/26173701): when we change bytes fields default getter to return
+    // Uint8Array, we'll want to switch this assertion to match the u8 case.
     assertEquals(typeof msg.getOptionalBytes(), 'string');
-
+    assertEquals(msg.getOptionalBytes_asU8() instanceof Uint8Array, true);
+    assertEquals(typeof msg.getOptionalBytes_asB64(), 'string');
     assertEquals(msg.getOptionalBytes().length, 0);
-    assertEquals(msg.getOptionalForeignEnum(), proto.jspb.test.Proto3Enum.PROTO3_FOO);
+    assertEquals(msg.getOptionalBytes_asU8().length, 0);
+    assertEquals(msg.getOptionalBytes_asB64(), '');
+
+    assertEquals(msg.getOptionalForeignEnum(),
+                 proto.jspb.test.Proto3Enum.PROTO3_FOO);
     assertEquals(msg.getOptionalForeignMessage(), undefined);
     assertEquals(msg.getOptionalForeignMessage(), undefined);
 
@@ -136,7 +140,7 @@ describe('proto3Test', function() {
     msg.setOptionalDouble(-1.5);
     msg.setOptionalBool(true);
     msg.setOptionalString('hello world');
-    msg.setOptionalBytes('bytes');
+    msg.setOptionalBytes(BYTES);
     var submsg = new proto.jspb.test.ForeignMessage();
     submsg.setC(16);
     msg.setOptionalForeignMessage(submsg);
@@ -156,7 +160,7 @@ describe('proto3Test', function() {
     msg.setRepeatedDoubleList([-1.5]);
     msg.setRepeatedBoolList([true]);
     msg.setRepeatedStringList(['hello world']);
-    msg.setRepeatedBytesList(['bytes']);
+    msg.setRepeatedBytesList([BYTES]);
     submsg = new proto.jspb.test.ForeignMessage();
     submsg.setC(1000);
     msg.setRepeatedForeignMessageList([submsg]);
@@ -181,7 +185,7 @@ describe('proto3Test', function() {
     assertEquals(msg.getOptionalDouble(), -1.5);
     assertEquals(msg.getOptionalBool(), true);
     assertEquals(msg.getOptionalString(), 'hello world');
-    assertEquals(true, bytesCompare(msg.getOptionalBytes(), 'bytes'));
+    assertEquals(true, bytesCompare(msg.getOptionalBytes(), BYTES));
     assertEquals(msg.getOptionalForeignMessage().getC(), 16);
     assertEquals(msg.getOptionalForeignEnum(),
         proto.jspb.test.Proto3Enum.PROTO3_BAR);
@@ -201,7 +205,7 @@ describe('proto3Test', function() {
     assertElementsEquals(msg.getRepeatedBoolList(), [true]);
     assertElementsEquals(msg.getRepeatedStringList(), ['hello world']);
     assertEquals(msg.getRepeatedBytesList().length, 1);
-    assertEquals(true, bytesCompare(msg.getRepeatedBytesList()[0], 'bytes'));
+    assertEquals(true, bytesCompare(msg.getRepeatedBytesList()[0], BYTES));
     assertEquals(msg.getRepeatedForeignMessageList().length, 1);
     assertEquals(msg.getRepeatedForeignMessageList()[0].getC(), 1000);
     assertElementsEquals(msg.getRepeatedForeignEnumList(),
@@ -242,11 +246,12 @@ describe('proto3Test', function() {
     assertEquals(msg.getOneofString(), 'hello');
     assertEquals(msg.getOneofBytes(), undefined);
 
-    msg.setOneofBytes('\u00FF\u00FF');
+    msg.setOneofBytes(goog.crypt.base64.encodeString('\u00FF\u00FF'));
     assertEquals(msg.getOneofUint32(), undefined);
     assertEquals(msg.getOneofForeignMessage(), undefined);
     assertEquals(msg.getOneofString(), undefined);
-    assertEquals(msg.getOneofBytes(), '\u00FF\u00FF');
+    assertEquals(msg.getOneofBytes_asB64(),
+        goog.crypt.base64.encodeString('\u00FF\u00FF'));
   });
 
 
@@ -267,7 +272,7 @@ describe('proto3Test', function() {
     msg.setOptionalBool(false);
     msg.setOptionalString('hello world');
     msg.setOptionalString('');
-    msg.setOptionalBytes('\u00FF\u00FF');
+    msg.setOptionalBytes(goog.crypt.base64.encodeString('\u00FF\u00FF'));
     msg.setOptionalBytes('');
     msg.setOptionalForeignMessage(new proto.jspb.test.ForeignMessage());
     msg.setOptionalForeignMessage(null);
@@ -280,4 +285,30 @@ describe('proto3Test', function() {
     var serialized = msg.serializeBinary();
     assertEquals(0, serialized.length);
   });
+
+  /**
+   * Test that base64 string and Uint8Array are interchangeable in bytes fields.
+   */
+  it('testBytesFieldsInterop', function() {
+    var msg = new proto.jspb.test.TestProto3();
+    // Set as a base64 string and check all the getters work.
+    msg.setOptionalBytes(BYTES_B64);
+    assertTrue(bytesCompare(msg.getOptionalBytes_asU8(), BYTES));
+    assertTrue(bytesCompare(msg.getOptionalBytes_asB64(), BYTES));
+    assertTrue(bytesCompare(msg.getOptionalBytes(), BYTES));
+
+    // Test binary serialize round trip doesn't break it.
+    msg = proto.jspb.test.TestProto3.deserializeBinary(msg.serializeBinary());
+    assertTrue(bytesCompare(msg.getOptionalBytes_asU8(), BYTES));
+    assertTrue(bytesCompare(msg.getOptionalBytes_asB64(), BYTES));
+    assertTrue(bytesCompare(msg.getOptionalBytes(), BYTES));
+
+    msg = new proto.jspb.test.TestProto3();
+    // Set as a Uint8Array and check all the getters work.
+    msg.setOptionalBytes(BYTES);
+    assertTrue(bytesCompare(msg.getOptionalBytes_asU8(), BYTES));
+    assertTrue(bytesCompare(msg.getOptionalBytes_asB64(), BYTES));
+    assertTrue(bytesCompare(msg.getOptionalBytes(), BYTES));
+
+  });
 });

+ 12 - 0
js/test.proto

@@ -145,6 +145,17 @@ message DefaultValues {
   optional bytes bytes_field = 8 [default="moo"]; // Base64 encoding is "bW9v"
 }
 
+message FloatingPointFields {
+  optional float optional_float_field = 1;
+  required float required_float_field = 2;
+  repeated float repeated_float_field = 3;
+  optional float default_float_field = 4 [default = 2.0];
+  optional double optional_double_field = 5;
+  required double required_double_field = 6;
+  repeated double repeated_double_field = 7;
+  optional double default_double_field = 8 [default = 2.0];
+}
+
 message TestClone {
   optional string str = 1;
   optional Simple1 simple1 = 3;
@@ -216,3 +227,4 @@ message TestMessageWithOneof {
     int32 btwo = 13 [default = 1234];
   }
 }
+

+ 10 - 0
php/ext/google/protobuf/config.m4

@@ -0,0 +1,10 @@
+dnl lines starting with "dnl" are comments
+
+PHP_ARG_ENABLE(protobuf, whether to enable Protobuf extension, [  --enable-protobuf   Enable Protobuf extension])
+
+if test "$PHP_PROTOBUF" != "no"; then
+
+  dnl this defines the extension
+  PHP_NEW_EXTENSION(protobuf, upb.c protobuf.c def.c message.c storage.c, $ext_shared)
+
+fi

+ 381 - 0
php/ext/google/protobuf/def.c

@@ -0,0 +1,381 @@
+#include "protobuf.h"
+
+// -----------------------------------------------------------------------------
+// Common Utilities
+// -----------------------------------------------------------------------------
+
+// Reports a failed upb operation to PHP.
+//
+// When `status` is not OK, raises a PHP error whose text is `msg` followed by
+// the upb error message. Does nothing when the status is OK.
+void check_upb_status(const upb_status* status, const char* msg) {
+  if (!upb_ok(status)) {
+    // zend_error() takes the error level first and the format string second.
+    zend_error(E_ERROR, "%s: %s\n", msg, upb_status_errmsg(status));
+  }
+}
+
+
+// Returns `def` with its const qualifier cast away so that it can be mutated.
+// Raises a PHP E_ERROR first if upb reports the def as frozen.
+static upb_def *check_notfrozen(const upb_def *def) {
+  if (upb_def_isfrozen(def)) {
+    zend_error(E_ERROR,
+               "Attempt to modify a frozen descriptor. Once descriptors are "
+               "added to the descriptor pool, they may not be modified.");
+  }
+  return (upb_def *)def;
+}
+
+// Message-def flavour of check_notfrozen(): runs the frozen check on `def` and
+// hands back a mutable upb_msgdef pointer.
+static upb_msgdef *check_msgdef_notfrozen(const upb_msgdef *def) {
+  upb_def *mutable_def = check_notfrozen((const upb_def *)def);
+  return upb_downcast_msgdef_mutable(mutable_def);
+}
+
+// Field-def flavour of check_notfrozen(): runs the frozen check on `def` and
+// hands back a mutable upb_fielddef pointer.
+static upb_fielddef *check_fielddef_notfrozen(const upb_fielddef *def) {
+  upb_def *mutable_def = check_notfrozen((const upb_def *)def);
+  return upb_downcast_fielddef_mutable(mutable_def);
+}
+
+// Turns the zval `wrapper` into an object zval backed by `intern`: the
+// instance is put into the Zend object store with
+// zend_objects_destroy_object as destructor and `intern_dtor` as the
+// free-storage callback, and the standard object handlers are attached.
+#define PROTOBUF_WRAP_INTERN(wrapper, intern, intern_dtor)            \
+  Z_TYPE_P(wrapper) = IS_OBJECT;                                      \
+  Z_OBJVAL_P(wrapper)                                                 \
+      .handle = zend_objects_store_put(                               \
+      intern, (zend_objects_store_dtor_t)zend_objects_destroy_object, \
+      intern_dtor, NULL TSRMLS_CC);                                   \
+  Z_OBJVAL_P(wrapper).handlers = zend_get_std_object_handlers();
+
+// Allocates a zero-filled `class_name` instance, runs
+// `class_name_lower`_init_c_instance() on it and makes the existing zval
+// `wrapper` an object zval backed by that instance. The instance is stored
+// with no destructor and `class_name_lower`_free as free-storage callback.
+// Note: the macro declares the local variable `intern` in the enclosing scope
+// and expands to several statements.
+#define PROTOBUF_SETUP_ZEND_WRAPPER(class_name, class_name_lower, wrapper,    \
+                                    intern)                                   \
+  Z_TYPE_P(wrapper) = IS_OBJECT;                                              \
+  class_name *intern = ALLOC(class_name);                                     \
+  memset(intern, 0, sizeof(class_name));                                      \
+  class_name_lower##_init_c_instance(intern TSRMLS_CC);                       \
+  Z_OBJVAL_P(wrapper)                                                         \
+      .handle = zend_objects_store_put(intern, NULL, class_name_lower##_free, \
+                                       NULL TSRMLS_CC);                       \
+  Z_OBJVAL_P(wrapper).handlers = zend_get_std_object_handlers();
+
+// Same as PROTOBUF_SETUP_ZEND_WRAPPER, but first allocates `wrapper` itself
+// with MAKE_STD_ZVAL.
+#define PROTOBUF_CREATE_ZEND_WRAPPER(class_name, class_name_lower, wrapper, \
+                                     intern)                                \
+  MAKE_STD_ZVAL(wrapper);                                                   \
+  PROTOBUF_SETUP_ZEND_WRAPPER(class_name, class_name_lower, wrapper, intern);
+
+// Generates the boilerplate for a PHP class backed by the C struct `name`:
+//   - `name_lower`_type:   the registered class entry.
+//   - `name_lower`_init:   registers the class under `string_name` with the
+//                          method table `name_lower`_methods and installs
+//                          `name_lower`_create as its create_object handler.
+//   - php_to_`name_lower`: fetches the C struct behind an object zval.
+//   - `name_lower`_free:   calls `name_lower`_free_c(), then efree()s the
+//                          struct.
+//   - `name_lower`_create: allocates a zero-filled struct, runs
+//                          `name_lower`_init_c_instance() and stores it in
+//                          the Zend object store.
+// The user of the macro must provide `name_lower`_methods,
+// `name_lower`_init_c_instance and `name_lower`_free_c.
+#define DEFINE_CLASS(name, name_lower, string_name)                          \
+  zend_class_entry *name_lower##_type;                                       \
+  void name_lower##_init(TSRMLS_D) {                                         \
+    zend_class_entry class_type;                                             \
+    INIT_CLASS_ENTRY(class_type, string_name, name_lower##_methods);         \
+    name_lower##_type = zend_register_internal_class(&class_type TSRMLS_CC); \
+    name_lower##_type->create_object = name_lower##_create;                  \
+  }                                                                          \
+  name *php_to_##name_lower(zval *val TSRMLS_DC) {                           \
+    return (name *)zend_object_store_get_object(val TSRMLS_CC);              \
+  }                                                                          \
+  void name_lower##_free(void *object TSRMLS_DC) {                           \
+    name *intern = (name *)object;                                           \
+    name_lower##_free_c(intern TSRMLS_CC);                                   \
+    efree(object);                                                           \
+  }                                                                          \
+  zend_object_value name_lower##_create(zend_class_entry *ce TSRMLS_DC) {    \
+    zend_object_value return_value;                                          \
+    name *intern = (name *)emalloc(sizeof(name));                            \
+    memset(intern, 0, sizeof(name));                                         \
+    name_lower##_init_c_instance(intern TSRMLS_CC);                          \
+    return_value.handle = zend_objects_store_put(                            \
+        intern, (zend_objects_store_dtor_t)zend_objects_destroy_object,      \
+        name_lower##_free, NULL TSRMLS_CC);                                  \
+    return_value.handlers = zend_get_std_object_handlers();                  \
+    return return_value;                                                     \
+  }
+
+// -----------------------------------------------------------------------------
+// DescriptorPool
+// -----------------------------------------------------------------------------
+
+// PHP-visible methods of the DescriptorPool class: addMessage() and
+// finalize(), both public.
+static zend_function_entry descriptor_pool_methods[] = {
+  PHP_ME(DescriptorPool, addMessage, NULL, ZEND_ACC_PUBLIC)
+  PHP_ME(DescriptorPool, finalize, NULL, ZEND_ACC_PUBLIC)
+  ZEND_FE_END
+};
+
+DEFINE_CLASS(DescriptorPool, descriptor_pool,
+             "Google\\Protobuf\\DescriptorPool");
+
+DescriptorPool *generated_pool;  // The actual generated pool
+
+// PHP function get_generated_pool(): returns the shared DescriptorPool
+// object, creating it on first use.
+ZEND_FUNCTION(get_generated_pool) {
+  if (PROTOBUF_G(generated_pool) == NULL) {
+    // First call: allocate the wrapper zval and the C-level pool, then
+    // register the pool in the Zend object store with descriptor_pool_free
+    // as its free-storage callback and no destructor.
+    MAKE_STD_ZVAL(PROTOBUF_G(generated_pool));
+    Z_TYPE_P(PROTOBUF_G(generated_pool)) = IS_OBJECT;
+    generated_pool = ALLOC(DescriptorPool);
+    descriptor_pool_init_c_instance(generated_pool TSRMLS_CC);
+    Z_OBJ_HANDLE_P(PROTOBUF_G(generated_pool)) = zend_objects_store_put(
+        generated_pool, NULL,
+        (zend_objects_free_object_storage_t)descriptor_pool_free, NULL TSRMLS_CC);
+    Z_OBJ_HT_P(PROTOBUF_G(generated_pool)) = zend_get_std_object_handlers();
+  }
+  // Return a copy of the cached zval; the cached one is left untouched.
+  RETURN_ZVAL(PROTOBUF_G(generated_pool), 1, 0);
+}
+
+// Initializes a freshly allocated DescriptorPool: standard Zend object state,
+// a new upb symbol table (owner tag: &pool->symtab) and an empty
+// pending_list hash table whose elements are released with ZVAL_PTR_DTOR.
+void descriptor_pool_init_c_instance(DescriptorPool* pool TSRMLS_DC) {
+  zend_object_std_init(&pool->std, descriptor_pool_type TSRMLS_CC);
+  pool->symtab = upb_symtab_new(&pool->symtab);
+
+  ALLOC_HASHTABLE(pool->pending_list);
+  zend_hash_init(pool->pending_list, 1, NULL, ZVAL_PTR_DTOR, 0);
+}
+
+// Releases what descriptor_pool_init_c_instance() acquired: the symbol table
+// reference and the pending_list hash table (contents and the table itself).
+void descriptor_pool_free_c(DescriptorPool *pool TSRMLS_DC) {
+  upb_symtab_unref(pool->symtab, &pool->symtab);
+  zend_hash_destroy(pool->pending_list);
+  FREE_HASHTABLE(pool->pending_list);
+}
+
+// DescriptorPool::addMessage(string $name)
+//
+// Creates a MessageBuilderContext that remembers this pool, names the
+// context's descriptor `name` and returns the context. Returns without a
+// value if the argument cannot be parsed as a string.
+PHP_METHOD(DescriptorPool, addMessage) {
+  char *name = NULL;
+  int str_len;
+  if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &str_len) ==
+      FAILURE) {
+    return;
+  }
+
+  zval* retval = NULL;
+  // Declares the local `context` (the C struct behind `retval`).
+  PROTOBUF_CREATE_ZEND_WRAPPER(MessageBuilderContext, message_builder_context,
+                               retval, context);
+
+  // Give the context its own copy of the zval referring to this pool.
+  MAKE_STD_ZVAL(context->pool);
+  ZVAL_ZVAL(context->pool, getThis(), 1, 0);
+
+  Descriptor *desc = php_to_descriptor(context->descriptor TSRMLS_CC);
+  Descriptor_name_set(desc, name);
+
+  // Move `retval` into the return value and destroy the temporary zval.
+  RETURN_ZVAL(retval, 0, 1);
+}
+
+// Walks every field of `msgdef` and raises a PHP E_ERROR for any field
+// labelled required, since proto3 does not support required fields.
+static void validate_msgdef(const upb_msgdef* msgdef) {
+  upb_msg_field_iter iter;
+  upb_msg_field_begin(&iter, msgdef);
+  while (!upb_msg_field_done(&iter)) {
+    const upb_fielddef* current = upb_msg_iter_field(&iter);
+    if (upb_fielddef_label(current) == UPB_LABEL_REQUIRED) {
+      zend_error(E_ERROR, "Required fields are unsupported in proto3.");
+    }
+    upb_msg_field_next(&iter);
+  }
+}
+
+// DescriptorPool::finalize()
+//
+// Validates every descriptor queued in pending_list, adds their message defs
+// to the pool's symbol table in one call, builds a PHP class for each
+// descriptor and finally empties pending_list.
+PHP_METHOD(DescriptorPool, finalize) {
+  DescriptorPool *self = php_to_descriptor_pool(getThis() TSRMLS_CC);
+  Bucket *temp;
+  int i, num;
+
+  num = zend_hash_num_elements(self->pending_list);
+  upb_def **defs = emalloc(sizeof(upb_def *) * num);
+
+  // Pass 1: collect the msgdef of each pending descriptor and validate it.
+  for (i = 0, temp = self->pending_list->pListHead; temp != NULL;
+       temp = temp->pListNext) {
+    zval *def_php = *(zval **)temp->pData;
+    Descriptor* desc = php_to_descriptor(def_php TSRMLS_CC);
+    defs[i] = (upb_def *)desc->msgdef;
+    validate_msgdef((const upb_msgdef *)defs[i++]);
+  }
+
+  // NOTE(review): `status` is not declared in this function; presumably the
+  // CHECK_UPB macro (defined elsewhere) declares it -- confirm.
+  CHECK_UPB(upb_symtab_add(self->symtab, (upb_def **)defs, num, NULL, &status),
+            "Unable to add defs to DescriptorPool");
+
+  // Pass 2: generate a PHP class for every descriptor just added.
+  for (temp = self->pending_list->pListHead; temp != NULL;
+       temp = temp->pListNext) {
+    // zval *def_php = *(zval **)temp->pData;
+    // Descriptor* desc = php_to_descriptor(def_php TSRMLS_CC);
+    build_class_from_descriptor((zval *)temp->pDataPtr TSRMLS_CC);
+  }
+
+  // NOTE(review): `defs` comes from emalloc() but is released with FREE();
+  // verify that FREE maps to efree.
+  FREE(defs);
+  // Reset the pending list so the same descriptors are not finalized twice.
+  zend_hash_destroy(self->pending_list);
+  zend_hash_init(self->pending_list, 1, NULL, ZVAL_PTR_DTOR, 0);
+}
+
+// -----------------------------------------------------------------------------
+// Descriptor
+// -----------------------------------------------------------------------------
+
+// Method table of the Descriptor class; it exposes no PHP methods.
+static zend_function_entry descriptor_methods[] = {
+  ZEND_FE_END
+};
+
+DEFINE_CLASS(Descriptor, descriptor, "Google\\Protobuf\\Descriptor");
+
+// Releases the upb objects held by a Descriptor: one reference on every field
+// of the message def, the message def itself, the cached serialize handlers
+// and the message layout (the last two only when they were created).
+void descriptor_free_c(Descriptor *self TSRMLS_DC) {
+  upb_msg_field_iter iter;
+  upb_msg_field_begin(&iter, self->msgdef);
+  while (!upb_msg_field_done(&iter)) {
+    upb_fielddef *fielddef = upb_msg_iter_field(&iter);
+    upb_fielddef_unref(fielddef, &fielddef);
+    upb_msg_field_next(&iter);
+  }
+  upb_msgdef_unref(self->msgdef, &self->msgdef);
+  // The handlers are created lazily with &pb_serialize_handlers as owner tag;
+  // drop that reference here so they are not leaked with the descriptor.
+  if (self->pb_serialize_handlers) {
+    upb_handlers_unref(self->pb_serialize_handlers,
+                       &self->pb_serialize_handlers);
+  }
+  if (self->layout) {
+    free_layout(self->layout);
+  }
+}
+
+// Adds `fielddef` to the message def of `desc`. Both defs go through the
+// frozen check first, so a PHP E_ERROR is raised if either is frozen.
+// NOTE(review): `status` is presumably declared by CHECK_UPB -- confirm.
+static void descriptor_add_field(Descriptor *desc,
+                                 const upb_fielddef *fielddef) {
+  upb_msgdef *mut_def = check_msgdef_notfrozen(desc->msgdef);
+  upb_fielddef *mut_field_def = check_fielddef_notfrozen(fielddef);
+  CHECK_UPB(upb_msgdef_addfield(mut_def, mut_field_def, NULL, &status),
+            "Adding field to Descriptor failed");
+  // add_def_obj(fielddef, obj);
+}
+
+// Initializes a freshly allocated Descriptor: standard Zend object state and
+// a new, empty upb message def (owner tag: &desc->msgdef). The layout and the
+// serialize handlers start out as NULL.
+void descriptor_init_c_instance(Descriptor* desc TSRMLS_DC) {
+  zend_object_std_init(&desc->std, descriptor_type TSRMLS_CC);
+  desc->msgdef = upb_msgdef_new(&desc->msgdef);
+  desc->layout = NULL;
+  // MAKE_STD_ZVAL(intern->klass);
+  // ZVAL_NULL(intern->klass);
+  desc->pb_serialize_handlers = NULL;
+}
+
+// Sets the fully-qualified name of the descriptor's message def to `name`.
+// Raises a PHP E_ERROR if the def is already frozen.
+void Descriptor_name_set(Descriptor *desc, const char *name) {
+  upb_msgdef *mut_def = check_msgdef_notfrozen(desc->msgdef);
+  CHECK_UPB(upb_msgdef_setfullname(mut_def, name, &status),
+            "Error setting Descriptor name");
+}
+
+// -----------------------------------------------------------------------------
+// FieldDescriptor
+// -----------------------------------------------------------------------------
+
+// Sets the name of `fielddef` to `name`. Raises a PHP E_ERROR if the def is
+// already frozen.
+static void field_descriptor_name_set(const upb_fielddef* fielddef,
+                                      const char *name) {
+  upb_fielddef *mut_def = check_fielddef_notfrozen(fielddef);
+  CHECK_UPB(upb_fielddef_setname(mut_def, name, &status),
+            "Error setting FieldDescriptor name");
+}
+
+// Sets the label of `fielddef` to `upb_label`. Raises a PHP E_ERROR if the
+// def is already frozen.
+static void field_descriptor_label_set(const upb_fielddef* fielddef,
+                                       upb_label_t upb_label) {
+  upb_fielddef *mut_def = check_fielddef_notfrozen(fielddef);
+  upb_fielddef_setlabel(mut_def, upb_label);
+}
+
+// Maps a lower-case proto type name ("float", "int32", "message", ...) to the
+// matching UPB_DESCRIPTOR_TYPE_* constant. The comparison is an exact,
+// case-sensitive strcmp(). Raises a PHP E_ERROR for an unknown name.
+// NOTE(review): the returned values are UPB_DESCRIPTOR_TYPE_* constants
+// although the declared return type is upb_fieldtype_t -- confirm intended.
+upb_fieldtype_t string_to_descriptortype(const char *type) {
+#define CONVERT(upb, str)   \
+  if (!strcmp(type, str)) { \
+    return UPB_DESCRIPTOR_TYPE_##upb;  \
+  }
+
+  CONVERT(FLOAT, "float");
+  CONVERT(DOUBLE, "double");
+  CONVERT(BOOL, "bool");
+  CONVERT(STRING, "string");
+  CONVERT(BYTES, "bytes");
+  CONVERT(MESSAGE, "message");
+  CONVERT(GROUP, "group");
+  CONVERT(ENUM, "enum");
+  CONVERT(INT32, "int32");
+  CONVERT(INT64, "int64");
+  CONVERT(UINT32, "uint32");
+  CONVERT(UINT64, "uint64");
+  CONVERT(SINT32, "sint32");
+  CONVERT(SINT64, "sint64");
+  CONVERT(FIXED32, "fixed32");
+  CONVERT(FIXED64, "fixed64");
+  CONVERT(SFIXED32, "sfixed32");
+  CONVERT(SFIXED64, "sfixed64");
+
+#undef CONVERT
+
+  zend_error(E_ERROR, "Unknown field type.");
+  // Fallback value after the error call above.
+  return 0;
+}
+
+// Sets the descriptor type of `fielddef` from its textual name `type`.
+// Raises a PHP E_ERROR if the def is frozen or the type name is unknown.
+static void field_descriptor_type_set(const upb_fielddef* fielddef,
+                                      const char *type) {
+  upb_fielddef *mutable_field = check_fielddef_notfrozen(fielddef);
+  upb_fieldtype_t parsed_type = string_to_descriptortype(type);
+  upb_fielddef_setdescriptortype(mutable_field, parsed_type);
+}
+
+// Sets the field number of `fielddef` to `number`. Raises a PHP E_ERROR if
+// the def is already frozen.
+static void field_descriptor_number_set(const upb_fielddef* fielddef,
+                                        int number) {
+  upb_fielddef *mut_def = check_fielddef_notfrozen(fielddef);
+  CHECK_UPB(upb_fielddef_setnumber(mut_def, number, &status),
+            "Error setting field number");
+}
+
+// -----------------------------------------------------------------------------
+// MessageBuilderContext
+// -----------------------------------------------------------------------------
+
+// PHP-visible methods of the MessageBuilderContext class: finalizeToPool()
+// and optional(), both public. Terminated with ZEND_FE_END like the other
+// method tables in this file.
+static zend_function_entry message_builder_context_methods[] = {
+    PHP_ME(MessageBuilderContext, finalizeToPool, NULL, ZEND_ACC_PUBLIC)
+    PHP_ME(MessageBuilderContext, optional, NULL, ZEND_ACC_PUBLIC)
+    ZEND_FE_END
+};
+
+DEFINE_CLASS(MessageBuilderContext, message_builder_context,
+             "Google\\Protobuf\\Internal\\MessageBuilderContext");
+
+// Drops the context's references to its descriptor zval and its pool zval.
+void message_builder_context_free_c(MessageBuilderContext *context TSRMLS_DC) {
+  zval_ptr_dtor(&context->descriptor);
+  zval_ptr_dtor(&context->pool);
+}
+
+// Initializes a freshly allocated MessageBuilderContext: standard Zend object
+// state plus a brand-new Descriptor object stored in context->descriptor.
+// context->pool is not set here.
+void message_builder_context_init_c_instance(
+    MessageBuilderContext *context TSRMLS_DC) {
+  zend_object_std_init(&context->std, message_builder_context_type TSRMLS_CC);
+  PROTOBUF_CREATE_ZEND_WRAPPER(Descriptor, descriptor, context->descriptor,
+                               desc);
+}
+
+// Creates a new, non-packed upb field def with the given label, name, type
+// name and field number and adds it to the message def of `desc`.
+// `type_class` is accepted but not used yet: the code that would consume it
+// is still commented out below.
+static void msgdef_add_field(Descriptor *desc, upb_label_t upb_label,
+                             const char *name, const char *type, int number,
+                             const char *type_class) {
+  upb_fielddef *fielddef = upb_fielddef_new(&fielddef);
+  upb_fielddef_setpacked(fielddef, false);
+
+  field_descriptor_label_set(fielddef, upb_label);
+  field_descriptor_name_set(fielddef, name);
+  field_descriptor_type_set(fielddef, type);
+  field_descriptor_number_set(fielddef, number);
+
+// //   if (type_class != Qnil) {
+// //     if (TYPE(type_class) != T_STRING) {
+// //       rb_raise(rb_eArgError, "Expected string for type class");
+// //     }
+// //     // Make it an absolute type name by prepending a dot.
+// //     type_class = rb_str_append(rb_str_new2("."), type_class);
+// //     rb_funcall(fielddef, rb_intern("submsg_name="), 1, type_class);
+// //   }
+  descriptor_add_field(desc, fielddef);
+}
+
+// MessageBuilderContext::optional(string $name, string $type, int $number
+//                                 [, string $type_class])
+//
+// Adds an optional field to the descriptor being built and returns the
+// context so that calls can be chained. Returns without a value when the
+// arguments cannot be parsed.
+PHP_METHOD(MessageBuilderContext, optional) {
+  MessageBuilderContext *self = php_to_message_builder_context(getThis() TSRMLS_CC);
+  Descriptor *desc = php_to_descriptor(self->descriptor TSRMLS_CC);
+  // type_class stays NULL when the optional fourth argument is omitted.
+  const char *name = NULL, *type = NULL, *type_class = NULL;
+  int name_str_len, type_str_len, type_class_str_len;
+  // The "l" specifier stores a long; an int here would be overrun on
+  // platforms where long is wider than int.
+  long number;
+
+  if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ssl|s", &name,
+                            &name_str_len, &type, &type_str_len, &number,
+                            &type_class, &type_class_str_len) == FAILURE) {
+    return;
+  }
+
+  msgdef_add_field(desc, UPB_LABEL_OPTIONAL, name, type, (int)number,
+                   type_class);
+
+  // NOTE(review): RETURN_ZVAL below already copies with copy=1; confirm the
+  // extra zval_copy_ctor() on $this is intended and does not leak a reference.
+  zval_copy_ctor(getThis());
+  RETURN_ZVAL(getThis(), 1, 0);
+}
+
+// MessageBuilderContext::finalizeToPool()
+//
+// Queues the context's descriptor on the pending list of the pool that
+// created the context and returns that pool.
+PHP_METHOD(MessageBuilderContext, finalizeToPool) {
+  MessageBuilderContext *self = php_to_message_builder_context(getThis() TSRMLS_CC);
+  DescriptorPool *pool = php_to_descriptor_pool(self->pool TSRMLS_CC);
+
+  // The pending list stores the zval pointer, so it needs its own reference.
+  Z_ADDREF_P(self->descriptor);
+  zend_hash_next_index_insert(pool->pending_list, &self->descriptor,
+                              sizeof(zval *), NULL);
+  RETURN_ZVAL(self->pool, 1, 0);
+}

+ 273 - 0
php/ext/google/protobuf/message.c

@@ -0,0 +1,273 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2014 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <php.h>
+
+#include "protobuf.h"
+
+// -----------------------------------------------------------------------------
+// Class/module creation from msgdefs and enumdefs, respectively.
+// -----------------------------------------------------------------------------
+
+// Returns the address of the field storage that sits directly after the
+// MessageHeader at the start of `msg`.
+void* message_data(void* msg) {
+  uint8_t* base = (uint8_t *)msg;
+  return base + sizeof(MessageHeader);
+}
+
+// write_property handler for message objects: looks up the field named
+// `field_name` in the message's descriptor and stores `value` into the
+// message's layout. Raises a PHP E_ERROR when no such field exists.
+// `key` is not used.
+void message_set_property(zval* object, zval* field_name, zval* value,
+                          const zend_literal* key TSRMLS_DC) {
+  const upb_fielddef* field;
+
+  MessageHeader* self = zend_object_store_get_object(object TSRMLS_CC);
+
+  CHECK_TYPE(field_name, IS_STRING);
+  field = upb_msgdef_ntofz(self->descriptor->msgdef, Z_STRVAL_P(field_name));
+  if (field == NULL) {
+    zend_error(E_ERROR, "Unknown field: %s", Z_STRVAL_P(field_name));
+  }
+  layout_set(self->descriptor->layout, message_data(self), field, value);
+}
+
+// read_property handler for message objects: looks up the field named
+// `member` in the message's descriptor and returns its current value from
+// the message's layout. Unlike the setter, an unknown field name is not an
+// error: the engine's uninitialized zval is returned instead.
+// `type` and `key` are not used.
+zval* message_get_property(zval* object, zval* member, int type,
+                             const zend_literal* key TSRMLS_DC) {
+  MessageHeader* self =
+      (MessageHeader*)zend_object_store_get_object(object TSRMLS_CC);
+  CHECK_TYPE(member, IS_STRING);
+
+  const upb_fielddef* field;
+  field = upb_msgdef_ntofz(self->descriptor->msgdef, Z_STRVAL_P(member));
+  if (field == NULL) {
+    return EG(uninitialized_zval_ptr);
+  }
+  zval* retval = layout_get(self->descriptor->layout, message_data(self), field TSRMLS_CC);
+  return retval;
+}
+
+// PHP-visible methods shared by every generated message class: encode().
+// Terminated with ZEND_FE_END like the extension's other method tables.
+static zend_function_entry message_methods[] = {
+  PHP_ME(Message, encode, NULL, ZEND_ACC_PUBLIC)
+  ZEND_FE_END
+};
+
+/* stringsink *****************************************************************/
+
+// This should probably be factored into a common upb component.
+
+// A upb bytes sink that accumulates everything written to it in a growable
+// heap buffer.
+typedef struct {
+  upb_byteshandler handler;  // Callbacks installed by stringsink_init().
+  upb_bytessink sink;        // The sink handed to upb.
+  char *ptr;                 // malloc()ed buffer holding the output.
+  size_t len, size;          // Bytes used in / capacity of `ptr`.
+} stringsink;
+
+// Start-of-string callback: discards previously accumulated output by
+// resetting the length to zero (the buffer itself is kept) and returns the
+// sink as the closure for the following string callbacks. `hd` and
+// `size_hint` are ignored.
+static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) {
+  stringsink *sink = _sink;
+  sink->len = 0;
+  return sink;
+}
+
+// String callback: appends `len` bytes from `ptr` to the sink, doubling the
+// buffer capacity until the data fits. Returns `len` on success. If the
+// buffer cannot be grown, nothing is appended and 0 is returned.
+// NOTE(review): a short return is presumably treated by upb as "not all bytes
+// consumed" -- confirm against the upb bytes-handler contract.
+static size_t stringsink_string(void *_sink, const void *hd, const char *ptr,
+                                size_t len, const upb_bufhandle *handle) {
+  stringsink *sink = _sink;
+  size_t new_size = sink->size;
+
+  UPB_UNUSED(hd);
+  UPB_UNUSED(handle);
+
+  while (sink->len + len > new_size) {
+    new_size *= 2;
+  }
+
+  if (new_size != sink->size) {
+    // Assign through a temporary so the old buffer is still owned (and later
+    // freed) by the sink when realloc() fails.
+    char *grown = realloc(sink->ptr, new_size);
+    if (grown == NULL) {
+      return 0;
+    }
+    sink->ptr = grown;
+    sink->size = new_size;
+  }
+
+  memcpy(sink->ptr + sink->len, ptr, len);
+  sink->len += len;
+
+  return len;
+}
+
+// Prepares `sink` for use: installs the start/string callbacks, binds the upb
+// bytes sink to them and allocates an initial 32-byte buffer.
+// NOTE(review): the malloc() result is not checked.
+void stringsink_init(stringsink *sink) {
+  upb_byteshandler_init(&sink->handler);
+  upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL);
+  upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL);
+
+  upb_bytessink_reset(&sink->sink, &sink->handler, sink);
+
+  sink->size = 32;
+  sink->ptr = malloc(sink->size);
+  sink->len = 0;
+}
+
+// Frees the buffer allocated by stringsink_init(); the struct itself is not
+// freed.
+void stringsink_uninit(stringsink *sink) { free(sink->ptr); }
+
+// Stack-allocated context during an encode/decode operation. Contains the upb
+// environment and its stack-based allocator, an initial buffer for allocations
+// to avoid malloc() when possible, and a template for PHP error messages
+// if any error occurs.
+#define STACK_ENV_STACKBYTES 4096
+typedef struct {
+  upb_env env;                          // upb environment for the operation.
+  upb_seededalloc alloc;                // Allocator seeded with `allocbuf`.
+  const char *php_error_template;       // Format string passed to zend_error().
+  char allocbuf[STACK_ENV_STACKBYTES];  // Initial allocation buffer.
+} stackenv;
+
+static void stackenv_init(stackenv* se, const char* errmsg);
+static void stackenv_uninit(stackenv* se);
+
+// Callback invoked by upb if any error occurs during parsing or serialization.
+// Tears down the stack environment passed as `ud` and raises a PHP E_ERROR
+// built from the environment's message template and the upb error text.
+static bool env_error_func(void* ud, const upb_status* status) {
+    stackenv* se = ud;
+    // Copy the error text first: it is reported only after the env has been
+    // torn down, and `status` may not outlive the env (not verifiable here).
+    char errmsg[256];
+    snprintf(errmsg, sizeof(errmsg), "%s", upb_status_errmsg(status));
+
+    // Free the env -- the zend_error() call below does not return to the
+    // encode/decode function, so it would not otherwise have been freed.
+    stackenv_uninit(se);
+
+    // TODO(teboring): have a way to verify that this is actually a parse error,
+    // instead of just throwing "parse error" unconditionally.
+    // The template carries a "%s" placeholder, so an argument must be supplied.
+    zend_error(E_ERROR, se->php_error_template, errmsg);
+    // Never reached.
+    return false;
+}
+
+// Initializes `se`: records `errmsg` as the error-message template, creates
+// the upb environment, seeds its allocator with the embedded buffer and
+// installs env_error_func as the error callback.
+static void stackenv_init(stackenv* se, const char* errmsg) {
+  se->php_error_template = errmsg;
+  upb_env_init(&se->env);
+  upb_seededalloc_init(&se->alloc, &se->allocbuf, STACK_ENV_STACKBYTES);
+  upb_env_setallocfunc(&se->env, upb_seededalloc_getallocfunc(&se->alloc),
+                       &se->alloc);
+  upb_env_seterrorfunc(&se->env, env_error_func, se);
+}
+
+// Tears down what stackenv_init() set up: first the upb environment, then its
+// allocator.
+static void stackenv_uninit(stackenv* se) {
+  upb_env_uninit(&se->env);
+  upb_seededalloc_uninit(&se->alloc);
+}
+
+// -----------------------------------------------------------------------------
+// Message
+// -----------------------------------------------------------------------------
+
+// Returns the protobuf-binary encoder handlers for `desc`, creating them on
+// first use and caching them in desc->pb_serialize_handlers (which is also
+// used as the owner tag of the reference).
+static const upb_handlers* msgdef_pb_serialize_handlers(Descriptor* desc) {
+  if (desc->pb_serialize_handlers == NULL) {
+    desc->pb_serialize_handlers =
+        upb_pb_encoder_newhandlers(desc->msgdef, &desc->pb_serialize_handlers);
+  }
+  return desc->pb_serialize_handlers;
+}
+
+// Message::encode(): serializes this message and returns the result as a PHP
+// string.
+PHP_METHOD(Message, encode) {
+  // The descriptor object is read from the first static member of the
+  // message's class.
+  Descriptor* desc = (Descriptor*)zend_object_store_get_object(
+      CE_STATIC_MEMBERS(Z_OBJCE_P(getThis()))[0] TSRMLS_CC);
+
+  stringsink sink;
+  stringsink_init(&sink);
+
+  {
+    const upb_handlers* serialize_handlers = msgdef_pb_serialize_handlers(desc);
+
+    stackenv se;
+    upb_pb_encoder* encoder;
+
+    stackenv_init(&se, "Error occurred during encoding: %s");
+    encoder = upb_pb_encoder_create(&se.env, serialize_handlers, &sink.sink);
+
+    putmsg(getThis(), desc, upb_pb_encoder_input(encoder), 0);
+
+    // Copy the bytes into the return value (duplicate = 1) before the sink's
+    // buffer is freed below.
+    RETVAL_STRINGL(sink.ptr, sink.len, 1);
+
+    stackenv_uninit(&se);
+    stringsink_uninit(&sink);
+  }
+}
+
+// Free-storage callback for message objects: releases the memory block that
+// message_create() allocated for the header and field storage.
+void message_free(void * object TSRMLS_DC) {
+  FREE(object);
+}
+
+// create_object handler for generated message classes. Allocates one block
+// holding the MessageHeader followed by the field storage described by the
+// class's layout, initializes it and registers it in the Zend object store
+// with the extension's message handlers.
+zend_object_value message_create(zend_class_entry* ce TSRMLS_DC) {
+  zend_object_value return_value;
+
+  // Descriptor object registered for this class entry via add_def_obj().
+  zval* php_descriptor = get_def_obj(ce);
+
+  Descriptor* desc = zend_object_store_get_object(php_descriptor TSRMLS_CC);
+  MessageHeader* msg = (MessageHeader*)ALLOC_N(
+      uint8_t, sizeof(MessageHeader) + desc->layout->size);
+  // Only the field storage is zeroed; the header is filled in below.
+  memset(message_data(msg), 0, desc->layout->size);
+
+  // Record the descriptor in the header before the field storage is
+  // initialized by layout_init().
+  msg->descriptor = desc;
+
+  layout_init(desc->layout, message_data(msg));
+  zend_object_std_init(&msg->std, ce TSRMLS_CC);
+
+  return_value.handle = zend_objects_store_put(
+      msg, (zend_objects_store_dtor_t)zend_objects_destroy_object,
+      message_free, NULL TSRMLS_CC);
+
+  return_value.handlers = PROTOBUF_G(message_handlers);
+  return return_value;
+}
+
+// Registers a PHP class for the message described by `php_descriptor`.
+//
+// Creates the message layout if the descriptor does not have one yet,
+// registers an internal class named after the message def's full name with
+// the shared message method table, associates the class entry with the
+// descriptor object and installs message_create as its create_object
+// handler. The extension-wide message object handlers are created on first
+// use. Raises a PHP E_ERROR if the message def has no name.
+//
+// Returns the registered class entry.
+const zend_class_entry* build_class_from_descriptor(
+    zval* php_descriptor TSRMLS_DC) {
+  Descriptor* desc = php_to_descriptor(php_descriptor TSRMLS_CC);
+  if (desc->layout == NULL) {
+    MessageLayout* layout = create_layout(desc->msgdef);
+    desc->layout = layout;
+  }
+  // TODO(teboring): Add it back.
+  // if (desc->fill_method == NULL) {
+  //   desc->fill_method = new_fillmsg_decodermethod(desc, &desc->fill_method);
+  // }
+
+  const char* name = upb_msgdef_fullname(desc->msgdef);
+  if (name == NULL) {
+    zend_error(E_ERROR, "Descriptor does not have assigned name.");
+  }
+
+  zend_class_entry class_entry;
+  INIT_CLASS_ENTRY_EX(class_entry, name, strlen(name), message_methods);
+  zend_class_entry* registered_ce =
+      zend_register_internal_class(&class_entry TSRMLS_CC);
+
+  // Lets message_create() find the descriptor from the class entry.
+  add_def_obj(registered_ce, php_descriptor);
+
+  if (PROTOBUF_G(message_handlers) == NULL) {
+    // Start from the standard handlers and override property access only.
+    PROTOBUF_G(message_handlers) = ALLOC(zend_object_handlers);
+    memcpy(PROTOBUF_G(message_handlers), zend_get_std_object_handlers(),
+           sizeof(zend_object_handlers));
+    PROTOBUF_G(message_handlers)->write_property = message_set_property;
+    PROTOBUF_G(message_handlers)->read_property = message_get_property;
+  }
+
+  registered_ce->create_object = message_create;
+  return registered_ce;
+}

+ 89 - 0
php/ext/google/protobuf/protobuf.c

@@ -0,0 +1,89 @@
+#include "protobuf.h"
+
+#include <zend_hash.h>
+
+ZEND_DECLARE_MODULE_GLOBALS(protobuf)
+static PHP_GINIT_FUNCTION(protobuf);
+static PHP_GSHUTDOWN_FUNCTION(protobuf);
+
+// -----------------------------------------------------------------------------
+// Global map from upb {msg,enum}defs to wrapper Descriptor/EnumDescriptor
+// instances.
+// -----------------------------------------------------------------------------
+
+// Associates the PHP object `value` with the definition pointer `def` in the
+// global def-to-object map, replacing any previous entry for the same key.
+// The map takes its own reference on `value`.
+void add_def_obj(const void* def, zval* value) {
+  zval* pDest = NULL;
+  Z_ADDREF_P(value);
+  // The pointer value itself is used as the integer key.
+  zend_hash_index_update(&PROTOBUF_G(upb_def_to_php_obj_map), (zend_ulong)def,
+                         &value, sizeof(zval*), &pDest);
+}
+
+// Returns the PHP object previously registered for `def` with add_def_obj().
+// Raises a PHP E_ERROR when no object is registered for the pointer.
+zval* get_def_obj(const void* def) {
+  zval** value;
+  if (zend_hash_index_find(&PROTOBUF_G(upb_def_to_php_obj_map), (zend_ulong)def,
+                           &value) == FAILURE) {
+    zend_error(E_ERROR, "PHP object not found for given definition.\n");
+    return NULL;
+  }
+  return *value;
+}
+
+// -----------------------------------------------------------------------------
+// Utilities.
+// -----------------------------------------------------------------------------
+
+// Global PHP functions exported by the extension: get_generated_pool().
+zend_function_entry protobuf_functions[] = {
+  ZEND_FE(get_generated_pool, NULL)
+  ZEND_FE_END
+};
+
+// Module descriptor for the extension. "protobuf_functions" refers to the
+// table defined above. Only the process-startup hook and the globals
+// constructor/destructor are provided; the remaining hooks are NULL.
+zend_module_entry protobuf_module_entry = {
+  STANDARD_MODULE_HEADER,
+  PHP_PROTOBUF_EXTNAME, // extension name
+  protobuf_functions,   // function list
+  PHP_MINIT(protobuf),  // process startup
+  NULL,  // process shutdown
+  NULL,  // request startup
+  NULL,  // request shutdown
+  NULL,  // extension info
+  PHP_PROTOBUF_VERSION, // extension version
+  PHP_MODULE_GLOBALS(protobuf),  // globals descriptor
+  PHP_GINIT(protobuf), // globals ctor
+  PHP_GSHUTDOWN(protobuf),  // globals dtor
+  NULL,  // post deactivate
+  STANDARD_MODULE_PROPERTIES_EX
+};
+
+// install module
+ZEND_GET_MODULE(protobuf)
+
+// Globals constructor: clears the generated-pool pointers and the message
+// handlers, and creates the empty def-to-PHP-object map (16 initial slots,
+// values released with ZVAL_PTR_DTOR).
+static PHP_GINIT_FUNCTION(protobuf) {
+  protobuf_globals->generated_pool = NULL;
+  generated_pool = NULL;
+  protobuf_globals->message_handlers = NULL;
+  zend_hash_init(&protobuf_globals->upb_def_to_php_obj_map, 16, NULL,
+                 ZVAL_PTR_DTOR, 0);
+}
+
+// Globals destructor: frees the generated-pool zval and the message handlers
+// if they were created, then destroys the def-to-PHP-object map.
+// NOTE(review): FREE_ZVAL releases only the zval container -- confirm the
+// pool object it refers to is released elsewhere.
+static PHP_GSHUTDOWN_FUNCTION(protobuf) {
+  if (protobuf_globals->generated_pool != NULL) {
+    FREE_ZVAL(protobuf_globals->generated_pool);
+  }
+  if (protobuf_globals->message_handlers != NULL) {
+    FREE(protobuf_globals->message_handlers);
+  }
+  zend_hash_destroy(&protobuf_globals->upb_def_to_php_obj_map);
+}
+
+// Process-startup hook: registers the DescriptorPool, Descriptor and
+// MessageBuilderContext classes. Returns SUCCESS so that the engine treats
+// the module as started.
+PHP_MINIT_FUNCTION(protobuf) {
+  descriptor_pool_init(TSRMLS_C);
+  descriptor_init(TSRMLS_C);
+  message_builder_context_init(TSRMLS_C);
+  return SUCCESS;
+}

+ 281 - 0
php/ext/google/protobuf/protobuf.h

@@ -0,0 +1,281 @@
+#ifndef __GOOGLE_PROTOBUF_PHP_PROTOBUF_H__
+#define __GOOGLE_PROTOBUF_PHP_PROTOBUF_H__
+
+#include <php.h>
+
+#include "upb.h"
+
+#define PHP_PROTOBUF_EXTNAME "protobuf"
+#define PHP_PROTOBUF_VERSION "0.01"
+
+// Forward decls. Every struct that receives a typedef below is listed here so
+// the two lists stay in sync.
+struct DescriptorPool;
+struct Descriptor;
+struct FieldDescriptor;
+struct OneofDescriptor;
+struct EnumDescriptor;
+struct MessageLayout;
+struct MessageField;
+struct MessageHeader;
+struct MessageBuilderContext;
+struct OneofBuilderContext;
+struct EnumBuilderContext;
+
+typedef struct DescriptorPool DescriptorPool;
+typedef struct Descriptor Descriptor;
+typedef struct FieldDescriptor FieldDescriptor;
+typedef struct OneofDescriptor OneofDescriptor;
+typedef struct EnumDescriptor EnumDescriptor;
+typedef struct MessageLayout MessageLayout;
+typedef struct MessageField MessageField;
+typedef struct MessageHeader MessageHeader;
+typedef struct MessageBuilderContext MessageBuilderContext;
+typedef struct OneofBuilderContext OneofBuilderContext;
+typedef struct EnumBuilderContext EnumBuilderContext;
+
+extern zend_class_entry* builder_type;
+extern zend_class_entry* descriptor_type;
+extern zend_class_entry* message_builder_context_type;
+
+extern DescriptorPool* generated_pool;  // The actual generated pool
+
+// Module globals:
+//   generated_pool         - zval for the generated pool; NULL until set.
+//   message_handlers       - object handlers pointer; freed (if non-NULL) by
+//                            the globals destructor.
+//   upb_def_to_php_obj_map - hash table backing add_def_obj()/get_def_obj().
+ZEND_BEGIN_MODULE_GLOBALS(protobuf)
+  zval* generated_pool;
+  zend_object_handlers* message_handlers;
+  HashTable upb_def_to_php_obj_map;
+ZEND_END_MODULE_GLOBALS(protobuf)
+
+// NOTE(review): this declares the globals storage from a header, i.e. in
+// every file that includes it; presumably the header wants
+// ZEND_EXTERN_MODULE_GLOBALS with the declaration living in one .c file --
+// confirm.
+ZEND_DECLARE_MODULE_GLOBALS(protobuf)
+
+// PROTOBUF_G(v) accesses member `v` of the module globals in both
+// thread-safe (ZTS) and non-thread-safe builds.
+#ifdef ZTS
+#define PROTOBUF_G(v) TSRMG(protobuf_globals_id, zend_protobuf_globals*, v)
+#else
+#define PROTOBUF_G(v) (protobuf_globals.v)
+#endif
+
+// -----------------------------------------------------------------------------
+// PHP functions and global variables.
+// -----------------------------------------------------------------------------
+
+PHP_MINIT_FUNCTION(protobuf);
+
+// -----------------------------------------------------------------------------
+// PHP class structure.
+// -----------------------------------------------------------------------------
+
+// C-side state of a DescriptorPool object.
+struct DescriptorPool {
+  // Embedded PHP object.
+  zend_object std;
+  // upb symbol table owned by / associated with this pool.
+  upb_symtab* symtab;
+  // presumably the defs added but not yet finalized -- TODO confirm against
+  // the pool implementation.
+  HashTable* pending_list;
+};
+
+// C-side state of a Descriptor object: the upb message definition it wraps
+// plus data derived from it. The commented-out members are not part of the
+// struct yet.
+struct Descriptor {
+  // Embedded PHP object.
+  zend_object std;
+  // The wrapped upb message definition.
+  const upb_msgdef* msgdef;
+  // Storage layout for messages of this type (see MessageLayout).
+  MessageLayout* layout;
+  // zval* klass;  // begins as NULL
+  // const upb_handlers* fill_handlers;
+  // const upb_pbdecodermethod* fill_method;
+  const upb_handlers* pb_serialize_handlers;
+  // const upb_handlers* json_serialize_handlers;
+  // Handlers hold type class references for sub-message fields directly in some
+  // cases. We need to keep these rooted because they might otherwise be
+  // collected.
+  // zval_array typeclass_references;
+};
+
+// C-side state of a FieldDescriptor object: a PHP object paired with the upb
+// field definition it wraps.
+struct FieldDescriptor {
+  zend_object std;
+  const upb_fielddef* fielddef;
+};
+
+// C-side state of a OneofDescriptor object: a PHP object paired with the upb
+// oneof definition it wraps.
+struct OneofDescriptor {
+  zend_object std;
+  const upb_oneofdef* oneofdef;
+};
+
+// C-side state of an EnumDescriptor object: a PHP object paired with the upb
+// enum definition it wraps.
+struct EnumDescriptor {
+  zend_object std;
+  const upb_enumdef* enumdef;
+  // zval* module;  // begins as NULL
+};
+
+// -----------------------------------------------------------------------------
+// Native slot storage abstraction.
+// -----------------------------------------------------------------------------
+
+#define NATIVE_SLOT_MAX_SIZE sizeof(uint64_t)
+
+size_t native_slot_size(upb_fieldtype_t type);
+
+#define MAP_KEY_FIELD 1
+#define MAP_VALUE_FIELD 2
+
+// Oneof case slot value to indicate that no oneof case is set. The value `0` is
+// safe because field numbers are used as case identifiers, and no field can
+// have a number of 0.
+#define ONEOF_CASE_NONE 0
+
+// These operate on a map field (i.e., a repeated field of submessages whose
+// submessage type is a map-entry msgdef).
+bool is_map_field(const upb_fielddef* field);
+const upb_fielddef* map_field_key(const upb_fielddef* field);
+const upb_fielddef* map_field_value(const upb_fielddef* field);
+
+// These operate on a map-entry msgdef.
+const upb_fielddef* map_entry_key(const upb_msgdef* msgdef);
+const upb_fielddef* map_entry_value(const upb_msgdef* msgdef);
+
+// -----------------------------------------------------------------------------
+// Message layout / storage.
+// -----------------------------------------------------------------------------
+
+#define MESSAGE_FIELD_NO_CASE ((size_t)-1)
+
+// Location of one field inside a message's storage block, as computed by
+// create_layout(). Both members are byte offsets from the start of storage.
+struct MessageField {
+  // Offset of the field's value slot. Members of one oneof share an offset.
+  size_t offset;
+  size_t case_offset;  // for oneofs, a uint32. Else, MESSAGE_FIELD_NO_CASE.
+};
+
+// Storage layout for all messages of one type.
+struct MessageLayout {
+  // Message definition described by this layout; create_layout() takes a ref
+  // on it and free_layout() drops that ref.
+  const upb_msgdef* msgdef;
+  // One entry per field, indexed by upb_fielddef_index().
+  MessageField* fields;
+  // Total number of storage bytes required by the layout.
+  size_t size;
+};
+
+// Layout-level accessors (implemented in storage.c).
+void layout_init(MessageLayout* layout, void* storage);
+void layout_set(MessageLayout* layout, void* storage, const upb_fielddef* field,
+                zval* val);
+zval* layout_get(MessageLayout* layout, const void* storage,
+                 const upb_fielddef* field TSRMLS_DC);
+MessageLayout* create_layout(const upb_msgdef* msgdef);
+void free_layout(MessageLayout* layout);
+
+// Slot-level accessors (implemented in storage.c). These are declared here so
+// that no caller -- including storage.c itself, where native_slot_set() calls
+// native_slot_set_value_and_case() before its definition -- depends on an
+// implicit function declaration.
+void native_slot_check_int_range_precision(upb_fieldtype_t type, zval* val);
+void native_slot_init(upb_fieldtype_t type, void* memory);
+void native_slot_set(upb_fieldtype_t type, /*VALUE type_class,*/ void* memory,
+                     zval* value);
+void native_slot_set_value_and_case(upb_fieldtype_t type, /*VALUE type_class,*/
+                                    void* memory, zval* value,
+                                    uint32_t* case_memory,
+                                    uint32_t case_number);
+zval* native_slot_get(upb_fieldtype_t type, /*VALUE type_class,*/
+                      const void* memory TSRMLS_DC);
+
+// -----------------------------------------------------------------------------
+// Message class creation.
+// -----------------------------------------------------------------------------
+
+// Header placed at the start of every message object; the message's field
+// storage follows it in memory.
+struct MessageHeader {
+  zend_object std;
+  Descriptor* descriptor;  // kept alive by self.class.descriptor reference.
+                           // Data comes after this.
+};
+
+// C-side state of a MessageBuilderContext object (the object exposing the
+// optional() and finalizeToPool() PHP methods).
+struct MessageBuilderContext {
+  zend_object std;
+  // presumably the Descriptor being built and the pool it will be added to --
+  // TODO confirm against the builder implementation.
+  zval* descriptor;
+  zval* pool;
+};
+
+// Placeholder for a oneof builder context; it carries no state besides the
+// embedded PHP object yet (the members below are still commented out).
+struct OneofBuilderContext {
+  zend_object std;
+  // VALUE descriptor;
+  // VALUE builder;
+};
+
+// Placeholder for an enum builder context; it carries no state besides the
+// embedded PHP object yet (the member below is still commented out).
+struct EnumBuilderContext {
+  zend_object std;
+  // VALUE enumdesc;
+};
+
+// Forward-declare all of the PHP method implementations.
+
+DescriptorPool* php_to_descriptor_pool(zval* value TSRMLS_DC);
+zend_object_value descriptor_pool_create(zend_class_entry *ce TSRMLS_DC);
+void descriptor_pool_free_c(DescriptorPool* object TSRMLS_DC);
+void descriptor_pool_free(void* object TSRMLS_DC);
+void descriptor_pool_init_c_instance(DescriptorPool* pool TSRMLS_DC);
+PHP_METHOD(DescriptorPool, addMessage);
+PHP_METHOD(DescriptorPool, finalize);
+
+Descriptor* php_to_descriptor(zval* value TSRMLS_DC);
+zend_object_value descriptor_create(zend_class_entry *ce TSRMLS_DC);
+void descriptor_init_c_instance(Descriptor* intern TSRMLS_DC);
+void descriptor_free_c(Descriptor* object TSRMLS_DC);
+void descriptor_free(void* object TSRMLS_DC);
+void descriptor_name_set(Descriptor *desc, const char *name);
+
+MessageBuilderContext* php_to_message_builder_context(zval* value TSRMLS_DC);
+zend_object_value message_builder_context_create(
+    zend_class_entry* ce TSRMLS_DC);
+void message_builder_context_init_c_instance(
+    MessageBuilderContext* intern TSRMLS_DC);
+void message_builder_context_free_c(MessageBuilderContext* object TSRMLS_DC);
+void message_builder_context_free(void* object TSRMLS_DC);
+PHP_METHOD(MessageBuilderContext, optional);
+PHP_METHOD(MessageBuilderContext, finalizeToPool);
+
+PHP_METHOD(Message, encode);
+const zend_class_entry* build_class_from_descriptor(
+    zval* php_descriptor TSRMLS_DC);
+
+PHP_FUNCTION(get_generated_pool);
+
+// -----------------------------------------------------------------------------
+// Global map from upb {msg,enum}defs to wrapper Descriptor/EnumDescriptor
+// instances.
+// ----------------------------------------------------------------------------
+
+void add_def_obj(const void* def, zval* value);
+zval* get_def_obj(const void* def);
+
+// -----------------------------------------------------------------------------
+// Utilities.
+// -----------------------------------------------------------------------------
+
+// PHP Array utils.
+//   Z_ARRVAL_SIZE_P  - number of elements in the array zval.
+//   Z_ARRVAL_BEGIN_P - first bucket of the array zval's hash table.
+//   Z_BUCKET_NEXT_PP - advances *bucket_pp to the next bucket in list order.
+// Arguments and expansions are parenthesized so the macros stay correct when
+// given non-trivial expressions or used inside larger expressions.
+#define Z_ARRVAL_SIZE_P(zval_p) zend_hash_num_elements(Z_ARRVAL_P(zval_p))
+#define Z_ARRVAL_BEGIN_P(zval_p) (Z_ARRVAL_P(zval_p)->pListHead)
+#define Z_BUCKET_NEXT_PP(bucket_pp) \
+  (*(bucket_pp) = (*(bucket_pp))->pListNext)
+
+// Declares `zval* name` in the enclosing scope and initializes it as a new
+// instance of the class entry `class_name_lower##_type`. `class_name` is not
+// used by the expansion.
+//
+// The statements are intentionally not wrapped in do { } while (0): such a
+// wrapper would end the variable's scope before the caller could use it.
+// Invoke the macro as a statement of its own, followed by a semicolon.
+#define DEFINE_PHP_OBJECT(class_name, class_name_lower, name) \
+  zval* name;                                                 \
+  MAKE_STD_ZVAL(name);                                        \
+  object_init_ex(name, class_name_lower##_type)
+
+// Declares `zval* name` in the enclosing scope, initializes it as an instance
+// of the class entry `class_name_lower##_type`, then registers `intern` in the
+// object store (with `class_name_lower##_free` as its free callback) and
+// assigns the resulting handle to the new object. `class_name` is not used by
+// the expansion. The expansion already ends with a semicolon.
+#define DEFINE_PHP_WRAPPER(class_name, class_name_lower, name, intern) \
+  zval* name;                                                          \
+  MAKE_STD_ZVAL(name);                                                 \
+  object_init_ex(name, class_name_lower##_type);                       \
+  Z_OBJVAL_P(name)                                                     \
+      .handle = zend_objects_store_put(                                \
+      intern, (zend_objects_store_dtor_t)zend_objects_destroy_object,  \
+      class_name_lower##_free, NULL TSRMLS_CC);
+
+// Declares `zval* name` in the enclosing scope and allocates it with
+// MAKE_STD_ZVAL. Not wrapped in do { } while (0), because that would end the
+// variable's scope before the caller could use it. Invoke as a statement of
+// its own, followed by a semicolon.
+#define DEFINE_PHP_ZVAL(name) \
+  zval* name;                 \
+  MAKE_STD_ZVAL(name)
+
+// Declares `zval* name` in the enclosing scope, allocates it and sets it to a
+// copy of the C string `value`. Not wrapped in do { } while (0), because that
+// would end the variable's scope before the caller could use it. Invoke as a
+// statement of its own, followed by a semicolon.
+#define DEFINE_PHP_STRING(name, value) \
+  zval* name;                          \
+  MAKE_STD_ZVAL(name);                 \
+  ZVAL_STRING(name, value, 1)
+
+// Upb Utilities
+
+void check_upb_status(const upb_status* status, const char* msg);
+
+// Runs `code` with a fresh `upb_status status` in scope (so `code` can pass
+// `&status` to upb calls), then hands that status and `msg` to
+// check_upb_status().
+#define CHECK_UPB(code, msg)             \
+  do {                                   \
+    upb_status status = UPB_STATUS_INIT; \
+    code;                                \
+    check_upb_status(&status, msg);      \
+  } while (0)
+
+// Memory management
+//
+// Typed wrappers around emalloc()/efree():
+//   ALLOC(T)      - storage for one T.
+//   ALLOC_N(T, n) - storage for n consecutive T.
+//   FREE(p)       - releases storage obtained from either of the above.
+// The count argument and the whole expansion are parenthesized so that
+// expression arguments such as ALLOC_N(T, a + b) compute the intended size.
+
+#define ALLOC(class_name) ((class_name*)emalloc(sizeof(class_name)))
+#define ALLOC_N(class_name, n) \
+  ((class_name*)emalloc(sizeof(class_name) * (n)))
+#define FREE(object) efree(object)
+
+// Type Checking
+//
+// Raises a PHP E_ERROR unless the zval `field` has PHP type `type`. Wrapped
+// in do { } while (0) so the macro behaves as a single statement and cannot
+// capture an `else` that follows it.
+#define CHECK_TYPE(field, type)               \
+  do {                                        \
+    if (Z_TYPE_P(field) != (type)) {          \
+      zend_error(E_ERROR, "Unexpected type"); \
+    }                                         \
+  } while (0)
+
+#endif  // __GOOGLE_PROTOBUF_PHP_PROTOBUF_H__

+ 539 - 0
php/ext/google/protobuf/storage.c

@@ -0,0 +1,539 @@
+#include <stdint.h>
+#include <protobuf.h>
+
+// -----------------------------------------------------------------------------
+// PHP <-> native slot management.
+// -----------------------------------------------------------------------------
+
+// Wraps a native int32 in a newly allocated PHP integer zval and returns it.
+// (The debug print that used to run on every call has been removed so that
+// reading a field no longer writes to the script's output.)
+static zval* int32_to_zval(int32_t value) {
+  zval* tmp;
+  MAKE_STD_ZVAL(tmp);
+  ZVAL_LONG(tmp, value);
+  return tmp;
+}
+
+// Accesses the `type` object stored at address `memory`. The result is an
+// lvalue; the expansion is parenthesized so postfix operators and surrounding
+// expressions bind to the dereferenced value as a whole.
+#define DEREF(memory, type) (*(type*)(memory))
+
+// Returns the number of storage bytes used by a singular field of the given
+// type, or 0 for a type that is not listed here.
+size_t native_slot_size(upb_fieldtype_t type) {
+  switch (type) {
+    // One-byte flag.
+    case UPB_TYPE_BOOL:
+      return 1;
+
+    // 32-bit scalars.
+    case UPB_TYPE_FLOAT:
+    case UPB_TYPE_ENUM:
+    case UPB_TYPE_INT32:
+    case UPB_TYPE_UINT32:
+      return 4;
+
+    // 64-bit scalars.
+    case UPB_TYPE_DOUBLE:
+    case UPB_TYPE_INT64:
+    case UPB_TYPE_UINT64:
+      return 8;
+
+    // Held as a pointer to a zval.
+    case UPB_TYPE_STRING:
+    case UPB_TYPE_BYTES:
+    case UPB_TYPE_MESSAGE:
+      return sizeof(zval*);
+
+    default:
+      return 0;
+  }
+}
+
+// Reports whether the zval holds a PHP integer or a PHP float.
+static bool is_php_num(zval* value) {
+  // Open question: should numeric strings be accepted as well?
+  switch (Z_TYPE_P(value)) {
+    case IS_LONG:
+    case IS_DOUBLE:
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Intended to validate that `val` may be stored in an integer field of type
+// `type` (numeric, integral, and non-negative for unsigned types). Every check
+// is currently commented out, so the function does nothing at the moment.
+void native_slot_check_int_range_precision(upb_fieldtype_t type, zval* val) {
+  // TODO(teboring): Add it back.
+  // if (!is_php_num(val)) {
+  //   zend_error(E_ERROR, "Expected number type for integral field.");
+  // }
+
+  // if (Z_TYPE_P(val) == IS_DOUBLE) {
+  //   double dbl_val = NUM2DBL(val);
+  //   if (floor(dbl_val) != dbl_val) {
+  //     zend_error(E_ERROR,
+  //              "Non-integral floating point value assigned to integer field.");
+  //   }
+  // }
+  // if (type == UPB_TYPE_UINT32 || type == UPB_TYPE_UINT64) {
+  //   if (NUM2DBL(val) < 0) {
+  //     zend_error(E_ERROR,
+  //              "Assigning negative value to unsigned integer field.");
+  //   }
+  // }
+}
+
+// Converts the native value stored at `memory` into a PHP zval.
+//
+// Only int32 fields are converted at present; for every other type the
+// engine's shared uninitialized zval is returned (the remaining conversions
+// are still disabled, see the TODOs).
+zval* native_slot_get(upb_fieldtype_t type, /*VALUE type_class,*/
+                      const void* memory TSRMLS_DC) {
+  switch (type) {
+    // TODO(teboring): Add it back.
+    // case UPB_TYPE_FLOAT:
+    //   return DBL2NUM(DEREF(memory, float));
+    // case UPB_TYPE_DOUBLE:
+    //   return DBL2NUM(DEREF(memory, double));
+    // case UPB_TYPE_BOOL:
+    //   return DEREF(memory, int8_t) ? Qtrue : Qfalse;
+    // case UPB_TYPE_STRING:
+    // case UPB_TYPE_BYTES:
+    // case UPB_TYPE_MESSAGE:
+    //   return DEREF(memory, VALUE);
+    // case UPB_TYPE_ENUM: {
+    //   int32_t val = DEREF(memory, int32_t);
+    //   VALUE symbol = enum_lookup(type_class, INT2NUM(val));
+    //   if (symbol == Qnil) {
+    //     return INT2NUM(val);
+    //   } else {
+    //     return symbol;
+    //   }
+    // }
+    case UPB_TYPE_INT32:
+      return int32_to_zval(DEREF(memory, int32_t));
+    // TODO(teboring): Add it back.
+    // case UPB_TYPE_INT64:
+    //   return LL2NUM(DEREF(memory, int64_t));
+    // case UPB_TYPE_UINT32:
+    //   return UINT2NUM(DEREF(memory, uint32_t));
+    // case UPB_TYPE_UINT64:
+    //   return ULL2NUM(DEREF(memory, uint64_t));
+    default:
+      return EG(uninitialized_zval_ptr);
+  }
+}
+
+// Writes the zero value for a scalar field of the given type into its storage
+// slot. String, bytes and message slots are not touched yet (their
+// initialization is still disabled, see the TODO).
+void native_slot_init(upb_fieldtype_t type, void* memory) {
+  switch (type) {
+    // Integer-like slots.
+    case UPB_TYPE_BOOL:
+      DEREF(memory, int8_t) = 0;
+      break;
+    case UPB_TYPE_INT32:
+    case UPB_TYPE_ENUM:
+      DEREF(memory, int32_t) = 0;
+      break;
+    case UPB_TYPE_UINT32:
+      DEREF(memory, uint32_t) = 0;
+      break;
+    case UPB_TYPE_INT64:
+      DEREF(memory, int64_t) = 0;
+      break;
+    case UPB_TYPE_UINT64:
+      DEREF(memory, uint64_t) = 0;
+      break;
+
+    // Floating-point slots.
+    case UPB_TYPE_FLOAT:
+      DEREF(memory, float) = 0.0;
+      break;
+    case UPB_TYPE_DOUBLE:
+      DEREF(memory, double) = 0.0;
+      break;
+
+    // TODO(teboring): Add it back.
+    // case UPB_TYPE_STRING:
+    // case UPB_TYPE_BYTES:
+    //   DEREF(memory, VALUE) = php_str_new2("");
+    //   php_enc_associate(DEREF(memory, VALUE), (type == UPB_TYPE_BYTES)
+    //                                              ? kRubyString8bitEncoding
+    //                                              : kRubyStringUtf8Encoding);
+    //   break;
+    // case UPB_TYPE_MESSAGE:
+    //   DEREF(memory, VALUE) = Qnil;
+    //   break;
+    default:
+      break;
+  }
+}
+
+// Stores `value` into the slot at `memory` for a field that is not part of a
+// oneof: forwards to native_slot_set_value_and_case() with no case slot
+// (NULL) and case number 0.
+void native_slot_set(upb_fieldtype_t type, /*VALUE type_class,*/ void* memory,
+                     zval* value) {
+  native_slot_set_value_and_case(type, /*type_class,*/ memory, value, NULL, 0);
+}
+
+// Stores the PHP value `value` into the native slot at `memory` according to
+// `type`, then -- when `case_memory` is non-NULL -- records `case_number` as
+// the active oneof case.
+//
+// Only float, bool and int32 values are stored at present. The other types
+// are placeholders (see the TODOs) that leave `memory` untouched; the oneof
+// case is still recorded for them.
+//
+// Raises a PHP E_ERROR when a float field is given a non-numeric value.
+void native_slot_set_value_and_case(upb_fieldtype_t type, /*VALUE type_class,*/
+                                    void* memory, zval* value,
+                                    uint32_t* case_memory,
+                                    uint32_t case_number) {
+  switch (type) {
+    case UPB_TYPE_FLOAT:
+      // Accept PHP floats and PHP integers, reading each through the accessor
+      // that matches its actual type. (The previous check,
+      // `!Z_TYPE_P(value) == IS_LONG`, negated the type tag before comparing
+      // and then read integers through the double accessor.)
+      if (Z_TYPE_P(value) == IS_DOUBLE) {
+        DEREF(memory, float) = Z_DVAL_P(value);
+      } else if (Z_TYPE_P(value) == IS_LONG) {
+        DEREF(memory, float) = Z_LVAL_P(value);
+      } else {
+        zend_error(E_ERROR, "Expected number type for float field.");
+      }
+      break;
+    case UPB_TYPE_DOUBLE:
+      // TODO(teboring): Add it back.
+      // if (!is_php_num(value)) {
+      //   zend_error(E_ERROR, "Expected number type for double field.");
+      // }
+      // DEREF(memory, double) = Z_DVAL_P(value);
+      break;
+    case UPB_TYPE_BOOL: {
+      // Any value PHP considers true is stored as 1, everything else as 0.
+      // TODO(teboring): Add it back.
+      // else if (value == Qfalse) {
+      //   val = 0;
+      // }
+      // else {
+      //   php_raise(php_eTypeError, "Invalid argument for boolean field.");
+      // }
+      DEREF(memory, int8_t) = zval_is_true(value) ? 1 : 0;
+      break;
+    }
+    case UPB_TYPE_STRING:
+    case UPB_TYPE_BYTES: {
+      // TODO(teboring): Add it back.
+      // if (Z_TYPE_P(value) != IS_STRING) {
+      //   zend_error(E_ERROR, "Invalid argument for string field.");
+      // }
+      // native_slot_validate_string_encoding(type, value);
+      // DEREF(memory, zval*) = value;
+      break;
+    }
+    case UPB_TYPE_MESSAGE: {
+      // TODO(teboring): Add it back.
+      // if (CLASS_OF(value) == CLASS_OF(Qnil)) {
+      //   value = Qnil;
+      // } else if (CLASS_OF(value) != type_class) {
+      //   php_raise(php_eTypeError,
+      //            "Invalid type %s to assign to submessage field.",
+      //            php_class2name(CLASS_OF(value)));
+      // }
+      // DEREF(memory, VALUE) = value;
+      break;
+    }
+    case UPB_TYPE_ENUM: {
+      // TODO(teboring): Add it back.
+      // int32_t int_val = 0;
+      // if (!is_php_num(value) && TYPE(value) != T_SYMBOL) {
+      //   php_raise(php_eTypeError,
+      //            "Expected number or symbol type for enum field.");
+      // }
+      // if (TYPE(value) == T_SYMBOL) {
+      //   // Ensure that the given symbol exists in the enum module.
+      //   VALUE lookup = php_funcall(type_class, php_intern("resolve"), 1, value);
+      //   if (lookup == Qnil) {
+      //     php_raise(php_eRangeError, "Unknown symbol value for enum field.");
+      //   } else {
+      //     int_val = NUM2INT(lookup);
+      //   }
+      // } else {
+      //   native_slot_check_int_range_precision(UPB_TYPE_INT32, value);
+      //   int_val = NUM2INT(value);
+      // }
+      // DEREF(memory, int32_t) = int_val;
+
+      // Explicit break: while the body above is disabled, the enum case must
+      // not fall through into the integer handling below.
+      break;
+    }
+    case UPB_TYPE_INT32:
+    case UPB_TYPE_INT64:
+    case UPB_TYPE_UINT32:
+    case UPB_TYPE_UINT64:
+      native_slot_check_int_range_precision(type, value);
+      switch (type) {
+        case UPB_TYPE_INT32:
+          DEREF(memory, int32_t) = Z_LVAL_P(value);
+          break;
+        case UPB_TYPE_INT64:
+          // TODO(teboring): Add it back.
+          // DEREF(memory, int64_t) = NUM2LL(value);
+          break;
+        case UPB_TYPE_UINT32:
+          // TODO(teboring): Add it back.
+          // DEREF(memory, uint32_t) = NUM2UINT(value);
+          break;
+        case UPB_TYPE_UINT64:
+          // TODO(teboring): Add it back.
+          // DEREF(memory, uint64_t) = NUM2ULL(value);
+          break;
+        default:
+          break;
+      }
+      break;
+    default:
+      break;
+  }
+
+  // Record which oneof member is now set, if this slot belongs to a oneof.
+  if (case_memory != NULL) {
+    *case_memory = case_number;
+  }
+}
+
+// -----------------------------------------------------------------------------
+// Map field utilities.
+// ----------------------------------------------------------------------------
+
+// Returns the map-entry msgdef behind `field` when the field is a repeated
+// message field whose submessage type is marked as a map entry; otherwise
+// returns NULL.
+const upb_msgdef* tryget_map_entry_msgdef(const upb_fielddef* field) {
+  if (upb_fielddef_label(field) == UPB_LABEL_REPEATED &&
+      upb_fielddef_type(field) == UPB_TYPE_MESSAGE) {
+    const upb_msgdef* entry = upb_fielddef_msgsubdef(field);
+    if (upb_msgdef_mapentry(entry)) {
+      return entry;
+    }
+  }
+  return NULL;
+}
+
+// Like tryget_map_entry_msgdef(), but for callers that already know `field`
+// is a map field: asserts that a map-entry msgdef was found and returns it.
+const upb_msgdef* map_entry_msgdef(const upb_fielddef* field) {
+  const upb_msgdef* subdef = tryget_map_entry_msgdef(field);
+  assert(subdef);
+  return subdef;
+}
+
+// Reports whether `field` is a map field, i.e. whether
+// tryget_map_entry_msgdef() finds a map-entry msgdef for it.
+bool is_map_field(const upb_fielddef* field) {
+  return tryget_map_entry_msgdef(field) != NULL;
+}
+
+// -----------------------------------------------------------------------------
+// Memory layout management.
+// -----------------------------------------------------------------------------
+
+// Rounds `offset` up to the next multiple of `granularity`.
+//
+// `granularity` must be a power of two. A granularity of 0 (the size reported
+// for an unrecognized field type) imposes no alignment and returns `offset`
+// unchanged; without this guard the mask below would collapse the offset to 0.
+static size_t align_up_to(size_t offset, size_t granularity) {
+  if (granularity == 0) {
+    return offset;
+  }
+  return (offset + granularity - 1) & ~(granularity - 1);
+}
+
+// Builds the storage layout for messages described by `msgdef`.
+//
+// The layout assigns every field a byte offset inside one storage block:
+//   1. each non-oneof field gets its own slot, aligned to its own size;
+//   2. each oneof gets one shared NATIVE_SLOT_MAX_SIZE-byte value slot;
+//   3. each oneof gets one uint32 "case" slot recording which member is set.
+// The returned layout holds a reference on `msgdef` and must be released with
+// free_layout().
+MessageLayout* create_layout(const upb_msgdef* msgdef) {
+  MessageLayout* layout = ALLOC(MessageLayout);
+  int nfields = upb_msgdef_numfields(msgdef);
+  upb_msg_field_iter it;
+  upb_msg_oneof_iter oit;
+  size_t off = 0;
+
+  layout->fields = ALLOC_N(MessageField, nfields);
+
+  for (upb_msg_field_begin(&it, msgdef); !upb_msg_field_done(&it);
+       upb_msg_field_next(&it)) {
+    const upb_fielddef* field = upb_msg_iter_field(&it);
+    size_t field_size;
+
+    if (upb_fielddef_containingoneof(field)) {
+      // Oneofs are handled separately below.
+      continue;
+    }
+
+    // Allocate |field_size| bytes for this field in the layout. Repeated
+    // fields are stored as a pointer to a zval regardless of element type.
+    field_size = 0;
+    if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
+      field_size = sizeof(zval*);
+    } else {
+      field_size = native_slot_size(upb_fielddef_type(field));
+    }
+
+    // Align current offset up to |field_size| granularity.
+    off = align_up_to(off, field_size);
+    layout->fields[upb_fielddef_index(field)].offset = off;
+    layout->fields[upb_fielddef_index(field)].case_offset =
+        MESSAGE_FIELD_NO_CASE;
+    off += field_size;
+  }
+
+  // Handle oneofs now -- we iterate over oneofs specifically and allocate only
+  // one slot per oneof.
+  //
+  // We assign all value slots first, then pack the 'case' fields at the end,
+  // since in the common case (modern 64-bit platform) these are 8 bytes and 4
+  // bytes respectively and we want to avoid alignment overhead.
+  //
+  // Note that we reserve 4 bytes (a uint32) per 'case' slot because the value
+  // space for oneof cases is conceptually as wide as field tag numbers.  In
+  // practice, it's unlikely that a oneof would have more than e.g.  256 or 64K
+  // members (8 or 16 bits respectively), so conceivably we could assign
+  // consecutive case numbers and then pick a smaller oneof case slot size, but
+  // the complexity to implement this indirection is probably not worthwhile.
+  for (upb_msg_oneof_begin(&oit, msgdef); !upb_msg_oneof_done(&oit);
+       upb_msg_oneof_next(&oit)) {
+    const upb_oneofdef* oneof = upb_msg_iter_oneof(&oit);
+    upb_oneof_iter fit;
+
+    // Always allocate NATIVE_SLOT_MAX_SIZE bytes, but share the slot between
+    // all fields.
+    size_t field_size = NATIVE_SLOT_MAX_SIZE;
+    // Align the offset.
+    off = align_up_to(off, field_size);
+    // Assign all fields in the oneof this same offset.
+    for (upb_oneof_begin(&fit, oneof); !upb_oneof_done(&fit);
+         upb_oneof_next(&fit)) {
+      const upb_fielddef* field = upb_oneof_iter_field(&fit);
+      layout->fields[upb_fielddef_index(field)].offset = off;
+    }
+    off += field_size;
+  }
+
+  // Now the case fields.
+  for (upb_msg_oneof_begin(&oit, msgdef); !upb_msg_oneof_done(&oit);
+       upb_msg_oneof_next(&oit)) {
+    const upb_oneofdef* oneof = upb_msg_iter_oneof(&oit);
+    upb_oneof_iter fit;
+
+    size_t field_size = sizeof(uint32_t);
+    // Align the offset, using the same helper as the two loops above.
+    off = align_up_to(off, field_size);
+    // Assign all fields in the oneof this same case offset.
+    for (upb_oneof_begin(&fit, oneof); !upb_oneof_done(&fit);
+         upb_oneof_next(&fit)) {
+      const upb_fielddef* field = upb_oneof_iter_field(&fit);
+      layout->fields[upb_fielddef_index(field)].case_offset = off;
+    }
+    off += field_size;
+  }
+
+  layout->size = off;
+
+  // Keep the msgdef alive for as long as the layout exists; the address of
+  // the layout's own member serves as the owner token for the reference.
+  layout->msgdef = msgdef;
+  upb_msgdef_ref(layout->msgdef, &layout->msgdef);
+
+  return layout;
+}
+
+// Releases a layout created by create_layout(): drops the msgdef reference
+// taken there and frees the field table and the layout itself.
+void free_layout(MessageLayout* layout) {
+  // Give back the reference first; it is keyed on the layout's own member.
+  upb_msgdef_unref(layout->msgdef, &layout->msgdef);
+
+  FREE(layout->fields);
+  FREE(layout);
+}
+
+// TODO(teboring): Add it back.
+// VALUE field_type_class(const upb_fielddef* field) {
+//   VALUE type_class = Qnil;
+//   if (upb_fielddef_type(field) == UPB_TYPE_MESSAGE) {
+//     VALUE submsgdesc = get_def_obj(upb_fielddef_subdef(field));
+//     type_class = Descriptor_msgclass(submsgdesc);
+//   } else if (upb_fielddef_type(field) == UPB_TYPE_ENUM) {
+//     VALUE subenumdesc = get_def_obj(upb_fielddef_subdef(field));
+//     type_class = EnumDescriptor_enummodule(subenumdesc);
+//   }
+//   return type_class;
+// }
+
+// Returns the address of `field`'s value slot inside `storage`.
+static void* slot_memory(MessageLayout* layout, const void* storage,
+                         const upb_fielddef* field) {
+  const MessageField* slot = &layout->fields[upb_fielddef_index(field)];
+  uint8_t* base = (uint8_t*)storage;
+  return base + slot->offset;
+}
+
+// Returns the address of the oneof case slot recorded for `field` inside
+// `storage`. The result is computed from the field's case_offset as-is.
+static uint32_t* slot_oneof_case(MessageLayout* layout, const void* storage,
+                                 const upb_fielddef* field) {
+  const MessageField* slot = &layout->fields[upb_fielddef_index(field)];
+  uint8_t* base = (uint8_t*)storage;
+  return (uint32_t*)(base + slot->case_offset);
+}
+
+// Stores the PHP value `val` into `field`'s slot inside `storage`.
+//
+//   - oneof member: a null value clears the whole oneof; any other value is
+//     stored and the oneof's case slot is set to this field's number.
+//   - map / repeated field: not implemented yet; the call is a no-op.
+//   - any other field: stored through native_slot_set().
+void layout_set(MessageLayout* layout, void* storage, const upb_fielddef* field,
+                zval* val) {
+  void* memory = slot_memory(layout, storage, field);
+
+  if (upb_fielddef_containingoneof(field)) {
+    // Only oneof members have a case slot. For any other field the stored
+    // case offset is MESSAGE_FIELD_NO_CASE, so the pointer is formed here
+    // rather than unconditionally.
+    uint32_t* oneof_case = slot_oneof_case(layout, storage, field);
+
+    if (Z_TYPE_P(val) == IS_NULL) {
+      // Assigning null to a oneof field clears the oneof completely.
+      *oneof_case = ONEOF_CASE_NONE;
+      memset(memory, 0, NATIVE_SLOT_MAX_SIZE);
+    } else {
+      // All members of a oneof share one value slot, so the case slot must
+      // always agree with what the value slot currently holds. Both are
+      // therefore written by a single call to
+      // native_slot_set_value_and_case(). (This function was ported from the
+      // Ruby implementation, where the concern was a GC run observing the
+      // slot between the two writes.)
+      native_slot_set_value_and_case(upb_fielddef_type(field),
+                                     /*field_type_class(field),*/ memory, val,
+                                     oneof_case, upb_fielddef_number(field));
+    }
+  } else if (is_map_field(field)) {
+    // TODO(teboring): Add it back.
+    // check_map_field_type(val, field);
+    // DEREF(memory, zval*) = val;
+  } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
+    // TODO(teboring): Add it back.
+    // check_repeated_field_type(val, field);
+    // DEREF(memory, zval*) = val;
+  } else {
+    native_slot_set(upb_fielddef_type(field), /*field_type_class(field),*/ memory,
+                    val);
+  }
+}
+
+// Initializes a freshly allocated storage block for the given layout.
+//
+//   - oneof member: the shared value slot is zeroed and the case slot is set
+//     to ONEOF_CASE_NONE, so layout_get() never reads an indeterminate case.
+//   - map / repeated field: not implemented yet; the slot is left untouched.
+//   - any other field: set to its zero value by native_slot_init().
+void layout_init(MessageLayout* layout, void* storage) {
+  upb_msg_field_iter it;
+  for (upb_msg_field_begin(&it, layout->msgdef); !upb_msg_field_done(&it);
+       upb_msg_field_next(&it)) {
+    const upb_fielddef* field = upb_msg_iter_field(&it);
+    void* memory = slot_memory(layout, storage, field);
+
+    if (upb_fielddef_containingoneof(field)) {
+      // Every member of the oneof maps to the same two slots, so this runs
+      // once per member; repeating it is harmless.
+      memset(memory, 0, NATIVE_SLOT_MAX_SIZE);
+      *slot_oneof_case(layout, storage, field) = ONEOF_CASE_NONE;
+    } else if (is_map_field(field)) {
+      // TODO(teboring): Add it back.
+      // VALUE map = Qnil;
+
+      // const upb_fielddef* key_field = map_field_key(field);
+      // const upb_fielddef* value_field = map_field_value(field);
+      // VALUE type_class = field_type_class(value_field);
+
+      // if (type_class != Qnil) {
+      //   VALUE args[3] = {
+      //       fieldtype_to_php(upb_fielddef_type(key_field)),
+      //       fieldtype_to_php(upb_fielddef_type(value_field)), type_class,
+      //   };
+      //   map = php_class_new_instance(3, args, cMap);
+      // } else {
+      //   VALUE args[2] = {
+      //       fieldtype_to_php(upb_fielddef_type(key_field)),
+      //       fieldtype_to_php(upb_fielddef_type(value_field)),
+      //   };
+      //   map = php_class_new_instance(2, args, cMap);
+      // }
+
+      // DEREF(memory, VALUE) = map;
+    } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
+      // TODO(teboring): Add it back.
+      // VALUE ary = Qnil;
+
+      // VALUE type_class = field_type_class(field);
+
+      // if (type_class != Qnil) {
+      //   VALUE args[2] = {
+      //       fieldtype_to_php(upb_fielddef_type(field)), type_class,
+      //   };
+      //   ary = php_class_new_instance(2, args, cRepeatedField);
+      // } else {
+      //   VALUE args[1] = {fieldtype_to_php(upb_fielddef_type(field))};
+      //   ary = php_class_new_instance(1, args, cRepeatedField);
+      // }
+
+      // DEREF(memory, VALUE) = ary;
+    } else {
+      native_slot_init(upb_fielddef_type(field), memory);
+    }
+  }
+}
+
+// Reads `field` from `storage` and returns it as a PHP zval.
+//
+// Only singular non-oneof fields are readable at present (through
+// native_slot_get()). Oneof members and repeated fields return NULL until
+// their support is added back.
+zval* layout_get(MessageLayout* layout, const void* storage,
+                 const upb_fielddef* field TSRMLS_DC) {
+  if (upb_fielddef_containingoneof(field)) {
+    // Only oneof members have a case slot; for other fields the stored case
+    // offset is MESSAGE_FIELD_NO_CASE, so the pointer is formed only here.
+    uint32_t* oneof_case = slot_oneof_case(layout, storage, field);
+
+    if (*oneof_case != upb_fielddef_number(field)) {
+      // A different member (or none) is currently set.
+      // TODO(teboring): Add it back.
+      // return Qnil;
+      return NULL;
+    }
+    // TODO(teboring): Add it back.
+    // return native_slot_get(upb_fielddef_type(field), field_type_class(field),
+    //                        memory);
+    return NULL;
+  } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) {
+    // TODO(teboring): Add it back.
+    // return *((VALUE*)memory);
+    return NULL;
+  } else {
+    void* memory = slot_memory(layout, storage, field);
+    return native_slot_get(
+        upb_fielddef_type(field), /*field_type_class(field), */
+        memory TSRMLS_CC);
+  }
+}

+ 15 - 0
php/ext/google/protobuf/test.php

@@ -0,0 +1,15 @@
+<?php
+
+// Smoke test for the extension: defines a message type with two optional
+// int32 fields through the builder API and then instantiates it.
+
+namespace Google\Protobuf;
+
+// get_generated_pool() is the global function exported by the extension.
+// addMessage()/finalize() are DescriptorPool methods; optional() and
+// finalizeToPool() are MessageBuilderContext methods.
+$pool = get_generated_pool();
+$pool->addMessage("TestMessage")
+    ->optional("optional_int32_a", "int32", 1)
+    ->optional("optional_int32_b", "int32", 2)
+    ->finalizeToPool()
+    ->finalize();
+
+// The class is referenced from the global namespace, not Google\Protobuf.
+$test_message = new \TestMessage();
+
+?>

+ 11990 - 0
php/ext/google/protobuf/upb.c

@@ -0,0 +1,11990 @@
+// Amalgamated source file
+#include "upb.h"
+
+
+#include <stdlib.h>
+#include <string.h>
+
+/* Length-prefixed string: the header and the character data live in one
+ * allocation.  The single declared byte of str[] is what newstr() relies on to
+ * make room for the terminating NUL. */
+typedef struct {
+  size_t len;   /* Number of data bytes, not counting the terminating NUL. */
+  char str[1];  /* Null-terminated string data follows. */
+} str_t;
+
+/* Allocates a str_t holding a copy of the first |len| bytes of |data| followed
+ * by a NUL terminator.  Returns NULL if the allocation fails.  The result is
+ * released with freestr(). */
+static str_t *newstr(const char *data, size_t len) {
+  /* sizeof(str_t) already counts one byte of str[], which pays for the NUL. */
+  str_t *copy = malloc(sizeof(str_t) + len);
+  if (copy == NULL) return NULL;
+  memcpy(copy->str, data, len);
+  copy->str[len] = '\0';
+  copy->len = len;
+  return copy;
+}
+
+/* Releases a string created by newstr(); simply forwards to free(). */
+static void freestr(str_t *s) {
+  free(s);
+}
+
+/* isalpha() etc. from <ctype.h> are locale-dependent, which we don't want.
+ * Returns true iff |c| lies in the inclusive range [low, high]. */
+static bool upb_isbetween(char c, char low, char high) {
+  return !(c < low || c > high);
+}
+
+/* Returns true for ASCII letters and underscore, i.e. the characters that may
+ * start an identifier component. */
+static bool upb_isletter(char c) {
+  if (c == '_') return true;
+  return upb_isbetween(c, 'a', 'z') || upb_isbetween(c, 'A', 'Z');
+}
+
+/* Returns true for ASCII digits and for anything upb_isletter() accepts. */
+static bool upb_isalphanum(char c) {
+  return upb_isbetween(c, '0', '9') || upb_isletter(c);
+}
+
+/* Checks that the first |len| bytes of |str| form a valid identifier.
+ *
+ * Each component must start with a letter or underscore and continue with
+ * letters, digits or underscores.  When |full| is true, components may be
+ * separated by '.'; otherwise any '.' is rejected.  An empty name, an empty
+ * component and a trailing '.' are all invalid.
+ *
+ * On failure an explanatory message is stored in |s| (except for the
+ * empty/trailing-dot case, which just returns false). */
+static bool upb_isident(const char *str, size_t len, bool full, upb_status *s) {
+  /* True whenever the next character would begin a new path component. */
+  bool at_component_start = true;
+  size_t pos = 0;
+
+  while (pos < len) {
+    const char ch = str[pos++];
+
+    if (ch == '.') {
+      if (at_component_start || !full) {
+        upb_status_seterrf(s, "invalid name: unexpected '.' (%s)", str);
+        return false;
+      }
+      at_component_start = true;
+      continue;
+    }
+
+    if (at_component_start) {
+      if (!upb_isletter(ch)) {
+        upb_status_seterrf(
+            s, "invalid name: path components must start with a letter (%s)",
+            str);
+        return false;
+      }
+      at_component_start = false;
+      continue;
+    }
+
+    if (!upb_isalphanum(ch)) {
+      upb_status_seterrf(s, "invalid name: non-alphanumeric character (%s)",
+                         str);
+      return false;
+    }
+  }
+
+  /* Still expecting a component means the name was empty or ended in '.'. */
+  return !at_component_start;
+}
+
+
+/* upb_def ********************************************************************/
+
+/* Returns the kind of def this is (the value stored by upb_def_init()). */
+upb_deftype_t upb_def_type(const upb_def *d) { return d->type; }
+
+/* Returns the def's full name, or NULL if none has been set yet. */
+const char *upb_def_fullname(const upb_def *d) { return d->fullname; }
+
+/* Sets the full name of |def|, which must not be frozen.
+ *
+ * |fullname| must be a valid (possibly dotted) identifier; otherwise an error
+ * is stored in |s| and false is returned.  The string is copied.  On any
+ * failure, including out-of-memory, the def keeps its previous name. */
+bool upb_def_setfullname(upb_def *def, const char *fullname, upb_status *s) {
+  char *copy;
+  assert(!upb_def_isfrozen(def));
+  if (!upb_isident(fullname, strlen(fullname), true, s)) return false;
+
+  /* Duplicate before releasing the old name: this keeps the def intact if the
+   * allocation fails and stays correct when |fullname| is the def's own
+   * current name. */
+  copy = upb_strdup(fullname);
+  if (!copy) {
+    upb_status_seterrmsg(s, "out of memory");
+    return false;
+  }
+
+  free((void*)def->fullname);
+  def->fullname = copy;
+  return true;
+}
+
+/* Duplicates |def| by dispatching to the dup function for its concrete type.
+ * |o| is passed through as the owner of the new def.  Returns whatever the
+ * type-specific dup returns; an unknown def type asserts and yields NULL. */
+upb_def *upb_def_dup(const upb_def *def, const void *o) {
+  if (def->type == UPB_DEF_MSG) {
+    return upb_msgdef_upcast_mutable(
+        upb_msgdef_dup(upb_downcast_msgdef(def), o));
+  } else if (def->type == UPB_DEF_FIELD) {
+    return upb_fielddef_upcast_mutable(
+        upb_fielddef_dup(upb_downcast_fielddef(def), o));
+  } else if (def->type == UPB_DEF_ENUM) {
+    return upb_enumdef_upcast_mutable(
+        upb_enumdef_dup(upb_downcast_enumdef(def), o));
+  }
+
+  assert(false);
+  return NULL;
+}
+
+/* Initializes the part of a def that is common to all def types: the
+ * refcounted base plus type, name and the came_from_user flag.  Returns false
+ * (leaving the other members untouched) if upb_refcounted_init() fails. */
+static bool upb_def_init(upb_def *def, upb_deftype_t type,
+                         const struct upb_refcounted_vtbl *vtbl,
+                         const void *owner) {
+  if (upb_refcounted_init(upb_def_upcast_mutable(def), vtbl, owner)) {
+    def->came_from_user = false;
+    def->fullname = NULL;
+    def->type = type;
+    return true;
+  }
+  return false;
+}
+
+/* Releases the storage owned by the common def part (the name string). */
+static void upb_def_uninit(upb_def *def) {
+  void *name = (void*)def->fullname;
+  free(name);
+}
+
+/* Returns a printable name for |m|, substituting a placeholder when the
+ * message has no name set. */
+static const char *msgdef_name(const upb_msgdef *m) {
+  const char *fullname = upb_def_fullname(upb_msgdef_upcast(m));
+  if (fullname == NULL) return "(anonymous)";
+  return fullname;
+}
+
+/* Validates a single field as part of freezing its containing message.
+ *
+ * The checks run in a fixed order and the first failure determines the error
+ * stored in |s|:
+ *   1. name and number are set;
+ *   2. the type has been set;
+ *   3. "lazy" is only used on length-delimited submessage fields;
+ *   4. if the field takes a subdef, it is resolved (not symbolic), present,
+ *      and either already frozen or part of the set being frozen;
+ *   5. an enum field's default is resolvable both by name and by number;
+ *   6. a mapentry submessage is only referenced from a repeated field.
+ *
+ * Side effect: for enum fields the effective numeric default is written into
+ * the field itself.  Returns true iff all checks pass. */
+static bool upb_validate_field(upb_fielddef *f, upb_status *s) {
+  if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
+    upb_status_seterrmsg(s, "fielddef must have name and number set");
+    return false;
+  }
+
+  if (!f->type_is_set_) {
+    upb_status_seterrmsg(s, "fielddef type was not initialized");
+    return false;
+  }
+
+  if (upb_fielddef_lazy(f) &&
+      upb_fielddef_descriptortype(f) != UPB_DESCRIPTOR_TYPE_MESSAGE) {
+    upb_status_seterrmsg(s,
+                         "only length-delimited submessage fields may be lazy");
+    return false;
+  }
+
+  if (upb_fielddef_hassubdef(f)) {
+    const upb_def *subdef;
+
+    /* A symbolic subdef is still just a name that has not been linked. */
+    if (f->subdef_is_symbolic) {
+      upb_status_seterrf(s, "field '%s.%s' has not been resolved",
+                         msgdef_name(f->msg.def), upb_fielddef_name(f));
+      return false;
+    }
+
+    subdef = upb_fielddef_subdef(f);
+    if (subdef == NULL) {
+      upb_status_seterrf(s, "field %s.%s is missing required subdef",
+                         msgdef_name(f->msg.def), upb_fielddef_name(f));
+      return false;
+    }
+
+    /* came_from_user is set by upb_def_freeze() on the defs passed to it, so
+     * this accepts subdefs that are being frozen in the same call. */
+    if (!upb_def_isfrozen(subdef) && !subdef->came_from_user) {
+      upb_status_seterrf(s,
+                         "subdef of field %s.%s is not frozen or being frozen",
+                         msgdef_name(f->msg.def), upb_fielddef_name(f));
+      return false;
+    }
+  }
+
+  if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
+    bool has_default_name = upb_fielddef_enumhasdefaultstr(f);
+    bool has_default_number = upb_fielddef_enumhasdefaultint32(f);
+
+    /* Previously verified by upb_validate_enumdef(). */
+    assert(upb_enumdef_numvals(upb_fielddef_enumsubdef(f)) > 0);
+
+    /* We've already validated that we have an associated enumdef and that it
+     * has at least one member, so at least one of these should be true.
+     * Because if the user didn't set anything, we'll pick up the enum's
+     * default, but if the user *did* set something we should at least pick up
+     * the one they set (int32 or string). */
+    assert(has_default_name || has_default_number);
+
+    /* No name: the default was given as a number that the enum lacks. */
+    if (!has_default_name) {
+      upb_status_seterrf(s,
+                         "enum default for field %s.%s (%d) is not in the enum",
+                         msgdef_name(f->msg.def), upb_fielddef_name(f),
+                         upb_fielddef_defaultint32(f));
+      return false;
+    }
+
+    /* No number: the default was given as a name that the enum lacks. */
+    if (!has_default_number) {
+      upb_status_seterrf(s,
+                         "enum default for field %s.%s (%s) is not in the enum",
+                         msgdef_name(f->msg.def), upb_fielddef_name(f),
+                         upb_fielddef_defaultstr(f, NULL));
+      return false;
+    }
+
+    /* Lift the effective numeric default into the field's default slot, in case
+     * we were only getting it "by reference" from the enumdef. */
+    upb_fielddef_setdefaultint32(f, upb_fielddef_defaultint32(f));
+  }
+
+  /* Ensure that MapEntry submessages only appear as repeated fields, not
+   * optional/required (singular) fields. */
+  if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
+      upb_fielddef_msgsubdef(f) != NULL) {
+    const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
+    if (upb_msgdef_mapentry(subdef) && !upb_fielddef_isseq(f)) {
+      upb_status_seterrf(s,
+                         "Field %s refers to mapentry message but is not "
+                         "a repeated field",
+                         upb_fielddef_name(f) ? upb_fielddef_name(f) :
+                         "(unnamed)");
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/* An enum is valid for freezing only if it has at least one value.  Stores an
+ * error in |s| and returns false otherwise. */
+static bool upb_validate_enumdef(const upb_enumdef *e, upb_status *s) {
+  if (upb_enumdef_numvals(e) != 0) return true;
+
+  upb_status_seterrf(s, "enum %s has no members (must have at least one)",
+                     upb_enumdef_fullname(e));
+  return false;
+}
+
+/* Sort key for a message's fields.  All submessage fields are lower than all
+ * other fields.  Secondly, fields are increasing in order of field number.
+ * This is achieved by setting bit 30 on every non-submessage field. */
+uint32_t field_rank(const upb_fielddef *f) {
+  const uint32_t non_submsg_bit = 1 << 30;
+  uint32_t number = upb_fielddef_number(f);
+  assert(number < non_submsg_bit);
+  return upb_fielddef_issubmsg(f) ? number : (number | non_submsg_bit);
+}
+
+/* qsort() comparator over an array of upb_fielddef pointers, ordering them by
+ * field_rank().  Returns a negative, zero or positive value as required by
+ * qsort(). */
+int cmp_fields(const void *p1, const void *p2) {
+  const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
+  const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
+  uint32_t rank1 = field_rank(f1);
+  uint32_t rank2 = field_rank(f2);
+  /* Compare explicitly rather than returning rank1 - rank2: that difference
+   * is computed in unsigned arithmetic, and converting a wrapped value to int
+   * is implementation-defined. */
+  return (rank1 > rank2) - (rank1 < rank2);
+}
+
+/* Validates every field of |m| and assigns each one its index and selector
+ * base; also records the message's submessage-field count and total selector
+ * count.
+ *
+ * Returns false with an error in |s| if a field fails upb_validate_field() or
+ * if memory cannot be allocated.  A message without fields succeeds
+ * trivially. */
+static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
+  /* Sort fields.  upb internally relies on UPB_TYPE_MESSAGE fields having the
+   * lowest indexes, but we do not publicly guarantee this. */
+  upb_msg_field_iter j;
+  int i;
+  uint32_t selector;
+  int n = upb_msgdef_numfields(m);
+  upb_fielddef **fields;
+
+  if (n == 0) {
+    /* Nothing to sort.  Handle this up front because malloc(0) is allowed to
+     * return NULL, which must not be mistaken for an allocation failure. */
+    m->submsg_field_count = 0;
+    m->selector_count = UPB_STATIC_SELECTOR_COUNT;
+    return true;
+  }
+
+  fields = malloc(n * sizeof(*fields));
+  if (!fields) {
+    /* Callers assert that a failure always comes with an error status. */
+    upb_status_seterrmsg(s, "out of memory");
+    return false;
+  }
+
+  m->submsg_field_count = 0;
+  for(i = 0, upb_msg_field_begin(&j, m);
+      !upb_msg_field_done(&j);
+      upb_msg_field_next(&j), i++) {
+    upb_fielddef *f = upb_msg_iter_field(&j);
+    assert(f->msg.def == m);
+    if (!upb_validate_field(f, s)) {
+      free(fields);
+      return false;
+    }
+    if (upb_fielddef_issubmsg(f)) {
+      m->submsg_field_count++;
+    }
+    fields[i] = f;
+  }
+
+  qsort(fields, n, sizeof(*fields), cmp_fields);
+
+  /* Selector numbering starts after the static selectors, with one further
+   * slot set aside per submessage field. */
+  selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
+  for (i = 0; i < n; i++) {
+    upb_fielddef *f = fields[i];
+    f->index_ = i;
+    f->selector_base = selector + upb_handlers_selectorbaseoffset(f);
+    selector += upb_handlers_selectorcount(f);
+  }
+  m->selector_count = selector;
+
+#ifndef NDEBUG
+  {
+    /* Verify that all selectors for the message are distinct. */
+#define TRY(type) \
+    if (upb_handlers_getselector(f, type, &sel)) upb_inttable_insert(&t, sel, v);
+
+    upb_inttable t;
+    upb_value v;
+    upb_selector_t sel;
+
+    upb_inttable_init(&t, UPB_CTYPE_BOOL);
+    v = upb_value_bool(true);
+    upb_inttable_insert(&t, UPB_STARTMSG_SELECTOR, v);
+    upb_inttable_insert(&t, UPB_ENDMSG_SELECTOR, v);
+    for(upb_msg_field_begin(&j, m);
+        !upb_msg_field_done(&j);
+        upb_msg_field_next(&j)) {
+      upb_fielddef *f = upb_msg_iter_field(&j);
+      /* These calls will assert-fail in upb_table if the value already
+       * exists. */
+      TRY(UPB_HANDLER_INT32)
+      TRY(UPB_HANDLER_INT64)
+      TRY(UPB_HANDLER_UINT32)
+      TRY(UPB_HANDLER_UINT64)
+      TRY(UPB_HANDLER_FLOAT)
+      TRY(UPB_HANDLER_DOUBLE)
+      TRY(UPB_HANDLER_BOOL)
+      TRY(UPB_HANDLER_STARTSTR)
+      TRY(UPB_HANDLER_STRING)
+      TRY(UPB_HANDLER_ENDSTR)
+      TRY(UPB_HANDLER_STARTSUBMSG)
+      TRY(UPB_HANDLER_ENDSUBMSG)
+      TRY(UPB_HANDLER_STARTSEQ)
+      TRY(UPB_HANDLER_ENDSEQ)
+    }
+    upb_inttable_uninit(&t);
+  }
+#undef TRY
+#endif
+
+  free(fields);
+  return true;
+}
+
+/* Validates and freezes the |n| defs in |defs| as a group.
+ *
+ * Every def must be unfrozen and must be a message or enum (standalone
+ * fielddefs are rejected).  Message fields may only refer to subdefs that are
+ * already frozen or are part of this same call.  On success the defs are
+ * frozen via upb_refcounted_freeze(); on failure false is returned with an
+ * error in |s| and the came_from_user markers are cleared again. */
+bool upb_def_freeze(upb_def *const* defs, int n, upb_status *s) {
+  int i;
+  int maxdepth;
+  bool ret;
+  upb_status_clear(s);
+
+  /* First perform validation, in two passes so we can check that we have a
+   * transitive closure without needing to search. */
+  for (i = 0; i < n; i++) {
+    upb_def *def = defs[i];
+    if (upb_def_isfrozen(def)) {
+      /* Could relax this requirement if it's annoying. */
+      upb_status_seterrmsg(s, "def is already frozen");
+      goto err;
+    } else if (def->type == UPB_DEF_FIELD) {
+      upb_status_seterrmsg(s, "standalone fielddefs can not be frozen");
+      goto err;
+    } else if (def->type == UPB_DEF_ENUM) {
+      if (!upb_validate_enumdef(upb_dyncast_enumdef(def), s)) {
+        goto err;
+      }
+    } else {
+      /* Set now to detect transitive closure in the second pass. */
+      def->came_from_user = true;
+    }
+  }
+
+  /* Second pass of validation.  Also assign selector bases and indexes, and
+   * compact tables. */
+  for (i = 0; i < n; i++) {
+    /* At most one of these casts yields a non-NULL pointer for a given def. */
+    upb_msgdef *m = upb_dyncast_msgdef_mutable(defs[i]);
+    upb_enumdef *e = upb_dyncast_enumdef_mutable(defs[i]);
+    if (m) {
+      upb_inttable_compact(&m->itof);
+      if (!assign_msg_indices(m, s)) {
+        goto err;
+      }
+    } else if (e) {
+      upb_inttable_compact(&e->iton);
+    }
+  }
+
+  /* Def graph contains FieldDefs between each MessageDef, so double the
+   * limit. */
+  maxdepth = UPB_MAX_MESSAGE_DEPTH * 2;
+
+  /* Validation all passed; freeze the defs. */
+  ret = upb_refcounted_freeze((upb_refcounted * const *)defs, n, s, maxdepth);
+  /* The return value and the status must agree. */
+  assert(!(s && ret != upb_ok(s)));
+  return ret;
+
+err:
+  /* Undo the first-pass markers on every def, including ones never marked. */
+  for (i = 0; i < n; i++) {
+    defs[i]->came_from_user = false;
+  }
+  /* Every path that jumps here is expected to have recorded an error. */
+  assert(!(s && upb_ok(s)));
+  return false;
+}
+
+
+/* upb_enumdef ****************************************************************/
+
+/* Refcounted "free" callback for enumdefs: releases the value-name copies,
+ * both lookup tables, the common def part and finally the enumdef itself. */
+static void upb_enumdef_free(upb_refcounted *r) {
+  upb_enumdef *e = (upb_enumdef*)r;
+  upb_inttable_iter it;
+
+  /* To clean up the upb_strdup() from upb_enumdef_addval(). */
+  upb_inttable_begin(&it, &e->iton);
+  while (!upb_inttable_done(&it)) {
+    free(upb_value_getcstr(upb_inttable_iter_value(&it)));
+    upb_inttable_next(&it);
+  }
+
+  upb_strtable_uninit(&e->ntoi);
+  upb_inttable_uninit(&e->iton);
+  upb_def_uninit(upb_enumdef_upcast_mutable(e));
+  free(e);
+}
+
+/* Creates an empty, unfrozen enumdef with |owner| passed on to upb_def_init().
+ * Returns NULL if any allocation or initialization step fails. */
+upb_enumdef *upb_enumdef_new(const void *owner) {
+  static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_enumdef_free};
+  upb_enumdef *e = malloc(sizeof(*e));
+  if (!e) return NULL;
+
+  if (upb_def_init(upb_enumdef_upcast_mutable(e), UPB_DEF_ENUM, &vtbl,
+                   owner) &&
+      upb_strtable_init(&e->ntoi, UPB_CTYPE_INT32)) {
+    if (upb_inttable_init(&e->iton, UPB_CTYPE_CSTR)) {
+      return e;
+    }
+    /* The name table was already set up; tear it down again. */
+    upb_strtable_uninit(&e->ntoi);
+  }
+
+  free(e);
+  return NULL;
+}
+
+/* Creates a new enumdef owned by |owner| containing every name/number pair of
+ * |e|.  Returns NULL if the new enumdef cannot be created or a value cannot be
+ * added; in the latter case the partially built copy is unref'd. */
+upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner) {
+  upb_enum_iter it;
+  upb_enumdef *copy = upb_enumdef_new(owner);
+  if (!copy) return NULL;
+
+  upb_enum_begin(&it, e);
+  while (!upb_enum_done(&it)) {
+    if (!upb_enumdef_addval(copy, upb_enum_iter_name(&it),
+                            upb_enum_iter_number(&it), NULL)) {
+      upb_enumdef_unref(copy, owner);
+      return NULL;
+    }
+    upb_enum_next(&it);
+  }
+  return copy;
+}
+
+/* Freezes this single enumdef by handing a one-element array to
+ * upb_def_freeze(). */
+bool upb_enumdef_freeze(upb_enumdef *e, upb_status *status) {
+  upb_def *d = upb_enumdef_upcast_mutable(e);
+  return upb_def_freeze(&d, 1, status);
+}
+
+/* Returns the enum's full name (see upb_def_fullname()). */
+const char *upb_enumdef_fullname(const upb_enumdef *e) {
+  return upb_def_fullname(upb_enumdef_upcast(e));
+}
+
+/* Sets the enum's full name (see upb_def_setfullname()). */
+bool upb_enumdef_setfullname(upb_enumdef *e, const char *fullname,
+                             upb_status *s) {
+  return upb_def_setfullname(upb_enumdef_upcast_mutable(e), fullname, s);
+}
+
+/* Adds the value |name| = |num| to the enum.
+ *
+ * |name| must be a valid identifier without dots and must not already be
+ * defined.  Several names may share one number; the number-to-name table
+ * keeps the first name added for each number.  The first value added becomes
+ * the enum's default.
+ *
+ * Returns false with an error in |status| on an invalid or duplicate name, or
+ * when out of memory; in that case the enum is left unchanged. */
+bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num,
+                        upb_status *status) {
+  if (!upb_isident(name, strlen(name), false, status)) {
+    return false;
+  }
+  if (upb_enumdef_ntoiz(e, name, NULL)) {
+    upb_status_seterrf(status, "name '%s' is already defined", name);
+    return false;
+  }
+  if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num))) {
+    upb_status_seterrmsg(status, "out of memory");
+    return false;
+  }
+  if (!upb_inttable_lookup(&e->iton, num, NULL)) {
+    /* The copy is owned by the iton table and freed in upb_enumdef_free(). */
+    char *name_copy = upb_strdup(name);
+    if (!name_copy ||
+        !upb_inttable_insert(&e->iton, num, upb_value_cstr(name_copy))) {
+      /* Don't leak the copy when the insert fails; free(NULL) is a no-op.
+       * Also roll back the name-to-number entry added above. */
+      free(name_copy);
+      upb_status_seterrmsg(status, "out of memory");
+      upb_strtable_remove(&e->ntoi, name, NULL);
+      return false;
+    }
+  }
+  if (upb_enumdef_numvals(e) == 1) {
+    bool ok = upb_enumdef_setdefault(e, num, NULL);
+    UPB_ASSERT_VAR(ok, ok);
+  }
+  return true;
+}
+
+/* Returns the enum's default number, which is asserted to be one of its
+ * values. */
+int32_t upb_enumdef_default(const upb_enumdef *e) {
+  const int32_t value = e->defaultval;
+  assert(upb_enumdef_iton(e, value));
+  return value;
+}
+
+/* Sets the default of an unfrozen enum to |val|.  Fails with an error in |s|
+ * if |val| is not a number in the enum. */
+bool upb_enumdef_setdefault(upb_enumdef *e, int32_t val, upb_status *s) {
+  assert(!upb_enumdef_isfrozen(e));
+  if (upb_enumdef_iton(e, val)) {
+    e->defaultval = val;
+    return true;
+  }
+  upb_status_seterrf(s, "number '%d' is not in the enum.", val);
+  return false;
+}
+
+/* Returns the number of names in the enum (names sharing a number each
+ * count). */
+int upb_enumdef_numvals(const upb_enumdef *e) {
+  return upb_strtable_count(&e->ntoi);
+}
+
+/* Starts iterating over the values of |e|. */
+void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
+  /* We iterate over the ntoi table, to account for duplicate numbers. */
+  upb_strtable_begin(i, &e->ntoi);
+}
+
+/* Advance the iterator / test for the end; both forward to upb_strtable. */
+void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); }
+bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); }
+
+/* Looks up the number for the |len|-byte name |name|.  Returns false if the
+ * name is not in the enum; otherwise stores the number in |*num| (when |num|
+ * is non-NULL) and returns true. */
+bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
+                      size_t len, int32_t *num) {
+  upb_value found;
+  if (upb_strtable_lookup2(&def->ntoi, name, len, &found)) {
+    if (num != NULL) *num = upb_value_getint32(found);
+    return true;
+  }
+  return false;
+}
+
+/* Returns the name recorded for |num| in the number-to-name table, or NULL if
+ * the enum has no value with that number. */
+const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
+  upb_value found;
+  if (!upb_inttable_lookup32(&def->iton, num, &found)) return NULL;
+  return upb_value_getcstr(found);
+}
+
+/* Returns the name of the value the iterator currently points at. */
+const char *upb_enum_iter_name(upb_enum_iter *iter) {
+  return upb_strtable_iter_key(iter);
+}
+
+/* Returns the number of the value the iterator currently points at. */
+int32_t upb_enum_iter_number(upb_enum_iter *iter) {
+  return upb_value_getint32(upb_strtable_iter_value(iter));
+}
+
+
+/* upb_fielddef ***************************************************************/
+
+static void upb_fielddef_init_default(upb_fielddef *f);
+
+/* Frees the field's default value if it is a heap-allocated string.  Does
+ * nothing when the type is unset, the default is not a string, or the string
+ * pointer is NULL. */
+static void upb_fielddef_uninit_default(upb_fielddef *f) {
+  if (!f->type_is_set_) return;
+  if (!f->default_is_string) return;
+  if (f->defaultval.bytes == NULL) return;
+  freestr(f->defaultval.bytes);
+}
+
+/* Refcounted "visit" callback for fielddefs: reports each object the field
+ * refers to (containing message, containing oneof, resolved subdef) to
+ * |visit|, skipping the ones that are absent. */
+static void visitfield(const upb_refcounted *r, upb_refcounted_visit *visit,
+                       void *closure) {
+  const upb_fielddef *f = (const upb_fielddef*)r;
+  const upb_msgdef *parent;
+  const upb_oneofdef *oneof;
+  const upb_def *sub;
+
+  parent = upb_fielddef_containingtype(f);
+  if (parent) {
+    visit(r, upb_msgdef_upcast2(parent), closure);
+  }
+
+  oneof = upb_fielddef_containingoneof(f);
+  if (oneof) {
+    visit(r, upb_oneofdef_upcast2(oneof), closure);
+  }
+
+  sub = upb_fielddef_subdef(f);
+  if (sub) {
+    visit(r, upb_def_upcast(sub), closure);
+  }
+}
+
+/* Refcounted "free" callback for fielddefs: releases everything the field
+ * owns and then the field itself. */
+static void freefield(upb_refcounted *r) {
+  upb_fielddef *f = (upb_fielddef*)r;
+  /* Must run while type_is_set_/default_is_string are still readable. */
+  upb_fielddef_uninit_default(f);
+  /* A symbolic subdef is a name string owned by the field. */
+  if (f->subdef_is_symbolic)
+    free(f->sub.name);
+  upb_def_uninit(upb_fielddef_upcast_mutable(f));
+  free(f);
+}
+
+/* Returns the default of an enum field as a name, or NULL if it cannot be
+ * determined.
+ *
+ * An explicitly set string default is returned as-is (even without an
+ * enumdef).  An explicitly set integer default is translated through the
+ * enumdef.  With no default set, the enumdef's own default is used, provided
+ * the enumdef has at least one value. */
+static const char *enumdefaultstr(const upb_fielddef *f) {
+  const upb_enumdef *e;
+  const char *name;
+  assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
+  e = upb_fielddef_enumsubdef(f);
+
+  if (f->default_is_string && f->defaultval.bytes) {
+    /* Default was explicitly set as a string. */
+    str_t *s = f->defaultval.bytes;
+    return s->str;
+  }
+
+  /* Every remaining case needs the enumdef. */
+  if (!e) return NULL;
+
+  if (!f->default_is_string) {
+    /* Default was explicitly set as an integer; look it up in enumdef.  The
+     * lookup yields NULL when the number is not in the enum. */
+    return upb_enumdef_iton(e, f->defaultval.sint);
+  }
+
+  /* Default is completely unset; pull enumdef default. */
+  if (upb_enumdef_numvals(e) <= 0) return NULL;
+  name = upb_enumdef_iton(e, upb_enumdef_default(e));
+  assert(name);
+  return name;
+}
+
+/* Computes the default of an enum field as a number.  Returns true and stores
+ * the number in |*val| on success; returns false (leaving the result
+ * unspecified) when no number can be determined.
+ *
+ * An explicitly set integer default is returned as-is (even without an
+ * enumdef).  An explicitly set string default is translated through the
+ * enumdef.  With no default set, the enumdef's own default is used, provided
+ * the enumdef has at least one value. */
+static bool enumdefaultint32(const upb_fielddef *f, int32_t *val) {
+  const upb_enumdef *e;
+  assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
+  e = upb_fielddef_enumsubdef(f);
+
+  if (!f->default_is_string) {
+    /* Default was explicitly set as an integer. */
+    *val = f->defaultval.sint;
+    return true;
+  }
+
+  /* Every remaining case needs the enumdef. */
+  if (!e) return false;
+
+  if (f->defaultval.bytes) {
+    /* Default was explicitly set as a str; try to lookup corresponding int. */
+    str_t *s = f->defaultval.bytes;
+    if (upb_enumdef_ntoiz(e, s->str, val)) return true;
+    return false;
+  }
+
+  /* Default is unset; try to pull in enumdef default. */
+  if (upb_enumdef_numvals(e) > 0) {
+    *val = upb_enumdef_default(e);
+    return true;
+  }
+  return false;
+}
+
+/* Creates a new, unfrozen fielddef with |o| passed on to upb_def_init().
+ *
+ * The field starts out unattached (no message, oneof or subdef), optional,
+ * with number 0 and with its type marked as not yet set.  Returns NULL on
+ * allocation or initialization failure. */
+upb_fielddef *upb_fielddef_new(const void *o) {
+  static const struct upb_refcounted_vtbl vtbl = {visitfield, freefield};
+  upb_fielddef *f = malloc(sizeof(*f));
+  if (!f) return NULL;
+  if (!upb_def_init(upb_fielddef_upcast_mutable(f), UPB_DEF_FIELD, &vtbl, o)) {
+    free(f);
+    return NULL;
+  }
+
+  /* Not attached to anything yet. */
+  f->msg.def = NULL;
+  f->msg_is_symbolic = false;
+  f->sub.def = NULL;
+  f->subdef_is_symbolic = false;
+  f->oneof = NULL;
+
+  /* type_ holds a placeholder until the user sets a type. */
+  f->type_ = UPB_TYPE_INT32;
+  f->type_is_set_ = false;
+  f->label_ = UPB_LABEL_OPTIONAL;
+  f->number_ = 0;
+
+  f->is_extension_ = false;
+  f->lazy_ = false;
+  f->packed_ = true;
+  f->tagdelim = false;
+
+  /* For the moment we default this to UPB_INTFMT_VARIABLE, since it will work
+   * with all integer types and is in some sense more "default" since the most
+   * normal-looking proto2 types int32/int64/uint32/uint64 use variable.
+   *
+   * Other options to consider:
+   * - there is no default; users must set this manually (like type).
+   * - default signed integers to UPB_INTFMT_ZIGZAG, since it's more likely to
+   *   be an optimal default for signed integers. */
+  f->intfmt = UPB_INTFMT_VARIABLE;
+  return f;
+}
+
+/* Creates an unfrozen copy of |f| owned by |owner|.
+ *
+ * Type, label, number, name and default value are copied.  The copy is not
+ * attached to any message, and its subdef (if any) is recorded symbolically
+ * as "." followed by the source's subdef name, so it has to be resolved
+ * again.  Returns NULL if memory cannot be allocated. */
+upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) {
+  const char *srcname;
+  upb_fielddef *newf = upb_fielddef_new(owner);
+  if (!newf) return NULL;
+  upb_fielddef_settype(newf, upb_fielddef_type(f));
+  upb_fielddef_setlabel(newf, upb_fielddef_label(f));
+  upb_fielddef_setnumber(newf, upb_fielddef_number(f), NULL);
+  /* An unnamed source field has nothing to copy; the setter runs strlen() on
+   * its argument, so it must not be handed NULL. */
+  if (upb_fielddef_name(f)) {
+    upb_fielddef_setname(newf, upb_fielddef_name(f), NULL);
+  }
+  if (f->default_is_string && f->defaultval.bytes) {
+    str_t *s = f->defaultval.bytes;
+    upb_fielddef_setdefaultstr(newf, s->str, s->len, NULL);
+  } else {
+    newf->default_is_string = f->default_is_string;
+    newf->defaultval = f->defaultval;
+  }
+
+  if (f->subdef_is_symbolic) {
+    srcname = f->sub.name;  /* Might be NULL. */
+  } else {
+    srcname = f->sub.def ? upb_def_fullname(f->sub.def) : NULL;
+  }
+  if (srcname) {
+    /* Build the new name from srcname, never from f->sub.def: when the
+     * subdef is symbolic, f->sub holds a name string rather than a def. */
+    char *newname = malloc(strlen(srcname) + 2);  /* "." + name + NUL */
+    if (!newname) {
+      upb_fielddef_unref(newf, owner);
+      return NULL;
+    }
+    strcpy(newname, ".");
+    strcat(newname, srcname);
+    upb_fielddef_setsubdefname(newf, newname, NULL);
+    free(newname);
+  }
+
+  return newf;
+}
+
+/* Returns whether a type has been set on the field. */
+bool upb_fielddef_typeisset(const upb_fielddef *f) {
+  return f->type_is_set_;
+}
+
+/* Returns the field's type; asserts that one has been set. */
+upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
+  assert(f->type_is_set_);
+  return f->type_;
+}
+
+/* Returns the index stored in index_ (assigned in assign_msg_indices()). */
+uint32_t upb_fielddef_index(const upb_fielddef *f) {
+  return f->index_;
+}
+
+/* Returns the field's label. */
+upb_label_t upb_fielddef_label(const upb_fielddef *f) {
+  return f->label_;
+}
+
+/* Returns the integer format of the field. */
+upb_intfmt_t upb_fielddef_intfmt(const upb_fielddef *f) {
+  return f->intfmt;
+}
+
+/* Returns the tag-delimited flag (set for group-typed fields by
+ * upb_fielddef_setdescriptortype()). */
+bool upb_fielddef_istagdelim(const upb_fielddef *f) {
+  return f->tagdelim;
+}
+
+/* Returns the field number; 0 means it has not been set. */
+uint32_t upb_fielddef_number(const upb_fielddef *f) {
+  return f->number_;
+}
+
+/* Returns whether the field is marked as an extension. */
+bool upb_fielddef_isextension(const upb_fielddef *f) {
+  return f->is_extension_;
+}
+
+/* Returns whether the field is marked lazy. */
+bool upb_fielddef_lazy(const upb_fielddef *f) {
+  return f->lazy_;
+}
+
+/* Returns whether the field is marked packed. */
+bool upb_fielddef_packed(const upb_fielddef *f) {
+  return f->packed_;
+}
+
+/* Returns the field's name, which is stored as the def's full name; NULL if
+ * unset. */
+const char *upb_fielddef_name(const upb_fielddef *f) {
+  return upb_def_fullname(upb_fielddef_upcast(f));
+}
+
+/* Returns the containing message def, or NULL while the containing type is
+ * only known symbolically (by name) or not set at all. */
+const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
+  return f->msg_is_symbolic ? NULL : f->msg.def;
+}
+
+/* Returns the oneof this field belongs to, or NULL if none. */
+const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
+  return f->oneof;
+}
+
+/* Non-const variant of upb_fielddef_containingtype(). */
+upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f) {
+  return (upb_msgdef*)upb_fielddef_containingtype(f);
+}
+
+/* Returns the symbolic name of the containing type, or NULL if the containing
+ * type is not symbolic. */
+const char *upb_fielddef_containingtypename(upb_fielddef *f) {
+  return f->msg_is_symbolic ? f->msg.name : NULL;
+}
+
+/* Frees the symbolic containing-type name, if the field currently has one.
+ * The field's members are not reset; callers overwrite them afterwards. */
+static void release_containingtype(upb_fielddef *f) {
+  if (!f->msg_is_symbolic) return;
+  free(f->msg.name);
+}
+
+/* Records the containing type of an unfrozen field symbolically, by name.
+ *
+ * Fails with an error in |s| if the field has already been added to a message
+ * or if memory cannot be allocated; in both cases the field is left
+ * unchanged.  |name| is copied. */
+bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name,
+                                        upb_status *s) {
+  char *name_copy;
+  assert(!upb_fielddef_isfrozen(f));
+  if (upb_fielddef_containingtype(f)) {
+    upb_status_seterrmsg(s, "field has already been added to a message.");
+    return false;
+  }
+  /* TODO: validate name (upb_isident() doesn't quite work atm because this name
+   * may have a leading "."). */
+
+  /* Copy first so that an allocation failure cannot leave the field marked
+   * symbolic with a NULL name, and so that passing the field's own current
+   * name is safe. */
+  name_copy = upb_strdup(name);
+  if (!name_copy) {
+    upb_status_seterrmsg(s, "out of memory");
+    return false;
+  }
+
+  release_containingtype(f);
+  f->msg.name = name_copy;
+  f->msg_is_symbolic = true;
+  return true;
+}
+
+/* Sets the field's name.  Not allowed once the field has been added to a
+ * message or oneof; otherwise the result is that of upb_def_setfullname(). */
+bool upb_fielddef_setname(upb_fielddef *f, const char *name, upb_status *s) {
+  const bool attached =
+      upb_fielddef_containingtype(f) || upb_fielddef_containingoneof(f);
+  if (!attached) {
+    return upb_def_setfullname(upb_fielddef_upcast_mutable(f), name, s);
+  }
+  upb_status_seterrmsg(s, "Already added to message or oneof");
+  return false;
+}
+
+/* Debug-only check that the field's type is set and equals |type|.  The
+ * UPB_UNUSED() lines reference the parameters so they are still used when
+ * assert() compiles to nothing. */
+static void chkdefaulttype(const upb_fielddef *f, upb_fieldtype_t type) {
+  UPB_UNUSED(f);
+  UPB_UNUSED(type);
+  assert(f->type_is_set_ && upb_fielddef_type(f) == type);
+}
+
+/* Returns the default of an int64 field. */
+int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
+  chkdefaulttype(f, UPB_TYPE_INT64);
+  return f->defaultval.sint;
+}
+
+/* Returns the default of an int32 or enum field.  For enum fields the value
+ * is resolved through enumdefaultint32(), which is asserted to succeed. */
+int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
+  int32_t val;
+  bool ok;
+
+  if (!f->type_is_set_ || upb_fielddef_type(f) != UPB_TYPE_ENUM) {
+    chkdefaulttype(f, UPB_TYPE_INT32);
+    return f->defaultval.sint;
+  }
+
+  ok = enumdefaultint32(f, &val);
+  UPB_ASSERT_VAR(ok, ok);
+  return val;
+}
+
+/* Returns the default of a uint64 field. */
+uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
+  chkdefaulttype(f, UPB_TYPE_UINT64);
+  return f->defaultval.uint;
+}
+
+/* Returns the default of a uint32 field. */
+uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
+  chkdefaulttype(f, UPB_TYPE_UINT32);
+  return f->defaultval.uint;
+}
+
+/* Returns the default of a bool field (stored in the unsigned slot). */
+bool upb_fielddef_defaultbool(const upb_fielddef *f) {
+  chkdefaulttype(f, UPB_TYPE_BOOL);
+  return f->defaultval.uint;
+}
+
+/* Returns the default of a float field. */
+float upb_fielddef_defaultfloat(const upb_fielddef *f) {
+  chkdefaulttype(f, UPB_TYPE_FLOAT);
+  return f->defaultval.flt;
+}
+
+/* Returns the default of a double field. */
+double upb_fielddef_defaultdouble(const upb_fielddef *f) {
+  chkdefaulttype(f, UPB_TYPE_DOUBLE);
+  return f->defaultval.dbl;
+}
+
+/* Returns the default of a string, bytes or enum field as a NUL-terminated
+ * string and, if |len| is non-NULL, stores its length in |*len|.
+ *
+ * For enum fields this is the name of the default value, which is asserted to
+ * exist.  For string/bytes fields NULL is returned if the default is not
+ * stored as a string. */
+const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
+  upb_fieldtype_t type;
+  assert(f->type_is_set_);
+  type = upb_fielddef_type(f);
+  assert(type == UPB_TYPE_STRING ||
+         type == UPB_TYPE_BYTES ||
+         type == UPB_TYPE_ENUM);
+
+  if (type != UPB_TYPE_ENUM) {
+    str_t *str;
+    if (!f->default_is_string) return NULL;
+    str = f->defaultval.bytes;
+    if (len) *len = str->len;
+    return str->str;
+  } else {
+    const char *name = enumdefaultstr(f);
+    assert(name);
+    /* Enum defaults can't have embedded NULLs. */
+    if (len) *len = strlen(name);
+    return name;
+  }
+}
+
+/* Resets the field's default to the initial value for its current type: zero
+ * for numeric and bool types, the empty string for string/bytes, "not set"
+ * for enums, and nothing at all for messages. */
+static void upb_fielddef_init_default(upb_fielddef *f) {
+  f->default_is_string = false;
+  switch (upb_fielddef_type(f)) {
+    case UPB_TYPE_STRING:
+    case UPB_TYPE_BYTES:
+      /* NOTE(review): newstr() returns NULL on allocation failure, and that
+       * result is stored here unchecked. */
+      f->defaultval.bytes = newstr("", 0);
+      f->default_is_string = true;
+      break;
+    case UPB_TYPE_ENUM:
+      /* This is our special sentinel that indicates "not set" for an enum. */
+      f->default_is_string = true;
+      f->defaultval.bytes = NULL;
+      break;
+    case UPB_TYPE_INT32:
+    case UPB_TYPE_INT64:
+      f->defaultval.sint = 0;
+      break;
+    case UPB_TYPE_UINT32:
+    case UPB_TYPE_UINT64:
+    case UPB_TYPE_BOOL:
+      f->defaultval.uint = 0;
+      break;
+    case UPB_TYPE_FLOAT:
+      f->defaultval.flt = 0;
+      break;
+    case UPB_TYPE_DOUBLE:
+      f->defaultval.dbl = 0;
+      break;
+    case UPB_TYPE_MESSAGE:
+      break;
+  }
+}
+
+/* Returns the field's resolved subdef, or NULL if it is unset or still only
+ * known symbolically (by name). */
+const upb_def *upb_fielddef_subdef(const upb_fielddef *f) {
+  return f->subdef_is_symbolic ? NULL : f->sub.def;
+}
+
+/* Returns the resolved subdef converted with upb_dyncast_msgdef(), or NULL if
+ * there is no resolved subdef. */
+const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
+  const upb_def *def = upb_fielddef_subdef(f);
+  return def ? upb_dyncast_msgdef(def) : NULL;
+}
+
+/* Returns the resolved subdef converted with upb_dyncast_enumdef(), or NULL if
+ * there is no resolved subdef. */
+const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
+  const upb_def *def = upb_fielddef_subdef(f);
+  return def ? upb_dyncast_enumdef(def) : NULL;
+}
+
+/* Non-const variant of upb_fielddef_subdef(). */
+upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f) {
+  return (upb_def*)upb_fielddef_subdef(f);
+}
+
+/* Returns the name of the field's subdef: the symbolic name if the subdef is
+ * unresolved, the full name of the resolved subdef otherwise, or NULL if the
+ * field has no subdef at all. */
+const char *upb_fielddef_subdefname(const upb_fielddef *f) {
+  if (f->subdef_is_symbolic) return f->sub.name;
+  return f->sub.def ? upb_def_fullname(f->sub.def) : NULL;
+}
+
+bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number, upb_status *s) {
+  if (upb_fielddef_containingtype(f)) {
+    upb_status_seterrmsg(
+        s, "cannot change field number after adding to a message");
+    return false;
+  }
+  if (number == 0 || number > UPB_MAX_FIELDNUMBER) {
+    upb_status_seterrf(s, "invalid field number (%u)", number);
+    return false;
+  }
+  f->number_ = number;
+  return true;
+}
+
+/* Sets the type of an unfrozen field and resets its default value to the
+ * initial default for the new type. */
+void upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type) {
+  assert(!upb_fielddef_isfrozen(f));
+  assert(upb_fielddef_checktype(type));
+  /* Release the old default while the old type information is still in
+   * place; the new default is created only after the type has changed. */
+  upb_fielddef_uninit_default(f);
+  f->type_ = type;
+  f->type_is_set_ = true;
+  upb_fielddef_init_default(f);
+}
+
+/* Configures an unfrozen field from a descriptor type: sets the field type,
+ * the integer format (fixed, zigzag or variable) and the tag-delimited flag
+ * (true only for groups).  An unknown |type| asserts; the field type is then
+ * left unchanged. */
+void upb_fielddef_setdescriptortype(upb_fielddef *f, int type) {
+  upb_fieldtype_t mapped = UPB_TYPE_INT32;
+  bool known = true;
+
+  assert(!upb_fielddef_isfrozen(f));
+
+  /* Step 1: collapse the descriptor type onto the coarser field type. */
+  switch (type) {
+    case UPB_DESCRIPTOR_TYPE_DOUBLE:   mapped = UPB_TYPE_DOUBLE;  break;
+    case UPB_DESCRIPTOR_TYPE_FLOAT:    mapped = UPB_TYPE_FLOAT;   break;
+    case UPB_DESCRIPTOR_TYPE_INT64:
+    case UPB_DESCRIPTOR_TYPE_SFIXED64:
+    case UPB_DESCRIPTOR_TYPE_SINT64:   mapped = UPB_TYPE_INT64;   break;
+    case UPB_DESCRIPTOR_TYPE_UINT64:
+    case UPB_DESCRIPTOR_TYPE_FIXED64:  mapped = UPB_TYPE_UINT64;  break;
+    case UPB_DESCRIPTOR_TYPE_INT32:
+    case UPB_DESCRIPTOR_TYPE_SFIXED32:
+    case UPB_DESCRIPTOR_TYPE_SINT32:   mapped = UPB_TYPE_INT32;   break;
+    case UPB_DESCRIPTOR_TYPE_UINT32:
+    case UPB_DESCRIPTOR_TYPE_FIXED32:  mapped = UPB_TYPE_UINT32;  break;
+    case UPB_DESCRIPTOR_TYPE_BOOL:     mapped = UPB_TYPE_BOOL;    break;
+    case UPB_DESCRIPTOR_TYPE_STRING:   mapped = UPB_TYPE_STRING;  break;
+    case UPB_DESCRIPTOR_TYPE_BYTES:    mapped = UPB_TYPE_BYTES;   break;
+    case UPB_DESCRIPTOR_TYPE_GROUP:
+    case UPB_DESCRIPTOR_TYPE_MESSAGE:  mapped = UPB_TYPE_MESSAGE; break;
+    case UPB_DESCRIPTOR_TYPE_ENUM:     mapped = UPB_TYPE_ENUM;    break;
+    default:
+      known = false;
+      assert(false);
+  }
+  if (known) {
+    upb_fielddef_settype(f, mapped);
+  }
+
+  /* Step 2: the integer format is what tells apart descriptor types that
+   * share a field type. */
+  switch (type) {
+    case UPB_DESCRIPTOR_TYPE_FIXED64:
+    case UPB_DESCRIPTOR_TYPE_FIXED32:
+    case UPB_DESCRIPTOR_TYPE_SFIXED64:
+    case UPB_DESCRIPTOR_TYPE_SFIXED32:
+      upb_fielddef_setintfmt(f, UPB_INTFMT_FIXED);
+      break;
+    case UPB_DESCRIPTOR_TYPE_SINT64:
+    case UPB_DESCRIPTOR_TYPE_SINT32:
+      upb_fielddef_setintfmt(f, UPB_INTFMT_ZIGZAG);
+      break;
+    default:
+      upb_fielddef_setintfmt(f, UPB_INTFMT_VARIABLE);
+      break;
+  }
+
+  /* Step 3: groups are the tag-delimited flavor of submessage. */
+  upb_fielddef_settagdelim(f, type == UPB_DESCRIPTOR_TYPE_GROUP);
+}
+
+/* Maps the field's type, integer format and tag-delimited flag back to a
+ * descriptor type (the inverse of upb_fielddef_setdescriptortype()).
+ *
+ * Returns -1 for the combinations unsigned + zigzag, and 0 if the field type
+ * matches none of the cases.
+ *
+ * NOTE(review): the inner switches have no default, so an intfmt value
+ * outside the three listed ones would fall through into the next outer
+ * case -- presumably unreachable because setintfmt validates its argument;
+ * confirm. */
+upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
+  switch (upb_fielddef_type(f)) {
+    case UPB_TYPE_FLOAT:  return UPB_DESCRIPTOR_TYPE_FLOAT;
+    case UPB_TYPE_DOUBLE: return UPB_DESCRIPTOR_TYPE_DOUBLE;
+    case UPB_TYPE_BOOL:   return UPB_DESCRIPTOR_TYPE_BOOL;
+    case UPB_TYPE_STRING: return UPB_DESCRIPTOR_TYPE_STRING;
+    case UPB_TYPE_BYTES:  return UPB_DESCRIPTOR_TYPE_BYTES;
+    case UPB_TYPE_ENUM:   return UPB_DESCRIPTOR_TYPE_ENUM;
+    case UPB_TYPE_INT32:
+      switch (upb_fielddef_intfmt(f)) {
+        case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT32;
+        case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_SFIXED32;
+        case UPB_INTFMT_ZIGZAG:   return UPB_DESCRIPTOR_TYPE_SINT32;
+      }
+    case UPB_TYPE_INT64:
+      switch (upb_fielddef_intfmt(f)) {
+        case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT64;
+        case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_SFIXED64;
+        case UPB_INTFMT_ZIGZAG:   return UPB_DESCRIPTOR_TYPE_SINT64;
+      }
+    case UPB_TYPE_UINT32:
+      switch (upb_fielddef_intfmt(f)) {
+        case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT32;
+        case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_FIXED32;
+        /* No descriptor type exists for zigzag-encoded unsigned integers. */
+        case UPB_INTFMT_ZIGZAG:   return -1;
+      }
+    case UPB_TYPE_UINT64:
+      switch (upb_fielddef_intfmt(f)) {
+        case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT64;
+        case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_FIXED64;
+        /* No descriptor type exists for zigzag-encoded unsigned integers. */
+        case UPB_INTFMT_ZIGZAG:   return -1;
+      }
+    case UPB_TYPE_MESSAGE:
+      return upb_fielddef_istagdelim(f) ?
+          UPB_DESCRIPTOR_TYPE_GROUP : UPB_DESCRIPTOR_TYPE_MESSAGE;
+  }
+  return 0;
+}
+
+/* Stores |is_extension| on |f|.  Asserts that |f| is not frozen. */
+void upb_fielddef_setisextension(upb_fielddef *f, bool is_extension) {
+  assert(!upb_fielddef_isfrozen(f));
+  f->is_extension_ = is_extension;
+}
+
+/* Stores the |lazy| flag on |f|.  Asserts that |f| is not frozen. */
+void upb_fielddef_setlazy(upb_fielddef *f, bool lazy) {
+  assert(!upb_fielddef_isfrozen(f));
+  f->lazy_ = lazy;
+}
+
+/* Stores the |packed| flag on |f|.  Asserts that |f| is not frozen. */
+void upb_fielddef_setpacked(upb_fielddef *f, bool packed) {
+  assert(!upb_fielddef_isfrozen(f));
+  f->packed_ = packed;
+}
+
+/* Sets the label of |f|.  Asserts that |f| is not frozen and that |label|
+ * passes upb_fielddef_checklabel(). */
+void upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) {
+  assert(!upb_fielddef_isfrozen(f));
+  assert(upb_fielddef_checklabel(label));
+  f->label_ = label;
+}
+
+/* Sets the integer format of |f|.  Asserts that |f| is not frozen and that
+ * |fmt| passes upb_fielddef_checkintfmt(). */
+void upb_fielddef_setintfmt(upb_fielddef *f, upb_intfmt_t fmt) {
+  assert(!upb_fielddef_isfrozen(f));
+  assert(upb_fielddef_checkintfmt(fmt));
+  f->intfmt = fmt;
+}
+
+/* Stores the tag-delimited flag on |f|.  Asserts that |f| is not frozen. */
+void upb_fielddef_settagdelim(upb_fielddef *f, bool tag_delim) {
+  assert(!upb_fielddef_isfrozen(f));
+  f->tagdelim = tag_delim;
+}
+
+/* Common precondition check for the typed default setters.  Returns false
+ * (after tripping an assert in debug builds) unless |f| has its type set, is
+ * not frozen, and has exactly type |type|.  On success any string default
+ * currently stored is released and the default is marked as non-string. */
+static bool checksetdefault(upb_fielddef *f, upb_fieldtype_t type) {
+  bool settable = f->type_is_set_ && !upb_fielddef_isfrozen(f) &&
+                  upb_fielddef_type(f) == type;
+  if (!settable) {
+    assert(false);
+    return false;
+  }
+
+  if (f->default_is_string) {
+    str_t *old = f->defaultval.bytes;
+    /* A missing string is only tolerated for enum fields. */
+    assert(old || type == UPB_TYPE_ENUM);
+    if (old) freestr(old);
+  }
+
+  f->default_is_string = false;
+  return true;
+}
+
+/* Stores |value| as the default of an INT64 field; does nothing if
+ * checksetdefault() rejects |f|. */
+void upb_fielddef_setdefaultint64(upb_fielddef *f, int64_t value) {
+  if (!checksetdefault(f, UPB_TYPE_INT64)) return;
+  f->defaultval.sint = value;
+}
+
+/* Stores |value| as the default of an ENUM or INT32 field.  The ENUM check is
+ * tried first (only when the field type is ENUM); if that does not succeed
+ * the INT32 check is tried. */
+void upb_fielddef_setdefaultint32(upb_fielddef *f, int32_t value) {
+  bool accepted = upb_fielddef_type(f) == UPB_TYPE_ENUM &&
+                  checksetdefault(f, UPB_TYPE_ENUM);
+  if (!accepted) accepted = checksetdefault(f, UPB_TYPE_INT32);
+  if (accepted) f->defaultval.sint = value;
+}
+
+/* Stores |value| as the default of a UINT64 field; does nothing if
+ * checksetdefault() rejects |f|. */
+void upb_fielddef_setdefaultuint64(upb_fielddef *f, uint64_t value) {
+  if (!checksetdefault(f, UPB_TYPE_UINT64)) return;
+  f->defaultval.uint = value;
+}
+
+/* Stores |value| as the default of a UINT32 field; does nothing if
+ * checksetdefault() rejects |f|. */
+void upb_fielddef_setdefaultuint32(upb_fielddef *f, uint32_t value) {
+  if (!checksetdefault(f, UPB_TYPE_UINT32)) return;
+  f->defaultval.uint = value;
+}
+
+/* Stores |value| as the default of a BOOL field (kept in the unsigned slot of
+ * the default value); does nothing if checksetdefault() rejects |f|. */
+void upb_fielddef_setdefaultbool(upb_fielddef *f, bool value) {
+  if (!checksetdefault(f, UPB_TYPE_BOOL)) return;
+  f->defaultval.uint = value;
+}
+
+/* Stores |value| as the default of a FLOAT field; does nothing if
+ * checksetdefault() rejects |f|. */
+void upb_fielddef_setdefaultfloat(upb_fielddef *f, float value) {
+  if (!checksetdefault(f, UPB_TYPE_FLOAT)) return;
+  f->defaultval.flt = value;
+}
+
+/* Stores |value| as the default of a DOUBLE field; does nothing if
+ * checksetdefault() rejects |f|. */
+void upb_fielddef_setdefaultdouble(upb_fielddef *f, double value) {
+  if (!checksetdefault(f, UPB_TYPE_DOUBLE)) return;
+  f->defaultval.dbl = value;
+}
+
+/* Sets the default of a string/bytes or enum field to a copy of the |len|
+ * bytes at |str|.  For enum fields the bytes must pass upb_isident(); if they
+ * do not, false is returned (the check is given |s| for error reporting).
+ * Any string default already stored is released first.  Returns true once
+ * the new default is installed. */
+bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len,
+                                upb_status *s) {
+  assert(upb_fielddef_isstring(f) || f->type_ == UPB_TYPE_ENUM);
+  if (f->type_ == UPB_TYPE_ENUM && !upb_isident(str, len, false, s))
+    return false;
+
+  if (!f->default_is_string) {
+    /* Only enum fields may currently hold a non-string default. */
+    assert(f->type_ == UPB_TYPE_ENUM);
+  } else {
+    str_t *previous = f->defaultval.bytes;
+    assert(previous || f->type_ == UPB_TYPE_ENUM);
+    if (previous) freestr(previous);
+  }
+
+  f->defaultval.bytes = newstr(str, len);
+  f->default_is_string = true;
+  return true;
+}
+
+/* NUL-terminated convenience wrapper around upb_fielddef_setdefaultstr(); a
+ * NULL |str| is forwarded with length 0.  Asserts that the field type has
+ * been set. */
+void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str,
+                                 upb_status *s) {
+  size_t len;
+  assert(f->type_is_set_);
+  len = str ? strlen(str) : 0;
+  upb_fielddef_setdefaultstr(f, str, len, s);
+}
+
+/* Returns whatever enumdefaultint32() reports for |f|; the numeric value it
+ * writes is discarded.  Asserts that |f| is a typed ENUM field. */
+bool upb_fielddef_enumhasdefaultint32(const upb_fielddef *f) {
+  int32_t val;
+  assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
+  return enumdefaultint32(f, &val);
+}
+
+/* Returns true when enumdefaultstr() yields a non-NULL result for |f|.
+ * Asserts that |f| is a typed ENUM field. */
+bool upb_fielddef_enumhasdefaultstr(const upb_fielddef *f) {
+  assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
+  return enumdefaultstr(f) != NULL;
+}
+
+/* Checks that |subdef| is an acceptable subdef for |f|: a msgdef for MESSAGE
+ * fields, an enumdef for ENUM fields.  Any other field type is rejected.  On
+ * rejection an explanatory message is stored in |s| and false is returned. */
+static bool upb_subdef_typecheck(upb_fielddef *f, const upb_def *subdef,
+                                 upb_status *s) {
+  const char *errmsg;
+
+  if (f->type_ == UPB_TYPE_MESSAGE) {
+    if (upb_dyncast_msgdef(subdef)) return true;
+    errmsg = "invalid subdef type for this submessage field";
+  } else if (f->type_ == UPB_TYPE_ENUM) {
+    if (upb_dyncast_enumdef(subdef)) return true;
+    errmsg = "invalid subdef type for this enum field";
+  } else {
+    errmsg = "only message and enum fields can have a subdef";
+  }
+
+  upb_status_seterrmsg(s, errmsg);
+  return false;
+}
+
+/* Drops whatever |f| currently holds as its subdef: frees the name when the
+ * subdef is symbolic, otherwise releases the ref on the def (if any).  The
+ * stored pointer itself is left for the caller to overwrite. */
+static void release_subdef(upb_fielddef *f) {
+  if (f->subdef_is_symbolic) {
+    free(f->sub.name);
+    return;
+  }
+  if (f->sub.def) upb_unref2(f->sub.def, f);
+}
+
+/* Replaces the subdef of |f| with |subdef| (which may be NULL to clear it).
+ * A non-NULL |subdef| must pass upb_subdef_typecheck(), otherwise false is
+ * returned and |f| is untouched.  Asserts that |f| is not frozen and is of a
+ * type that takes a subdef. */
+bool upb_fielddef_setsubdef(upb_fielddef *f, const upb_def *subdef,
+                            upb_status *s) {
+  assert(!upb_fielddef_isfrozen(f));
+  assert(upb_fielddef_hassubdef(f));
+
+  if (subdef != NULL && !upb_subdef_typecheck(f, subdef, s)) {
+    return false;
+  }
+
+  /* Release the old value before overwriting the union. */
+  release_subdef(f);
+  f->subdef_is_symbolic = false;
+  f->sub.def = subdef;
+  if (f->sub.def != NULL) {
+    upb_ref2(f->sub.def, f);
+  }
+  return true;
+}
+
+/* Typed wrapper: upcasts the msgdef and forwards to upb_fielddef_setsubdef(). */
+bool upb_fielddef_setmsgsubdef(upb_fielddef *f, const upb_msgdef *subdef,
+                               upb_status *s) {
+  return upb_fielddef_setsubdef(f, upb_msgdef_upcast(subdef), s);
+}
+
+/* Typed wrapper: upcasts the enumdef and forwards to
+ * upb_fielddef_setsubdef(). */
+bool upb_fielddef_setenumsubdef(upb_fielddef *f, const upb_enumdef *subdef,
+                                upb_status *s) {
+  return upb_fielddef_setsubdef(f, upb_enumdef_upcast(subdef), s);
+}
+
+/* Makes the subdef of |f| symbolic: the current subdef is released and a copy
+ * of |name| (made with upb_strdup()) is stored instead.  Returns false with an
+ * error in |s| if the field type does not take a subdef.  Asserts that |f| is
+ * not frozen. */
+bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name,
+                                upb_status *s) {
+  assert(!upb_fielddef_isfrozen(f));
+  if (!upb_fielddef_hassubdef(f)) {
+    upb_status_seterrmsg(s, "field type does not accept a subdef");
+    return false;
+  }
+  /* TODO: validate name (upb_isident() doesn't quite work atm because this name
+   * may have a leading "."). */
+  release_subdef(f);
+  /* NOTE(review): the result of upb_strdup() is stored unchecked. */
+  f->sub.name = upb_strdup(name);
+  f->subdef_is_symbolic = true;
+  return true;
+}
+
+/* True when the type of |f| is UPB_TYPE_MESSAGE. */
+bool upb_fielddef_issubmsg(const upb_fielddef *f) {
+  return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
+}
+
+/* True when the type of |f| is UPB_TYPE_STRING or UPB_TYPE_BYTES. */
+bool upb_fielddef_isstring(const upb_fielddef *f) {
+  return upb_fielddef_type(f) == UPB_TYPE_STRING ||
+         upb_fielddef_type(f) == UPB_TYPE_BYTES;
+}
+
+/* True when the label of |f| is UPB_LABEL_REPEATED. */
+bool upb_fielddef_isseq(const upb_fielddef *f) {
+  return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
+}
+
+/* A field is primitive when it is neither a string/bytes field nor a
+ * submessage field. */
+bool upb_fielddef_isprimitive(const upb_fielddef *f) {
+  return !(upb_fielddef_isstring(f) || upb_fielddef_issubmsg(f));
+}
+
+/* True when |f| is a repeated submessage field whose message subdef is
+ * flagged as a map entry.  The subdef is only consulted once the first two
+ * conditions hold. */
+bool upb_fielddef_ismap(const upb_fielddef *f) {
+  return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) &&
+         upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
+}
+
+/* True for the field types that take a subdef: submessages and enums. */
+bool upb_fielddef_hassubdef(const upb_fielddef *f) {
+  return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
+}
+
+/* Inclusive range test: true iff low <= x <= high. */
+static bool between(int32_t x, int32_t low, int32_t high) {
+  return !(x < low || x > high);
+}
+
+/* Range validators for raw integer values: labels are accepted in [1, 3],
+ * types in [1, 11] and integer formats in [1, 3] (all inclusive). */
+bool upb_fielddef_checklabel(int32_t label) { return between(label, 1, 3); }
+bool upb_fielddef_checktype(int32_t type) { return between(type, 1, 11); }
+bool upb_fielddef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); }
+
+/* Range validator: descriptor types are accepted in [1, 18] inclusive. */
+bool upb_fielddef_checkdescriptortype(int32_t type) {
+  return between(type, 1, 18);
+}
+
+/* upb_msgdef *****************************************************************/
+
+/* Refcount visitor for msgdefs: reports an edge from |r| to each of the
+ * message's fields, then to each of its oneofs. */
+static void visitmsg(const upb_refcounted *r, upb_refcounted_visit *visit,
+                     void *closure) {
+  const upb_msgdef *msg = (const upb_msgdef*)r;
+  upb_msg_field_iter field_it;
+  upb_msg_oneof_iter oneof_it;
+
+  upb_msg_field_begin(&field_it, msg);
+  while (!upb_msg_field_done(&field_it)) {
+    upb_fielddef *field = upb_msg_iter_field(&field_it);
+    visit(r, upb_fielddef_upcast2(field), closure);
+    upb_msg_field_next(&field_it);
+  }
+
+  upb_msg_oneof_begin(&oneof_it, msg);
+  while (!upb_msg_oneof_done(&oneof_it)) {
+    upb_oneofdef *oneof = upb_msg_iter_oneof(&oneof_it);
+    visit(r, upb_oneofdef_upcast2(oneof), closure);
+    upb_msg_oneof_next(&oneof_it);
+  }
+}
+
+/* Refcount free callback for msgdefs: tears down the three lookup tables and
+ * the def base, then frees the msgdef's own storage. */
+static void freemsg(upb_refcounted *r) {
+  upb_msgdef *m = (upb_msgdef*)r;
+  upb_strtable_uninit(&m->ntoo);
+  upb_strtable_uninit(&m->ntof);
+  upb_inttable_uninit(&m->itof);
+  upb_def_uninit(upb_msgdef_upcast_mutable(m));
+  free(m);
+}
+
+/* Allocates an empty msgdef, passing |owner| to upb_def_init().  Returns
+ * NULL if the allocation or any of the table initializations fails.
+ *
+ * The error labels unwind in reverse order of initialization:
+ *   err1: ntof and itof are initialized,
+ *   err2: only itof is initialized,
+ *   err3: no table is initialized yet.
+ * Each failure site must therefore jump to the label matching what has been
+ * successfully initialized before it. */
+upb_msgdef *upb_msgdef_new(const void *owner) {
+  static const struct upb_refcounted_vtbl vtbl = {visitmsg, freemsg};
+  upb_msgdef *m = malloc(sizeof(*m));
+  if (!m) return NULL;
+  /* No table exists yet, so a failure here must not uninit itof. */
+  if (!upb_def_init(upb_msgdef_upcast_mutable(m), UPB_DEF_MSG, &vtbl, owner))
+    goto err3;
+  if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err3;
+  if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err2;
+  if (!upb_strtable_init(&m->ntoo, UPB_CTYPE_PTR)) goto err1;
+  m->map_entry = false;
+  return m;
+
+err1:
+  upb_strtable_uninit(&m->ntof);
+err2:
+  upb_inttable_uninit(&m->itof);
+err3:
+  free(m);
+  return NULL;
+}
+
+/* Creates a copy of |m| via upb_msgdef_new(owner).  The full name and the
+ * map_entry flag are copied; every field that is not a member of a oneof is
+ * dup'd and added directly, then every oneof is dup'd and added.  Returns
+ * NULL, after releasing the partially built copy, if any step fails. */
+upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) {
+  bool ok;
+  upb_msg_field_iter i;
+  upb_msg_oneof_iter o;
+
+  upb_msgdef *newm = upb_msgdef_new(owner);
+  if (!newm) return NULL;
+  ok = upb_def_setfullname(upb_msgdef_upcast_mutable(newm),
+                           upb_def_fullname(upb_msgdef_upcast(m)),
+                           NULL);
+  newm->map_entry = m->map_entry;
+  UPB_ASSERT_VAR(ok, ok);
+  for(upb_msg_field_begin(&i, m);
+      !upb_msg_field_done(&i);
+      upb_msg_field_next(&i)) {
+    const upb_fielddef *src = upb_msg_iter_field(&i);
+    upb_fielddef *f;
+    /* Fields in oneofs are dup'd below.  The test is made on the source
+     * field, before dup'ing, so that no copy is created (and leaked) for a
+     * skipped field and a failed dup is never dereferenced. */
+    if (upb_fielddef_containingoneof(src)) continue;
+    f = upb_fielddef_dup(src, &f);
+    if (!f || !upb_msgdef_addfield(newm, f, &f, NULL)) {
+      /* The donated ref is only consumed when the add succeeds. */
+      if (f) upb_fielddef_unref(f, &f);
+      upb_msgdef_unref(newm, owner);
+      return NULL;
+    }
+  }
+  for(upb_msg_oneof_begin(&o, m);
+      !upb_msg_oneof_done(&o);
+      upb_msg_oneof_next(&o)) {
+    upb_oneofdef *f = upb_oneofdef_dup(upb_msg_iter_oneof(&o), &f);
+    if (!f || !upb_msgdef_addoneof(newm, f, &f, NULL)) {
+      /* The donated ref is only consumed when the add succeeds. */
+      if (f) upb_oneofdef_unref(f, &f);
+      upb_msgdef_unref(newm, owner);
+      return NULL;
+    }
+  }
+  return newm;
+}
+
+/* Freezes |m| by passing it to upb_def_freeze() as a one-element array. */
+bool upb_msgdef_freeze(upb_msgdef *m, upb_status *status) {
+  upb_def *d = upb_msgdef_upcast_mutable(m);
+  return upb_def_freeze(&d, 1, status);
+}
+
+/* Returns the full name stored on the underlying def. */
+const char *upb_msgdef_fullname(const upb_msgdef *m) {
+  return upb_def_fullname(upb_msgdef_upcast(m));
+}
+
+/* Sets the full name on the underlying def; the result and any error in |s|
+ * come from upb_def_setfullname(). */
+bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname,
+                            upb_status *s) {
+  return upb_def_setfullname(upb_msgdef_upcast_mutable(m), fullname, s);
+}
+
+/* Helper: verifies that field |f| may be added to msgdef |m|.  The field must
+ * not already belong to a message, must have both a name and a non-zero
+ * number, and neither may already be present in |m|.  On failure the reason
+ * is stored in |s| and false is returned. */
+static bool check_field_add(const upb_msgdef *m, const upb_fielddef *f,
+                            upb_status *s) {
+  if (upb_fielddef_containingtype(f) != NULL) {
+    upb_status_seterrmsg(s, "fielddef already belongs to a message");
+    return false;
+  }
+
+  if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
+    upb_status_seterrmsg(s, "field name or number were not set");
+    return false;
+  }
+
+  if (upb_msgdef_ntofz(m, upb_fielddef_name(f)) ||
+      upb_msgdef_itof(m, upb_fielddef_number(f))) {
+    upb_status_seterrmsg(s, "duplicate field name or number for field");
+    return false;
+  }
+
+  return true;
+}
+
+/* Unconditionally attaches |f| to |m|; callers are expected to have validated
+ * the addition first.  The field's containing type is pointed at |m|, the
+ * field is entered into the number and name tables, and refs are taken in
+ * both directions.  If |ref_donor| is non-NULL, the ref it holds on |f| is
+ * released at the end. */
+static void add_field(upb_msgdef *m, upb_fielddef *f, const void *ref_donor) {
+  release_containingtype(f);
+  f->msg.def = m;
+  f->msg_is_symbolic = false;
+  /* NOTE(review): the results of the two inserts are not checked. */
+  upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f));
+  upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f));
+  upb_ref2(f, m);
+  upb_ref2(m, f);
+  if (ref_donor) upb_fielddef_unref(f, ref_donor);
+}
+
+/* Adds field |f| to msgdef |m|.  Returns true if the field was added or was
+ * already part of |m|; returns false with an error in |s| if the field fails
+ * check_field_add() or is a member of a oneof.  On a fresh add, |ref_donor|
+ * (if non-NULL) gives up its ref on |f|. */
+bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor,
+                         upb_status *s) {
+  /* TODO: extensions need to have a separate namespace, because proto2 allows a
+   * top-level extension (ie. one not in any package) to have the same name as a
+   * field from the message.
+   *
+   * This also implies that there needs to be a separate lookup-by-name method
+   * for extensions.  It seems desirable for iteration to return both extensions
+   * and non-extensions though.
+   *
+   * We also need to validate that the field number is in an extension range iff
+   * it is an extension. */
+
+  /* Idempotence: a field that already belongs to |m| is accepted as-is. */
+  if (upb_fielddef_containingtype(f) == m) return true;
+
+  /* Validate everything before touching any state. */
+  if (!check_field_add(m, f, s)) return false;
+
+  if (upb_fielddef_containingoneof(f) != NULL) {
+    /* Fields in a oneof can only be added by adding the oneof to the msgdef. */
+    upb_status_seterrmsg(s, "fielddef is part of a oneof");
+    return false;
+  }
+
+  /* All checks passed; commit. */
+  add_field(m, f, ref_donor);
+  return true;
+}
+
+/* Adds oneof |o|, together with all of the fields it currently contains, to
+ * msgdef |m|.  Nothing is modified unless every check passes.  Returns false
+ * with an error in |s| if the oneof already has a parent, has no name, clashes
+ * with an existing oneof name, or if any of its fields fails
+ * check_field_add().  On success |ref_donor| (if non-NULL) gives up its ref on
+ * |o|. */
+bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor,
+                         upb_status *s) {
+  upb_oneof_iter it;
+
+  /* Check various conditions that would prevent this oneof from being added. */
+  if (upb_oneofdef_containingtype(o)) {
+    upb_status_seterrmsg(s, "oneofdef already belongs to a message");
+    return false;
+  } else if (upb_oneofdef_name(o) == NULL) {
+    upb_status_seterrmsg(s, "oneofdef name was not set");
+    return false;
+  } else if (upb_msgdef_ntooz(m, upb_oneofdef_name(o))) {
+    upb_status_seterrmsg(s, "duplicate oneof name");
+    return false;
+  }
+
+  /* Check that all of the oneof's fields do not conflict with names or numbers
+   * of fields already in the message. */
+  for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) {
+    const upb_fielddef *f = upb_oneof_iter_field(&it);
+    if (!check_field_add(m, f, s)) {
+      return false;
+    }
+  }
+
+  /* Everything checks out -- commit now. */
+
+  /* Add oneof itself first. */
+  o->parent = m;
+  upb_strtable_insert(&m->ntoo, upb_oneofdef_name(o), upb_value_ptr(o));
+  upb_ref2(o, m);
+  upb_ref2(m, o);
+
+  /* Add each field of the oneof directly to the msgdef. */
+  for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) {
+    upb_fielddef *f = upb_oneof_iter_field(&it);
+    /* No donor: the fields stay referenced through the oneof. */
+    add_field(m, f, NULL);
+  }
+
+  if (ref_donor) upb_oneofdef_unref(o, ref_donor);
+
+  return true;
+}
+
+/* Looks up a field of |m| by number; NULL if there is no such field. */
+const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
+  upb_value found;
+  if (!upb_inttable_lookup32(&m->itof, i, &found)) {
+    return NULL;
+  }
+  return upb_value_getptr(found);
+}
+
+/* Looks up a field of |m| by name (|len| bytes at |name|); NULL if there is
+ * no such field. */
+const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
+                                    size_t len) {
+  upb_value found;
+  if (!upb_strtable_lookup2(&m->ntof, name, len, &found)) {
+    return NULL;
+  }
+  return upb_value_getptr(found);
+}
+
+/* Looks up a oneof of |m| by name (|len| bytes at |name|); NULL if there is
+ * no such oneof. */
+const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
+                                    size_t len) {
+  upb_value found;
+  if (!upb_strtable_lookup2(&m->ntoo, name, len, &found)) {
+    return NULL;
+  }
+  return upb_value_getptr(found);
+}
+
+/* Number of entries in the name-to-field table of |m|. */
+int upb_msgdef_numfields(const upb_msgdef *m) {
+  return upb_strtable_count(&m->ntof);
+}
+
+/* Number of entries in the name-to-oneof table of |m|. */
+int upb_msgdef_numoneofs(const upb_msgdef *m) {
+  return upb_strtable_count(&m->ntoo);
+}
+
+/* Stores the map-entry flag on |m|.  Asserts that |m| is not frozen. */
+void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) {
+  assert(!upb_msgdef_isfrozen(m));
+  m->map_entry = map_entry;
+}
+
+/* Returns the map-entry flag of |m|. */
+bool upb_msgdef_mapentry(const upb_msgdef *m) {
+  return m->map_entry;
+}
+
+/* Field iteration over a msgdef.  These are thin wrappers around the inttable
+ * iterator applied to the message's number-to-field table. */
+
+/* Starts iterating over the fields of |m|. */
+void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
+  upb_inttable_begin(iter, &m->itof);
+}
+
+/* Advances to the next field. */
+void upb_msg_field_next(upb_msg_field_iter *iter) { upb_inttable_next(iter); }
+
+/* True once the iterator has run past the last field. */
+bool upb_msg_field_done(const upb_msg_field_iter *iter) {
+  return upb_inttable_done(iter);
+}
+
+/* Returns the field the iterator currently points at. */
+upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
+  return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
+}
+
+/* Puts the iterator into the "done" state. */
+void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) {
+  upb_inttable_iter_setdone(iter);
+}
+
+/* Oneof iteration over a msgdef.  These are thin wrappers around the strtable
+ * iterator applied to the message's name-to-oneof table. */
+
+/* Starts iterating over the oneofs of |m|. */
+void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
+  upb_strtable_begin(iter, &m->ntoo);
+}
+
+/* Advances to the next oneof. */
+void upb_msg_oneof_next(upb_msg_oneof_iter *iter) { upb_strtable_next(iter); }
+
+/* True once the iterator has run past the last oneof. */
+bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
+  return upb_strtable_done(iter);
+}
+
+/* Returns the oneof the iterator currently points at. */
+upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
+  return (upb_oneofdef*)upb_value_getptr(upb_strtable_iter_value(iter));
+}
+
+/* Puts the iterator into the "done" state. */
+void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) {
+  upb_strtable_iter_setdone(iter);
+}
+
+/* upb_oneofdef ***************************************************************/
+
+/* Refcount visitor for oneofdefs: reports an edge from |r| to each member
+ * field and, when the oneof has a parent, to the parent msgdef. */
+static void visitoneof(const upb_refcounted *r, upb_refcounted_visit *visit,
+                       void *closure) {
+  const upb_oneofdef *oneof = (const upb_oneofdef*)r;
+  upb_oneof_iter it;
+
+  upb_oneof_begin(&it, oneof);
+  while (!upb_oneof_done(&it)) {
+    const upb_fielddef *field = upb_oneof_iter_field(&it);
+    visit(r, upb_fielddef_upcast2(field), closure);
+    upb_oneof_next(&it);
+  }
+
+  if (oneof->parent != NULL) {
+    visit(r, upb_msgdef_upcast2(oneof->parent), closure);
+  }
+}
+
+/* Refcount free callback for oneofdefs: tears down both lookup tables and the
+ * def base, then frees the oneofdef's own storage. */
+static void freeoneof(upb_refcounted *r) {
+  upb_oneofdef *o = (upb_oneofdef*)r;
+  upb_strtable_uninit(&o->ntof);
+  upb_inttable_uninit(&o->itof);
+  upb_def_uninit(upb_oneofdef_upcast_mutable(o));
+  free(o);
+}
+
+/* Allocates an empty oneofdef with no parent, passing |owner| to
+ * upb_def_init().  Returns NULL if the allocation or any initialization step
+ * fails.
+ *
+ * Error labels: err1 is used once itof is initialized, err2 before that. */
+upb_oneofdef *upb_oneofdef_new(const void *owner) {
+  static const struct upb_refcounted_vtbl vtbl = {visitoneof, freeoneof};
+  upb_oneofdef *o = malloc(sizeof(*o));
+  /* The allocation must be checked before the first write through |o|. */
+  if (!o) return NULL;
+  o->parent = NULL;
+  if (!upb_def_init(upb_oneofdef_upcast_mutable(o), UPB_DEF_ONEOF, &vtbl,
+                    owner))
+    goto err2;
+  if (!upb_inttable_init(&o->itof, UPB_CTYPE_PTR)) goto err2;
+  if (!upb_strtable_init(&o->ntof, UPB_CTYPE_PTR)) goto err1;
+  return o;
+
+err1:
+  upb_inttable_uninit(&o->itof);
+err2:
+  free(o);
+  return NULL;
+}
+
+/* Creates a copy of |o| via upb_oneofdef_new(owner): the name is copied and
+ * each member field is dup'd and added to the copy.  Returns NULL, after
+ * releasing the partially built copy, if any step fails. */
+upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner) {
+  bool ok;
+  upb_oneof_iter i;
+  upb_oneofdef *newo = upb_oneofdef_new(owner);
+  if (!newo) return NULL;
+  ok = upb_def_setfullname(upb_oneofdef_upcast_mutable(newo),
+                           upb_def_fullname(upb_oneofdef_upcast(o)), NULL);
+  UPB_ASSERT_VAR(ok, ok);
+  for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) {
+    upb_fielddef *f = upb_fielddef_dup(upb_oneof_iter_field(&i), &f);
+    if (!f || !upb_oneofdef_addfield(newo, f, &f, NULL)) {
+      /* The donated ref is only consumed when the add succeeds, so release
+       * it here to avoid leaking the dup'd field. */
+      if (f) upb_fielddef_unref(f, &f);
+      upb_oneofdef_unref(newo, owner);
+      return NULL;
+    }
+  }
+  return newo;
+}
+
+/* Returns the oneof's name, which is stored as the full name of the
+ * underlying def. */
+const char *upb_oneofdef_name(const upb_oneofdef *o) {
+  return upb_def_fullname(upb_oneofdef_upcast(o));
+}
+
+/* Renames the oneof.  Refused (false, with an error in |s|) once the oneof
+ * has been added to a message; otherwise the result comes from
+ * upb_def_setfullname(). */
+bool upb_oneofdef_setname(upb_oneofdef *o, const char *fullname,
+                             upb_status *s) {
+  if (upb_oneofdef_containingtype(o)) {
+    upb_status_seterrmsg(s, "oneof already added to a message");
+    return false;
+  }
+  return upb_def_setfullname(upb_oneofdef_upcast_mutable(o), fullname, s);
+}
+
+/* Returns the msgdef this oneof belongs to, or NULL if it has none. */
+const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
+  return o->parent;
+}
+
+/* Number of entries in the oneof's name-to-field table. */
+int upb_oneofdef_numfields(const upb_oneofdef *o) {
+  return upb_strtable_count(&o->ntof);
+}
+
+/* Adds field |f| to oneof |o|.  Returns true if the field was added or was
+ * already a member of |o|; otherwise returns false with an error in |s|.
+ * All validation happens before any state is changed.  If the oneof already
+ * belongs to a msgdef and the field belongs to none, the field is first added
+ * to that msgdef.  On a fresh add, |ref_donor| (if non-NULL) gives up its ref
+ * on |f|.  Asserts that neither |o| nor its parent is frozen. */
+bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f,
+                           const void *ref_donor,
+                           upb_status *s) {
+  assert(!upb_oneofdef_isfrozen(o));
+  assert(!o->parent || !upb_msgdef_isfrozen(o->parent));
+
+  /* This method is idempotent. Check if |f| is already part of this oneofdef
+   * and return immediately if so. */
+  if (upb_fielddef_containingoneof(f) == o) {
+    return true;
+  }
+
+  /* The field must have an OPTIONAL label. */
+  if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
+    upb_status_seterrmsg(s, "fields in oneof must have OPTIONAL label");
+    return false;
+  }
+
+  /* Check that no field with this name or number exists already in the oneof.
+   * Also check that the field is not already part of a oneof. */
+  if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
+    upb_status_seterrmsg(s, "field name or number were not set");
+    return false;
+  } else if (upb_oneofdef_itof(o, upb_fielddef_number(f)) ||
+             upb_oneofdef_ntofz(o, upb_fielddef_name(f))) {
+    upb_status_seterrmsg(s, "duplicate field name or number");
+    return false;
+  } else if (upb_fielddef_containingoneof(f) != NULL) {
+    upb_status_seterrmsg(s, "fielddef already belongs to a oneof");
+    return false;
+  }
+
+  /* We allow adding a field to the oneof either if the field is not part of a
+   * msgdef, or if it is and we are also part of the same msgdef. */
+  if (o->parent == NULL) {
+    /* If we're not in a msgdef, the field cannot be either. Otherwise we would
+     * need to magically add this oneof to a msgdef to remain consistent, which
+     * is surprising behavior. */
+    if (upb_fielddef_containingtype(f) != NULL) {
+      upb_status_seterrmsg(s, "fielddef already belongs to a message, but "
+                              "oneof does not");
+      return false;
+    }
+  } else {
+    /* If we're in a msgdef, the user can add fields that either aren't in any
+     * msgdef (in which case they're added to our msgdef) or already a part of
+     * our msgdef. */
+    if (upb_fielddef_containingtype(f) != NULL &&
+        upb_fielddef_containingtype(f) != o->parent) {
+      upb_status_seterrmsg(s, "fielddef belongs to a different message "
+                              "than oneof");
+      return false;
+    }
+  }
+
+  /* Commit phase. First add the field to our parent msgdef, if any, because
+   * that may fail; then add the field to our own tables. */
+
+  if (o->parent != NULL && upb_fielddef_containingtype(f) == NULL) {
+    /* No donor is passed here: the caller's donated ref is released below,
+     * after the field has been entered into this oneof. */
+    if (!upb_msgdef_addfield((upb_msgdef*)o->parent, f, NULL, s)) {
+      return false;
+    }
+  }
+
+  /* NOTE(review): release_containingtype() is defined outside this view; its
+   * effect on a field that was just attached to the parent msgdef should be
+   * confirmed. */
+  release_containingtype(f);
+  f->oneof = o;
+  upb_inttable_insert(&o->itof, upb_fielddef_number(f), upb_value_ptr(f));
+  upb_strtable_insert(&o->ntof, upb_fielddef_name(f), upb_value_ptr(f));
+  upb_ref2(f, o);
+  upb_ref2(o, f);
+  if (ref_donor) upb_fielddef_unref(f, ref_donor);
+
+  return true;
+}
+
+/* Looks up a member field of |o| by name (|length| bytes at |name|); NULL if
+ * there is no such field. */
+const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
+                                      const char *name, size_t length) {
+  upb_value found;
+  if (!upb_strtable_lookup2(&o->ntof, name, length, &found)) {
+    return NULL;
+  }
+  return upb_value_getptr(found);
+}
+
+/* Looks up a member field of |o| by number; NULL if there is no such field. */
+const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
+  upb_value found;
+  if (!upb_inttable_lookup32(&o->itof, num, &found)) {
+    return NULL;
+  }
+  return upb_value_getptr(found);
+}
+
+/* Field iteration over a oneofdef.  These are thin wrappers around the
+ * inttable iterator applied to the oneof's number-to-field table. */
+
+/* Starts iterating over the fields of |o|. */
+void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
+  upb_inttable_begin(iter, &o->itof);
+}
+
+/* Advances to the next field. */
+void upb_oneof_next(upb_oneof_iter *iter) {
+  upb_inttable_next(iter);
+}
+
+/* True once the iterator has run past the last field. */
+bool upb_oneof_done(upb_oneof_iter *iter) {
+  return upb_inttable_done(iter);
+}
+
+/* Returns the field the iterator currently points at. */
+upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
+  return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
+}
+
+/* Puts the iterator into the "done" state. */
+void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
+  upb_inttable_iter_setdone(iter);
+}
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Node of the singly-linked list of cleanup callbacks registered on an
+ * environment. */
+typedef struct cleanup_ent {
+  upb_cleanup_func *cleanup;  /* Callback to run. */
+  void *ud;                   /* Argument passed to |cleanup|. */
+  struct cleanup_ent *next;   /* Next entry, or NULL at the end of the list. */
+} cleanup_ent;
+
+static void *seeded_alloc(void *ud, void *ptr, size_t oldsize, size_t size);
+
+/* Default allocator **********************************************************/
+
+/* Just use realloc, keeping all allocated blocks in a linked list to destroy at
+ * the end. */
+
+/* Header placed immediately before every block handed out by default_alloc();
+ * the caller's data starts sizeof(mem_block) bytes after the header. */
+typedef struct mem_block {
+  /* List is doubly-linked, because in cases where realloc() moves an existing
+   * block, we need to be able to remove the old pointer from the list
+   * efficiently. */
+  struct mem_block *prev, *next;
+#ifndef NDEBUG
+  size_t size;  /* Doesn't include mem_block structure. */
+#endif
+} mem_block;
+
+/* State of the default allocator: the head of its list of live blocks. */
+typedef struct {
+  mem_block *head;
+} default_alloc_ud;
+
+/* realloc()-based allocation function that tracks every block in the list
+ * held by |_ud| (a default_alloc_ud) so they can all be freed later.
+ *
+ * |ptr| is NULL for a fresh allocation; otherwise it must be a data pointer
+ * previously returned by this function, since the block header is located by
+ * stepping back sizeof(mem_block) bytes.  |oldsize| is only used for a debug
+ * assertion.  Returns a pointer to |size| usable bytes, or NULL if realloc()
+ * fails. */
+static void *default_alloc(void *_ud, void *ptr, size_t oldsize, size_t size) {
+  default_alloc_ud *ud = _ud;
+  mem_block *from, *block;
+  void *ret;
+  UPB_UNUSED(oldsize);
+
+  /* Recover the header that precedes the caller's data, if any. */
+  from = ptr ? (void*)((char*)ptr - sizeof(mem_block)) : NULL;
+
+#ifndef NDEBUG
+  if (from) {
+    assert(oldsize <= from->size);
+  }
+#endif
+
+  /* TODO(haberman): we probably need to provide even better alignment here,
+   * like 16-byte alignment of the returned data pointer. */
+  block = realloc(from, size + sizeof(mem_block));
+  if (!block) return NULL;
+  ret = (char*)block + sizeof(*block);
+
+#ifndef NDEBUG
+  block->size = size;
+#endif
+
+  if (from) {
+    if (block != from) {
+      /* The block was moved, so pointers in next and prev blocks must be
+       * updated to its new location. */
+      if (block->next) block->next->prev = block;
+      if (block->prev) block->prev->next = block;
+      if (ud->head == from) ud->head = block;
+    }
+  } else {
+    /* Insert at head of linked list. */
+    block->prev = NULL;
+    block->next = ud->head;
+    if (block->next) block->next->prev = block;
+    ud->head = block;
+  }
+
+  return ret;
+}
+
+/* Frees every block still linked into the default allocator's list.  The
+ * list head itself is left untouched. */
+static void default_alloc_cleanup(void *_ud) {
+  default_alloc_ud *ud = _ud;
+  mem_block *cur, *following;
+
+  for (cur = ud->head; cur; cur = following) {
+    /* Read the link before the node is released. */
+    following = cur->next;
+    free(cur);
+  }
+}
+
+
+/* Standard error functions ***************************************************/
+
+/* Error function that ignores both arguments and returns false.  Installed
+ * by upb_env_init(). */
+static bool default_err(void *ud, const upb_status *status) {
+  UPB_UNUSED(ud);
+  UPB_UNUSED(status);
+  return false;
+}
+
+/* Error function that copies |status| into the upb_status passed as |ud| and
+ * returns false.  Installed by upb_env_reporterrorsto(). */
+static bool write_err_to(void *ud, const upb_status *status) {
+  upb_status *copy_to = ud;
+  upb_status_copy(copy_to, status);
+  return false;
+}
+
+
+/* upb_env ********************************************************************/
+
+/* Initializes |e|: marks it ok, zeroes the byte counter, empties the cleanup
+ * list and the embedded default-allocator state, and installs default_alloc
+ * and default_err as the allocation and error functions. */
+void upb_env_init(upb_env *e) {
+  default_alloc_ud *ud = (default_alloc_ud*)&e->default_alloc_ud;
+  e->ok_ = true;
+  e->bytes_allocated = 0;
+  e->cleanup_head = NULL;
+
+  ud->head = NULL;
+
+  /* Set default functions. */
+  upb_env_setallocfunc(e, default_alloc, ud);
+  upb_env_seterrorfunc(e, default_err, NULL);
+}
+
+/* Tears down |e|: runs every registered cleanup function, starting from the
+ * list head, and then frees the default allocator's blocks if the default
+ * allocator is still the one installed. */
+void upb_env_uninit(upb_env *e) {
+  cleanup_ent *entry;
+
+  for (entry = e->cleanup_head; entry; entry = entry->next) {
+    entry->cleanup(entry->ud);
+  }
+
+  /* Must do this after running cleanup functions, because this will delete
+     the memory we store our cleanup entries in! */
+  if (e->alloc == default_alloc) {
+    default_alloc_cleanup(e->alloc_ud);
+  }
+}
+
+/* Installs |alloc| as the environment's allocation function, with |ud| as the
+ * first argument passed to it. */
+UPB_FORCEINLINE void upb_env_setallocfunc(upb_env *e, upb_alloc_func *alloc,
+                                          void *ud) {
+  e->alloc = alloc;
+  e->alloc_ud = ud;
+}
+
+/* Installs |func| as the environment's error function, with |ud| as the first
+ * argument passed to it. */
+UPB_FORCEINLINE void upb_env_seterrorfunc(upb_env *e, upb_error_func *func,
+                                          void *ud) {
+  e->err = func;
+  e->err_ud = ud;
+}
+
+/* Installs an error function that copies each reported status into
+ * |status|. */
+void upb_env_reporterrorsto(upb_env *e, upb_status *status) {
+  e->err = write_err_to;
+  e->err_ud = status;
+}
+
+/* Returns the ok flag, which is set by upb_env_init() and cleared by
+ * upb_env_reporterror(). */
+bool upb_env_ok(const upb_env *e) {
+  return e->ok_;
+}
+
+/* Clears the ok flag and forwards |status| to the installed error function,
+ * returning that function's result. */
+bool upb_env_reporterror(upb_env *e, const upb_status *status) {
+  e->ok_ = false;
+  return e->err(e->err_ud, status);
+}
+
+/* Registers |func| to be called with |ud| by upb_env_uninit().  The list
+ * entry is allocated from the environment itself and pushed at the head of
+ * the cleanup list.  Returns false if that allocation fails. */
+bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud) {
+  cleanup_ent *ent = upb_env_malloc(e, sizeof(cleanup_ent));
+  if (!ent) return false;
+
+  ent->cleanup = func;
+  ent->ud = ud;
+  ent->next = e->cleanup_head;
+  e->cleanup_head = ent;
+
+  return true;
+}
+
+/* Allocates |size| bytes through the environment's allocation function and
+ * returns its result.  |size| is added to the byte counter before the call,
+ * whether or not the allocation then succeeds. */
+void *upb_env_malloc(upb_env *e, size_t size) {
+  e->bytes_allocated += size;
+  if (e->alloc == seeded_alloc) {
+    /* This is equivalent to the next branch, but allows inlining for a
+     * measurable perf benefit. */
+    return seeded_alloc(e->alloc_ud, NULL, 0, size);
+  } else {
+    return e->alloc(e->alloc_ud, NULL, 0, size);
+  }
+}
+
+/* Resizes the block at |ptr| from |oldsize| to |size| bytes through the
+ * environment's allocation function and returns its result, which is NULL if
+ * the allocation function fails.  Asserts that the block is not shrinking.
+ * In debug builds the bytes past |oldsize| of a successfully returned block
+ * are filled with 0xff. */
+void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size) {
+  char *ret;
+  assert(oldsize <= size);
+  ret = e->alloc(e->alloc_ud, ptr, oldsize, size);
+
+#ifndef NDEBUG
+  /* Overwrite non-preserved memory to ensure callers are passing the oldsize
+   * that they truly require.  Skipped when the allocation failed, so that the
+   * failure is reported to the caller instead of writing through NULL. */
+  if (ret) memset(ret + oldsize, 0xff, size - oldsize);
+#endif
+
+  return ret;
+}
+
+/* Returns the running total of sizes requested through upb_env_malloc(). */
+size_t upb_env_bytesallocated(const upb_env *e) {
+  return e->bytes_allocated;
+}
+
+
+/* upb_seededalloc ************************************************************/
+
+/* Be conservative and choose 16 in case anyone is using SSE. */
+static const size_t maxalign = 16;
+
+/* Rounds |size| up to the next multiple of maxalign; a size that is already
+ * a multiple is returned unchanged. */
+static size_t align_up(size_t size) {
+  size_t excess = size % maxalign;
+  return excess ? size + (maxalign - excess) : size;
+}
+
+/* Allocation function for a upb_seededalloc (passed as |ud|).  The request is
+ * first rounded up with align_up().  A fresh allocation (oldsize == 0) that
+ * fits in what is left of the initial memory block is carved out of that block
+ * by advancing mem_ptr; every other request is forwarded to the fallback
+ * allocator, and need_cleanup is set. */
+UPB_FORCEINLINE static void *seeded_alloc(void *ud, void *ptr, size_t oldsize,
+                                          size_t size) {
+  upb_seededalloc *a = ud;
+
+  size = align_up(size);
+
+  assert(a->mem_limit >= a->mem_ptr);
+
+  if (oldsize == 0 && size <= (size_t)(a->mem_limit - a->mem_ptr)) {
+    /* Fast path: we can satisfy from the initial allocation. */
+    void *ret = a->mem_ptr;
+    a->mem_ptr += size;
+    return ret;
+  } else {
+    char *chptr = ptr;
+    /* Slow path: fallback to other allocator. */
+    a->need_cleanup = true;
+    /* Is `ptr` part of the user-provided initial block? Don't pass it to the
+     * default allocator if so; otherwise, it may try to realloc() the block. */
+    if (chptr >= a->mem_base && chptr < a->mem_limit) {
+      void *ret;
+      assert(chptr + oldsize <= a->mem_limit);
+      /* Allocate a fresh fallback block and copy the old contents over; the
+       * space inside the initial block is not reclaimed. */
+      ret = a->alloc(a->alloc_ud, NULL, 0, size);
+      if (ret) memcpy(ret, ptr, oldsize);
+      return ret;
+    } else {
+      return a->alloc(a->alloc_ud, ptr, oldsize, size);
+    }
+  }
+}
+
+/* Initializes |a| to hand out memory from the |len| bytes at |mem| first.
+ * The embedded default-allocator state is emptied and default_alloc is
+ * installed as the fallback allocator. */
+void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len) {
+  default_alloc_ud *ud = (default_alloc_ud*)&a->default_alloc_ud;
+  a->mem_base = mem;
+  a->mem_ptr = mem;
+  a->mem_limit = (char*)mem + len;
+  a->need_cleanup = false;
+  a->returned_allocfunc = false;
+
+  ud->head = NULL;
+
+  upb_seededalloc_setfallbackalloc(a, default_alloc, ud);
+}
+
+/* Frees the fallback allocator's blocks, but only when the fallback is still
+ * default_alloc and the slow path was taken at least once (need_cleanup). */
+void upb_seededalloc_uninit(upb_seededalloc *a) {
+  if (a->alloc == default_alloc && a->need_cleanup) {
+    default_alloc_cleanup(a->alloc_ud);
+  }
+}
+
+/* Installs the allocator used once the initial block cannot satisfy a
+ * request.  Asserts that upb_seededalloc_getallocfunc() has not been called
+ * on |a| yet. */
+UPB_FORCEINLINE void upb_seededalloc_setfallbackalloc(upb_seededalloc *a,
+                                                      upb_alloc_func *alloc,
+                                                      void *ud) {
+  assert(!a->returned_allocfunc);
+  a->alloc = alloc;
+  a->alloc_ud = ud;
+}
+
+/* Returns the seeded allocation function and records on |a| that it has been
+ * handed out; upb_seededalloc_setfallbackalloc() asserts on that flag. */
+upb_alloc_func *upb_seededalloc_getallocfunc(upb_seededalloc *a) {
+  a->returned_allocfunc = true;
+  return seeded_alloc;
+}
+/*
+** TODO(haberman): it's unclear whether a lot of the consistency checks should
+** assert() or return false.
+*/
+
+
+#include <stdlib.h>
+#include <string.h>
+
+
+
+/* Defined for the sole purpose of having a unique pointer value for
+ * UPB_NO_CLOSURE. */
+char _upb_noclosure;
+
+/* Refcount free callback for handlers.  Every entry of the cleanup table maps
+ * a pointer (the key) to the function that must be called on it (the value);
+ * all of them are run before the table, the msgdef ref, the subhandlers array
+ * and the handlers object itself are released. */
+static void freehandlers(upb_refcounted *r) {
+  upb_handlers *h = (upb_handlers*)r;
+  upb_inttable_iter it;
+
+  for (upb_inttable_begin(&it, &h->cleanup_);
+       !upb_inttable_done(&it);
+       upb_inttable_next(&it)) {
+    void *target = (void*)upb_inttable_iter_key(&it);
+    upb_handlerfree *release = upb_value_getfptr(upb_inttable_iter_value(&it));
+    release(target);
+  }
+
+  upb_inttable_uninit(&h->cleanup_);
+  upb_msgdef_unref(h->msg, h);
+  free(h->sub);
+  free(h);
+}
+
+/* Refcount visitor for handlers: reports an edge from |r| to the subhandlers
+ * of every submessage field that has subhandlers set. */
+static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit,
+                          void *closure) {
+  const upb_handlers *h = (const upb_handlers*)r;
+  upb_msg_field_iter it;
+
+  upb_msg_field_begin(&it, h->msg);
+  while (!upb_msg_field_done(&it)) {
+    upb_fielddef *field = upb_msg_iter_field(&it);
+    if (upb_fielddef_issubmsg(field)) {
+      const upb_handlers *sub = upb_handlers_getsubhandlers(h, field);
+      if (sub) visit(r, upb_handlers_upcast(sub), closure);
+    }
+    upb_msg_field_next(&it);
+  }
+}
+
+static const struct upb_refcounted_vtbl vtbl = {visithandlers, freehandlers};
+
+/* State threaded through the recursive construction in newformsg(). */
+typedef struct {
+  upb_inttable tab;  /* maps upb_msgdef* -> upb_handlers*. */
+  upb_handlers_callback *callback;  /* Called once per created handlers. */
+  const void *closure;              /* First argument passed to |callback|. */
+} dfs_state;
+
+/* Creates a handlers object for |m| (initially owned by |owner|), records it
+ * in s->tab, invokes the callback on it, and then recursively makes sure that
+ * every submessage field has subhandlers: an already-recorded handlers object
+ * is reused, otherwise a new one is built.  Returns NULL, after releasing the
+ * new object, if creation of a nested object or the table insert fails.
+ *
+ * TODO(haberman): discard upb_handlers* objects that do not actually have any
+ * handlers set and cannot reach any upb_handlers* object that does.  This is
+ * slightly tricky to do correctly. */
+static upb_handlers *newformsg(const upb_msgdef *m, const void *owner,
+                               dfs_state *s) {
+  upb_msg_field_iter i;
+  upb_handlers *h = upb_handlers_new(m, owner);
+  if (!h) return NULL;
+  /* Record |h| before recursing so that a message reached again further down
+   * finds this entry instead of creating another object. */
+  if (!upb_inttable_insertptr(&s->tab, m, upb_value_ptr(h))) goto oom;
+
+  s->callback(s->closure, h);
+
+  /* For each submessage field, get or create a handlers object and set it as
+   * the subhandlers. */
+  for(upb_msg_field_begin(&i, m);
+      !upb_msg_field_done(&i);
+      upb_msg_field_next(&i)) {
+    upb_fielddef *f = upb_msg_iter_field(&i);
+    const upb_msgdef *subdef;
+    upb_value subm_ent;
+
+    if (!upb_fielddef_issubmsg(f)) continue;
+
+    subdef = upb_downcast_msgdef(upb_fielddef_subdef(f));
+    if (upb_inttable_lookupptr(&s->tab, subdef, &subm_ent)) {
+      upb_handlers_setsubhandlers(h, f, upb_value_getptr(subm_ent));
+    } else {
+      upb_handlers *sub_mh = newformsg(subdef, &sub_mh, s);
+      if (!sub_mh) goto oom;
+      upb_handlers_setsubhandlers(h, f, sub_mh);
+      /* Drop the temporary ref taken for the recursive call. */
+      upb_handlers_unref(sub_mh, &sub_mh);
+    }
+  }
+  return h;
+
+oom:
+  upb_handlers_unref(h, owner);
+  return NULL;
+}
+
+/* Given a selector for a STARTSUBMSG handler, resolves to a pointer to the
+ * subhandlers for this submessage field. */
+#define SUBH(h, selector) (h->sub[selector])
+
+/* The selector for a submessage field is the field index. */
+#define SUBH_F(h, f) SUBH(h, f->index_)
+
+/* Looks up the selector for handler "type" on field f.  On failure records an
+ * error in h's status and returns -1. */
+static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f,
+                         upb_handlertype_t type) {
+  upb_selector_t selector;
+  const upb_msgdef *handlers_msg;
+
+  assert(!upb_handlers_isfrozen(h));
+
+  /* The field must belong to the message these handlers are for. */
+  handlers_msg = upb_handlers_msgdef(h);
+  if (handlers_msg != upb_fielddef_containingtype(f)) {
+    upb_status_seterrf(
+        &h->status_, "type mismatch: field %s does not belong to message %s",
+        upb_fielddef_name(f), upb_msgdef_fullname(handlers_msg));
+    return -1;
+  }
+
+  if (upb_handlers_getselector(f, type, &selector)) {
+    return selector;
+  }
+
+  upb_status_seterrf(
+      &h->status_,
+      "type mismatch: cannot register handler type %d for field %s",
+      type, upb_fielddef_name(f));
+  return -1;
+}
+
+/* Like trygetsel(), but asserts that the lookup succeeds. */
+static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f,
+                             upb_handlertype_t type) {
+  const int32_t selector = trygetsel(h, f, type);
+  assert(selector >= 0);
+  return selector;
+}
+
+/* Returns the address of the return-closure-type slot stored in the table
+ * entry for handler "type" on field f. */
+static const void **returntype(upb_handlers *h, const upb_fielddef *f,
+                               upb_handlertype_t type) {
+  const upb_selector_t sel = handlers_getsel(h, f, type);
+  return &h->table[sel].attr.return_closure_type_;
+}
+
+/* Installs "func" (with optional attributes) in table slot "sel" after
+ * validating closure types.  f is NULL for message-level handlers.  On
+ * failure an error is recorded in h's status and false is returned. */
+static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
+                  upb_handlertype_t type, upb_func *func,
+                  upb_handlerattr *attr) {
+  upb_handlerattr new_attr = UPB_HANDLERATTR_INITIALIZER;
+  const void *wanted_closure;
+  const void **established_closure;
+
+  assert(!upb_handlers_isfrozen(h));
+
+  if (sel < 0) {
+    upb_status_seterrmsg(&h->status_,
+                         "incorrect handler type for this field.");
+    return false;
+  }
+
+  if (h->table[sel].func) {
+    upb_status_seterrmsg(&h->status_,
+                         "cannot change handler once it has been set.");
+    return false;
+  }
+
+  if (attr) new_attr = *attr;
+
+  /* Find the slot holding the closure type already established for the
+   * context this handler runs in: the enclosing string, the enclosing
+   * sequence, or the message itself. */
+  wanted_closure = upb_handlerattr_closuretype(&new_attr);
+
+  if (type == UPB_HANDLER_STRING) {
+    established_closure = returntype(h, f, UPB_HANDLER_STARTSTR);
+  } else if (f && upb_fielddef_isseq(f) &&
+             type != UPB_HANDLER_STARTSEQ &&
+             type != UPB_HANDLER_ENDSEQ) {
+    established_closure = returntype(h, f, UPB_HANDLER_STARTSEQ);
+  } else {
+    established_closure = &h->top_closure_type;
+  }
+
+  if (wanted_closure) {
+    if (*established_closure && *established_closure != wanted_closure) {
+      /* TODO(haberman): better message for debugging. */
+      if (f) {
+        upb_status_seterrf(&h->status_,
+                           "closure type does not match for field %s",
+                           upb_fielddef_name(f));
+      } else {
+        upb_status_seterrmsg(
+            &h->status_,
+            "closure type does not match for message-level handler");
+      }
+      return false;
+    }
+    /* Either nothing was established yet or it agrees; record it. */
+    *established_closure = wanted_closure;
+  }
+
+  /* A STARTSEQ/STARTSTR handler must return the closure type that inner
+   * handlers registered earlier already expect (if any). */
+  if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) {
+    const void *given = upb_handlerattr_returnclosuretype(&new_attr);
+    const void *expected =
+        upb_handlerattr_returnclosuretype(&h->table[sel].attr);
+
+    if (expected) {
+      if (!given) {
+        upb_handlerattr_setreturnclosuretype(&new_attr, expected);
+      } else if (given != expected) {
+        upb_status_seterrmsg(&h->status_,
+                             "closure return type does not match");
+        return false;
+      }
+    }
+  }
+
+  h->table[sel].func = (upb_func*)func;
+  h->table[sel].attr = new_attr;
+  return true;
+}
+
+/* Returns the effective closure type for this handler (which will propagate
+ * from outer frames if this frame has no START* handler).  Not implemented for
+ * UPB_HANDLER_STRING at the moment since this is not needed.  Returns NULL if
+ * the effective closure type is unspecified (either no handler was registered
+ * to specify it or the handler that was registered did not specify the closure
+ * type). */
+const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f,
+                                   upb_handlertype_t type) {
+  const void *closure_type;
+  upb_selector_t sel;
+
+  assert(type != UPB_HANDLER_STRING);
+
+  /* Start from the message-level closure type; an enclosing START* handler
+   * that is actually registered overrides it below. */
+  closure_type = h->top_closure_type;
+
+  if (upb_fielddef_isseq(f) &&
+      type != UPB_HANDLER_STARTSEQ &&
+      type != UPB_HANDLER_ENDSEQ) {
+    sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ);
+    if (h->table[sel].func) {
+      closure_type = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
+    }
+  }
+
+  if (type == UPB_HANDLER_STRING) {
+    sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR);
+    if (h->table[sel].func) {
+      closure_type = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
+    }
+  }
+
+  /* The effective type of the submessage; not used yet.
+   * if (type == SUBMESSAGE &&
+   *     h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) {
+   *   ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
+   * } */
+
+  return closure_type;
+}
+
+/* Checks whether the START* handler specified by f & type is missing even
+ * though it is required to convert the established type of an outer frame
+ * ("closure_type") into the established type of an inner frame (represented in
+ * the return closure type of this handler's attr). */
+bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type,
+                upb_status *status) {
+  const void *closure_type;
+  const upb_handlerattr *attr;
+  const void *return_closure_type;
+
+  /* A registered START* handler performs the conversion itself, so there is
+   * nothing to check. */
+  upb_selector_t sel = handlers_getsel(h, f, type);
+  if (h->table[sel].func) return true;
+
+  /* Without a handler the outer closure is passed through unchanged, so it
+   * must already have the type that the inner handlers expect. */
+  closure_type = effective_closure_type(h, f, type);
+  attr = &h->table[sel].attr;
+  return_closure_type = upb_handlerattr_returnclosuretype(attr);
+  if (closure_type && return_closure_type &&
+      closure_type != return_closure_type) {
+    /* The field name is a string, so it must be formatted with %s. */
+    upb_status_seterrf(status,
+                       "expected start handler to return sub type for field %s",
+                       upb_fielddef_name(f));
+    return false;
+  }
+  return true;
+}
+
+/* Public interface ***********************************************************/
+
+/* Allocates a new, mutable handlers object for the frozen message "md" and
+ * gives "owner" a ref on it.  Returns NULL if memory cannot be allocated. */
+upb_handlers *upb_handlers_new(const upb_msgdef *md, const void *owner) {
+  int extra;
+  upb_handlers *h;
+
+  assert(upb_msgdef_isfrozen(md));
+
+  /* The handler table is allocated inline after the struct.  The "- 1"
+   * suggests that upb_handlers already embeds one table entry. */
+  extra = sizeof(upb_handlers_tabent) * (md->selector_count - 1);
+  h = calloc(sizeof(*h) + extra, 1);
+  if (!h) return NULL;
+
+  h->msg = md;
+  upb_msgdef_ref(h->msg, h);
+  upb_status_clear(&h->status_);
+  h->sub = calloc(md->submsg_field_count, sizeof(*h->sub));
+  /* calloc() is allowed to return NULL for a zero-element request, so a NULL
+   * result only means out-of-memory when there are submessage fields. */
+  if (!h->sub && md->submsg_field_count != 0) goto oom;
+  if (!upb_refcounted_init(upb_handlers_upcast_mutable(h), &vtbl, owner))
+    goto oom;
+  if (!upb_inttable_init(&h->cleanup_, UPB_CTYPE_FPTR)) goto oom;
+
+  /* calloc() above initialized all handlers to NULL. */
+  return h;
+
+oom:
+  /* NOTE(review): this path can run before cleanup_ has been initialized;
+   * confirm that freehandlers() tolerates a zero-filled table. */
+  freehandlers(upb_handlers_upcast_mutable(h));
+  return NULL;
+}
+
+/* Builds handlers for message m and every message reachable from it, calling
+ * "callback" once per handlers object to register handlers, then freezes the
+ * result.  Returns NULL if building the graph fails. */
+const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m,
+                                           const void *owner,
+                                           upb_handlers_callback *callback,
+                                           const void *closure) {
+  dfs_state state;
+  upb_handlers *root;
+  upb_refcounted *base;
+  bool frozen;
+
+  if (!upb_inttable_init(&state.tab, UPB_CTYPE_PTR)) return NULL;
+  state.callback = callback;
+  state.closure = closure;
+
+  root = newformsg(m, owner, &state);
+
+  /* The msgdef -> handlers map is only needed while building the graph. */
+  upb_inttable_uninit(&state.tab);
+  if (!root) return NULL;
+
+  base = upb_handlers_upcast_mutable(root);
+  frozen = upb_refcounted_freeze(&base, 1, NULL, UPB_MAX_HANDLER_DEPTH);
+  UPB_ASSERT_VAR(frozen, frozen);
+
+  return root;
+}
+
+/* Returns the status object that records errors from handler registration.
+ * Only valid on handlers that are not frozen. */
+const upb_status *upb_handlers_status(upb_handlers *h) {
+  const upb_status *status;
+  assert(!upb_handlers_isfrozen(h));
+  status = &h->status_;
+  return status;
+}
+
+void upb_handlers_clearerr(upb_handlers *h) {
+  assert(!upb_handlers_isfrozen(h));
+  upb_status_clear(&h->status_);
+}
+
+#define SETTER(name, handlerctype, handlertype) \
+  bool upb_handlers_set ## name(upb_handlers *h, const upb_fielddef *f, \
+                                handlerctype func, upb_handlerattr *attr) { \
+    int32_t sel = trygetsel(h, f, handlertype); \
+    return doset(h, sel, f, handlertype, (upb_func*)func, attr); \
+  }
+
+SETTER(int32,       upb_int32_handlerfunc*,       UPB_HANDLER_INT32)  /* Each SETTER(...) line defines a public upb_handlers_set<name>(h, f, func, attr). */
+SETTER(int64,       upb_int64_handlerfunc*,       UPB_HANDLER_INT64)  /* A failed selector lookup (sel == -1) is passed on to doset(), which reports the error and returns false. */
+SETTER(uint32,      upb_uint32_handlerfunc*,      UPB_HANDLER_UINT32)
+SETTER(uint64,      upb_uint64_handlerfunc*,      UPB_HANDLER_UINT64)
+SETTER(float,       upb_float_handlerfunc*,       UPB_HANDLER_FLOAT)
+SETTER(double,      upb_double_handlerfunc*,      UPB_HANDLER_DOUBLE)
+SETTER(bool,        upb_bool_handlerfunc*,        UPB_HANDLER_BOOL)
+SETTER(startstr,    upb_startstr_handlerfunc*,    UPB_HANDLER_STARTSTR)
+SETTER(string,      upb_string_handlerfunc*,      UPB_HANDLER_STRING)
+SETTER(endstr,      upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSTR)
+SETTER(startseq,    upb_startfield_handlerfunc*,  UPB_HANDLER_STARTSEQ)
+SETTER(startsubmsg, upb_startfield_handlerfunc*,  UPB_HANDLER_STARTSUBMSG)
+SETTER(endsubmsg,   upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSUBMSG)
+SETTER(endseq,      upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSEQ)
+
+#undef SETTER  /* The macro is only needed for the definitions above. */
+
+/* Registers the start-of-message handler.  There is no field for a
+ * message-level handler, so NULL is passed for it; UPB_HANDLER_INT32 is a type
+ * that triggers none of the STRING/STARTSEQ/STARTSTR special cases in
+ * doset(). */
+bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func,
+                              upb_handlerattr *attr) {
+  upb_func *generic_func = (upb_func *)func;
+  return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32, generic_func,
+               attr);
+}
+
+/* Registers the end-of-message handler.  There is no field for a
+ * message-level handler, so NULL is passed for it; UPB_HANDLER_INT32 is a type
+ * that triggers none of the STRING/STARTSEQ/STARTSTR special cases in
+ * doset(). */
+bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func,
+                            upb_handlerattr *attr) {
+  upb_func *generic_func = (upb_func *)func;
+  assert(!upb_handlers_isfrozen(h));
+  return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32, generic_func,
+               attr);
+}
+
+/* Sets "sub" as the handlers for submessage field f.  Returns false if
+ * subhandlers were already set for f or if "sub" is for a different message
+ * type than the field's submessage type. */
+bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f,
+                                 const upb_handlers *sub) {
+  assert(sub);
+  assert(!upb_handlers_isfrozen(h));
+  assert(upb_fielddef_issubmsg(f));
+
+  /* Subhandlers can be set only once, and only to handlers for the right
+   * message type. */
+  if (!SUBH_F(h, f) &&
+      upb_msgdef_upcast(upb_handlers_msgdef(sub)) == upb_fielddef_subdef(f)) {
+    SUBH_F(h, f) = sub;
+    upb_ref2(sub, h);
+    return true;
+  }
+  return false;
+}
+
+/* Returns the subhandlers registered for submessage field f (NULL if none
+ * have been set). */
+const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h,
+                                                const upb_fielddef *f) {
+  const upb_handlers *sub;
+  assert(upb_fielddef_issubmsg(f));
+  sub = SUBH_F(h, f);
+  return sub;
+}
+
+/* Copies the attributes of the handler at selector "sel" into *attr.  Returns
+ * false (leaving *attr untouched) if no handler is registered there. */
+bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel,
+                          upb_handlerattr *attr) {
+  if (upb_handlers_gethandler(h, sel)) {
+    *attr = h->table[sel].attr;
+    return true;
+  }
+  return false;
+}
+
+/* Returns the subhandlers for the submessage field whose STARTSUBMSG selector
+ * is "sel". */
+const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h,
+                                                    upb_selector_t sel) {
+  /* STARTSUBMSG selectors follow the static selectors, so subtracting their
+   * count gives the index into the "sub" array. */
+  return h->sub[sel - UPB_STATIC_SELECTOR_COUNT];
+}
+
+/* Returns the message definition these handlers were created for. */
+const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) {
+  const upb_msgdef *msg = h->msg;
+  return msg;
+}
+
+/* Registers "func" to be called with "p" when the handlers are freed.
+ * Returns false if a cleanup is already registered for p. */
+bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) {
+  bool inserted;
+
+  if (upb_inttable_lookupptr(&h->cleanup_, p, NULL)) return false;
+
+  inserted = upb_inttable_insertptr(&h->cleanup_, p, upb_value_fptr(func));
+  UPB_ASSERT_VAR(inserted, inserted);
+  return true;
+}
+
+
+/* "Static" methods ***********************************************************/
+
+/* Validates and freezes the n handlers objects in "handlers".  Returns false
+ * and sets *s if any object has a recorded error, a required START* handler
+ * is missing, memory runs out, or the refcounted freeze fails. */
+bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) {
+  /* TODO: verify we have a transitive closure. */
+  int i;
+  for (i = 0; i < n; i++) {
+    upb_msg_field_iter j;
+    upb_handlers *h = handlers[i];
+
+    if (!upb_ok(&h->status_)) {
+      upb_status_seterrf(s, "handlers for message %s had error status: %s",
+                         upb_msgdef_fullname(upb_handlers_msgdef(h)),
+                         upb_status_errmsg(&h->status_));
+      return false;
+    }
+
+    /* Check that there are no closure mismatches due to missing Start* handlers
+     * or subhandlers with different type-level types. */
+    for(upb_msg_field_begin(&j, h->msg);
+        !upb_msg_field_done(&j);
+        upb_msg_field_next(&j)) {
+
+      const upb_fielddef *f = upb_msg_iter_field(&j);
+      if (upb_fielddef_isseq(f)) {
+        if (!checkstart(h, f, UPB_HANDLER_STARTSEQ, s))
+          return false;
+      }
+
+      if (upb_fielddef_isstring(f)) {
+        if (!checkstart(h, f, UPB_HANDLER_STARTSTR, s))
+          return false;
+      }
+
+      if (upb_fielddef_issubmsg(f)) {
+        bool hashandler = false;
+        if (upb_handlers_gethandler(
+                h, handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)) ||
+            upb_handlers_gethandler(
+                h, handlers_getsel(h, f, UPB_HANDLER_ENDSUBMSG))) {
+          hashandler = true;
+        }
+
+        if (upb_fielddef_isseq(f) &&
+            (upb_handlers_gethandler(
+                 h, handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)) ||
+             upb_handlers_gethandler(
+                 h, handlers_getsel(h, f, UPB_HANDLER_ENDSEQ)))) {
+          hashandler = true;
+        }
+
+        if (hashandler && !upb_handlers_getsubhandlers(h, f)) {
+          /* For now we add an empty subhandlers in this case.  It makes the
+           * decoder code generator simpler, because it only has to handle two
+           * cases (submessage has handlers or not) as opposed to three
+           * (submessage has handlers in enclosing message but no subhandlers).
+           *
+           * This makes parsing less efficient in the case that we want to
+           * notice a submessage but skip its contents (like if we're testing
+           * for submessage presence or counting the number of repeated
+           * submessages).  In this case we will end up parsing the submessage
+           * field by field and throwing away the results for each, instead of
+           * skipping the whole delimited thing at once.  If this is an issue we
+           * can revisit it, but do remember that this only arises when you have
+           * handlers (startseq/startsubmsg/endsubmsg/endseq) set for the
+           * submessage but no subhandlers.  The use cases for this are
+           * limited. */
+          upb_handlers *sub = upb_handlers_new(upb_fielddef_msgsubdef(f), &sub);
+          if (!sub) {
+            /* upb_handlers_new() returns NULL when allocation fails; report
+             * it instead of dereferencing a NULL pointer below. */
+            upb_status_seterrmsg(s, "out of memory");
+            return false;
+          }
+          upb_handlers_setsubhandlers(h, f, sub);
+          upb_handlers_unref(sub, &sub);
+        }
+
+        /* TODO(haberman): check type of submessage.
+         * This is slightly tricky; also consider whether we should check that
+         * they match at setsubhandlers time. */
+      }
+    }
+  }
+
+  if (!upb_refcounted_freeze((upb_refcounted*const*)handlers, n, s,
+                             UPB_MAX_HANDLER_DEPTH)) {
+    return false;
+  }
+
+  return true;
+}
+
+/* Maps the type of primitive field f to the handler type used for its
+ * values.  Enum fields use the int32 handler.  Non-primitive field types are
+ * invalid input. */
+upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) {
+  switch (upb_fielddef_type(f)) {
+    case UPB_TYPE_BOOL:
+      return UPB_HANDLER_BOOL;
+    case UPB_TYPE_FLOAT:
+      return UPB_HANDLER_FLOAT;
+    case UPB_TYPE_DOUBLE:
+      return UPB_HANDLER_DOUBLE;
+    case UPB_TYPE_ENUM:
+    case UPB_TYPE_INT32:
+      return UPB_HANDLER_INT32;
+    case UPB_TYPE_INT64:
+      return UPB_HANDLER_INT64;
+    case UPB_TYPE_UINT32:
+      return UPB_HANDLER_UINT32;
+    case UPB_TYPE_UINT64:
+      return UPB_HANDLER_UINT64;
+    default:
+      assert(false);
+      return -1;  /* Invalid input. */
+  }
+}
+
+bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,  /* Computes the table selector for handler "type" on field f. */
+                              upb_selector_t *s) {  /* On success stores it in *s and returns true; returns false if f cannot have this handler type. */
+  switch (type) {
+    case UPB_HANDLER_INT32:
+    case UPB_HANDLER_INT64:
+    case UPB_HANDLER_UINT32:
+    case UPB_HANDLER_UINT64:
+    case UPB_HANDLER_FLOAT:
+    case UPB_HANDLER_DOUBLE:
+    case UPB_HANDLER_BOOL:
+      if (!upb_fielddef_isprimitive(f) ||
+          upb_handlers_getprimitivehandlertype(f) != type)
+        return false;  /* The value handler type must match the field's primitive type exactly. */
+      *s = f->selector_base;
+      break;
+    case UPB_HANDLER_STRING:
+      if (upb_fielddef_isstring(f)) {
+        *s = f->selector_base;
+      } else if (upb_fielddef_lazy(f)) {
+        *s = f->selector_base + 3;  /* For lazy fields the base slot is used by ENDSUBMSG, so STRING comes after STARTSTR/ENDSTR. */
+      } else {
+        return false;
+      }
+      break;
+    case UPB_HANDLER_STARTSTR:
+      if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
+        *s = f->selector_base + 1;
+      } else {
+        return false;
+      }
+      break;
+    case UPB_HANDLER_ENDSTR:
+      if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
+        *s = f->selector_base + 2;
+      } else {
+        return false;
+      }
+      break;
+    case UPB_HANDLER_STARTSEQ:
+      if (!upb_fielddef_isseq(f)) return false;
+      *s = f->selector_base - 2;  /* The two sequence selectors sit directly before the base selector. */
+      break;
+    case UPB_HANDLER_ENDSEQ:
+      if (!upb_fielddef_isseq(f)) return false;
+      *s = f->selector_base - 1;
+      break;
+    case UPB_HANDLER_STARTSUBMSG:
+      if (!upb_fielddef_issubmsg(f)) return false;
+      /* Selectors for STARTSUBMSG are at the beginning of the table so that the
+       * selector can also be used as an index into the "sub" array of
+       * subhandlers.  The indexes for the two into these two tables are the
+       * same, except that in the handler table the static selectors come first. */
+      *s = f->index_ + UPB_STATIC_SELECTOR_COUNT;
+      break;
+    case UPB_HANDLER_ENDSUBMSG:
+      if (!upb_fielddef_issubmsg(f)) return false;
+      *s = f->selector_base;
+      break;
+  }  /* NOTE(review): there is no default case, so *s is left unset for a type value outside the enum. */
+  assert((size_t)*s < upb_fielddef_containingtype(f)->selector_count);
+  return true;
+}
+
+/* Returns how many selectors precede the base selector of field f: the two
+ * STARTSEQ/ENDSEQ slots for sequence fields, otherwise none. */
+uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
+  if (upb_fielddef_isseq(f)) {
+    return 2;
+  }
+  return 0;
+}
+
+/* Returns the number of handler-table selectors that field f occupies. */
+uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
+  /* Every field has one base selector.  For submessage fields this is
+   * ENDSUBMSG; STARTSUBMSG lives at the beginning of the table and is not
+   * counted here. */
+  uint32_t count = 1;
+
+  /* STARTSEQ/ENDSEQ */
+  if (upb_fielddef_isseq(f)) count += 2;
+
+  /* [STRING]/STARTSTR/ENDSTR */
+  if (upb_fielddef_isstring(f)) count += 2;
+
+  /* STARTSTR/ENDSTR/STRING (for lazy) */
+  if (upb_fielddef_issubmsg(f) && upb_fielddef_lazy(f)) count += 3;
+
+  return count;
+}
+
+
+/* upb_handlerattr ************************************************************/
+
+/* Resets *attr to the values of UPB_HANDLERATTR_INITIALIZER. */
+void upb_handlerattr_init(upb_handlerattr *attr) {
+  upb_handlerattr defaults = UPB_HANDLERATTR_INITIALIZER;
+  memcpy(attr, &defaults, sizeof(defaults));
+}
+
+void upb_handlerattr_uninit(upb_handlerattr *attr) {  /* Currently a no-op: attr is only marked as unused. */
+  UPB_UNUSED(attr);
+}
+
+/* Stores hd as the handler data of attr.  Always returns true. */
+bool upb_handlerattr_sethandlerdata(upb_handlerattr *attr, const void *hd) {
+  const bool ok = true;  /* This setter cannot fail. */
+  attr->handler_data_ = hd;
+  return ok;
+}
+
+/* Stores "type" as the closure type of attr.  Always returns true. */
+bool upb_handlerattr_setclosuretype(upb_handlerattr *attr, const void *type) {
+  const bool ok = true;  /* This setter cannot fail. */
+  attr->closure_type_ = type;
+  return ok;
+}
+
+/* Returns the closure type stored in attr. */
+const void *upb_handlerattr_closuretype(const upb_handlerattr *attr) {
+  const void *type = attr->closure_type_;
+  return type;
+}
+
+/* Stores "type" as the return closure type of attr.  Always returns true. */
+bool upb_handlerattr_setreturnclosuretype(upb_handlerattr *attr,
+                                          const void *type) {
+  const bool ok = true;  /* This setter cannot fail. */
+  attr->return_closure_type_ = type;
+  return ok;
+}
+
+/* Returns the return closure type stored in attr. */
+const void *upb_handlerattr_returnclosuretype(const upb_handlerattr *attr) {
+  const void *type = attr->return_closure_type_;
+  return type;
+}
+
+/* Stores the "always ok" flag in attr.  Always returns true. */
+bool upb_handlerattr_setalwaysok(upb_handlerattr *attr, bool alwaysok) {
+  const bool ok = true;  /* This setter cannot fail. */
+  attr->alwaysok_ = alwaysok;
+  return ok;
+}
+
+/* Returns the "always ok" flag stored in attr. */
+bool upb_handlerattr_alwaysok(const upb_handlerattr *attr) {
+  const bool alwaysok = attr->alwaysok_;
+  return alwaysok;
+}
+
+/* upb_bufhandle **************************************************************/
+
+/* Returns the object offset stored in the buffer handle. */
+size_t upb_bufhandle_objofs(const upb_bufhandle *h) {
+  const size_t offset = h->objofs_;
+  return offset;
+}
+
+/* upb_byteshandler ***********************************************************/
+
+/* Zero-fills the bytes handler, leaving every handler slot empty. */
+void upb_byteshandler_init(upb_byteshandler* h) {
+  memset(h, 0, sizeof(upb_byteshandler));
+}
+
+/* For when we support handlerfree callbacks. */
+void upb_byteshandler_uninit(upb_byteshandler* h) {  /* Currently a no-op: h is only marked as unused. */
+  UPB_UNUSED(h);
+}
+
+/* Installs the start-of-string handler and its handler data "d".  Always
+ * returns true. */
+bool upb_byteshandler_setstartstr(upb_byteshandler *h,
+                                  upb_startstr_handlerfunc *func, void *d) {
+  upb_func *generic_func = (upb_func*)func;
+  h->table[UPB_STARTSTR_SELECTOR].attr.handler_data_ = d;
+  h->table[UPB_STARTSTR_SELECTOR].func = generic_func;
+  return true;
+}
+
+/* Installs the string-data handler and its handler data "d".  Always returns
+ * true. */
+bool upb_byteshandler_setstring(upb_byteshandler *h,
+                                upb_string_handlerfunc *func, void *d) {
+  upb_func *generic_func = (upb_func*)func;
+  h->table[UPB_STRING_SELECTOR].attr.handler_data_ = d;
+  h->table[UPB_STRING_SELECTOR].func = generic_func;
+  return true;
+}
+
+/* Installs the end-of-string handler and its handler data "d".  Always
+ * returns true. */
+bool upb_byteshandler_setendstr(upb_byteshandler *h,
+                                upb_endfield_handlerfunc *func, void *d) {
+  upb_func *generic_func = (upb_func*)func;
+  h->table[UPB_ENDSTR_SELECTOR].attr.handler_data_ = d;
+  h->table[UPB_ENDSTR_SELECTOR].func = generic_func;
+  return true;
+}
+/*
+** upb::RefCounted Implementation
+**
+** Our key invariants are:
+** 1. reference cycles never span groups
+** 2. for ref2(to, from), we increment to's count iff group(from) != group(to)
+**
+** The previous two are how we avoid leaking cycles.  Other important
+** invariants are:
+** 3. for mutable objects "from" and "to", if there exists a ref2(to, from)
+**    this implies group(from) == group(to).  (In practice, what we implement
+**    is even stronger; "from" and "to" will share a group if there has *ever*
+**    been a ref2(to, from), but all that is necessary for correctness is the
+**    weaker one).
+** 4. mutable and immutable objects are never in the same group.
+*/
+
+
+#include <setjmp.h>
+#include <stdlib.h>
+
+static void freeobj(upb_refcounted *o);
+
+const char untracked_val;
+const void *UPB_UNTRACKED_REF = &untracked_val;
+
+/* arch-specific atomic primitives  *******************************************/
+
+/* atomic_inc(a) increments *a; atomic_dec(a) decrements *a and returns true
+ * iff the new value is zero.  Every variant operates on a plain uint32_t
+ * because that is what refgroup()/unrefgroup() pass in. */
+#ifdef UPB_THREAD_UNSAFE /*---------------------------------------------------*/
+
+static void atomic_inc(uint32_t *a) { (*a)++; }
+static bool atomic_dec(uint32_t *a) { return --(*a) == 0; }
+
+#elif defined(__GNUC__) || defined(__clang__) /*------------------------------*/
+
+static void atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); }
+static bool atomic_dec(uint32_t *a) { return __sync_sub_and_fetch(a, 1) == 0; }
+
+#elif defined(WIN32) || defined(_WIN32) /*------------------------------------*/
+
+#include <Windows.h>
+
+/* The Interlocked* functions take a "LONG volatile *", which is 32 bits wide
+ * on Windows, so the uint32_t counter can be passed through a cast. */
+static void atomic_inc(uint32_t *a) {
+  InterlockedIncrement((volatile LONG*)a);
+}
+static bool atomic_dec(uint32_t *a) {
+  return InterlockedDecrement((volatile LONG*)a) == 0;
+}
+
+#else
+#error Atomic primitives not defined for your platform/CPU.  \
+       Implement them or compile with UPB_THREAD_UNSAFE.
+#endif
+
+/* All static objects point to this refcount.
+ * It is special-cased in ref/unref below.  */
+uint32_t static_refcount = -1;
+
+/* We can avoid atomic ops for statically-declared objects.
+ * This is a minor optimization but nice since we can avoid degrading under
+ * contention in this case. */
+
+/* Takes a reference on a group's refcount; the shared static refcount is
+ * never modified. */
+static void refgroup(uint32_t *group) {
+  if (group == &static_refcount) return;
+  atomic_inc(group);
+}
+
+/* Drops a reference on a group's refcount and returns true iff the count
+ * reached zero.  The shared static refcount is never modified and never
+ * reports zero. */
+static bool unrefgroup(uint32_t *group) {
+  if (group != &static_refcount) {
+    return atomic_dec(group);
+  }
+  return false;
+}
+
+
+/* Reference tracking (debug only) ********************************************/
+
+#ifdef UPB_DEBUG_REFS
+
+#ifdef UPB_THREAD_UNSAFE
+
+static void upb_lock() {}  /* Thread-unsafe builds use an empty lock function. */
+static void upb_unlock() {}  /* Empty counterpart of upb_lock(). */
+
+#else
+
+/* User must define functions that lock/unlock a global mutex and link this
+ * file against them. */
+void upb_lock();
+void upb_unlock();
+
+#endif
+
+/* UPB_DEBUG_REFS mode counts on being able to malloc() memory in some
+ * code-paths that can normally never fail, like upb_refcounted_ref().  Since
+ * we have no way to propagate out-of-memory errors back to the user, and since
+ * these errors can only occur in UPB_DEBUG_REFS mode, we immediately fail. */
+#define CHECK_OOM(predicate) if (!(predicate)) { assert(predicate); exit(1); }
+
+typedef struct {  /* Debug-only record of the refs one owner holds on one object. */
+  int count;  /* How many refs there are (duplicates only allowed for ref2). */
+  bool is_ref2;  /* Whether the refs were taken as ref2, i.e. the owner is another refcounted object. */
+} trackedref;
+
+/* Allocates a tracking record for a single ref.  Exits the process if the
+ * allocation fails (see CHECK_OOM). */
+static trackedref *trackedref_new(bool is_ref2) {
+  trackedref *record = malloc(sizeof(trackedref));
+  CHECK_OOM(record);
+  record->is_ref2 = is_ref2;
+  record->count = 1;
+  return record;
+}
+
+/* Debug-only: records that "owner" took a ref on r (a ref2 if "ref2" is set).
+ * Refs owned by UPB_UNTRACKED_REF are not recorded. */
+static void track(const upb_refcounted *r, const void *owner, bool ref2) {
+  upb_value existing;
+
+  assert(owner);
+  if (owner == UPB_UNTRACKED_REF) return;
+
+  upb_lock();
+  if (!upb_inttable_lookupptr(r->refs, owner, &existing)) {
+    /* First ref from this owner: create its tracking record. */
+    trackedref *record = trackedref_new(ref2);
+    bool inserted =
+        upb_inttable_insertptr(r->refs, owner, upb_value_ptr(record));
+    CHECK_OOM(inserted);
+    if (ref2) {
+      /* We know this cast is safe when it is a ref2, because it's coming from
+       * another refcounted object. */
+      const upb_refcounted *from = owner;
+      assert(!upb_inttable_lookupptr(from->ref2s, r, NULL));
+      inserted = upb_inttable_insertptr(from->ref2s, r, upb_value_ptr(NULL));
+      CHECK_OOM(inserted);
+    }
+  } else {
+    /* Since we allow multiple ref2's for the same to/from pair without
+     * allocating separate memory for each one, we lose the fine-grained
+     * tracking behavior we get with regular refs.  Since ref2s only happen
+     * inside upb, we'll accept this limitation until/unless there is a really
+     * difficult upb-internal bug that can't be figured out without it. */
+    trackedref *record = upb_value_getptr(existing);
+    assert(ref2);
+    assert(record->is_ref2);
+    record->count++;
+  }
+  upb_unlock();
+}
+
+/* Debug-only: records that "owner" released a ref on r.  Refs owned by
+ * UPB_UNTRACKED_REF are not recorded. */
+static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
+  upb_value entry;
+  bool found;
+  trackedref *record;
+
+  assert(owner);
+  if (owner == UPB_UNTRACKED_REF) return;
+
+  upb_lock();
+  found = upb_inttable_lookupptr(r->refs, owner, &entry);
+  /* This assert will fail if an owner attempts to release a ref it didn't have. */
+  UPB_ASSERT_VAR(found, found);
+  record = upb_value_getptr(entry);
+  assert(record->is_ref2 == ref2);
+
+  record->count--;
+  if (record->count == 0) {
+    /* Last ref from this owner: drop the record from both tables. */
+    upb_inttable_removeptr(r->refs, owner, NULL);
+    free(record);
+    if (ref2) {
+      /* We know this cast is safe when it is a ref2, because it's coming from
+       * another refcounted object. */
+      const upb_refcounted *from = owner;
+      bool removed = upb_inttable_removeptr(from->ref2s, r, NULL);
+      assert(removed);
+    }
+  }
+  upb_unlock();
+}
+
+/* Debug-only: asserts that "owner" holds a ref of the given kind on r. */
+static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
+  upb_value entry;
+  trackedref *record;
+  bool found;
+
+  upb_lock();
+  found = upb_inttable_lookupptr(r->refs, owner, &entry);
+  UPB_ASSERT_VAR(found, found);
+  record = upb_value_getptr(entry);
+  assert(record->is_ref2 == ref2);
+  upb_unlock();
+}
+
+/* Populates the given UPB_CTYPE_INT32 inttable with counts of ref2's that
+ * originate from the given owner. */
+static void getref2s(const upb_refcounted *owner, upb_inttable *tab) {
+  upb_inttable_iter it;
+
+  upb_lock();
+  upb_inttable_begin(&it, owner->ref2s);
+  while (!upb_inttable_done(&it)) {
+    upb_refcounted *target = (upb_refcounted*)upb_inttable_iter_key(&it);
+    upb_value entry;
+    trackedref *record;
+    bool found;
+    bool inserted;
+
+    /* To get the count we need to look in the target's table. */
+    found = upb_inttable_lookupptr(target->refs, owner, &entry);
+    assert(found);
+    record = upb_value_getptr(entry);
+
+    inserted =
+        upb_inttable_insertptr(tab, target, upb_value_int32(record->count));
+    CHECK_OOM(inserted);
+
+    upb_inttable_next(&it);
+  }
+  upb_unlock();
+}
+
+typedef struct {  /* Closure passed to visit_check() while verifying an object's visit function. */
+  upb_inttable ref2;  /* Maps each expected subobj to the number of visits still outstanding for it. */
+  const upb_refcounted *obj;  /* The object whose visit function is being verified. */
+} check_state;
+
+/* Visit callback used in debug mode: consumes one expected visit of "subobj"
+ * from the check_state passed as "closure". */
+static void visit_check(const upb_refcounted *obj, const upb_refcounted *subobj,
+                        void *closure) {
+  check_state *state = closure;
+  upb_value outstanding;
+  bool removed;
+  int32_t remaining;
+
+  assert(obj == state->obj);
+  assert(subobj);
+  removed = upb_inttable_removeptr(&state->ref2, subobj, &outstanding);
+  /* The following assertion will fail if the visit() function visits a subobj
+   * that it did not have a ref2 on, or visits the same subobj too many times. */
+  assert(removed);
+
+  /* Re-insert the entry only while further visits are still expected. */
+  remaining = upb_value_getint32(outstanding) - 1;
+  if (remaining > 0) {
+    upb_inttable_insert(&state->ref2, (uintptr_t)subobj,
+                        upb_value_int32(remaining));
+  }
+}
+
+/* Debug-mode wrapper around r's visit function: first verifies that it visits
+ * exactly the objects r holds ref2s on, then performs the real visit with
+ * "v" and "closure". */
+static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
+                  void *closure) {
+  /* In DEBUG_REFS mode we know what existing ref2 refs there are, so we know
+   * exactly the set of nodes that visit() should visit.  So we verify visit()'s
+   * correctness here. */
+  check_state expected;
+  bool initialized;
+
+  expected.obj = r;
+  initialized = upb_inttable_init(&expected.ref2, UPB_CTYPE_INT32);
+  CHECK_OOM(initialized);
+  getref2s(r, &expected.ref2);
+
+  /* This should visit any children in the ref2 table. */
+  if (r->vtbl->visit) r->vtbl->visit(r, visit_check, &expected);
+
+  /* This assertion will fail if the visit() function missed any children. */
+  assert(upb_inttable_count(&expected.ref2) == 0);
+  upb_inttable_uninit(&expected.ref2);
+
+  if (r->vtbl->visit) r->vtbl->visit(r, v, closure);
+}
+
+/* Debug-only: allocates and initializes the two ref-tracking tables of r.
+ * Returns false, with both allocations freed, if anything fails. */
+static bool trackinit(upb_refcounted *r) {
+  r->refs = malloc(sizeof(*r->refs));
+  r->ref2s = malloc(sizeof(*r->ref2s));
+
+  if (r->refs && r->ref2s && upb_inttable_init(r->refs, UPB_CTYPE_PTR)) {
+    if (upb_inttable_init(r->ref2s, UPB_CTYPE_PTR)) return true;
+    /* Only the first table was initialized; undo that before freeing. */
+    upb_inttable_uninit(r->refs);
+  }
+
+  free(r->refs);
+  free(r->ref2s);
+  return false;
+}
+
+/* Debug-only: tears down and frees the two ref-tracking tables of r. */
+static void trackfree(const upb_refcounted *r) {
+  upb_inttable_uninit(r->refs);
+  free(r->refs);
+
+  upb_inttable_uninit(r->ref2s);
+  free(r->ref2s);
+}
+
+#else
+
+static void track(const upb_refcounted *r, const void *owner, bool ref2) {  /* No-op variant used when UPB_DEBUG_REFS is not defined. */
+  UPB_UNUSED(r);
+  UPB_UNUSED(owner);
+  UPB_UNUSED(ref2);
+}
+
+static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {  /* No-op variant used when UPB_DEBUG_REFS is not defined. */
+  UPB_UNUSED(r);
+  UPB_UNUSED(owner);
+  UPB_UNUSED(ref2);
+}
+
+static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {  /* No-op variant used when UPB_DEBUG_REFS is not defined. */
+  UPB_UNUSED(r);
+  UPB_UNUSED(owner);
+  UPB_UNUSED(ref2);
+}
+
+static bool trackinit(upb_refcounted *r) {  /* Variant used when UPB_DEBUG_REFS is not defined: does nothing and reports success. */
+  UPB_UNUSED(r);
+  return true;
+}
+
+static void trackfree(const upb_refcounted *r) {  /* No-op variant used when UPB_DEBUG_REFS is not defined. */
+  UPB_UNUSED(r);
+}
+
+/* Calls r's visit function (if it has one) with "v" and "closure". */
+static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
+                  void *closure) {
+  if (!r->vtbl->visit) return;
+  r->vtbl->visit(r, v, closure);
+}
+
+#endif  /* UPB_DEBUG_REFS */
+
+
+/* freeze() *******************************************************************/
+
+/* The freeze() operation is by far the most complicated part of this scheme.
+ * We compute strongly-connected components and then mutate the graph such that
+ * we preserve the invariants documented at the top of this file.  And we must
+ * handle out-of-memory errors gracefully (without leaving the graph
+ * inconsistent), which adds to the fun. */
+
+/* The state used by the freeze operation (shared across many functions). */
+typedef struct {
+  int depth;  /* NOTE(review): presumably the current traversal depth -- confirm against the freeze code. */
+  int maxdepth;  /* NOTE(review): presumably the limit that depth is checked against -- confirm. */
+  uint64_t index;  /* NOTE(review): presumably the next Tarjan index to assign -- confirm. */
+  /* Maps upb_refcounted* -> attributes (color, etc).  attr layout varies by
+   * color. */
+  upb_inttable objattr;
+  upb_inttable stack;   /* stack of upb_refcounted* for Tarjan's algorithm. */
+  upb_inttable groups;  /* array of uint32_t*, malloc'd refcounts for new groups */
+  upb_status *status;  /* Receives error messages, e.g. "out of memory" from oom(). */
+  jmp_buf err;  /* longjmp() target used by err() to abort the operation. */
+} tarjan;
+
+static void release_ref2(const upb_refcounted *obj,
+                         const upb_refcounted *subobj,
+                         void *closure);
+
+/* Node attributes -----------------------------------------------------------*/
+
+/* After our analysis phase all nodes will be either GRAY or WHITE. */
+
+/* Colors are stored in the low 2 bits of an object's attr (see color()).
+ * BLACK must be 0, because objects absent from the attr map read as attr 0. */
+typedef enum {
+  BLACK = 0,  /* Object has not been seen. */
+  GRAY,   /* Object has been found via a refgroup but may not be reachable. */
+  GREEN,  /* Object is reachable and is currently on the Tarjan stack. */
+  WHITE   /* Object is reachable and has been assigned a group (SCC). */
+} color_t;
+
+/* Aborts the freeze analysis by jumping to the setjmp() point saved in
+ * t->err.  Never returns. */
+UPB_NORETURN static void err(tarjan *t) { longjmp(t->err, 1); }
+/* Records an "out of memory" error in t->status and aborts via err().  Never
+ * returns. */
+UPB_NORETURN static void oom(tarjan *t) {
+  upb_status_seterrmsg(t->status, "out of memory");
+  err(t);
+}
+
+/* Returns the attr recorded for "r", or 0 if "r" is not in the attr map. */
+static uint64_t trygetattr(const tarjan *t, const upb_refcounted *r) {
+  upb_value attr;
+  if (!upb_inttable_lookupptr(&t->objattr, r, &attr)) return 0;
+  return upb_value_getuint64(attr);
+}
+
+/* Returns the attr recorded for "r"; "r" is expected to be in the attr map
+ * already (asserted). */
+static uint64_t getattr(const tarjan *t, const upb_refcounted *r) {
+  upb_value attr;
+  bool present = upb_inttable_lookupptr(&t->objattr, r, &attr);
+  UPB_ASSERT_VAR(present, present);
+  return upb_value_getuint64(attr);
+}
+
+/* Replaces the attr stored for "r" with "attr" (inserting it if absent).
+ *
+ * The old entry is removed first, so a failed insert would leave "r" with no
+ * attr at all; in that case we abort the freeze via oom() instead of silently
+ * continuing with a corrupted attr map.  All callers run during the analysis
+ * phase, where aborting with longjmp() is allowed. */
+static void setattr(tarjan *t, const upb_refcounted *r, uint64_t attr) {
+  upb_inttable_removeptr(&t->objattr, r, NULL);
+  if (!upb_inttable_insertptr(&t->objattr, r, upb_value_uint64(attr))) {
+    oom(t);
+  }
+}
+
+/* Returns the color of "r".  Objects not yet in the attr map read as attr 0,
+ * which is BLACK. */
+static color_t color(tarjan *t, const upb_refcounted *r) {
+  /* Color is always stored in the low 2 bits. */
+  uint64_t attr = trygetattr(t, r);
+  return attr & 0x3;
+}
+
+/* Marks a previously unseen (BLACK) object as GRAY.  The GRAY attr carries no
+ * data beyond the color itself. */
+static void set_gray(tarjan *t, const upb_refcounted *r) {
+  assert(color(t, r) == BLACK);
+  setattr(t, r, GRAY);
+}
+
+/* Pushes an obj onto the Tarjan stack and sets it to GREEN.
+ *
+ * Aborts the freeze (via err()/oom(), which do not return) if the 31-bit
+ * index space is exhausted or if the stack cannot grow. */
+static void push(tarjan *t, const upb_refcounted *r) {
+  assert(color(t, r) == BLACK || color(t, r) == GRAY);
+  /* This defines the attr layout for the GREEN state.  "index" and "lowlink"
+   * get 31 bits, which is plenty (limit of 2B objects frozen at a time). */
+  setattr(t, r, GREEN | (t->index << 2) | (t->index << 33));
+  if (++t->index == 0x80000000) {
+    upb_status_seterrmsg(t->status, "too many objects to freeze");
+    err(t);
+  }
+  /* A failed push would leave "r" GREEN but absent from the stack, so the
+   * matching pop() loop in do_tarjan() could never find it. */
+  if (!upb_inttable_push(&t->stack, upb_value_ptr((void*)r))) oom(t);
+}
+
+/* Removes the top object from the Tarjan stack, recolors it WHITE and records
+ * in its attr the number of the SCC group it now belongs to. */
+static upb_refcounted *pop(tarjan *t) {
+  upb_value top = upb_inttable_pop(&t->stack);
+  upb_refcounted *r = upb_value_getptr(top);
+
+  assert(color(t, r) == GREEN);
+  /* WHITE attr layout: color in the low bits, group number from bit 8 up.
+   * The top of the group stack is [group, NULL], so the group itself sits
+   * two entries below the current count. */
+  setattr(t, r, WHITE | ((upb_inttable_count(&t->groups) - 2) << 8));
+  return r;
+}
+
+/* Allocates the refcount for a new SCC group and appends the pair
+ * [group, NULL leader] to t->groups (the leader is filled in later by
+ * groupleader()).  Aborts the freeze via oom() on allocation failure.
+ *
+ * Ownership: once the first push succeeds, "group" belongs to t->groups and
+ * is released by freeze()'s error path, which frees every value in that
+ * table.  It must therefore only be freed here if the first push itself
+ * failed; freeing it after a failed second push would be a double free. */
+static void tarjan_newgroup(tarjan *t) {
+  uint32_t *group = malloc(sizeof(*group));
+  if (!group) oom(t);
+  *group = 0;
+
+  if (!upb_inttable_push(&t->groups, upb_value_ptr(group))) {
+    /* Not in the table yet, so we still own it. */
+    free(group);
+    oom(t);
+  }
+
+  /* Push the empty group leader (we'll fill in leader later). */
+  if (!upb_inttable_push(&t->groups, upb_value_ptr(NULL))) {
+    /* "group" is already owned by t->groups; do not free it here. */
+    oom(t);
+  }
+}
+
+/* Returns the 31-bit Tarjan index stored in a GREEN object's attr. */
+static uint32_t idx(tarjan *t, const upb_refcounted *r) {
+  uint64_t attr;
+
+  assert(color(t, r) == GREEN);
+  attr = getattr(t, r);
+  return (attr >> 2) & 0x7FFFFFFF;
+}
+
+/* Returns the lowlink stored in a GREEN object's attr, or UINT32_MAX for an
+ * object of any other color. */
+static uint32_t lowlink(tarjan *t, const upb_refcounted *r) {
+  if (color(t, r) != GREEN) return UINT32_MAX;
+  return getattr(t, r) >> 33;
+}
+
+/* Overwrites the lowlink of a GREEN object, keeping its color and index (the
+ * low 33 bits of the attr) unchanged. */
+static void set_lowlink(tarjan *t, const upb_refcounted *r, uint32_t lowlink) {
+  uint64_t low_bits;
+
+  assert(color(t, r) == GREEN);
+  low_bits = getattr(t, r) & 0x1FFFFFFFF;
+  setattr(t, r, low_bits | ((uint64_t)lowlink << 33));
+}
+
+/* Returns the malloc'd refcount of the SCC group assigned to a WHITE object,
+ * found in t->groups under the group number stored in the object's attr. */
+static uint32_t *group(tarjan *t, upb_refcounted *r) {
+  uint64_t num;
+  upb_value entry;
+  bool present;
+
+  assert(color(t, r) == WHITE);
+  num = getattr(t, r) >> 8;
+  present = upb_inttable_lookup(&t->groups, num, &entry);
+  UPB_ASSERT_VAR(present, present);
+  return upb_value_getptr(entry);
+}
+
+/* Returns the leader of the SCC group that the WHITE object "r" belongs to.
+ * When the group has no leader yet, "r" is recorded as the leader and
+ * returned. */
+static upb_refcounted *groupleader(tarjan *t, upb_refcounted *r) {
+  uint64_t slot;
+  upb_value entry;
+  bool present;
+  upb_refcounted *leader;
+
+  assert(color(t, r) == WHITE);
+  /* The leader is stored in the entry right after the group's refcount. */
+  slot = (getattr(t, r) >> 8) + 1;
+  present = upb_inttable_lookup(&t->groups, slot, &entry);
+  UPB_ASSERT_VAR(present, present);
+
+  leader = upb_value_getptr(entry);
+  if (leader) return leader;
+
+  /* Still the NULL placeholder: replace it with "r". */
+  upb_inttable_remove(&t->groups, slot, NULL);
+  upb_inttable_insert(&t->groups, slot, upb_value_ptr(r));
+  return r;
+}
+
+
+/* Tarjan's algorithm --------------------------------------------------------*/
+
+/* See:
+ *   http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm */
+static void do_tarjan(const upb_refcounted *obj, tarjan *t);
+
+/* Visit callback for one obj -> subobj edge during the SCC analysis.
+ * "closure" is the tarjan state.  Recurses into unvisited subobjs and updates
+ * obj's lowlink; aborts via err() if the recursion exceeds t->maxdepth. */
+static void tarjan_visit(const upb_refcounted *obj,
+                         const upb_refcounted *subobj,
+                         void *closure) {
+  tarjan *t = closure;
+  if (++t->depth > t->maxdepth) {
+    upb_status_seterrf(t->status, "graph too deep to freeze (%d)", t->maxdepth);
+    err(t);
+  } else if (subobj->is_frozen || color(t, subobj) == WHITE) {
+    /* Do nothing: we don't want to visit or color already-frozen nodes,
+     * and WHITE nodes have already been assigned a SCC. */
+  } else if (color(t, subobj) < GREEN) {
+    /* Subdef has not yet been visited; recurse on it. */
+    do_tarjan(subobj, t);
+    set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), lowlink(t, subobj)));
+  } else if (color(t, subobj) == GREEN) {
+    /* Subdef is in the stack and hence in the current SCC. */
+    set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), idx(t, subobj)));
+  }
+  /* Balances the increment above (err() does not return). */
+  --t->depth;
+}
+
+/* Runs Tarjan's algorithm starting at "obj": pushes it, visits its edges, and
+ * if obj turns out to be the root of an SCC, creates a new group and pops the
+ * whole SCC off the stack into it. */
+static void do_tarjan(const upb_refcounted *obj, tarjan *t) {
+  if (color(t, obj) == BLACK) {
+    /* We haven't seen this object's group; mark the whole group GRAY. */
+    const upb_refcounted *o = obj;
+    do { set_gray(t, o); } while ((o = o->next) != obj);
+  }
+
+  push(t, obj);
+  visit(obj, tarjan_visit, t);
+  /* lowlink == index means obj is the root of its SCC. */
+  if (lowlink(t, obj) == idx(t, obj)) {
+    tarjan_newgroup(t);
+    /* pop() assigns each popped object to the group just created. */
+    while (pop(t) != obj)
+      ;
+  }
+}
+
+
+/* freeze() ------------------------------------------------------------------*/
+
+/* Visit callback for pass 2 of freeze(): takes a group ref on subobj's group
+ * for every r -> subobj edge that now crosses a group boundary.  "_t" is the
+ * tarjan state. */
+static void crossref(const upb_refcounted *r, const upb_refcounted *subobj,
+                     void *_t) {
+  tarjan *t = _t;
+  assert(color(t, r) > BLACK);
+
+  /* Only subobjs that are in our attr map are of interest. */
+  if (color(t, subobj) == BLACK) return;
+  /* Still in the same group: the ref stays implicit. */
+  if (r->group == subobj->group) return;
+
+  /* Previously this ref was not reflected in subobj->group because they
+   * were in the same group; now that they are split a ref must be taken. */
+  refgroup(subobj->group);
+}
+
+/* Freezes every object reachable from the "n" objects in "roots".
+ *
+ * The analysis phase (do_tarjan) may fail, in which case the error is
+ * reported in "s", the inputs are left unmodified and false is returned.
+ * Once analysis succeeds, the three mutation passes below cannot fail.
+ * "maxdepth" bounds the recursion depth of the analysis. */
+static bool freeze(upb_refcounted *const*roots, int n, upb_status *s,
+                   int maxdepth) {
+  volatile bool ret = false;
+  int i;
+  upb_inttable_iter iter;
+
+  /* We run in two passes so that we can allocate all memory before performing
+   * any mutation of the input -- this allows us to leave the input unchanged
+   * in the case of memory allocation failure. */
+  tarjan t;
+  t.index = 0;
+  t.depth = 0;
+  t.maxdepth = maxdepth;
+  t.status = s;
+  if (!upb_inttable_init(&t.objattr, UPB_CTYPE_UINT64)) goto err1;
+  if (!upb_inttable_init(&t.stack, UPB_CTYPE_PTR)) goto err2;
+  if (!upb_inttable_init(&t.groups, UPB_CTYPE_PTR)) goto err3;
+  /* err()/oom() longjmp back here with a nonzero value; "ret" is still false
+   * at that point, so err4 frees the group refcounts allocated so far. */
+  if (setjmp(t.err) != 0) goto err4;
+
+
+  for (i = 0; i < n; i++) {
+    if (color(&t, roots[i]) < GREEN) {
+      do_tarjan(roots[i], &t);
+    }
+  }
+
+  /* If we've made it this far, no further errors are possible so it's safe to
+   * mutate the objects without risk of leaving them in an inconsistent state. */
+  ret = true;
+
+  /* The transformation that follows requires care.  The preconditions are:
+   * - all objects in attr map are WHITE or GRAY, and are in mutable groups
+   *   (groups of all mutable objs)
+   * - no ref2(to, from) refs have incremented count(to) if both "to" and
+   *   "from" are in our attr map (this follows from invariants (2) and (3)) */
+
+  /* Pass 1: we remove WHITE objects from their mutable groups, and add them to
+   * new groups  according to the SCC's we computed.  These new groups will
+   * consist of only frozen objects.  None will be immediately collectible,
+   * because WHITE objects are by definition reachable from one of "roots",
+   * which the caller must own refs on. */
+  upb_inttable_begin(&iter, &t.objattr);
+  for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
+    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
+    /* Since removal from a singly-linked list requires access to the object's
+     * predecessor, we consider obj->next instead of obj for moving.  With the
+     * while() loop we guarantee that we will visit every node's predecessor.
+     * Proof:
+     *  1. every node's predecessor is in our attr map.
+     *  2. though the loop body may change a node's predecessor, it will only
+     *     change it to be the node we are currently operating on, so with a
+     *     while() loop we guarantee ourselves the chance to remove each node. */
+    while (color(&t, obj->next) == WHITE &&
+           group(&t, obj->next) != obj->next->group) {
+      upb_refcounted *leader;
+
+      /* Remove from old group. */
+      upb_refcounted *move = obj->next;
+      if (obj == move) {
+        /* Removing the last object from a group. */
+        assert(*obj->group == obj->individual_count);
+        free(obj->group);
+      } else {
+        obj->next = move->next;
+        /* This may decrease to zero; we'll collect GRAY objects (if any) that
+         * remain in the group in the third pass. */
+        assert(*move->group >= move->individual_count);
+        *move->group -= move->individual_count;
+      }
+
+      /* Add to new group. */
+      leader = groupleader(&t, move);
+      if (move == leader) {
+        /* First object added to new group is its leader. */
+        move->group = group(&t, move);
+        move->next = move;
+        *move->group = move->individual_count;
+      } else {
+        /* Group already has at least one object in it. */
+        assert(leader->group == group(&t, move));
+        move->group = group(&t, move);
+        move->next = leader->next;
+        leader->next = move;
+        *move->group += move->individual_count;
+      }
+
+      move->is_frozen = true;
+    }
+  }
+
+  /* Pass 2: GRAY and WHITE objects "obj" with ref2(to, obj) references must
+   * increment count(to) if group(obj) != group(to) (which could now be the
+   * case if "to" was just frozen). */
+  upb_inttable_begin(&iter, &t.objattr);
+  for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
+    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
+    visit(obj, crossref, &t);
+  }
+
+  /* Pass 3: GRAY objects are collected if their group's refcount dropped to
+   * zero when we removed its white nodes.  This can happen if they had only
+   * been kept alive by virtue of sharing a group with an object that was just
+   * frozen.
+   *
+   * It is important that we do this last, since the GRAY object's free()
+   * function could call unref2() on just-frozen objects, which will decrement
+   * refs that were added in pass 2. */
+  upb_inttable_begin(&iter, &t.objattr);
+  for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
+    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
+    if (obj->group == NULL || *obj->group == 0) {
+      if (obj->group) {
+        upb_refcounted *o;
+
+        /* We eagerly free() the group's count (since we can't easily determine
+         * the group's remaining size it's the easiest way to ensure it gets
+         * done). */
+        free(obj->group);
+
+        /* Visit to release ref2's (done in a separate pass since release_ref2
+         * depends on o->group being unmodified so it can test merged()). */
+        o = obj;
+        do { visit(o, release_ref2, NULL); } while ((o = o->next) != obj);
+
+        /* Mark "group" fields as NULL so we know to free the objects later in
+         * this loop, but also don't try to delete the group twice. */
+        o = obj;
+        do { o->group = NULL; } while ((o = o->next) != obj);
+      }
+      freeobj(obj);
+    }
+  }
+
+  /* Cleanup.  On success the group refcounts are now owned by the frozen
+   * objects, so they are only freed here when the analysis failed. */
+err4:
+  if (!ret) {
+    upb_inttable_begin(&iter, &t.groups);
+    for(; !upb_inttable_done(&iter); upb_inttable_next(&iter))
+      free(upb_value_getptr(upb_inttable_iter_value(&iter)));
+  }
+  upb_inttable_uninit(&t.groups);
+err3:
+  upb_inttable_uninit(&t.stack);
+err2:
+  upb_inttable_uninit(&t.objattr);
+err1:
+  return ret;
+}
+
+
+/* Misc internal functions  ***************************************************/
+
+/* Returns true if "r" and "r2" share the same group refcount, i.e. they are
+ * members of the same group. */
+static bool merged(const upb_refcounted *r, const upb_refcounted *r2) {
+  return r->group == r2->group;
+}
+
+/* Merges the group of "from" into the group of "r": the refcounts are summed
+ * into r's group, from's refcount is freed, every member of from's group is
+ * repointed at r's refcount, and the two circular lists are joined.  Does
+ * nothing if the two objects are already in the same group. */
+static void merge(upb_refcounted *r, upb_refcounted *from) {
+  upb_refcounted *base;
+  upb_refcounted *tmp;
+
+  if (merged(r, from)) return;
+  *r->group += *from->group;
+  free(from->group);
+  base = from;
+
+  /* Set all refcount pointers in the "from" chain to the merged refcount.
+   *
+   * TODO(haberman): this linear algorithm can result in an overall O(n^2) bound
+   * if the user continuously extends a group by one object.  Prevent this by
+   * using one of the techniques in this paper:
+   *     ftp://www.ncedc.org/outgoing/geomorph/dino/orals/p245-tarjan.pdf */
+  do { from->group = r->group; } while ((from = from->next) != base);
+
+  /* Merge the two circularly linked lists by swapping their next pointers. */
+  tmp = r->next;
+  r->next = base->next;
+  base->next = tmp;
+}
+
+static void unref(const upb_refcounted *r);
+
+/* Visit callback that drops the ref2 that "obj" holds on "subobj".  A real
+ * group ref is only released when the two objects are in different groups;
+ * "closure" is unused. */
+static void release_ref2(const upb_refcounted *obj,
+                         const upb_refcounted *subobj,
+                         void *closure) {
+  UPB_UNUSED(closure);
+  untrack(subobj, obj, true);
+
+  /* Refs inside a single group are implicit, so nothing to release. */
+  if (merged(obj, subobj)) return;
+
+  assert(subobj->is_frozen);
+  unref(subobj);
+}
+
+/* Drops one ref from r's group.  If unrefgroup() reports that the group is
+ * now dead, frees the group refcount, releases the ref2's held by every
+ * member, and then frees every member of the group. */
+static void unref(const upb_refcounted *r) {
+  if (unrefgroup(r->group)) {
+    const upb_refcounted *o;
+
+    free(r->group);
+
+    /* In two passes, since release_ref2 needs a guarantee that any subobjs
+     * are alive. */
+    o = r;
+    do { visit(o, release_ref2, NULL); } while((o = o->next) != r);
+
+    o = r;
+    do {
+      /* Read "next" before freeing, since freeobj() destroys "o". */
+      const upb_refcounted *next = o->next;
+      assert(o->is_frozen || o->individual_count == 0);
+      freeobj((upb_refcounted*)o);
+      o = next;
+    } while(o != r);
+  }
+}
+
+/* Releases the ref-tracking state of "o" and then destroys the object through
+ * its vtbl "free" callback. */
+static void freeobj(upb_refcounted *o) {
+  trackfree(o);
+  o->vtbl->free(o);
+}
+
+
+/* Public interface ***********************************************************/
+
+/* Initializes "r" as a mutable object alone in its own group, using "vtbl"
+ * for its callbacks, and takes an initial ref on behalf of "owner".
+ * Returns false (with nothing left allocated) if memory allocation fails. */
+bool upb_refcounted_init(upb_refcounted *r,
+                         const struct upb_refcounted_vtbl *vtbl,
+                         const void *owner) {
+#ifndef NDEBUG
+  /* Endianness check.  This is unrelated to upb_refcounted, it's just a
+   * convenient place to put the check that we can be assured will run for
+   * basically every program using upb. */
+  const int x = 1;
+#ifdef UPB_BIG_ENDIAN
+  assert(*(char*)&x != 1);
+#else
+  assert(*(char*)&x == 1);
+#endif
+#endif
+
+  /* A fresh object forms a one-element circular list. */
+  r->next = r;
+  r->vtbl = vtbl;
+  r->individual_count = 0;
+  r->is_frozen = false;
+  r->group = malloc(sizeof(*r->group));
+  if (!r->group) return false;
+  *r->group = 0;
+  if (!trackinit(r)) {
+    free(r->group);
+    return false;
+  }
+  upb_refcounted_ref(r, owner);
+  return true;
+}
+
+/* Returns true if "r" has been frozen. */
+bool upb_refcounted_isfrozen(const upb_refcounted *r) {
+  return r->is_frozen;
+}
+
+/* Takes a ref on "r" on behalf of "owner".  Mutable objects also bump their
+ * individual count; the group count is bumped in every case. */
+void upb_refcounted_ref(const upb_refcounted *r, const void *owner) {
+  upb_refcounted *mutable_r = (upb_refcounted*)r;
+
+  track(r, owner, false);
+  if (!r->is_frozen) {
+    mutable_r->individual_count++;
+  }
+  refgroup(r->group);
+}
+
+/* Releases the ref that "owner" holds on "r".  Mutable objects also drop
+ * their individual count; the group ref is released in every case. */
+void upb_refcounted_unref(const upb_refcounted *r, const void *owner) {
+  upb_refcounted *mutable_r = (upb_refcounted*)r;
+
+  untrack(r, owner, false);
+  if (!r->is_frozen) {
+    mutable_r->individual_count--;
+  }
+  unref(r);
+}
+
+/* Records a ref2 from the mutable object "from" to "r".  A mutable target is
+ * merged into one group with "from"; a frozen target gets a group ref. */
+void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from) {
+  assert(!from->is_frozen);  /* Non-const pointer implies this. */
+  track(r, from, true);
+
+  if (!r->is_frozen) {
+    merge((upb_refcounted*)r, from);
+    return;
+  }
+  refgroup(r->group);
+}
+
+/* Releases a ref2 from the mutable object "from" to "r".  Only a frozen
+ * target holds a real group ref to drop; a mutable target is expected to
+ * share a group with "from". */
+void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from) {
+  assert(!from->is_frozen);  /* Non-const pointer implies this. */
+  untrack(r, from, true);
+
+  if (!r->is_frozen) {
+    assert(merged(r, from));
+    return;
+  }
+  unref(r);
+}
+
+/* Moves a ref on "r" from owner "from" to owner "to".  The new ref is taken
+ * before the old one is released.  A NULL "to" just releases; a NULL "from"
+ * just takes a ref. */
+void upb_refcounted_donateref(
+    const upb_refcounted *r, const void *from, const void *to) {
+  assert(from != to);
+  if (to) {
+    upb_refcounted_ref(r, to);
+  }
+  if (from) {
+    upb_refcounted_unref(r, from);
+  }
+}
+
+/* Forwards to checkref() for a regular (non-ref2) ref of "owner" on "r".
+ * checkref() is a no-op unless UPB_DEBUG_REFS is defined. */
+void upb_refcounted_checkref(const upb_refcounted *r, const void *owner) {
+  checkref(r, owner, false);
+}
+
+/* Public entry point for freezing: asserts that none of the "n" roots is
+ * already frozen, then hands off to freeze(). */
+bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s,
+                           int maxdepth) {
+  int i = 0;
+  while (i < n) {
+    assert(!roots[i]->is_frozen);
+    i++;
+  }
+  return freeze(roots, n, s, maxdepth);
+}
+
+
+#include <stdlib.h>
+
+/* Fallback implementation if the shim is not specialized by the JIT.
+ *
+ * SHIM_WRITER(type, ctype) defines upb_shim_set<type>(), a value handler that
+ * treats the closure "c" as a byte buffer and the handler data "hd" as a
+ * upb_shim_data.  It stores "val" at byte offset d->offset and, when
+ * d->hasbit > 0, also sets bit number d->hasbit in the buffer.  Always
+ * returns true. */
+#define SHIM_WRITER(type, ctype)                                              \
+  bool upb_shim_set ## type (void *c, const void *hd, ctype val) {            \
+    uint8_t *m = c;                                                           \
+    const upb_shim_data *d = hd;                                              \
+    if (d->hasbit > 0)                                                        \
+      *(uint8_t*)&m[d->hasbit / 8] |= 1 << (d->hasbit % 8);                   \
+    *(ctype*)&m[d->offset] = val;                                             \
+    return true;                                                              \
+  }                                                                           \
+
+SHIM_WRITER(double, double)
+SHIM_WRITER(float,  float)
+SHIM_WRITER(int32,  int32_t)
+SHIM_WRITER(int64,  int64_t)
+SHIM_WRITER(uint32, uint32_t)
+SHIM_WRITER(uint64, uint64_t)
+SHIM_WRITER(bool,   bool)
+#undef SHIM_WRITER
+
+/* Installs on "h" the upb_shim_set<type>() handler matching the type of
+ * field "f", with handler data recording "offset" and "hasbit".
+ *
+ * Returns false if the handler data cannot be allocated, or whatever the
+ * type-specific upb_handlers_set*() call returns.  Field types not listed in
+ * the switch hit assert(false) and yield false. */
+bool upb_shim_set(upb_handlers *h, const upb_fielddef *f, size_t offset,
+                  int32_t hasbit) {
+  upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+  bool ok;
+
+  upb_shim_data *d = malloc(sizeof(*d));
+  if (!d) return false;
+  d->offset = offset;
+  d->hasbit = hasbit;
+
+  upb_handlerattr_sethandlerdata(&attr, d);
+  upb_handlerattr_setalwaysok(&attr, true);
+  /* NOTE(review): presumably this hands ownership of "d" to "h", which will
+   * free() it; the return value is not checked -- confirm against
+   * upb_handlers_addcleanup(). */
+  upb_handlers_addcleanup(h, d, free);
+
+#define TYPE(u, l) \
+  case UPB_TYPE_##u: \
+    ok = upb_handlers_set##l(h, f, upb_shim_set##l, &attr); break;
+
+  ok = false;
+
+  switch (upb_fielddef_type(f)) {
+    TYPE(INT64,  int64);
+    TYPE(INT32,  int32);
+    TYPE(ENUM,   int32);
+    TYPE(UINT64, uint64);
+    TYPE(UINT32, uint32);
+    TYPE(DOUBLE, double);
+    TYPE(FLOAT,  float);
+    TYPE(BOOL,   bool);
+    default: assert(false); break;
+  }
+#undef TYPE
+
+  upb_handlerattr_uninit(&attr);
+  return ok;
+}
+
+/* If the handler registered on "h" for selector "s" is one of the
+ * upb_shim_set<type>() functions, stores the corresponding field type in
+ * "*type" and returns the handler data for that selector.  Otherwise returns
+ * NULL and leaves "*type" untouched.
+ *
+ * Note that int32 shims are always reported as UPB_TYPE_INT32, including
+ * those installed for enum fields. */
+const upb_shim_data *upb_shim_getdata(const upb_handlers *h, upb_selector_t s,
+                                      upb_fieldtype_t *type) {
+  upb_func *f = upb_handlers_gethandler(h, s);
+
+  if ((upb_int64_handlerfunc*)f == upb_shim_setint64) {
+    *type = UPB_TYPE_INT64;
+  } else if ((upb_int32_handlerfunc*)f == upb_shim_setint32) {
+    *type = UPB_TYPE_INT32;
+  } else if ((upb_uint64_handlerfunc*)f == upb_shim_setuint64) {
+    *type = UPB_TYPE_UINT64;
+  } else if ((upb_uint32_handlerfunc*)f == upb_shim_setuint32) {
+    *type = UPB_TYPE_UINT32;
+  } else if ((upb_double_handlerfunc*)f == upb_shim_setdouble) {
+    *type = UPB_TYPE_DOUBLE;
+  } else if ((upb_float_handlerfunc*)f == upb_shim_setfloat) {
+    *type = UPB_TYPE_FLOAT;
+  } else if ((upb_bool_handlerfunc*)f == upb_shim_setbool) {
+    *type = UPB_TYPE_BOOL;
+  } else {
+    return NULL;
+  }
+
+  return (const upb_shim_data*)upb_handlers_gethandlerdata(h, s);
+}
+
+
+#include <stdlib.h>
+#include <string.h>
+
+/* vtbl "free" callback for symtabs: releases the ref the symtab holds on
+ * every def in its table, then destroys the table and the symtab itself. */
+static void upb_symtab_free(upb_refcounted *r) {
+  upb_symtab *symtab = (upb_symtab*)r;
+  upb_strtable_iter it;
+
+  for (upb_strtable_begin(&it, &symtab->symtab);
+       !upb_strtable_done(&it);
+       upb_strtable_next(&it)) {
+    const upb_def *entry = upb_value_getptr(upb_strtable_iter_value(&it));
+    upb_def_unref(entry, symtab);
+  }
+
+  upb_strtable_uninit(&symtab->symtab);
+  free(symtab);
+}
+
+
+/* Creates a new, empty, mutable symtab with one ref owned by "owner".
+ *
+ * Returns NULL if any allocation fails; in that case nothing is leaked.  The
+ * string table is set up before the refcounted base so that a failure in
+ * either step can be unwound without invoking upb_symtab_free() on a
+ * half-initialized object. */
+upb_symtab *upb_symtab_new(const void *owner) {
+  static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_symtab_free};
+  upb_symtab *s = malloc(sizeof(*s));
+  if (!s) return NULL;
+
+  if (!upb_strtable_init(&s->symtab, UPB_CTYPE_PTR)) {
+    free(s);
+    return NULL;
+  }
+
+  if (!upb_refcounted_init(upb_symtab_upcast_mutable(s), &vtbl, owner)) {
+    upb_strtable_uninit(&s->symtab);
+    free(s);
+    return NULL;
+  }
+
+  return s;
+}
+
+/* Freezes the symtab "s", which must not already be frozen.  The result of
+ * the underlying freeze is only checked via UPB_ASSERT_VAR; no status is
+ * collected (NULL is passed). */
+void upb_symtab_freeze(upb_symtab *s) {
+  upb_refcounted *r;
+  bool ok;
+
+  assert(!upb_symtab_isfrozen(s));
+  r = upb_symtab_upcast_mutable(s);
+  /* The symtab does not take ref2's (see refcounted.h) on the defs, because
+   * defs cannot refer back to the table and therefore cannot create cycles.  So
+   * 0 will suffice for maxdepth here. */
+  ok = upb_refcounted_freeze(&r, 1, NULL, 0);
+  UPB_ASSERT_VAR(ok, ok);
+}
+
+/* Returns the def registered under the name "sym", or NULL if there is no
+ * such entry. */
+const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) {
+  upb_value entry;
+  if (!upb_strtable_lookup(&s->symtab, sym, &entry)) return NULL;
+  return upb_value_getptr(entry);
+}
+
+/* Looks up "sym" and returns the result of upb_dyncast_msgdef() on the def
+ * found, or NULL if the name is unknown (or maps to a NULL def). */
+const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
+  upb_value entry;
+  upb_def *found;
+
+  if (!upb_strtable_lookup(&s->symtab, sym, &entry)) return NULL;
+  found = upb_value_getptr(entry);
+  if (!found) return NULL;
+  return upb_dyncast_msgdef(found);
+}
+
+/* Looks up "sym" and returns the result of upb_dyncast_enumdef() on the def
+ * found, or NULL if the name is unknown (or maps to a NULL def). */
+const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
+  upb_value entry;
+  upb_def *found;
+
+  if (!upb_strtable_lookup(&s->symtab, sym, &entry)) return NULL;
+  found = upb_value_getptr(entry);
+  if (!found) return NULL;
+  return upb_dyncast_enumdef(found);
+}
+
+/* Resolves the symbol "sym", which appears inside the scope named "base", to
+ * its definition in table "t".  Returns NULL for an empty symbol or when no
+ * entry is found.  Only absolute names (leading '.') are supported. */
+static upb_def *upb_resolvename(const upb_strtable *t,
+                                const char *base, const char *sym) {
+  upb_value entry;
+
+  if(sym[0] == '\0') return NULL;
+
+  if(sym[0] != '.') {
+    /* Relative names would require removing components from base until we
+     * find an entry or run out.
+     * TODO: This branch is totally broken, but currently not used. */
+    (void)base;
+    assert(false);
+    return NULL;
+  }
+
+  /* Symbols starting with '.' are absolute, so we do a single lookup,
+   * skipping the leading '.'. */
+  if (!upb_strtable_lookup(t, sym + 1, &entry)) return NULL;
+  return upb_value_getptr(entry);
+}
+
+/* Resolves "sym" relative to "base" against the defs in symtab "s"; see
+ * upb_resolvename() for the supported name forms. */
+const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
+                                  const char *sym) {
+  return upb_resolvename(&s->symtab, base, sym);
+}
+
+/* Starts a depth-first traversal at "def", recursing into any subdefs
+ * (ie. submessage types).  Adds duplicates of existing defs to addtab
+ * wherever necessary, so that the resulting symtab will be consistent once
+ * addtab is added.
+ *
+ * More specifically, if any def D is found in the DFS that:
+ *
+ *   1. can reach a def that is being replaced by something in addtab, AND
+ *
+ *   2. is not itself being replaced already (ie. this name doesn't already
+ *      exist in addtab)
+ *
+ * ...then a duplicate (new copy) of D will be added to addtab.
+ *
+ * Returns true if this happened for any def reachable from "def."
+ *
+ * It is slightly tricky to do this correctly in the presence of cycles.  If we
+ * detect that our DFS has hit a cycle, we might not yet know if any SCCs on
+ * our stack can reach a def in addtab or not.  Once we figure this out, that
+ * answer needs to apply to *all* defs in these SCCs, even if we visited them
+ * already.  So a straight up one-pass cycle-detecting DFS won't work.
+ *
+ * To work around this problem, we traverse each SCC (which we already
+ * computed, since these defs are frozen) as a single node.  We first compute
+ * whether the SCC as a whole can reach any def in addtab, then we dup (or not)
+ * the entire SCC.  This requires breaking the encapsulation of upb_refcounted,
+ * since that is where we get the data about what SCC we are in. */
+/* Parameters:
+ *   def       - frozen def at which to start; its whole SCC is processed.
+ *   addtab    - name -> def table of defs being added; dups are inserted here.
+ *   new_owner - owner of the refs on any dups created.
+ *   seen      - memo table keyed by SCC (group pointer) -> need_dup result.
+ *   s         - receives an error message on failure.
+ *
+ * Returns whether the SCC of "def" had to be dup'd.  On error returns false
+ * with "s" set, so callers must check upb_ok(s) to tell the two apart. */
+static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab,
+                            const void *new_owner, upb_inttable *seen,
+                            upb_status *s) {
+  upb_value v;
+  bool need_dup;
+  const upb_def *base;
+  const void* memoize_key;
+
+  /* Memoize results of this function for efficiency (since we're traversing a
+   * DAG this is not needed to limit the depth of the search).
+   *
+   * We memoize by SCC instead of by individual def. */
+  memoize_key = def->base.group;
+
+  if (upb_inttable_lookupptr(seen, memoize_key, &v))
+    return upb_value_getbool(v);
+
+  /* Visit submessages for all messages in the SCC. */
+  need_dup = false;
+  base = def;
+  do {
+    upb_value v;
+    const upb_msgdef *m;
+
+    assert(upb_def_isfrozen(def));
+    /* "continue" in a do-while jumps to the loop condition, which advances
+     * "def" to the next member of the SCC. */
+    if (def->type == UPB_DEF_FIELD) continue;
+    if (upb_strtable_lookup(addtab, upb_def_fullname(def), &v)) {
+      need_dup = true;
+    }
+
+    /* For messages, continue the recursion by visiting all subdefs, but only
+     * ones in different SCCs. */
+    m = upb_dyncast_msgdef(def);
+    if (m) {
+      upb_msg_field_iter i;
+      for(upb_msg_field_begin(&i, m);
+          !upb_msg_field_done(&i);
+          upb_msg_field_next(&i)) {
+        upb_fielddef *f = upb_msg_iter_field(&i);
+        const upb_def *subdef;
+
+        if (!upb_fielddef_hassubdef(f)) continue;
+        subdef = upb_fielddef_subdef(f);
+
+        /* Skip subdefs in this SCC. */
+        if (def->base.group == subdef->base.group) continue;
+
+        /* |= to avoid short-circuit; we need its side-effects. */
+        need_dup |= upb_resolve_dfs(subdef, addtab, new_owner, seen, s);
+        if (!upb_ok(s)) return false;
+      }
+    }
+  } while ((def = (upb_def*)def->base.next) != base);
+
+  if (need_dup) {
+    /* Dup all defs in this SCC that don't already have entries in addtab. */
+    def = base;
+    do {
+      const char *name;
+
+      if (def->type == UPB_DEF_FIELD) continue;
+      name = upb_def_fullname(def);
+      if (!upb_strtable_lookup(addtab, name, NULL)) {
+        upb_def *newdef = upb_def_dup(def, new_owner);
+        if (!newdef) goto oom;
+        newdef->came_from_user = false;
+        if (!upb_strtable_insert(addtab, name, upb_value_ptr(newdef))) {
+          /* The dup never made it into addtab, so nobody else will release
+           * it; drop our ref here to avoid leaking it. */
+          upb_def_unref(newdef, new_owner);
+          goto oom;
+        }
+      }
+    } while ((def = (upb_def*)def->base.next) != base);
+  }
+
+  /* The result of this insert is not checked; the computed answer is returned
+   * whether or not it could be memoized. */
+  upb_inttable_insertptr(seen, memoize_key, upb_value_bool(need_dup));
+  return need_dup;
+
+oom:
+  upb_status_seterrmsg(s, "out of memory");
+  return false;
+}
+
+/* TODO(haberman): we need a lot more testing of error conditions.
+ * The came_from_user stuff in particular is not tested. */
+bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
+                    upb_status *status) {
+  int i;
+  upb_strtable_iter iter;
+  upb_def **add_defs = NULL;
+  upb_strtable addtab;
+  upb_inttable seen;
+
+  assert(!upb_symtab_isfrozen(s));
+  if (!upb_strtable_init(&addtab, UPB_CTYPE_PTR)) {
+    upb_status_seterrmsg(status, "out of memory");
+    return false;
+  }
+
+  /* Add new defs to our "add" set. */
+  for (i = 0; i < n; i++) {
+    upb_def *def = defs[i];
+    const char *fullname;
+    upb_fielddef *f;
+
+    if (upb_def_isfrozen(def)) {
+      upb_status_seterrmsg(status, "added defs must be mutable");
+      goto err;
+    }
+    assert(!upb_def_isfrozen(def));
+    fullname = upb_def_fullname(def);
+    if (!fullname) {
+      upb_status_seterrmsg(
+          status, "Anonymous defs cannot be added to a symtab");
+      goto err;
+    }
+
+    f = upb_dyncast_fielddef_mutable(def);
+
+    if (f) {
+      if (!upb_fielddef_containingtypename(f)) {
+        upb_status_seterrmsg(status,
+                             "Standalone fielddefs must have a containing type "
+                             "(extendee) name set");
+        goto err;
+      }
+    } else {
+      if (upb_strtable_lookup(&addtab, fullname, NULL)) {
+        upb_status_seterrf(status, "Conflicting defs named '%s'", fullname);
+        goto err;
+      }
+      /* We need this to back out properly, because if there is a failure we
+       * need to donate the ref back to the caller. */
+      def->came_from_user = true;
+      upb_def_donateref(def, ref_donor, s);
+      if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def)))
+        goto oom_err;
+    }
+  }
+
+  /* Add standalone fielddefs (ie. extensions) to the appropriate messages.
+   * If the appropriate message only exists in the existing symtab, duplicate
+   * it so we have a mutable copy we can add the fields to. */
+  for (i = 0; i < n; i++) {
+    upb_def *def = defs[i];
+    upb_fielddef *f = upb_dyncast_fielddef_mutable(def);
+    const char *msgname;
+    upb_value v;
+    upb_msgdef *m;
+
+    if (!f) continue;
+    msgname = upb_fielddef_containingtypename(f);
+    /* We validated this earlier in this function. */
+    assert(msgname);
+
+    /* If the extendee name is absolutely qualified, move past the initial ".".
+     * TODO(haberman): it is not obvious what it would mean if this was not
+     * absolutely qualified. */
+    if (msgname[0] == '.') {
+      msgname++;
+    }
+
+    if (upb_strtable_lookup(&addtab, msgname, &v)) {
+      /* Extendee is in the set of defs the user asked us to add. */
+      m = upb_value_getptr(v);
+    } else {
+      /* Need to find and dup the extendee from the existing symtab. */
+      const upb_msgdef *frozen_m = upb_symtab_lookupmsg(s, msgname);
+      if (!frozen_m) {
+        upb_status_seterrf(status,
+                           "Tried to extend message %s that does not exist "
+                           "in this SymbolTable.",
+                           msgname);
+        goto err;
+      }
+      m = upb_msgdef_dup(frozen_m, s);
+      if (!m) goto oom_err;
+      if (!upb_strtable_insert(&addtab, msgname, upb_value_ptr(m))) {
+        upb_msgdef_unref(m, s);
+        goto oom_err;
+      }
+    }
+
+    if (!upb_msgdef_addfield(m, f, ref_donor, status)) {
+      goto err;
+    }
+  }
+
+  /* Add dups of any existing def that can reach a def with the same name as
+   * anything in our "add" set. */
+  if (!upb_inttable_init(&seen, UPB_CTYPE_BOOL)) goto oom_err;
+  upb_strtable_begin(&iter, &s->symtab);
+  for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
+    upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
+    upb_resolve_dfs(def, &addtab, s, &seen, status);
+    if (!upb_ok(status)) goto err;
+  }
+  upb_inttable_uninit(&seen);
+
+  /* Now using the table, resolve symbolic references for subdefs. */
+  upb_strtable_begin(&iter, &addtab);
+  for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
+    const char *base;
+    upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
+    upb_msgdef *m = upb_dyncast_msgdef_mutable(def);
+    upb_msg_field_iter j;
+
+    if (!m) continue;
+    /* Type names are resolved relative to the message in which they appear. */
+    base = upb_msgdef_fullname(m);
+
+    for(upb_msg_field_begin(&j, m);
+        !upb_msg_field_done(&j);
+        upb_msg_field_next(&j)) {
+      upb_fielddef *f = upb_msg_iter_field(&j);
+      const char *name = upb_fielddef_subdefname(f);
+      if (name && !upb_fielddef_subdef(f)) {
+        /* Try the lookup in the current set of to-be-added defs first. If not
+         * there, try existing defs. */
+        upb_def *subdef = upb_resolvename(&addtab, base, name);
+        if (subdef == NULL) {
+          subdef = upb_resolvename(&s->symtab, base, name);
+        }
+        if (subdef == NULL) {
+          upb_status_seterrf(
+              status, "couldn't resolve name '%s' in message '%s'", name, base);
+          goto err;
+        } else if (!upb_fielddef_setsubdef(f, subdef, status)) {
+          goto err;
+        }
+      }
+    }
+  }
+
+  /* We need an array of the defs in addtab, for passing to upb_def_freeze. */
+  add_defs = malloc(sizeof(void*) * upb_strtable_count(&addtab));
+  if (add_defs == NULL) goto oom_err;
+  upb_strtable_begin(&iter, &addtab);
+  for (n = 0; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
+    add_defs[n++] = upb_value_getptr(upb_strtable_iter_value(&iter));
+  }
+
+  if (!upb_def_freeze(add_defs, n, status)) goto err;
+
+  /* This must be delayed until all errors have been detected, since error
+   * recovery code uses this table to cleanup defs. */
+  upb_strtable_uninit(&addtab);
+
+  /* TODO(haberman) we don't properly handle errors after this point (like
+   * OOM in upb_strtable_insert() below). */
+  for (i = 0; i < n; i++) {
+    upb_def *def = add_defs[i];
+    const char *name = upb_def_fullname(def);
+    upb_value v;
+    bool success;
+
+    if (upb_strtable_remove(&s->symtab, name, &v)) {
+      const upb_def *def = upb_value_getptr(v);
+      upb_def_unref(def, s);
+    }
+    success = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def));
+    UPB_ASSERT_VAR(success, success == true);
+  }
+  free(add_defs);
+  return true;
+
+oom_err:
+  upb_status_seterrmsg(status, "out of memory");
+err: {
+    /* For defs the user passed in, we need to donate the refs back.  For defs
+     * we dup'd, we need to just unref them. */
+    upb_strtable_begin(&iter, &addtab);
+    for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
+      upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
+      bool came_from_user = def->came_from_user;
+      def->came_from_user = false;
+      if (came_from_user) {
+        upb_def_donateref(def, s, ref_donor);
+      } else {
+        upb_def_unref(def, s);
+      }
+    }
+  }
+  upb_strtable_uninit(&addtab);
+  free(add_defs);
+  assert(!upb_ok(status));
+  return false;
+}
+
+/* Iteration. */
+
+/* Skips forward until the iterator rests on a def whose type equals
+ * iter->type, or until the underlying table iterator is exhausted.  A filter
+ * of UPB_DEF_ANY accepts every def, so nothing is skipped in that case. */
+static void advance_to_matching(upb_symtab_iter *iter) {
+  if (iter->type != UPB_DEF_ANY) {
+    for (;;) {
+      if (upb_strtable_done(&iter->iter)) break;
+      if (upb_symtab_iter_def(iter)->type == iter->type) break;
+      upb_strtable_next(&iter->iter);
+    }
+  }
+}
+
+/* Positions "iter" on the first def of "s" whose type matches "type". */
+void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s,
+                      upb_deftype_t type) {
+  /* Record the filter and rewind to the front of the table, then skip any
+   * leading defs that the filter rejects. */
+  iter->type = type;
+  upb_strtable_begin(&iter->iter, &s->symtab);
+  advance_to_matching(iter);
+}
+
+/* Steps "iter" to the next def that passes its type filter. */
+void upb_symtab_next(upb_symtab_iter *iter) {
+  upb_strtable_iter *pos = &iter->iter;
+
+  upb_strtable_next(pos);
+  advance_to_matching(iter);
+}
+
+/* Returns true once the underlying string-table iterator is exhausted. */
+bool upb_symtab_done(const upb_symtab_iter *iter) {
+  const upb_strtable_iter *pos = &iter->iter;
+  return upb_strtable_done(pos);
+}
+
+/* Returns the def stored at the iterator's current position. */
+const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter) {
+  upb_value current = upb_strtable_iter_value(&iter->iter);
+  return upb_value_getptr(current);
+}
+/*
+** upb_table Implementation
+**
+** Implementation is heavily inspired by Lua's ltable.c.
+*/
+
+
+#include <stdlib.h>
+#include <string.h>
+
+#define UPB_MAXARRSIZE 16  /* 64k.  This is a log2 value: log2ceil() clamps
+                            * its result to it. */
+
+/* From Chromium.  Yields the element count of an array.  The divisor is 1
+ * when sizeof(x) is a multiple of the element size and 0 otherwise, so a
+ * mismatched argument turns into a division by zero. */
+#define ARRAY_SIZE(x) \
+    ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))
+
+static const double MAX_LOAD = 0.85;  /* isfull() reports a table as full once
+                                       * (count + 1) / size exceeds this. */
+
+/* The minimum utilization of the array part of a mixed hash/array table.  This
+ * is a speed/memory-usage tradeoff (though it's not straightforward because of
+ * cache effects).  The lower this is, the more memory we'll use. */
+static const double MIN_DENSITY = 0.1;
+
+/* Reports whether "v" has at most one bit set.  Note that zero is reported
+ * as a power of two as well. */
+bool is_pow2(uint64_t v) {
+  if (v == 0) return true;
+  return (v & (v - 1)) == 0;
+}
+
+/* Returns ceil(log2(v)), clamped to at most UPB_MAXARRSIZE.  An input of 0
+ * yields 0. */
+int log2ceil(uint64_t v) {
+  const bool exact = is_pow2(v);  /* Must be sampled before v is shifted. */
+  int bits;
+
+  /* Count how many times v can be halved before it reaches zero: this is
+   * floor(log2(v)). */
+  for (bits = 0; (v >>= 1) != 0; bits++) {}
+  if (!exact) bits++;  /* Round up when v was not an exact power of two. */
+  return UPB_MIN(UPB_MAXARRSIZE, bits);
+}
+
+/* Duplicates the NUL-terminated string "s" into freshly malloc()ed memory.
+ * Returns NULL when the allocation fails. */
+char *upb_strdup(const char *s) {
+  size_t len = strlen(s);
+  return upb_strdup2(s, len);
+}
+
+/* Copies exactly "len" bytes from "s" into a new malloc()ed buffer and
+ * appends a NUL byte.  The input does not have to be NUL-terminated (it may
+ * be raw binary data).  Returns NULL on allocation failure or when len + 1
+ * would overflow. */
+char *upb_strdup2(const char *s, size_t len) {
+  char *copy;
+
+  if (len == SIZE_MAX) return NULL;  /* len + 1 would wrap around to 0. */
+
+  copy = malloc(len + 1);
+  if (copy == NULL) return NULL;
+
+  memcpy(copy, s, len);
+  copy[len] = 0;
+  return copy;
+}
+
+/* A type to represent the lookup key of either a strtable or an inttable.
+ * Only one member is meaningful at a time; which one depends on the helper
+ * that built the key (intkey() or strkey2()). */
+typedef union {
+  uintptr_t num;  /* Integer key, filled in by intkey(). */
+  struct {
+    const char *str;  /* Key bytes, filled in by strkey2(). */
+    size_t len;       /* Number of bytes in "str". */
+  } str;
+} lookupkey_t;
+
+/* Builds a string lookup key that refers to (does not copy) the "len" bytes
+ * at "str". */
+static lookupkey_t strkey2(const char *str, size_t len) {
+  lookupkey_t key;
+
+  key.str.len = len;
+  key.str.str = str;
+  return key;
+}
+
+/* Builds an integer lookup key holding "key". */
+static lookupkey_t intkey(uintptr_t key) {
+  lookupkey_t wrapped;
+
+  wrapped.num = key;
+  return wrapped;
+}
+
+typedef uint32_t hashfunc_t(upb_tabkey key);  /* Hashes a key as stored in a
+                                               * table entry. */
+typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);  /* Compares a stored
+                                                         * key (k1) against a
+                                                         * lookup key (k2). */
+
+/* Base table (shared code) ***************************************************/
+
+/* Returns the table's entry array with its const qualifier cast away, for
+ * the places that need to modify or free it. */
+static upb_tabent *mutable_entries(upb_table *t) {
+  upb_tabent *ents = (upb_tabent*)t->entries;
+  return ents;
+}
+
+/* Returns true when adding one more entry would push the table's load factor
+ * above MAX_LOAD. */
+static bool isfull(upb_table *t) {
+  double load_after_insert = (double)(t->count + 1) / upb_table_size(t);
+  return load_after_insert > MAX_LOAD;
+}
+
+/* Initializes "t" as an empty table of value type "ctype" whose size is
+ * derived from "size_lg2".  When that size is zero no entry array is
+ * allocated and t->entries is NULL.  Returns false if the allocation
+ * fails. */
+static bool init(upb_table *t, upb_ctype_t ctype, uint8_t size_lg2) {
+  size_t nents;
+  size_t nbytes;
+
+  t->size_lg2 = size_lg2;
+  t->ctype = ctype;
+  t->count = 0;
+
+  nents = upb_table_size(t);
+  t->mask = nents ? nents - 1 : 0;
+
+  nbytes = nents * sizeof(upb_tabent);
+  if (nbytes == 0) {
+    t->entries = NULL;
+    return true;
+  }
+
+  t->entries = malloc(nbytes);
+  if (!t->entries) return false;
+  /* All-zero entries are empty entries. */
+  memset(mutable_entries(t), 0, nbytes);
+  return true;
+}
+
+/* Releases the entry array owned by "t". */
+static void uninit(upb_table *t) {
+  upb_tabent *ents = mutable_entries(t);
+  free(ents);
+}
+
+/* Scans backwards from the end of the entry array and returns the first
+ * empty entry found.  The caller must guarantee that one exists. */
+static upb_tabent *emptyent(upb_table *t) {
+  upb_tabent *slot = mutable_entries(t) + upb_table_size(t);
+
+  for (;;) {
+    slot--;
+    if (upb_tabent_isempty(slot)) return slot;
+    assert(slot > t->entries);
+  }
+}
+
+/* Non-const flavor of upb_getentry(): returns the entry that "hash" selects
+ * in "t". */
+static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
+  const upb_tabent *ent = upb_getentry(t, hash);
+  return (upb_tabent*)ent;
+}
+
+/* Walks the collision chain that starts at the entry selected by "hash" and
+ * returns the entry whose key satisfies "eql", or NULL if there is none. */
+static const upb_tabent *findentry(const upb_table *t, lookupkey_t key,
+                                   uint32_t hash, eqlfunc_t *eql) {
+  const upb_tabent *cur;
+
+  /* A table initialized with size_lg2 == 0 holds nothing to search. */
+  if (t->size_lg2 == 0) return NULL;
+
+  cur = upb_getentry(t, hash);
+  if (upb_tabent_isempty(cur)) return NULL;
+
+  for (; cur != NULL; cur = cur->next) {
+    if (eql(cur->key, key)) return cur;
+  }
+  return NULL;
+}
+
+/* Non-const flavor of findentry(). */
+static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key,
+                                     uint32_t hash, eqlfunc_t *eql) {
+  const upb_tabent *found = findentry(t, key, hash, eql);
+  return (upb_tabent*)found;
+}
+
+/* Looks "key" up in "t".  Returns true when it is present; in that case the
+ * stored value is also written to "*v" unless "v" is NULL. */
+static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
+                   uint32_t hash, eqlfunc_t *eql) {
+  const upb_tabent *hit = findentry(t, key, hash, eql);
+
+  if (hit == NULL) return false;
+  if (v != NULL) {
+    _upb_value_setval(v, hit->val.val, t->ctype);
+  }
+  return true;
+}
+
+/* The given key must not already exist in the table.
+ *
+ * Stores "val" under "tabkey" in "t".  "key" and "eql" are consulted only by
+ * the assertions.  "hash" is the hash of the new key and selects its main
+ * position; "hashfunc" is used to re-hash whatever entry currently occupies
+ * that position.  This function never grows the table, and emptyent()
+ * requires a free slot, so the callers in this file test isfull() (and
+ * resize) before calling it. */
+static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
+                   upb_value val, uint32_t hash,
+                   hashfunc_t *hashfunc, eqlfunc_t *eql) {
+  upb_tabent *mainpos_e;
+  upb_tabent *our_e;
+
+  UPB_UNUSED(eql);
+  UPB_UNUSED(key);
+  assert(findentry(t, key, hash, eql) == NULL);
+  assert(val.ctype == t->ctype);
+
+  t->count++;
+  mainpos_e = getentry_mutable(t, hash);
+  our_e = mainpos_e;
+
+  if (upb_tabent_isempty(mainpos_e)) {
+    /* Our main position is empty; use it. */
+    our_e->next = NULL;
+  } else {
+    /* Collision. */
+    upb_tabent *new_e = emptyent(t);
+    /* Head of collider's chain. */
+    upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
+    if (chain == mainpos_e) {
+      /* Existing ent is in its main position (it has the same hash as us, and
+       * is the head of our chain).  Insert to new ent and append to this chain. */
+      new_e->next = mainpos_e->next;
+      mainpos_e->next = new_e;
+      our_e = new_e;
+    } else {
+      /* Existing ent is not in its main position (it is a node in some other
+       * chain).  This implies that no existing ent in the table has our hash.
+       * Evict it (updating its chain) and use its ent for head of our chain. */
+      *new_e = *mainpos_e;  /* copies next. */
+      /* Find the evicted entry's predecessor so it can be re-linked to the
+       * entry's new location. */
+      while (chain->next != mainpos_e) {
+        chain = (upb_tabent*)chain->next;
+        assert(chain);
+      }
+      chain->next = new_e;
+      our_e = mainpos_e;
+      our_e->next = NULL;
+    }
+  }
+  /* Fill in whichever entry was chosen for the new element. */
+  our_e->key = tabkey;
+  our_e->val.val = val.val;
+  assert(findentry(t, key, hash, eql) == our_e);
+}
+
+/* Removes the entry matching "key" from "t".
+ *
+ * On success returns true, writes the removed value to "*val" (when "val" is
+ * non-NULL) and the key stored in the freed slot to "*removed" (when
+ * "removed" is non-NULL), and decrements the entry count.  Returns false
+ * when no entry matches. */
+static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
+               upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) {
+  upb_tabent *chain;
+
+  /* A table initialized with size_lg2 == 0 has no entry array (init() leaves
+   * "entries" NULL), so there is nothing to probe.  findentry() applies the
+   * same guard. */
+  if (t->size_lg2 == 0) return false;
+
+  chain = getentry_mutable(t, hash);
+  if (upb_tabent_isempty(chain)) return false;
+  if (eql(chain->key, key)) {
+    /* Element to remove is at the head of its chain. */
+    t->count--;
+    if (val) {
+      _upb_value_setval(val, chain->val.val, t->ctype);
+    }
+    if (chain->next) {
+      /* The head must stay in its main position, so pull the second element
+       * of the chain into the head slot and free the second element's slot
+       * instead. */
+      upb_tabent *move = (upb_tabent*)chain->next;
+      *chain = *move;
+      if (removed) *removed = move->key;
+      move->key = 0;  /* Make the slot empty. */
+    } else {
+      if (removed) *removed = chain->key;
+      chain->key = 0;  /* Make the slot empty. */
+    }
+    return true;
+  } else {
+    /* Element to remove is either in a non-head position or not in the
+     * table. */
+    while (chain->next && !eql(chain->next->key, key))
+      chain = (upb_tabent*)chain->next;
+    if (chain->next) {
+      /* Found element to remove. */
+      upb_tabent *rm;
+
+      if (val) {
+        _upb_value_setval(val, chain->next->val.val, t->ctype);
+      }
+      rm = (upb_tabent*)chain->next;
+      if (removed) *removed = rm->key;
+      rm->key = 0;
+      chain->next = rm->next;  /* Unlink it from the chain. */
+      t->count--;
+      return true;
+    } else {
+      return false;
+    }
+  }
+}
+
+/* Returns the index of the first non-empty entry after index "i", or
+ * SIZE_MAX when there is none.  Passing (size_t)-1 starts the scan at
+ * index 0. */
+static size_t next(const upb_table *t, size_t i) {
+  const size_t size = upb_table_size(t);
+
+  for (;;) {
+    i++;
+    if (i >= size) return SIZE_MAX;
+    if (!upb_tabent_isempty(&t->entries[i])) return i;
+  }
+}
+
+/* Returns the index of the first non-empty entry of "t", or SIZE_MAX when
+ * the table has none. */
+static size_t begin(const upb_table *t) {
+  const size_t before_first = (size_t)-1;  /* next() increments before use. */
+  return next(t, before_first);
+}
+
+
+/* upb_strtable ***************************************************************/
+
+/* A simple "subclass" of upb_table that only adds a hash function for strings. */
+
+/* Allocates the stored form of a string key: a uint32_t length followed by
+ * the key bytes and a terminating NUL.  Returns 0 if the allocation fails.
+ *
+ * The source is treated as a raw buffer of k2.str.len bytes: exactly that
+ * many bytes are read, and the terminator is written explicitly rather than
+ * copied, so keys without a trailing NUL are safe. */
+static upb_tabkey strcopy(lookupkey_t k2) {
+  /* Narrow the length explicitly; copying the first four bytes of a size_t
+   * only yields the right value on little-endian machines. */
+  uint32_t len = (uint32_t)k2.str.len;
+  char *str = malloc(k2.str.len + sizeof(uint32_t) + 1);
+  if (str == NULL) return 0;
+  memcpy(str, &len, sizeof(uint32_t));
+  memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len);
+  str[sizeof(uint32_t) + k2.str.len] = '\0';
+  return (uintptr_t)str;
+}
+
+/* Hash function for stored string keys: hashes the key's bytes with
+ * MurmurHash2 and a seed of 0. */
+static uint32_t strhash(upb_tabkey key) {
+  uint32_t nbytes;
+  char *bytes = upb_tabstr(key, &nbytes);
+
+  return MurmurHash2(bytes, nbytes, 0);
+}
+
+/* Equality function for string keys: the stored key "k1" matches the lookup
+ * key "k2" when both have the same length and the same bytes. */
+static bool streql(upb_tabkey k1, lookupkey_t k2) {
+  uint32_t stored_len;
+  char *stored = upb_tabstr(k1, &stored_len);
+
+  if (stored_len != k2.str.len) return false;
+  return memcmp(stored, k2.str.str, stored_len) == 0;
+}
+
+/* Initializes "t" as an empty string table holding values of type "ctype".
+ * Returns false if memory could not be allocated. */
+bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {
+  const uint8_t initial_size_lg2 = 2;
+  return init(&t->t, ctype, initial_size_lg2);
+}
+
+/* Frees every key owned by the table and then the table itself.  Empty
+ * entries carry a key of 0, which free() accepts as a null pointer. */
+void upb_strtable_uninit(upb_strtable *t) {
+  const size_t nslots = upb_table_size(&t->t);
+  size_t slot;
+
+  for (slot = 0; slot != nslots; slot++) {
+    free((void*)t->t.entries[slot].key);
+  }
+  uninit(&t->t);
+}
+
+/* Rebuilds "t" as a table created with "size_lg2", re-inserting every
+ * existing key/value pair.
+ *
+ * Returns false on allocation failure.  In that case "t" is left exactly as
+ * it was: the partially built replacement is discarded instead of being
+ * swapped in with entries missing. */
+bool upb_strtable_resize(upb_strtable *t, size_t size_lg2) {
+  upb_strtable new_table;
+  upb_strtable_iter i;
+
+  if (!init(&new_table.t, t->t.ctype, size_lg2))
+    return false;
+  upb_strtable_begin(&i, t);
+  for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+    /* Each insert makes its own copy of the key, so it can fail. */
+    if (!upb_strtable_insert2(
+            &new_table,
+            upb_strtable_iter_key(&i),
+            upb_strtable_iter_keylength(&i),
+            upb_strtable_iter_value(&i))) {
+      upb_strtable_uninit(&new_table);
+      return false;
+    }
+  }
+  /* The new table owns fresh copies of all keys; release the old ones. */
+  upb_strtable_uninit(t);
+  *t = new_table;
+  return true;
+}
+
+/* Inserts the key made of the "len" bytes at "k" with value "v".  The key
+ * must not already be present (insert() asserts this).  The table keeps its
+ * own copy of the key.  Returns false if memory could not be allocated. */
+bool upb_strtable_insert2(upb_strtable *t, const char *k, size_t len,
+                          upb_value v) {
+  lookupkey_t key;
+  upb_tabkey stored_key;
+  uint32_t hash;
+
+  /* Grow first when this insert would exceed the load limit: request the
+   * next size_lg2 and let the resize re-add the old elements. */
+  if (isfull(&t->t) && !upb_strtable_resize(t, t->t.size_lg2 + 1)) {
+    return false;
+  }
+
+  key = strkey2(k, len);
+  stored_key = strcopy(key);
+  if (stored_key == 0) return false;
+
+  hash = MurmurHash2(key.str.str, key.str.len, 0);
+  insert(&t->t, key, stored_key, v, hash, &strhash, &streql);
+  return true;
+}
+
+/* Looks up the key made of the "len" bytes at "key".  Returns true if it is
+ * present, storing its value in "*v" when "v" is non-NULL. */
+bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
+                          upb_value *v) {
+  lookupkey_t wanted = strkey2(key, len);
+  uint32_t hash = MurmurHash2(key, len, 0);
+
+  return lookup(&t->t, wanted, v, hash, &streql);
+}
+
+/* Removes the key made of the "len" bytes at "key".  Returns true if it was
+ * present; the removed value is stored in "*val" when "val" is non-NULL.
+ * The table's copy of the key is freed. */
+bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len,
+                         upb_value *val) {
+  /* Hash exactly "len" bytes, as insert and lookup do.  Using strlen() here
+   * would pick the wrong bucket for keys that contain NUL bytes and would
+   * read past the end of keys that are not NUL-terminated. */
+  uint32_t hash = MurmurHash2(key, len, 0);
+  upb_tabkey tabkey;
+  if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
+    free((void*)tabkey);
+    return true;
+  } else {
+    return false;
+  }
+}
+
+/* Iteration */
+
+/* Returns the table entry at the iterator's current index. */
+static const upb_tabent *str_tabent(const upb_strtable_iter *i) {
+  const upb_table *tab = &i->t->t;
+  return &tab->entries[i->index];
+}
+
+/* Points "i" at the first occupied entry of "t" (or past the end when the
+ * table is empty). */
+void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
+  size_t first = begin(&t->t);
+
+  i->t = t;
+  i->index = first;
+}
+
+/* Advances "i" to the next occupied entry of its table. */
+void upb_strtable_next(upb_strtable_iter *i) {
+  const upb_table *tab = &i->t->t;
+  i->index = next(tab, i->index);
+}
+
+/* Returns true when "i" no longer refers to an occupied entry: either its
+ * index lies outside the table or the entry at that index is empty. */
+bool upb_strtable_done(const upb_strtable_iter *i) {
+  if (i->index >= upb_table_size(&i->t->t)) return true;
+  return upb_tabent_isempty(str_tabent(i));
+}
+
+/* Returns the key bytes of the current entry.  The iterator must not be
+ * done. */
+const char *upb_strtable_iter_key(upb_strtable_iter *i) {
+  const upb_tabent *ent;
+
+  assert(!upb_strtable_done(i));
+  ent = str_tabent(i);
+  return upb_tabstr(ent->key, NULL);
+}
+
+/* Returns the length in bytes of the current entry's key.  The iterator must
+ * not be done. */
+size_t upb_strtable_iter_keylength(upb_strtable_iter *i) {
+  uint32_t nbytes;
+
+  assert(!upb_strtable_done(i));
+  /* Only the length output is wanted; the returned pointer is ignored. */
+  upb_tabstr(str_tabent(i)->key, &nbytes);
+  return nbytes;
+}
+
+/* Returns the value of the current entry, tagged with the table's value
+ * type.  The iterator must not be done. */
+upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
+  const upb_tabent *ent;
+
+  assert(!upb_strtable_done(i));
+  ent = str_tabent(i);
+  return _upb_value_val(ent->val.val, i->t->t.ctype);
+}
+
+void upb_strtable_iter_setdone(upb_strtable_iter *i) {
+  i->index = SIZE_MAX;  /* No table size can exceed SIZE_MAX, so the index
+                         * test in upb_strtable_done() now reports true. */
+}
+
+/* Two iterators compare equal when both are done, or when they refer to the
+ * same index of the same table. */
+bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
+                               const upb_strtable_iter *i2) {
+  if (upb_strtable_done(i1) && upb_strtable_done(i2)) {
+    return true;
+  }
+  if (i1->t != i2->t) return false;
+  return i1->index == i2->index;
+}
+
+
+/* upb_inttable ***************************************************************/
+
+/* For inttables we use a hybrid structure where small keys are kept in an
+ * array and large keys are put in the hash table. */
+
+/* hashfunc_t-compatible wrapper around upb_inthash(). */
+static uint32_t inthash(upb_tabkey key) {
+  uint32_t h = upb_inthash(key);
+  return h;
+}
+
+/* Equality function for integer keys: compares the stored key with the
+ * numeric member of the lookup key. */
+static bool inteql(upb_tabkey k1, lookupkey_t k2) {
+  uintptr_t wanted = k2.num;
+  return k1 == wanted;
+}
+
+/* Returns the array part of "t" with its const qualifier cast away. */
+static upb_tabval *mutable_array(upb_inttable *t) {
+  upb_tabval *arr = (upb_tabval*)t->array;
+  return arr;
+}
+
+/* Returns a pointer to the slot holding the value for "key", or NULL if the
+ * key is absent.  Keys below array_size live in the array part; all others
+ * are looked up in the hash part. */
+static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) {
+  if (key >= t->array_size) {
+    upb_tabent *ent =
+        findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
+    return ent ? &ent->val : NULL;
+  }
+
+  if (!upb_arrhas(t->array[key])) return NULL;
+  return &(mutable_array(t)[key]);
+}
+
+/* Const flavor of inttable_val(). */
+static const upb_tabval *inttable_val_const(const upb_inttable *t,
+                                            uintptr_t key) {
+  upb_inttable *mutable_t = (upb_inttable*)t;
+  return inttable_val(mutable_t, key);
+}
+
+/* Returns the total number of entries: those in the array part plus those in
+ * the hash part. */
+size_t upb_inttable_count(const upb_inttable *t) {
+  size_t in_hash = t->t.count;
+  size_t in_array = t->array_count;
+
+  return in_hash + in_array;
+}
+
+/* Debug-only consistency check, compiled in only when UPB_DEBUG_TABLE is
+ * defined and NDEBUG is not: every key reachable by iteration must be found
+ * by lookup, and the number of iterated entries must equal the table's
+ * count. */
+static void check(upb_inttable *t) {
+  UPB_UNUSED(t);
+#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
+  {
+    /* This walk is very expensive (it makes inserts/deletes O(N)). */
+    size_t seen = 0;
+    upb_inttable_iter it;
+
+    upb_inttable_begin(&it, t);
+    while (!upb_inttable_done(&it)) {
+      assert(upb_inttable_lookup(t, upb_inttable_iter_key(&it), NULL));
+      upb_inttable_next(&it);
+      seen++;
+    }
+    assert(seen == upb_inttable_count(t));
+  }
+#endif
+}
+
+/* Initializes "t" with an array part of "asize" slots (at least one) and a
+ * hash part created with "hsize_lg2".  Returns false if memory could not be
+ * allocated, in which case nothing is left allocated. */
+bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t ctype,
+                            size_t asize, int hsize_lg2) {
+  size_t nbytes;
+
+  if (!init(&t->t, ctype, hsize_lg2)) return false;
+
+  /* The array part is always at least 1 long, so that key 0 is known never
+   * to be in the hash part, which simplifies things. */
+  t->array_count = 0;
+  t->array_size = UPB_MAX(1, asize);
+
+  nbytes = t->array_size * sizeof(upb_value);
+  t->array = malloc(nbytes);
+  if (t->array == NULL) {
+    uninit(&t->t);
+    return false;
+  }
+
+  /* Every array slot starts out filled with 0xff bytes. */
+  memset(mutable_array(t), 0xff, nbytes);
+  check(t);
+  return true;
+}
+
+/* Initializes "t" with the default sizes: the minimum array part and a hash
+ * part created with size_lg2 == 4. */
+bool upb_inttable_init(upb_inttable *t, upb_ctype_t ctype) {
+  const size_t default_asize = 0;
+  const int default_hsize_lg2 = 4;
+
+  return upb_inttable_sizedinit(t, ctype, default_asize, default_hsize_lg2);
+}
+
+/* Frees both the array part and the hash part of "t". */
+void upb_inttable_uninit(upb_inttable *t) {
+  upb_tabval *arr = mutable_array(t);
+
+  uninit(&t->t);
+  free(arr);
+}
+
+/* Inserts "val" under "key", which must not already be present (asserted).
+ * Returns false if the hash part needed to grow and memory could not be
+ * allocated. */
+bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
+  /* XXX: Table can't store value (uint64_t)-1.  Need to somehow statically
+   * guarantee that this is not necessary, or fix the limitation. */
+  upb_tabval probe;
+  probe.val = val.val;
+  UPB_UNUSED(probe);
+  assert(upb_arrhas(probe));
+
+  if (key >= t->array_size) {
+    /* The key belongs in the hash part. */
+    if (isfull(&t->t)) {
+      /* Grow the hash part only; the array part is re-used as is. */
+      upb_table bigger;
+      size_t slot;
+
+      if (!init(&bigger, t->t.ctype, t->t.size_lg2 + 1)) {
+        return false;
+      }
+
+      slot = begin(&t->t);
+      while (slot < upb_table_size(&t->t)) {
+        const upb_tabent *old = &t->t.entries[slot];
+        upb_value old_val;
+        uint32_t old_hash;
+
+        _upb_value_setval(&old_val, old->val.val, t->t.ctype);
+        old_hash = upb_inthash(old->key);
+        insert(&bigger, intkey(old->key), old->key, old_val, old_hash,
+               &inthash, &inteql);
+        slot = next(&t->t, slot);
+      }
+
+      assert(t->t.count == bigger.count);
+
+      uninit(&t->t);
+      t->t = bigger;
+    }
+    insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
+  } else {
+    /* The key belongs in the array part. */
+    assert(!upb_arrhas(t->array[key]));
+    mutable_array(t)[key].val = val.val;
+    t->array_count++;
+  }
+
+  check(t);
+  return true;
+}
+
+/* Returns true if "key" is present, storing its value in "*v" when "v" is
+ * non-NULL. */
+bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) {
+  const upb_tabval *slot = inttable_val_const(t, key);
+
+  if (slot == NULL) return false;
+  if (v != NULL) {
+    _upb_value_setval(v, slot->val, t->t.ctype);
+  }
+  return true;
+}
+
+/* Overwrites the value stored under an existing "key".  Returns false, and
+ * changes nothing, when the key is not present. */
+bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) {
+  upb_tabval *slot = inttable_val(t, key);
+
+  if (slot != NULL) {
+    slot->val = val.val;
+    return true;
+  }
+  return false;
+}
+
+/* Removes "key" from the table.  Returns true if it was present; the removed
+ * value is stored in "*val" when "val" is non-NULL. */
+bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
+  bool was_present = false;
+
+  if (key >= t->array_size) {
+    /* Hash part. */
+    upb_tabkey removed;
+    uint32_t hash = upb_inthash(key);
+    was_present = rm(&t->t, intkey(key), val, &removed, hash, &inteql);
+  } else if (upb_arrhas(t->array[key])) {
+    /* Array part: hand out the old value, then mark the slot as empty. */
+    upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
+    if (val) {
+      _upb_value_setval(val, t->array[key].val, t->t.ctype);
+    }
+    mutable_array(t)[key] = empty;
+    t->array_count--;
+    was_present = true;
+  }
+
+  check(t);
+  return was_present;
+}
+
+/* Inserts "val" under the key equal to the table's current entry count. */
+bool upb_inttable_push(upb_inttable *t, upb_value val) {
+  size_t next_key = upb_inttable_count(t);
+  return upb_inttable_insert(t, next_key, val);
+}
+
+/* Removes and returns the value stored under key (count - 1).  That key must
+ * be present (asserted). */
+upb_value upb_inttable_pop(upb_inttable *t) {
+  upb_value popped;
+  size_t last_key = upb_inttable_count(t) - 1;
+  bool ok = upb_inttable_remove(t, last_key, &popped);
+
+  UPB_ASSERT_VAR(ok, ok);
+  return popped;
+}
+
+/* Like upb_inttable_insert(), using the pointer's address as the key. */
+bool upb_inttable_insertptr(upb_inttable *t, const void *key, upb_value val) {
+  uintptr_t int_key = (uintptr_t)key;
+  return upb_inttable_insert(t, int_key, val);
+}
+
+/* Like upb_inttable_lookup(), using the pointer's address as the key. */
+bool upb_inttable_lookupptr(const upb_inttable *t, const void *key,
+                            upb_value *v) {
+  uintptr_t int_key = (uintptr_t)key;
+  return upb_inttable_lookup(t, int_key, v);
+}
+
+/* Like upb_inttable_remove(), using the pointer's address as the key. */
+bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
+  uintptr_t int_key = (uintptr_t)key;
+  return upb_inttable_remove(t, int_key, val);
+}
+
+/* Rebuilds "t" into a freshly sized table, choosing how large the array part
+ * should be from a histogram of the keys and the MIN_DENSITY threshold.
+ *
+ * The function cannot report failure.  If memory for the replacement table
+ * cannot be allocated, "t" is left untouched (still valid, just not
+ * compacted). */
+void upb_inttable_compact(upb_inttable *t) {
+  /* Create a power-of-two histogram of the table keys. */
+  int counts[UPB_MAXARRSIZE + 1] = {0};
+  uintptr_t max_key = 0;
+  upb_inttable_iter i;
+  size_t arr_size;
+  int arr_count;
+  upb_inttable new_t;
+
+  upb_inttable_begin(&i, t);
+  for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+    uintptr_t key = upb_inttable_iter_key(&i);
+    if (key > max_key) {
+      max_key = key;
+    }
+    /* log2ceil() never returns more than UPB_MAXARRSIZE, so this index is
+     * always inside counts[]. */
+    counts[log2ceil(key)]++;
+  }
+
+  arr_size = 1;
+  arr_count = upb_inttable_count(t);
+
+  if (upb_inttable_count(t) >= max_key * MIN_DENSITY) {
+    /* We can put 100% of the entries in the array part. */
+    arr_size = max_key + 1;
+  } else {
+    /* Find the largest power of two that satisfies the MIN_DENSITY
+     * definition. */
+    int size_lg2;
+    for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 1; size_lg2--) {
+      arr_size = 1 << size_lg2;
+      arr_count -= counts[size_lg2];
+      if (arr_count >= arr_size * MIN_DENSITY) {
+        break;
+      }
+    }
+  }
+
+  /* Array part must always be at least 1 entry large to catch lookups of key
+   * 0.  Key 0 must always be in the array part because "0" in the hash part
+   * denotes an empty entry. */
+  arr_size = UPB_MAX(arr_size, 1);
+
+  {
+    /* Insert all elements into new, perfectly-sized table. */
+    int hash_count = upb_inttable_count(t) - arr_count;
+    int hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
+    int hashsize_lg2 = log2ceil(hash_size);
+
+    assert(hash_count >= 0);
+    if (!upb_inttable_sizedinit(&new_t, t->t.ctype, arr_size, hashsize_lg2)) {
+      /* Out of memory: nothing was allocated, keep the existing table. */
+      return;
+    }
+    upb_inttable_begin(&i, t);
+    for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+      uintptr_t k = upb_inttable_iter_key(&i);
+      if (!upb_inttable_insert(&new_t, k, upb_inttable_iter_value(&i))) {
+        /* Out of memory while growing the new hash part: throw the partial
+         * copy away and keep the existing table. */
+        upb_inttable_uninit(&new_t);
+        return;
+      }
+    }
+    assert(new_t.array_size == arr_size);
+    assert(new_t.t.size_lg2 == hashsize_lg2);
+  }
+  upb_inttable_uninit(t);
+  *t = new_t;
+}
+
+/* Iteration. */
+
+/* Returns the hash-part entry at the iterator's index.  Only valid while the
+ * iterator is in the hash part. */
+static const upb_tabent *int_tabent(const upb_inttable_iter *i) {
+  const upb_table *hash_part = &i->t->t;
+
+  assert(!i->array_part);
+  return &hash_part->entries[i->index];
+}
+
+/* Returns a copy of the array-part slot at the iterator's index.  Only valid
+ * while the iterator is in the array part. */
+static upb_tabval int_arrent(const upb_inttable_iter *i) {
+  const upb_inttable *tab = i->t;
+
+  assert(i->array_part);
+  return tab->array[i->index];
+}
+
+/* Points "i" at the first entry of "t".  Iteration covers the array part
+ * first and the hash part afterwards. */
+void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) {
+  /* Start one position before the array; the first step lands on slot 0. */
+  i->array_part = true;
+  i->index = -1;
+  i->t = t;
+  upb_inttable_next(i);
+}
+
+/* Advances "iter" to the next entry.  When the array part is exhausted the
+ * iterator switches to the first occupied entry of the hash part. */
+void upb_inttable_next(upb_inttable_iter *iter) {
+  const upb_inttable *tab = iter->t;
+
+  if (!iter->array_part) {
+    iter->index = next(&tab->t, iter->index);
+    return;
+  }
+
+  /* Look for the next occupied slot of the array part. */
+  for (;;) {
+    iter->index++;
+    if (iter->index >= tab->array_size) break;
+    if (upb_arrhas(int_arrent(iter))) return;
+  }
+
+  /* Ran off the end of the array part: continue in the hash part. */
+  iter->array_part = false;
+  iter->index = begin(&tab->t);
+}
+
+/* Returns true when "i" no longer refers to an occupied slot of the part it
+ * is currently iterating. */
+bool upb_inttable_done(const upb_inttable_iter *i) {
+  if (i->array_part) {
+    if (i->index >= i->t->array_size) return true;
+    return !upb_arrhas(int_arrent(i));
+  }
+
+  if (i->index >= upb_table_size(&i->t->t)) return true;
+  return upb_tabent_isempty(int_tabent(i));
+}
+
+/* Returns the key of the current entry: the slot index while in the array
+ * part, otherwise the key stored in the hash entry.  The iterator must not
+ * be done. */
+uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) {
+  assert(!upb_inttable_done(i));
+  if (i->array_part) {
+    return i->index;
+  }
+  return int_tabent(i)->key;
+}
+
+/* Returns the value of the current entry, tagged with the table's value
+ * type.  The iterator must not be done. */
+upb_value upb_inttable_iter_value(const upb_inttable_iter *i) {
+  upb_tabval slot;
+
+  assert(!upb_inttable_done(i));
+  if (i->array_part) {
+    slot = i->t->array[i->index];
+  } else {
+    slot = int_tabent(i)->val;
+  }
+  return _upb_value_val(slot.val, i->t->t.ctype);
+}
+
+/* Forces "i" into the done state: it is put in the hash part with an index
+ * of SIZE_MAX, which upb_inttable_done() reports as past the end. */
+void upb_inttable_iter_setdone(upb_inttable_iter *i) {
+  i->array_part = false;
+  i->index = SIZE_MAX;
+}
+
+/* Two iterators compare equal when both are done, or when they refer to the
+ * same index in the same part of the same table. */
+bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
+                                          const upb_inttable_iter *i2) {
+  if (upb_inttable_done(i1) && upb_inttable_done(i2)) {
+    return true;
+  }
+  if (i1->t != i2->t) return false;
+  if (i1->index != i2->index) return false;
+  return i1->array_part == i2->array_part;
+}
+
+#ifdef UPB_UNALIGNED_READS_OK
+/* -----------------------------------------------------------------------------
+ * MurmurHash2, by Austin Appleby (released as public domain).
+ * Reformatted and C99-ified by Joshua Haberman.
+ * Note - This code makes a few assumptions about how your machine behaves -
+ *   1. We can read a 4-byte value from any address without crashing
+ *   2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t)
+ * And it has a few limitations -
+ *   1. It will not work incrementally.
+ *   2. It will not produce the same results on little-endian and big-endian
+ *      machines.
+ *
+ * This variant is compiled only when UPB_UNALIGNED_READS_OK is defined.
+ * It hashes the "len" bytes starting at "key", with "seed" as the initial
+ * state, and returns the 32-bit hash. */
+uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
+  /* 'm' and 'r' are mixing constants generated offline.
+   * They're not really 'magic', they just happen to work well. */
+  const uint32_t m = 0x5bd1e995;
+  const int32_t r = 24;
+
+  /* Initialize the hash to a 'random' value */
+  uint32_t h = seed ^ len;
+
+  /* Mix 4 bytes at a time into the hash */
+  const uint8_t * data = (const uint8_t *)key;
+  while(len >= 4) {
+    uint32_t k = *(uint32_t *)data;  /* Possibly unaligned 4-byte read. */
+
+    k *= m;
+    k ^= k >> r;
+    k *= m;
+
+    h *= m;
+    h ^= k;
+
+    data += 4;
+    len -= 4;
+  }
+
+  /* Handle the last few bytes of the input array */
+  switch(len) {
+    case 3: h ^= data[2] << 16;  /* Falls through. */
+    case 2: h ^= data[1] << 8;   /* Falls through. */
+    case 1: h ^= data[0]; h *= m;
+  };
+
+  /* Do a few final mixes of the hash to ensure the last few
+   * bytes are well-incorporated. */
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
+
+  return h;
+}
+
+#else /* !UPB_UNALIGNED_READS_OK */
+
+/* -----------------------------------------------------------------------------
+ * MurmurHashAligned2, by Austin Appleby
+ * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
+ * on certain platforms.
+ * Performance will be lower than MurmurHash2
+ *
+ * This variant is compiled when UPB_UNALIGNED_READS_OK is not defined.
+ * It hashes the "len" bytes starting at "key", with "seed" as the initial
+ * state, and returns the 32-bit hash.
+ *
+ * The MIX macro below folds the 32-bit word "k" into the hash state "h".
+ * It relies on a variable named "r" being in scope at the point of use and
+ * it modifies "k". */
+
+#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
+
+uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
+  const uint32_t m = 0x5bd1e995;
+  const int32_t r = 24;
+  const uint8_t * data = (const uint8_t *)key;
+  uint32_t h = seed ^ len;
+  uint8_t align = (uintptr_t)data & 3;  /* Offset within a 4-byte word. */
+
+  if(align && (len >= 4)) {
+    /* Pre-load the temp registers */
+    uint32_t t = 0, d = 0;
+    int32_t sl;
+    int32_t sr;
+
+    /* Read the (4 - align) leading bytes one at a time; every case falls
+     * through to the next. */
+    switch(align) {
+      case 1: t |= data[2] << 16;
+      case 2: t |= data[1] << 8;
+      case 3: t |= data[0];
+    }
+
+    t <<= (8 * align);
+
+    /* After this, "data" is 4-byte aligned. */
+    data += 4-align;
+    len -= 4-align;
+
+    sl = 8 * (4-align);
+    sr = 8 * align;
+
+    /* Mix */
+
+    while(len >= 4) {
+      uint32_t k;
+
+      /* Aligned read; the word to mix is stitched together from the bytes
+       * carried over in "t" and the low bytes of the new word. */
+      d = *(uint32_t *)data;
+      t = (t >> sr) | (d << sl);
+
+      k = t;
+
+      MIX(h,k,m);
+
+      t = d;
+
+      data += 4;
+      len -= 4;
+    }
+
+    /* Handle leftover data in temp registers */
+
+    d = 0;
+
+    if(len >= align) {
+      uint32_t k;
+
+      /* Every case falls through to the next. */
+      switch(align) {
+        case 3: d |= data[2] << 16;
+        case 2: d |= data[1] << 8;
+        case 1: d |= data[0];
+      }
+
+      k = (t >> sr) | (d << sl);
+      MIX(h,k,m);
+
+      data += align;
+      len -= align;
+
+      /* ----------
+       * Handle tail bytes */
+
+      switch(len) {
+        case 3: h ^= data[2] << 16;  /* Falls through. */
+        case 2: h ^= data[1] << 8;   /* Falls through. */
+        case 1: h ^= data[0]; h *= m;
+      };
+    } else {
+      /* Every case falls through to the next. */
+      switch(len) {
+        case 3: d |= data[2] << 16;
+        case 2: d |= data[1] << 8;
+        case 1: d |= data[0];
+        case 0: h ^= (t >> sr) | (d << sl); h *= m;
+      }
+    }
+
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+  } else {
+    /* Taken when "data" is already 4-byte aligned, or when the input is
+     * shorter than 4 bytes (in which case the loop body never runs). */
+    while(len >= 4) {
+      uint32_t k = *(uint32_t *)data;
+
+      MIX(h,k,m);
+
+      data += 4;
+      len -= 4;
+    }
+
+    /* ----------
+     * Handle tail bytes */
+
+    switch(len) {
+      case 3: h ^= data[2] << 16;  /* Falls through. */
+      case 2: h ^= data[1] << 8;   /* Falls through. */
+      case 1: h ^= data[0]; h *= m;
+    };
+
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+  }
+}
+#undef MIX
+
+#endif /* UPB_UNALIGNED_READS_OK */
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+bool upb_dumptostderr(void *closure, const upb_status* status) {
+  UPB_UNUSED(closure);
+  fprintf(stderr, "%s\n", upb_status_errmsg(status));
+  return false;
+}
+
+/* Guarantee null-termination and provide ellipsis truncation.
+ * It may be tempting to "optimize" this by initializing these final
+ * four bytes up-front and then being careful never to overwrite them,
+ * this is safer and simpler.
+ *
+ * The final four bytes of status->msg are overwritten with "..." plus the
+ * terminating NUL.  Copying the NUL as well is what makes the guarantee
+ * hold: a message that fills the buffer would otherwise end in '.' with no
+ * terminator.  Shorter messages are unaffected because their own NUL comes
+ * earlier in the buffer. */
+static void nullz(upb_status *status) {
+  const char *ellipsis = "...";
+  size_t len = strlen(ellipsis) + 1;  /* Include the terminating NUL. */
+  assert(sizeof(status->msg) > len);
+  memcpy(status->msg + sizeof(status->msg) - len, ellipsis, len);
+}
+
+void upb_status_clear(upb_status *status) {
+  if (!status) return;
+  status->ok_ = true;
+  status->code_ = 0;
+  status->msg[0] = '\0';
+}
+
+bool upb_ok(const upb_status *status) { return status->ok_; }
+
+upb_errorspace *upb_status_errspace(const upb_status *status) {
+  return status->error_space_;
+}
+
+int upb_status_errcode(const upb_status *status) { return status->code_; }
+
+const char *upb_status_errmsg(const upb_status *status) { return status->msg; }
+
+/* Marks "status" as failed and copies "msg" into its message buffer.  A
+ * NULL status is ignored. */
+void upb_status_seterrmsg(upb_status *status, const char *msg) {
+  if (status == NULL) return;
+
+  strncpy(status->msg, msg, sizeof(status->msg));
+  nullz(status);
+  status->ok_ = false;
+}
+
+/* printf-style convenience wrapper around upb_status_vseterrf(). */
+void upb_status_seterrf(upb_status *status, const char *fmt, ...) {
+  va_list ap;
+
+  va_start(ap, fmt);
+  upb_status_vseterrf(status, fmt, ap);
+  va_end(ap);
+}
+
+/* Marks "status" as failed and formats the message described by "fmt" and
+ * "args" into its message buffer.  A NULL status is ignored. */
+void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) {
+  if (status == NULL) return;
+
+  _upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args);
+  nullz(status);
+  status->ok_ = false;
+}
+
+void upb_status_seterrcode(upb_status *status, upb_errorspace *space,
+                           int code) {
+  if (!status) return;
+  status->ok_ = false;
+  status->error_space_ = space;
+  status->code_ = code;
+  space->set_message(status, code);
+}
+
+/* Copies the whole of "from" into "to".  A NULL destination is ignored. */
+void upb_status_copy(upb_status *to, const upb_status *from) {
+  if (to != NULL) {
+    *to = *from;
+  }
+}
+/* This file was generated by upbc (the upb compiler).
+ * Do not edit -- your changes will be discarded when the file is
+ * regenerated. */
+
+
+static const upb_msgdef msgs[20];  /* Declared ahead of its initializer; the
+                                    * msgs[] initializer below points into
+                                    * strentries, intentries and arrays. */
+static const upb_fielddef fields[81];
+static const upb_enumdef enums[4];
+static const upb_tabent strentries[236];
+static const upb_tabent intentries[14];
+static const upb_tabval arrays[232];
+
+#ifdef UPB_DEBUG_REFS
+static upb_inttable reftables[212];  /* Only declared in UPB_DEBUG_REFS
+                                      * builds. */
+#endif
+
+static const upb_msgdef msgs[20] = {
+  UPB_MSGDEF_INIT("google.protobuf.DescriptorProto", 27, 6, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[0], 8, 7), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[0]),&reftables[0], &reftables[1]),
+  UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ExtensionRange", 4, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[8], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[16]),&reftables[2], &reftables[3]),
+  UPB_MSGDEF_INIT("google.protobuf.EnumDescriptorProto", 11, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[11], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[20]),&reftables[4], &reftables[5]),
+  UPB_MSGDEF_INIT("google.protobuf.EnumOptions", 7, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[0], &arrays[15], 8, 1), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[24]),&reftables[6], &reftables[7]),
+  UPB_MSGDEF_INIT("google.protobuf.EnumValueDescriptorProto", 8, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[23], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[28]),&reftables[8], &reftables[9]),
+  UPB_MSGDEF_INIT("google.protobuf.EnumValueOptions", 6, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[2], &arrays[27], 4, 0), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[32]),&reftables[10], &reftables[11]),
+  UPB_MSGDEF_INIT("google.protobuf.FieldDescriptorProto", 19, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[31], 9, 8), UPB_STRTABLE_INIT(8, 15, UPB_CTYPE_PTR, 4, &strentries[36]),&reftables[12], &reftables[13]),
+  UPB_MSGDEF_INIT("google.protobuf.FieldOptions", 14, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[4], &arrays[40], 32, 6), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[52]),&reftables[14], &reftables[15]),
+  UPB_MSGDEF_INIT("google.protobuf.FileDescriptorProto", 39, 6, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[72], 12, 11), UPB_STRTABLE_INIT(11, 15, UPB_CTYPE_PTR, 4, &strentries[68]),&reftables[16], &reftables[17]),
+  UPB_MSGDEF_INIT("google.protobuf.FileDescriptorSet", 6, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[84], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[84]),&reftables[18], &reftables[19]),
+  UPB_MSGDEF_INIT("google.protobuf.FileOptions", 21, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[6], &arrays[86], 64, 9), UPB_STRTABLE_INIT(10, 15, UPB_CTYPE_PTR, 4, &strentries[88]),&reftables[20], &reftables[21]),
+  UPB_MSGDEF_INIT("google.protobuf.MessageOptions", 8, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[8], &arrays[150], 16, 2), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[104]),&reftables[22], &reftables[23]),
+  UPB_MSGDEF_INIT("google.protobuf.MethodDescriptorProto", 13, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[166], 5, 4), UPB_STRTABLE_INIT(4, 7, UPB_CTYPE_PTR, 3, &strentries[108]),&reftables[24], &reftables[25]),
+  UPB_MSGDEF_INIT("google.protobuf.MethodOptions", 6, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[10], &arrays[171], 4, 0), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[116]),&reftables[26], &reftables[27]),
+  UPB_MSGDEF_INIT("google.protobuf.ServiceDescriptorProto", 11, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[175], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[120]),&reftables[28], &reftables[29]),
+  UPB_MSGDEF_INIT("google.protobuf.ServiceOptions", 6, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[12], &arrays[179], 4, 0), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[124]),&reftables[30], &reftables[31]),
+  UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo", 6, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[183], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[128]),&reftables[32], &reftables[33]),
+  UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo.Location", 14, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[185], 5, 4), UPB_STRTABLE_INIT(4, 7, UPB_CTYPE_PTR, 3, &strentries[132]),&reftables[34], &reftables[35]),
+  UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption", 18, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[190], 9, 7), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[140]),&reftables[36], &reftables[37]),
+  UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption.NamePart", 6, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[199], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[156]),&reftables[38], &reftables[39]),
+};
+
+static const upb_fielddef fields[81] = {
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "aggregate_value", 8, &msgs[18], NULL, 15, 6, {0},&reftables[40], &reftables[41]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "allow_alias", 2, &msgs[3], NULL, 6, 1, {0},&reftables[42], &reftables[43]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "cc_generic_services", 16, &msgs[10], NULL, 17, 6, {0},&reftables[44], &reftables[45]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "ctype", 1, &msgs[7], (const upb_def*)(&enums[2]), 6, 1, {0},&reftables[46], &reftables[47]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "default_value", 7, &msgs[6], NULL, 16, 7, {0},&reftables[48], &reftables[49]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "dependency", 3, &msgs[8], NULL, 30, 8, {0},&reftables[50], &reftables[51]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[7], NULL, 8, 3, {0},&reftables[52], &reftables[53]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, 0, false, false, false, false, "double_value", 6, &msgs[18], NULL, 11, 4, {0},&reftables[54], &reftables[55]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "end", 2, &msgs[1], NULL, 3, 1, {0},&reftables[56], &reftables[57]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 4, &msgs[0], (const upb_def*)(&msgs[2]), 16, 2, {0},&reftables[58], &reftables[59]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 5, &msgs[8], (const upb_def*)(&msgs[2]), 13, 1, {0},&reftables[60], &reftables[61]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "experimental_map_key", 9, &msgs[7], NULL, 10, 5, {0},&reftables[62], &reftables[63]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "extendee", 2, &msgs[6], NULL, 7, 2, {0},&reftables[64], &reftables[65]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 7, &msgs[8], (const upb_def*)(&msgs[6]), 19, 3, {0},&reftables[66], &reftables[67]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 6, &msgs[0], (const upb_def*)(&msgs[6]), 22, 4, {0},&reftables[68], &reftables[69]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension_range", 5, &msgs[0], (const upb_def*)(&msgs[1]), 19, 3, {0},&reftables[70], &reftables[71]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "field", 2, &msgs[0], (const upb_def*)(&msgs[6]), 10, 0, {0},&reftables[72], &reftables[73]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "file", 1, &msgs[9], (const upb_def*)(&msgs[8]), 5, 0, {0},&reftables[74], &reftables[75]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "go_package", 11, &msgs[10], NULL, 14, 5, {0},&reftables[76], &reftables[77]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "identifier_value", 3, &msgs[18], NULL, 6, 1, {0},&reftables[78], &reftables[79]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "input_type", 2, &msgs[12], NULL, 7, 2, {0},&reftables[80], &reftables[81]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, 0, false, false, false, false, "is_extension", 2, &msgs[19], NULL, 5, 1, {0},&reftables[82], &reftables[83]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generate_equals_and_hash", 20, &msgs[10], NULL, 20, 9, {0},&reftables[84], &reftables[85]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generic_services", 17, &msgs[10], NULL, 18, 7, {0},&reftables[86], &reftables[87]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_multiple_files", 10, &msgs[10], NULL, 13, 4, {0},&reftables[88], &reftables[89]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_outer_classname", 8, &msgs[10], NULL, 9, 2, {0},&reftables[90], &reftables[91]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_package", 1, &msgs[10], NULL, 6, 1, {0},&reftables[92], &reftables[93]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "label", 4, &msgs[6], (const upb_def*)(&enums[0]), 11, 4, {0},&reftables[94], &reftables[95]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "lazy", 5, &msgs[7], NULL, 9, 4, {0},&reftables[96], &reftables[97]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "leading_comments", 3, &msgs[17], NULL, 8, 2, {0},&reftables[98], &reftables[99]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "location", 1, &msgs[16], (const upb_def*)(&msgs[17]), 5, 0, {0},&reftables[100], &reftables[101]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "message_set_wire_format", 1, &msgs[11], NULL, 6, 1, {0},&reftables[102], &reftables[103]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "message_type", 4, &msgs[8], (const upb_def*)(&msgs[0]), 10, 0, {0},&reftables[104], &reftables[105]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "method", 2, &msgs[14], (const upb_def*)(&msgs[12]), 6, 0, {0},&reftables[106], &reftables[107]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[8], NULL, 22, 6, {0},&reftables[108], &reftables[109]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[14], NULL, 8, 2, {0},&reftables[110], &reftables[111]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "name", 2, &msgs[18], (const upb_def*)(&msgs[19]), 5, 0, {0},&reftables[112], &reftables[113]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[4], NULL, 4, 1, {0},&reftables[114], &reftables[115]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[0], NULL, 24, 6, {0},&reftables[116], &reftables[117]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[12], NULL, 4, 1, {0},&reftables[118], &reftables[119]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[2], NULL, 8, 2, {0},&reftables[120], &reftables[121]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[6], NULL, 4, 1, {0},&reftables[122], &reftables[123]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, 0, false, false, false, false, "name_part", 1, &msgs[19], NULL, 2, 0, {0},&reftables[124], &reftables[125]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, UPB_INTFMT_VARIABLE, false, false, false, false, "negative_int_value", 5, &msgs[18], NULL, 10, 3, {0},&reftables[126], &reftables[127]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "nested_type", 3, &msgs[0], (const upb_def*)(&msgs[0]), 13, 1, {0},&reftables[128], &reftables[129]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "no_standard_descriptor_accessor", 2, &msgs[11], NULL, 7, 2, {0},&reftables[130], &reftables[131]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 3, &msgs[6], NULL, 10, 3, {0},&reftables[132], &reftables[133]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 2, &msgs[4], NULL, 7, 2, {0},&reftables[134], &reftables[135]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "optimize_for", 9, &msgs[10], (const upb_def*)(&enums[3]), 12, 3, {0},&reftables[136], &reftables[137]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 7, &msgs[0], (const upb_def*)(&msgs[11]), 23, 5, {0},&reftables[138], &reftables[139]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[2], (const upb_def*)(&msgs[3]), 7, 1, {0},&reftables[140], &reftables[141]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[6], (const upb_def*)(&msgs[7]), 3, 0, {0},&reftables[142], &reftables[143]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[4], (const upb_def*)(&msgs[5]), 3, 0, {0},&reftables[144], &reftables[145]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[8], (const upb_def*)(&msgs[10]), 20, 4, {0},&reftables[146], &reftables[147]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[14], (const upb_def*)(&msgs[15]), 7, 1, {0},&reftables[148], &reftables[149]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 4, &msgs[12], (const upb_def*)(&msgs[13]), 3, 0, {0},&reftables[150], &reftables[151]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "output_type", 3, &msgs[12], NULL, 10, 3, {0},&reftables[152], &reftables[153]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "package", 2, &msgs[8], NULL, 25, 7, {0},&reftables[154], &reftables[155]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "packed", 2, &msgs[7], NULL, 7, 2, {0},&reftables[156], &reftables[157]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "path", 1, &msgs[17], NULL, 4, 0, {0},&reftables[158], &reftables[159]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, UPB_INTFMT_VARIABLE, false, false, false, false, "positive_int_value", 4, &msgs[18], NULL, 9, 2, {0},&reftables[160], &reftables[161]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "public_dependency", 10, &msgs[8], NULL, 35, 9, {0},&reftables[162], &reftables[163]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "py_generic_services", 18, &msgs[10], NULL, 19, 8, {0},&reftables[164], &reftables[165]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "service", 6, &msgs[8], (const upb_def*)(&msgs[14]), 16, 2, {0},&reftables[166], &reftables[167]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "source_code_info", 9, &msgs[8], (const upb_def*)(&msgs[16]), 21, 5, {0},&reftables[168], &reftables[169]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "span", 2, &msgs[17], NULL, 7, 1, {0},&reftables[170], &reftables[171]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "start", 1, &msgs[1], NULL, 2, 0, {0},&reftables[172], &reftables[173]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, 0, false, false, false, false, "string_value", 7, &msgs[18], NULL, 12, 5, {0},&reftables[174], &reftables[175]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "trailing_comments", 4, &msgs[17], NULL, 11, 3, {0},&reftables[176], &reftables[177]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "type", 5, &msgs[6], (const upb_def*)(&enums[1]), 12, 5, {0},&reftables[178], &reftables[179]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "type_name", 6, &msgs[6], NULL, 13, 6, {0},&reftables[180], &reftables[181]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[5], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[182], &reftables[183]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[15], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[184], &reftables[185]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[3], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[186], &reftables[187]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[13], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[188], &reftables[189]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[10], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[190], &reftables[191]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[11], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[192], &reftables[193]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[7], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[194], &reftables[195]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "value", 2, &msgs[2], (const upb_def*)(&msgs[4]), 6, 0, {0},&reftables[196], &reftables[197]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "weak", 10, &msgs[7], NULL, 13, 6, {0},&reftables[198], &reftables[199]),
+  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "weak_dependency", 11, &msgs[8], NULL, 38, 10, {0},&reftables[200], &reftables[201]),
+};
+
+static const upb_enumdef enums[4] = {
+  UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Label", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[160]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[202], 4, 3), 0, &reftables[202], &reftables[203]),
+  UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Type", UPB_STRTABLE_INIT(18, 31, UPB_CTYPE_INT32, 5, &strentries[164]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[206], 19, 18), 0, &reftables[204], &reftables[205]),
+  UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.CType", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[196]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[225], 3, 3), 0, &reftables[206], &reftables[207]),
+  UPB_ENUMDEF_INIT("google.protobuf.FileOptions.OptimizeMode", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[200]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[228], 4, 3), 0, &reftables[208], &reftables[209]),
+};
+
+static const upb_tabent strentries[236] = {
+  {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[14]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[38]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "field"), UPB_TABVALUE_PTR_INIT(&fields[16]), NULL},
+  {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "extension_range"), UPB_TABVALUE_PTR_INIT(&fields[15]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "nested_type"), UPB_TABVALUE_PTR_INIT(&fields[44]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[49]), NULL},
+  {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[9]), &strentries[14]},
+  {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "start"), UPB_TABVALUE_PTR_INIT(&fields[66]), NULL},
+  {UPB_TABKEY_STR("\003", "\000", "\000", "\000", "end"), UPB_TABVALUE_PTR_INIT(&fields[8]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "value"), UPB_TABVALUE_PTR_INIT(&fields[78]), NULL},
+  {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[50]), NULL},
+  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[40]), &strentries[22]},
+  {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[73]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "allow_alias"), UPB_TABVALUE_PTR_INIT(&fields[1]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[47]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[52]), NULL},
+  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[37]), &strentries[30]},
+  {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[71]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "label"), UPB_TABVALUE_PTR_INIT(&fields[27]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[41]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[46]), &strentries[49]},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "type_name"), UPB_TABVALUE_PTR_INIT(&fields[70]), NULL},
+  {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "extendee"), UPB_TABVALUE_PTR_INIT(&fields[12]), NULL},
+  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "type"), UPB_TABVALUE_PTR_INIT(&fields[69]), &strentries[48]},
+  {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "default_value"), UPB_TABVALUE_PTR_INIT(&fields[4]), NULL},
+  {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[51]), NULL},
+  {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "experimental_map_key"), UPB_TABVALUE_PTR_INIT(&fields[11]), &strentries[67]},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "weak"), UPB_TABVALUE_PTR_INIT(&fields[79]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "packed"), UPB_TABVALUE_PTR_INIT(&fields[58]), NULL},
+  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "lazy"), UPB_TABVALUE_PTR_INIT(&fields[28]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "ctype"), UPB_TABVALUE_PTR_INIT(&fields[3]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[6]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[77]), NULL},
+  {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[13]), NULL},
+  {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "weak_dependency"), UPB_TABVALUE_PTR_INIT(&fields[80]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[34]), NULL},
+  {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "service"), UPB_TABVALUE_PTR_INIT(&fields[63]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "source_code_info"), UPB_TABVALUE_PTR_INIT(&fields[64]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "dependency"), UPB_TABVALUE_PTR_INIT(&fields[5]), NULL},
+  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "message_type"), UPB_TABVALUE_PTR_INIT(&fields[32]), NULL},
+  {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "package"), UPB_TABVALUE_PTR_INIT(&fields[57]), NULL},
+  {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[53]), &strentries[82]},
+  {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[10]), NULL},
+  {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "public_dependency"), UPB_TABVALUE_PTR_INIT(&fields[61]), &strentries[81]},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "file"), UPB_TABVALUE_PTR_INIT(&fields[17]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[75]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "cc_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[2]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "java_multiple_files"), UPB_TABVALUE_PTR_INIT(&fields[24]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\025", "\000", "\000", "\000", "java_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[23]), &strentries[102]},
+  {UPB_TABKEY_STR("\035", "\000", "\000", "\000", "java_generate_equals_and_hash"), UPB_TABVALUE_PTR_INIT(&fields[22]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "go_package"), UPB_TABVALUE_PTR_INIT(&fields[18]), NULL},
+  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "java_package"), UPB_TABVALUE_PTR_INIT(&fields[26]), NULL},
+  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "optimize_for"), UPB_TABVALUE_PTR_INIT(&fields[48]), NULL},
+  {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "py_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[62]), NULL},
+  {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "java_outer_classname"), UPB_TABVALUE_PTR_INIT(&fields[25]), NULL},
+  {UPB_TABKEY_STR("\027", "\000", "\000", "\000", "message_set_wire_format"), UPB_TABVALUE_PTR_INIT(&fields[31]), &strentries[106]},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[76]), NULL},
+  {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "no_standard_descriptor_accessor"), UPB_TABVALUE_PTR_INIT(&fields[45]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[39]), NULL},
+  {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "input_type"), UPB_TABVALUE_PTR_INIT(&fields[20]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "output_type"), UPB_TABVALUE_PTR_INIT(&fields[56]), NULL},
+  {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[55]), NULL},
+  {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[74]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[54]), &strentries[122]},
+  {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "method"), UPB_TABVALUE_PTR_INIT(&fields[33]), NULL},
+  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[35]), &strentries[121]},
+  {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[72]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "location"), UPB_TABVALUE_PTR_INIT(&fields[30]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "span"), UPB_TABVALUE_PTR_INIT(&fields[65]), &strentries[139]},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "trailing_comments"), UPB_TABVALUE_PTR_INIT(&fields[68]), NULL},
+  {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "leading_comments"), UPB_TABVALUE_PTR_INIT(&fields[29]), &strentries[137]},
+  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "path"), UPB_TABVALUE_PTR_INIT(&fields[59]), NULL},
+  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "double_value"), UPB_TABVALUE_PTR_INIT(&fields[7]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[36]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "negative_int_value"), UPB_TABVALUE_PTR_INIT(&fields[43]), NULL},
+  {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "aggregate_value"), UPB_TABVALUE_PTR_INIT(&fields[0]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "positive_int_value"), UPB_TABVALUE_PTR_INIT(&fields[60]), NULL},
+  {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "identifier_value"), UPB_TABVALUE_PTR_INIT(&fields[19]), NULL},
+  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "string_value"), UPB_TABVALUE_PTR_INIT(&fields[67]), &strentries[154]},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "is_extension"), UPB_TABVALUE_PTR_INIT(&fields[21]), NULL},
+  {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "name_part"), UPB_TABVALUE_PTR_INIT(&fields[42]), NULL},
+  {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REQUIRED"), UPB_TABVALUE_INT_INIT(2), &strentries[162]},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REPEATED"), UPB_TABVALUE_INT_INIT(3), NULL},
+  {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_OPTIONAL"), UPB_TABVALUE_INT_INIT(1), NULL},
+  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED64"), UPB_TABVALUE_INT_INIT(6), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_STRING"), UPB_TABVALUE_INT_INIT(9), NULL},
+  {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_FLOAT"), UPB_TABVALUE_INT_INIT(2), &strentries[193]},
+  {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_DOUBLE"), UPB_TABVALUE_INT_INIT(1), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT32"), UPB_TABVALUE_INT_INIT(5), NULL},
+  {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED32"), UPB_TABVALUE_INT_INIT(15), NULL},
+  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED32"), UPB_TABVALUE_INT_INIT(7), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_MESSAGE"), UPB_TABVALUE_INT_INIT(11), &strentries[194]},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT64"), UPB_TABVALUE_INT_INIT(3), &strentries[191]},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_ENUM"), UPB_TABVALUE_INT_INIT(14), NULL},
+  {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT32"), UPB_TABVALUE_INT_INIT(13), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT64"), UPB_TABVALUE_INT_INIT(4), &strentries[190]},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED64"), UPB_TABVALUE_INT_INIT(16), NULL},
+  {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_BYTES"), UPB_TABVALUE_INT_INIT(12), NULL},
+  {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT64"), UPB_TABVALUE_INT_INIT(18), NULL},
+  {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_BOOL"), UPB_TABVALUE_INT_INIT(8), NULL},
+  {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_GROUP"), UPB_TABVALUE_INT_INIT(10), NULL},
+  {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT32"), UPB_TABVALUE_INT_INIT(17), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "CORD"), UPB_TABVALUE_INT_INIT(1), NULL},
+  {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "STRING"), UPB_TABVALUE_INT_INIT(0), &strentries[197]},
+  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "STRING_PIECE"), UPB_TABVALUE_INT_INIT(2), NULL},
+  {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "CODE_SIZE"), UPB_TABVALUE_INT_INIT(2), NULL},
+  {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "SPEED"), UPB_TABVALUE_INT_INIT(1), &strentries[203]},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "LITE_RUNTIME"), UPB_TABVALUE_INT_INIT(3), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\047", "\000", "\000", "\000", "google.protobuf.SourceCodeInfo.Location"), UPB_TABVALUE_PTR_INIT(&msgs[17]), NULL},
+  {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.UninterpretedOption"), UPB_TABVALUE_PTR_INIT(&msgs[18]), NULL},
+  {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.FileDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[8]), NULL},
+  {UPB_TABKEY_STR("\045", "\000", "\000", "\000", "google.protobuf.MethodDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[12]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\040", "\000", "\000", "\000", "google.protobuf.EnumValueOptions"), UPB_TABVALUE_PTR_INIT(&msgs[5]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "google.protobuf.DescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[0]), &strentries[228]},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.SourceCodeInfo"), UPB_TABVALUE_PTR_INIT(&msgs[16]), NULL},
+  {UPB_TABKEY_STR("\051", "\000", "\000", "\000", "google.protobuf.FieldDescriptorProto.Type"), UPB_TABVALUE_PTR_INIT(&enums[1]), NULL},
+  {UPB_TABKEY_STR("\056", "\000", "\000", "\000", "google.protobuf.DescriptorProto.ExtensionRange"), UPB_TABVALUE_PTR_INIT(&msgs[1]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_STR("\050", "\000", "\000", "\000", "google.protobuf.EnumValueDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[4]), NULL},
+  {UPB_TABKEY_STR("\034", "\000", "\000", "\000", "google.protobuf.FieldOptions"), UPB_TABVALUE_PTR_INIT(&msgs[7]), NULL},
+  {UPB_TABKEY_STR("\033", "\000", "\000", "\000", "google.protobuf.FileOptions"), UPB_TABVALUE_PTR_INIT(&msgs[10]), NULL},
+  {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.EnumDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[2]), &strentries[233]},
+  {UPB_TABKEY_STR("\052", "\000", "\000", "\000", "google.protobuf.FieldDescriptorProto.Label"), UPB_TABVALUE_PTR_INIT(&enums[0]), NULL},
+  {UPB_TABKEY_STR("\046", "\000", "\000", "\000", "google.protobuf.ServiceDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[14]), NULL},
+  {UPB_TABKEY_STR("\042", "\000", "\000", "\000", "google.protobuf.FieldOptions.CType"), UPB_TABVALUE_PTR_INIT(&enums[2]), &strentries[229]},
+  {UPB_TABKEY_STR("\041", "\000", "\000", "\000", "google.protobuf.FileDescriptorSet"), UPB_TABVALUE_PTR_INIT(&msgs[9]), &strentries[235]},
+  {UPB_TABKEY_STR("\033", "\000", "\000", "\000", "google.protobuf.EnumOptions"), UPB_TABVALUE_PTR_INIT(&msgs[3]), NULL},
+  {UPB_TABKEY_STR("\044", "\000", "\000", "\000", "google.protobuf.FieldDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[6]), NULL},
+  {UPB_TABKEY_STR("\050", "\000", "\000", "\000", "google.protobuf.FileOptions.OptimizeMode"), UPB_TABVALUE_PTR_INIT(&enums[3]), &strentries[221]},
+  {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.ServiceOptions"), UPB_TABVALUE_PTR_INIT(&msgs[15]), NULL},
+  {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.MessageOptions"), UPB_TABVALUE_PTR_INIT(&msgs[11]), NULL},
+  {UPB_TABKEY_STR("\035", "\000", "\000", "\000", "google.protobuf.MethodOptions"), UPB_TABVALUE_PTR_INIT(&msgs[13]), &strentries[226]},
+  {UPB_TABKEY_STR("\054", "\000", "\000", "\000", "google.protobuf.UninterpretedOption.NamePart"), UPB_TABVALUE_PTR_INIT(&msgs[19]), NULL},
+};
+
+/* Generated table entries keyed by integer (UPB_TABKEY_NUM).  The 14 slots
+ * alternate between an empty slot and an entry that maps key 999 to one of
+ * fields[71]..fields[77]; none of the entries chains to another (the third
+ * member is always NULL).
+ * NOTE(review): 999 is presumably the field number shared by the
+ * "uninterpreted_option" fields of the *Options messages -- confirm against
+ * descriptor.proto. */
+static const upb_tabent intentries[14] = {
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[73]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[71]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[77]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[75]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[76]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[74]), NULL},
+  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
+  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[72]), NULL},
+};
+
+static const upb_tabval arrays[232] = {
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[38]),
+  UPB_TABVALUE_PTR_INIT(&fields[16]),
+  UPB_TABVALUE_PTR_INIT(&fields[44]),
+  UPB_TABVALUE_PTR_INIT(&fields[9]),
+  UPB_TABVALUE_PTR_INIT(&fields[15]),
+  UPB_TABVALUE_PTR_INIT(&fields[14]),
+  UPB_TABVALUE_PTR_INIT(&fields[49]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[66]),
+  UPB_TABVALUE_PTR_INIT(&fields[8]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[40]),
+  UPB_TABVALUE_PTR_INIT(&fields[78]),
+  UPB_TABVALUE_PTR_INIT(&fields[50]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[1]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[37]),
+  UPB_TABVALUE_PTR_INIT(&fields[47]),
+  UPB_TABVALUE_PTR_INIT(&fields[52]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[41]),
+  UPB_TABVALUE_PTR_INIT(&fields[12]),
+  UPB_TABVALUE_PTR_INIT(&fields[46]),
+  UPB_TABVALUE_PTR_INIT(&fields[27]),
+  UPB_TABVALUE_PTR_INIT(&fields[69]),
+  UPB_TABVALUE_PTR_INIT(&fields[70]),
+  UPB_TABVALUE_PTR_INIT(&fields[4]),
+  UPB_TABVALUE_PTR_INIT(&fields[51]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[3]),
+  UPB_TABVALUE_PTR_INIT(&fields[58]),
+  UPB_TABVALUE_PTR_INIT(&fields[6]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[28]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[11]),
+  UPB_TABVALUE_PTR_INIT(&fields[79]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[34]),
+  UPB_TABVALUE_PTR_INIT(&fields[57]),
+  UPB_TABVALUE_PTR_INIT(&fields[5]),
+  UPB_TABVALUE_PTR_INIT(&fields[32]),
+  UPB_TABVALUE_PTR_INIT(&fields[10]),
+  UPB_TABVALUE_PTR_INIT(&fields[63]),
+  UPB_TABVALUE_PTR_INIT(&fields[13]),
+  UPB_TABVALUE_PTR_INIT(&fields[53]),
+  UPB_TABVALUE_PTR_INIT(&fields[64]),
+  UPB_TABVALUE_PTR_INIT(&fields[61]),
+  UPB_TABVALUE_PTR_INIT(&fields[80]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[17]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[26]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[25]),
+  UPB_TABVALUE_PTR_INIT(&fields[48]),
+  UPB_TABVALUE_PTR_INIT(&fields[24]),
+  UPB_TABVALUE_PTR_INIT(&fields[18]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[2]),
+  UPB_TABVALUE_PTR_INIT(&fields[23]),
+  UPB_TABVALUE_PTR_INIT(&fields[62]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[22]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[31]),
+  UPB_TABVALUE_PTR_INIT(&fields[45]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[39]),
+  UPB_TABVALUE_PTR_INIT(&fields[20]),
+  UPB_TABVALUE_PTR_INIT(&fields[56]),
+  UPB_TABVALUE_PTR_INIT(&fields[55]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[35]),
+  UPB_TABVALUE_PTR_INIT(&fields[33]),
+  UPB_TABVALUE_PTR_INIT(&fields[54]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[30]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[59]),
+  UPB_TABVALUE_PTR_INIT(&fields[65]),
+  UPB_TABVALUE_PTR_INIT(&fields[29]),
+  UPB_TABVALUE_PTR_INIT(&fields[68]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[36]),
+  UPB_TABVALUE_PTR_INIT(&fields[19]),
+  UPB_TABVALUE_PTR_INIT(&fields[60]),
+  UPB_TABVALUE_PTR_INIT(&fields[43]),
+  UPB_TABVALUE_PTR_INIT(&fields[7]),
+  UPB_TABVALUE_PTR_INIT(&fields[67]),
+  UPB_TABVALUE_PTR_INIT(&fields[0]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT(&fields[42]),
+  UPB_TABVALUE_PTR_INIT(&fields[21]),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT("LABEL_OPTIONAL"),
+  UPB_TABVALUE_PTR_INIT("LABEL_REQUIRED"),
+  UPB_TABVALUE_PTR_INIT("LABEL_REPEATED"),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT("TYPE_DOUBLE"),
+  UPB_TABVALUE_PTR_INIT("TYPE_FLOAT"),
+  UPB_TABVALUE_PTR_INIT("TYPE_INT64"),
+  UPB_TABVALUE_PTR_INIT("TYPE_UINT64"),
+  UPB_TABVALUE_PTR_INIT("TYPE_INT32"),
+  UPB_TABVALUE_PTR_INIT("TYPE_FIXED64"),
+  UPB_TABVALUE_PTR_INIT("TYPE_FIXED32"),
+  UPB_TABVALUE_PTR_INIT("TYPE_BOOL"),
+  UPB_TABVALUE_PTR_INIT("TYPE_STRING"),
+  UPB_TABVALUE_PTR_INIT("TYPE_GROUP"),
+  UPB_TABVALUE_PTR_INIT("TYPE_MESSAGE"),
+  UPB_TABVALUE_PTR_INIT("TYPE_BYTES"),
+  UPB_TABVALUE_PTR_INIT("TYPE_UINT32"),
+  UPB_TABVALUE_PTR_INIT("TYPE_ENUM"),
+  UPB_TABVALUE_PTR_INIT("TYPE_SFIXED32"),
+  UPB_TABVALUE_PTR_INIT("TYPE_SFIXED64"),
+  UPB_TABVALUE_PTR_INIT("TYPE_SINT32"),
+  UPB_TABVALUE_PTR_INIT("TYPE_SINT64"),
+  UPB_TABVALUE_PTR_INIT("STRING"),
+  UPB_TABVALUE_PTR_INIT("CORD"),
+  UPB_TABVALUE_PTR_INIT("STRING_PIECE"),
+  UPB_TABVALUE_EMPTY_INIT,
+  UPB_TABVALUE_PTR_INIT("SPEED"),
+  UPB_TABVALUE_PTR_INIT("CODE_SIZE"),
+  UPB_TABVALUE_PTR_INIT("LITE_RUNTIME"),
+};
+
+/* The statically initialized symbol table handed out by
+ * upbdefs_google_protobuf_descriptor().  Its string table is backed by the
+ * entries starting at strentries[204]; &reftables[210] and &reftables[211] are
+ * the last two slots of the 212-element reftables array, whose definition
+ * further down is guarded by UPB_DEBUG_REFS. */
+static const upb_symtab symtab = UPB_SYMTAB_INIT(UPB_STRTABLE_INIT(24, 31, UPB_CTYPE_PTR, 5, &strentries[204]), &reftables[210], &reftables[211]);
+
+/* Takes a ref on the static descriptor symbol table on behalf of "owner" and
+ * returns a pointer to that table. */
+const upb_symtab *upbdefs_google_protobuf_descriptor(const void *owner) {
+  const upb_symtab *table = &symtab;
+  upb_symtab_ref(table, owner);
+  return table;
+}
+
+#ifdef UPB_DEBUG_REFS
+static upb_inttable reftables[212] = {
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+  UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
+};
+#endif
+
+/*
+** XXX: The routines in this file that consume a string do not currently
+** support having the string span buffers.  In the future, as upb_sink and
+** its buffering/sharing functionality evolve there should be an easy and
+** idiomatic way of correctly handling this case.  For now, we accept this
+** limitation since we currently only parse descriptors from single strings.
+*/
+
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* upb_deflist is an internal-only dynamic array for storing a growing list of
+ * upb_defs. */
+typedef struct {
+  upb_def **defs;  /* Heap-allocated array of defs; NULL until first grown. */
+  size_t len;      /* Number of defs currently stored in "defs". */
+  size_t size;     /* Number of slots allocated for "defs". */
+  bool owned;      /* True while the list still holds a ref on each def;
+                    * cleared once the refs have been donated elsewhere. */
+} upb_deflist;
+
+/* We keep a stack of all the messages scopes we are currently in, as well as
+ * the top-level file scope.  This is necessary to correctly qualify the
+ * definitions that are contained inside.  "name" tracks the name of the
+ * message or package (a bare name -- not qualified by any enclosing scopes). */
+typedef struct {
+  /* Heap-allocated and owned by the frame.  NULL until a scope name has been
+   * set; freed when it is replaced or when the container ends. */
+  char *name;
+  /* Index of the first def that is under this scope.  For msgdefs, the
+   * msgdef itself is at start-1. */
+  int start;
+} upb_descreader_frame;
+
+/* The maximum number of nested declarations that are allowed, i.e.
+ * message Foo {
+ *   message Bar {
+ *     message Baz {
+ *     }
+ *   }
+ * }
+ *
+ * This is the capacity of the reader's scope stack.  The top-level file scope
+ * is pushed onto the same stack, so it occupies one of these slots.
+ *
+ * This is a resource limit that affects how big our runtime stack can grow.
+ * TODO: make this a runtime-settable property of the Reader instance. */
+#define UPB_MAX_MESSAGE_NESTING 64
+
+/* State for reading descriptors and turning them into upb defs. */
+struct upb_descreader {
+  upb_sink sink;
+  /* Every def created so far, in creation order. */
+  upb_deflist defs;
+  /* Scope frames for the file and the messages currently being read;
+   * stack_len is the number of frames in use. */
+  upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING];
+  int stack_len;
+
+  /* Scratch state for the enum value currently being read: its number and
+   * heap-allocated name, plus flags recording which of the two were seen. */
+  uint32_t number;
+  char *name;
+  bool saw_number;
+  bool saw_name;
+
+  /* Heap-allocated; freed and reset to NULL whenever a new field starts.
+   * NOTE(review): presumably the textual default value of the current field --
+   * confirm against the field handlers. */
+  char *default_string;
+
+  /* The field def created when the current field message started. */
+  upb_fielddef *f;
+};
+
+/* Returns a freshly malloc()ed, NUL-terminated copy of the first n bytes of
+ * buf, or NULL if the allocation fails.  The caller must free() the result. */
+static char *upb_strndup(const char *buf, size_t n) {
+  char *copy = malloc(n + 1);
+  if (copy != NULL) {
+    memcpy(copy, buf, n);
+    copy[n] = '\0';
+  }
+  return copy;
+}
+
+/* Returns a newly allocated string that joins input strings together, for
+ * example:
+ *   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
+ *   join("", "Baz") -> "Baz"
+ * A NULL base is treated like an empty one; in that case the result is
+ * whatever upb_strdup(name) returns.  Otherwise returns NULL if the joined
+ * string cannot be allocated.  The caller owns the returned string and must
+ * free() it. */
+static char *upb_join(const char *base, const char *name) {
+  if (!base || strlen(base) == 0) {
+    return upb_strdup(name);
+  } else {
+    size_t base_len = strlen(base);
+    size_t name_len = strlen(name);
+    /* +2: one byte for the '.' separator and one for the trailing NUL. */
+    char *ret = malloc(base_len + name_len + 2);
+    /* Report allocation failure instead of writing through a NULL pointer. */
+    if (!ret) return NULL;
+    memcpy(ret, base, base_len);
+    ret[base_len] = '.';
+    /* name_len + 1 so that name's terminating NUL is copied as well. */
+    memcpy(ret + base_len + 1, name, name_len + 1);
+    return ret;
+  }
+}
+
+
+/* upb_deflist ****************************************************************/
+
+/* Resets l to an empty list with no storage allocated and marks it as owning
+ * the refs of its defs. */
+void upb_deflist_init(upb_deflist *l) {
+  l->defs = NULL;
+  l->len = 0;
+  l->size = 0;
+  l->owned = true;
+}
+
+/* Frees the list's storage.  If the list still owns its defs, the ref held on
+ * each one (with the list itself as owner) is released first. */
+void upb_deflist_uninit(upb_deflist *l) {
+  if (l->owned) {
+    size_t n;
+    for (n = 0; n < l->len; n++) {
+      upb_def_unref(l->defs[n], l);
+    }
+  }
+  free(l->defs);
+}
+
+/* Appends d to the end of l, growing the backing array when needed.
+ *
+ * Returns true on success.  Returns false if the array could not be grown; in
+ * that case the list is left exactly as it was (same storage, same length) and
+ * d has not been stored, so the caller remains responsible for d. */
+bool upb_deflist_push(upb_deflist *l, upb_def *d) {
+  /* Same growth policy as before: grow once the new length would reach the
+   * allocated size, to twice max(size, 4). */
+  if (l->len + 1 >= l->size) {
+    size_t new_size = UPB_MAX(l->size, 4) * 2;
+    /* Keep the old pointer until realloc succeeds so that a failure neither
+     * leaks the existing array nor leaves l->defs NULL with a non-zero len. */
+    upb_def **new_defs = realloc(l->defs, new_size * sizeof(*new_defs));
+    if (!new_defs) return false;
+    l->defs = new_defs;
+    l->size = new_size;
+  }
+  /* Only count the element once there is definitely room for it. */
+  l->defs[l->len++] = d;
+  return true;
+}
+
+/* Transfers the ref the list holds on each of its defs to "owner" and marks
+ * the list as no longer owning them.  The list must currently own its defs. */
+void upb_deflist_donaterefs(upb_deflist *l, void *owner) {
+  size_t n = 0;
+  assert(l->owned);
+  while (n < l->len) {
+    upb_def_donateref(l->defs[n], l, owner);
+    n++;
+  }
+  l->owned = false;
+}
+
+/* Returns the def most recently pushed onto l.  The list must be non-empty. */
+static upb_def *upb_deflist_last(upb_deflist *l) {
+  size_t last = l->len - 1;
+  return l->defs[last];
+}
+
+/* Prefixes the full name of every def from index "start" to the end of the
+ * list with "str" (joined by upb_join(), so a NULL or empty "str" leaves the
+ * names unchanged). */
+static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
+  uint32_t idx = start;
+  while (idx < l->len) {
+    upb_def *d = l->defs[idx++];
+    char *qualified = upb_join(str, upb_def_fullname(d));
+    upb_def_setfullname(d, qualified, NULL);
+    /* The joined name is only needed for the call above, so it is released
+     * right away. */
+    free(qualified);
+  }
+}
+
+
+/* upb_descreader  ************************************************************/
+
+/* Returns the msgdef that owns the innermost scope.  Requires at least two
+ * frames on the stack; the msgdef is the def stored just before the first
+ * def of the top frame. */
+static upb_msgdef *upb_descreader_top(upb_descreader *r) {
+  const upb_descreader_frame *top;
+  int msg_index;
+  assert(r->stack_len > 1);
+  top = &r->stack[r->stack_len - 1];
+  msg_index = top->start - 1;
+  assert(msg_index >= 0);
+  return upb_downcast_msgdef_mutable(r->defs.defs[msg_index]);
+}
+
+/* Returns the def most recently added to the reader's def list.  The list
+ * must be non-empty. */
+static upb_def *upb_descreader_last(upb_descreader *r) {
+  upb_deflist *list = &r->defs;
+  return list->defs[list->len - 1];
+}
+
+/* Start/end handlers for FileDescriptorProto and DescriptorProto (the two
+ * entities that have names and can contain sub-definitions). */
+
+/* Pushes a new, still unnamed scope frame whose defs begin at the current end
+ * of the reader's def list. */
+void upb_descreader_startcontainer(upb_descreader *r) {
+  upb_descreader_frame *f;
+  /* r->stack has a fixed capacity and this function cannot report failure, so
+   * at least catch nesting beyond that capacity in builds with assertions
+   * enabled instead of silently writing past the end of the array. */
+  assert((size_t)r->stack_len < sizeof(r->stack) / sizeof(r->stack[0]));
+  f = &r->stack[r->stack_len++];
+  f->start = r->defs.len;
+  f->name = NULL;
+}
+
+/* Pops the innermost scope frame: qualifies every def created under it with
+ * the frame's name, then releases that name. */
+void upb_descreader_endcontainer(upb_descreader *r) {
+  upb_descreader_frame *top;
+  r->stack_len--;
+  top = &r->stack[r->stack_len];
+  upb_deflist_qualify(&r->defs, top->name, top->start);
+  free(top->name);
+  top->name = NULL;
+}
+
+/* Replaces the name of the innermost scope frame with "str", freeing any
+ * previous name.  The frame takes ownership of "str". */
+void upb_descreader_setscopename(upb_descreader *r, char *str) {
+  char **slot = &r->stack[r->stack_len - 1].name;
+  free(*slot);
+  *slot = str;
+}
+
+/* Handlers for google.protobuf.FileDescriptorProto. */
+
+/* Start-of-message handler: opens the file-level scope.  Always succeeds. */
+static bool file_startmsg(void *r, const void *hd) {
+  upb_descreader *reader = r;
+  UPB_UNUSED(hd);
+  upb_descreader_startcontainer(reader);
+  return true;
+}
+
+/* End-of-message handler: closes the file-level scope, which qualifies the
+ * defs created under it with the scope's name.  Always succeeds. */
+static bool file_endmsg(void *closure, const void *hd, upb_status *status) {
+  UPB_UNUSED(hd);
+  UPB_UNUSED(status);
+  upb_descreader_endcontainer((upb_descreader *)closure);
+  return true;
+}
+
+/* String handler for the file's package: stores a copy of the n bytes at buf
+ * as the name of the current scope and reports all n bytes as consumed. */
+static size_t file_onpackage(void *closure, const void *hd, const char *buf,
+                             size_t n, const upb_bufhandle *handle) {
+  upb_descreader *reader = closure;
+  char *package;
+  UPB_UNUSED(hd);
+  UPB_UNUSED(handle);
+  /* XXX: see comment at the top of the file. */
+  package = upb_strndup(buf, n);
+  /* The scope frame takes ownership of the copy. */
+  upb_descreader_setscopename(reader, package);
+  return n;
+}
+
+/* Handlers for google.protobuf.EnumValueDescriptorProto. */
+
+/* Start-of-message handler: forgets whether a name or number was seen for a
+ * previous enum value.  Always succeeds. */
+static bool enumval_startmsg(void *closure, const void *hd) {
+  upb_descreader *reader = closure;
+  UPB_UNUSED(hd);
+  reader->saw_name = false;
+  reader->saw_number = false;
+  return true;
+}
+
+/* String handler for the enum value's name: replaces any previously stored
+ * name with a copy of the n bytes at buf, records that a name was seen and
+ * reports all n bytes as consumed. */
+static size_t enumval_onname(void *closure, const void *hd, const char *buf,
+                             size_t n, const upb_bufhandle *handle) {
+  upb_descreader *reader = closure;
+  char *copy;
+  UPB_UNUSED(hd);
+  UPB_UNUSED(handle);
+  /* XXX: see comment at the top of the file. */
+  free(reader->name);
+  copy = upb_strndup(buf, n);
+  reader->name = copy;
+  reader->saw_name = true;
+  return n;
+}
+
+/* Value handler for the enum value's number: stores val and records that a
+ * number was seen.  Always succeeds. */
+static bool enumval_onnumber(void *closure, const void *hd, int32_t val) {
+  upb_descreader *reader = closure;
+  UPB_UNUSED(hd);
+  reader->saw_number = true;
+  reader->number = val;
+  return true;
+}
+
+/* End-of-message handler: adds the collected name/number pair to the enum
+ * that was most recently added to the def list.
+ *
+ * Returns false, with "status" set, if the value lacked a name or a number,
+ * or if the enum def rejected the value.  The stored name is released once
+ * the add has been attempted, whether or not it succeeded. */
+static bool enumval_endmsg(void *closure, const void *hd, upb_status *status) {
+  upb_descreader *r = closure;
+  upb_enumdef *e;
+  bool ok;
+  UPB_UNUSED(hd);
+
+  if(!r->saw_number || !r->saw_name) {
+    upb_status_seterrmsg(status, "Enum value missing name or number.");
+    return false;
+  }
+  e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
+  /* Propagate a failed add instead of reporting success regardless. */
+  ok = upb_enumdef_addval(e, r->name, r->number, status);
+  free(r->name);
+  r->name = NULL;
+  return ok;
+}
+
+
+/* Handlers for google.protobuf.EnumDescriptorProto. */
+/* Start-of-message handler for EnumDescriptorProto: creates a new enum def and
+ * appends it to the reader's def list.  Always returns true. */
+static bool enum_startmsg(void *closure, const void *hd) {
+  upb_descreader *reader = closure;
+  upb_enumdef *created;
+  UPB_UNUSED(hd);
+
+  created = upb_enumdef_new(&reader->defs);
+  upb_deflist_push(&reader->defs, upb_enumdef_upcast_mutable(created));
+  return true;
+}
+
+/* End-of-message handler for EnumDescriptorProto: validates the enum that was
+ * just read.  Returns false with an error in "status" if the enum has no full
+ * name or no values. */
+static bool enum_endmsg(void *closure, const void *hd, upb_status *status) {
+  upb_descreader *reader = closure;
+  upb_enumdef *enumdef;
+  UPB_UNUSED(hd);
+
+  enumdef = upb_downcast_enumdef_mutable(upb_descreader_last(reader));
+  if (!upb_def_fullname(upb_descreader_last(reader))) {
+    upb_status_seterrmsg(status, "Enum had no name.");
+    return false;
+  }
+  if (upb_enumdef_numvals(enumdef) == 0) {
+    upb_status_seterrmsg(status, "Enum had no values.");
+    return false;
+  }
+  return true;
+}
+
+/* String handler for EnumDescriptorProto.name: sets the full name of the most
+ * recently pushed def from a temporary NUL-terminated copy of the input.
+ * Reports all "n" bytes as consumed. */
+static size_t enum_onname(void *closure, const void *hd, const char *buf,
+                          size_t n, const upb_bufhandle *handle) {
+  upb_descreader *reader = closure;
+  char *copy;
+  UPB_UNUSED(handle);
+  UPB_UNUSED(hd);
+  /* XXX: see comment at the top of the file. */
+  copy = upb_strndup(buf, n);
+  upb_def_setfullname(upb_descreader_last(reader), copy, NULL);
+  free(copy);
+  return n;
+}
+
+/* Handlers for google.protobuf.FieldDescriptorProto */
+/* Start-of-message handler for FieldDescriptorProto: creates the field def
+ * that the other field_* handlers fill in, and discards any default-value
+ * string left over from a previous field.  Always returns true. */
+static bool field_startmsg(void *closure, const void *hd) {
+  upb_descreader *reader = closure;
+  UPB_UNUSED(hd);
+
+  reader->f = upb_fielddef_new(&reader->defs);
+
+  free(reader->default_string);
+  reader->default_string = NULL;
+
+  /* fielddefs default to packed, but descriptors default to non-packed. */
+  upb_fielddef_setpacked(reader->f, false);
+  return true;
+}
+
+/* Parses the textual default value "str" and stores it as the default of
+ * field "f", according to the field's type.  Only the integer, floating-point
+ * and bool types are handled; any other type aborts.  "str" is only read.
+ *
+ * Returns true on success.  Returns false if a numeric "str" is empty, has
+ * trailing characters, or is out of range for the type, or if a bool "str" is
+ * neither "true" nor "false". */
+static bool parse_default(char *str, upb_fielddef *f) {
+  bool success = true;
+  char *end;
+
+  /* The strto*() functions only set errno on failure and never clear it, so a
+   * stale ERANGE left by earlier code would otherwise be reported as a
+   * conversion error here. */
+  errno = 0;
+
+  switch (upb_fielddef_type(f)) {
+    case UPB_TYPE_INT32: {
+      long val = strtol(str, &end, 0);
+      /* end == str means no digits were consumed (e.g. an empty string). */
+      if (end == str || *end || errno == ERANGE || val > INT32_MAX ||
+          val < INT32_MIN)
+        success = false;
+      else
+        upb_fielddef_setdefaultint32(f, val);
+      break;
+    }
+    case UPB_TYPE_INT64: {
+      /* XXX: Need to write our own strtoll, since it's not available in c89. */
+      long long val = strtol(str, &end, 0);
+      if (end == str || *end || errno == ERANGE || val > INT64_MAX ||
+          val < INT64_MIN)
+        success = false;
+      else
+        upb_fielddef_setdefaultint64(f, val);
+      break;
+    }
+    case UPB_TYPE_UINT32: {
+      unsigned long val = strtoul(str, &end, 0);
+      if (end == str || *end || errno == ERANGE || val > UINT32_MAX)
+        success = false;
+      else
+        upb_fielddef_setdefaultuint32(f, val);
+      break;
+    }
+    case UPB_TYPE_UINT64: {
+      /* XXX: Need to write our own strtoull, since it's not available in c89. */
+      unsigned long long val = strtoul(str, &end, 0);
+      if (end == str || *end || errno == ERANGE || val > UINT64_MAX)
+        success = false;
+      else
+        upb_fielddef_setdefaultuint64(f, val);
+      break;
+    }
+    case UPB_TYPE_DOUBLE: {
+      double val = strtod(str, &end);
+      if (end == str || *end || errno == ERANGE)
+        success = false;
+      else
+        upb_fielddef_setdefaultdouble(f, val);
+      break;
+    }
+    case UPB_TYPE_FLOAT: {
+      /* XXX: Need to write our own strtof, since it's not available in c89. */
+      float val = strtod(str, &end);
+      if (end == str || *end || errno == ERANGE)
+        success = false;
+      else
+        upb_fielddef_setdefaultfloat(f, val);
+      break;
+    }
+    case UPB_TYPE_BOOL: {
+      if (strcmp(str, "false") == 0)
+        upb_fielddef_setdefaultbool(f, false);
+      else if (strcmp(str, "true") == 0)
+        upb_fielddef_setdefaultbool(f, true);
+      else
+        success = false;
+      break;
+    }
+    default: abort();
+  }
+  return success;
+}
+
+/* End-of-message handler for FieldDescriptorProto: applies the saved default
+ * value string, if any, now that the field's type is known.
+ *
+ * Returns false with an error in "status" if a submessage field has a default
+ * or if a numeric/bool default cannot be converted. */
+static bool field_endmsg(void *closure, const void *hd, upb_status *status) {
+  upb_descreader *reader = closure;
+  upb_fielddef *field = reader->f;
+  UPB_UNUSED(hd);
+
+  /* TODO: verify that all required fields were present. */
+  assert(upb_fielddef_number(field) != 0);
+  assert(upb_fielddef_name(field) != NULL);
+  assert((upb_fielddef_subdefname(field) != NULL) ==
+         upb_fielddef_hassubdef(field));
+
+  if (!reader->default_string) {
+    /* No default was given; nothing more to do. */
+    return true;
+  }
+
+  if (upb_fielddef_issubmsg(field)) {
+    upb_status_seterrmsg(status, "Submessages cannot have defaults.");
+    return false;
+  }
+
+  if (upb_fielddef_isstring(field) ||
+      upb_fielddef_type(field) == UPB_TYPE_ENUM) {
+    /* String and enum defaults are stored as text. */
+    upb_fielddef_setdefaultcstr(field, reader->default_string, NULL);
+    return true;
+  }
+
+  if (!parse_default(reader->default_string, field)) {
+    /* We don't worry too much about giving a great error message since the
+     * compiler should have ensured this was correct. */
+    upb_status_seterrmsg(status, "Error converting default value.");
+    return false;
+  }
+  return true;
+}
+
+/* bool handler for FieldOptions.lazy: forwards the value to the field def
+ * currently being built.  Always returns true. */
+static bool field_onlazy(void *closure, const void *hd, bool val) {
+  upb_descreader *reader = closure;
+  UPB_UNUSED(hd);
+  upb_fielddef_setlazy(reader->f, val);
+  return true;
+}
+
+/* bool handler for FieldOptions.packed: forwards the value to the field def
+ * currently being built.  Always returns true. */
+static bool field_onpacked(void *closure, const void *hd, bool val) {
+  upb_descreader *reader = closure;
+  UPB_UNUSED(hd);
+  upb_fielddef_setpacked(reader->f, val);
+  return true;
+}
+
+/* int32 handler for FieldDescriptorProto.type: sets the descriptor type of the
+ * field def currently being built.  Always returns true. */
+static bool field_ontype(void *closure, const void *hd, int32_t val) {
+  upb_descreader *reader = closure;
+  UPB_UNUSED(hd);
+  upb_fielddef_setdescriptortype(reader->f, val);
+  return true;
+}
+
+/* int32 handler for FieldDescriptorProto.label: sets the label of the field
+ * def currently being built.  Always returns true. */
+static bool field_onlabel(void *closure, const void *hd, int32_t val) {
+  upb_descreader *reader = closure;
+  UPB_UNUSED(hd);
+  upb_fielddef_setlabel(reader->f, val);
+  return true;
+}
+
+/* int32 handler for FieldDescriptorProto.number: sets the number of the field
+ * def currently being built.  A rejected number is only checked through
+ * UPB_ASSERT_VAR; the handler itself always returns true. */
+static bool field_onnumber(void *closure, const void *hd, int32_t val) {
+  upb_descreader *reader = closure;
+  bool ok;
+  UPB_UNUSED(hd);
+
+  ok = upb_fielddef_setnumber(reader->f, val, NULL);
+  UPB_ASSERT_VAR(ok, ok);
+  return true;
+}
+
+/* String handler for FieldDescriptorProto.name: sets the field's name from a
+ * temporary NUL-terminated copy of the input.  Reports all "n" bytes as
+ * consumed. */
+static size_t field_onname(void *closure, const void *hd, const char *buf,
+                           size_t n, const upb_bufhandle *handle) {
+  upb_descreader *reader = closure;
+  char *copy;
+  UPB_UNUSED(handle);
+  UPB_UNUSED(hd);
+
+  /* XXX: see comment at the top of the file. */
+  copy = upb_strndup(buf, n);
+  upb_fielddef_setname(reader->f, copy, NULL);
+  free(copy);
+  return n;
+}
+
+/* String handler for FieldDescriptorProto.type_name: sets the field's subdef
+ * name from a temporary NUL-terminated copy of the input.  Reports all "n"
+ * bytes as consumed. */
+static size_t field_ontypename(void *closure, const void *hd, const char *buf,
+                               size_t n, const upb_bufhandle *handle) {
+  upb_descreader *reader = closure;
+  char *copy;
+  UPB_UNUSED(handle);
+  UPB_UNUSED(hd);
+
+  /* XXX: see comment at the top of the file. */
+  copy = upb_strndup(buf, n);
+  upb_fielddef_setsubdefname(reader->f, copy, NULL);
+  free(copy);
+  return n;
+}
+
+/* String handler for FieldDescriptorProto.extendee: sets the field's
+ * containing type name from a temporary NUL-terminated copy of the input.
+ * Reports all "n" bytes as consumed. */
+static size_t field_onextendee(void *closure, const void *hd, const char *buf,
+                               size_t n, const upb_bufhandle *handle) {
+  upb_descreader *reader = closure;
+  char *copy;
+  UPB_UNUSED(handle);
+  UPB_UNUSED(hd);
+
+  /* XXX: see comment at the top of the file. */
+  copy = upb_strndup(buf, n);
+  upb_fielddef_setcontainingtypename(reader->f, copy, NULL);
+  free(copy);
+  return n;
+}
+
+/* String handler for FieldDescriptorProto.default_value: keeps a copy of the
+ * raw text on the reader, replacing any earlier one.  Reports all "n" bytes as
+ * consumed. */
+static size_t field_ondefaultval(void *closure, const void *hd, const char *buf,
+                                 size_t n, const upb_bufhandle *handle) {
+  upb_descreader *reader = closure;
+  UPB_UNUSED(handle);
+  UPB_UNUSED(hd);
+
+  /* The text must be converted to the field's type, but the type may not have
+   * been seen yet, so the string is kept until the end of the field.
+   * XXX: see comment at the top of the file. */
+  free(reader->default_string);
+  reader->default_string = upb_strndup(buf, n);
+  return n;
+}
+
+/* Handlers for google.protobuf.DescriptorProto (representing a message). */
+/* Start-of-message handler for DescriptorProto: creates a new message def,
+ * appends it to the reader's def list and opens a container for it.  Always
+ * returns true. */
+static bool msg_startmsg(void *closure, const void *hd) {
+  upb_descreader *reader = closure;
+  upb_msgdef *created;
+  UPB_UNUSED(hd);
+
+  created = upb_msgdef_new(&reader->defs);
+  upb_deflist_push(&reader->defs, upb_msgdef_upcast_mutable(created));
+  upb_descreader_startcontainer(reader);
+  return true;
+}
+
+/* End-of-message handler for DescriptorProto: closes the message's container.
+ * Returns false with an error in "status" (leaving the container open) if the
+ * message never received a name. */
+static bool msg_endmsg(void *closure, const void *hd, upb_status *status) {
+  upb_descreader *reader = closure;
+  upb_msgdef *msg = upb_descreader_top(reader);
+  UPB_UNUSED(hd);
+
+  if (upb_def_fullname(upb_msgdef_upcast_mutable(msg))) {
+    upb_descreader_endcontainer(reader);
+    return true;
+  }
+
+  upb_status_seterrmsg(status, "Encountered message with no name.");
+  return false;
+}
+
+/* String handler for DescriptorProto.name: sets the full name of the message
+ * at the top of the reader's stack and makes the same string the current
+ * scope name.  Reports all "n" bytes as consumed. */
+static size_t msg_onname(void *closure, const void *hd, const char *buf,
+                         size_t n, const upb_bufhandle *handle) {
+  upb_descreader *reader = closure;
+  upb_msgdef *msg = upb_descreader_top(reader);
+  /* XXX: see comment at the top of the file. */
+  char *copy = upb_strndup(buf, n);
+  UPB_UNUSED(handle);
+  UPB_UNUSED(hd);
+
+  upb_def_setfullname(upb_msgdef_upcast_mutable(msg), copy, NULL);
+  /* The scope frame takes over "copy", so it must not be freed here. */
+  upb_descreader_setscopename(reader, copy);
+  return n;
+}
+
+/* End-of-submessage handler for DescriptorProto.field: attaches the field def
+ * that was just built to the message at the top of the stack and clears the
+ * reader's pending field.  Always returns true. */
+static bool msg_onendfield(void *closure, const void *hd) {
+  upb_descreader *reader = closure;
+  upb_msgdef *msg = upb_descreader_top(reader);
+  UPB_UNUSED(hd);
+
+  upb_msgdef_addfield(msg, reader->f, &reader->defs, NULL);
+  reader->f = NULL;
+  return true;
+}
+
+/* End-of-submessage handler for the "extension" fields: marks the field def
+ * that was just built as an extension and appends it to the reader's def list
+ * instead of attaching it to a message.  Always returns true. */
+static bool pushextension(void *closure, const void *hd) {
+  upb_descreader *reader = closure;
+  UPB_UNUSED(hd);
+
+  /* An extension is expected to name the type it extends. */
+  assert(upb_fielddef_containingtypename(reader->f));
+  upb_fielddef_setisextension(reader->f, true);
+  upb_deflist_push(&reader->defs, upb_fielddef_upcast_mutable(reader->f));
+  reader->f = NULL;
+  return true;
+}
+
+/* Shorthand for the generated google.protobuf def accessors.  Expands to a
+ * call that passes a variable named "s", which must be in scope. */
+#define D(name) upbdefs_google_protobuf_ ## name(s)
+
+/* Registers the descriptor-reader callbacks on "h", chosen by which descriptor
+ * message type "h" is for.  "closure" is the symtab that D() passes to the def
+ * accessors.  Handlers for message types not listed below are left untouched. */
+static void reghandlers(const void *closure, upb_handlers *h) {
+  const upb_symtab *s = closure;
+  const upb_msgdef *m = upb_handlers_msgdef(h);
+
+  if (m == D(DescriptorProto)) {
+    upb_handlers_setstartmsg(h, &msg_startmsg, NULL);
+    upb_handlers_setendmsg(h, &msg_endmsg, NULL);
+    upb_handlers_setstring(h, D(DescriptorProto_name), &msg_onname, NULL);
+    upb_handlers_setendsubmsg(h, D(DescriptorProto_field), &msg_onendfield,
+                              NULL);
+    upb_handlers_setendsubmsg(h, D(DescriptorProto_extension), &pushextension,
+                              NULL);
+  } else if (m == D(FileDescriptorProto)) {
+    upb_handlers_setstartmsg(h, &file_startmsg, NULL);
+    upb_handlers_setendmsg(h, &file_endmsg, NULL);
+    upb_handlers_setstring(h, D(FileDescriptorProto_package), &file_onpackage,
+                           NULL);
+    upb_handlers_setendsubmsg(h, D(FileDescriptorProto_extension), &pushextension,
+                              NULL);
+  } else if (m == D(EnumValueDescriptorProto)) {
+    upb_handlers_setstartmsg(h, &enumval_startmsg, NULL);
+    upb_handlers_setendmsg(h, &enumval_endmsg, NULL);
+    upb_handlers_setstring(h, D(EnumValueDescriptorProto_name), &enumval_onname, NULL);
+    upb_handlers_setint32(h, D(EnumValueDescriptorProto_number), &enumval_onnumber,
+                          NULL);
+  } else if (m == D(EnumDescriptorProto)) {
+    upb_handlers_setstartmsg(h, &enum_startmsg, NULL);
+    upb_handlers_setendmsg(h, &enum_endmsg, NULL);
+    upb_handlers_setstring(h, D(EnumDescriptorProto_name), &enum_onname, NULL);
+  } else if (m == D(FieldDescriptorProto)) {
+    upb_handlers_setstartmsg(h, &field_startmsg, NULL);
+    upb_handlers_setendmsg(h, &field_endmsg, NULL);
+    upb_handlers_setint32(h, D(FieldDescriptorProto_type), &field_ontype,
+                          NULL);
+    upb_handlers_setint32(h, D(FieldDescriptorProto_label), &field_onlabel,
+                          NULL);
+    upb_handlers_setint32(h, D(FieldDescriptorProto_number), &field_onnumber,
+                          NULL);
+    upb_handlers_setstring(h, D(FieldDescriptorProto_name), &field_onname,
+                           NULL);
+    upb_handlers_setstring(h, D(FieldDescriptorProto_type_name),
+                           &field_ontypename, NULL);
+    upb_handlers_setstring(h, D(FieldDescriptorProto_extendee),
+                           &field_onextendee, NULL);
+    upb_handlers_setstring(h, D(FieldDescriptorProto_default_value),
+                           &field_ondefaultval, NULL);
+  } else if (m == D(FieldOptions)) {
+    /* Field options are applied to the field def currently being built. */
+    upb_handlers_setbool(h, D(FieldOptions_lazy), &field_onlazy, NULL);
+    upb_handlers_setbool(h, D(FieldOptions_packed), &field_onpacked, NULL);
+  }
+}
+
+#undef D
+
+/* Cleanup callback for a descreader: releases the pending enum value name,
+ * the def list, the pending default-value string, and the name of every stack
+ * frame still open.  Leaves stack_len at 0. */
+void descreader_cleanup(void *_r) {
+  upb_descreader *reader = _r;
+
+  free(reader->name);
+  upb_deflist_uninit(&reader->defs);
+  free(reader->default_string);
+
+  /* Pop frames from the top of the stack downwards. */
+  while (reader->stack_len > 0) {
+    reader->stack_len--;
+    free(reader->stack[reader->stack_len].name);
+  }
+}
+
+
+/* Public API  ****************************************************************/
+
+/* Allocates a descreader from environment "e", registers its cleanup with the
+ * environment, and binds its input sink to handlers "h".
+ *
+ * Returns NULL if the allocation or the cleanup registration fails. */
+upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h) {
+  upb_descreader *reader = upb_env_malloc(e, sizeof(*reader));
+
+  if (!reader) {
+    return NULL;
+  }
+  if (!upb_env_addcleanup(e, descreader_cleanup, reader)) {
+    return NULL;
+  }
+
+  upb_deflist_init(&reader->defs);
+  upb_sink_reset(upb_descreader_input(reader), h, reader);
+  reader->name = NULL;
+  reader->default_string = NULL;
+  reader->stack_len = 0;
+
+  return reader;
+}
+
+/* Returns the reader's array of defs and stores its length in "*n".  The refs
+ * on the defs are donated to "owner" before returning. */
+upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) {
+  upb_def **defs;
+
+  *n = r->defs.len;
+  upb_deflist_donaterefs(&r->defs, owner);
+  defs = r->defs.defs;
+  return defs;
+}
+
+/* Returns the sink embedded in the reader, into which descriptor data is fed. */
+upb_sink *upb_descreader_input(upb_descreader *r) {
+  upb_sink *input = &r->sink;
+  return input;
+}
+
+/* Builds frozen handlers for FileDescriptorSet, populated by reghandlers(),
+ * and returns them with a ref owned by "owner".  The descriptor symtab is only
+ * referenced for the duration of this call. */
+const upb_handlers *upb_descreader_newhandlers(const void *owner) {
+  /* The address of the local variable serves as the temporary ref owner. */
+  const upb_symtab *symtab = upbdefs_google_protobuf_descriptor(&symtab);
+  const upb_handlers *handlers = upb_handlers_newfrozen(
+      upbdefs_google_protobuf_FileDescriptorSet(symtab), owner, reghandlers,
+      symtab);
+  upb_symtab_unref(symtab, &symtab);
+  return handlers;
+}
+/*
+** protobuf decoder bytecode compiler
+**
+** Code to compile a upb::Handlers into bytecode for decoding a protobuf
+** according to that specific schema and destination handlers.
+**
+** Compiling to bytecode is always the first step.  If we are using the
+** interpreted decoder we leave it as bytecode and interpret that.  If we are
+** using a JIT decoder we use a code generator to turn the bytecode into native
+** code, LLVM IR, etc.
+**
+** Bytecode definition is in decoder.int.h.
+*/
+
+#include <stdarg.h>
+
+#ifdef UPB_DUMP_BYTECODE
+#include <stdio.h>
+#endif
+
+#define MAXLABEL 5
+#define EMPTYLABEL -1
+
+/* mgroup *********************************************************************/
+
+/* Refcounted "free" callback for an mgroup: releases the method table, any JIT
+ * state (when built with UPB_USE_JIT_X64), the bytecode buffer, and finally
+ * the group itself. */
+static void freegroup(upb_refcounted *r) {
+  mgroup *group = (mgroup*)r;
+
+  upb_inttable_uninit(&group->methods);
+#ifdef UPB_USE_JIT_X64
+  upb_pbdecoder_freejit(group);
+#endif
+  free(group->bytecode);
+  free(group);
+}
+
+/* Refcounted "visit" callback for an mgroup: reports every method stored in
+ * the group's method table to "visit". */
+static void visitgroup(const upb_refcounted *r, upb_refcounted_visit *visit,
+                       void *closure) {
+  const mgroup *group = (const mgroup*)r;
+  upb_inttable_iter it;
+
+  upb_inttable_begin(&it, &group->methods);
+  while (!upb_inttable_done(&it)) {
+    upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&it));
+    visit(r, upb_pbdecodermethod_upcast(method), closure);
+    upb_inttable_next(&it);
+  }
+}
+
+/* Allocates an empty method group with a ref owned by "owner".  The group
+ * starts with an empty method table and no bytecode buffer.  The result of
+ * malloc() is not checked. */
+mgroup *newgroup(const void *owner) {
+  static const struct upb_refcounted_vtbl vtbl = {visitgroup, freegroup};
+  mgroup *group = malloc(sizeof(*group));
+
+  upb_refcounted_init(mgroup_upcast_mutable(group), &vtbl, owner);
+  upb_inttable_init(&group->methods, UPB_CTYPE_PTR);
+  group->bytecode_end = NULL;
+  group->bytecode = NULL;
+  return group;
+}
+
+
+/* upb_pbdecodermethod ********************************************************/
+
+/* Refcounted "free" callback for a decoder method: drops the ref on the
+ * destination handlers (if any), then releases the dispatch table and the
+ * method itself. */
+static void freemethod(upb_refcounted *r) {
+  upb_pbdecodermethod *m = (upb_pbdecodermethod*)r;
+
+  if (m->dest_handlers_ != NULL) {
+    upb_handlers_unref(m->dest_handlers_, m);
+  }
+
+  upb_inttable_uninit(&m->dispatch);
+  free(m);
+}
+
+/* Refcounted "visit" callback for a decoder method: its only outgoing
+ * reference reported here is the group it belongs to. */
+static void visitmethod(const upb_refcounted *r, upb_refcounted_visit *visit,
+                        void *closure) {
+  const upb_pbdecodermethod *method = (const upb_pbdecodermethod*)r;
+
+  visit(r, method->group, closure);
+}
+
+/* Allocates a decoder method for "dest_handlers" and registers it in
+ * "group"'s method table, keyed by the handlers pointer.  The method takes a
+ * ref on "dest_handlers" when it is non-NULL.  The result of malloc() is not
+ * checked. */
+static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
+                                      mgroup *group) {
+  static const struct upb_refcounted_vtbl vtbl = {visitmethod, freemethod};
+  upb_pbdecodermethod *ret = malloc(sizeof(*ret));
+  /* The initial ref is owned by the address of the local "ret"; it is dropped
+   * again below once the group and the method reference each other. */
+  upb_refcounted_init(upb_pbdecodermethod_upcast_mutable(ret), &vtbl, &ret);
+  upb_byteshandler_init(&ret->input_handler_);
+
+  /* The method references the group and vice-versa, in a circular reference. */
+  upb_ref2(ret, group);
+  upb_ref2(group, ret);
+  upb_inttable_insertptr(&group->methods, dest_handlers, upb_value_ptr(ret));
+  upb_pbdecodermethod_unref(ret, &ret);
+
+  ret->group = mgroup_upcast_mutable(group);
+  ret->dest_handlers_ = dest_handlers;
+  ret->is_native_ = false;  /* If we JIT, it will update this later. */
+  upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
+
+  /* Released again in freemethod(). */
+  if (ret->dest_handlers_) {
+    upb_handlers_ref(ret->dest_handlers_, ret);
+  }
+  return ret;
+}
+
+/* Returns the destination handlers this method was created for (may be NULL). */
+const upb_handlers *upb_pbdecodermethod_desthandlers(
+    const upb_pbdecodermethod *m) {
+  const upb_handlers *dest = m->dest_handlers_;
+  return dest;
+}
+
+/* Returns the bytes handler embedded in the method, through which input is
+ * fed to it. */
+const upb_byteshandler *upb_pbdecodermethod_inputhandler(
+    const upb_pbdecodermethod *m) {
+  const upb_byteshandler *input = &m->input_handler_;
+  return input;
+}
+
+/* Returns the method's is_native_ flag, which starts out false and is meant to
+ * be updated if the method gets JIT-compiled. */
+bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
+  const bool native = m->is_native_;
+  return native;
+}
+
+/* Convenience constructor: builds a decoder method for "opts" through a
+ * temporary code cache and returns it with a ref owned by "owner". */
+const upb_pbdecodermethod *upb_pbdecodermethod_new(
+    const upb_pbdecodermethodopts *opts, const void *owner) {
+  upb_pbcodecache tmp_cache;
+  const upb_pbdecodermethod *method;
+
+  upb_pbcodecache_init(&tmp_cache);
+  method = upb_pbcodecache_getdecodermethod(&tmp_cache, opts);
+  /* Take the caller's ref before the temporary cache is torn down. */
+  upb_pbdecodermethod_ref(method, owner);
+  upb_pbcodecache_uninit(&tmp_cache);
+  return method;
+}
+
+
+/* bytecode compiler **********************************************************/
+
+/* Data used only at compilation time. */
+typedef struct {
+  /* Group whose bytecode buffer the compiler writes into. */
+  mgroup *group;
+
+  /* Current write position inside the group's bytecode buffer. */
+  uint32_t *pc;
+  /* Per-label bytecode offsets, or EMPTYLABEL.  fwd_labels[i] is the most
+   * recent unresolved forward reference to label i; back_labels[i] is where
+   * label i was last defined. */
+  int fwd_labels[MAXLABEL];
+  int back_labels[MAXLABEL];
+
+  /* For fields marked "lazy", parse them lazily or eagerly? */
+  bool lazy;
+} compiler;
+
+/* Allocates a compiler that emits into "group".  All forward and backward
+ * labels start out empty; "pc" is left unset.  The result of malloc() is not
+ * checked. */
+static compiler *newcompiler(mgroup *group, bool lazy) {
+  compiler *comp = malloc(sizeof(*comp));
+  int slot;
+
+  for (slot = 0; slot < MAXLABEL; slot++) {
+    comp->back_labels[slot] = EMPTYLABEL;
+    comp->fwd_labels[slot] = EMPTYLABEL;
+  }
+  comp->lazy = lazy;
+  comp->group = group;
+  return comp;
+}
+
+/* Releases a compiler created by newcompiler().  The group it pointed at is
+ * not touched. */
+static void freecompiler(compiler *c) {
+  compiler *doomed = c;
+  free(doomed);
+}
+
+/* Number of 32-bit bytecode words needed to hold one pointer. */
+const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
+
+/* Returns the total length of instruction "instr" in 32-bit words: the
+ * instruction word itself plus any operand words that follow it. */
+static int instruction_len(uint32_t instr) {
+  int operand_words = 0;
+
+  switch (getop(instr)) {
+    case OP_SETBIGGROUPNUM:
+      operand_words = 1;          /* the group number */
+      break;
+    case OP_TAGN:
+      operand_words = 2;          /* the 64-bit tag, as two words */
+      break;
+    case OP_SETDISPATCH:
+      operand_words = ptr_words;  /* a pointer */
+      break;
+    default:
+      break;
+  }
+  return 1 + operand_words;
+}
+
+/* Tells whether the instruction's jump offset occupies all bits above the
+ * opcode (true) or just a single byte (false).  Must only be called for
+ * instructions that carry an offset; anything else trips the assert. */
+bool op_has_longofs(int32_t instruction) {
+  switch (getop(instruction)) {
+    /* The "tag" instructions only have 8 bits available for the jump target,
+     * but that is ok because these opcodes only require short jumps. */
+    case OP_TAGN:
+    case OP_TAG2:
+    case OP_TAG1:
+      return false;
+    case OP_CHECKDELIM:
+    case OP_BRANCH:
+    case OP_CALL:
+      return true;
+    default:
+      assert(false);
+      return false;
+  }
+}
+
+/* Extracts the signed jump offset stored in "instruction": a single
+ * sign-extended byte for short-offset opcodes, otherwise everything above the
+ * low 8 bits. */
+static int32_t getofs(uint32_t instruction) {
+  if (!op_has_longofs(instruction)) {
+    return (int8_t)(instruction >> 8);
+  }
+  return (int32_t)instruction >> 8;
+}
+
+/* Stores jump offset "ofs" into "*instruction", leaving the opcode intact.
+ * Long-offset opcodes get the offset in all bits above the opcode (any other
+ * payload is discarded); short-offset opcodes get it in the second byte only,
+ * with the remaining bits preserved.  Asserts that the offset fits. */
+static void setofs(uint32_t *instruction, int32_t ofs) {
+  if (op_has_longofs(*instruction)) {
+    /* Shift as unsigned: backward jumps have a negative "ofs", and
+     * left-shifting a negative signed value is undefined behavior. */
+    *instruction = getop(*instruction) | ((uint32_t)ofs << 8);
+  } else {
+    *instruction = (*instruction & ~0xff00) | ((ofs & 0xff) << 8);
+  }
+  assert(getofs(*instruction) == ofs);  /* Would fail in cases of overflow. */
+}
+
+/* Returns the compiler's current position as a word offset from the start of
+ * the group's bytecode buffer. */
+static uint32_t pcofs(compiler *c) {
+  const uint32_t *base = c->group->bytecode;
+  return c->pc - base;
+}
+
+/* Defines a local label at the current PC location.  All previous forward
+ * references are updated to point to this location.  The location is noted
+ * for any future backward references. */
+static void label(compiler *c, unsigned int label) {
+  int val;
+  uint32_t *codep;
+
+  assert(label < MAXLABEL);
+  /* Offset of the most recent instruction that referenced this label forward,
+   * or EMPTYLABEL if there is none. */
+  val = c->fwd_labels[label];
+  codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
+  while (codep) {
+    /* Until patched, the offset field holds the link to the previous
+     * reference in the list (see labelref()); 0 ends the list.  Read it
+     * before it is overwritten. */
+    int ofs = getofs(*codep);
+    /* Jump offsets are relative to the end of the referencing instruction. */
+    setofs(codep, c->pc - codep - instruction_len(*codep));
+    codep = ofs ? codep + ofs : NULL;
+  }
+  c->fwd_labels[label] = EMPTYLABEL;
+  c->back_labels[label] = pcofs(c);
+}
+
+/* Creates a reference to a numbered label; either a forward reference
+ * (positive arg) or backward reference (negative arg).  For forward references
+ * the value returned now is actually a "next" pointer into a linked list of all
+ * instructions that use this label and will be patched later when the label is
+ * defined with label().
+ *
+ * A reference to LABEL_DISPATCH needs no resolving and yields 0.
+ *
+ * The returned value is the offset that should be written into the instruction.
+ */
+static int32_t labelref(compiler *c, int label) {
+  assert(label < MAXLABEL);
+  if (label == LABEL_DISPATCH) {
+    /* No resolving required. */
+    return 0;
+  } else if (label < 0) {
+    /* Backward local label.  Relative to the next instruction. */
+    uint32_t from = (c->pc + 1) - c->group->bytecode;
+    /* -label indexes back_labels, so it needs the same bound as a forward
+     * label, and the label must already have been defined with label(). */
+    assert(-label < MAXLABEL);
+    assert(c->back_labels[-label] != EMPTYLABEL);
+    return c->back_labels[-label] - from;
+  } else {
+    /* Forward local label: prepend to (possibly-empty) linked list. */
+    int *lptr = &c->fwd_labels[label];
+    int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
+    *lptr = pcofs(c);
+    return ret;
+  }
+}
+
+/* Appends one 32-bit word to the group's bytecode and advances the PC.  When
+ * the PC has reached the end of the buffer, the buffer is first grown to twice
+ * its size (at least 64 words) and the PC is re-based onto the new storage. */
+static void put32(compiler *c, uint32_t v) {
+  mgroup *group = c->group;
+
+  if (c->pc == group->bytecode_end) {
+    int pc_index = pcofs(c);
+    size_t capacity = group->bytecode_end - group->bytecode;
+    size_t grown = UPB_MAX(capacity * 2, 64);
+    /* TODO(haberman): handle OOM. */
+    group->bytecode = realloc(group->bytecode, grown * sizeof(uint32_t));
+    group->bytecode_end = group->bytecode + grown;
+    c->pc = group->bytecode + pc_index;
+  }
+
+  *c->pc = v;
+  c->pc++;
+}
+
+/* Appends the encoding of instruction "op" to the bytecode.  The variadic
+ * arguments that must follow depend on the opcode:
+ *   - OP_SETDISPATCH: a void*, stored in the word(s) after the opcode.
+ *   - the parse, seq, submsg and str ops and OP_PUSHTAGDELIM: a
+ *     upb_selector_t, packed into the bits above the opcode.
+ *   - OP_SETBIGGROUPNUM: an int, stored in the following word.
+ *   - OP_CALL: a upb_pbdecodermethod*, encoded as an offset from the
+ *     instruction after the call to the method's code_base.ofs.
+ *   - OP_CHECKDELIM, OP_BRANCH: an int label (see labelref()).
+ *   - OP_TAG1, OP_TAG2, OP_TAGN: an int label, then a uint64_t tag.
+ *   - the remaining listed ops take no arguments.
+ * Opcodes not listed in the switch emit nothing. */
+static void putop(compiler *c, opcode op, ...) {
+  va_list ap;
+  va_start(ap, op);
+
+  switch (op) {
+    case OP_SETDISPATCH: {
+      uintptr_t ptr = (uintptr_t)va_arg(ap, void*);
+      put32(c, OP_SETDISPATCH);
+      /* Low 32 bits first, then the high 32 bits when pointers are wider. */
+      put32(c, ptr);
+      if (sizeof(uintptr_t) > sizeof(uint32_t))
+        put32(c, (uint64_t)ptr >> 32);
+      break;
+    }
+    case OP_STARTMSG:
+    case OP_ENDMSG:
+    case OP_PUSHLENDELIM:
+    case OP_POP:
+    case OP_SETDELIM:
+    case OP_HALT:
+    case OP_RET:
+    case OP_DISPATCH:
+      put32(c, op);
+      break;
+    case OP_PARSE_DOUBLE:
+    case OP_PARSE_FLOAT:
+    case OP_PARSE_INT64:
+    case OP_PARSE_UINT64:
+    case OP_PARSE_INT32:
+    case OP_PARSE_FIXED64:
+    case OP_PARSE_FIXED32:
+    case OP_PARSE_BOOL:
+    case OP_PARSE_UINT32:
+    case OP_PARSE_SFIXED32:
+    case OP_PARSE_SFIXED64:
+    case OP_PARSE_SINT32:
+    case OP_PARSE_SINT64:
+    case OP_STARTSEQ:
+    case OP_ENDSEQ:
+    case OP_STARTSUBMSG:
+    case OP_ENDSUBMSG:
+    case OP_STARTSTR:
+    case OP_STRING:
+    case OP_ENDSTR:
+    case OP_PUSHTAGDELIM:
+      put32(c, op | va_arg(ap, upb_selector_t) << 8);
+      break;
+    case OP_SETBIGGROUPNUM:
+      put32(c, op);
+      put32(c, va_arg(ap, int));
+      break;
+    case OP_CALL: {
+      const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *);
+      put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8);
+      break;
+    }
+    case OP_CHECKDELIM:
+    case OP_BRANCH: {
+      uint32_t instruction = op;
+      int label = va_arg(ap, int);
+      setofs(&instruction, labelref(c, label));
+      put32(c, instruction);
+      break;
+    }
+    case OP_TAG1:
+    case OP_TAG2: {
+      int label = va_arg(ap, int);
+      uint64_t tag = va_arg(ap, uint64_t);
+      /* The tag lives in the upper 16 bits, so it must fit in two bytes. */
+      uint32_t instruction = op | (tag << 16);
+      assert(tag <= 0xffff);
+      setofs(&instruction, labelref(c, label));
+      put32(c, instruction);
+      break;
+    }
+    case OP_TAGN: {
+      int label = va_arg(ap, int);
+      uint64_t tag = va_arg(ap, uint64_t);
+      /* The instruction word carries the tag's size; the tag itself follows
+       * as two words, low half first. */
+      uint32_t instruction = op | (upb_value_size(tag) << 16);
+      setofs(&instruction, labelref(c, label));
+      put32(c, instruction);
+      put32(c, tag);
+      put32(c, tag >> 32);
+      break;
+    }
+  }
+
+  va_end(ap);
+}
+
+#if defined(UPB_USE_JIT_X64) || defined(UPB_DUMP_BYTECODE)
+
+/* Returns the name of opcode "op" as a string literal, or "<unknown op>" for
+ * a value not covered by the list below.  Only compiled for JIT or
+ * bytecode-dump builds. */
+const char *upb_pbdecoder_getopname(unsigned int op) {
+/* OP(x) expands to a case for OP_x that returns the stringified, expanded
+ * OP_x token; T(x) is shorthand for OP(PARSE_x). */
+#define QUOTE(x) #x
+#define EXPAND_AND_QUOTE(x) QUOTE(x)
+#define OPNAME(x) OP_##x
+#define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
+#define T(x) OP(PARSE_##x)
+  /* Keep in sync with list in decoder.int.h. */
+  switch ((opcode)op) {
+    T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
+    T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
+    OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
+    OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
+    OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
+    OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
+    OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
+  }
+  return "<unknown op>";
+#undef OP
+#undef T
+}
+
+#endif
+
+#ifdef UPB_DUMP_BYTECODE
+
+/* Writes a human-readable listing of the bytecode in [p, end) to "f", one
+ * instruction per line: address, word offset from the start, opcode name and
+ * the decoded operands.  Jump targets are printed as word offsets from the
+ * start of the listing.  Only compiled when UPB_DUMP_BYTECODE is defined. */
+static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
+
+  uint32_t *begin = p;
+
+  while (p < end) {
+    /* %p requires a void* argument; passing another pointer type is
+     * undefined behavior, hence the cast. */
+    fprintf(f, "%p  %8tx", (void *)p, p - begin);
+    uint32_t instr = *p++;
+    uint8_t op = getop(instr);
+    fprintf(f, " %s", upb_pbdecoder_getopname(op));
+    switch ((opcode)op) {
+      case OP_SETDISPATCH: {
+        const upb_inttable *dispatch;
+        memcpy(&dispatch, p, sizeof(void*));
+        p += ptr_words;
+        /* The operand points at the "dispatch" member of a method; step back
+         * to the enclosing method to find its message name. */
+        const upb_pbdecodermethod *method =
+            (void *)((char *)dispatch -
+                     offsetof(upb_pbdecodermethod, dispatch));
+        fprintf(f, " %s", upb_msgdef_fullname(
+                              upb_handlers_msgdef(method->dest_handlers_)));
+        break;
+      }
+      case OP_DISPATCH:
+      case OP_STARTMSG:
+      case OP_ENDMSG:
+      case OP_PUSHLENDELIM:
+      case OP_POP:
+      case OP_SETDELIM:
+      case OP_HALT:
+      case OP_RET:
+        break;
+      case OP_PARSE_DOUBLE:
+      case OP_PARSE_FLOAT:
+      case OP_PARSE_INT64:
+      case OP_PARSE_UINT64:
+      case OP_PARSE_INT32:
+      case OP_PARSE_FIXED64:
+      case OP_PARSE_FIXED32:
+      case OP_PARSE_BOOL:
+      case OP_PARSE_UINT32:
+      case OP_PARSE_SFIXED32:
+      case OP_PARSE_SFIXED64:
+      case OP_PARSE_SINT32:
+      case OP_PARSE_SINT64:
+      case OP_STARTSEQ:
+      case OP_ENDSEQ:
+      case OP_STARTSUBMSG:
+      case OP_ENDSUBMSG:
+      case OP_STARTSTR:
+      case OP_STRING:
+      case OP_ENDSTR:
+      case OP_PUSHTAGDELIM:
+        /* The operand is packed into the bits above the opcode. */
+        fprintf(f, " %d", instr >> 8);
+        break;
+      case OP_SETBIGGROUPNUM:
+        fprintf(f, " %d", *p++);
+        break;
+      case OP_CHECKDELIM:
+      case OP_CALL:
+      case OP_BRANCH:
+        fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
+        break;
+      case OP_TAG1:
+      case OP_TAG2: {
+        fprintf(f, " tag:0x%x", instr >> 16);
+        if (getofs(instr)) {
+          fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
+        }
+        break;
+      }
+      case OP_TAGN: {
+        /* The tag follows the instruction as two words, low half first. */
+        uint64_t tag = *p++;
+        tag |= (uint64_t)*p++ << 32;
+        fprintf(f, " tag:0x%llx", (long long)tag);
+        fprintf(f, " n:%d", instr >> 16);
+        if (getofs(instr)) {
+          fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
+        }
+        break;
+      }
+    }
+    fputs("\n", f);
+  }
+}
+
+#endif
+
+/* Returns the varint-encoded wire tag for field "f" with wire type
+ * "wire_type", as produced by upb_vencode32(). */
+static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
+  uint32_t raw_tag = (upb_fielddef_number(f) << 3) | wire_type;
+  uint64_t varint = upb_vencode32(raw_tag);
+
+  /* No tag should be greater than 5 bytes. */
+  assert(varint <= 0xffffffffff);
+  return varint;
+}
+
+/* Emits a tag-check instruction for field "f" and "wire_type" that refers to
+ * label "dest".  The opcode is picked by the size of the encoded tag:
+ * OP_TAG1 for one byte, OP_TAG2 for two, OP_TAGN otherwise. */
+static void putchecktag(compiler *c, const upb_fielddef *f,
+                        int wire_type, int dest) {
+  uint64_t tag = get_encoded_tag(f, wire_type);
+  opcode op;
+
+  switch (upb_value_size(tag)) {
+    case 1:
+      op = OP_TAG1;
+      break;
+    case 2:
+      op = OP_TAG2;
+      break;
+    default:
+      op = OP_TAGN;
+      break;
+  }
+  putop(c, op, dest, tag);
+}
+
+/* Returns the handler selector for field "f" and handler type "type".  The
+ * lookup is expected to succeed; failure is only caught by UPB_ASSERT_VAR. */
+static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
+  upb_selector_t sel;
+  bool found = upb_handlers_getselector(f, type, &sel);
+
+  UPB_ASSERT_VAR(found, found);
+  return sel;
+}
+
+/* Re-encodes the primary dispatch table entry "dispatch" so that it also
+ * carries "new_wt2" as its alternate wire type, keeping its offset and primary
+ * wire type.  Used when a secondary dispatch entry is being added; the entry
+ * must not have an alternate wire type yet. */
+static uint64_t repack(uint64_t dispatch, int new_wt2) {
+  uint64_t target_ofs;
+  uint8_t primary_wt;
+  uint8_t alternate_wt;
+
+  upb_pbdecoder_unpackdispatch(dispatch, &target_ofs, &primary_wt,
+                               &alternate_wt);
+  assert(alternate_wt == NO_WIRE_TYPE);  /* wt2 should not be set yet. */
+  return upb_pbdecoder_packdispatch(target_ofs, primary_wt, new_wt2);
+}
+
+/* Records the current bytecode position as the dispatch target of "method"
+ * for field "f" arriving with "wire_type".
+ *
+ * The first target for a field number becomes its primary entry.  A later one
+ * is noted as the alternate wire type of the primary entry and its offset is
+ * stored under the key (field number + UPB_MAX_FIELDNUMBER). */
+static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
+                           const upb_fielddef *f, int wire_type) {
+  upb_inttable *table = &method->dispatch;
+  uint32_t fieldnum = upb_fielddef_number(f);
+  /* Offset is relative to msg base. */
+  uint64_t rel_ofs = pcofs(c) - method->code_base.ofs;
+  upb_value primary;
+
+  if (!upb_inttable_remove(table, fieldnum, &primary)) {
+    /* No entry for this field yet: this becomes the primary one. */
+    uint64_t packed =
+        upb_pbdecoder_packdispatch(rel_ofs, wire_type, NO_WIRE_TYPE);
+    upb_inttable_insert(table, fieldnum, upb_value_uint64(packed));
+  } else {
+    /* TODO: prioritize based on packed setting in .proto file. */
+    uint64_t repacked = repack(upb_value_getuint64(primary), wire_type);
+    upb_inttable_insert(table, fieldnum, upb_value_uint64(repacked));
+    upb_inttable_insert(table, fieldnum + UPB_MAX_FIELDNUMBER,
+                        upb_value_uint64(rel_ofs));
+  }
+}
+
+/* Emits the "push" instruction(s) that open the submessage frame for "f":
+ * OP_PUSHLENDELIM for a message-typed field, otherwise OP_PUSHTAGDELIM
+ * carrying the field number.  Field numbers of 1 << 24 and above do not fit
+ * in the instruction word, so they are emitted as a separate
+ * OP_SETBIGGROUPNUM operand. */
+static void putpush(compiler *c, const upb_fielddef *f) {
+  uint32_t fieldnum;
+
+  if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
+    putop(c, OP_PUSHLENDELIM);
+    return;
+  }
+
+  fieldnum = upb_fielddef_number(f);
+  if (fieldnum < 1 << 24) {
+    putop(c, OP_PUSHTAGDELIM, fieldnum);
+  } else {
+    putop(c, OP_PUSHTAGDELIM, 0);
+    putop(c, OP_SETBIGGROUPNUM, fieldnum);
+  }
+}
+
+/* Looks up the decoder method that the compiler's group holds for the
+ * sub-handlers of field "f" in "method"'s destination handlers.  Returns NULL
+ * if the group has no such method. */
+static upb_pbdecodermethod *find_submethod(const compiler *c,
+                                           const upb_pbdecodermethod *method,
+                                           const upb_fielddef *f) {
+  upb_value found;
+  const upb_handlers *subhandlers =
+      upb_handlers_getsubhandlers(method->dest_handlers_, f);
+
+  if (!upb_inttable_lookupptr(&c->group->methods, subhandlers, &found)) {
+    return NULL;
+  }
+  return upb_value_getptr(found);
+}
+
+/* Emits "op" with selector "sel" as its operand, but only when "h" has a
+ * handler registered for that selector; otherwise emits nothing. */
+static void putsel(compiler *c, opcode op, upb_selector_t sel,
+                   const upb_handlers *h) {
+  if (!upb_handlers_gethandler(h, sel)) {
+    return;
+  }
+  putop(c, op, sel);
+}
+
+/* Emits "op" for the (field, handler type) pair, with the pair's selector as
+ * operand, but only if "h" actually has a callback registered for it. */
+static void maybeput(compiler *c, opcode op, const upb_handlers *h,
+                     const upb_fielddef *f, upb_handlertype_t type) {
+  upb_selector_t sel = getsel(f, type);
+
+  if (upb_handlers_gethandler(h, sel)) {
+    putop(c, op, sel);
+  }
+}
+
+/* Returns true if field "f" is marked lazy and "h" has at least one of the
+ * string handlers (start, data or end) registered for it. */
+static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
+  if (!upb_fielddef_lazy(f)) {
+    return false;
+  }
+
+  if (upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR))) {
+    return true;
+  }
+  if (upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING))) {
+    return true;
+  }
+  if (upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR))) {
+    return true;
+  }
+  return false;
+}
+
+
+/* bytecode compiler code generation ******************************************/
+
+/* Symbolic names for our local labels. */
+#define LABEL_LOOPSTART 1  /* Top of a repeated field loop. */
+#define LABEL_LOOPBREAK 2  /* To jump out of a repeated loop */
+#define LABEL_FIELD     3  /* Jump backward to find the most recent field. */
+#define LABEL_ENDMSG    4  /* To reach the OP_ENDMSG instr for this msg. */
+
+/* Generates bytecode to parse a single non-lazy message field.
+ *
+ * Nothing is emitted when the group has no decoder method for the field's
+ * sub-handlers.  Otherwise the emitted code checks the tag, pushes a frame,
+ * calls the submessage's method and pops the frame again; for a repeated
+ * field this is wrapped in a loop that continues while the same tag follows. */
+static void generate_msgfield(compiler *c, const upb_fielddef *f,
+                              upb_pbdecodermethod *method) {
+  const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
+  const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
+  int wire_type;
+
+  if (!sub_m) {
+    /* Don't emit any code for this field at all; it will be parsed as an
+     * unknown field. */
+    return;
+  }
+
+  label(c, LABEL_FIELD);
+
+  /* Message-typed fields are length-delimited; anything else reaching this
+   * function is treated as a group. */
+  wire_type =
+      (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
+          ? UPB_WIRE_TYPE_DELIMITED
+          : UPB_WIRE_TYPE_START_GROUP;
+
+  if (upb_fielddef_isseq(f)) {
+    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
+    putchecktag(c, f, wire_type, LABEL_DISPATCH);
+    /* The dispatch target is the instruction right after the tag check. */
+   dispatchtarget(c, method, f, wire_type);
+    putop(c, OP_PUSHTAGDELIM, 0);
+    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
+   label(c, LABEL_LOOPSTART);
+    putpush(c, f);
+    putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
+    putop(c, OP_CALL, sub_m);
+    putop(c, OP_POP);
+    /* End handlers are only emitted when one is registered. */
+    maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
+    if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
+      putop(c, OP_SETDELIM);
+    }
+    /* Leave the loop at a delimiter or when the next tag is a different one. */
+    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
+    putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
+    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
+   label(c, LABEL_LOOPBREAK);
+    putop(c, OP_POP);
+    maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
+  } else {
+    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
+    putchecktag(c, f, wire_type, LABEL_DISPATCH);
+   dispatchtarget(c, method, f, wire_type);
+    putpush(c, f);
+    putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
+    putop(c, OP_CALL, sub_m);
+    putop(c, OP_POP);
+    maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
+    if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
+      putop(c, OP_SETDELIM);
+    }
+  }
+}
+
+/* Generates bytecode to parse a single string or lazy submessage field.
+ *
+ * The field is always matched against the DELIMITED wire type.  Each value is
+ * emitted as PUSHLENDELIM, STARTSTR, STRING, POP, an optional ENDSTR (only if
+ * a handler is registered) and SETDELIM.  For sequence (repeated) fields this
+ * is wrapped in PUSHTAGDELIM/STARTSEQ ... POP/ENDSEQ with a loop back to
+ * LABEL_LOOPSTART. */
+static void generate_delimfield(compiler *c, const upb_fielddef *f,
+                                upb_pbdecodermethod *method) {
+  const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
+
+  label(c, LABEL_FIELD);
+  if (upb_fielddef_isseq(f)) {  /* Repeated field. */
+    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
+    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
+   dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
+    putop(c, OP_PUSHTAGDELIM, 0);
+    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
+   label(c, LABEL_LOOPSTART);
+    putop(c, OP_PUSHLENDELIM);
+    putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
+    /* Need to emit even if no handler to skip past the string. */
+    putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
+    putop(c, OP_POP);
+    maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
+    putop(c, OP_SETDELIM);
+    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
+    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
+    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
+   label(c, LABEL_LOOPBREAK);
+    putop(c, OP_POP);  /* Pairs with the OP_PUSHTAGDELIM emitted above. */
+    maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
+  } else {  /* Singular field: one value, no sequence wrapper. */
+    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
+    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
+   dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
+    putop(c, OP_PUSHLENDELIM);
+    putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
+    putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
+    putop(c, OP_POP);
+    maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
+    putop(c, OP_SETDELIM);
+  }
+}
+
+/* Generates bytecode to parse a single primitive field.
+ *
+ * The parse opcode is the field's descriptor type cast to an opcode, with
+ * ENUM mapped to INT32 first.  The expected wire type, however, is looked up
+ * in upb_pb_native_wire_types using the field's own (unmapped) descriptor
+ * type.
+ *
+ * For sequence (repeated) fields two loops are emitted: a packed one behind
+ * the DELIMITED dispatch target and a non-packed one behind the native
+ * wire-type dispatch target.  LABEL_LOOPSTART is defined once per loop, and
+ * both loops share the single LABEL_LOOPBREAK tail (POP, optional ENDSEQ,
+ * SETDELIM). */
+static void generate_primitivefield(compiler *c, const upb_fielddef *f,
+                                    upb_pbdecodermethod *method) {
+  const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
+  upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
+  opcode parse_type;
+  upb_selector_t sel;
+  int wire_type;
+
+  label(c, LABEL_FIELD);
+
+  /* From a decoding perspective, ENUM is the same as INT32. */
+  if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
+    descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
+
+  parse_type = (opcode)descriptor_type;
+
+  /* TODO(haberman): generate packed or non-packed first depending on "packed"
+   * setting in the fielddef.  This will favor (in speed) whichever was
+   * specified. */
+
+  assert((int)parse_type >= 0 && parse_type <= OP_MAX);
+  sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
+  wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
+  if (upb_fielddef_isseq(f)) {
+    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
+    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
+   dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
+    putop(c, OP_PUSHLENDELIM);
+    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Packed */
+   label(c, LABEL_LOOPSTART);
+    putop(c, parse_type, sel);
+    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
+    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
+   dispatchtarget(c, method, f, wire_type);
+    putop(c, OP_PUSHTAGDELIM, 0);
+    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Non-packed */
+   label(c, LABEL_LOOPSTART);
+    putop(c, parse_type, sel);
+    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
+    putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
+    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
+   label(c, LABEL_LOOPBREAK);
+    putop(c, OP_POP);  /* Packed and non-packed join. */
+    maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
+    putop(c, OP_SETDELIM);  /* Could remove for non-packed by dup ENDSEQ. */
+  } else {  /* Singular field: tag check, then a single parse opcode. */
+    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
+    putchecktag(c, f, wire_type, LABEL_DISPATCH);
+   dispatchtarget(c, method, f, wire_type);
+    putop(c, parse_type, sel);
+  }
+}
+
+/* Adds bytecode for parsing the given message to the given decoderplan,
+ * while adding all dispatch targets to this message's dispatch table.
+ *
+ * The method's dispatch table is torn down and re-initialized first, so the
+ * function can be run more than once for the same method.  The offset at
+ * which this method's code starts is stored in method->code_base.ofs, and the
+ * offset of the end-of-message code (relative to that base) is stored in the
+ * dispatch table under DISPATCH_ENDMSG. */
+static void compile_method(compiler *c, upb_pbdecodermethod *method) {
+  const upb_handlers *h;
+  const upb_msgdef *md;
+  uint32_t* start_pc;
+  upb_msg_field_iter i;
+  upb_value val;
+
+  assert(method);
+
+  /* Clear all entries in the dispatch table. */
+  upb_inttable_uninit(&method->dispatch);
+  upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
+
+  h = upb_pbdecodermethod_desthandlers(method);
+  md = upb_handlers_msgdef(h);
+
+ method->code_base.ofs = pcofs(c);
+  putop(c, OP_SETDISPATCH, &method->dispatch);
+  putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
+ label(c, LABEL_FIELD);
+  start_pc = c->pc;  /* Used below to detect that no field emitted code. */
+  for(upb_msg_field_begin(&i, md);
+      !upb_msg_field_done(&i);
+      upb_msg_field_next(&i)) {
+    const upb_fielddef *f = upb_msg_iter_field(&i);
+    upb_fieldtype_t type = upb_fielddef_type(f);
+
+    /* Message fields take the submessage path unless lazy decoding is enabled
+     * and the field has lazy string handlers; those fall through to the
+     * delimited path together with strings and bytes. */
+    if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
+      generate_msgfield(c, f, method);
+    } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
+               type == UPB_TYPE_MESSAGE) {
+      generate_delimfield(c, f, method);
+    } else {
+      generate_primitivefield(c, f, method);
+    }
+  }
+
+  /* If there were no fields, or if no handlers were defined, we need to
+   * generate a non-empty loop body so that we can at least dispatch for unknown
+   * fields and check for the end of the message. */
+  if (c->pc == start_pc) {
+    /* Check for end-of-message. */
+    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
+    /* Unconditionally dispatch. */
+    putop(c, OP_DISPATCH, 0);
+  }
+
+  /* For now we just loop back to the last field of the message (or if none,
+   * the DISPATCH opcode for the message). */
+  putop(c, OP_BRANCH, -LABEL_FIELD);
+
+  /* Insert both a label and a dispatch table entry for this end-of-msg. */
+ label(c, LABEL_ENDMSG);
+  val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
+  upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
+
+  putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
+  putop(c, OP_RET);
+
+  upb_inttable_compact(&method->dispatch);
+}
+
+/* Populates c->group->methods with a new upb_pbdecodermethod for "h" and,
+ * recursively, for every sub-handlers object reachable from "h" through its
+ * message-typed fields.
+ *
+ * Handlers that already have an entry in the table are skipped, which is also
+ * what stops the recursion on message types that refer back to themselves
+ * (this relies on newmethod() registering the method in that table -- TODO
+ * confirm).  Nothing is returned; the caller looks the method up in the table
+ * afterwards. */
+static void find_methods(compiler *c, const upb_handlers *h) {
+  upb_value v;
+  upb_msg_field_iter i;
+  const upb_msgdef *md;
+
+  if (upb_inttable_lookupptr(&c->group->methods, h, &v))
+    return;
+  newmethod(h, c->group);
+
+  /* Find submethods. */
+  md = upb_handlers_msgdef(h);
+  for(upb_msg_field_begin(&i, md);
+      !upb_msg_field_done(&i);
+      upb_msg_field_next(&i)) {
+    const upb_fielddef *f = upb_msg_iter_field(&i);
+    const upb_handlers *sub_h;
+    if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
+        (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
+      /* We only generate a decoder method for submessages with handlers.
+       * Others will be parsed as unknown fields. */
+      find_methods(c, sub_h);
+    }
+  }
+}
+
+/* (Re-)compiles the bytecode of every method registered in c->group->methods.
+ * Any bytecode already present in the group's buffer is overwritten. */
+static void compile_methods(compiler *c) {
+  upb_inttable_iter it;
+
+  /* Rewind so that emission starts again at the top of the buffer. */
+  c->pc = c->group->bytecode;
+
+  upb_inttable_begin(&it, &c->group->methods);
+  while (!upb_inttable_done(&it)) {
+    upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&it));
+    compile_method(c, m);
+    upb_inttable_next(&it);
+  }
+}
+
+/* Points every method in "g" at the bytecode interpreter: converts each
+ * method's code offset into an absolute pointer into g->bytecode and installs
+ * the bytecode start/string/end callbacks on the method's input handler. */
+static void set_bytecode_handlers(mgroup *g) {
+  upb_inttable_iter it;
+
+  upb_inttable_begin(&it, &g->methods);
+  while (!upb_inttable_done(&it)) {
+    upb_pbdecodermethod *method =
+        upb_value_getptr(upb_inttable_iter_value(&it));
+    upb_byteshandler *input = &method->input_handler_;
+
+    /* Must happen before code_base.ptr is handed to the start callback. */
+    method->code_base.ptr = g->bytecode + method->code_base.ofs;
+
+    upb_byteshandler_setstartstr(input, upb_pbdecoder_startbc,
+                                 method->code_base.ptr);
+    upb_byteshandler_setstring(input, upb_pbdecoder_decode, g);
+    upb_byteshandler_setendstr(input, upb_pbdecoder_end, method);
+
+    upb_inttable_next(&it);
+  }
+}
+
+
+/* JIT setup. *****************************************************************/
+
+#ifdef UPB_USE_JIT_X64
+
+/* Installs the input handlers for every method in "g".  With the JIT compiled
+ * in, "allowjit" selects between JIT-generated machine code and the bytecode
+ * interpreter. */
+static void sethandlers(mgroup *g, bool allowjit) {
+  g->jit_code = NULL;
+  if (!allowjit) {
+    set_bytecode_handlers(g);
+    return;
+  }
+  /* Compile byte-code into machine code, create handlers. */
+  upb_pbdecoder_jit(g);
+}
+
+#else  /* UPB_USE_JIT_X64 */
+
+/* Installs the input handlers for every method in "g".  Without the JIT the
+ * bytecode handlers are used regardless of "allowjit". */
+static void sethandlers(mgroup *g, bool allowjit) {
+  UPB_UNUSED(allowjit);
+  set_bytecode_handlers(g);
+}
+
+#endif  /* UPB_USE_JIT_X64 */
+
+
+/* Builds a new method group containing a decoder method for "dest" and for
+ * every sub-handlers object reachable from it, compiles their bytecode and
+ * installs the input handlers.
+ *
+ * "dest" must be frozen.  "allowjit" is forwarded to sethandlers(), "lazy" to
+ * the compiler, and "owner" to newgroup().
+ *
+ * TODO(haberman): allow this to be constructed for an arbitrary set of dest
+ * handlers and other mgroups (but verify we have a transitive closure). */
+const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, bool lazy,
+                         const void *owner) {
+  mgroup *g;
+  compiler *c;
+
+  assert(upb_handlers_isfrozen(dest));
+
+  g = newgroup(owner);
+  c = newcompiler(g, lazy);
+  find_methods(c, dest);
+
+  /* We compile in two passes:
+   * 1. all messages are assigned relative offsets from the beginning of the
+   *    bytecode (saved in method->code_base).
+   * 2. forwards OP_CALL instructions can be correctly linked since message
+   *    offsets have been previously assigned.
+   *
+   * Could avoid the second pass by linking OP_CALL instructions somehow. */
+  compile_methods(c);
+  compile_methods(c);
+  g->bytecode_end = c->pc;
+  freecompiler(c);
+
+#ifdef UPB_DUMP_BYTECODE
+  {
+    FILE *f = fopen("/tmp/upb-bytecode", "wb");
+    dumpbc(g->bytecode, g->bytecode_end, stderr);
+    /* The file copy is best-effort debugging output.  An assert() alone does
+     * not protect NDEBUG builds, so skip the file dump when fopen() failed
+     * rather than passing a NULL stream to dumpbc()/fclose(). */
+    if (f) {
+      dumpbc(g->bytecode, g->bytecode_end, f);
+      fclose(f);
+    }
+  }
+#endif
+
+  sethandlers(g, allowjit);
+  return g;
+}
+
+
+/* upb_pbcodecache ************************************************************/
+
+/* Initializes a code cache: JIT use is allowed by default and the table of
+ * method groups starts out empty. */
+void upb_pbcodecache_init(upb_pbcodecache *c) {
+  c->allow_jit_ = true;
+  upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR);
+}
+
+/* Releases the cache's reference on every method group it holds, then frees
+ * the group table itself. */
+void upb_pbcodecache_uninit(upb_pbcodecache *c) {
+  upb_inttable_iter it;
+
+  upb_inttable_begin(&it, &c->groups);
+  while (!upb_inttable_done(&it)) {
+    const mgroup *g = upb_value_getconstptr(upb_inttable_iter_value(&it));
+    mgroup_unref(g, c);
+    upb_inttable_next(&it);
+  }
+  upb_inttable_uninit(&c->groups);
+}
+
+/* Returns the cache's current allow-JIT setting. */
+bool upb_pbcodecache_allowjit(const upb_pbcodecache *c) {
+  return c->allow_jit_;
+}
+
+/* Changes the allow-JIT setting.  The setting can only be changed while the
+ * cache holds no method groups; returns false (and changes nothing) once any
+ * group exists, true otherwise. */
+bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow) {
+  const bool has_groups = upb_inttable_count(&c->groups) > 0;
+
+  if (!has_groups) {
+    c->allow_jit_ = allow;
+    return true;
+  }
+  return false;
+}
+
+/* Returns the decoder method for opts->handlers.
+ *
+ * Right now a brand-new method group is built on every call and appended to
+ * the cache's group table; nothing is reused.
+ * TODO(haberman): properly cache methods by their true key. */
+const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod(
+    upb_pbcodecache *c, const upb_pbdecodermethodopts *opts) {
+  const mgroup *group;
+  upb_value method;
+  bool found;
+
+  group = mgroup_new(opts->handlers, c->allow_jit_, opts->lazy, c);
+  upb_inttable_push(&c->groups, upb_value_constptr(group));
+
+  /* The group was just built for these handlers, so the lookup must hit. */
+  found = upb_inttable_lookupptr(&group->methods, opts->handlers, &method);
+  UPB_ASSERT_VAR(found, found);
+  return upb_value_getptr(method);
+}
+
+
+/* upb_pbdecodermethodopts ****************************************************/
+
+/* Initializes "opts" for the destination handlers "h", with lazy decoding
+ * turned off. */
+void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts,
+                                  const upb_handlers *h) {
+  opts->lazy = false;
+  opts->handlers = h;
+}
+
+/* Stores the "lazy" flag in "opts". */
+void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy) {
+  opts->lazy = lazy;
+}
+/*
+** upb::Decoder (Bytecode Decoder VM)
+**
+** Bytecode must previously have been generated using the bytecode compiler in
+** compile_decoder.c.  This decoder then walks through the bytecode op-by-op to
+** parse the input.
+**
+** Decoding is fully resumable; we just keep a pointer to the current bytecode
+** instruction and resume from there.  A fair amount of the logic here is to
+** handle the fact that values can span buffer seams and we have to be capable
+** of suspending/resuming from any byte in the stream.  This
+** sometimes requires keeping a few trailing bytes from the last buffer around
+** in the "residual" buffer.
+*/
+
+#include <inttypes.h>
+#include <stddef.h>
+
+#ifdef UPB_DUMP_BYTECODE
+#include <stdio.h>
+#endif
+
+#define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d);
+
+/* Error messages that are shared between the bytecode and JIT decoders. */
+const char *kPbDecoderStackOverflow = "Nesting too deep.";
+const char *kPbDecoderSubmessageTooLong =
+    "Submessage end extends past enclosing submessage.";
+
+/* Error messages shared within this file. */
+static const char *kUnterminatedVarint = "Unterminated varint.";
+
+/* upb_pbdecoder **************************************************************/
+
+static opcode halt = OP_HALT;
+
+/* Reports whether "op" consumes any of the input buffer.  The opcodes listed
+ * below do not; every other opcode is treated as consuming input. */
+static bool consumes_input(opcode op) {
+  bool consumes = true;
+
+  switch (op) {
+    case OP_SETDISPATCH:
+    case OP_STARTMSG:
+    case OP_ENDMSG:
+    case OP_STARTSEQ:
+    case OP_ENDSEQ:
+    case OP_STARTSUBMSG:
+    case OP_ENDSUBMSG:
+    case OP_STARTSTR:
+    case OP_ENDSTR:
+    case OP_PUSHTAGDELIM:
+    case OP_POP:
+    case OP_SETDELIM:
+    case OP_SETBIGGROUPNUM:
+    case OP_CHECKDELIM:
+    case OP_CALL:
+    case OP_RET:
+    case OP_BRANCH:
+      consumes = false;
+      break;
+    default:
+      break;
+  }
+  return consumes;
+}
+
+/* Returns the number of bytes occupied by "entries" decoder stack frames.
+ * "d" is not consulted. */
+static size_t stacksize(upb_pbdecoder *d, size_t entries) {
+  UPB_UNUSED(d);
+  return entries * sizeof(upb_pbdecoder_frame);
+}
+
+/* Returns the number of bytes needed for a call stack of "entries" frames.
+ * Bytecode methods need one uint32_t pointer per frame.  With the JIT
+ * compiled in, native methods need more (see below). */
+static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
+  UPB_UNUSED(d);
+
+#ifdef UPB_USE_JIT_X64
+  if (d->method_->is_native_) {
+    /* Two pointers per native stack frame, plus ten pointer-sized slots set
+     * aside for the enter/exit trampolines. */
+    return (entries * sizeof(void*) * 2) + (sizeof(void*) * 10);
+  }
+#endif
+
+  return entries * sizeof(uint32_t*);
+}
+
+
+static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
+
+/* It's unfortunate that we have to micro-manage the compiler with
+ * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
+ * specific to one hardware configuration.  But empirically on a Core i7,
+ * performance increases 30-50% with these annotations.  Every instance where
+ * these appear, gcc 4.2.1 made the wrong decision and degraded performance in
+ * benchmarks. */
+
+/* Reports "msg" as an error to the decoder's environment (d->env) through a
+ * freshly initialized upb_status. */
+static void seterr(upb_pbdecoder *d, const char *msg) {
+  upb_status status = UPB_STATUS_INIT;
+  upb_status_seterrmsg(&status, msg);
+  upb_env_reporterror(d->env, &status);
+}
+
+/* Externally visible wrapper around seterr(). */
+void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
+  seterr(d, msg);
+}
+
+
+/* Buffering ******************************************************************/
+
+/* We operate on one buffer at a time, which is either the user's buffer passed
+ * to our "decode" callback or some residual bytes from the previous buffer. */
+
+/* How many bytes can be safely read from d->ptr without reading past end-of-buf
+ * or past the current delimited end.  Computed as d->data_end - d->ptr;
+ * asserts that d->ptr has not moved beyond d->data_end. */
+static size_t curbufleft(const upb_pbdecoder *d) {
+  assert(d->data_end >= d->ptr);
+  return d->data_end - d->ptr;
+}
+
+/* How many bytes are available before end-of-buffer (d->end - d->ptr).  Unlike
+ * curbufleft(), this is measured against d->end rather than d->data_end. */
+static size_t bufleft(const upb_pbdecoder *d) {
+  return d->end - d->ptr;
+}
+
+/* Overall stream offset of d->ptr: the stream offset of the start of the
+ * current buffer plus d->ptr's distance from that start. */
+uint64_t offset(const upb_pbdecoder *d) {
+  return d->bufstart_ofs + (d->ptr - d->buf);
+}
+
+/* How many bytes are available before the end of this delimited region, i.e.
+ * the top frame's end_ofs minus the current stream offset. */
+size_t delim_remaining(const upb_pbdecoder *d) {
+  return d->top->end_ofs - offset(d);
+}
+
+/* Advances d->ptr by "len" bytes.  Asserts that at least "len" bytes are
+ * readable according to curbufleft(). */
+static void advance(upb_pbdecoder *d, size_t len) {
+  assert(curbufleft(d) >= len);
+  d->ptr += len;
+}
+
+/* Reports whether "p" lies within [buf, end].  Note that "end" itself counts
+ * as inside. */
+static bool in_buf(const char *p, const char *buf, const char *end) {
+  if (p < buf) {
+    return false;
+  }
+  return p <= end;
+}
+
+/* Reports whether "p" points into the decoder's residual buffer, i.e. within
+ * [d->residual, d->residual_end] (both ends inclusive, per in_buf()). */
+static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
+  return in_buf(p, d->residual, d->residual_end);
+}
+
+/* Recomputes d->delim_end and d->data_end.  Both depend on the current buffer
+ * and on the top of the parsing stack, so this must run whenever either one
+ * changes.
+ *
+ * If the top frame's end offset falls inside the current buffer, both
+ * pointers are set to that position; otherwise data_end is the end of the
+ * buffer and delim_end is NULL. */
+static void set_delim_end(upb_pbdecoder *d) {
+  const size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs;
+  const size_t buf_len = (size_t)(d->end - d->buf);
+
+  if (delim_ofs > buf_len) {
+    d->data_end = d->end;
+    d->delim_end = NULL;
+    return;
+  }
+
+  d->delim_end = d->buf + delim_ofs;
+  d->data_end = d->delim_end;
+}
+
+/* Makes [buf, end) the current buffer with d->ptr at its start, then
+ * recomputes the delimited-end pointers.  d->bufstart_ofs is not touched
+ * here; callers adjust it themselves. */
+static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
+  d->ptr = buf;
+  d->buf = buf;
+  d->end = end;
+  set_delim_end(d);
+}
+
+/* Moves on to the buffer [buf, buf + len) once the current buffer has been
+ * fully consumed (asserted), adding the old buffer's length to
+ * d->bufstart_ofs. */
+static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
+  assert(curbufleft(d) == 0);
+  d->bufstart_ofs += (d->end - d->buf);
+  switchtobuf(d, buf, buf + len);
+}
+
+/* Records the current read position (d->ptr) as the checkpoint that the
+ * suspend functions fall back to. */
+static void checkpoint(upb_pbdecoder *d) {
+  /* The assertion here is in the interests of efficiency, not correctness.
+   * We are trying to ensure that we don't checkpoint() more often than
+   * necessary. */
+  assert(d->checkpoint != d->ptr);
+  d->checkpoint = d->ptr;
+}
+
+/* Skips "bytes" bytes in the stream, which may be more than available.  If we
+ * skip more bytes than are available, we return a long read count to the caller
+ * indicating how many bytes can be skipped over before passing actual data
+ * again.  Skipped bytes can pass a NULL buffer and the decoder guarantees they
+ * won't actually be read.
+ *
+ * Returns DECODE_OK when the skipped span lies wholly inside the current
+ * buffer with data left over.  Otherwise the outstanding byte count is stored
+ * in d->skip, the decoder switches to an empty residual buffer, and
+ * d->size_param + d->skip is returned.  If "bytes" exceeds
+ * delim_remaining(d), an error is reported and the decoder is suspended.
+ */
+static int32_t skip(upb_pbdecoder *d, size_t bytes) {
+  assert(!in_residual_buf(d, d->ptr) || d->size_param == 0);
+  assert(d->skip == 0);
+  if (bytes > delim_remaining(d)) {
+    seterr(d, "Skipped value extended beyond enclosing submessage.");
+    return upb_pbdecoder_suspend(d);
+  } else if (bufleft(d) > bytes) {
+    /* Skipped data is all in current buffer, and more is still available. */
+    advance(d, bytes);
+    d->skip = 0;
+    return DECODE_OK;
+  } else {
+    /* Skipped data extends beyond currently available buffers. */
+    d->pc = d->last;
+    d->skip = bytes - curbufleft(d);
+    d->bufstart_ofs += (d->end - d->buf);
+    d->residual_end = d->residual;  /* Empty the residual buffer. */
+    switchtobuf(d, d->residual, d->residual_end);
+    return d->size_param + d->skip;
+  }
+}
+
+
+/* Resumes the decoder from an initial state or from a previous suspend.
+ *
+ * Records the caller's buffer, size and handle in the decoder.  If residual
+ * bytes are pending, decoding continues from the residual buffer; otherwise
+ * the decoder switches to the caller's buffer.  A pending skip (d->skip) is
+ * then replayed, and if the top frame has a negative groupnum (an unknown
+ * group was being skipped) that skipping is continued.
+ *
+ * Returns DECODE_OK when the caller may go on decoding.  Any other value
+ * comes from a CHECK_RETURN'd helper or from upb_pbdecoder_suspend(). */
+int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
+                             size_t size, const upb_bufhandle *handle) {
+  UPB_UNUSED(p);  /* Useless; just for the benefit of the JIT. */
+
+  d->buf_param = buf;
+  d->size_param = size;
+  d->handle = handle;
+
+  if (d->residual_end > d->residual) {
+    /* We have residual bytes from the last buffer. */
+    assert(d->ptr == d->residual);
+  } else {
+    switchtobuf(d, buf, buf + size);
+  }
+
+  d->checkpoint = d->ptr;
+
+  if (d->skip) {
+    size_t skip_bytes = d->skip;
+    d->skip = 0;  /* skip() asserts that no skip is pending on entry. */
+    CHECK_RETURN(skip(d, skip_bytes));
+    d->checkpoint = d->ptr;
+  }
+
+  if (!buf) {
+    /* NULL buf is ok if its entire span is covered by the "skip" above, but
+     * by this point we know that "skip" doesn't cover the buffer. */
+    seterr(d, "Passed NULL buffer over non-skippable region.");
+    return upb_pbdecoder_suspend(d);
+  }
+
+  if (d->top->groupnum < 0) {
+    CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
+    d->checkpoint = d->ptr;
+  }
+
+  return DECODE_OK;
+}
+
+/* Suspends the decoder at the last checkpoint, without saving any residual
+ * bytes.  If there are any unconsumed bytes, returns a short byte count.
+ *
+ * The program counter is rewound to d->last.  The return value is the number
+ * of bytes of the user's buffer that lie before the checkpoint: 0 if the
+ * checkpoint is at the start of the residual buffer, otherwise the
+ * checkpoint's distance from the start of the user buffer.  In the latter
+ * case the decoder is left on an empty residual buffer. */
+size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
+  d->pc = d->last;
+  if (d->checkpoint == d->residual) {
+    /* Checkpoint was in residual buf; no user bytes were consumed. */
+    d->ptr = d->residual;
+    return 0;
+  } else {
+    size_t consumed;
+    assert(!in_residual_buf(d, d->checkpoint));
+    assert(d->buf == d->buf_param);
+
+    consumed = d->checkpoint - d->buf;
+    d->bufstart_ofs += consumed;
+    d->residual_end = d->residual;  /* Empty the residual buffer. */
+    switchtobuf(d, d->residual, d->residual_end);
+    return consumed;
+  }
+}
+
+/* Suspends the decoder at the last checkpoint, and saves any unconsumed
+ * bytes in our residual buffer.  This is necessary if we need more user
+ * bytes to form a complete value, which might not be contiguous in the
+ * user's buffers.  Always consumes all user bytes.
+ *
+ * Returns d->size_param.  Afterwards the residual buffer is the current
+ * buffer.  The asserts require the saved bytes to fit in d->residual. */
+static size_t suspend_save(upb_pbdecoder *d) {
+  /* We hit end-of-buffer before we could parse a full value.
+   * Save any unconsumed bytes (if any) to the residual buffer. */
+  d->pc = d->last;
+
+  if (d->checkpoint == d->residual) {
+    /* Checkpoint was in residual buf; append user byte(s) to residual buf. */
+    assert((d->residual_end - d->residual) + d->size_param <=
+           sizeof(d->residual));
+    if (!in_residual_buf(d, d->ptr)) {
+      d->bufstart_ofs -= (d->residual_end - d->residual);
+    }
+    memcpy(d->residual_end, d->buf_param, d->size_param);
+    d->residual_end += d->size_param;
+  } else {
+    /* Checkpoint was in user buf; old residual bytes not needed. */
+    size_t save;
+    assert(!in_residual_buf(d, d->checkpoint));
+
+    d->ptr = d->checkpoint;
+    save = curbufleft(d);
+    assert(save <= sizeof(d->residual));
+    memcpy(d->residual, d->ptr, save);
+    d->residual_end = d->residual + save;
+    d->bufstart_ofs = offset(d);  /* Stream offset of the checkpoint. */
+  }
+
+  switchtobuf(d, d->residual, d->residual_end);
+  return d->size_param;
+}
+
+/* Copies the next "bytes" bytes into "buf" and advances the stream.
+ * Requires that this many bytes are available in the current buffer; this is
+ * only checked by an assert. */
+UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
+                                         size_t bytes) {
+  assert(bytes <= curbufleft(d));
+  memcpy(buf, d->ptr, bytes);
+  advance(d, bytes);
+}
+
+/* Slow path for getting the next "bytes" bytes, regardless of whether they are
+ * available in the current buffer or not.  Returns a status code as described
+ * in decoder.int.h.
+ *
+ * Whatever the current buffer still holds is copied first.  If that buffer was
+ * the residual buffer, the decoder then moves on to the user's buffer.  When
+ * the remainder still cannot be satisfied, the outcome depends on why: an
+ * error if the readable data stops at the delimited end, otherwise a
+ * suspend_save() so the value can be completed from the next buffer. */
+UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
+                                          size_t bytes) {
+  const size_t avail = curbufleft(d);
+  consumebytes(d, buf, avail);
+  bytes -= avail;
+  assert(bytes > 0);
+  if (in_residual_buf(d, d->ptr)) {
+    advancetobuf(d, d->buf_param, d->size_param);
+  }
+  if (curbufleft(d) >= bytes) {
+    consumebytes(d, (char *)buf + avail, bytes);
+    return DECODE_OK;
+  } else if (d->data_end == d->delim_end) {
+    seterr(d, "Submessage ended in the middle of a value or group");
+    return upb_pbdecoder_suspend(d);
+  } else {
+    return suspend_save(d);
+  }
+}
+
+/* Reads the next "bytes" bytes into "buf", whether or not they are all present
+ * in the current buffer.  Returns a status code as described in decoder.int.h.
+ */
+UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
+                                        size_t bytes) {
+  if (curbufleft(d) < bytes) {
+    /* Not enough contiguous data; take the out-of-line path. */
+    return getbytes_slow(d, buf, bytes);
+  }
+
+  consumebytes(d, buf, bytes);
+  return DECODE_OK;
+}
+
+/* Slow path for peekbytes(): copies what the current buffer still holds and,
+ * if that buffer is the residual buffer, continues with bytes from the user's
+ * buffer.  Returns how many bytes were copied into "buf"; the stream position
+ * is left unchanged. */
+UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
+                                          size_t bytes) {
+  size_t extra;
+  size_t copied = curbufleft(d);
+
+  memcpy(buf, d->ptr, copied);
+  if (!in_residual_buf(d, d->ptr)) {
+    return copied;
+  }
+
+  extra = UPB_MIN(bytes - copied, d->size_param);
+  memcpy((char *)buf + copied, d->buf_param, extra);
+  return copied + extra;
+}
+
+/* Copies up to "bytes" upcoming bytes into "buf" without advancing the
+ * stream.  Returns the number of bytes actually copied, which is "bytes" when
+ * the current buffer holds that many. */
+UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
+                                        size_t bytes) {
+  if (curbufleft(d) < bytes) {
+    return peekbytes_slow(d, buf, bytes);
+  }
+
+  memcpy(buf, d->ptr, bytes);
+  return bytes;
+}
+
+
+/* Decoding of wire types *****************************************************/
+
+/* Slow path for decoding a varint from the current buffer position.
+ * Returns a status code as described in decoder.int.h.
+ *
+ * Reads one byte at a time through getbytes(); any non-negative status from
+ * getbytes() is returned to the caller immediately.  At most ten bytes are
+ * read (bit positions 0 through 63); if the tenth byte still has its
+ * continuation bit set, kUnterminatedVarint is reported and the decoder is
+ * suspended. */
+UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
+                                                      uint64_t *u64) {
+  uint8_t byte = 0x80;  /* Continuation bit set so the loop runs once. */
+  int bitpos;
+  *u64 = 0;
+  for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
+    int32_t ret = getbytes(d, &byte, 1);
+    if (ret >= 0) return ret;
+    *u64 |= (uint64_t)(byte & 0x7F) << bitpos;
+  }
+  if(bitpos == 70 && (byte & 0x80)) {
+    seterr(d, kUnterminatedVarint);
+    return upb_pbdecoder_suspend(d);
+  }
+  return DECODE_OK;
+}
+
+/* Decodes a varint from the current buffer position.
+ * Returns a status code as described in decoder.int.h.
+ *
+ * Three cases: a single byte without a continuation bit is taken directly;
+ * with at least ten readable bytes upb_vdecode_fast() is used; anything else
+ * goes through the byte-at-a-time slow path. */
+UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
+  if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
+    *u64 = *d->ptr;
+    advance(d, 1);
+    return DECODE_OK;
+  } else if (curbufleft(d) >= 10) {
+    /* Fast case. */
+    upb_decoderet r = upb_vdecode_fast(d->ptr);
+    if (r.p == NULL) {  /* Treated as an unterminated varint. */
+      seterr(d, kUnterminatedVarint);
+      return upb_pbdecoder_suspend(d);
+    }
+    advance(d, r.p - d->ptr);
+    *u64 = r.val;
+    return DECODE_OK;
+  } else {
+    /* Slow case -- varint spans buffer seam. */
+    return upb_pbdecoder_decode_varint_slow(d, u64);
+  }
+}
+
+/* Decodes a 32-bit varint from the current buffer position.
+ * Returns a status code as described in decoder.int.h.
+ *
+ * The value is decoded as a full varint first.  A non-negative status from
+ * decode_varint() is passed straight through.  A decoded value above
+ * UINT32_MAX is reported as an error, *u32 is set to 0 and the decoder is
+ * suspended. */
+UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
+  uint64_t u64;
+  int32_t ret = decode_varint(d, &u64);
+  if (ret >= 0) return ret;
+  if (u64 > UINT32_MAX) {
+    seterr(d, "Unterminated 32-bit varint");
+    /* TODO(haberman) guarantee that this function return is >= 0 somehow,
+     * so we know this path will always be treated as error by our caller.
+     * Right now the size_t -> int32_t can overflow and produce negative values.
+     */
+    *u32 = 0;
+    return upb_pbdecoder_suspend(d);
+  }
+  *u32 = u64;
+  return DECODE_OK;
+}
+
+/* Decodes a fixed32 from the current buffer position.
+ * Returns a status code as described in decoder.int.h.
+ * The four bytes are copied into *u32 as-is, with no byte-order conversion.
+ * TODO: proper byte swapping for big-endian machines. */
+UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
+  return getbytes(d, u32, 4);
+}
+
+/* Decodes a fixed64 from the current buffer position.
+ * Returns a status code as described in decoder.int.h.
+ * The eight bytes are copied into *u64 as-is, with no byte-order conversion.
+ * TODO: proper byte swapping for big-endian machines. */
+UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
+  return getbytes(d, u64, 8);
+}
+
+/* Non-static versions of the above functions.
+ * These are called by the JIT for fallback paths.
+ * This one forwards to decode_fixed32() and returns its status. */
+int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) {
+  return decode_fixed32(d, u32);
+}
+
+/* Non-static wrapper that forwards to decode_fixed64() and returns its
+ * status. */
+int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) {
+  return decode_fixed64(d, u64);
+}
+
+/* Reinterprets the 8 bytes of "n" as a double (bit copy, no numeric
+ * conversion). */
+static double as_double(uint64_t n) {
+  double result;
+  memcpy(&result, &n, 8);
+  return result;
+}
+/* Reinterprets the 4 bytes of "n" as a float (bit copy, no numeric
+ * conversion). */
+static float as_float(uint32_t n) {
+  float result;
+  memcpy(&result, &n, 4);
+  return result;
+}
+
+/* Pushes a frame onto the decoder stack.
+ *
+ * "end" is the stream offset at which the new frame ends.  Fails (returning
+ * false after reporting an error) if "end" lies beyond the current frame's
+ * end offset or if the stack is already at d->limit.  The new frame starts
+ * with a NULL dispatch table and groupnum 0. */
+static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
+  upb_pbdecoder_frame *fr = d->top;
+
+  if (end > fr->end_ofs) {
+    seterr(d, kPbDecoderSubmessageTooLong);
+    return false;
+  } else if (fr == d->limit) {
+    seterr(d, kPbDecoderStackOverflow);
+    return false;
+  }
+
+  fr++;
+  fr->end_ofs = end;
+  fr->dispatch = NULL;
+  fr->groupnum = 0;
+  d->top = fr;
+  return true;
+}
+
+/* Pushes a frame for a tag-delimited region and stores "arg" as the new
+ * frame's groupnum.  Returns false if the frame could not be pushed. */
+static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
+  /* A tag-delimited region is expected to finish with an "end" tag (ENDGROUP
+   * or a non-sequence field number) before any enclosing submessage ends.
+   * Inheriting the enclosing frame's delimited end still stops us from
+   * parsing on through a corrupt proto that never supplies that END tag. */
+  if (decoder_push(d, d->top->end_ofs)) {
+    d->top->groupnum = arg;
+    return true;
+  }
+  return false;
+}
+
+/* Pops a frame from the decoder stack by moving d->top down one frame.  No
+ * underflow check is performed here. */
+static void decoder_pop(upb_pbdecoder *d) { d->top--; }
+
+/* Slow path for checking whether the upcoming bytes equal the encoded tag
+ * "expected".
+ *
+ * Returns DECODE_OK (after consuming the tag) on a full match and
+ * DECODE_MISMATCH when the bytes differ.  When fewer bytes than the tag's
+ * length are available but all of them match, the decoder is suspended with
+ * those bytes saved. */
+UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
+                                                 uint64_t expected) {
+  uint64_t actual = 0;
+  const size_t want = upb_value_size(expected);
+  const size_t got = peekbytes(d, &actual, want);
+
+  if (got == want && actual == expected) {
+    /* Advance past matched bytes. */
+    int32_t rc = getbytes(d, &actual, got);
+    UPB_ASSERT_VAR(rc, rc < 0);
+    return DECODE_OK;
+  }
+  if (got < want && memcmp(&actual, &expected, got) == 0) {
+    return suspend_save(d);
+  }
+  return DECODE_MISMATCH;
+}
+
+/* Skips unknown field data.
+ *
+ * If "fieldnum" is non-negative the caller has already decoded the tag, and
+ * "fieldnum"/"wire_type" describe it; the function then jumps straight into
+ * the loop body.  A negative "fieldnum" makes it decode the next tag itself.
+ *
+ * An unknown START_GROUP pushes a frame whose groupnum is the negated field
+ * number, and the loop keeps consuming fields for as long as the top frame's
+ * groupnum is negative, checkpointing after each one.
+ *
+ * Returns DECODE_OK once a value has been skipped and the top frame's
+ * groupnum is non-negative, DECODE_ENDGROUP when an END_GROUP tag equals the
+ * top frame's groupnum, or the status produced by a failing helper or by
+ * upb_pbdecoder_suspend() after an error. */
+int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
+                                  uint8_t wire_type) {
+  if (fieldnum >= 0)
+    goto have_tag;
+
+  while (true) {
+    uint32_t tag;
+    CHECK_RETURN(decode_v32(d, &tag));
+    wire_type = tag & 0x7;
+    fieldnum = tag >> 3;
+
+have_tag:
+    if (fieldnum == 0) {
+      seterr(d, "Saw invalid field number (0)");
+      return upb_pbdecoder_suspend(d);
+    }
+
+    /* TODO: deliver to unknown field callback. */
+    switch (wire_type) {
+      case UPB_WIRE_TYPE_32BIT:
+        CHECK_RETURN(skip(d, 4));
+        break;
+      case UPB_WIRE_TYPE_64BIT:
+        CHECK_RETURN(skip(d, 8));
+        break;
+      case UPB_WIRE_TYPE_VARINT: {
+        uint64_t u64;
+        CHECK_RETURN(decode_varint(d, &u64));
+        break;
+      }
+      case UPB_WIRE_TYPE_DELIMITED: {
+        uint32_t len;
+        CHECK_RETURN(decode_v32(d, &len));
+        CHECK_RETURN(skip(d, len));
+        break;
+      }
+      case UPB_WIRE_TYPE_START_GROUP:
+        CHECK_SUSPEND(pushtagdelim(d, -fieldnum));
+        break;
+      case UPB_WIRE_TYPE_END_GROUP:
+        if (fieldnum == -d->top->groupnum) {  /* Closes an unknown group. */
+          decoder_pop(d);
+        } else if (fieldnum == d->top->groupnum) {
+          return DECODE_ENDGROUP;
+        } else {
+          seterr(d, "Unmatched ENDGROUP tag.");
+          return upb_pbdecoder_suspend(d);
+        }
+        break;
+      default:
+        seterr(d, "Invalid wire type");
+        return upb_pbdecoder_suspend(d);
+    }
+
+    if (d->top->groupnum >= 0) {
+      return DECODE_OK;
+    }
+
+    /* Unknown group -- continue looping over unknown fields. */
+    checkpoint(d);
+  }
+}
+
+/* Points the program counter at the bytecode offset stored under
+ * DISPATCH_ENDMSG in the current frame's dispatch table.  The entry is
+ * asserted to exist. */
+static void goto_endmsg(upb_pbdecoder *d) {
+  upb_value entry;
+  bool present =
+      upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &entry);
+  UPB_ASSERT_VAR(present, present);
+  d->pc = d->top->base + upb_value_getuint64(entry);
+}
+
+/* Decodes a tag and transfers control to the bytecode for that field.
+ *
+ * The dispatch table entry packs two acceptable wire types in its low two
+ * bytes and the bytecode offset for the first of them above bit 16; the offset
+ * for the second wire type lives in a separate entry keyed by
+ * fieldnum + UPB_MAX_FIELDNUMBER.
+ *
+ * A tag that is not in the table (or whose wire type matches neither
+ * possibility) is skipped as unknown data.  If skipping reports
+ * DECODE_ENDGROUP, control moves to the end-of-message bytecode. */
+static int32_t dispatch(upb_pbdecoder *d) {
+  upb_inttable *table = d->top->dispatch;
+  uint32_t tag;
+  uint8_t wire_type;
+  uint32_t fieldnum;
+  upb_value val;
+  int32_t skipped;
+
+  CHECK_RETURN(decode_v32(d, &tag));
+  wire_type = tag & 0x7;
+  fieldnum = tag >> 3;
+
+  /* Because of packed/non-packed compatibility, a field may legitimately
+   * arrive with either of two wire types. */
+  if (fieldnum != DISPATCH_ENDMSG &&
+      upb_inttable_lookup32(table, fieldnum, &val)) {
+    uint64_t entry = upb_value_getuint64(val);
+
+    if (wire_type == (entry & 0xff)) {
+      /* Primary wire type: offset is stored inline. */
+      d->pc = d->top->base + (entry >> 16);
+      return DECODE_OK;
+    }
+
+    if (wire_type == ((entry >> 8) & 0xff)) {
+      /* Alternate wire type: offset is stored in a second table entry. */
+      bool found =
+          upb_inttable_lookup(table, fieldnum + UPB_MAX_FIELDNUMBER, &val);
+      UPB_ASSERT_VAR(found, found);
+      d->pc = d->top->base + upb_value_getuint64(val);
+      return DECODE_OK;
+    }
+  }
+
+  /* Unknown field or ENDGROUP.  The DISPATCH or TAG bytecode that got us here
+   * is preceded by a CHECKDELIM bytecode; back up to it so the delimited end
+   * is re-checked once the unknown data has been skipped. */
+  d->last--;  /* Necessary if we get suspended */
+  d->pc = d->last;
+  assert(getop(*d->last) == OP_CHECKDELIM);
+
+  skipped = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
+
+  CHECK_RETURN(skipped);
+
+  if (skipped == DECODE_ENDGROUP) {
+    goto_endmsg(d);
+  }
+
+  return DECODE_OK;
+}
+
+/* Returns the frame immediately below the current top of the stack.
+ *
+ * The opcodes that use this only run after a PUSH operation, so the stack is
+ * always more than one frame deep here; this is asserted. */
+upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
+  upb_pbdecoder_frame *current = d->top;
+  assert(current != d->stack);
+  return current - 1;
+}
+
+
+/* The main decoding loop *****************************************************/
+
+/* The main decoder VM function.  Uses traditional bytecode dispatch loop with a
+ * switch() statement.
+ *
+ * Each iteration fetches one 32-bit instruction from d->pc, splits it into an
+ * opcode (getop()) and an argument (the bits above the low byte), and executes
+ * the matching case.  The loop only exits through a return: OP_HALT returns
+ * d->size_param, and the CHECK_RETURN()/CHECK_SUSPEND() macros and the
+ * explicit suspend paths return whatever value they produce.
+ *
+ * "group" is only read when UPB_DUMP_BYTECODE is defined; "handle" is passed
+ * through to upb_sink_putstring(). */
+size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group,
+                      const upb_bufhandle* handle) {
+
+/* VMCASE wraps one opcode handler; after the handler runs it records a
+ * checkpoint if consumes_input(op) says the opcode consumed input.
+ * PRIMITIVE_OP generates the handler for a scalar field: decode a value with
+ * decode_<wt>(), convert it with "convfunc" and deliver it to the sink. */
+#define VMCASE(op, code) \
+  case op: { code; if (consumes_input(op)) checkpoint(d); break; }
+#define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \
+  VMCASE(OP_PARSE_ ## type, { \
+    ctype val; \
+    CHECK_RETURN(decode_ ## wt(d, &val)); \
+    upb_sink_put ## name(&d->top->sink, arg, (convfunc)(val)); \
+  })
+
+  while(1) {
+    int32_t instruction;
+    opcode op;
+    uint32_t arg;
+    int32_t longofs;
+
+    /* Remember where this instruction started; dispatch() rewinds to it. */
+    d->last = d->pc;
+    instruction = *d->pc++;
+    op = getop(instruction);
+    arg = instruction >> 8;
+    longofs = arg;
+    assert(d->ptr != d->residual_end);
+    UPB_UNUSED(group);
+#ifdef UPB_DUMP_BYTECODE
+    fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
+                    "%x %s (%d)\n",
+            (int)offset(d),
+            (int)(d->ptr - d->buf),
+            (int)(d->data_end - d->ptr),
+            (int)(d->end - d->ptr),
+            (int)((d->top->end_ofs - d->bufstart_ofs) - (d->ptr - d->buf)),
+            (int)(d->pc - 1 - group->bytecode),
+            upb_pbdecoder_getopname(op),
+            arg);
+#endif
+    switch (op) {
+      /* Technically, we are losing data if we see a 32-bit varint that is not
+       * properly sign-extended.  We could detect this and error about the data
+       * loss, but proto2 does not do this, so we pass. */
+      PRIMITIVE_OP(INT32,    varint,  int32,  int32_t,      uint64_t)
+      PRIMITIVE_OP(INT64,    varint,  int64,  int64_t,      uint64_t)
+      PRIMITIVE_OP(UINT32,   varint,  uint32, uint32_t,     uint64_t)
+      PRIMITIVE_OP(UINT64,   varint,  uint64, uint64_t,     uint64_t)
+      PRIMITIVE_OP(FIXED32,  fixed32, uint32, uint32_t,     uint32_t)
+      PRIMITIVE_OP(FIXED64,  fixed64, uint64, uint64_t,     uint64_t)
+      PRIMITIVE_OP(SFIXED32, fixed32, int32,  int32_t,      uint32_t)
+      PRIMITIVE_OP(SFIXED64, fixed64, int64,  int64_t,      uint64_t)
+      PRIMITIVE_OP(BOOL,     varint,  bool,   bool,         uint64_t)
+      PRIMITIVE_OP(DOUBLE,   fixed64, double, as_double,    uint64_t)
+      PRIMITIVE_OP(FLOAT,    fixed32, float,  as_float,     uint32_t)
+      PRIMITIVE_OP(SINT32,   varint,  int32,  upb_zzdec_32, uint64_t)
+      PRIMITIVE_OP(SINT64,   varint,  int64,  upb_zzdec_64, uint64_t)
+
+      VMCASE(OP_SETDISPATCH,
+        /* The dispatch table pointer is stored inline in the bytecode,
+         * directly after this instruction. */
+        d->top->base = d->pc - 1;
+        memcpy(&d->top->dispatch, d->pc, sizeof(void*));
+        d->pc += sizeof(void*) / sizeof(uint32_t);
+      )
+      VMCASE(OP_STARTMSG,
+        CHECK_SUSPEND(upb_sink_startmsg(&d->top->sink));
+      )
+      VMCASE(OP_ENDMSG,
+        CHECK_SUSPEND(upb_sink_endmsg(&d->top->sink, d->status));
+      )
+      VMCASE(OP_STARTSEQ,
+        upb_pbdecoder_frame *outer = outer_frame(d);
+        CHECK_SUSPEND(upb_sink_startseq(&outer->sink, arg, &d->top->sink));
+      )
+      VMCASE(OP_ENDSEQ,
+        CHECK_SUSPEND(upb_sink_endseq(&d->top->sink, arg));
+      )
+      VMCASE(OP_STARTSUBMSG,
+        upb_pbdecoder_frame *outer = outer_frame(d);
+        CHECK_SUSPEND(upb_sink_startsubmsg(&outer->sink, arg, &d->top->sink));
+      )
+      VMCASE(OP_ENDSUBMSG,
+        CHECK_SUSPEND(upb_sink_endsubmsg(&d->top->sink, arg));
+      )
+      VMCASE(OP_STARTSTR,
+        uint32_t len = delim_remaining(d);
+        upb_pbdecoder_frame *outer = outer_frame(d);
+        CHECK_SUSPEND(upb_sink_startstr(&outer->sink, arg, len, &d->top->sink));
+        if (len == 0) {
+          d->pc++;  /* Skip OP_STRING. */
+        }
+      )
+      VMCASE(OP_STRING,
+        uint32_t len = curbufleft(d);
+        size_t n = upb_sink_putstring(&d->top->sink, arg, d->ptr, len, handle);
+        /* The sink may report more bytes than it was given; the excess is
+         * skipped, provided it stays within the delimited region. */
+        if (n > len) {
+          if (n > delim_remaining(d)) {
+            seterr(d, "Tried to skip past end of string.");
+            return upb_pbdecoder_suspend(d);
+          } else {
+            int32_t ret = skip(d, n);
+            /* This shouldn't return DECODE_OK, because n > len. */
+            assert(ret >= 0);
+            return ret;
+          }
+        }
+        advance(d, n);
+        if (n < len || d->delim_end == NULL) {
+          /* We aren't finished with this string yet. */
+          d->pc--;  /* Repeat OP_STRING. */
+          if (n > 0) checkpoint(d);
+          return upb_pbdecoder_suspend(d);
+        }
+      )
+      VMCASE(OP_ENDSTR,
+        CHECK_SUSPEND(upb_sink_endstr(&d->top->sink, arg));
+      )
+      VMCASE(OP_PUSHTAGDELIM,
+        CHECK_SUSPEND(pushtagdelim(d, arg));
+      )
+      VMCASE(OP_SETBIGGROUPNUM,
+        /* The group number is stored in the bytecode word that follows. */
+        d->top->groupnum = *d->pc++;
+      )
+      VMCASE(OP_POP,
+        assert(d->top > d->stack);
+        decoder_pop(d);
+      )
+      VMCASE(OP_PUSHLENDELIM,
+        uint32_t len;
+        CHECK_RETURN(decode_v32(d, &len));
+        CHECK_SUSPEND(decoder_push(d, offset(d) + len));
+        set_delim_end(d);
+      )
+      VMCASE(OP_SETDELIM,
+        set_delim_end(d);
+      )
+      VMCASE(OP_CHECKDELIM,
+        /* We are guaranteed of this assert because we never allow ourselves to
+         * consume bytes beyond data_end, which covers delim_end when non-NULL.
+         */
+        assert(!(d->delim_end && d->ptr > d->delim_end));
+        if (d->ptr == d->delim_end)
+          d->pc += longofs;
+      )
+      VMCASE(OP_CALL,
+        d->callstack[d->call_len++] = d->pc;
+        d->pc += longofs;
+      )
+      VMCASE(OP_RET,
+        assert(d->call_len > 0);
+        d->pc = d->callstack[--d->call_len];
+      )
+      VMCASE(OP_BRANCH,
+        d->pc += longofs;
+      )
+      VMCASE(OP_TAG1,
+        uint8_t expected;
+        CHECK_SUSPEND(curbufleft(d) > 0);
+        expected = (arg >> 8) & 0xff;
+        if (*d->ptr == expected) {
+          advance(d, 1);
+        } else {
+          int8_t shortofs;
+         badtag:
+          /* Shared mismatch path, also reached by goto from OP_TAG2 and
+           * OP_TAGN.  The low byte of arg is a signed branch offset, or
+           * LABEL_DISPATCH to fall back to table dispatch. */
+          shortofs = arg;
+          if (shortofs == LABEL_DISPATCH) {
+            CHECK_RETURN(dispatch(d));
+          } else {
+            d->pc += shortofs;
+            break; /* Avoid checkpoint(). */
+          }
+        }
+      )
+      VMCASE(OP_TAG2,
+        uint16_t expected;
+        CHECK_SUSPEND(curbufleft(d) > 0);
+        expected = (arg >> 8) & 0xffff;
+        if (curbufleft(d) >= 2) {
+          uint16_t actual;
+          memcpy(&actual, d->ptr, 2);
+          if (expected == actual) {
+            advance(d, 2);
+          } else {
+            goto badtag;
+          }
+        } else {
+          /* Fewer than two bytes are buffered; use the slow path. */
+          int32_t result = upb_pbdecoder_checktag_slow(d, expected);
+          if (result == DECODE_MISMATCH) goto badtag;
+          if (result >= 0) return result;
+        }
+      )
+      VMCASE(OP_TAGN, {
+        uint64_t expected;
+        int32_t result;
+        /* The expected tag occupies the two bytecode words that follow. */
+        memcpy(&expected, d->pc, 8);
+        d->pc += 2;
+        result = upb_pbdecoder_checktag_slow(d, expected);
+        if (result == DECODE_MISMATCH) goto badtag;
+        if (result >= 0) return result;
+      })
+      VMCASE(OP_DISPATCH, {
+        CHECK_RETURN(dispatch(d));
+      })
+      VMCASE(OP_HALT, {
+        return d->size_param;
+      })
+    }
+  }
+}
+
+
+/* BytesHandler handlers ******************************************************/
+
+/* Start handler for the bytecode decoder.  Resets the per-parse state and
+ * sets the program counter to "pc".  The call stack is seeded with the
+ * address of "halt" as its single entry.  Returns the decoder itself. */
+void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
+  upb_pbdecoder *decoder = closure;
+  UPB_UNUSED(size_hint);
+
+  /* Program counter and call stack. */
+  decoder->pc = pc;
+  decoder->callstack[0] = &halt;
+  decoder->call_len = 1;
+
+  /* Input position tracking. */
+  decoder->skip = 0;
+  decoder->bufstart_ofs = 0;
+  decoder->top->end_ofs = UINT64_MAX;
+
+  return decoder;
+}
+
+/* Start handler for the JIT decoder.  Resets the per-parse state; unlike the
+ * bytecode variant it leaves the call stack empty and does not touch the
+ * program counter.  Returns the decoder itself. */
+void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
+  upb_pbdecoder *decoder = closure;
+  UPB_UNUSED(hd);
+  UPB_UNUSED(size_hint);
+
+  decoder->call_len = 0;
+
+  /* Input position tracking. */
+  decoder->skip = 0;
+  decoder->bufstart_ofs = 0;
+  decoder->top->end_ofs = UINT64_MAX;
+
+  return decoder;
+}
+
+/* End-of-input handler.
+ *
+ * Fails (setting an error and returning false) if unparsed residual bytes
+ * remain, if a skip is still pending, or if the top frame is still inside a
+ * delimited region.  Otherwise it marks the current offset as the end of the
+ * message and runs the decoder once more with an empty buffer so that the
+ * closing bytecode can execute.  Returns true only if the call stack is empty
+ * afterwards. */
+bool upb_pbdecoder_end(void *closure, const void *handler_data) {
+  upb_pbdecoder *d = closure;
+  const upb_pbdecodermethod *method = handler_data;
+  uint64_t end;
+  char dummy;  /* Gives the zero-length final decode call a valid address. */
+
+  if (d->residual_end > d->residual) {
+    seterr(d, "Unexpected EOF: decoder still has buffered unparsed data");
+    return false;
+  }
+
+  if (d->skip) {
+    seterr(d, "Unexpected EOF inside skipped data");
+    return false;
+  }
+
+  /* UINT64_MAX is the "no delimited end" value set by the start handlers. */
+  if (d->top->end_ofs != UINT64_MAX) {
+    seterr(d, "Unexpected EOF inside delimited string");
+    return false;
+  }
+
+  /* The user's end() call indicates that the message ends here. */
+  end = offset(d);
+  d->top->end_ofs = end;
+
+#ifdef UPB_USE_JIT_X64
+  if (method->is_native_) {
+    const mgroup *group = (const mgroup*)method->group;
+    if (d->top != d->stack)
+      d->stack->end_ofs = 0;
+    group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
+  } else
+#endif
+  {
+    const uint32_t *p = d->pc;
+    d->stack->end_ofs = end;
+    /* Check the previous bytecode, but guard against beginning. */
+    if (p != method->code_base.ptr) p--;
+    if (getop(*p) == OP_CHECKDELIM) {
+      /* Rewind from OP_TAG* to OP_CHECKDELIM. */
+      assert(getop(*d->pc) == OP_TAG1 ||
+             getop(*d->pc) == OP_TAG2 ||
+             getop(*d->pc) == OP_TAGN ||
+             getop(*d->pc) == OP_DISPATCH);
+      d->pc = p;
+    }
+    /* Zero-length decode: lets the VM observe the new end offset. */
+    upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
+  }
+
+  /* A non-empty call stack means some submessage or group never closed. */
+  if (d->call_len != 0) {
+    seterr(d, "Unexpected EOF inside submessage or group");
+    return false;
+  }
+
+  return true;
+}
+
+/* Buffer handler for the bytecode decoder.  Resumes from any suspended state
+ * via upb_pbdecoder_resume(); if that reports DECODE_ENDGROUP the program
+ * counter is first moved to the end-of-message bytecode.  CHECK_RETURN() may
+ * return early; otherwise the bytecode VM is run on the buffer. */
+size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf,
+                            size_t size, const upb_bufhandle *handle) {
+  int32_t resumed = upb_pbdecoder_resume(decoder, NULL, buf, size, handle);
+
+  if (resumed == DECODE_ENDGROUP) {
+    goto_endmsg(decoder);
+  }
+  CHECK_RETURN(resumed);
+
+  return run_decoder_vm(decoder, group, handle);
+}
+
+
+/* Public API *****************************************************************/
+
+/* Returns the decoder to its initial state: a single frame with group number
+ * 0, and every buffer cursor pointing at the start of the residual buffer. */
+void upb_pbdecoder_reset(upb_pbdecoder *d) {
+  /* Collapse the frame stack to its bottom frame. */
+  d->top = d->stack;
+  d->top->groupnum = 0;
+
+  /* Point all buffer cursors at the (now empty) residual buffer. */
+  d->residual_end = d->residual;
+  d->end = d->residual;
+  d->buf = d->residual;
+  d->ptr = d->residual;
+}
+
+/* Allocates and initializes a decoder from environment "e" for method "m",
+ * delivering parsed data to "sink".
+ *
+ * Returns NULL if any allocation fails, or if the method was built for
+ * specific destination handlers and "sink" uses different ones.  The frame
+ * stack and call stack start out sized for a nesting depth of 64. */
+upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m,
+                                    upb_sink *sink) {
+  const size_t default_max_nesting = 64;
+#ifndef NDEBUG
+  /* Only needed by the allocation-size assert at the end. */
+  size_t size_before = upb_env_bytesallocated(e);
+#endif
+
+  upb_pbdecoder *d = upb_env_malloc(e, sizeof(upb_pbdecoder));
+  if (!d) return NULL;
+
+  d->method_ = m;
+  d->callstack = upb_env_malloc(e, callstacksize(d, default_max_nesting));
+  d->stack = upb_env_malloc(e, stacksize(d, default_max_nesting));
+  if (!d->stack || !d->callstack) {
+    return NULL;
+  }
+
+  d->env = e;
+  /* "limit" is the last usable frame of the stack. */
+  d->limit = d->stack + default_max_nesting - 1;
+  d->stack_size = default_max_nesting;
+
+  upb_pbdecoder_reset(d);
+  upb_bytessink_reset(&d->input_, &m->input_handler_, d);
+
+  assert(sink);
+  if (d->method_->dest_handlers_) {
+    /* The method is bound to particular handlers; the sink must match. */
+    if (sink->handlers != d->method_->dest_handlers_)
+      return NULL;
+  }
+  upb_sink_reset(&d->top->sink, sink->handlers, sink->closure);
+
+  /* If this fails, increase the value in decoder.h. */
+  assert(upb_env_bytesallocated(e) - size_before <= UPB_PB_DECODER_SIZE);
+  return d;
+}
+
+/* Returns the decoder's current offset, as reported by offset(). */
+uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
+  uint64_t parsed = offset(d);
+  return parsed;
+}
+
+/* Returns the decoder method this decoder was created with. */
+const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
+  const upb_pbdecodermethod *method = d->method_;
+  return method;
+}
+
+/* Returns the bytes sink through which input is fed to this decoder. */
+upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) {
+  upb_bytessink *input = &d->input_;
+  return input;
+}
+
+/* Returns the number of frames the decoder's stack is currently sized for. */
+size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) {
+  size_t frames = d->stack_size;
+  return frames;
+}
+
+/* Sets the maximum nesting depth to "max" frames, growing the frame stack and
+ * the call stack if they are currently smaller than that.
+ *
+ * Returns false, leaving the limit unchanged, if "max" is smaller than the
+ * current nesting depth or if a reallocation fails.  Returns true otherwise. */
+bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
+  size_t depth;
+
+  assert(d->top >= d->stack);
+  depth = (size_t)(d->top - d->stack);
+
+  if (max < depth) {
+    /* Can't set a limit smaller than what we are currently at. */
+    return false;
+  }
+
+  if (max > d->stack_size) {
+    /* Need to reallocate stack and callstack to accommodate. */
+    size_t old_size = stacksize(d, d->stack_size);
+    size_t new_size = stacksize(d, max);
+    void *p = upb_env_realloc(d->env, d->stack, old_size, new_size);
+    if (!p) {
+      return false;
+    }
+    d->stack = p;
+
+    /* The reallocation may have moved the frames, so every pointer into the
+     * old stack must be re-derived.  "limit" keeps its previous depth for now
+     * so the decoder stays consistent if the call stack allocation below
+     * fails. */
+    d->top = d->stack + depth;
+    d->limit = d->stack + d->stack_size - 1;
+
+    old_size = callstacksize(d, d->stack_size);
+    new_size = callstacksize(d, max);
+    p = upb_env_realloc(d->env, d->callstack, old_size, new_size);
+    if (!p) {
+      return false;
+    }
+    d->callstack = p;
+
+    d->stack_size = max;
+  }
+
+  d->limit = d->stack + max - 1;
+  return true;
+}
+/*
+** upb::Encoder
+**
+** Since we are implementing pure handlers (ie. without any out-of-band access
+** to pre-computed lengths), we have to buffer all submessages before we can
+** emit even their first byte.
+**
+** Not knowing the size of submessages also means we can't write a perfect
+** zero-copy implementation, even with buffering.  Lengths are stored as
+** varints, which means that we don't know how many bytes to reserve for the
+** length until we know what the length is.
+**
+** This leaves us with three main choices:
+**
+** 1. buffer all submessage data in a temporary buffer, then copy it exactly
+**    once into the output buffer.
+**
+** 2. attempt to buffer data directly into the output buffer, estimating how
+**    many bytes each length will take.  When our guesses are wrong, use
+**    memmove() to grow or shrink the allotted space.
+**
+** 3. buffer directly into the output buffer, allocating a max length
+**    ahead-of-time for each submessage length.  If we overallocated, we waste
+**    space, but no memcpy() or memmove() is required.  This approach requires
+**    defining a maximum size for submessages and rejecting submessages that
+**    exceed that size.
+**
+** (2) and (3) have the potential to have better performance, but they are more
+** complicated and subtle to implement:
+**
+**   (3) requires making an arbitrary choice of the maximum message size; it
+**       wastes space when submessages are shorter than this and fails
+**       completely when they are longer.  This makes it more finicky and
+**       requires configuration based on the input.  It also makes it impossible
+**       to perfectly match the output of reference encoders that always use the
+**       optimal amount of space for each length.
+**
+**   (2) requires guessing the size upfront, and if multiple lengths are
+**       guessed wrong the minimum required number of memmove() operations may
+**       be complicated to compute correctly.  Implemented properly, it may have
+**       a useful amortized or average cost, but more investigation is required
+**       to determine this and what the optimal algorithm is to achieve it.
+**
+**   (1) makes you always pay for exactly one copy, but its implementation is
+**       the simplest and its performance is predictable.
+**
+** So for now, we implement (1) only.  If we wish to optimize later, we should
+** be able to do it without affecting users.
+**
+** The strategy is to buffer the segments of data that do *not* depend on
+** unknown lengths in one buffer, and keep a separate buffer of segment pointers
+** and lengths.  When the top-level submessage ends, we can go beginning to end,
+** alternating the writing of lengths with memcpy() of the rest of the data.
+** At the top level though, no buffering is required.
+*/
+
+
+#include <stdlib.h>
+
+/* The output buffer is divided into segments; a segment is a string of data
+ * that is "ready to go" -- it does not need any varint lengths inserted into
+ * the middle.  The seams between segments are where varints will be inserted
+ * once they are known.
+ *
+ * We also use the concept of a "run", which is a range of encoded bytes that
+ * occur at a single submessage level.  Every segment contains one or more runs.
+ *
+ * A segment can span messages.  Consider:
+ *
+ *                  .--Submessage lengths---------.
+ *                  |       |                     |
+ *                  |       V                     V
+ *                  V      | |---------------    | |-----------------
+ * Submessages:    | |-----------------------------------------------
+ * Top-level msg: ------------------------------------------------------------
+ *
+ * Segments:          -----   -------------------   -----------------
+ * Runs:              *----   *--------------*---   *----------------
+ * (* marks the start)
+ *
+ * Note that the top-level message is not in any segment because it does not
+ * have any length preceding it.
+ *
+ * A segment is only interrupted when another length needs to be inserted.  So
+ * observe how the second segment spans both the inner submessage and part of
+ * the next enclosing message. */
+typedef struct {
+  /* end_delim() writes msglen as a varint immediately before the segment's
+   * seglen bytes of buffered data. */
+  uint32_t msglen;  /* The length to varint-encode before this segment. */
+  uint32_t seglen;  /* Length of the segment. */
+} upb_pb_encoder_segment;
+
+/* State for one protobuf binary encoder. */
+struct upb_pb_encoder {
+  /* Environment that all of the encoder's buffers are allocated from. */
+  upb_env *env;
+
+  /* Our input and output. */
+  upb_sink input_;
+  upb_bytessink *output_;
+
+  /* The "subclosure" -- used as the inner closure as part of the bytessink
+   * protocol. */
+  void *subc;
+
+  /* The output buffer and limit, and our current write position.  "buf" is
+   * allocated from the env when the encoder is created and is reallocated by
+   * reserve() if we need to grow beyond the initial size. */
+  char *buf, *ptr, *limit;
+
+  /* The beginning of the current run, or undefined if we are at the top
+   * level. */
+  char *runbegin;
+
+  /* The list of segments we are accumulating. */
+  upb_pb_encoder_segment *segbuf, *segptr, *seglimit;
+
+  /* The stack of enclosing submessages.  Each entry in the stack points to the
+   * segment where this submessage's length is being accumulated.  Entries are
+   * indexes into segbuf; "top" is NULL while we are not inside any delimited
+   * region. */
+  int *stack, *top, *stacklimit;
+
+  /* Depth of startmsg/endmsg calls. */
+  int depth;
+};
+
+/* low-level buffering ********************************************************/
+
+/* Low-level functions for interacting with the output buffer. */
+
+/* TODO(haberman): handle pushback */
+/* Sends "len" bytes to the output sink.  The sink is asserted to accept all
+ * of them, since pushback is not handled yet. */
+static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) {
+  size_t accepted = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL);
+  UPB_ASSERT_VAR(accepted, accepted == len);
+}
+
+/* Returns the segment that the innermost open delimited region is
+ * accumulating its length into. */
+static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
+  int seg_index = *e->top;
+  return e->segbuf + seg_index;
+}
+
+/* Call to ensure that at least "bytes" bytes are available for writing at
+ * e->ptr.  Grows the buffer by doubling when it is too small.
+ *
+ * Returns false if the bytes could not be allocated, including when the
+ * required size does not fit in a size_t.  On failure the encoder's buffer
+ * pointers are left untouched. */
+static bool reserve(upb_pb_encoder *e, size_t bytes) {
+  if ((size_t)(e->limit - e->ptr) < bytes) {
+    /* Grow buffer. */
+    char *new_buf;
+    size_t used = e->ptr - e->buf;
+    size_t old_size = e->limit - e->buf;
+    size_t new_size = old_size;
+    size_t needed;
+
+    if (bytes > SIZE_MAX - used) {
+      /* used + bytes would overflow. */
+      return false;
+    }
+    needed = used + bytes;
+
+    while (new_size < needed) {
+      if (new_size == 0 || new_size > SIZE_MAX / 2) {
+        /* Doubling would never terminate (0) or would wrap around; fall
+         * back to exactly the size we need. */
+        new_size = needed;
+        break;
+      }
+      new_size *= 2;
+    }
+
+    new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
+
+    if (new_buf == NULL) {
+      return false;
+    }
+
+    e->ptr = new_buf + used;
+    if (e->top) {
+      /* runbegin is only defined while we are inside a delimited region, so
+       * only then is there anything to carry over to the new buffer. */
+      e->runbegin = new_buf + (e->runbegin - e->buf);
+    }
+    e->limit = new_buf + new_size;
+    e->buf = new_buf;
+  }
+
+  return true;
+}
+
+/* Call when "bytes" bytes have been written at e->ptr.  The caller *must* have
+ * previously called reserve() with at least this many bytes; this is
+ * asserted. */
+static void encoder_advance(upb_pb_encoder *e, size_t bytes) {
+  assert(bytes <= (size_t)(e->limit - e->ptr));
+  e->ptr = e->ptr + bytes;
+}
+
+/* Call when all of the bytes for a handler have been written.  While inside a
+ * delimited region the bytes stay buffered; at the top level they are flushed
+ * to the output and the buffer is rewound.  Currently always returns true. */
+static bool commit(upb_pb_encoder *e) {
+  if (e->top) {
+    /* Still inside a delimited region: keep buffering. */
+    return true;
+  }
+
+  /* Not inside a delimited region, so flush our accumulated bytes.
+   *
+   * TODO(haberman): in the future we may want to delay flushing for
+   * efficiency reasons. */
+  putbuf(e, e->buf, e->ptr - e->buf);
+  e->ptr = e->buf;
+  return true;
+}
+
+/* Appends "len" bytes from "data" to the buffer, reserving space first.
+ * Returns false if the space could not be reserved. */
+static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
+  if (reserve(e, len)) {
+    memcpy(e->ptr, data, len);
+    encoder_advance(e, len);
+    return true;
+  }
+
+  return false;
+}
+
+/* Closes the current run: its byte count is added to the current segment's
+ * length and to the message length of the innermost open region, and a new
+ * run is started at the current write position. */
+static void accumulate(upb_pb_encoder *e) {
+  upb_pb_encoder_segment *msg_seg = top(e);
+  size_t added;
+
+  assert(e->ptr >= e->runbegin);
+  added = e->ptr - e->runbegin;
+
+  msg_seg->msglen += added;
+  e->segptr->seglen += added;
+  e->runbegin = e->ptr;
+}
+
+/* Call to indicate the start of a delimited region whose full length is not
+ * yet known.  All data is buffered until the length is known.  Delimited
+ * regions may be nested; each gets its own segment and stack entry.
+ *
+ * Returns false if the nesting stack is full or the segment buffer could not
+ * be grown. */
+static bool start_delim(upb_pb_encoder *e) {
+  if (!e->top) {
+    /* We were previously at the top level, start buffering. */
+    e->segptr = e->segbuf;
+    e->top = e->stack;
+    e->runbegin = e->ptr;
+  } else {
+    /* Already buffering: close the current run, then move on to the next
+     * stack entry and the next segment. */
+    accumulate(e);
+
+    ++e->top;
+    if (e->top == e->stacklimit) {
+      /* TODO(haberman): grow stack? */
+      return false;
+    }
+
+    ++e->segptr;
+    if (e->segptr == e->seglimit) {
+      /* Segment buffer is full; double it. */
+      size_t old_size =
+          (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
+      size_t new_size = old_size * 2;
+      size_t seg_index = e->segptr - e->segbuf;
+      upb_pb_encoder_segment *grown =
+          upb_env_realloc(e->env, e->segbuf, old_size, new_size);
+
+      if (grown == NULL) {
+        return false;
+      }
+
+      e->segbuf = grown;
+      e->segptr = grown + seg_index;
+      e->seglimit = grown + (new_size / sizeof(upb_pb_encoder_segment));
+    }
+  }
+
+  /* Record which segment this region accumulates into, and start it empty. */
+  *e->top = e->segptr - e->segbuf;
+  e->segptr->msglen = 0;
+  e->segptr->seglen = 0;
+
+  return true;
+}
+
+/* Call to indicate the end of a delimited region, whose length is now known.
+ *
+ * If the region is nested inside another one, its length (plus the size of
+ * its varint length prefix) is added to the enclosing region.  If it is the
+ * outermost region, every buffered segment is emitted, each preceded by its
+ * varint-encoded message length, and buffering stops.  Always returns true. */
+static bool end_delim(upb_pb_encoder *e) {
+  size_t msglen;
+
+  accumulate(e);
+  msglen = top(e)->msglen;
+
+  if (e->top != e->stack) {
+    /* Need to keep buffering; propagate length info into the enclosing
+     * submessage. */
+    --e->top;
+    top(e)->msglen += msglen + upb_varint_size(msglen);
+    return true;
+  }
+
+  /* All lengths are now available, emit all buffered data. */
+  {
+    char lenbuf[UPB_PB_VARINT_MAX_LEN];
+    const char *data = e->buf;
+    upb_pb_encoder_segment *seg = e->segbuf;
+
+    while (seg <= e->segptr) {
+      size_t lenbytes = upb_vencode64(seg->msglen, lenbuf);
+      putbuf(e, lenbuf, lenbytes);
+      putbuf(e, data, seg->seglen);
+      data += seg->seglen;
+      seg++;
+    }
+  }
+
+  e->ptr = e->buf;
+  e->top = NULL;
+  return true;
+}
+
+
+/* tag_t **********************************************************************/
+
+/* A precomputed (pre-encoded) tag and length. */
+
+typedef struct {
+  uint8_t bytes;  /* Number of valid bytes in "tag". */
+  char tag[7];    /* The varint-encoded tag, as written by upb_vencode64(). */
+} tag_t;
+
+/* Pre-encodes the tag (field number and wire type "wt") for field "f" and
+ * stores it as the handler data of "attr", which is initialized here.  The
+ * tag is heap-allocated and registered with "h" so that it is freed when the
+ * handlers are cleaned up.
+ *
+ * NOTE(review): the malloc() result is not checked before it is written to. */
+static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt,
+                    upb_handlerattr *attr) {
+  const uint32_t fieldnum = upb_fielddef_number(f);
+  const uint32_t encoded = (fieldnum << 3) | wt;
+  tag_t *t = malloc(sizeof(*t));
+
+  t->bytes = upb_vencode64(encoded, t->tag);
+
+  upb_handlerattr_init(attr);
+  upb_handlerattr_sethandlerdata(attr, t);
+  upb_handlers_addcleanup(h, t, free);
+}
+
+/* Appends a pre-encoded tag to the buffer.  Returns false on allocation
+ * failure. */
+static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) {
+  const char *encoded = tag->tag;
+  size_t len = tag->bytes;
+  return encode_bytes(e, encoded, len);
+}
+
+
+/* encoding of wire types *****************************************************/
+
+/* Appends "val" as eight raw bytes in host byte order. */
+static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) {
+  /* TODO(haberman): byte-swap for big endian. */
+  const void *raw = &val;
+  return encode_bytes(e, raw, sizeof(val));
+}
+
+/* Appends "val" as four raw bytes in host byte order. */
+static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) {
+  /* TODO(haberman): byte-swap for big endian. */
+  const void *raw = &val;
+  return encode_bytes(e, raw, sizeof(val));
+}
+
+/* Appends "val" as a varint.  Space for the longest possible varint is
+ * reserved up front; only the bytes actually written are committed.  Returns
+ * false if the space could not be reserved. */
+static bool encode_varint(upb_pb_encoder *e, uint64_t val) {
+  size_t written;
+
+  if (reserve(e, UPB_PB_VARINT_MAX_LEN)) {
+    written = upb_vencode64(val, e->ptr);
+    encoder_advance(e, written);
+    return true;
+  }
+
+  return false;
+}
+
+/* Returns the object representation of "d" as a 64-bit integer. */
+static uint64_t dbl2uint64(double d) {
+  uint64_t bits;
+  memcpy(&bits, &d, sizeof(bits));
+  return bits;
+}
+
+/* Returns the object representation of "d" as a 32-bit integer. */
+static uint32_t flt2uint32(float d) {
+  uint32_t bits;
+  memcpy(&bits, &d, sizeof(bits));
+  return bits;
+}
+
+
+/* encoding of proto types ****************************************************/
+
+/* Start-of-message handler.  Tracks nesting depth and starts the output sink
+ * when the outermost message begins. */
+static bool startmsg(void *c, const void *hd) {
+  upb_pb_encoder *e = c;
+  bool outermost = (e->depth == 0);
+  UPB_UNUSED(hd);
+
+  e->depth++;
+  if (outermost) {
+    upb_bytessink_start(e->output_, 0, &e->subc);
+  }
+  return true;
+}
+
+/* End-of-message handler.  Tracks nesting depth and ends the output sink
+ * when the outermost message finishes. */
+static bool endmsg(void *c, const void *hd, upb_status *status) {
+  upb_pb_encoder *e = c;
+  UPB_UNUSED(hd);
+  UPB_UNUSED(status);
+
+  e->depth -= 1;
+  if (e->depth == 0) {
+    upb_bytessink_end(e->output_);
+  }
+  return true;
+}
+
+/* Start handler for a length-delimited field: writes the tag, commits it, and
+ * opens a delimited region.  Returns the closure on success and UPB_BREAK if
+ * any step fails. */
+static void *encode_startdelimfield(void *c, const void *hd) {
+  if (!encode_tag(c, hd)) return UPB_BREAK;
+  if (!commit(c)) return UPB_BREAK;
+  if (!start_delim(c)) return UPB_BREAK;
+  return c;
+}
+
+/* End handler for a length-delimited field: closes the delimited region. */
+static bool encode_enddelimfield(void *c, const void *hd) {
+  bool ok;
+  UPB_UNUSED(hd);
+  ok = end_delim(c);
+  return ok;
+}
+
+/* Start handler for a group: writes and commits the start-group tag.  Returns
+ * the closure on success and UPB_BREAK on failure. */
+static void *encode_startgroup(void *c, const void *hd) {
+  if (!encode_tag(c, hd)) return UPB_BREAK;
+  if (!commit(c)) return UPB_BREAK;
+  return c;
+}
+
+/* End handler for a group: writes and commits the end-group tag. */
+static bool encode_endgroup(void *c, const void *hd) {
+  if (!encode_tag(c, hd)) {
+    return false;
+  }
+  return commit(c);
+}
+
+/* Start handler for a string/bytes field.  The size hint is ignored; the
+ * field is handled like any other length-delimited field. */
+static void *encode_startstr(void *c, const void *hd, size_t size_hint) {
+  void *subclosure;
+  UPB_UNUSED(size_hint);
+  subclosure = encode_startdelimfield(c, hd);
+  return subclosure;
+}
+
+/* String data handler: appends the chunk to the buffer.  Returns the number
+ * of bytes consumed, which is "len" on success and 0 on failure. */
+static size_t encode_strbuf(void *c, const void *hd, const char *buf,
+                            size_t len, const upb_bufhandle *h) {
+  UPB_UNUSED(hd);
+  UPB_UNUSED(h);
+
+  if (!encode_bytes(c, buf, len)) {
+    return 0;
+  }
+  return len;
+}
+
+/* Generates the two value handlers for one scalar protobuf type:
+ *
+ *   encode_scalar_<type>: writes the field's tag, then the value, then
+ *                         commits.
+ *   encode_packed_<type>: writes only the value; the tag and length are
+ *                         handled by the enclosing sequence handlers.
+ *
+ * "ctype" is the C type the handler receives, "convert" is a function or cast
+ * applied to the value first, and "encode" is the wire-level encoder.
+ *
+ * int32 and enum values are converted through int64_t so that negative values
+ * are sign-extended to 64 bits and written as ten-byte varints, which is what
+ * the protobuf wire format specifies.  Converting through uint32_t instead
+ * would drop the sign extension and emit a five-byte varint. */
+#define T(type, ctype, convert, encode)                                  \
+  static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \
+    return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e);  \
+  }                                                                      \
+  static bool encode_packed_##type(void *e, const void *hd, ctype val) { \
+    UPB_UNUSED(hd);                                                      \
+    return encode(e, (convert)(val));                                    \
+  }
+
+T(double,   double,   dbl2uint64,   encode_fixed64)
+T(float,    float,    flt2uint32,   encode_fixed32)
+T(int64,    int64_t,  uint64_t,     encode_varint)
+T(int32,    int32_t,  int64_t,      encode_varint)
+T(fixed64,  uint64_t, uint64_t,     encode_fixed64)
+T(fixed32,  uint32_t, uint32_t,     encode_fixed32)
+T(bool,     bool,     bool,         encode_varint)
+T(uint32,   uint32_t, uint32_t,     encode_varint)
+T(uint64,   uint64_t, uint64_t,     encode_varint)
+T(enum,     int32_t,  int64_t,      encode_varint)
+T(sfixed32, int32_t,  uint32_t,     encode_fixed32)
+T(sfixed64, int64_t,  uint64_t,     encode_fixed64)
+T(sint32,   int32_t,  upb_zzenc_32, encode_varint)
+T(sint64,   int64_t,  upb_zzenc_64, encode_varint)
+
+#undef T
+
+
+/* code to build the handlers *************************************************/
+
+/* Callback passed to upb_handlers_newfrozen(): registers the encoder's
+ * handlers on "h" for every field of h's message definition.
+ *
+ * Each field gets a pre-encoded tag as its handler data.  Packed primitive
+ * sequences are wrapped in a delimited region with tag-less value handlers;
+ * everything else writes its tag with every value.  "closure" is unused. */
+static void newhandlers_callback(const void *closure, upb_handlers *h) {
+  const upb_msgdef *m;
+  upb_msg_field_iter i;
+
+  UPB_UNUSED(closure);
+
+  upb_handlers_setstartmsg(h, startmsg, NULL);
+  upb_handlers_setendmsg(h, endmsg, NULL);
+
+  m = upb_handlers_msgdef(h);
+  for(upb_msg_field_begin(&i, m);
+      !upb_msg_field_done(&i);
+      upb_msg_field_next(&i)) {
+    const upb_fielddef *f = upb_msg_iter_field(&i);
+    bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) &&
+                  upb_fielddef_packed(f);
+    upb_handlerattr attr;
+    /* A packed field is written as a single length-delimited blob; any other
+     * field uses the native wire type of its descriptor type. */
+    upb_wiretype_t wt =
+        packed ? UPB_WIRE_TYPE_DELIMITED
+               : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
+
+    /* Pre-encode the tag for this field. */
+    new_tag(h, f, wt, &attr);
+
+    if (packed) {
+      /* The sequence handlers emit the tag and open/close the delimited
+       * region around the packed values. */
+      upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr);
+      upb_handlers_setendseq(h, f, encode_enddelimfield, &attr);
+    }
+
+/* Expands to the switch case for one scalar descriptor type, choosing the
+ * packed or scalar value handler generated earlier in this file. */
+#define T(upper, lower, upbtype)                                     \
+  case UPB_DESCRIPTOR_TYPE_##upper:                                  \
+    if (packed) {                                                    \
+      upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \
+    } else {                                                         \
+      upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \
+    }                                                                \
+    break;
+
+    switch (upb_fielddef_descriptortype(f)) {
+      T(DOUBLE,   double,   double);
+      T(FLOAT,    float,    float);
+      T(INT64,    int64,    int64);
+      T(INT32,    int32,    int32);
+      T(FIXED64,  fixed64,  uint64);
+      T(FIXED32,  fixed32,  uint32);
+      T(BOOL,     bool,     bool);
+      T(UINT32,   uint32,   uint32);
+      T(UINT64,   uint64,   uint64);
+      T(ENUM,     enum,     int32);
+      T(SFIXED32, sfixed32, int32);
+      T(SFIXED64, sfixed64, int64);
+      T(SINT32,   sint32,   int32);
+      T(SINT64,   sint64,   int64);
+      case UPB_DESCRIPTOR_TYPE_STRING:
+      case UPB_DESCRIPTOR_TYPE_BYTES:
+        upb_handlers_setstartstr(h, f, encode_startstr, &attr);
+        upb_handlers_setendstr(h, f, encode_enddelimfield, &attr);
+        upb_handlers_setstring(h, f, encode_strbuf, &attr);
+        break;
+      case UPB_DESCRIPTOR_TYPE_MESSAGE:
+        upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr);
+        upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr);
+        break;
+      case UPB_DESCRIPTOR_TYPE_GROUP: {
+        /* Endgroup takes a different tag (wire_type = END_GROUP). */
+        upb_handlerattr attr2;
+        new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2);
+
+        upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr);
+        upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2);
+
+        upb_handlerattr_uninit(&attr2);
+        break;
+      }
+    }
+
+#undef T
+
+    upb_handlerattr_uninit(&attr);
+  }
+}
+
+/* Clears the encoder's per-message bookkeeping: the nesting depth, the stack
+ * top and the current segment pointer.  The allocated buffers are untouched. */
+void upb_pb_encoder_reset(upb_pb_encoder *e) {
+  e->depth = 0;
+  e->top = NULL;
+  e->segptr = NULL;
+}
+
+
+/* public API *****************************************************************/
+
+/* Creates frozen handlers for message type |m|, registered through
+ * newhandlers_callback.  |owner| is forwarded to upb_handlers_newfrozen(). */
+const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
+                                               const void *owner) {
+  const upb_handlers *encoder_handlers =
+      upb_handlers_newfrozen(m, owner, newhandlers_callback, NULL);
+  return encoder_handlers;
+}
+
+/* Allocates an encoder and its working buffers from |env| and initializes it.
+ *
+ * |h| is bound to the encoder's input sink and |output| is stored as its
+ * output bytes sink.  Returns NULL if any allocation fails. */
+upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h,
+                                      upb_bytessink *output) {
+  /* TODO(haberman): make this configurable. */
+  const size_t stack_frames = 64;
+  const size_t buf_bytes = 256;
+  const size_t seg_count = 16;
+  upb_pb_encoder *enc;
+#ifndef NDEBUG
+  const size_t size_before = upb_env_bytesallocated(env);
+#endif
+
+  enc = upb_env_malloc(env, sizeof(upb_pb_encoder));
+  if (enc == NULL) return NULL;
+
+  enc->buf = upb_env_malloc(env, buf_bytes);
+  enc->segbuf = upb_env_malloc(env, seg_count * sizeof(*enc->segbuf));
+  enc->stack = upb_env_malloc(env, stack_frames * sizeof(*enc->stack));
+  if (enc->buf == NULL || enc->segbuf == NULL || enc->stack == NULL) {
+    return NULL;
+  }
+
+  /* Write cursor starts at the beginning of buf; each limit is one past the
+   * end of its array. */
+  enc->ptr = enc->buf;
+  enc->limit = enc->buf + buf_bytes;
+  enc->seglimit = enc->segbuf + seg_count;
+  enc->stacklimit = enc->stack + stack_frames;
+
+  enc->env = env;
+  enc->output_ = output;
+  enc->subc = output->closure;
+
+  upb_pb_encoder_reset(enc);
+  upb_sink_reset(&enc->input_, h, enc);
+
+  /* If this fails, increase the value in encoder.h. */
+  assert(upb_env_bytesallocated(env) - size_before <= UPB_PB_ENCODER_SIZE);
+  return enc;
+}
+
+/* Returns a pointer to the encoder's input sink (e->input_). */
+upb_sink *upb_pb_encoder_input(upb_pb_encoder *e) {
+  upb_sink *input = &e->input_;
+  return input;
+}
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Decodes the descriptor data in |str| (|len| bytes) and returns a malloc()'d
+ * array holding the defs it describes, writing the number of defs to |*n|.
+ *
+ * The defs are fetched from the descriptor reader with |owner| as their owner.
+ * The caller must free() the returned array.  Returns NULL if decoding fails
+ * or the array cannot be allocated; decoding errors are reported to |status|
+ * through the environment. */
+upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
+                                        void *owner, upb_status *status) {
+  /* Create handlers. */
+  const upb_pbdecodermethod *decoder_m;
+  const upb_handlers *reader_h = upb_descreader_newhandlers(&reader_h);
+  upb_env env;
+  upb_pbdecodermethodopts opts;
+  upb_pbdecoder *decoder;
+  upb_descreader *reader;
+  bool ok;
+  upb_def **ret = NULL;
+  upb_def **defs;
+
+  upb_pbdecodermethodopts_init(&opts, reader_h);
+  decoder_m = upb_pbdecodermethod_new(&opts, &decoder_m);
+
+  upb_env_init(&env);
+  upb_env_reporterrorsto(&env, status);
+
+  reader = upb_descreader_create(&env, reader_h);
+  decoder = upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader));
+
+  /* Push input data. */
+  ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(decoder));
+
+  if (!ok) goto cleanup;
+  defs = upb_descreader_getdefs(reader, owner, n);
+  ret = malloc(sizeof(upb_def*) * (*n));
+  if (!ret) {
+    /* Never memcpy() into a failed allocation.  malloc(0) may legitimately
+     * return NULL, so only flag an error when defs were actually expected. */
+    if (*n > 0 && status) {
+      upb_status_seterrmsg(status, "Out of memory allocating defs array.");
+    }
+    goto cleanup;
+  }
+  memcpy(ret, defs, sizeof(upb_def*) * (*n));
+
+cleanup:
+  upb_env_uninit(&env);
+  upb_handlers_unref(reader_h, &reader_h);
+  upb_pbdecodermethod_unref(decoder_m, &decoder_m);
+  return ret;
+}
+
+/* Loads the defs described by |str| (|len| bytes) and adds them to |s|.
+ * |status| is handed to both the load and the add step.  Returns false if
+ * loading yields no defs array, otherwise the result of upb_symtab_add(). */
+bool upb_load_descriptor_into_symtab(upb_symtab *s, const char *str, size_t len,
+                                     upb_status *status) {
+  int count;
+  bool added;
+  /* The address of this local is used as the owner token for the defs. */
+  upb_def **defs =
+      upb_load_defs_from_descriptor(str, len, &count, &defs, status);
+
+  if (defs == NULL) return false;
+
+  added = upb_symtab_add(s, defs, count, &defs, status);
+  free(defs);
+  return added;
+}
+
+/* Reads the whole of |filename| into a freshly malloc()'d buffer.
+ *
+ * On success returns the buffer (which the caller must free()) and, if |len|
+ * is non-NULL, stores the file size in |*len|.  The buffer holds one extra
+ * byte that is set to '\0', so text files can be used as C strings.  Returns
+ * NULL if the file cannot be opened, sized, read, or if allocation fails. */
+char *upb_readfile(const char *filename, size_t *len) {
+  long size;
+  char *buf = NULL;
+  FILE *f = fopen(filename, "rb");
+  if (!f) return NULL;
+  if (fseek(f, 0, SEEK_END) != 0) goto error;
+  size = ftell(f);
+  if (size < 0) goto error;
+  if (fseek(f, 0, SEEK_SET) != 0) goto error;
+  buf = malloc((size_t)size + 1);
+  if (!buf) goto error;
+  if (size && fread(buf, (size_t)size, 1, f) != 1) goto error;
+  /* The allocation reserves one byte past the data; terminate it. */
+  buf[size] = '\0';
+  fclose(f);
+  if (len) *len = (size_t)size;
+  return buf;
+
+error:
+  /* free(NULL) is a no-op, so this is safe before the allocation as well. */
+  free(buf);
+  fclose(f);
+  return NULL;
+}
+
+/* Reads the file |fname| and loads its contents into |symtab| as descriptor
+ * data.  If the file cannot be read, an error is recorded in |status| (when
+ * non-NULL) and false is returned; otherwise returns the load result. */
+bool upb_load_descriptor_file_into_symtab(upb_symtab *symtab, const char *fname,
+                                          upb_status *status) {
+  size_t nbytes;
+  bool loaded;
+  char *contents = upb_readfile(fname, &nbytes);
+
+  if (contents != NULL) {
+    loaded = upb_load_descriptor_into_symtab(symtab, contents, nbytes, status);
+    free(contents);
+    return loaded;
+  }
+
+  if (status) upb_status_seterrf(status, "Couldn't read file: %s", fname);
+  return false;
+}
+/*
+ * upb::pb::TextPrinter
+ *
+ * OPT: This is not optimized at all.  It uses printf() which parses the format
+ * string every time, and it allocates memory for every put.
+ */
+
+
+#include <ctype.h>
+#include <float.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+struct upb_textprinter {  /* State of one text-format printer. */
+  upb_sink input_;         /* Sink reset with the printer as its closure. */
+  upb_bytessink *output_;  /* Bytes sink that all generated text is put to. */
+  int indent_depth_;       /* Submessage nesting level; 0 at the top level. */
+  bool single_line_;       /* If set: no indentation, fields end with ' '. */
+  void *subc;              /* Set by upb_bytessink_start(); passed to putbuf. */
+};
+
+#define CHECK(x) if ((x) < 0) goto err;  /* Jumps to a local "err" label when x is negative. */
+
+/* Returns the part of |longname| that follows its last '.', or |longname|
+ * itself when it contains no '.'.  The result points into |longname|. */
+static const char *shortname(const char *longname) {
+  const char *dot = strrchr(longname, '.');
+  if (dot == NULL) return longname;
+  return dot + 1;
+}
+
+/* Writes two spaces per indentation level to the output, unless the printer
+ * is in single-line mode.  Always returns 0. */
+static int indent(upb_textprinter *p) {
+  int remaining;
+  if (p->single_line_) return 0;
+  for (remaining = p->indent_depth_; remaining > 0; remaining--) {
+    upb_bytessink_putbuf(p->output_, p->subc, "  ", 2, NULL);
+  }
+  return 0;
+}
+
+/* Writes the field terminator: a space in single-line mode, otherwise a
+ * newline.  Always returns 0. */
+static int endfield(upb_textprinter *p) {
+  char terminator = '\n';
+  if (p->single_line_) terminator = ' ';
+  upb_bytessink_putbuf(p->output_, p->subc, &terminator, 1, NULL);
+  return 0;
+}
+
+/* Writes |len| bytes of |buf| to the printer's output with C-style escaping.
+ *
+ * Newline, carriage return, tab, both quote characters and backslash get
+ * their two-character escapes; other non-printable bytes are written as
+ * three-digit octal escapes.  When |preserve_utf8| is true, bytes >= 0x80 are
+ * passed through unescaped.  Output is staged in a stack buffer and flushed
+ * to the sink in chunks.  Always returns 0. */
+static int putescaped(upb_textprinter *p, const char *buf, size_t len,
+                      bool preserve_utf8) {
+  /* Based on CEscapeInternal() from Google's protobuf release. */
+  char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
+  const char *end = buf + len;
+
+  /* I think hex is prettier and more useful, but proto2 uses octal; should
+   * investigate whether it can parse hex also. */
+  const bool use_hex = false;
+  bool last_hex_escape = false; /* true if last output char was \xNN */
+
+  for (; buf < end; buf++) {
+    /* The <ctype.h> classifiers are only defined for values representable as
+     * unsigned char (or EOF), so never hand them a possibly negative char. */
+    const unsigned char ch = (unsigned char)*buf;
+    bool is_hex_escape = false;
+
+    /* The longest escape is 4 characters and sprintf() appends a NUL after
+     * it, so at least 5 bytes must be free before encoding the next byte. */
+    if (dstend - dst < 5) {
+      upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
+      dst = dstbuf;
+    }
+
+    switch (ch) {
+      case '\n': *(dst++) = '\\'; *(dst++) = 'n';  break;
+      case '\r': *(dst++) = '\\'; *(dst++) = 'r';  break;
+      case '\t': *(dst++) = '\\'; *(dst++) = 't';  break;
+      case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
+      case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
+      case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
+      default:
+        /* Note that if we emit \xNN and the buf character after that is a hex
+         * digit then that digit must be escaped too to prevent it being
+         * interpreted as part of the character code by C. */
+        if ((!preserve_utf8 || ch < 0x80) &&
+            (!isprint(ch) || (last_hex_escape && isxdigit(ch)))) {
+          sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (unsigned int)ch);
+          is_hex_escape = use_hex;
+          dst += 4;
+        } else {
+          *(dst++) = *buf;
+        }
+        break;
+    }
+    last_hex_escape = is_hex_escape;
+  }
+  /* Flush remaining data. */
+  upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
+  return 0;
+}
+
+/* printf-style output: formats |fmt| and its arguments into a temporary heap
+ * buffer and puts the result to the printer's output sink.
+ *
+ * Returns false if formatting fails or memory cannot be allocated; otherwise
+ * returns the result of upb_bytessink_putbuf() converted to bool. */
+bool putf(upb_textprinter *p, const char *fmt, ...) {
+  va_list args;
+  va_list args_copy;
+  char *str;
+  int written;
+  int len;
+  bool ok;
+
+  va_start(args, fmt);
+
+  /* Run once to get the length of the string. */
+  _upb_va_copy(args_copy, args);
+  len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
+  va_end(args_copy);
+
+  /* A negative length means the format could not be processed; sizing a
+   * buffer from it would under-allocate. */
+  if (len < 0) {
+    va_end(args);
+    return false;
+  }
+
+  /* + 1 for NULL terminator (vsprintf() requires it even if we don't). */
+  str = malloc((size_t)len + 1);
+  if (!str) {
+    /* Every va_start() must be matched by va_end() before returning. */
+    va_end(args);
+    return false;
+  }
+  written = vsprintf(str, fmt, args);
+  va_end(args);
+  UPB_ASSERT_VAR(written, written == len);
+
+  ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL);
+  free(str);
+  return ok;
+}
+
+
+/* handlers *******************************************************************/
+
+/* Start-of-message handler.  At nesting depth 0 it starts the output bytes
+ * sink, storing the sink's closure in p->subc.  Always returns true. */
+static bool textprinter_startmsg(void *c, const void *hd) {
+  upb_textprinter *printer = c;
+  const bool at_top_level = (printer->indent_depth_ == 0);
+  UPB_UNUSED(hd);
+  if (at_top_level) {
+    upb_bytessink_start(printer->output_, 0, &printer->subc);
+  }
+  return true;
+}
+
+/* End-of-message handler.  At nesting depth 0 it ends the output bytes sink.
+ * |hd| and |s| are unused.  Always returns true. */
+static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) {
+  upb_textprinter *printer = c;
+  const bool at_top_level = (printer->indent_depth_ == 0);
+  UPB_UNUSED(hd);
+  UPB_UNUSED(s);
+  if (at_top_level) {
+    upb_bytessink_end(printer->output_);
+  }
+  return true;
+}
+
+/* Defines textprinter_put<name>(), a value handler taking a |ctype| value.
+ * The generated function writes indentation, then "<field name>: <value>"
+ * formatted with printf format |fmt|, then the field terminator.  The field
+ * name comes from the fielddef passed as handler data.
+ * NOTE(review): the result of putf() is ignored, so a failed write is not
+ * reported to the caller. */
+#define TYPE(name, ctype, fmt) \
+  static bool textprinter_put ## name(void *closure, const void *handler_data, \
+                                      ctype val) {                             \
+    upb_textprinter *p = closure;                                              \
+    const upb_fielddef *f = handler_data;                                      \
+    CHECK(indent(p));                                                          \
+    putf(p, "%s: " fmt, upb_fielddef_name(f), val);                            \
+    CHECK(endfield(p));                                                        \
+    return true;                                                               \
+  err:                                                                         \
+    return false;                                                              \
+}
+
+/* Value handler for bool fields: prints "<field name>: true" or
+ * "<field name>: false", surrounded by indentation and the field terminator.
+ * |handler_data| is the field's fielddef. */
+static bool textprinter_putbool(void *closure, const void *handler_data,
+                                bool val) {
+  upb_textprinter *printer = closure;
+  const upb_fielddef *field = handler_data;
+  const char *text = val ? "true" : "false";
+
+  if (indent(printer) < 0) return false;
+  putf(printer, "%s: %s", upb_fielddef_name(field), text);
+  if (endfield(printer) < 0) return false;
+  return true;
+}
+
+/* Two-level stringification: STRINGIFY_MACROVAL(x) expands x first and then
+ * turns the expansion into a string literal. */
+#define STRINGIFY_HELPER(x) #x
+#define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x)
+
+/* One put handler per numeric C type.  The float and double formats use the
+ * values of FLT_DIG and DBL_DIG as the "%g" precision. */
+TYPE(int32,  int32_t,  "%" PRId32)
+TYPE(int64,  int64_t,  "%" PRId64)
+TYPE(uint32, uint32_t, "%" PRIu32)
+TYPE(uint64, uint64_t, "%" PRIu64)
+TYPE(float,  float,    "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
+TYPE(double, double,   "%." STRINGIFY_MACROVAL(DBL_DIG) "g")
+
+#undef TYPE
+
+/* Value handler for enum fields.  Prints the symbolic name of |val| when the
+ * field's enumdef has one; otherwise falls back to the int32 handler and
+ * returns its result. */
+static bool textprinter_putenum(void *closure, const void *handler_data,
+                                int32_t val) {
+  upb_textprinter *printer = closure;
+  const upb_fielddef *field = handler_data;
+  const upb_enumdef *enum_def =
+      upb_downcast_enumdef(upb_fielddef_subdef(field));
+  const char *label = upb_enumdef_iton(enum_def, val);
+
+  /* No name is registered for this number: print it numerically. */
+  if (label == NULL) {
+    return textprinter_putint32(closure, handler_data, val);
+  }
+
+  indent(printer);
+  putf(printer, "%s: %s", upb_fielddef_name(field), label);
+  endfield(printer);
+  return true;
+}
+
+/* Start-of-string handler: writes indentation followed by the field name and
+ * an opening double quote.  Returns the printer as the closure for the string
+ * handlers.  |size_hint| is unused. */
+static void *textprinter_startstr(void *closure, const void *handler_data,
+                      size_t size_hint) {
+  upb_textprinter *printer = closure;
+  const upb_fielddef *field = handler_data;
+  const char *field_name;
+  UPB_UNUSED(size_hint);
+  indent(printer);
+  field_name = upb_fielddef_name(field);
+  putf(printer, "%s: \"", field_name);
+  return printer;
+}
+
+/* End-of-string handler: writes the closing double quote and the field
+ * terminator.  |handler_data| is unused.  Always returns true. */
+static bool textprinter_endstr(void *closure, const void *handler_data) {
+  upb_textprinter *printer = closure;
+  UPB_UNUSED(handler_data);
+  putf(printer, "\"");
+  endfield(printer);
+  return true;
+}
+
+/* String-data handler: writes |len| bytes of |buf| in escaped form.  Bytes
+ * >= 0x80 are left unescaped only when the field's type is UPB_TYPE_STRING.
+ * Returns |len| on success and 0 if escaping reports an error.  |handle| is
+ * unused. */
+static size_t textprinter_putstr(void *closure, const void *hd, const char *buf,
+                                 size_t len, const upb_bufhandle *handle) {
+  upb_textprinter *printer = closure;
+  const upb_fielddef *field = hd;
+  bool keep_utf8;
+  UPB_UNUSED(handle);
+  keep_utf8 = (upb_fielddef_type(field) == UPB_TYPE_STRING);
+  if (putescaped(printer, buf, len, keep_utf8) < 0) return 0;
+  return len;
+}
+
+/* Start-of-submessage handler: writes indentation, then "<name> {" followed
+ * by a space (single-line mode) or a newline, and increases the nesting
+ * depth.  |handler_data| is the name to print.  Returns the printer as the
+ * closure for the submessage, or UPB_BREAK if indenting fails. */
+static void *textprinter_startsubmsg(void *closure, const void *handler_data) {
+  upb_textprinter *printer = closure;
+  const char *name = handler_data;
+  const char after_brace = printer->single_line_ ? ' ' : '\n';
+
+  if (indent(printer) < 0) return UPB_BREAK;
+  putf(printer, "%s {%c", name, after_brace);
+  printer->indent_depth_++;
+  return printer;
+}
+
+/* End-of-submessage handler: decreases the nesting depth, then writes
+ * indentation, a closing brace and the field terminator.  |handler_data| is
+ * unused.  Returns false if indenting or terminating reports an error. */
+static bool textprinter_endsubmsg(void *closure, const void *handler_data) {
+  upb_textprinter *printer = closure;
+  UPB_UNUSED(handler_data);
+  printer->indent_depth_--;
+  if (indent(printer) < 0) return false;
+  upb_bytessink_putbuf(printer->output_, printer->subc, "}", 1, NULL);
+  return endfield(printer) >= 0;
+}
+
+/* Registration callback given to upb_handlers_newfrozen(): installs the
+ * text-printer handlers on |h| for the message itself and for each of its
+ * fields, chosen by field type.  |c| is unused. */
+static void onmreg(const void *c, upb_handlers *h) {
+  const upb_msgdef *m = upb_handlers_msgdef(h);
+  upb_msg_field_iter i;
+  UPB_UNUSED(c);
+
+  upb_handlers_setstartmsg(h, textprinter_startmsg, NULL);
+  upb_handlers_setendmsg(h, textprinter_endmsg, NULL);
+
+  for(upb_msg_field_begin(&i, m);
+      !upb_msg_field_done(&i);
+      upb_msg_field_next(&i)) {
+    upb_fielddef *f = upb_msg_iter_field(&i);
+    upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
+    /* Handlers receive the field's fielddef as handler data, except for
+     * submessages, which receive the name to print (see below). */
+    upb_handlerattr_sethandlerdata(&attr, f);
+    switch (upb_fielddef_type(f)) {
+      case UPB_TYPE_INT32:
+        upb_handlers_setint32(h, f, textprinter_putint32, &attr);
+        break;
+      case UPB_TYPE_INT64:
+        upb_handlers_setint64(h, f, textprinter_putint64, &attr);
+        break;
+      case UPB_TYPE_UINT32:
+        upb_handlers_setuint32(h, f, textprinter_putuint32, &attr);
+        break;
+      case UPB_TYPE_UINT64:
+        upb_handlers_setuint64(h, f, textprinter_putuint64, &attr);
+        break;
+      case UPB_TYPE_FLOAT:
+        upb_handlers_setfloat(h, f, textprinter_putfloat, &attr);
+        break;
+      case UPB_TYPE_DOUBLE:
+        upb_handlers_setdouble(h, f, textprinter_putdouble, &attr);
+        break;
+      case UPB_TYPE_BOOL:
+        upb_handlers_setbool(h, f, textprinter_putbool, &attr);
+        break;
+      case UPB_TYPE_STRING:
+      case UPB_TYPE_BYTES:
+        upb_handlers_setstartstr(h, f, textprinter_startstr, &attr);
+        upb_handlers_setstring(h, f, textprinter_putstr, &attr);
+        upb_handlers_setendstr(h, f, textprinter_endstr, &attr);
+        break;
+      case UPB_TYPE_MESSAGE: {
+        /* Tag-delimited fields print the short name of the submessage type;
+         * all other submessage fields print the field name. */
+        const char *name =
+            upb_fielddef_istagdelim(f)
+                ? shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(f)))
+                : upb_fielddef_name(f);
+        upb_handlerattr_sethandlerdata(&attr, name);
+        upb_handlers_setstartsubmsg(h, f, textprinter_startsubmsg, &attr);
+        upb_handlers_setendsubmsg(h, f, textprinter_endsubmsg, &attr);
+        break;
+      }
+      case UPB_TYPE_ENUM:
+        upb_handlers_setint32(h, f, textprinter_putenum, &attr);
+        break;
+    }
+
+    /* Pair the attr with an uninit once its handlers are registered, as the
+     * binary encoder's registration callback in this file does. */
+    upb_handlerattr_uninit(&attr);
+  }
+}
+
+/* Returns the printer to nesting depth 0 and selects single-line or
+ * multi-line output. */
+static void textprinter_reset(upb_textprinter *p, bool single_line) {
+  p->indent_depth_ = 0;
+  p->single_line_ = single_line;
+}
+
+
+/* Public API *****************************************************************/
+
+/* Allocates a text printer from |env| that writes to |output|, with |h| bound
+ * to its input sink.  The printer starts in multi-line mode.  Returns NULL if
+ * the allocation fails. */
+upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
+                                        upb_bytessink *output) {
+  upb_textprinter *printer = upb_env_malloc(env, sizeof(*printer));
+  if (printer == NULL) return NULL;
+
+  textprinter_reset(printer, false);
+  printer->output_ = output;
+  upb_sink_reset(&printer->input_, h, printer);
+
+  return printer;
+}
+
+/* Creates frozen text-printing handlers for message type |m|, registered
+ * through onmreg.  |owner| is forwarded to upb_handlers_newfrozen(). */
+const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
+                                                const void *owner) {
+  const upb_handlers *printer_handlers =
+      upb_handlers_newfrozen(m, owner, &onmreg, NULL);
+  return printer_handlers;
+}
+
+/* Returns a pointer to the printer's input sink (p->input_). */
+upb_sink *upb_textprinter_input(upb_textprinter *p) {
+  upb_sink *input = &p->input_;
+  return input;
+}
+
+/* Selects single-line (true) or multi-line (false) output for |p|.  Only the
+ * mode flag changes; the current nesting depth is left as it is. */
+void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
+  upb_textprinter *printer = p;
+  printer->single_line_ = single_line;
+}
+
+
+/* Index is descriptor type.
+ *
+ * Maps each descriptor type to the wire type listed beside it.  The encoder's
+ * handler registration indexes this table with upb_fielddef_descriptortype()
+ * to choose the wire type for non-packed fields.  Slot 0 is labelled ENDGROUP
+ * and the remaining slots follow the order given in the per-entry comments. */
+const uint8_t upb_pb_native_wire_types[] = {
+  UPB_WIRE_TYPE_END_GROUP,     /* ENDGROUP */
+  UPB_WIRE_TYPE_64BIT,         /* DOUBLE */
+  UPB_WIRE_TYPE_32BIT,         /* FLOAT */
+  UPB_WIRE_TYPE_VARINT,        /* INT64 */
+  UPB_WIRE_TYPE_VARINT,        /* UINT64 */
+  UPB_WIRE_TYPE_VARINT,        /* INT32 */
+  UPB_WIRE_TYPE_64BIT,         /* FIXED64 */
+  UPB_WIRE_TYPE_32BIT,         /* FIXED32 */
+  UPB_WIRE_TYPE_VARINT,        /* BOOL */
+  UPB_WIRE_TYPE_DELIMITED,     /* STRING */
+  UPB_WIRE_TYPE_START_GROUP,   /* GROUP */
+  UPB_WIRE_TYPE_DELIMITED,     /* MESSAGE */
+  UPB_WIRE_TYPE_DELIMITED,     /* BYTES */
+  UPB_WIRE_TYPE_VARINT,        /* UINT32 */
+  UPB_WIRE_TYPE_VARINT,        /* ENUM */
+  UPB_WIRE_TYPE_32BIT,         /* SFIXED32 */
+  UPB_WIRE_TYPE_64BIT,         /* SFIXED64 */
+  UPB_WIRE_TYPE_VARINT,        /* SINT32 */
+  UPB_WIRE_TYPE_VARINT,        /* SINT64 */
+};
+
+/* A basic branch-based decoder, uses 32-bit values to get good performance
+ * on 32-bit architectures (but performs well on 64-bits also).
+ * This scheme comes from the original Google Protobuf implementation
+ * (proto2).
+ *
+ * Reads up to 8 further bytes from r.p, OR-ing each byte's low 7 bits into
+ * the value that is already in r.val.  The first byte read lands at bit 14,
+ * so the caller has presumably already decoded the first two bytes of the
+ * varint into r.val (confirm against the call site).  The 64-bit result is
+ * assembled from two 32-bit halves.
+ *
+ * On success returns the completed value with r.p advanced past the final
+ * byte.  If all 8 bytes have their continuation bit set, returns {NULL, 0}. */
+upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) {
+  upb_decoderet err = {NULL, 0};
+  const char *p = r.p;
+  uint32_t low = (uint32_t)r.val;
+  uint32_t high = 0;
+  uint32_t b;
+  b = *(p++); low  |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
+  b = *(p++); low  |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
+  b = *(p++); low  |= (b & 0x7fU) << 28;
+              high  = (b & 0x7fU) >>  4; if (!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7fU) <<  3; if (!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done;
+  b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done;
+  return err;
+
+done:
+  r.val = ((uint64_t)high << 32) | low;
+  r.p = p;
+  return r;
+}
+
+/* Like the previous, but uses 64-bit values.
+ *
+ * Reads up to 8 further bytes from r.p, OR-ing each byte's low 7 bits into
+ * r.val at bit offsets 14, 21, ..., 63.  Returns the completed value with r.p
+ * advanced past the final byte, or {NULL, 0} if all 8 bytes have their
+ * continuation bit set. */
+upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) {
+  const char *p = r.p;
+  uint64_t val = r.val;
+  uint64_t b;
+  upb_decoderet err = {NULL, 0};
+  b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done;
+  b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done;
+  return err;
+
+done:
+  r.val = val;
+  r.p = p;
+  return r;
+}
+
+/* Given the encoded varint bytes in |v|, returns a value with a single bit
+ * set: the continuation-bit position of the byte that ends the varint.
+ * Subtracting one from the result gives a mask that keeps only the bits
+ * belonging to the varint.  Returns 0 if no byte in |v| ends the varint. */
+static uint64_t upb_get_vstopbit(uint64_t v) {
+  /* Set every payload bit so that only continuation bits can be clear. */
+  const uint64_t filled = v | 0x7f7f7f7f7f7f7f7fULL;
+  /* Adding one carries through the low run of ones and sets the first clear
+   * continuation bit; masking with ~filled isolates exactly that bit. */
+  const uint64_t carried = filled + 1;
+  return carried & ~filled;
+}
+
+/* A branchless decoder.  Credit to Pascal Massimino for the bit-twiddling.
+ *
+ * Copies sizeof(uint64_t) bytes from r.p in one memcpy(), so that many bytes
+ * must be readable at r.p.  The stop bit locates the final byte of the
+ * varint; the bytes after it are masked off and the 7-bit groups are then
+ * compacted with three add steps.  The compacted value is shifted left by 7
+ * and OR-ed into r.val.
+ *
+ * Returns the decoded value with the pointer advanced past the final byte, or
+ * {NULL, 0} if none of the copied bytes terminates the varint.  The
+ * __builtin_ctzll() call is only reached once stop_bit is known to be
+ * non-zero. */
+upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
+  uint64_t b;
+  uint64_t stop_bit;
+  upb_decoderet my_r;
+  memcpy(&b, r.p, sizeof(b));
+  stop_bit = upb_get_vstopbit(b);
+  b =  (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1);
+  b +=       b & 0x007f007f007f007fULL;
+  b +=  3 * (b & 0x0000ffff0000ffffULL);
+  b += 15 * (b & 0x00000000ffffffffULL);
+  if (stop_bit == 0) {
+    /* Error: unterminated varint. */
+    upb_decoderet err_r = {(void*)0, 0};
+    return err_r;
+  }
+  my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
+                            r.val | (b << 7));
+  return my_r;
+}
+
+/* A branchless decoder.  Credit to Daniel Wright for the bit-twiddling.
+ *
+ * Copies sizeof(uint64_t) bytes from r.p in one memcpy(), so that many bytes
+ * must be readable at r.p.  Bytes after the varint's final byte are masked
+ * off using the stop bit, and the 7-bit groups are compacted with three
+ * shift-and-merge steps.  The compacted value is shifted left by 14 and OR-ed
+ * into r.val.
+ *
+ * Returns the decoded value with the pointer advanced past the final byte, or
+ * {NULL, 0} if none of the copied bytes terminates the varint.  The
+ * __builtin_ctzll() call is only reached once stop_bit is known to be
+ * non-zero. */
+upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
+  uint64_t b;
+  uint64_t stop_bit;
+  upb_decoderet my_r;
+  memcpy(&b, r.p, sizeof(b));
+  stop_bit = upb_get_vstopbit(b);
+  b &= (stop_bit - 1);
+  b = ((b & 0x7f007f007f007f00ULL) >> 1) | (b & 0x007f007f007f007fULL);
+  b = ((b & 0xffff0000ffff0000ULL) >> 2) | (b & 0x0000ffff0000ffffULL);
+  b = ((b & 0xffffffff00000000ULL) >> 4) | (b & 0x00000000ffffffffULL);
+  if (stop_bit == 0) {
+    /* Error: unterminated varint. */
+    upb_decoderet err_r = {(void*)0, 0};
+    return err_r;
+  }
+  my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
+                            r.val | (b << 14));
+  return my_r;
+}
+
+#line 1 "upb/json/parser.rl"
+/*
+** upb::json::Parser (upb_json_parser)
+**
+** A parser that uses the Ragel State Machine Compiler to generate
+** the finite automata.
+**
+** Ragel only natively handles regular languages, but we can manually
+** program it a bit to handle context-free languages like JSON, by using
+** the "fcall" and "fret" constructs.
+**
+** This parser can handle the basics, but needs several things to be fleshed
+** out:
+**
+** - handling of unicode escape sequences (including high surrogate pairs).
+** - properly check and report errors for unknown fields, stack overflow,
+**   improper array nesting (or lack of nesting).
+** - handling of base64 sequences with padding characters.
+** - handling of push-back (non-success returns from sink functions).
+** - handling of keys/escape-sequences/etc that span input buffers.
+*/
+
+#include <stdio.h>
+#include <stdint.h>
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+
+
+#define UPB_JSON_MAX_DEPTH 64  /* Size of both the frame stack and Ragel's state stack. */
+
+typedef struct {  /* One entry of the parser's stack of JSON scopes. */
+  upb_sink sink;  /* Sink that values parsed in this scope are put to. */
+
+  /* The current message in which we're parsing, and the field whose value we're
+   * expecting next. */
+  const upb_msgdef *m;
+  const upb_fielddef *f;
+
+  /* We are in a repeated-field context, ready to emit mapentries as
+   * submessages. This flag alters the start-of-object (open-brace) behavior to
+   * begin a sequence of mapentry messages rather than a single submessage. */
+  bool is_map;
+
+  /* We are in a map-entry message context. This flag is set when parsing the
+   * value field of a single map entry and indicates to all value-field parsers
+   * (subobjects, strings, numbers, and bools) that the map-entry submessage
+   * should end as soon as the value is parsed. */
+  bool is_mapentry;
+
+  /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
+   * message's map field that we're currently parsing. This differs from |f|
+   * because |f| is the field in the *current* message (i.e., the map-entry
+   * message itself), not the parent's field that leads to this map. */
+  const upb_fielddef *mapfield;
+} upb_jsonparser_frame;
+
+struct upb_json_parser {
+  upb_env *env;  /* Used for buffer reallocation and for reporting errors. */
+  upb_byteshandler input_handler_;
+  upb_bytessink input_;
+
+  /* Stack to track the JSON scopes we are in. */
+  upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
+  upb_jsonparser_frame *top;    /* Frame of the scope currently being parsed. */
+  upb_jsonparser_frame *limit;  /* check_stack() fails once top + 1 reaches this. */
+
+  upb_status status;  /* Filled in with a message before each error report. */
+
+  /* Ragel's internal parsing stack for the parsing state machine. */
+  int current_state;
+  int parser_stack[UPB_JSON_MAX_DEPTH];
+  int parser_top;
+
+  /* The handle for the current buffer. */
+  const upb_bufhandle *handle;
+
+  /* Accumulate buffer.  See details in parser.rl. */
+  const char *accumulated;     /* Input buffer or accumulate_buf; NULL if empty. */
+  size_t accumulated_len;      /* Number of bytes accumulated so far. */
+  char *accumulate_buf;        /* Env-allocated copy buffer, grown on demand. */
+  size_t accumulate_buf_size;  /* Current capacity of accumulate_buf. */
+
+  /* Multi-part text data.  See details in parser.rl. */
+  int multipart_state;
+  upb_selector_t string_selector;
+
+  /* Input capture.  See details in parser.rl. */
+  const char *capture;
+
+  /* Intermediate result of parsing a unicode escape sequence. */
+  uint32_t digit;
+};
+
+#define PARSER_CHECK_RETURN(x) if (!(x)) return false  /* Caller supplies the ';'. */
+
+/* Used to signal that a capture has been suspended.
+ * NOTE(review): presumably only the address of this variable is meaningful,
+ * as a sentinel value for p->capture; confirm against the capture code. */
+static char suspend_capture;
+
+/* Looks up the selector for handler type |type| on the field of the current
+ * frame (p->top->f).  The lookup is asserted to succeed. */
+static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
+                                             upb_handlertype_t type) {
+  const upb_fielddef *field = p->top->f;
+  upb_selector_t selector;
+  bool found = upb_handlers_getselector(field, type, &selector);
+  UPB_ASSERT_VAR(found, found);
+  return selector;
+}
+
+/* Returns the selector for the primitive-value handler of the current
+ * frame's field. */
+static upb_selector_t parser_getsel(upb_json_parser *p) {
+  upb_handlertype_t type = upb_handlers_getprimitivehandlertype(p->top->f);
+  return getsel_for_handlertype(p, type);
+}
+
+/* Returns true if another frame can be pushed onto the parser's stack.
+ * Otherwise records "Nesting too deep" in the parser's status, reports it to
+ * the environment and returns false. */
+static bool check_stack(upb_json_parser *p) {
+  const bool has_room = (p->top + 1) != p->limit;
+  if (has_room) return true;
+
+  upb_status_seterrmsg(&p->status, "Nesting too deep");
+  upb_env_reporterror(p->env, &p->status);
+  return false;
+}
+
+/* There are GCC/Clang built-ins for overflow checking which we could start
+ * using if there was any performance benefit to it. */
+
+/* Stores a + b in |*c| and returns true, unless the sum would exceed
+ * SIZE_MAX, in which case |*c| is left untouched and false is returned. */
+static bool checked_add(size_t a, size_t b, size_t *c) {
+  const size_t headroom = SIZE_MAX - a;
+  if (b > headroom) return false;
+  *c = a + b;
+  return true;
+}
+
+/* Returns a * b, or SIZE_MAX if the exact product does not fit in size_t. */
+static size_t saturating_multiply(size_t a, size_t b) {
+  /* For b != 0 the product overflows exactly when a exceeds SIZE_MAX / b. */
+  if (b != 0 && a > SIZE_MAX / b) {
+    return SIZE_MAX;
+  }
+  return a * b;
+}
+
+
+/* Base64 decoding ************************************************************/
+
+/* TODO(haberman): make this streaming. */
+
+static const signed char b64table[] = {
+  /* 256 entries, indexed by input byte value.  Each entry is the 6-bit value
+   * of that base64 character, or -1 for any byte outside the base64 alphabet
+   * (this includes the padding character '='). */
+  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
+  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
+  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
+  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
+  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
+  -1,      -1,      -1,      62/*+*/, -1,      -1,      -1,      63/*/ */,
+  52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
+  60/*8*/, 61/*9*/, -1,      -1,      -1,      -1,      -1,      -1,
+  -1,       0/*A*/,  1/*B*/,  2/*C*/,  3/*D*/,  4/*E*/,  5/*F*/,  6/*G*/,
+  07/*H*/,  8/*I*/,  9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
+  15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
+  23/*X*/, 24/*Y*/, 25/*Z*/, -1,      -1,      -1,      -1,      -1,
+  -1,      26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
+  33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
+  41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
+  49/*x*/, 50/*y*/, 51/*z*/, -1,      -1,      -1,      -1,      -1,
+  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
+  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
+  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
+  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
+  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
+  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
+  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
+  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
+  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
+  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
+  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
+  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
+  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
+  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
+  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
+  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1
+};
+
+/* Returns the b64table entry for |ch|, sign-extended to 32 bits.  Because an
+ * unrecognized character yields -1 (all upper bits set), callers can detect
+ * it after combining several lookups (see below). */
+int32_t b64lookup(unsigned char ch) {
+  const int32_t value = b64table[ch];
+  return value;
+}
+
+/* Returns true if |ch| is neither a base64 alphabet character nor the
+ * padding character '='. */
+bool nonbase64(unsigned char ch) {
+  if (ch == '=') return false;
+  return b64lookup(ch) == -1;
+}
+
+/* Decodes |len| bytes of base64 text at |ptr| and puts the decoded bytes to
+ * the current frame's sink using selector |sel|.
+ *
+ * Input is consumed in groups of four characters.  A final group may carry
+ * one or two '=' padding characters.  Returns false, after recording and
+ * reporting an error, if the input length is not a multiple of four, if a
+ * character is neither base64 nor '=', or if the padding is malformed.
+ *
+ * Lookup results are converted to uint32_t before shifting: left-shifting the
+ * negative value that marks an invalid character would be undefined
+ * behaviour, whereas the unsigned shift still leaves the top bit set. */
+static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr,
+                        size_t len) {
+  const char *limit = ptr + len;
+  for (; ptr < limit; ptr += 4) {
+    uint32_t val;
+    char output[3];
+
+    if (limit - ptr < 4) {
+      upb_status_seterrf(&p->status,
+                         "Base64 input for bytes field not a multiple of 4: %s",
+                         upb_fielddef_name(p->top->f));
+      upb_env_reporterror(p->env, &p->status);
+      return false;
+    }
+
+    val = (uint32_t)b64lookup(ptr[0]) << 18 |
+          (uint32_t)b64lookup(ptr[1]) << 12 |
+          (uint32_t)b64lookup(ptr[2]) << 6  |
+          (uint32_t)b64lookup(ptr[3]);
+
+    /* Test the upper bit; returns true if any of the characters returned -1. */
+    if (val & 0x80000000) {
+      goto otherchar;
+    }
+
+    output[0] = val >> 16;
+    output[1] = (val >> 8) & 0xff;
+    output[2] = val & 0xff;
+    upb_sink_putstring(&p->top->sink, sel, output, 3, NULL);
+  }
+  return true;
+
+otherchar:
+  /* At least one character of this group is not in the base64 alphabet: it
+   * is either padding or an error. */
+  if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) ||
+      nonbase64(ptr[3]) ) {
+    upb_status_seterrf(&p->status,
+                       "Non-base64 characters in bytes field: %s",
+                       upb_fielddef_name(p->top->f));
+    upb_env_reporterror(p->env, &p->status);
+    return false;
+  } else if (ptr[2] == '=') {
+    uint32_t val;
+    char output;
+
+    /* Last group contains only two input bytes, one output byte. */
+    if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') {
+      goto badpadding;
+    }
+
+    val = (uint32_t)b64lookup(ptr[0]) << 18 |
+          (uint32_t)b64lookup(ptr[1]) << 12;
+
+    assert(!(val & 0x80000000));
+    output = val >> 16;
+    upb_sink_putstring(&p->top->sink, sel, &output, 1, NULL);
+    return true;
+  } else {
+    uint32_t val;
+    char output[2];
+
+    /* Last group contains only three input bytes, two output bytes. */
+    if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') {
+      goto badpadding;
+    }
+
+    val = (uint32_t)b64lookup(ptr[0]) << 18 |
+          (uint32_t)b64lookup(ptr[1]) << 12 |
+          (uint32_t)b64lookup(ptr[2]) << 6;
+
+    output[0] = val >> 16;
+    output[1] = (val >> 8) & 0xff;
+    upb_sink_putstring(&p->top->sink, sel, output, 2, NULL);
+    return true;
+  }
+
+badpadding:
+  upb_status_seterrf(&p->status,
+                     "Incorrect base64 padding for field: %s (%.*s)",
+                     upb_fielddef_name(p->top->f),
+                     4, ptr);
+  upb_env_reporterror(p->env, &p->status);
+  return false;
+}
+
+
+/* Accumulate buffer **********************************************************/
+
+/* Functionality for accumulating a buffer.
+ *
+ * Some parts of the parser need an entire value as a contiguous string.  For
+ * example, to look up a member name in a hash table, or to turn a string into
+ * a number, the relevant library routines need the input string to be in
+ * contiguous memory, even if the value spanned two or more buffers in the
+ * input.  These routines handle that.
+ *
+ * In the common case we can just point to the input buffer to get this
+ * contiguous string and avoid any actual copy.  So we optimistically begin
+ * this way.  But there are a few cases where we must instead copy into a
+ * separate buffer:
+ *
+ *   1. The string was not contiguous in the input (it spanned buffers).
+ *
+ *   2. The string included escape sequences that need to be interpreted to get
+ *      the true value in a contiguous buffer. */
+
+/* Debug check that nothing is currently accumulated: the accumulated pointer
+ * must be NULL and the accumulated length must be zero. */
+static void assert_accumulate_empty(upb_json_parser *p) {
+  assert(p->accumulated == NULL);
+  assert(p->accumulated_len == 0);
+  /* With asserts compiled out, |p| would otherwise be unreferenced. */
+  UPB_UNUSED(p);
+}
+
+/* Discards whatever has been accumulated.  The accumulate buffer itself is
+ * kept, so its capacity remains available. */
+static void accumulate_clear(upb_json_parser *p) {
+  p->accumulated_len = 0;
+  p->accumulated = NULL;
+}
+
+/* Used internally by accumulate_append().
+ *
+ * Grows the accumulate buffer to hold at least |need| bytes.  The new
+ * capacity starts at max(current size, 128) and doubles, saturating at
+ * SIZE_MAX, until it is large enough.  On allocation failure an error is
+ * recorded and reported, and false is returned. */
+static bool accumulate_realloc(upb_json_parser *p, size_t need) {
+  const size_t current = p->accumulate_buf_size;
+  size_t capacity = current;
+  void *grown;
+
+  if (capacity < 128) capacity = 128;
+  for (; capacity < need; capacity = saturating_multiply(capacity, 2)) {
+    /* Keep doubling until the request fits. */
+  }
+
+  grown = upb_env_realloc(p->env, p->accumulate_buf, current, capacity);
+  if (grown == NULL) {
+    upb_status_seterrmsg(&p->status, "Out of memory allocating buffer.");
+    upb_env_reporterror(p->env, &p->status);
+    return false;
+  }
+
+  p->accumulate_buf_size = capacity;
+  p->accumulate_buf = grown;
+  return true;
+}
+
+/* Logically appends buf[0..len) to the data accumulated so far.
+ *
+ * When nothing has been accumulated yet and "can_alias" is true, no bytes are
+ * copied: the parser simply remembers the caller's pointer, so that memory
+ * must stay valid until the next accumulate_append() call (if any).  In every
+ * other case the data is copied into the parser's accumulate buffer, which is
+ * grown on demand.
+ *
+ * Returns false (after reporting the error through p->status / p->env) if
+ * checked_add() rejects the new total length or the buffer cannot be grown. */
+static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
+                              bool can_alias) {
+  size_t total;
+
+  /* Fast path: first segment, and the caller lets us point at its memory. */
+  if (can_alias && !p->accumulated) {
+    p->accumulated_len = len;
+    p->accumulated = buf;
+    return true;
+  }
+
+  if (!checked_add(p->accumulated_len, len, &total)) {
+    upb_status_seterrmsg(&p->status, "Integer overflow.");
+    upb_env_reporterror(p->env, &p->status);
+    return false;
+  }
+
+  if (total > p->accumulate_buf_size) {
+    if (!accumulate_realloc(p, total)) {
+      return false;
+    }
+  }
+
+  /* Data accumulated so far that lives elsewhere (an aliased region) is
+   * migrated into the accumulate buffer before the new bytes are added. */
+  if (p->accumulated != p->accumulate_buf) {
+    memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
+    p->accumulated = p->accumulate_buf;
+  }
+
+  memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
+  p->accumulated_len += len;
+  return true;
+}
+
+/* Returns a pointer to the data accumulated since the last accumulate_clear()
+ * call and stores its length in *len.  The pointer will refer either to an
+ * aliased caller buffer or to the parser's own accumulate buffer.  Asserts
+ * that something has actually been accumulated. */
+static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
+  const char *data = p->accumulated;
+  assert(data);
+  *len = p->accumulated_len;
+  return data;
+}
+
+
+/* Multi-part text data *******************************************************/
+
+/* When we have text data in the input, it can often come in multiple segments.
+ * For example, there may be some raw string data followed by an escape
+ * sequence.  The two segments are processed with different logic.  Also buffer
+ * seams in the input can cause multiple segments.
+ *
+ * As we see segments, there are two main cases for how we want to process them:
+ *
+ *  1. we want to push the captured input directly to string handlers.
+ *
+ *  2. we need to accumulate all the parts into a contiguous buffer for further
+ *     processing (field name lookup, string->number conversion, etc). */
+
+/* This is the set of states for p->multipart_state.  multipart_text() switches
+ * on this value to decide what to do with each incoming text segment. */
+enum {
+  /* We are not currently processing multipart data.  multipart_text() reports
+   * an internal error if it is called in this state. */
+  MULTIPART_INACTIVE = 0,
+
+  /* We are processing multipart data by accumulating it into a contiguous
+   * buffer (entered via multipart_startaccum()). */
+  MULTIPART_ACCUMULATE = 1,
+
+  /* We are processing multipart data by pushing each part directly to the
+   * current string handlers (entered via multipart_start()). */
+  MULTIPART_PUSHEAGERLY = 2
+};
+
+/* Begins a multi-part text value whose segments are gathered into one
+ * contiguous buffer and processed only once the value is complete.  Must be
+ * called while no multipart value is active and nothing is accumulated. */
+static void multipart_startaccum(upb_json_parser *p) {
+  assert_accumulate_empty(p);
+  assert(MULTIPART_INACTIVE == p->multipart_state);
+
+  p->multipart_state = MULTIPART_ACCUMULATE;
+}
+
+/* Begins a multi-part text value whose segments are forwarded to the string
+ * handler identified by "sel" as soon as each one is seen.  Must be called
+ * while no multipart value is active and nothing is accumulated. */
+static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
+  assert_accumulate_empty(p);
+  assert(MULTIPART_INACTIVE == p->multipart_state);
+
+  p->string_selector = sel;
+  p->multipart_state = MULTIPART_PUSHEAGERLY;
+}
+
+/* Handles one segment of text belonging to the active multipart value.
+ *
+ * In MULTIPART_ACCUMULATE the segment is appended to the accumulate buffer;
+ * in MULTIPART_PUSHEAGERLY it is pushed straight to the current string
+ * handler (passing the input buffer handle only when "can_alias" is true).
+ * Calling this while MULTIPART_INACTIVE is an internal error.
+ *
+ * Returns false if an error was reported, true otherwise. */
+static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
+                           bool can_alias) {
+  if (p->multipart_state == MULTIPART_ACCUMULATE) {
+    return accumulate_append(p, buf, len, can_alias);
+  }
+
+  if (p->multipart_state == MULTIPART_PUSHEAGERLY) {
+    const upb_bufhandle *handle = NULL;
+    if (can_alias) {
+      handle = p->handle;
+    }
+    upb_sink_putstring(&p->top->sink, p->string_selector, buf, len, handle);
+    return true;
+  }
+
+  if (p->multipart_state == MULTIPART_INACTIVE) {
+    upb_status_seterrmsg(
+        &p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
+    upb_env_reporterror(p->env, &p->status);
+    return false;
+  }
+
+  return true;
+}
+
+/* Finishes the active multipart value and returns to MULTIPART_INACTIVE.
+ *
+ * Note: this discards the accumulated data!  Read whatever you need from
+ * accumulate_getptr() before calling it. */
+static void multipart_end(upb_json_parser *p) {
+  assert(p->multipart_state != MULTIPART_INACTIVE);
+  accumulate_clear(p);
+  p->multipart_state = MULTIPART_INACTIVE;
+}
+
+
+/* Input capture **************************************************************/
+
+/* Functionality for capturing a region of the input as text.  Gracefully
+ * handles the case where a buffer seam occurs in the middle of the captured
+ * region. */
+
+/* Marks "ptr" as the start of a captured input region.  A multipart value
+ * must already be active and no other capture may be in progress. */
+static void capture_begin(upb_json_parser *p, const char *ptr) {
+  assert(p->multipart_state != MULTIPART_INACTIVE);
+  assert(!p->capture);
+
+  p->capture = ptr;
+}
+
+/* Ends the current capture at "ptr" and hands the captured bytes
+ * [p->capture, ptr) to multipart_text(), allowing them to be aliased.
+ * On success the capture is cleared and true is returned; on failure the
+ * capture pointer is left in place and false is returned. */
+static bool capture_end(upb_json_parser *p, const char *ptr) {
+  assert(p->capture);
+
+  if (!multipart_text(p, p->capture, ptr - p->capture, true)) {
+    return false;
+  }
+
+  p->capture = NULL;
+  return true;
+}
+
+/* Called at the end of each input buffer (i.e. at a buffer seam).  If a
+ * capture is in progress, the part captured so far is flushed through
+ * multipart_text() without aliasing.  Does nothing when no capture is
+ * active. */
+static void capture_suspend(upb_json_parser *p, const char **ptr) {
+  const char *begin = p->capture;
+
+  if (begin == NULL) return;
+
+  if (!multipart_text(p, begin, *ptr - begin, false)) {
+    /* The captured bytes could not be preserved, so rewind the caller's
+     * pointer to where the capture began. */
+    *ptr = begin;
+    return;
+  }
+
+  /* The address of suspend_capture is a sentinel meaning "a capture was in
+   * progress; resume it at the start of the next buffer".
+   *
+   * *ptr itself cannot serve as the marker, because nothing guarantees that
+   * it is still valid when we resume (if the underlying memory is freed,
+   * using the pointer at all, even to compare to NULL, is likely undefined
+   * behavior). */
+  p->capture = &suspend_capture;
+}
+
+/* Counterpart of capture_suspend(): if a capture was suspended at the end of
+ * the previous buffer, restarts it at "ptr" (the start of the new buffer). */
+static void capture_resume(upb_json_parser *p, const char *ptr) {
+  if (!p->capture) return;
+
+  /* The only non-NULL value expected here is the suspend sentinel. */
+  assert(p->capture == &suspend_capture);
+  p->capture = ptr;
+}
+
+
+/* Callbacks from the parser **************************************************/
+
+/* These are the functions called directly from the parser itself.
+ * We define these in the same order as their declarations in the parser. */
+
+/* Maps the character following a backslash in a JSON string escape to the
+ * character it stands for (e.g. 'n' -> '\n').  Any other input trips an
+ * assertion; with assertions disabled 'x' is returned. */
+static char escape_char(char in) {
+  /* Parallel tables: escapes[i] is translated to values[i]. */
+  static const char escapes[] = {'r', 't', 'n', 'f', 'b', '/', '"', '\\'};
+  static const char values[] = {'\r', '\t', '\n', '\f', '\b', '/', '"', '\\'};
+  size_t i;
+
+  for (i = 0; i < sizeof(escapes); i++) {
+    if (escapes[i] == in) {
+      return values[i];
+    }
+  }
+
+  assert(0);
+  return 'x';
+}
+
+/* Parser callback for a single-character escape: translates the character at
+ * *ptr with escape_char() and emits the one resulting byte as multipart text
+ * (never aliased, since it lives on our stack). */
+static bool escape(upb_json_parser *p, const char *ptr) {
+  const char unescaped = escape_char(*ptr);
+
+  return multipart_text(p, &unescaped, 1, false);
+}
+
+/* Parser callback at the start of a hex escape: resets the value that
+ * hexdigit() builds up in p->digit. */
+static void start_hex(upb_json_parser *p) {
+  p->digit = 0;
+}
+
+/* Parser callback for one hex digit of an escape: shifts p->digit left by
+ * four bits and adds the value of the digit at *ptr.  Accepts 0-9, a-f and
+ * A-F; anything else trips an assertion. */
+static void hexdigit(upb_json_parser *p, const char *ptr) {
+  const char c = *ptr;
+  int value;
+
+  if (c >= '0' && c <= '9') {
+    value = c - '0';
+  } else if (c >= 'a' && c <= 'f') {
+    value = (c - 'a') + 10;
+  } else {
+    assert(c >= 'A' && c <= 'F');
+    value = (c - 'A') + 10;
+  }
+
+  p->digit <<= 4;
+  p->digit += value;
+}
+
+/* Parser callback at the end of a hex escape: encodes the code point held in
+ * p->digit as UTF-8 (one to three bytes) and emits it as multipart text.
+ * Returns the result of multipart_text(). */
+static bool end_hex(upb_json_parser *p) {
+  const uint32_t cp = p->digit;
+
+  /* Only \u0000 -- \uFFFF is supported, so three bytes are always enough. */
+  char utf8[3];
+  int length;
+
+  if (cp <= 0x7F) {
+    /* Single byte. */
+    utf8[0] = cp;
+    length = 1;
+  } else if (cp <= 0x07FF) {
+    /* Two bytes: 110xxxxx 10xxxxxx. */
+    utf8[0] = ((cp >> 6) & 0x1F) | 0xC0;
+    utf8[1] = (cp & 0x3F) | 0x80;
+    length = 2;
+  } else /* cp <= 0xFFFF */ {
+    /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx. */
+    utf8[0] = ((cp >> 12) & 0x0F) | 0xE0;
+    utf8[1] = ((cp >> 6) & 0x3F) | 0x80;
+    utf8[2] = (cp & 0x3F) | 0x80;
+    length = 3;
+  }
+  /* TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
+   * we have to wait for the next escape to get the full code point). */
+
+  return multipart_text(p, utf8, length, false);
+}
+
+/* Parser callback at the start of a run of text: begins capturing the input
+ * at "ptr". */
+static void start_text(upb_json_parser *p, const char *ptr) {
+  capture_begin(p, ptr);
+}
+
+/* Parser callback at the end of a run of text: finishes the capture at "ptr"
+ * and returns whether the captured text was processed successfully. */
+static bool end_text(upb_json_parser *p, const char *ptr) {
+  const bool captured = capture_end(p, ptr);
+  return captured;
+}
+
+/* Parser callback at the first character of a number: the number's text is
+ * accumulated (so it can be converted as a whole later) and captured starting
+ * at "ptr". */
+static void start_number(upb_json_parser *p, const char *ptr) {
+  multipart_startaccum(p);
+  capture_begin(p, ptr);
+}
+
+static bool parse_number(upb_json_parser *p);
+
+/* Parser callback just past the last character of a number: finishes the
+ * capture at "ptr" and, if that succeeds, converts and emits the number via
+ * parse_number(). */
+static bool end_number(upb_json_parser *p, const char *ptr) {
+  return capture_end(p, ptr) && parse_number(p);
+}
+
+/* Converts the accumulated text to a number of the current field's type and
+ * emits it on the current sink.
+ *
+ * The accumulated text is NUL-terminated first (forcing it into the parser's
+ * own buffer), then handed to strtol()/strtoul()/strtod() depending on the
+ * field type.  The conversion must consume the entire text and the result
+ * must fit the field's range; otherwise an error is reported.
+ *
+ * In both the success and the conversion-error paths the multipart state is
+ * ended, so the accumulated data is gone when this returns.  Returns true on
+ * success, false if an error was reported. */
+static bool parse_number(upb_json_parser *p) {
+  size_t len;
+  const char *buf;
+  const char *myend;
+  char *end;
+
+  /* strtol() and friends unfortunately do not support specifying the length of
+   * the input string, so we need to force a copy into a NULL-terminated buffer. */
+  if (!multipart_text(p, "\0", 1, false)) {
+    return false;
+  }
+
+  buf = accumulate_getptr(p, &len);
+  myend = buf + len - 1;  /* One for NULL. */
+
+  /* The strto*() functions set errno on range errors but never clear it, so
+   * it must be reset here; otherwise a stale ERANGE left behind by unrelated
+   * earlier code would make a perfectly valid number fail below. */
+  errno = 0;
+
+  /* XXX: We are using strtol to parse integers, but this is wrong as even
+   * integers can be represented as 1e6 (for example), which strtol can't
+   * handle correctly.
+   *
+   * XXX: Also, we can't handle large integers properly because strto[u]ll
+   * isn't in C89.
+   *
+   * XXX: Also, we don't properly check floats for overflow, since strtof
+   * isn't in C89. */
+  switch (upb_fielddef_type(p->top->f)) {
+    case UPB_TYPE_ENUM:
+    case UPB_TYPE_INT32: {
+      long val = strtol(p->accumulated, &end, 0);
+      if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend)
+        goto err;
+      else
+        upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
+      break;
+    }
+    case UPB_TYPE_INT64: {
+      long long val = strtol(p->accumulated, &end, 0);
+      if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
+        goto err;
+      else
+        upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
+      break;
+    }
+    case UPB_TYPE_UINT32: {
+      unsigned long val = strtoul(p->accumulated, &end, 0);
+      if (val > UINT32_MAX || errno == ERANGE || end != myend)
+        goto err;
+      else
+        upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
+      break;
+    }
+    case UPB_TYPE_UINT64: {
+      unsigned long long val = strtoul(p->accumulated, &end, 0);
+      if (val > UINT64_MAX || errno == ERANGE || end != myend)
+        goto err;
+      else
+        upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
+      break;
+    }
+    case UPB_TYPE_DOUBLE: {
+      double val = strtod(p->accumulated, &end);
+      if (errno == ERANGE || end != myend)
+        goto err;
+      else
+        upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
+      break;
+    }
+    case UPB_TYPE_FLOAT: {
+      float val = strtod(p->accumulated, &end);
+      if (errno == ERANGE || end != myend)
+        goto err;
+      else
+        upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
+      break;
+    }
+    default:
+      /* Callers are expected to invoke this only for numeric/enum fields. */
+      assert(false);
+  }
+
+  multipart_end(p);
+
+  return true;
+
+err:
+  /* buf is NUL-terminated at this point, so "%s" is safe. */
+  upb_status_seterrf(&p->status, "error parsing number: %s", buf);
+  upb_env_reporterror(p->env, &p->status);
+  multipart_end(p);
+  return false;
+}
+
+/* Emits the boolean "val" for the current field.  If the current field is not
+ * of type bool, an error naming the field is reported and false is returned;
+ * otherwise the value is pushed to the sink and true is returned. */
+static bool parser_putbool(upb_json_parser *p, bool val) {
+  bool ok;
+
+  if (upb_fielddef_type(p->top->f) == UPB_TYPE_BOOL) {
+    ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
+    UPB_ASSERT_VAR(ok, ok);
+    return true;
+  }
+
+  upb_status_seterrf(&p->status,
+                     "Boolean value specified for non-bool field: %s",
+                     upb_fielddef_name(p->top->f));
+  upb_env_reporterror(p->env, &p->status);
+  return false;
+}
+
+/* Parser callback at the opening quote of a string value.
+ *
+ *  - For string/bytes fields a new parser frame is pushed and STARTSTR is
+ *    sent.  STRING data is then pushed eagerly to the handlers, whereas BYTES
+ *    data is accumulated.
+ *  - For enum fields the symbolic name is accumulated in the current frame.
+ *  - For any other field type an error is reported.
+ *
+ * Returns false if an error was reported or the stack check failed. */
+static bool start_stringval(upb_json_parser *p) {
+  upb_jsonparser_frame *inner;
+  upb_selector_t sel;
+
+  assert(p->top->f);
+
+  if (!upb_fielddef_isstring(p->top->f)) {
+    if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
+      /* No need to push a frame -- symbolic enum names in quotes remain in
+       * the current parser frame.
+       *
+       * Enum string values must accumulate so we can look up the value in a
+       * table once it is complete. */
+      multipart_startaccum(p);
+      return true;
+    }
+
+    upb_status_seterrf(&p->status,
+                       "String specified for non-string/non-enum field: %s",
+                       upb_fielddef_name(p->top->f));
+    upb_env_reporterror(p->env, &p->status);
+    return false;
+  }
+
+  if (!check_stack(p)) return false;
+
+  /* Start a new parser frame: parser frames correspond one-to-one with
+   * handler frames, and string events occur in a sub-frame. */
+  inner = p->top + 1;
+  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
+  upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
+  inner->m = p->top->m;
+  inner->f = p->top->f;
+  inner->is_map = false;
+  inner->is_mapentry = false;
+  p->top = inner;
+
+  if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
+    /* For STRING fields we push data directly to the handlers as it is
+     * parsed.  We don't do this yet for BYTES fields, because our base64
+     * decoder is not streaming.
+     *
+     * TODO(haberman): make base64 decoding streaming also. */
+    multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
+  } else {
+    multipart_startaccum(p);
+  }
+
+  return true;
+}
+
+/* Parser callback at the closing quote of a string value.
+ *
+ *  - BYTES: the accumulated base64 text is decoded and pushed, then the
+ *    string frame is closed exactly as for STRING.
+ *  - STRING: ENDSTR is sent and the string sub-frame is popped.
+ *  - ENUM: the accumulated symbolic name is resolved to its integer value
+ *    and emitted; an unknown name is reported as an error.
+ *
+ * Returns true on success and false if an error was reported.  Note that a
+ * base64 failure returns immediately, without ending the multipart state. */
+static bool end_stringval(upb_json_parser *p) {
+  bool ok = true;
+
+  switch (upb_fielddef_type(p->top->f)) {
+    case UPB_TYPE_BYTES:
+      if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
+                       p->accumulated, p->accumulated_len)) {
+        return false;
+      }
+      /* Fall through. */
+
+    case UPB_TYPE_STRING: {
+      upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
+      upb_sink_endstr(&p->top->sink, sel);
+      p->top--;
+      break;
+    }
+
+    case UPB_TYPE_ENUM: {
+      /* Resolve enum symbolic name to integer value. */
+      const upb_enumdef *enumdef =
+          (const upb_enumdef*)upb_fielddef_subdef(p->top->f);
+
+      size_t len;
+      const char *buf = accumulate_getptr(p, &len);
+
+      int32_t int_val = 0;
+      ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);
+
+      if (ok) {
+        upb_selector_t sel = parser_getsel(p);
+        upb_sink_putint32(&p->top->sink, sel, int_val);
+      } else {
+        /* The "*" precision of "%.*s" consumes an int argument, so the
+         * size_t length must be cast explicitly; passing it uncast is
+         * undefined behavior wherever size_t and int differ in size. */
+        upb_status_seterrf(&p->status, "Enum value unknown: '%.*s'",
+                           (int)len, buf);
+        upb_env_reporterror(p->env, &p->status);
+      }
+
+      break;
+    }
+
+    default:
+      assert(false);
+      upb_status_seterrmsg(&p->status, "Internal error in JSON decoder");
+      upb_env_reporterror(p->env, &p->status);
+      ok = false;
+      break;
+  }
+
+  multipart_end(p);
+
+  return ok;
+}
+
+/* Parser callback at the start of an object member: no field may be selected
+ * yet in the current frame, and the member name is accumulated so that it can
+ * be looked up once complete. */
+static void start_member(upb_json_parser *p) {
+  assert(p->top->f == NULL);
+  multipart_startaccum(p);
+}
+
+/* Helper used by handle_mapentry(): emits the mapentry message's key field
+ * from the text currently held in the accumulate buffer.
+ *
+ * Sets p->top->f to the key field of the mapentry message.  On the success
+ * paths the multipart state is ended (directly, or inside parse_number()).
+ * Returns false if an error was reported. */
+static bool parse_mapentry_key(upb_json_parser *p) {
+  size_t len;
+  const char *buf = accumulate_getptr(p, &len);
+
+  /* Emit the key field. We do a bit of ad-hoc parsing here because the
+   * parser state machine has already decided that this is a string field
+   * name, and we are reinterpreting it as some arbitrary key type. In
+   * particular, integer and bool keys are quoted, so we need to parse the
+   * quoted string contents here. */
+
+  p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
+  if (!p->top->f) {
+    upb_status_seterrmsg(&p->status, "mapentry message has no key");
+    upb_env_reporterror(p->env, &p->status);
+    return false;
+  }
+
+  switch (upb_fielddef_type(p->top->f)) {
+    case UPB_TYPE_INT32:
+    case UPB_TYPE_INT64:
+    case UPB_TYPE_UINT32:
+    case UPB_TYPE_UINT64:
+      /* The accum buffer already holds the number's text, so convert it
+       * exactly as end_number() would. */
+      return parse_number(p);
+
+    case UPB_TYPE_BOOL: {
+      bool key;
+
+      if (len == 4 && !strncmp(buf, "true", 4)) {
+        key = true;
+      } else if (len == 5 && !strncmp(buf, "false", 5)) {
+        key = false;
+      } else {
+        upb_status_seterrmsg(&p->status,
+                             "Map bool key not 'true' or 'false'");
+        upb_env_reporterror(p->env, &p->status);
+        return false;
+      }
+
+      if (!parser_putbool(p, key)) {
+        return false;
+      }
+      multipart_end(p);
+      return true;
+    }
+
+    case UPB_TYPE_STRING:
+    case UPB_TYPE_BYTES: {
+      /* Emit the whole key as one STARTSTR / STRING / ENDSTR sequence. */
+      upb_sink subsink;
+      upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
+      upb_sink_startstr(&p->top->sink, sel, len, &subsink);
+      sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
+      upb_sink_putstring(&subsink, sel, buf, len, NULL);
+      sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
+      upb_sink_endstr(&subsink, sel);
+      multipart_end(p);
+      return true;
+    }
+
+    default:
+      upb_status_seterrmsg(&p->status, "Invalid field type for map key");
+      upb_env_reporterror(p->env, &p->status);
+      return false;
+  }
+}
+
+/* Helper: emit one map entry (as a submessage in the map field sequence). This
+ * is invoked from end_membername(), at the end of the map entry's key string,
+ * with the map key in the accumulate buffer. It parses the key from that
+ * buffer, emits the handler calls to start the mapentry submessage (setting up
+ * its subframe in the process), and sets up state in the subframe so that the
+ * value parser (invoked next) will emit the mapentry's value field and then
+ * end the mapentry message.
+ *
+ * Returns false if the stack check fails, if the key cannot be parsed, or if
+ * the mapentry message has no value field; true otherwise. */
+
+static bool handle_mapentry(upb_json_parser *p) {
+  const upb_fielddef *mapfield;
+  const upb_msgdef *mapentrymsg;
+  upb_jsonparser_frame *inner;
+  upb_selector_t sel;
+
+  /* Map entry: p->top->sink is the seq frame, so we need to start a frame
+   * for the mapentry itself, and then set |f| in that frame so that the map
+   * value field is parsed, and also set a flag to end the frame after the
+   * map-entry value is parsed. */
+  if (!check_stack(p)) return false;
+
+  mapfield = p->top->mapfield;
+  mapentrymsg = upb_fielddef_msgsubdef(mapfield);
+
+  inner = p->top + 1;
+  p->top->f = mapfield;
+  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
+  upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
+  inner->m = mapentrymsg;
+  inner->mapfield = mapfield;
+  inner->is_map = false;
+
+  /* Don't set this to true *yet* -- we reuse parsing handlers below to push
+   * the key field value to the sink, and these handlers will pop the frame
+   * if they see is_mapentry (when invoked by the parser state machine, they
+   * would have just seen the map-entry value, not key). */
+  inner->is_mapentry = false;
+  p->top = inner;
+
+  /* send STARTMSG in submsg frame. */
+  upb_sink_startmsg(&p->top->sink);
+
+  /* A key that fails to parse has already been reported as an error; stop
+   * here rather than carrying on and returning success for a map entry
+   * whose key was never emitted. */
+  if (!parse_mapentry_key(p)) {
+    return false;
+  }
+
+  /* Set up the value field to receive the map-entry value. */
+  p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
+  p->top->is_mapentry = true;  /* set up to pop frame after value is parsed. */
+  p->top->mapfield = mapfield;
+  if (p->top->f == NULL) {
+    upb_status_seterrmsg(&p->status, "mapentry message has no value");
+    upb_env_reporterror(p->env, &p->status);
+    return false;
+  }
+
+  return true;
+}
+
+/* Parser callback at the end of an object member's name.
+ *
+ * Inside a map the name is the map key and is handled by handle_mapentry().
+ * Otherwise the accumulated name is looked up in the current message; if it
+ * names a field, that field becomes the frame's current field and the
+ * multipart state is ended.  An unknown name is reported as an error. */
+static bool end_membername(upb_json_parser *p) {
+  size_t len;
+  const char *buf;
+  const upb_fielddef *f;
+
+  assert(!p->top->f);
+
+  if (p->top->is_map) {
+    return handle_mapentry(p);
+  }
+
+  buf = accumulate_getptr(p, &len);
+  f = upb_msgdef_ntof(p->top->m, buf, len);
+
+  if (f) {
+    p->top->f = f;
+    multipart_end(p);
+    return true;
+  }
+
+  /* TODO(haberman): Ignore unknown fields if requested/configured to do
+   * so. */
+  upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)len, buf);
+  upb_env_reporterror(p->env, &p->status);
+  return false;
+}
+
+/* Parser callback at the end of an object member (after its value).  If the
+ * value just parsed was a map-entry value, the mapentry submessage is closed
+ * and its frame popped.  In all cases the (now current) frame's field is
+ * cleared, ready for the next member. */
+static void end_member(upb_json_parser *p) {
+  upb_jsonparser_frame *entry = p->top;
+
+  if (entry->is_mapentry) {
+    upb_status s = UPB_STATUS_INIT;
+    upb_selector_t sel;
+    bool ok;
+    const upb_fielddef *mapfield;
+
+    assert(entry > p->stack);
+
+    /* send ENDMSG on submsg. */
+    upb_sink_endmsg(&entry->sink, &s);
+    mapfield = entry->mapfield;
+
+    /* send ENDSUBMSG in repeated-field-of-mapentries frame. */
+    p->top = entry - 1;
+    ok = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &sel);
+    UPB_ASSERT_VAR(ok, ok);
+    upb_sink_endsubmsg(&p->top->sink, sel);
+  }
+
+  p->top->f = NULL;
+}
+
+/* Parser callback at the start of an object used as a field value.
+ *
+ * For a map field a new frame is pushed in a repeated-field (sequence)
+ * context; for a submessage field a new frame is pushed in the submessage
+ * context.  Any other field type is reported as an error.  Returns false on
+ * error or if the stack check fails. */
+static bool start_subobject(upb_json_parser *p) {
+  upb_jsonparser_frame *inner;
+  upb_selector_t sel;
+  bool is_map;
+
+  assert(p->top->f);
+
+  is_map = upb_fielddef_ismap(p->top->f);
+
+  if (!is_map && !upb_fielddef_issubmsg(p->top->f)) {
+    upb_status_seterrf(&p->status,
+                       "Object specified for non-message/group field: %s",
+                       upb_fielddef_name(p->top->f));
+    upb_env_reporterror(p->env, &p->status);
+    return false;
+  }
+
+  if (!check_stack(p)) return false;
+
+  inner = p->top + 1;
+
+  if (is_map) {
+    /* Beginning of a map. Start a new parser frame in a repeated-field
+     * context. */
+    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
+    upb_sink_startseq(&p->top->sink, sel, &inner->sink);
+    inner->m = upb_fielddef_msgsubdef(p->top->f);
+    inner->mapfield = p->top->f;
+  } else {
+    /* Beginning of a subobject. Start a new parser frame in the submsg
+     * context. */
+    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
+    upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
+    inner->m = upb_fielddef_msgsubdef(p->top->f);
+  }
+
+  inner->f = NULL;
+  inner->is_map = is_map;
+  inner->is_mapentry = false;
+  p->top = inner;
+
+  return true;
+}
+
+/* Parser callback at the end of an object used as a field value: pops the
+ * frame pushed by start_subobject() and sends ENDSEQ (for a map) or
+ * ENDSUBMSG (for a submessage) on the parent frame's sink. */
+static void end_subobject(upb_json_parser *p) {
+  const bool was_map = p->top->is_map;
+  upb_selector_t sel;
+
+  p->top--;
+
+  if (was_map) {
+    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
+    upb_sink_endseq(&p->top->sink, sel);
+  } else {
+    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
+    upb_sink_endsubmsg(&p->top->sink, sel);
+  }
+}
+
+/* Parser callback at the start of a JSON array.  The current field must be a
+ * repeated field; if so, STARTSEQ is sent and a new frame (same message and
+ * field as the parent) is pushed.  Returns false if the field is not
+ * repeated (an error is reported) or the stack check fails. */
+static bool start_array(upb_json_parser *p) {
+  upb_jsonparser_frame *inner;
+  upb_selector_t sel;
+
+  assert(p->top->f);
+
+  if (upb_fielddef_isseq(p->top->f)) {
+    if (!check_stack(p)) return false;
+
+    inner = p->top + 1;
+    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
+    upb_sink_startseq(&p->top->sink, sel, &inner->sink);
+    inner->is_mapentry = false;
+    inner->is_map = false;
+    inner->f = p->top->f;
+    inner->m = p->top->m;
+    p->top = inner;
+
+    return true;
+  }
+
+  upb_status_seterrf(&p->status,
+                     "Array specified for non-repeated field: %s",
+                     upb_fielddef_name(p->top->f));
+  upb_env_reporterror(p->env, &p->status);
+  return false;
+}
+
+/* Parser callback at the end of a JSON array: pops the frame pushed by
+ * start_array() and sends ENDSEQ on the parent frame's sink. */
+static void end_array(upb_json_parser *p) {
+  upb_selector_t endseq;
+
+  assert(p->top > p->stack);
+  p->top--;
+
+  endseq = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
+  upb_sink_endseq(&p->top->sink, endseq);
+}
+
+/* Parser callback at the start of a JSON object: sends STARTMSG on the
+ * current sink, except when the current frame is a map (no message is
+ * started for the map itself). */
+static void start_object(upb_json_parser *p) {
+  if (p->top->is_map) return;
+
+  upb_sink_startmsg(&p->top->sink);
+}
+
+/* Parser callback at the end of a JSON object: sends ENDMSG on the current
+ * sink (unless the current frame is a map) and forwards any error status the
+ * handler produced to the environment. */
+static void end_object(upb_json_parser *p) {
+  upb_status status;
+
+  if (p->top->is_map) return;
+
+  upb_status_clear(&status);
+  upb_sink_endmsg(&p->top->sink, &status);
+  if (upb_ok(&status)) return;
+
+  upb_env_reporterror(p->env, &status);
+}
+
+
+/* Used inside the generated parser's actions: evaluates x and, if it is
+ * false, jumps to the enclosing function's "error" label.  Note that it
+ * expands to a bare "if" statement. */
+#define CHECK_RETURN_TOP(x) if (!(x)) goto error
+
+
+/* The actual parser **********************************************************/
+
+/* What follows is the Ragel parser itself.  The language is specified in Ragel
+ * and the actions call our C functions above.
+ *
+ * Ragel has an extensive set of functionality, and we use only a small part of
+ * it.  There are many action types but we only use a few:
+ *
+ *   ">" -- transition into a machine
+ *   "%" -- transition out of a machine
+ *   "@" -- transition into a final state of a machine.
+ *
+ * "@" transitions are tricky because a machine can transition into a final
+ * state repeatedly.  But in some cases we know this can't happen, for example
+ * a string which is delimited by a final '"' can only transition into its
+ * final state once, when the closing '"' is seen. */
+
+
+#line 1218 "upb/json/parser.rl"
+
+
+
+#line 1130 "upb/json/parser.c"
+static const char _json_actions[] = {
+	0, 1, 0, 1, 2, 1, 3, 1, 
+	5, 1, 6, 1, 7, 1, 8, 1, 
+	10, 1, 12, 1, 13, 1, 14, 1, 
+	15, 1, 16, 1, 17, 1, 21, 1, 
+	25, 1, 27, 2, 3, 8, 2, 4, 
+	5, 2, 6, 2, 2, 6, 8, 2, 
+	11, 9, 2, 13, 15, 2, 14, 15, 
+	2, 18, 1, 2, 19, 27, 2, 20, 
+	9, 2, 22, 27, 2, 23, 27, 2, 
+	24, 27, 2, 26, 27, 3, 14, 11, 
+	9
+};
+
+static const unsigned char _json_key_offsets[] = {
+	0, 0, 4, 9, 14, 15, 19, 24, 
+	29, 34, 38, 42, 45, 48, 50, 54, 
+	58, 60, 62, 67, 69, 71, 80, 86, 
+	92, 98, 104, 106, 115, 116, 116, 116, 
+	121, 126, 131, 132, 133, 134, 135, 135, 
+	136, 137, 138, 138, 139, 140, 141, 141, 
+	146, 151, 152, 156, 161, 166, 171, 175, 
+	175, 178, 178, 178
+};
+
+static const char _json_trans_keys[] = {
+	32, 123, 9, 13, 32, 34, 125, 9, 
+	13, 32, 34, 125, 9, 13, 34, 32, 
+	58, 9, 13, 32, 93, 125, 9, 13, 
+	32, 44, 125, 9, 13, 32, 44, 125, 
+	9, 13, 32, 34, 9, 13, 45, 48, 
+	49, 57, 48, 49, 57, 46, 69, 101, 
+	48, 57, 69, 101, 48, 57, 43, 45, 
+	48, 57, 48, 57, 48, 57, 46, 69, 
+	101, 48, 57, 34, 92, 34, 92, 34, 
+	47, 92, 98, 102, 110, 114, 116, 117, 
+	48, 57, 65, 70, 97, 102, 48, 57, 
+	65, 70, 97, 102, 48, 57, 65, 70, 
+	97, 102, 48, 57, 65, 70, 97, 102, 
+	34, 92, 34, 45, 91, 102, 110, 116, 
+	123, 48, 57, 34, 32, 93, 125, 9, 
+	13, 32, 44, 93, 9, 13, 32, 93, 
+	125, 9, 13, 97, 108, 115, 101, 117, 
+	108, 108, 114, 117, 101, 32, 34, 125, 
+	9, 13, 32, 34, 125, 9, 13, 34, 
+	32, 58, 9, 13, 32, 93, 125, 9, 
+	13, 32, 44, 125, 9, 13, 32, 44, 
+	125, 9, 13, 32, 34, 9, 13, 32, 
+	9, 13, 0
+};
+
+static const char _json_single_lengths[] = {
+	0, 2, 3, 3, 1, 2, 3, 3, 
+	3, 2, 2, 1, 3, 0, 2, 2, 
+	0, 0, 3, 2, 2, 9, 0, 0, 
+	0, 0, 2, 7, 1, 0, 0, 3, 
+	3, 3, 1, 1, 1, 1, 0, 1, 
+	1, 1, 0, 1, 1, 1, 0, 3, 
+	3, 1, 2, 3, 3, 3, 2, 0, 
+	1, 0, 0, 0
+};
+
+static const char _json_range_lengths[] = {
+	0, 1, 1, 1, 0, 1, 1, 1, 
+	1, 1, 1, 1, 0, 1, 1, 1, 
+	1, 1, 1, 0, 0, 0, 3, 3, 
+	3, 3, 0, 1, 0, 0, 0, 1, 
+	1, 1, 0, 0, 0, 0, 0, 0, 
+	0, 0, 0, 0, 0, 0, 0, 1, 
+	1, 0, 1, 1, 1, 1, 1, 0, 
+	1, 0, 0, 0
+};
+
+static const short _json_index_offsets[] = {
+	0, 0, 4, 9, 14, 16, 20, 25, 
+	30, 35, 39, 43, 46, 50, 52, 56, 
+	60, 62, 64, 69, 72, 75, 85, 89, 
+	93, 97, 101, 104, 113, 115, 116, 117, 
+	122, 127, 132, 134, 136, 138, 140, 141, 
+	143, 145, 147, 148, 150, 152, 154, 155, 
+	160, 165, 167, 171, 176, 181, 186, 190, 
+	191, 194, 195, 196
+};
+
+static const char _json_indicies[] = {
+	0, 2, 0, 1, 3, 4, 5, 3, 
+	1, 6, 7, 8, 6, 1, 9, 1, 
+	10, 11, 10, 1, 11, 1, 1, 11, 
+	12, 13, 14, 15, 13, 1, 16, 17, 
+	8, 16, 1, 17, 7, 17, 1, 18, 
+	19, 20, 1, 19, 20, 1, 22, 23, 
+	23, 21, 24, 1, 23, 23, 24, 21, 
+	25, 25, 26, 1, 26, 1, 26, 21, 
+	22, 23, 23, 20, 21, 28, 29, 27, 
+	31, 32, 30, 33, 33, 33, 33, 33, 
+	33, 33, 33, 34, 1, 35, 35, 35, 
+	1, 36, 36, 36, 1, 37, 37, 37, 
+	1, 38, 38, 38, 1, 40, 41, 39, 
+	42, 43, 44, 45, 46, 47, 48, 43, 
+	1, 49, 1, 50, 51, 53, 54, 1, 
+	53, 52, 55, 56, 54, 55, 1, 56, 
+	1, 1, 56, 52, 57, 1, 58, 1, 
+	59, 1, 60, 1, 61, 62, 1, 63, 
+	1, 64, 1, 65, 66, 1, 67, 1, 
+	68, 1, 69, 70, 71, 72, 70, 1, 
+	73, 74, 75, 73, 1, 76, 1, 77, 
+	78, 77, 1, 78, 1, 1, 78, 79, 
+	80, 81, 82, 80, 1, 83, 84, 75, 
+	83, 1, 84, 74, 84, 1, 85, 86, 
+	86, 1, 1, 1, 1, 0
+};
+
+static const char _json_trans_targs[] = {
+	1, 0, 2, 3, 4, 56, 3, 4, 
+	56, 5, 5, 6, 7, 8, 9, 56, 
+	8, 9, 11, 12, 18, 57, 13, 15, 
+	14, 16, 17, 20, 58, 21, 20, 58, 
+	21, 19, 22, 23, 24, 25, 26, 20, 
+	58, 21, 28, 30, 31, 34, 39, 43, 
+	47, 29, 59, 59, 32, 31, 29, 32, 
+	33, 35, 36, 37, 38, 59, 40, 41, 
+	42, 59, 44, 45, 46, 59, 48, 49, 
+	55, 48, 49, 55, 50, 50, 51, 52, 
+	53, 54, 55, 53, 54, 59, 56
+};
+
+static const char _json_trans_actions[] = {
+	0, 0, 0, 21, 77, 53, 0, 47, 
+	23, 17, 0, 0, 15, 19, 19, 50, 
+	0, 0, 0, 0, 0, 1, 0, 0, 
+	0, 0, 0, 3, 13, 0, 0, 35, 
+	5, 11, 0, 38, 7, 7, 7, 41, 
+	44, 9, 62, 56, 25, 0, 0, 0, 
+	31, 29, 33, 59, 15, 0, 27, 0, 
+	0, 0, 0, 0, 0, 68, 0, 0, 
+	0, 71, 0, 0, 0, 65, 21, 77, 
+	53, 0, 47, 23, 17, 0, 0, 15, 
+	19, 19, 50, 0, 0, 74, 0
+};
+
+static const int json_start = 1;
+
+static const int json_en_number_machine = 10;
+static const int json_en_string_machine = 19;
+static const int json_en_value_machine = 27;
+static const int json_en_main = 1;
+
+
+#line 1221 "upb/json/parser.rl"
+
+size_t parse(void *closure, const void *hd, const char *buf, size_t size,
+             const upb_bufhandle *handle) {
+  upb_json_parser *parser = closure;
+
+  /* Variables used by Ragel's generated code. */
+  int cs = parser->current_state;
+  int *stack = parser->parser_stack;
+  int top = parser->parser_top;
+
+  const char *p = buf;
+  const char *pe = buf + size;
+
+  parser->handle = handle;
+
+  UPB_UNUSED(hd);
+  UPB_UNUSED(handle);
+
+  capture_resume(parser, buf);
+
+  
+#line 1301 "upb/json/parser.c"
+	{
+	int _klen;
+	unsigned int _trans;
+	const char *_acts;
+	unsigned int _nacts;
+	const char *_keys;
+
+	if ( p == pe )
+		goto _test_eof;
+	if ( cs == 0 )
+		goto _out;
+_resume:
+	_keys = _json_trans_keys + _json_key_offsets[cs];
+	_trans = _json_index_offsets[cs];
+
+	_klen = _json_single_lengths[cs];
+	if ( _klen > 0 ) {
+		const char *_lower = _keys;
+		const char *_mid;
+		const char *_upper = _keys + _klen - 1;
+		while (1) {
+			if ( _upper < _lower )
+				break;
+
+			_mid = _lower + ((_upper-_lower) >> 1);
+			if ( (*p) < *_mid )
+				_upper = _mid - 1;
+			else if ( (*p) > *_mid )
+				_lower = _mid + 1;
+			else {
+				_trans += (unsigned int)(_mid - _keys);
+				goto _match;
+			}
+		}
+		_keys += _klen;
+		_trans += _klen;
+	}
+
+	_klen = _json_range_lengths[cs];
+	if ( _klen > 0 ) {
+		const char *_lower = _keys;
+		const char *_mid;
+		const char *_upper = _keys + (_klen<<1) - 2;
+		while (1) {
+			if ( _upper < _lower )
+				break;
+
+			_mid = _lower + (((_upper-_lower) >> 1) & ~1);
+			if ( (*p) < _mid[0] )
+				_upper = _mid - 2;
+			else if ( (*p) > _mid[1] )
+				_lower = _mid + 2;
+			else {
+				_trans += (unsigned int)((_mid - _keys)>>1);
+				goto _match;
+			}
+		}
+		_trans += _klen;
+	}
+
+_match:
+	_trans = _json_indicies[_trans];
+	cs = _json_trans_targs[_trans];
+
+	if ( _json_trans_actions[_trans] == 0 )
+		goto _again;
+
+	_acts = _json_actions + _json_trans_actions[_trans];
+	_nacts = (unsigned int) *_acts++;
+	while ( _nacts-- > 0 )
+	{
+		switch ( *_acts++ )
+		{
+	case 0:
+#line 1133 "upb/json/parser.rl"
+	{ p--; {cs = stack[--top]; goto _again;} }
+	break;
+	case 1:
+#line 1134 "upb/json/parser.rl"
+	{ p--; {stack[top++] = cs; cs = 10; goto _again;} }
+	break;
+	case 2:
+#line 1138 "upb/json/parser.rl"
+	{ start_text(parser, p); }
+	break;
+	case 3:
+#line 1139 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(end_text(parser, p)); }
+	break;
+	case 4:
+#line 1145 "upb/json/parser.rl"
+	{ start_hex(parser); }
+	break;
+	case 5:
+#line 1146 "upb/json/parser.rl"
+	{ hexdigit(parser, p); }
+	break;
+	case 6:
+#line 1147 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(end_hex(parser)); }
+	break;
+	case 7:
+#line 1153 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(escape(parser, p)); }
+	break;
+	case 8:
+#line 1159 "upb/json/parser.rl"
+	{ p--; {cs = stack[--top]; goto _again;} }
+	break;
+	case 9:
+#line 1162 "upb/json/parser.rl"
+	{ {stack[top++] = cs; cs = 19; goto _again;} }
+	break;
+	case 10:
+#line 1164 "upb/json/parser.rl"
+	{ p--; {stack[top++] = cs; cs = 27; goto _again;} }
+	break;
+	case 11:
+#line 1169 "upb/json/parser.rl"
+	{ start_member(parser); }
+	break;
+	case 12:
+#line 1170 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(end_membername(parser)); }
+	break;
+	case 13:
+#line 1173 "upb/json/parser.rl"
+	{ end_member(parser); }
+	break;
+	case 14:
+#line 1179 "upb/json/parser.rl"
+	{ start_object(parser); }
+	break;
+	case 15:
+#line 1182 "upb/json/parser.rl"
+	{ end_object(parser); }
+	break;
+	case 16:
+#line 1188 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(start_array(parser)); }
+	break;
+	case 17:
+#line 1192 "upb/json/parser.rl"
+	{ end_array(parser); }
+	break;
+	case 18:
+#line 1197 "upb/json/parser.rl"
+	{ start_number(parser, p); }
+	break;
+	case 19:
+#line 1198 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(end_number(parser, p)); }
+	break;
+	case 20:
+#line 1200 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(start_stringval(parser)); }
+	break;
+	case 21:
+#line 1201 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(end_stringval(parser)); }
+	break;
+	case 22:
+#line 1203 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(parser_putbool(parser, true)); }
+	break;
+	case 23:
+#line 1205 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(parser_putbool(parser, false)); }
+	break;
+	case 24:
+#line 1207 "upb/json/parser.rl"
+	{ /* null value */ }
+	break;
+	case 25:
+#line 1209 "upb/json/parser.rl"
+	{ CHECK_RETURN_TOP(start_subobject(parser)); }
+	break;
+	case 26:
+#line 1210 "upb/json/parser.rl"
+	{ end_subobject(parser); }
+	break;
+	case 27:
+#line 1215 "upb/json/parser.rl"
+	{ p--; {cs = stack[--top]; goto _again;} }
+	break;
+#line 1487 "upb/json/parser.c"
+		}
+	}
+
+_again:
+	if ( cs == 0 )
+		goto _out;
+	if ( ++p != pe )
+		goto _resume;
+	_test_eof: {}
+	_out: {}
+	}
+
+#line 1242 "upb/json/parser.rl"
+
+  if (p != pe) {
+    upb_status_seterrf(&parser->status, "Parse error at %s\n", p);
+    upb_env_reporterror(parser->env, &parser->status);
+  } else {
+    capture_suspend(parser, &p);
+  }
+
+error:
+  /* Save parsing state back to parser. */
+  parser->current_state = cs;
+  parser->parser_top = top;
+
+  return p - buf;
+}
+
+/* End-of-string handler installed on the parser's input bytes handler.  It has
+ * no work to do and always reports success. */
+bool end(void *closure, const void *hd) {
+  /* Reference the generated state-machine constants so that compilers do not
+   * warn about unused static constants. */
+  UPB_UNUSED(json_en_main);
+  UPB_UNUSED(json_en_value_machine);
+  UPB_UNUSED(json_en_string_machine);
+  UPB_UNUSED(json_en_number_machine);
+  UPB_UNUSED(json_start);
+
+  /* Neither argument is needed. */
+  UPB_UNUSED(hd);
+  UPB_UNUSED(closure);
+  return true;
+}
+
+/* Puts |p| back into its initial state: the frame stack is reduced to a single
+ * cleared top frame, the state machine is set to its start state, and all
+ * accumulation/capture bookkeeping and the status are cleared. */
+static void json_parser_reset(upb_json_parser *p) {
+  int cs;
+  int top;
+
+  /* Only the bottom frame is live; it has no field and is not a map. */
+  p->top = p->stack;
+  p->top->f = NULL;
+  p->top->is_map = false;
+  p->top->is_mapentry = false;
+
+  /* Emit Ragel initialization of the parser. */
+  
+#line 1541 "upb/json/parser.c"
+	{
+	cs = json_start;
+	top = 0;
+	}
+
+#line 1282 "upb/json/parser.rl"
+  /* Persist the state-machine locals so that parsing can resume from them. */
+  p->current_state = cs;
+  p->parser_top = top;
+  accumulate_clear(p);
+  p->multipart_state = MULTIPART_INACTIVE;
+  p->capture = NULL;
+  p->accumulated = NULL;
+  upb_status_clear(&p->status);
+}
+
+
+/* Public API *****************************************************************/
+
+/* Allocates a JSON parser from |env| and binds it to |output|.
+ *
+ * The parser's input bytes sink is wired to the parse()/end() handlers, the
+ * parser state is reset, and the top frame is pointed at |output|'s handlers
+ * and closure.
+ *
+ * Returns the new parser, or NULL if the allocation fails. */
+upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) {
+#ifndef NDEBUG
+  const size_t size_before = upb_env_bytesallocated(env);
+#endif
+  upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser));
+  /* This function returns a pointer, so signal failure with NULL rather than
+   * the boolean constant false. */
+  if (!p) return NULL;
+
+  p->env = env;
+  p->limit = p->stack + UPB_JSON_MAX_DEPTH;
+  p->accumulate_buf = NULL;
+  p->accumulate_buf_size = 0;
+  upb_byteshandler_init(&p->input_handler_);
+  upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
+  upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
+  upb_bytessink_reset(&p->input_, &p->input_handler_, p);
+
+  json_parser_reset(p);
+  upb_sink_reset(&p->top->sink, output->handlers, output->closure);
+  p->top->m = upb_handlers_msgdef(output->handlers);
+
+  /* If this fails, uncomment and increase the value in parser.h. */
+  /* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */
+  assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE);
+  return p;
+}
+
+/* Returns the bytes sink embedded in |p|; upb_json_parser_create() binds this
+ * sink to the parser's input handlers. */
+upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
+  return &p->input_;
+}
+/*
+** This currently uses snprintf() to format primitives, and could be optimized
+** further.
+*/
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+
+/* State for a printer that receives handler callbacks and writes JSON text to
+ * a bytes sink. */
+struct upb_json_printer {
+  /* Sink handed out by upb_json_printer_input(); its closure is the printer. */
+  upb_sink input_;
+  /* BytesSink closure. */
+  void *subc_;
+  /* Destination for all printed bytes. */
+  upb_bytessink *output_;
+
+  /* We track the depth so that we know when to emit startstr/endstr on the
+   * output. */
+  int depth_;
+
+  /* Have we emitted the first element? This state is necessary to emit commas
+   * without leaving a trailing comma in arrays/maps. We keep this state per
+   * frame depth.
+   *
+   * Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
+   * We count frames (contexts in which we separate elements by commas) as both
+   * repeated fields and messages (maps), and the worst case is a
+   * message->repeated field->submessage->repeated field->... nesting. */
+  bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
+};
+
+/* StringPiece; a pointer plus a length. */
+typedef struct {
+  const char *ptr;  /* First byte of the string data. */
+  size_t len;       /* Number of bytes at |ptr|. */
+} strpc;
+
+/* Builds a heap-allocated strpc describing the name of field |f|.  The
+ * allocation is registered with |h| so that free() is called on it during
+ * handler cleanup.
+ *
+ * NOTE(review): the result of malloc() is not checked before use. */
+strpc *newstrpc(upb_handlers *h, const upb_fielddef *f) {
+  const char *field_name = upb_fielddef_name(f);
+  strpc *piece = malloc(sizeof(strpc));
+  piece->ptr = field_name;
+  piece->len = strlen(field_name);
+  upb_handlers_addcleanup(h, piece, free);
+  return piece;
+}
+
+/* ------------ JSON string printing: values, maps, arrays ------------------ */
+
+/* Pushes |len| bytes starting at |buf| to the printer's output sink. */
+static void print_data(
+    upb_json_printer *p, const char *buf, unsigned int len) {
+  /* TODO: Will need to change if we support pushback from the sink. */
+  size_t written = upb_bytessink_putbuf(p->output_, p->subc_, buf, len, NULL);
+  /* The sink is expected to accept everything in a single call. */
+  UPB_ASSERT_VAR(written, written == len);
+}
+
+/* Emits the separator that precedes every element of the current frame except
+ * the first one, and records that the frame now has an element. */
+static void print_comma(upb_json_printer *p) {
+  bool *is_first = &p->first_elem_[p->depth_];
+  if (*is_first) {
+    /* First element: no separator, just clear the flag. */
+    *is_first = false;
+    return;
+  }
+  print_data(p, ",", 1);
+}
+
+/* Helpers that print properly formatted elements to the JSON output stream. */
+
+/* Used for escaping control chars in strings. */
+static const char kControlCharLimit = 0x20;
+
+/* Returns true if |c| must be written as an escape sequence inside a JSON
+ * string: the double quote, the backslash, and bytes below
+ * kControlCharLimit. */
+UPB_INLINE bool is_json_escaped(char c) {
+  /* See RFC 4627. */
+  const unsigned char byte = (unsigned char)c;
+  if (byte == '"' || byte == '\\') return true;
+  return byte < kControlCharLimit;
+}
+
+/* Returns the short two-character JSON escape for |c| (for example "\\n" for
+ * a newline), or NULL if |c| has no such short form.
+ *
+ * The returned strings are string literals and must not be modified, hence
+ * the const-qualified return type. */
+UPB_INLINE const char* json_nice_escape(char c) {
+  switch (c) {
+    case '"':  return "\\\"";
+    case '\\': return "\\\\";
+    case '\b': return "\\b";
+    case '\f': return "\\f";
+    case '\n': return "\\n";
+    case '\r': return "\\r";
+    case '\t': return "\\t";
+    default:   return NULL;
+  }
+}
+
+/* Prints |len| bytes of |buf| as JSON string content, escaping where needed.
+ * The enclosing quotes are deliberately left to the caller so that a string
+ * can be emitted in several chunks.
+ *
+ * Bytes that need no escaping are batched into runs and forwarded untouched;
+ * this assumes the input and output encodings are the same (both UTF-8 for
+ * now). */
+static void putstring(upb_json_printer *p, const char *buf, unsigned int len) {
+  /* Start of the pending run of pass-through bytes, or NULL if none. */
+  const char *run_start = NULL;
+  unsigned int pos;
+
+  for (pos = 0; pos < len; pos++) {
+    const char ch = buf[pos];
+    const char *esc;
+    char unicode_esc[8];
+
+    if (!is_json_escaped(ch)) {
+      /* Extend (or open) the current pass-through run. */
+      if (run_start == NULL) {
+        run_start = buf + pos;
+      }
+      continue;
+    }
+
+    /* Prefer a short escape such as \n; otherwise fall back to \uXXXX. */
+    esc = json_nice_escape(ch);
+    if (esc == NULL) {
+      _upb_snprintf(unicode_esc, sizeof(unicode_esc), "\\u%04x",
+                    (int)(unsigned char)ch);
+      esc = unicode_esc;
+    }
+
+    /* Flush whatever pass-through bytes precede this character... */
+    if (run_start != NULL) {
+      print_data(p, run_start, (buf + pos) - run_start);
+      run_start = NULL;
+    }
+    /* ...and then the escape sequence itself. */
+    print_data(p, esc, strlen(esc));
+  }
+
+  /* Flush a trailing pass-through run, if the chunk ended in one. */
+  if (run_start != NULL) {
+    print_data(p, run_start, (buf + len) - run_start);
+  }
+}
+
+#define CHKLENGTH(x) if (!(x)) return -1;
+
+/* Helpers that format floating point values according to our custom formats.
+ * Right now we use %.8g and %.17g for float/double, respectively, to match
+ * proto2::util::JsonFormat's defaults.  May want to change this later. */
+
+/* Formats |val| into |buf| (capacity |length|) using "%.17g".  Returns the
+ * number of characters written, or (size_t)-1 if nothing was written or the
+ * text did not fit. */
+static size_t fmt_double(double val, char* buf, size_t length) {
+  size_t written = _upb_snprintf(buf, length, "%.17g", val);
+  if (written == 0 || written >= length) return (size_t)-1;
+  return written;
+}
+
+/* Formats |val| into |buf| (capacity |length|) using "%.8g".  Returns the
+ * number of characters written, or (size_t)-1 if nothing was written or the
+ * text did not fit. */
+static size_t fmt_float(float val, char* buf, size_t length) {
+  size_t written = _upb_snprintf(buf, length, "%.8g", val);
+  if (written == 0 || written >= length) return (size_t)-1;
+  return written;
+}
+
+/* Writes "true" or "false" into |buf| (capacity |length|).  Returns the number
+ * of characters written, or (size_t)-1 if nothing was written or the text did
+ * not fit. */
+static size_t fmt_bool(bool val, char* buf, size_t length) {
+  const char *text = val ? "true" : "false";
+  size_t written = _upb_snprintf(buf, length, "%s", text);
+  if (written == 0 || written >= length) return (size_t)-1;
+  return written;
+}
+
+/* Formats the signed integer |val| in decimal into |buf| (capacity |length|).
+ * Returns the number of characters written, or (size_t)-1 if nothing was
+ * written or the text did not fit.
+ *
+ * The parameter is long long (printed with %lld) rather than long: long is
+ * only guaranteed to hold 32 bits, which would truncate 64-bit values and turn
+ * large unsigned 32-bit values negative on platforms where long is 32 bits. */
+static size_t fmt_int64(long long val, char* buf, size_t length) {
+  size_t n = _upb_snprintf(buf, length, "%lld", val);
+  CHKLENGTH(n > 0 && n < length);
+  return n;
+}
+
+/* Formats the unsigned integer |val| in decimal into |buf| (capacity
+ * |length|).  Returns the number of characters written, or (size_t)-1 if
+ * nothing was written or the text did not fit. */
+static size_t fmt_uint64(unsigned long long val, char* buf, size_t length) {
+  size_t written = _upb_snprintf(buf, length, "%llu", val);
+  if (written == 0 || written >= length) return (size_t)-1;
+  return written;
+}
+
+/* Prints an object key of the form "name": for a field, preceded by a comma
+ * when it is not the first element of the current frame.  |handler_data| is
+ * the strpc holding the field name.  Used by the scalar field handlers and by
+ * startseq for repeated fields.  Always returns true. */
+static bool putkey(void *closure, const void *handler_data) {
+  upb_json_printer *printer = closure;
+  const strpc *name = handler_data;
+
+  print_comma(printer);
+  print_data(printer, "\"", 1);
+  putstring(printer, name->ptr, name->len);
+  print_data(printer, "\":", 2);
+  return true;
+}
+
+#define CHKFMT(val) if ((val) == (size_t)-1) return false;
+#define CHK(val)    if (!(val)) return false;
+
+/* Generates three value handlers for the C type |type|, each formatting the
+ * value with |fmt_func| into a 64-byte stack buffer:
+ *   put<type>:       prints just the formatted value.
+ *   scalar_<type>:   prints the field key (via putkey) and then the value.
+ *   repeated_<type>: prints a separating comma if needed and then the value.
+ * Each handler returns false if formatting fails. */
+#define TYPE_HANDLERS(type, fmt_func)                                        \
+  static bool put##type(void *closure, const void *handler_data, type val) { \
+    upb_json_printer *p = closure;                                           \
+    char data[64];                                                           \
+    size_t length = fmt_func(val, data, sizeof(data));                       \
+    UPB_UNUSED(handler_data);                                                \
+    CHKFMT(length);                                                          \
+    print_data(p, data, length);                                             \
+    return true;                                                             \
+  }                                                                          \
+  static bool scalar_##type(void *closure, const void *handler_data,         \
+                            type val) {                                      \
+    CHK(putkey(closure, handler_data));                                      \
+    CHK(put##type(closure, handler_data, val));                              \
+    return true;                                                             \
+  }                                                                          \
+  static bool repeated_##type(void *closure, const void *handler_data,       \
+                              type val) {                                    \
+    upb_json_printer *p = closure;                                           \
+    print_comma(p);                                                          \
+    CHK(put##type(closure, handler_data, val));                              \
+    return true;                                                             \
+  }
+
+/* Generates putmapkey_<type>, which prints a map key of C type |type| as a
+ * quoted string followed by a colon, i.e. "<value>":  The value itself is
+ * printed by put<type>, so TYPE_HANDLERS(type, ...) must be instantiated
+ * first.  |fmt_func| is not referenced by the expansion. */
+#define TYPE_HANDLERS_MAPKEY(type, fmt_func)                                 \
+  static bool putmapkey_##type(void *closure, const void *handler_data,      \
+                            type val) {                                      \
+    upb_json_printer *p = closure;                                           \
+    print_data(p, "\"", 1);                                                  \
+    CHK(put##type(closure, handler_data, val));                              \
+    print_data(p, "\":", 2);                                                 \
+    return true;                                                             \
+  }
+
+/* Instantiate put/scalar/repeated handlers for every primitive value type.
+ * The 32-bit integer types reuse fmt_int64; their values are converted to
+ * that function's parameter type at the call. */
+TYPE_HANDLERS(double,   fmt_double)
+TYPE_HANDLERS(float,    fmt_float)
+TYPE_HANDLERS(bool,     fmt_bool)
+TYPE_HANDLERS(int32_t,  fmt_int64)
+TYPE_HANDLERS(uint32_t, fmt_int64)
+TYPE_HANDLERS(int64_t,  fmt_int64)
+TYPE_HANDLERS(uint64_t, fmt_uint64)
+
+/* double and float are not allowed to be map keys. */
+TYPE_HANDLERS_MAPKEY(bool,     fmt_bool)
+TYPE_HANDLERS_MAPKEY(int32_t,  fmt_int64)
+TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64)
+TYPE_HANDLERS_MAPKEY(int64_t,  fmt_int64)
+TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64)
+
+/* The generator macros are not needed past this point. */
+#undef TYPE_HANDLERS
+#undef TYPE_HANDLERS_MAPKEY
+
+/* Handler data attached to enum field handlers (see set_enum_hd). */
+typedef struct {
+  void *keyname;               /* strpc with the field name; passed to putkey. */
+  const upb_enumdef *enumdef;  /* Used to map numeric values to names. */
+} EnumHandlerData;
+
+/* Handler for a non-repeated enum field: prints the field key followed by the
+ * value's symbolic name as a quoted string, or by the bare number when the
+ * enum has no name for |val|. */
+static bool scalar_enum(void *closure, const void *handler_data,
+                        int32_t val) {
+  upb_json_printer *printer = closure;
+  const EnumHandlerData *enum_hd = handler_data;
+  const char *name;
+
+  if (!putkey(closure, enum_hd->keyname)) return false;
+
+  name = upb_enumdef_iton(enum_hd->enumdef, val);
+  if (name == NULL) {
+    /* Unknown value: fall back to the numeric form. */
+    putint32_t(closure, NULL, val);
+    return true;
+  }
+
+  print_data(printer, "\"", 1);
+  putstring(printer, name, strlen(name));
+  print_data(printer, "\"", 1);
+  return true;
+}
+
+/* Prints enum value |val| of |def| as its quoted symbolic name, or as a bare
+ * number when |def| has no name for it. */
+static void print_enum_symbolic_name(upb_json_printer *p,
+                                     const upb_enumdef *def,
+                                     int32_t val) {
+  const char *name = upb_enumdef_iton(def, val);
+  if (name == NULL) {
+    putint32_t(p, NULL, val);
+    return;
+  }
+  print_data(p, "\"", 1);
+  putstring(p, name, strlen(name));
+  print_data(p, "\"", 1);
+}
+
+/* Handler for one element of a repeated enum field: prints a separating comma
+ * if needed, then the enum value. */
+static bool repeated_enum(void *closure, const void *handler_data,
+                          int32_t val) {
+  upb_json_printer *printer = closure;
+  const EnumHandlerData *enum_hd = handler_data;
+
+  print_comma(printer);
+  print_enum_symbolic_name(printer, enum_hd->enumdef, val);
+  return true;
+}
+
+/* Handler for an enum-typed map value: prints just the enum value, with no
+ * key and no comma. */
+static bool mapvalue_enum(void *closure, const void *handler_data,
+                          int32_t val) {
+  upb_json_printer *printer = closure;
+  const EnumHandlerData *enum_hd = handler_data;
+
+  print_enum_symbolic_name(printer, enum_hd->enumdef, val);
+  return true;
+}
+
+/* Start-submessage handler for a non-repeated message field: prints the field
+ * key and continues with the same closure, or breaks if the key could not be
+ * printed. */
+static void *scalar_startsubmsg(void *closure, const void *handler_data) {
+  if (!putkey(closure, handler_data)) {
+    return UPB_BREAK;
+  }
+  return closure;
+}
+
+/* Start-submessage handler for an element of a repeated message field: only a
+ * separating comma is needed, no key. */
+static void *repeated_startsubmsg(void *closure, const void *handler_data) {
+  upb_json_printer *printer = closure;
+  UPB_UNUSED(handler_data);
+
+  print_comma(printer);
+  return closure;
+}
+
+/* Opens a JSON object: enters a new frame, marks it as having no elements
+ * yet, and prints the opening brace. */
+static void start_frame(upb_json_printer *p) {
+  p->first_elem_[++p->depth_] = true;
+  print_data(p, "{", 1);
+}
+
+/* Closes a JSON object: prints the closing brace and leaves the frame. */
+static void end_frame(upb_json_printer *p) {
+  print_data(p, "}", 1);
+  --p->depth_;
+}
+
+/* Start-message handler.  For the outermost message (depth 0) the output sink
+ * is started first; then a new object frame is opened. */
+static bool printer_startmsg(void *closure, const void *handler_data) {
+  upb_json_printer *printer = closure;
+  UPB_UNUSED(handler_data);
+
+  if (printer->depth_ == 0) {
+    upb_bytessink_start(printer->output_, 0, &printer->subc_);
+  }
+  start_frame(printer);
+  return true;
+}
+
+/* End-message handler.  Closes the current object frame and, once the
+ * outermost message has been closed (depth back at 0), ends the output
+ * sink. */
+static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) {
+  upb_json_printer *printer = closure;
+  UPB_UNUSED(s);
+  UPB_UNUSED(handler_data);
+
+  end_frame(printer);
+  if (printer->depth_ == 0) {
+    upb_bytessink_end(printer->output_);
+  }
+  return true;
+}
+
+/* Start-sequence handler for a repeated field: prints the field key, enters a
+ * new frame with no elements yet, and prints the opening bracket.
+ *
+ * Returns the closure to continue, or UPB_BREAK if the key could not be
+ * printed.  (This function returns a pointer, so the bool-returning CHK()
+ * macro is not appropriate here; UPB_BREAK matches scalar_startsubmsg.) */
+static void *startseq(void *closure, const void *handler_data) {
+  upb_json_printer *p = closure;
+  if (!putkey(closure, handler_data)) return UPB_BREAK;
+  p->depth_++;
+  p->first_elem_[p->depth_] = true;
+  print_data(p, "[", 1);
+  return closure;
+}
+
+/* End-sequence handler for a repeated field: prints the closing bracket and
+ * leaves the array's frame. */
+static bool endseq(void *closure, const void *handler_data) {
+  upb_json_printer *printer = closure;
+  UPB_UNUSED(handler_data);
+
+  print_data(printer, "]", 1);
+  --printer->depth_;
+  return true;
+}
+
+/* Start-sequence handler for a map field: prints the field key, enters a new
+ * frame with no elements yet, and prints the opening brace.
+ *
+ * Returns the closure to continue, or UPB_BREAK if the key could not be
+ * printed.  (This function returns a pointer, so the bool-returning CHK()
+ * macro is not appropriate here; UPB_BREAK matches scalar_startsubmsg.) */
+static void *startmap(void *closure, const void *handler_data) {
+  upb_json_printer *p = closure;
+  if (!putkey(closure, handler_data)) return UPB_BREAK;
+  p->depth_++;
+  p->first_elem_[p->depth_] = true;
+  print_data(p, "{", 1);
+  return closure;
+}
+
+/* End-sequence handler for a map field: prints the closing brace and leaves
+ * the map's frame. */
+static bool endmap(void *closure, const void *handler_data) {
+  upb_json_printer *printer = closure;
+  UPB_UNUSED(handler_data);
+
+  print_data(printer, "}", 1);
+  --printer->depth_;
+  return true;
+}
+
+/* String-chunk handler: prints |len| bytes of |str| as escaped JSON string
+ * content (without quotes) and reports the whole chunk as consumed. */
+static size_t putstr(void *closure, const void *handler_data, const char *str,
+                     size_t len, const upb_bufhandle *handle) {
+  upb_json_printer *printer = closure;
+  UPB_UNUSED(handle);
+  UPB_UNUSED(handler_data);
+
+  putstring(printer, str, len);
+  return len;
+}
+
+/* Prints |len| bytes of |str| as a quoted, Base64-encoded JSON string (JSON
+ * has no "bytes" type).  Returns |len|.
+ *
+ * The encoded text is staged in a fixed-size stack buffer that is flushed to
+ * the output whenever it is full, so inputs of any length are handled without
+ * writing past the end of the buffer. */
+static size_t putbytes(void *closure, const void *handler_data, const char *str,
+                       size_t len, const upb_bufhandle *handle) {
+  upb_json_printer *p = closure;
+
+  /* This is the regular base64, not the "web-safe" version. */
+  static const char base64[] =
+      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+  /* Staging buffer for encoded output. */
+  char data[16000];
+  const char *limit = data + sizeof(data);
+  const unsigned char *from = (const unsigned char*)str;
+  char *to = data;
+  size_t remaining = len;
+
+  UPB_UNUSED(handler_data);
+  UPB_UNUSED(handle);
+
+  print_data(p, "\"", 1);
+
+  /* Encode every complete 3-byte group as 4 output characters. */
+  while (remaining > 2) {
+    if (limit - to < 4) {
+      /* Buffer full: flush what has been encoded so far and start over. */
+      putstring(p, data, to - data);
+      to = data;
+    }
+
+    to[0] = base64[from[0] >> 2];
+    to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
+    to[2] = base64[((from[1] & 0xf) << 2) | (from[2] >> 6)];
+    to[3] = base64[from[2] & 0x3f];
+
+    remaining -= 3;
+    to += 4;
+    from += 3;
+  }
+
+  /* Make room for the final, padded group if there is one. */
+  if (remaining > 0 && limit - to < 4) {
+    putstring(p, data, to - data);
+    to = data;
+  }
+
+  /* Encode the 1 or 2 leftover bytes, padding the group with '='. */
+  switch (remaining) {
+    case 2:
+      to[0] = base64[from[0] >> 2];
+      to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
+      to[2] = base64[(from[1] & 0xf) << 2];
+      to[3] = '=';
+      to += 4;
+      break;
+    case 1:
+      to[0] = base64[from[0] >> 2];
+      to[1] = base64[((from[0] & 0x3) << 4)];
+      to[2] = '=';
+      to[3] = '=';
+      to += 4;
+      break;
+  }
+
+  putstring(p, data, to - data);
+  print_data(p, "\"", 1);
+  return len;
+}
+
+/* Start-string handler for a non-repeated string field: prints the field key
+ * and the opening quote.
+ *
+ * Returns the printer as the closure for the string handlers, or UPB_BREAK if
+ * the key could not be printed.  (This function returns a pointer, so the
+ * bool-returning CHK() macro is not appropriate here.) */
+static void *scalar_startstr(void *closure, const void *handler_data,
+                             size_t size_hint) {
+  upb_json_printer *p = closure;
+  UPB_UNUSED(size_hint);
+  if (!putkey(closure, handler_data)) return UPB_BREAK;
+  print_data(p, "\"", 1);
+  return p;
+}
+
+/* String-chunk handler for a non-repeated string field; forwards the chunk to
+ * putstr.  Returns |len|, or 0 if putstr reported 0. */
+static size_t scalar_str(void *closure, const void *handler_data,
+                         const char *str, size_t len,
+                         const upb_bufhandle *handle) {
+  const size_t consumed = putstr(closure, handler_data, str, len, handle);
+  if (consumed == 0) return 0;
+  return len;
+}
+
+/* End-string handler for a non-repeated string field: prints the closing
+ * quote. */
+static bool scalar_endstr(void *closure, const void *handler_data) {
+  upb_json_printer *printer = closure;
+  UPB_UNUSED(handler_data);
+
+  print_data(printer, "\"", 1);
+  return true;
+}
+
+/* Start-string handler for an element of a repeated string field: prints a
+ * separating comma if needed and the opening quote.  Returns the printer as
+ * the closure for the string handlers. */
+static void *repeated_startstr(void *closure, const void *handler_data,
+                               size_t size_hint) {
+  upb_json_printer *printer = closure;
+  UPB_UNUSED(size_hint);
+  UPB_UNUSED(handler_data);
+
+  print_comma(printer);
+  print_data(printer, "\"", 1);
+  return printer;
+}
+
+/* String-chunk handler for an element of a repeated string field; forwards
+ * the chunk to putstr.  Returns |len|, or 0 if putstr reported 0. */
+static size_t repeated_str(void *closure, const void *handler_data,
+                           const char *str, size_t len,
+                           const upb_bufhandle *handle) {
+  const size_t consumed = putstr(closure, handler_data, str, len, handle);
+  if (consumed == 0) return 0;
+  return len;
+}
+
+/* End-string handler for an element of a repeated string field: prints the
+ * closing quote. */
+static bool repeated_endstr(void *closure, const void *handler_data) {
+  upb_json_printer *printer = closure;
+  UPB_UNUSED(handler_data);
+
+  print_data(printer, "\"", 1);
+  return true;
+}
+
+/* Start-string handler shared by string map keys and string map values:
+ * prints only the opening quote (no key, no comma).  Returns the printer as
+ * the closure for the string handlers. */
+static void *mapkeyval_startstr(void *closure, const void *handler_data,
+                                size_t size_hint) {
+  upb_json_printer *printer = closure;
+  UPB_UNUSED(size_hint);
+  UPB_UNUSED(handler_data);
+
+  print_data(printer, "\"", 1);
+  return printer;
+}
+
+/* String-chunk handler for a string map key; forwards the chunk to putstr.
+ * Returns |len|, or 0 if putstr reported 0. */
+static size_t mapkey_str(void *closure, const void *handler_data,
+                         const char *str, size_t len,
+                         const upb_bufhandle *handle) {
+  const size_t consumed = putstr(closure, handler_data, str, len, handle);
+  if (consumed == 0) return 0;
+  return len;
+}
+
+/* End-string handler for a string map key: prints the closing quote together
+ * with the colon that separates the key from its value. */
+static bool mapkey_endstr(void *closure, const void *handler_data) {
+  upb_json_printer *printer = closure;
+  UPB_UNUSED(handler_data);
+
+  print_data(printer, "\":", 2);
+  return true;
+}
+
+/* End-string handler for a string map value: prints the closing quote. */
+static bool mapvalue_endstr(void *closure, const void *handler_data) {
+  upb_json_printer *printer = closure;
+  UPB_UNUSED(handler_data);
+
+  print_data(printer, "\"", 1);
+  return true;
+}
+
+/* Handler for a non-repeated bytes field: prints the field key followed by
+ * the Base64-encoded value.  Returns |len|, or 0 if either step reports
+ * 0/false. */
+static size_t scalar_bytes(void *closure, const void *handler_data,
+                           const char *str, size_t len,
+                           const upb_bufhandle *handle) {
+  if (!putkey(closure, handler_data)) return 0;
+  if (!putbytes(closure, handler_data, str, len, handle)) return 0;
+  return len;
+}
+
+/* Handler for an element of a repeated bytes field: prints a separating comma
+ * if needed, then the Base64-encoded value.  Returns |len|, or 0 if putbytes
+ * reported 0. */
+static size_t repeated_bytes(void *closure, const void *handler_data,
+                             const char *str, size_t len,
+                             const upb_bufhandle *handle) {
+  upb_json_printer *printer = closure;
+
+  print_comma(printer);
+  if (!putbytes(closure, handler_data, str, len, handle)) return 0;
+  return len;
+}
+
+/* Handler for a bytes-typed map key: prints the Base64-encoded key as a quoted
+ * string followed by the colon that separates it from the value.  Returns
+ * |len|.
+ *
+ * putbytes() returns the number of input bytes it consumed, which is 0 for an
+ * empty key.  That is not a failure, so its result must not be used to skip
+ * the colon; otherwise an empty key would be printed as "" with no
+ * separator. */
+static size_t mapkey_bytes(void *closure, const void *handler_data,
+                           const char *str, size_t len,
+                           const upb_bufhandle *handle) {
+  upb_json_printer *p = closure;
+  putbytes(closure, handler_data, str, len, handle);
+  print_data(p, ":", 1);
+  return len;
+}
+
+/* Allocates the EnumHandlerData for enum field |f| (its enum definition plus
+ * a strpc with the field name), registers it with |h| so that free() is called
+ * on it during handler cleanup, and installs it as the handler data of |attr|.
+ *
+ * NOTE(review): the result of malloc() is not checked before use. */
+static void set_enum_hd(upb_handlers *h,
+                        const upb_fielddef *f,
+                        upb_handlerattr *attr) {
+  EnumHandlerData *data = malloc(sizeof(*data));
+
+  data->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
+  data->keyname = newstrpc(h, f);
+
+  upb_handlers_addcleanup(h, data, free);
+  upb_handlerattr_sethandlerdata(attr, data);
+}
+
+/* Set up handlers for a mapentry submessage (i.e., an individual key/value pair
+ * in a map).
+ *
+ * TODO: Handle missing key, missing value, out-of-order key/value, or repeated
+ * key or value cases properly. The right way to do this is to allocate a
+ * temporary structure at the start of a mapentry submessage, store key and
+ * value data in it as key and value handlers are called, and then print the
+ * key/value pair once at the end of the submessage. If we don't do this, we
+ * should at least detect the case and throw an error. However, so far all of
+ * our sources that emit mapentry messages do so canonically (with one key
+ * field, and then one value field), so this is not a pressing concern at the
+ * moment. */
+void printer_sethandlers_mapentry(const void *closure, upb_handlers *h) {
+  const upb_msgdef *md = upb_handlers_msgdef(h);
+
+  /* A mapentry message is printed simply as '"key": value'. Rather than
+   * special-case key and value for every type below, we just handle both
+   * fields explicitly here. */
+  const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY);
+  const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE);
+
+  upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
+
+  UPB_UNUSED(closure);
+
+  /* Key handlers: each prints the key as a quoted string followed by ':'. */
+  switch (upb_fielddef_type(key_field)) {
+    case UPB_TYPE_INT32:
+      upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr);
+      break;
+    case UPB_TYPE_INT64:
+      upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr);
+      break;
+    case UPB_TYPE_UINT32:
+      upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr);
+      break;
+    case UPB_TYPE_UINT64:
+      upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr);
+      break;
+    case UPB_TYPE_BOOL:
+      upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr);
+      break;
+    case UPB_TYPE_STRING:
+      upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr);
+      upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr);
+      upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr);
+      break;
+    case UPB_TYPE_BYTES:
+      upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr);
+      break;
+    default:
+      /* Any other key type is treated as a programming error. */
+      assert(false);
+      break;
+  }
+
+  /* Value handlers: each prints the bare value, with no key and no comma. */
+  switch (upb_fielddef_type(value_field)) {
+    case UPB_TYPE_INT32:
+      upb_handlers_setint32(h, value_field, putint32_t, &empty_attr);
+      break;
+    case UPB_TYPE_INT64:
+      upb_handlers_setint64(h, value_field, putint64_t, &empty_attr);
+      break;
+    case UPB_TYPE_UINT32:
+      upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr);
+      break;
+    case UPB_TYPE_UINT64:
+      upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr);
+      break;
+    case UPB_TYPE_BOOL:
+      upb_handlers_setbool(h, value_field, putbool, &empty_attr);
+      break;
+    case UPB_TYPE_FLOAT:
+      upb_handlers_setfloat(h, value_field, putfloat, &empty_attr);
+      break;
+    case UPB_TYPE_DOUBLE:
+      upb_handlers_setdouble(h, value_field, putdouble, &empty_attr);
+      break;
+    case UPB_TYPE_STRING:
+      upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr);
+      upb_handlers_setstring(h, value_field, putstr, &empty_attr);
+      upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr);
+      break;
+    case UPB_TYPE_BYTES:
+      upb_handlers_setstring(h, value_field, putbytes, &empty_attr);
+      break;
+    case UPB_TYPE_ENUM: {
+      /* Enum values need handler data carrying the enum definition. */
+      upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
+      set_enum_hd(h, value_field, &enum_attr);
+      upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr);
+      upb_handlerattr_uninit(&enum_attr);
+      break;
+    }
+    case UPB_TYPE_MESSAGE:
+      /* No handler necessary -- the submsg handlers will print the message
+       * as appropriate. */
+      break;
+  }
+
+  upb_handlerattr_uninit(&empty_attr);
+}
+
+/* Registers the JSON printing handlers for the message type of |h|.
+ *
+ * Mapentry messages are delegated to printer_sethandlers_mapentry().  For any
+ * other message, start/end-message handlers are installed and then, for every
+ * field, the handlers matching its type and whether it is repeated.  Handlers
+ * that must print the field key receive a strpc with the field name as their
+ * handler data (name_attr); the others receive no handler data
+ * (empty_attr). */
+void printer_sethandlers(const void *closure, upb_handlers *h) {
+  const upb_msgdef *md = upb_handlers_msgdef(h);
+  bool is_mapentry = upb_msgdef_mapentry(md);
+  upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
+  upb_msg_field_iter i;
+
+  UPB_UNUSED(closure);
+
+  if (is_mapentry) {
+    /* mapentry messages are sufficiently different that we handle them
+     * separately. */
+    printer_sethandlers_mapentry(closure, h);
+    return;
+  }
+
+  upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
+  upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);
+
+/* Expands to the switch case for a primitive type: repeated fields get the
+ * repeated_<ctype> handler, all others the key-printing scalar_<ctype>
+ * handler.  Relies on the locals h, f, empty_attr and name_attr. */
+#define TYPE(type, name, ctype)                                               \
+  case type:                                                                  \
+    if (upb_fielddef_isseq(f)) {                                              \
+      upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr);            \
+    } else {                                                                  \
+      upb_handlers_set##name(h, f, scalar_##ctype, &name_attr);               \
+    }                                                                         \
+    break;
+
+  upb_msg_field_begin(&i, md);
+  for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
+    const upb_fielddef *f = upb_msg_iter_field(&i);
+
+    /* Handler data holding this field's name, for the key-printing handlers. */
+    upb_handlerattr name_attr = UPB_HANDLERATTR_INITIALIZER;
+    upb_handlerattr_sethandlerdata(&name_attr, newstrpc(h, f));
+
+    /* Maps are printed as objects, other repeated fields as arrays. */
+    if (upb_fielddef_ismap(f)) {
+      upb_handlers_setstartseq(h, f, startmap, &name_attr);
+      upb_handlers_setendseq(h, f, endmap, &name_attr);
+    } else if (upb_fielddef_isseq(f)) {
+      upb_handlers_setstartseq(h, f, startseq, &name_attr);
+      upb_handlers_setendseq(h, f, endseq, &empty_attr);
+    }
+
+    switch (upb_fielddef_type(f)) {
+      TYPE(UPB_TYPE_FLOAT,  float,  float);
+      TYPE(UPB_TYPE_DOUBLE, double, double);
+      TYPE(UPB_TYPE_BOOL,   bool,   bool);
+      TYPE(UPB_TYPE_INT32,  int32,  int32_t);
+      TYPE(UPB_TYPE_UINT32, uint32, uint32_t);
+      TYPE(UPB_TYPE_INT64,  int64,  int64_t);
+      TYPE(UPB_TYPE_UINT64, uint64, uint64_t);
+      case UPB_TYPE_ENUM: {
+        /* For now, we always emit symbolic names for enums. We may want an
+         * option later to control this behavior, but we will wait for a real
+         * need first. */
+        upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
+        set_enum_hd(h, f, &enum_attr);
+
+        if (upb_fielddef_isseq(f)) {
+          upb_handlers_setint32(h, f, repeated_enum, &enum_attr);
+        } else {
+          upb_handlers_setint32(h, f, scalar_enum, &enum_attr);
+        }
+
+        upb_handlerattr_uninit(&enum_attr);
+        break;
+      }
+      case UPB_TYPE_STRING:
+        if (upb_fielddef_isseq(f)) {
+          upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr);
+          upb_handlers_setstring(h, f, repeated_str, &empty_attr);
+          upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr);
+        } else {
+          upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr);
+          upb_handlers_setstring(h, f, scalar_str, &empty_attr);
+          upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr);
+        }
+        break;
+      case UPB_TYPE_BYTES:
+        /* XXX: this doesn't support strings that span buffers yet. The base64
+         * encoder will need to be made resumable for this to work properly. */
+        if (upb_fielddef_isseq(f)) {
+          upb_handlers_setstring(h, f, repeated_bytes, &empty_attr);
+        } else {
+          upb_handlers_setstring(h, f, scalar_bytes, &name_attr);
+        }
+        break;
+      case UPB_TYPE_MESSAGE:
+        if (upb_fielddef_isseq(f)) {
+          upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &name_attr);
+        } else {
+          upb_handlers_setstartsubmsg(h, f, scalar_startsubmsg, &name_attr);
+        }
+        break;
+    }
+
+    upb_handlerattr_uninit(&name_attr);
+  }
+
+  upb_handlerattr_uninit(&empty_attr);
+#undef TYPE
+}
+
+/* Resets the printer's frame depth to zero.  No other field is touched. */
+static void json_printer_reset(upb_json_printer *p) {
+  p->depth_ = 0;
+}
+
+
+/* Public API *****************************************************************/
+
+/* Allocates a JSON printer from |e| that writes to |output|, and binds its
+ * input sink to handlers |h| with the printer itself as the closure.
+ *
+ * Returns the new printer, or NULL if the allocation fails. */
+upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
+                                          upb_bytessink *output) {
+#ifndef NDEBUG
+  size_t size_before = upb_env_bytesallocated(e);
+#endif
+
+  upb_json_printer *printer = upb_env_malloc(e, sizeof(upb_json_printer));
+  if (printer == NULL) {
+    return NULL;
+  }
+
+  printer->output_ = output;
+  json_printer_reset(printer);
+  upb_sink_reset(&printer->input_, h, printer);
+
+  /* If this fails, increase the value in printer.h. */
+  assert(upb_env_bytesallocated(e) - size_before <= UPB_JSON_PRINTER_SIZE);
+  return printer;
+}
+
+/* Returns the sink embedded in |p|; upb_json_printer_create() binds this sink
+ * to the printer's handlers. */
+upb_sink *upb_json_printer_input(upb_json_printer *p) {
+  return &p->input_;
+}
+
+/* Creates frozen handlers for message type |md|, populated by
+ * printer_sethandlers() with no closure, and owned by |owner|. */
+const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md,
+                                                 const void *owner) {
+  return upb_handlers_newfrozen(md, owner, printer_sethandlers, NULL);
+}

+ 8217 - 0
php/ext/google/protobuf/upb.h

@@ -0,0 +1,8217 @@
+// Amalgamated source file
+/*
+** Defs are upb's internal representation of the constructs that can appear
+** in a .proto file:
+**
+** - upb::MessageDef (upb_msgdef): describes a "message" construct.
+** - upb::FieldDef (upb_fielddef): describes a message field.
+** - upb::EnumDef (upb_enumdef): describes an enum.
+** - upb::OneofDef (upb_oneofdef): describes a oneof.
+** - upb::Def (upb_def): base class of all the others.
+**
+** TODO: definitions of services.
+**
+** Like upb_refcounted objects, defs are mutable only until frozen, and are
+** only thread-safe once frozen.
+**
+** This is a mixed C/C++ interface that offers a full API to both languages.
+** See the top-level README for more information.
+*/
+
+#ifndef UPB_DEF_H_
+#define UPB_DEF_H_
+
+/*
+** upb::RefCounted (upb_refcounted)
+**
+** A refcounting scheme that supports circular refs.  It accomplishes this by
+** partitioning the set of objects into groups such that no cycle spans groups;
+** we can then reference-count the group as a whole and ignore refs within the
+** group.  When objects are mutable, these groups are computed very
+** conservatively; we group any objects that have ever had a link between them.
+** When objects are frozen, we compute strongly-connected components which
+** allows us to be precise and only group objects that are actually cyclic.
+**
+** This is a mixed C/C++ interface that offers a full API to both languages.
+** See the top-level README for more information.
+*/
+
+#ifndef UPB_REFCOUNTED_H_
+#define UPB_REFCOUNTED_H_
+
+/*
+** upb_table
+**
+** This header is INTERNAL-ONLY!  Its interfaces are not public or stable!
+** This file defines very fast int->upb_value (inttable) and string->upb_value
+** (strtable) hash tables.
+**
+** The table uses chained scatter with Brent's variation (inspired by the Lua
+** implementation of hash tables).  The hash function for strings is Austin
+** Appleby's "MurmurHash."
+**
+** The inttable uses uintptr_t as its key, which guarantees it can be used to
+** store pointers or integers of at least 32 bits (upb isn't really useful on
+** systems where sizeof(void*) < 4).
+**
+** The table must be homogenous (all values of the same type).  In debug
+** mode, we check this on insert and lookup.
+*/
+
+#ifndef UPB_TABLE_H_
+#define UPB_TABLE_H_
+
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+/*
+** This file contains shared definitions that are widely used across upb.
+**
+** This is a mixed C/C++ interface that offers a full API to both languages.
+** See the top-level README for more information.
+*/
+
+#ifndef UPB_H_
+#define UPB_H_
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+/* UPB_INLINE: inline if possible, emit standalone code if required. */
+#ifdef __cplusplus
+#define UPB_INLINE inline
+#elif defined (__GNUC__)
+#define UPB_INLINE static __inline__
+#else
+#define UPB_INLINE static
+#endif
+
+/* Define UPB_BIG_ENDIAN manually if you're on big endian and your compiler
+ * doesn't provide these preprocessor symbols. */
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#define UPB_BIG_ENDIAN
+#endif
+
+/* Macros for function attributes on compilers that support them. */
+#ifdef __GNUC__
+#define UPB_FORCEINLINE __inline__ __attribute__((always_inline))
+#define UPB_NOINLINE __attribute__((noinline))
+#define UPB_NORETURN __attribute__((__noreturn__))
+#else  /* !defined(__GNUC__) */
+#define UPB_FORCEINLINE
+#define UPB_NOINLINE
+#define UPB_NORETURN
+#endif
+
+/* A few hacky workarounds for functions not in C89.
+ * For internal use only!
+ * TODO(haberman): fix these by including our own implementations, or finding
+ * another workaround.
+ */
+#ifdef __GNUC__
+#define _upb_snprintf __builtin_snprintf
+#define _upb_vsnprintf __builtin_vsnprintf
+#define _upb_va_copy(a, b) __va_copy(a, b)
+#elif __STDC_VERSION__ >= 199901L
+/* C99 versions. */
+#define _upb_snprintf snprintf
+#define _upb_vsnprintf vsnprintf
+#define _upb_va_copy(a, b) va_copy(a, b)
+#else
+#error Need implementations of [v]snprintf and va_copy
+#endif
+
+
+#if ((defined(__cplusplus) && __cplusplus >= 201103L) || \
+      defined(__GXX_EXPERIMENTAL_CXX0X__)) && !defined(UPB_NO_CXX11)
+#define UPB_CXX11
+#endif
+
+/* UPB_DISALLOW_COPY_AND_ASSIGN()
+ * UPB_DISALLOW_POD_OPS()
+ *
+ * Declare these in the "private" section of a C++ class to forbid copy/assign
+ * or all POD ops (construct, destruct, copy, assign) on that class. */
+#ifdef UPB_CXX11
+#include <type_traits>
+#define UPB_DISALLOW_COPY_AND_ASSIGN(class_name) \
+  class_name(const class_name&) = delete; \
+  void operator=(const class_name&) = delete;
+#define UPB_DISALLOW_POD_OPS(class_name, full_class_name) \
+  class_name() = delete; \
+  ~class_name() = delete; \
+  UPB_DISALLOW_COPY_AND_ASSIGN(class_name)
+#define UPB_ASSERT_STDLAYOUT(type) \
+  static_assert(std::is_standard_layout<type>::value, \
+                #type " must be standard layout");
+#else  /* !defined(UPB_CXX11) */
+#define UPB_DISALLOW_COPY_AND_ASSIGN(class_name) \
+  class_name(const class_name&); \
+  void operator=(const class_name&);
+#define UPB_DISALLOW_POD_OPS(class_name, full_class_name) \
+  class_name(); \
+  ~class_name(); \
+  UPB_DISALLOW_COPY_AND_ASSIGN(class_name)
+#define UPB_ASSERT_STDLAYOUT(type)
+#endif
+
+/* UPB_DECLARE_TYPE()
+ * UPB_DECLARE_DERIVED_TYPE()
+ * UPB_DECLARE_DERIVED_TYPE2()
+ *
+ * Macros for declaring C and C++ types both, including inheritance.
+ * The inheritance doesn't use real C++ inheritance, to stay compatible with C.
+ *
+ * These macros also provide upcasts:
+ *  - in C: types-specific functions (ie. upb_foo_upcast(foo))
+ *  - in C++: upb::upcast(foo) along with implicit conversions
+ *
+ * Downcasts are not provided, but upb/def.h defines downcasts for upb::Def. */
+
+#define UPB_C_UPCASTS(ty, base)                                      \
+  UPB_INLINE base *ty ## _upcast_mutable(ty *p) { return (base*)p; } \
+  UPB_INLINE const base *ty ## _upcast(const ty *p) { return (const base*)p; }
+
+#define UPB_C_UPCASTS2(ty, base, base2)                                 \
+  UPB_C_UPCASTS(ty, base)                                               \
+  UPB_INLINE base2 *ty ## _upcast2_mutable(ty *p) { return (base2*)p; } \
+  UPB_INLINE const base2 *ty ## _upcast2(const ty *p) { return (const base2*)p; }
+
+#ifdef __cplusplus
+
+#define UPB_BEGIN_EXTERN_C extern "C" {
+#define UPB_END_EXTERN_C }
+#define UPB_PRIVATE_FOR_CPP private:
+#define UPB_DECLARE_TYPE(cppname, cname) typedef cppname cname;
+
+#define UPB_DECLARE_DERIVED_TYPE(cppname, cppbase, cname, cbase)  \
+  UPB_DECLARE_TYPE(cppname, cname)                                \
+  UPB_C_UPCASTS(cname, cbase)                                     \
+  namespace upb {                                                 \
+  template <>                                                     \
+  class Pointer<cppname> : public PointerBase<cppname, cppbase> { \
+   public:                                                        \
+    explicit Pointer(cppname* ptr) : PointerBase(ptr) {}          \
+  };                                                              \
+  template <>                                                     \
+  class Pointer<const cppname>                                    \
+      : public PointerBase<const cppname, const cppbase> {        \
+   public:                                                        \
+    explicit Pointer(const cppname* ptr) : PointerBase(ptr) {}    \
+  };                                                              \
+  }
+
+#define UPB_DECLARE_DERIVED_TYPE2(cppname, cppbase, cppbase2, cname, cbase,  \
+                                  cbase2)                                    \
+  UPB_DECLARE_TYPE(cppname, cname)                                           \
+  UPB_C_UPCASTS2(cname, cbase, cbase2)                                       \
+  namespace upb {                                                            \
+  template <>                                                                \
+  class Pointer<cppname> : public PointerBase2<cppname, cppbase, cppbase2> { \
+   public:                                                                   \
+    explicit Pointer(cppname* ptr) : PointerBase2(ptr) {}                    \
+  };                                                                         \
+  template <>                                                                \
+  class Pointer<const cppname>                                               \
+      : public PointerBase2<const cppname, const cppbase, const cppbase2> {  \
+   public:                                                                   \
+    explicit Pointer(const cppname* ptr) : PointerBase2(ptr) {}              \
+  };                                                                         \
+  }
+
+#else  /* !defined(__cplusplus) */
+
+#define UPB_BEGIN_EXTERN_C
+#define UPB_END_EXTERN_C
+#define UPB_PRIVATE_FOR_CPP
+#define UPB_DECLARE_TYPE(cppname, cname) \
+  struct cname;                          \
+  typedef struct cname cname;
+#define UPB_DECLARE_DERIVED_TYPE(cppname, cppbase, cname, cbase) \
+  UPB_DECLARE_TYPE(cppname, cname)                               \
+  UPB_C_UPCASTS(cname, cbase)
+#define UPB_DECLARE_DERIVED_TYPE2(cppname, cppbase, cppbase2,    \
+                                  cname, cbase, cbase2)          \
+  UPB_DECLARE_TYPE(cppname, cname)                               \
+  UPB_C_UPCASTS2(cname, cbase, cbase2)
+
+#endif  /* defined(__cplusplus) */
+
+#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
+#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))
+
+/* Marks "var" as deliberately unused.  The argument is parenthesized so that
+ * any expression can be passed safely. */
+#define UPB_UNUSED(var) (void)(var)
+
+/* For asserting something about a variable when the variable is not used for
+ * anything else.  This prevents "unused variable" warnings when assert() is
+ * compiled out (NDEBUG).
+ *
+ * Expands to a single comma expression rather than two statements, so it
+ * still behaves as one statement when used as the unbraced body of an
+ * if/else or a loop. */
+#define UPB_ASSERT_VAR(var, predicate) (UPB_UNUSED(var), assert(predicate))
+
+/* Generic function type. */
+typedef void upb_func();
+
+/* C++ Casts ******************************************************************/
+
+#ifdef __cplusplus
+
+namespace upb {
+
+template <class T> class Pointer;
+
+/* Casts to a subclass.  The caller must know that cast is correct; an
+ * incorrect cast will throw an assertion failure in debug mode.
+ *
+ * Example:
+ *   upb::Def* def = GetDef();
+ *   // Assert-fails if this was not actually a MessageDef.
+ *   upb::MessageDef* md = upb::down_cast<upb::MessageDef*>(def);
+ *
+ * Note that downcasts are only defined for some types (at the moment you can
+ * only downcast from a upb::Def to a specific Def type). */
+template<class To, class From> To down_cast(From* f);
+
+/* Casts to a subclass.  If the class does not actually match the given To type,
+ * returns NULL.
+ *
+ * Example:
+ *   upb::Def* def = GetDef();
+ *   // md will be NULL if this was not actually a MessageDef.
+ *   upb::MessageDef* md = upb::dyn_cast<upb::MessageDef*>(def);
+ *
+ * Note that dynamic casts are only defined for some types (at the moment you
+ * can only downcast from a upb::Def to a specific Def type). */
+template<class To, class From> To dyn_cast(From* f);
+
+/* Casts to any base class, or the type itself (ie. can be a no-op).
+ *
+ * Example:
+ *   upb::MessageDef* md = GetDef();
+ *   // This will fail to compile if this wasn't actually a base class.
+ *   upb::Def* def = upb::upcast(md);
+ */
+template <class T> inline Pointer<T> upcast(T *f) { return Pointer<T>(f); }
+
+/* Attempt upcast to specific base class.
+ *
+ * Example:
+ *   upb::MessageDef* md = GetDef();
+ *   upb::upcast_to<upb::Def>(md)->MethodOnDef();
+ */
+template <class T, class F> inline T* upcast_to(F *f) {
+  return static_cast<T*>(upcast(f));
+}
+
+/* PointerBase<T>: implementation detail of upb::upcast().
+ * It is implicitly convertible to pointers to the Base class(es).
+ */
+template <class T, class Base>
+class PointerBase {
+ public:
+  explicit PointerBase(T* ptr) : ptr_(ptr) {}
+  operator T*() { return ptr_; }
+  operator Base*() { return (Base*)ptr_; }
+
+ private:
+  T* ptr_;
+};
+
+template <class T, class Base, class Base2>
+class PointerBase2 : public PointerBase<T, Base> {
+ public:
+  explicit PointerBase2(T* ptr) : PointerBase<T, Base>(ptr) {}
+  operator Base2*() { return Pointer<Base>(*this); }
+};
+
+}
+
+#endif
+
+
+/* upb::reffed_ptr ************************************************************/
+
+#ifdef __cplusplus
+
+#include <algorithm>  /* For std::swap(). */
+
+namespace upb {
+
+/* Provides RAII semantics for upb refcounted objects.  Each reffed_ptr owns a
+ * ref on whatever object it points to (if any). */
+template <class T> class reffed_ptr {
+ public:
+  reffed_ptr() : ptr_(NULL) {}
+
+  /* If ref_donor is NULL, takes a new ref, otherwise adopts from ref_donor. */
+  template <class U>
+  reffed_ptr(U* val, const void* ref_donor = NULL)
+      : ptr_(upb::upcast(val)) {
+    if (ref_donor) {
+      assert(ptr_);
+      ptr_->DonateRef(ref_donor, this);
+    } else if (ptr_) {
+      ptr_->Ref(this);
+    }
+  }
+
+  template <class U>
+  reffed_ptr(const reffed_ptr<U>& other)
+      : ptr_(upb::upcast(other.get())) {
+    if (ptr_) ptr_->Ref(this);
+  }
+
+  ~reffed_ptr() { if (ptr_) ptr_->Unref(this); }
+
+  template <class U>
+  reffed_ptr& operator=(const reffed_ptr<U>& other) {
+    reset(other.get());
+    return *this;
+  }
+
+  reffed_ptr& operator=(const reffed_ptr& other) {
+    reset(other.get());
+    return *this;
+  }
+
+  /* TODO(haberman): add C++11 move construction/assignment for greater
+   * efficiency. */
+
+  void swap(reffed_ptr& other) {
+    if (ptr_ == other.ptr_) {
+      return;
+    }
+
+    if (ptr_) ptr_->DonateRef(this, &other);
+    if (other.ptr_) other.ptr_->DonateRef(&other, this);
+    std::swap(ptr_, other.ptr_);
+  }
+
+  T& operator*() const {
+    assert(ptr_);
+    return *ptr_;
+  }
+
+  T* operator->() const {
+    assert(ptr_);
+    return ptr_;
+  }
+
+  T* get() const { return ptr_; }
+
+  /* If ref_donor is NULL, takes a new ref, otherwise adopts from ref_donor. */
+  template <class U>
+  void reset(U* ptr = NULL, const void* ref_donor = NULL) {
+    reffed_ptr(ptr, ref_donor).swap(*this);
+  }
+
+  template <class U>
+  reffed_ptr<U> down_cast() {
+    return reffed_ptr<U>(upb::down_cast<U*>(get()));
+  }
+
+  template <class U>
+  reffed_ptr<U> dyn_cast() {
+    return reffed_ptr<U>(upb::dyn_cast<U*>(get()));
+  }
+
+  /* Plain release() is unsafe; if we were the only owner, it would leak the
+   * object.  Instead we provide this: our ref is donated to new_owner, this
+   * reffed_ptr is left empty, and the raw pointer is returned.
+   *
+   * If this reffed_ptr is already empty there is no ref to donate, so NULL is
+   * returned without touching the (nonexistent) object. */
+  T* ReleaseTo(const void* new_owner) {
+    T* ret = ptr_;
+    if (ret) ret->DonateRef(this, new_owner);
+    ptr_ = NULL;
+    return ret;
+  }
+
+ private:
+  T* ptr_;
+};
+
+}  /* namespace upb */
+
+#endif  /* __cplusplus */
+
+
+/* upb::Status ****************************************************************/
+
+#ifdef __cplusplus
+namespace upb {
+class ErrorSpace;
+class Status;
+}
+#endif
+
+UPB_DECLARE_TYPE(upb::ErrorSpace, upb_errorspace)
+UPB_DECLARE_TYPE(upb::Status, upb_status)
+
+/* The maximum length of an error message before it will get truncated. */
+#define UPB_STATUS_MAX_MESSAGE 128
+
+/* An error callback function is used to report errors from some component.
+ * The function can return "true" to indicate that the component should try
+ * to recover and proceed, but this is not always possible. */
+typedef bool upb_errcb_t(void *closure, const upb_status* status);
+
+#ifdef __cplusplus
+class upb::ErrorSpace {
+#else
+struct upb_errorspace {
+#endif
+  const char *name;
+  /* Sets the error message in the status object according to this
+   * code. */
+  void (*set_message)(upb_status* status, int code);
+};
+
+#ifdef __cplusplus
+
+/* Object representing a success or failure status.
+ * It owns no resources and allocates no memory, so it should work
+ * even in OOM situations. */
+
+class upb::Status {
+ public:
+  Status();
+
+  /* Returns true if there is no error. */
+  bool ok() const;
+
+  /* Optional error space and code, useful if the caller wants to
+   * programmatically check the specific kind of error. */
+  ErrorSpace* error_space();
+  int code() const;
+
+  const char *error_message() const;
+
+  /* The error message will be truncated if it is longer than
+   * UPB_STATUS_MAX_MESSAGE-4. */
+  void SetErrorMessage(const char* msg);
+  void SetFormattedErrorMessage(const char* fmt, ...);
+
+  /* If there is no error message already, this will use the ErrorSpace to
+   * populate the error message for this code.  The caller can still call
+   * SetErrorMessage() to give a more specific message. */
+  void SetErrorCode(ErrorSpace* space, int code);
+
+  /* Resets the status to a successful state with no message. */
+  void Clear();
+
+  void CopyFrom(const Status& other);
+
+ private:
+  UPB_DISALLOW_COPY_AND_ASSIGN(Status)
+#else
+struct upb_status {
+#endif
+  bool ok_;
+
+  /* Specific status code defined by some error space (optional). */
+  int code_;
+  upb_errorspace *error_space_;
+
+  /* Error message; NULL-terminated. */
+  char msg[UPB_STATUS_MAX_MESSAGE];
+};
+
+#define UPB_STATUS_INIT {true, 0, NULL, {0}}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The returned string is invalidated by any other call into the status. */
+const char *upb_status_errmsg(const upb_status *status);
+bool upb_ok(const upb_status *status);
+upb_errorspace *upb_status_errspace(const upb_status *status);
+int upb_status_errcode(const upb_status *status);
+
+/* Any of the functions that write to a status object allow status to be NULL,
+ * to support use cases where the function's caller does not care about the
+ * status message. */
+void upb_status_clear(upb_status *status);
+void upb_status_seterrmsg(upb_status *status, const char *msg);
+void upb_status_seterrf(upb_status *status, const char *fmt, ...);
+void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args);
+void upb_status_seterrcode(upb_status *status, upb_errorspace *space, int code);
+void upb_status_copy(upb_status *to, const upb_status *from);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+
+namespace upb {
+
+/* C++ Wrappers: each Status method forwards to the corresponding C
+ * upb_status_* function, passing "this" as the status object.
+ *
+ * NOTE(review): Status::error_space() and Status::code() are declared in the
+ * class but have no inline wrapper in this group -- confirm they are defined
+ * elsewhere (upb_status_errspace() / upb_status_errcode() look like the
+ * matching C functions). */
+inline Status::Status() { Clear(); }
+inline bool Status::ok() const { return upb_ok(this); }
+inline const char* Status::error_message() const {
+  return upb_status_errmsg(this);
+}
+inline void Status::SetErrorMessage(const char* msg) {
+  upb_status_seterrmsg(this, msg);
+}
+inline void Status::SetFormattedErrorMessage(const char* fmt, ...) {
+  /* Forward the variadic arguments through the va_list variant. */
+  va_list args;
+  va_start(args, fmt);
+  upb_status_vseterrf(this, fmt, args);
+  va_end(args);
+}
+inline void Status::SetErrorCode(ErrorSpace* space, int code) {
+  upb_status_seterrcode(this, space, code);
+}
+inline void Status::Clear() { upb_status_clear(this); }
+inline void Status::CopyFrom(const Status& other) {
+  upb_status_copy(this, &other);
+}
+
+}  /* namespace upb */
+
+#endif
+
+#endif  /* UPB_H_ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* upb_value ******************************************************************/
+
+/* A tagged union (stored untagged inside the table) so that we can check that
+ * clients calling table accessors are correctly typed without having to have
+ * an explosion of accessors. */
+typedef enum {
+  UPB_CTYPE_INT32    = 1,
+  UPB_CTYPE_INT64    = 2,
+  UPB_CTYPE_UINT32   = 3,
+  UPB_CTYPE_UINT64   = 4,
+  UPB_CTYPE_BOOL     = 5,
+  UPB_CTYPE_CSTR     = 6,
+  UPB_CTYPE_PTR      = 7,
+  UPB_CTYPE_CONSTPTR = 8,
+  UPB_CTYPE_FPTR     = 9
+} upb_ctype_t;
+
+typedef struct {
+  uint64_t val;
+#ifndef NDEBUG
+  /* In debug mode we carry the value type around also so we can check accesses
+   * to be sure the right member is being read. */
+  upb_ctype_t ctype;
+#endif
+} upb_value;
+
+#ifdef NDEBUG
+#define SET_TYPE(dest, val)      UPB_UNUSED(val)
+#else
+#define SET_TYPE(dest, val) dest = val
+#endif
+
+/* Like strdup(), which isn't always available since it's not ANSI C. */
+char *upb_strdup(const char *s);
+/* Variant that works with a length-delimited rather than NULL-delimited string,
+ * as supported by strtable. */
+char *upb_strdup2(const char *s, size_t len);
+
+/* Stores the raw 64-bit payload "val" in *v.  The type tag "ctype" is recorded
+ * only when NDEBUG is not defined; with NDEBUG, SET_TYPE just marks it
+ * unused. */
+UPB_INLINE void _upb_value_setval(upb_value *v, uint64_t val,
+                                  upb_ctype_t ctype) {
+  v->val = val;
+  SET_TYPE(v->ctype, ctype);
+}
+
+/* Returns a new upb_value built from the raw 64-bit payload "val" and the type
+ * tag "ctype" (by-value counterpart of _upb_value_setval()). */
+UPB_INLINE upb_value _upb_value_val(uint64_t val, upb_ctype_t ctype) {
+  upb_value ret;
+  _upb_value_setval(&ret, val, ctype);
+  return ret;
+}
+
+/* For each value ctype, define the following set of functions:
+ *
+ * // Get/set an int32 from a upb_value.
+ * int32_t upb_value_getint32(upb_value val);
+ * void upb_value_setint32(upb_value *val, int32_t cval);
+ *
+ * // Construct a new upb_value from an int32.
+ * upb_value upb_value_int32(int32_t val); */
+#define FUNCS(name, membername, type_t, converter, proto_type) \
+  UPB_INLINE void upb_value_set ## name(upb_value *val, type_t cval) { \
+    val->val = (converter)cval; \
+    SET_TYPE(val->ctype, proto_type); \
+  } \
+  UPB_INLINE upb_value upb_value_ ## name(type_t val) { \
+    upb_value ret; \
+    upb_value_set ## name(&ret, val); \
+    return ret; \
+  } \
+  UPB_INLINE type_t upb_value_get ## name(upb_value val) { \
+    assert(val.ctype == proto_type); \
+    return (type_t)(converter)val.val; \
+  }
+
+FUNCS(int32,    int32,        int32_t,      int32_t,    UPB_CTYPE_INT32)
+FUNCS(int64,    int64,        int64_t,      int64_t,    UPB_CTYPE_INT64)
+FUNCS(uint32,   uint32,       uint32_t,     uint32_t,   UPB_CTYPE_UINT32)
+FUNCS(uint64,   uint64,       uint64_t,     uint64_t,   UPB_CTYPE_UINT64)
+FUNCS(bool,     _bool,        bool,         bool,       UPB_CTYPE_BOOL)
+FUNCS(cstr,     cstr,         char*,        uintptr_t,  UPB_CTYPE_CSTR)
+FUNCS(ptr,      ptr,          void*,        uintptr_t,  UPB_CTYPE_PTR)
+FUNCS(constptr, constptr,     const void*,  uintptr_t,  UPB_CTYPE_CONSTPTR)
+FUNCS(fptr,     fptr,         upb_func*,    uintptr_t,  UPB_CTYPE_FPTR)
+
+#undef FUNCS
+#undef SET_TYPE
+
+
+/* upb_tabkey *****************************************************************/
+
+/* Either:
+ *   1. an actual integer key, or
+ *   2. a pointer to a string prefixed by its uint32_t length, owned by us.
+ *
+ * ...depending on whether this is a string table or an int table.  We would
+ * make this a union of those two types, but C89 doesn't support statically
+ * initializing a non-first union member. */
+typedef uintptr_t upb_tabkey;
+
+#define UPB_TABKEY_NUM(n) n
+#define UPB_TABKEY_NONE 0
+/* The preprocessor isn't quite powerful enough to turn the compile-time string
+ * length into a byte-wise string representation, so code generation needs to
+ * help it along.
+ *
+ * "len1" is the low byte and len4 is the high byte. */
+#ifdef UPB_BIG_ENDIAN
+#define UPB_TABKEY_STR(len1, len2, len3, len4, strval) \
+    (uintptr_t)(len4 len3 len2 len1 strval)
+#else
+#define UPB_TABKEY_STR(len1, len2, len3, len4, strval) \
+    (uintptr_t)(len1 len2 len3 len4 strval)
+#endif
+
+/* Decodes a string-table key.  "key" is a pointer to a uint32_t length
+ * followed directly by the string bytes.  When "len" is non-NULL the length
+ * prefix is copied into *len.  Returns a pointer to the first string byte. */
+UPB_INLINE char *upb_tabstr(upb_tabkey key, uint32_t *len) {
+  char *base = (char*)key;
+  char *str = base + sizeof(uint32_t);
+  if (len != NULL) {
+    memcpy(len, base, sizeof(uint32_t));
+  }
+  return str;
+}
+
+
+/* upb_tabval *****************************************************************/
+
+#ifdef __cplusplus
+
+/* Static initialization not supported.
+ *
+ * This separate definition is necessary because in C++, UINTPTR_MAX isn't
+ * reliably available. */
+typedef struct {
+  uint64_t val;
+} upb_tabval;
+
+#else
+
+/* C -- supports static initialization, but to support static initialization of
+ * both integers and pointers for both 32 and 64 bit targets, it takes a little
+ * bit of doing. */
+
+#if UINTPTR_MAX == 0xffffffffffffffffULL
+#define UPB_PTR_IS_64BITS
+#elif UINTPTR_MAX != 0xffffffff
+#error Could not determine how many bits pointers are.
+#endif
+
+typedef union {
+  /* For static initialization.
+   *
+   * Unfortunately this ugliness is necessary -- it is the only way that we can,
+   * with -std=c89 -pedantic, statically initialize this to either a pointer or
+   * an integer on 32-bit platforms. */
+  struct {
+#ifdef UPB_PTR_IS_64BITS
+    uintptr_t val;
+#else
+    uintptr_t val1;
+    uintptr_t val2;
+#endif
+  } staticinit;
+
+  /* The normal accessor that we use for everything at runtime. */
+  uint64_t val;
+} upb_tabval;
+
+#ifdef UPB_PTR_IS_64BITS
+#define UPB_TABVALUE_INT_INIT(v) {{v}}
+#define UPB_TABVALUE_EMPTY_INIT  {{-1}}
+#else
+
+/* 32-bit pointers */
+
+#ifdef UPB_BIG_ENDIAN
+#define UPB_TABVALUE_INT_INIT(v) {{0, v}}
+#define UPB_TABVALUE_EMPTY_INIT  {{-1, -1}}
+#else
+#define UPB_TABVALUE_INT_INIT(v) {{v, 0}}
+#define UPB_TABVALUE_EMPTY_INIT  {{-1, -1}}
+#endif
+
+#endif
+
+#define UPB_TABVALUE_PTR_INIT(v) UPB_TABVALUE_INT_INIT((uintptr_t)v)
+
+#undef UPB_PTR_IS_64BITS
+
+#endif  /* __cplusplus */
+
+
+/* upb_table ******************************************************************/
+
+typedef struct _upb_tabent {
+  upb_tabkey key;
+  upb_tabval val;
+
+  /* Internal chaining.  This is const so we can create static initializers for
+   * tables.  We cast away const sometimes, but *only* when the containing
+   * upb_table is known to be non-const.  This requires a bit of care, but
+   * the subtlety is confined to table.c. */
+  const struct _upb_tabent *next;
+} upb_tabent;
+
+typedef struct {
+  size_t count;          /* Number of entries in the hash part. */
+  size_t mask;           /* Mask to turn hash value -> bucket. */
+  upb_ctype_t ctype;     /* Type of all values. */
+  uint8_t size_lg2;      /* Size of the hashtable part is 2^size_lg2 entries. */
+
+  /* Hash table entries.
+   * Making this const isn't entirely accurate; what we really want is for it to
+   * have the same const-ness as the table it's inside.  But there's no way to
+   * declare that in C.  So we have to make it const so that we can statically
+   * initialize const hash tables.  Then we cast away const when we have to.
+   */
+  const upb_tabent *entries;
+} upb_table;
+
+typedef struct {
+  upb_table t;
+} upb_strtable;
+
+#define UPB_STRTABLE_INIT(count, mask, ctype, size_lg2, entries) \
+  {{count, mask, ctype, size_lg2, entries}}
+
+#define UPB_EMPTY_STRTABLE_INIT(ctype)                           \
+  UPB_STRTABLE_INIT(0, 0, ctype, 0, NULL)
+
+typedef struct {
+  upb_table t;              /* For entries that don't fit in the array part. */
+  const upb_tabval *array;  /* Array part of the table. See const note above. */
+  size_t array_size;        /* Array part size. */
+  size_t array_count;       /* Array part number of elements. */
+} upb_inttable;
+
+#define UPB_INTTABLE_INIT(count, mask, ctype, size_lg2, ent, a, asize, acount) \
+  {{count, mask, ctype, size_lg2, ent}, a, asize, acount}
+
+#define UPB_EMPTY_INTTABLE_INIT(ctype) \
+  UPB_INTTABLE_INIT(0, 0, ctype, 0, NULL, NULL, 0, 0)
+
+#define UPB_ARRAY_EMPTYENT -1
+
+/* Returns the number of entries in the hash part of the table, i.e.
+ * 2^size_lg2.  A size_lg2 of 0 is treated as an empty hash part, so 0 is
+ * returned in that case rather than 1. */
+UPB_INLINE size_t upb_table_size(const upb_table *t) {
+  if (t->size_lg2 == 0)
+    return 0;
+  else
+    /* Shift a size_t, not an int: "1 << n" overflows int for large n even
+     * though the size_t return type could hold the result. */
+    return (size_t)1 << t->size_lg2;
+}
+
+/* Internal-only functions, in .h file only out of necessity. */
+UPB_INLINE bool upb_tabent_isempty(const upb_tabent *e) {
+  return e->key == 0;
+}
+
+/* Used by some of the unit tests for generic hashing functionality. */
+uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed);
+
+/* Returns "key" unchanged. */
+UPB_INLINE uintptr_t upb_intkey(uintptr_t key) {
+  return key;
+}
+
+/* Hash for integer keys: simply the key converted (truncated where
+ * uintptr_t is wider) to 32 bits. */
+UPB_INLINE uint32_t upb_inthash(uintptr_t key) {
+  return (uint32_t)key;
+}
+
+/* Returns the hash-part entry that "hash" maps to: entry number
+ * (hash & t->mask) of t->entries.  Callers check t->entries for NULL first
+ * (see upb_inttable_lookup32()).
+ *
+ * Declared UPB_INLINE like the other helpers in this header so that
+ * translation units which never call it do not get an unused-function
+ * warning. */
+UPB_INLINE const upb_tabent *upb_getentry(const upb_table *t,
+                                          uint32_t hash) {
+  return t->entries + (hash & t->mask);
+}
+
+/* Returns true unless the given array-part value is the all-ones pattern
+ * (uint64_t)-1, which is what UPB_TABVALUE_EMPTY_INIT produces.  Despite its
+ * name, the "key" parameter is a table value, not a key. */
+UPB_INLINE bool upb_arrhas(upb_tabval key) {
+  return key.val != (uint64_t)-1;
+}
+
+/* Initialize and uninitialize a table, respectively.  If memory allocation
+ * failed, false is returned and the table is uninitialized. */
+bool upb_inttable_init(upb_inttable *table, upb_ctype_t ctype);
+bool upb_strtable_init(upb_strtable *table, upb_ctype_t ctype);
+void upb_inttable_uninit(upb_inttable *table);
+void upb_strtable_uninit(upb_strtable *table);
+
+/* Returns the number of values in the table. */
+size_t upb_inttable_count(const upb_inttable *t);
+UPB_INLINE size_t upb_strtable_count(const upb_strtable *t) {
+  return t->t.count;
+}
+
+/* Inserts the given key into the hashtable with the given value.  The key must
+ * not already exist in the hash table.  For string tables, the key must be
+ * NULL-terminated, and the table will make an internal copy of the key.
+ * Inttables must not insert a value of UINTPTR_MAX.
+ *
+ * If a table resize was required but memory allocation failed, false is
+ * returned and the table is unchanged. */
+bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val);
+bool upb_strtable_insert2(upb_strtable *t, const char *key, size_t len,
+                          upb_value val);
+
+/* For NULL-terminated strings. */
+UPB_INLINE bool upb_strtable_insert(upb_strtable *t, const char *key,
+                                    upb_value val) {
+  return upb_strtable_insert2(t, key, strlen(key), val);
+}
+
+/* Looks up key in this table, returning "true" if the key was found.
+ * If v is non-NULL, copies the value for this key into *v. */
+bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v);
+bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
+                          upb_value *v);
+
+/* For NULL-terminated strings. */
+UPB_INLINE bool upb_strtable_lookup(const upb_strtable *t, const char *key,
+                                    upb_value *v) {
+  return upb_strtable_lookup2(t, key, strlen(key), v);
+}
+
+/* Removes an item from the table.  Returns true if the remove was successful,
+ * and stores the removed item in *val if non-NULL. */
+bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val);
+bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len,
+                          upb_value *val);
+
+/* For NULL-terminated strings. */
+UPB_INLINE bool upb_strtable_remove(upb_strtable *t, const char *key,
+                                    upb_value *v) {
+  return upb_strtable_remove2(t, key, strlen(key), v);
+}
+
+/* Updates an existing entry in an inttable.  If the entry does not exist,
+ * returns false and does nothing.  Unlike insert/remove, this does not
+ * invalidate iterators. */
+bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val);
+
+/* Handy routines for treating an inttable like a stack.  May not be mixed with
+ * other insert/remove calls. */
+bool upb_inttable_push(upb_inttable *t, upb_value val);
+upb_value upb_inttable_pop(upb_inttable *t);
+
+/* Convenience routines for inttables with pointer keys. */
+bool upb_inttable_insertptr(upb_inttable *t, const void *key, upb_value val);
+bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val);
+bool upb_inttable_lookupptr(
+    const upb_inttable *t, const void *key, upb_value *val);
+
+/* Optimizes the table for the current set of entries, for both memory use and
+ * lookup time.  Client should call this after all entries have been inserted;
+ * inserting more entries is legal, but will likely require a table resize. */
+void upb_inttable_compact(upb_inttable *t);
+
+/* A special-case inlinable version of the lookup routine for 32-bit
+ * integers.
+ *
+ * Returns true and stores the value in *v if "key" is present, false
+ * otherwise.  Unlike upb_inttable_lookup(), "v" must not be NULL: it is
+ * written unconditionally, and holds an int32 zero when false is returned. */
+UPB_INLINE bool upb_inttable_lookup32(const upb_inttable *t, uint32_t key,
+                                      upb_value *v) {
+  *v = upb_value_int32(0);  /* Silence compiler warnings. */
+  if (key < t->array_size) {
+    /* The key falls inside the array part: index it directly, no hashing. */
+    upb_tabval arrval = t->array[key];
+    if (upb_arrhas(arrval)) {
+      _upb_value_setval(v, arrval.val, t->t.ctype);
+      return true;
+    } else {
+      return false;
+    }
+  } else {
+    /* The key is beyond the array part: walk the chain that starts at the
+     * entry its hash maps to.
+     *
+     * NOTE(review): an empty entry has key == 0 (see upb_tabent_isempty()) and
+     * is not filtered out here; presumably key 0 can never reach this branch
+     * (it would require array_size == 0) -- confirm. */
+    const upb_tabent *e;
+    if (t->t.entries == NULL) return false;
+    for (e = upb_getentry(&t->t, upb_inthash(key)); true; e = e->next) {
+      if ((uint32_t)e->key == key) {
+        _upb_value_setval(v, e->val.val, t->t.ctype);
+        return true;
+      }
+      /* End of chain without a match. */
+      if (e->next == NULL) return false;
+    }
+  }
+}
+
+/* Exposed for testing only. */
+bool upb_strtable_resize(upb_strtable *t, size_t size_lg2);
+
+/* Iterators ******************************************************************/
+
+/* Iterators for int and string tables.  We are subject to some kind of unusual
+ * design constraints:
+ *
+ * For high-level languages:
+ *  - we must be able to guarantee that we don't crash or corrupt memory even if
+ *    the program accesses an invalidated iterator.
+ *
+ * For C++11 range-based for:
+ *  - iterators must be copyable
+ *  - iterators must be comparable
+ *  - it must be possible to construct an "end" value.
+ *
+ * Iteration order is undefined.
+ *
+ * Modifying the table invalidates iterators.  upb_{str,int}table_done() is
+ * guaranteed to work even on an invalidated iterator, as long as the table it
+ * is iterating over has not been freed.  Calling next() or accessing data from
+ * an invalidated iterator yields unspecified elements from the table, but it is
+ * guaranteed not to crash and to return real table elements (except when done()
+ * is true). */
+
+
+/* upb_strtable_iter **********************************************************/
+
+/*   upb_strtable_iter i;
+ *   upb_strtable_begin(&i, t);
+ *   for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
+ *     const char *key = upb_strtable_iter_key(&i);
+ *     const upb_value val = upb_strtable_iter_value(&i);
+ *     // ...
+ *   }
+ */
+
+/* Iteration state for a upb_strtable.  Initialize with upb_strtable_begin()
+ * and advance with upb_strtable_next(). */
+typedef struct {
+  const upb_strtable *t;  /* Table being iterated -- presumably stored by
+                           * upb_strtable_begin(); confirm. */
+  size_t index;           /* Iteration position; its exact meaning is defined
+                           * by the implementation, which is not shown here. */
+} upb_strtable_iter;
+
+void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t);
+void upb_strtable_next(upb_strtable_iter *i);
+bool upb_strtable_done(const upb_strtable_iter *i);
+const char *upb_strtable_iter_key(upb_strtable_iter *i);
+size_t upb_strtable_iter_keylength(upb_strtable_iter *i);
+upb_value upb_strtable_iter_value(const upb_strtable_iter *i);
+void upb_strtable_iter_setdone(upb_strtable_iter *i);
+bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
+                               const upb_strtable_iter *i2);
+
+
+/* upb_inttable_iter **********************************************************/
+
+/*   upb_inttable_iter i;
+ *   upb_inttable_begin(&i, t);
+ *   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
+ *     uintptr_t key = upb_inttable_iter_key(&i);
+ *     upb_value val = upb_inttable_iter_value(&i);
+ *     // ...
+ *   }
+ */
+
+/* Iteration state for a upb_inttable.  Initialize with upb_inttable_begin()
+ * and advance with upb_inttable_next(). */
+typedef struct {
+  const upb_inttable *t;  /* Table being iterated -- presumably stored by
+                           * upb_inttable_begin(); confirm. */
+  size_t index;           /* Iteration position; its exact meaning is defined
+                           * by the implementation, which is not shown here. */
+  bool array_part;        /* NOTE(review): presumably true while the iterator
+                           * is inside the table's array part (cf. t->array in
+                           * upb_inttable_lookup32()) -- confirm. */
+} upb_inttable_iter;
+
+void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t);
+void upb_inttable_next(upb_inttable_iter *i);
+bool upb_inttable_done(const upb_inttable_iter *i);
+uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i);
+upb_value upb_inttable_iter_value(const upb_inttable_iter *i);
+void upb_inttable_iter_setdone(upb_inttable_iter *i);
+bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
+                               const upb_inttable_iter *i2);
+
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_TABLE_H_ */
+
+/* Reference tracking will check ref()/unref() operations to make sure the
+ * ref ownership is correct.  Where possible it will also make tools like
+ * Valgrind attribute ref leaks to the code that took the leaked ref, not
+ * the code that originally created the object.
+ *
+ * Enabling this requires the application to define upb_lock()/upb_unlock()
+ * functions that acquire/release a global mutex (or #define UPB_THREAD_UNSAFE).
+ * For this reason we don't enable it by default, even in debug builds.
+ */
+
+/* #define UPB_DEBUG_REFS */
+
+#ifdef __cplusplus
+namespace upb { class RefCounted; }
+#endif
+
+UPB_DECLARE_TYPE(upb::RefCounted, upb_refcounted)
+
+struct upb_refcounted_vtbl;
+
+/* The preprocessor selects either the C++ class header (with its method
+ * declarations) or the plain C struct header; both then share the data
+ * members declared after the #endif. */
+#ifdef __cplusplus
+
+class upb::RefCounted {
+ public:
+  /* Returns true if the given object is frozen. */
+  bool IsFrozen() const;
+
+  /* Increases the ref count, the new ref is owned by "owner" which must not
+   * already own a ref (and should not itself be a refcounted object if the ref
+   * could possibly be circular; see below).
+   * Thread-safe iff "this" is frozen. */
+  void Ref(const void *owner) const;
+
+  /* Releases a ref that was acquired from upb_refcounted_ref() and collects
+   * any objects it can. */
+  void Unref(const void *owner) const;
+
+  /* Moves an existing ref from "from" to "to", without changing the overall
+   * ref count.  DonateRef(foo, NULL, owner) is the same as Ref(foo, owner),
+   * but "to" may not be NULL. */
+  void DonateRef(const void *from, const void *to) const;
+
+  /* Verifies that a ref to the given object is currently held by the given
+   * owner.  Only effective in UPB_DEBUG_REFS builds. */
+  void CheckRef(const void *owner) const;
+
+ private:
+  UPB_DISALLOW_POD_OPS(RefCounted, upb::RefCounted)
+#else
+struct upb_refcounted {
+#endif
+  /* TODO(haberman): move the actual structure definition to structdefs.int.h.
+   * The only reason they are here is because inline functions need to see the
+   * definition of upb_handlers, which needs to see this definition.  But we
+   * can change the upb_handlers inline functions to deal in raw offsets
+   * instead.
+   */
+
+  /* A single reference count shared by all objects in the group. */
+  uint32_t *group;
+
+  /* A singly-linked list of all objects in the group. */
+  upb_refcounted *next;
+
+  /* Table of function pointers for this type. */
+  const struct upb_refcounted_vtbl *vtbl;
+
+  /* Maintained only when mutable, this tracks the number of refs (but not
+   * ref2's) to this object.  *group should be the sum of all individual_count
+   * in the group. */
+  uint32_t individual_count;
+
+  bool is_frozen;
+
+#ifdef UPB_DEBUG_REFS
+  upb_inttable *refs;  /* Maps owner -> trackedref for incoming refs. */
+  upb_inttable *ref2s; /* Set of targets for outgoing ref2s. */
+#endif
+};
+
+/* Static initializer for a upb_refcounted.  The values are positional and
+ * follow the member order of the struct: group (&static_refcount), next
+ * (NULL), vtbl (NULL), individual_count (0), is_frozen (true), and -- in
+ * UPB_DEBUG_REFS builds only -- refs and ref2s.  Without UPB_DEBUG_REFS the
+ * two macro arguments are accepted but not used. */
+#ifdef UPB_DEBUG_REFS
+#define UPB_REFCOUNT_INIT(refs, ref2s) \
+    {&static_refcount, NULL, NULL, 0, true, refs, ref2s}
+#else
+#define UPB_REFCOUNT_INIT(refs, ref2s) {&static_refcount, NULL, NULL, 0, true}
+#endif
+
+UPB_BEGIN_EXTERN_C
+
+/* It is better to use tracked refs when possible, for the extra debugging
+ * capability.  But if this is not possible (because you don't have easy access
+ * to a stable pointer value that is associated with the ref), you can pass
+ * UPB_UNTRACKED_REF instead.  */
+extern const void *UPB_UNTRACKED_REF;
+
+/* Native C API. */
+bool upb_refcounted_isfrozen(const upb_refcounted *r);
+void upb_refcounted_ref(const upb_refcounted *r, const void *owner);
+void upb_refcounted_unref(const upb_refcounted *r, const void *owner);
+void upb_refcounted_donateref(
+    const upb_refcounted *r, const void *from, const void *to);
+void upb_refcounted_checkref(const upb_refcounted *r, const void *owner);
+
+/* Generates the type-specific C wrappers <type>_isfrozen(), <type>_ref(),
+ * <type>_unref(), <type>_donateref() and <type>_checkref().  Each one converts
+ * its object with "upcastfunc" and forwards to the matching upb_refcounted_*()
+ * function. */
+#define UPB_REFCOUNTED_CMETHODS(type, upcastfunc)                            \
+  UPB_INLINE bool type ## _isfrozen(const type *v) {                         \
+    return upb_refcounted_isfrozen(upcastfunc(v));                           \
+  }                                                                          \
+  UPB_INLINE void type ## _ref(const type *v, const void *owner) {           \
+    upb_refcounted_ref(upcastfunc(v), owner);                                \
+  }                                                                          \
+  UPB_INLINE void type ## _unref(const type *v, const void *owner) {         \
+    upb_refcounted_unref(upcastfunc(v), owner);                              \
+  }                                                                          \
+  UPB_INLINE void type ## _donateref(const type *v, const void *from,        \
+                                     const void *to) {                       \
+    upb_refcounted_donateref(upcastfunc(v), from, to);                       \
+  }                                                                          \
+  UPB_INLINE void type ## _checkref(const type *v, const void *owner) {      \
+    upb_refcounted_checkref(upcastfunc(v), owner);                           \
+  }
+
+/* Pasted into derived C++ classes to give them the upb::RefCounted methods.
+ * Each method upcasts "this" to const upb::RefCounted and forwards the call. */
+#define UPB_REFCOUNTED_CPPMETHODS                                            \
+  bool IsFrozen() const {                                                    \
+    return upb::upcast_to<const upb::RefCounted>(this)->IsFrozen();          \
+  }                                                                          \
+  void Ref(const void *owner) const {                                        \
+    upb::upcast_to<const upb::RefCounted>(this)->Ref(owner);                 \
+  }                                                                          \
+  void Unref(const void *owner) const {                                      \
+    upb::upcast_to<const upb::RefCounted>(this)->Unref(owner);               \
+  }                                                                          \
+  void DonateRef(const void *from, const void *to) const {                   \
+    upb::upcast_to<const upb::RefCounted>(this)->DonateRef(from, to);        \
+  }                                                                          \
+  void CheckRef(const void *owner) const {                                   \
+    upb::upcast_to<const upb::RefCounted>(this)->CheckRef(owner);            \
+  }
+
+/* Internal-to-upb Interface **************************************************/
+
+/* Callback type handed to a vtbl "visit" function (see below).
+ * NOTE(review): presumably called once per ref2'd subobject, with "r" the
+ * object being visited, "subobj" the subobject and "closure" the opaque
+ * pointer given to visit() -- confirm against the implementation. */
+typedef void upb_refcounted_visit(const upb_refcounted *r,
+                                  const upb_refcounted *subobj,
+                                  void *closure);
+
+/* Per-type table of function pointers; referenced from upb_refcounted.vtbl. */
+struct upb_refcounted_vtbl {
+  /* Must visit all subobjects that are currently ref'd via upb_refcounted_ref2.
+   * Must be longjmp()-safe. */
+  void (*visit)(const upb_refcounted *r, upb_refcounted_visit *visit, void *c);
+
+  /* Must free the object and release all references to other objects. */
+  void (*free)(upb_refcounted *r);
+};
+
+/* Initializes the refcounted with a single ref for the given owner.  Returns
+ * false if memory could not be allocated. */
+bool upb_refcounted_init(upb_refcounted *r,
+                         const struct upb_refcounted_vtbl *vtbl,
+                         const void *owner);
+
+/* Adds a ref from one refcounted object to another ("from" must not already
+ * own a ref).  These refs may be circular; cycles will be collected correctly
+ * (if conservatively).  These refs do not need to be freed in from's free()
+ * function. */
+void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from);
+
+/* Removes a ref that was acquired from upb_refcounted_ref2(), and collects any
+ * object it can.  This is only necessary when "from" no longer points to "r",
+ * and not from from's "free" function. */
+void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from);
+
+/* Shorthands for upb_refcounted_ref2()/upb_refcounted_unref2() that cast both
+ * arguments to the refcounted base type.  The macro parameters are
+ * parenthesized so that an expression argument (e.g. "c ? a : b") is cast as
+ * a whole instead of only its first operand. */
+#define upb_ref2(r, from) \
+    upb_refcounted_ref2((const upb_refcounted*)(r), (upb_refcounted*)(from))
+#define upb_unref2(r, from) \
+    upb_refcounted_unref2((const upb_refcounted*)(r), (upb_refcounted*)(from))
+
+/* Freezes all mutable objects reachable by ref2() refs from the given roots.
+ * This will split refcounting groups into precise SCC groups, so that
+ * refcounting of frozen objects can be more aggressive.  If memory allocation
+ * fails, or if more than 2**31 mutable objects are reachable from "roots", or
+ * if the maximum depth of the graph exceeds "maxdepth", false is returned and
+ * the objects are unchanged.
+ *
+ * After this operation succeeds, the objects are frozen/const, and may not be
+ * used through non-const pointers.  In particular, they may not be passed as
+ * the second parameter of upb_refcounted_{ref,unref}2().  On the upside, all
+ * operations on frozen refcounteds are threadsafe, and objects will be freed
+ * at the precise moment that they become unreachable.
+ *
+ * Caller must own refs on each object in the "roots" list. */
+bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s,
+                           int maxdepth);
+
+/* Shared by all compiled-in refcounted objects. */
+extern uint32_t static_refcount;
+
+UPB_END_EXTERN_C
+
+#ifdef __cplusplus
+/* C++ wrappers: every upb::RefCounted method forwards to the corresponding
+ * upb_refcounted_*() C function, passing "this" as the object. */
+inline bool upb::RefCounted::IsFrozen() const {
+  return upb_refcounted_isfrozen(this);
+}
+
+inline void upb::RefCounted::Ref(const void *owner) const {
+  upb_refcounted_ref(this, owner);
+}
+
+inline void upb::RefCounted::Unref(const void *owner) const {
+  upb_refcounted_unref(this, owner);
+}
+
+inline void upb::RefCounted::DonateRef(const void *from,
+                                       const void *to) const {
+  upb_refcounted_donateref(this, from, to);
+}
+
+inline void upb::RefCounted::CheckRef(const void *owner) const {
+  upb_refcounted_checkref(this, owner);
+}
+#endif
+
+#endif  /* UPB_REFCOUNT_H_ */
+
+#ifdef __cplusplus
+#include <cstring>
+#include <string>
+#include <vector>
+
+namespace upb {
+class Def;
+class EnumDef;
+class FieldDef;
+class MessageDef;
+class OneofDef;
+}
+#endif
+
+UPB_DECLARE_DERIVED_TYPE(upb::Def, upb::RefCounted, upb_def, upb_refcounted)
+
+/* The maximum message depth that the type graph can have.  This is a resource
+ * limit for the C stack since we sometimes need to recursively traverse the
+ * graph.  Cycles are ok; the traversal will stop when it detects a cycle, but
+ * we must hit the cycle before the maximum depth is reached.
+ *
+ * If having a single static limit is too inflexible, we can add another variant
+ * of Def::Freeze that allows specifying this as a parameter. */
+#define UPB_MAX_MESSAGE_DEPTH 64
+
+
+/* upb::Def: base class for defs  *********************************************/
+
+/* All the different kinds of defs we support.  These correspond 1:1 with
+ * declarations in a .proto file.  The values are spelled out explicitly; they
+ * are the same values the enumerators would otherwise receive implicitly. */
+typedef enum {
+  UPB_DEF_MSG     = 0,
+  UPB_DEF_FIELD   = 1,
+  UPB_DEF_ENUM    = 2,
+  UPB_DEF_ONEOF   = 3,
+  UPB_DEF_SERVICE = 4,   /* Not yet implemented. */
+  UPB_DEF_ANY     = -1   /* Wildcard for upb_symtab_get*() */
+} upb_deftype_t;
+
+#ifdef __cplusplus
+
+/* The base class of all defs.  Its base is upb::RefCounted (use upb::upcast()
+ * to convert). */
+class upb::Def {
+ public:
+  typedef upb_deftype_t Type;
+
+  /* NOTE(review): presumably the C++ counterpart of upb_def_dup(), with the
+   * ref on the returned def owned by "owner" -- confirm. */
+  Def* Dup(const void *owner) const;
+
+  /* upb::RefCounted methods like Ref()/Unref(). */
+  UPB_REFCOUNTED_CPPMETHODS
+
+  /* The kind of def this is, as a upb_deftype_t. */
+  Type def_type() const;
+
+  /* "fullname" is the def's fully-qualified name (eg. foo.bar.Message). */
+  const char *full_name() const;
+
+  /* The def must be mutable.  Caller retains ownership of fullname.  Defs are
+   * not required to have a name; if a def has no name when it is frozen, it
+   * will remain an anonymous def.  On failure, returns false and details in "s"
+   * if non-NULL. */
+  bool set_full_name(const char* fullname, upb::Status* s);
+  bool set_full_name(const std::string &fullname, upb::Status* s);
+
+  /* Freezes the given defs; this validates all constraints and marks the defs
+   * as frozen (read-only).  "defs" may not contain any fielddefs, but fields
+   * of any msgdefs will be frozen.
+   *
+   * Symbolic references to sub-types and enum defaults must have already been
+   * resolved.  Any mutable defs reachable from any of "defs" must also be in
+   * the list; more formally, "defs" must be a transitive closure of mutable
+   * defs.
+   *
+   * After this operation succeeds, the finalized defs must only be accessed
+   * through a const pointer! */
+  static bool Freeze(Def* const* defs, int n, Status* status);
+  static bool Freeze(const std::vector<Def*>& defs, Status* status);
+
+ private:
+  UPB_DISALLOW_POD_OPS(Def, upb::Def)
+};
+
+#endif  /* __cplusplus */
+
+UPB_BEGIN_EXTERN_C
+
+/* Native C API. */
+upb_def *upb_def_dup(const upb_def *def, const void *owner);
+
+/* Include upb_refcounted methods like upb_def_ref()/upb_def_unref(). */
+UPB_REFCOUNTED_CMETHODS(upb_def, upb_def_upcast)
+
+upb_deftype_t upb_def_type(const upb_def *d);
+const char *upb_def_fullname(const upb_def *d);
+bool upb_def_setfullname(upb_def *def, const char *fullname, upb_status *s);
+bool upb_def_freeze(upb_def *const *defs, int n, upb_status *s);
+
+UPB_END_EXTERN_C
+
+
+/* upb::Def casts *************************************************************/
+
+/* In C++, generates upb::down_cast<>/upb::dyn_cast<> specializations from Def
+ * to "cpptype".  Each specialization forwards to the C helper of the same
+ * kind: upb_downcast_<cname>[_mutable]() or upb_dyncast_<cname>[_mutable]().
+ * Specializations returning a const pointer are provided for both const and
+ * non-const Def arguments.  In C the macro expands to nothing. */
+#ifdef __cplusplus
+#define UPB_CPP_CASTS(cname, cpptype)                                          \
+  namespace upb {                                                              \
+  template <>                                                                  \
+  inline cpptype *down_cast<cpptype *, Def>(Def * def) {                       \
+    return upb_downcast_##cname##_mutable(def);                                \
+  }                                                                            \
+  template <>                                                                  \
+  inline cpptype *dyn_cast<cpptype *, Def>(Def * def) {                        \
+    return upb_dyncast_##cname##_mutable(def);                                 \
+  }                                                                            \
+  template <>                                                                  \
+  inline const cpptype *down_cast<const cpptype *, const Def>(                 \
+      const Def *def) {                                                        \
+    return upb_downcast_##cname(def);                                          \
+  }                                                                            \
+  template <>                                                                  \
+  inline const cpptype *dyn_cast<const cpptype *, const Def>(const Def *def) { \
+    return upb_dyncast_##cname(def);                                           \
+  }                                                                            \
+  template <>                                                                  \
+  inline const cpptype *down_cast<const cpptype *, Def>(Def * def) {           \
+    return upb_downcast_##cname(def);                                          \
+  }                                                                            \
+  template <>                                                                  \
+  inline const cpptype *dyn_cast<const cpptype *, Def>(Def * def) {            \
+    return upb_dyncast_##cname(def);                                           \
+  }                                                                            \
+  }  /* namespace upb */
+#else
+#define UPB_CPP_CASTS(cname, cpptype)
+#endif  /* __cplusplus */
+
+/* Dynamic casts, for determining if a def is of a particular type at runtime.
+ * Downcasts, for when someone wants to assert that a def is of a particular
+ * type.  These are only checked if we are building debug.
+ *
+ * For each def type this generates four inline functions:
+ *   upb_dyncast_<lower>()           returns NULL unless the def's type is
+ *                                   UPB_DEF_<upper>.
+ *   upb_downcast_<lower>()          assert()s the type, then casts.
+ *   upb_dyncast_<lower>_mutable()   non-const variants, built on the two
+ *   upb_downcast_<lower>_mutable()  functions above.
+ * Both const-returning functions cast to a const pointer, so constness is only
+ * removed in the explicitly "_mutable" variants. */
+#define UPB_DEF_CASTS(lower, upper, cpptype)                               \
+  UPB_INLINE const upb_##lower *upb_dyncast_##lower(const upb_def *def) {  \
+    if (upb_def_type(def) != UPB_DEF_##upper) return NULL;                 \
+    return (const upb_##lower *)def;                                       \
+  }                                                                        \
+  UPB_INLINE const upb_##lower *upb_downcast_##lower(const upb_def *def) { \
+    assert(upb_def_type(def) == UPB_DEF_##upper);                          \
+    return (const upb_##lower *)def;                                       \
+  }                                                                        \
+  UPB_INLINE upb_##lower *upb_dyncast_##lower##_mutable(upb_def *def) {    \
+    return (upb_##lower *)upb_dyncast_##lower(def);                        \
+  }                                                                        \
+  UPB_INLINE upb_##lower *upb_downcast_##lower##_mutable(upb_def *def) {   \
+    return (upb_##lower *)upb_downcast_##lower(def);                       \
+  }                                                                        \
+  UPB_CPP_CASTS(lower, cpptype)
+
+/* Defines a def class via UPB_DEFINE_CLASS2 (with upb::Def and upb::RefCounted
+ * as its bases) and then emits the cast helpers for it.
+ * NOTE(review): UPB_DEF_CASTS is #undef'd a few lines below, so an expansion
+ * of UPB_DEFINE_DEF after that point would leave UPB_DEF_CASTS unexpanded --
+ * confirm that this macro is not used later. */
+#define UPB_DEFINE_DEF(cppname, lower, upper, cppmethods, members)             \
+  UPB_DEFINE_CLASS2(cppname, upb::Def, upb::RefCounted, cppmethods,            \
+                   members)                                                    \
+  UPB_DEF_CASTS(lower, upper, cppname)
+
+/* Declares a def type upb_<lower> (C++ name "cppname") derived from upb_def
+ * and upb_refcounted, and emits the cast helpers for it. */
+#define UPB_DECLARE_DEF_TYPE(cppname, lower, upper) \
+  UPB_DECLARE_DERIVED_TYPE2(cppname, upb::Def, upb::RefCounted, \
+                            upb_ ## lower, upb_def, upb_refcounted) \
+  UPB_DEF_CASTS(lower, upper, cppname)
+
+UPB_DECLARE_DEF_TYPE(upb::FieldDef, fielddef, FIELD)
+UPB_DECLARE_DEF_TYPE(upb::MessageDef, msgdef, MSG)
+UPB_DECLARE_DEF_TYPE(upb::EnumDef, enumdef, ENUM)
+UPB_DECLARE_DEF_TYPE(upb::OneofDef, oneofdef, ONEOF)
+
+#undef UPB_DECLARE_DEF_TYPE
+#undef UPB_DEF_CASTS
+#undef UPB_CPP_CASTS
+
+
+/* upb::FieldDef **************************************************************/
+
+/* The types a field can have.  Note that this list is not identical to the
+ * types defined in descriptor.proto, which gives INT32 and SINT32 separate
+ * types (we distinguish the two with the "integer encoding" enum below).
+ *
+ * The enumerators are numbered 1 through 11; none has the value 0. */
+typedef enum {
+  UPB_TYPE_FLOAT    = 1,
+  UPB_TYPE_DOUBLE   = 2,
+  UPB_TYPE_BOOL     = 3,
+  UPB_TYPE_STRING   = 4,
+  UPB_TYPE_BYTES    = 5,
+  UPB_TYPE_MESSAGE  = 6,
+  UPB_TYPE_ENUM     = 7,  /* Enum values are int32. */
+  UPB_TYPE_INT32    = 8,
+  UPB_TYPE_UINT32   = 9,
+  UPB_TYPE_INT64    = 10,
+  UPB_TYPE_UINT64   = 11
+} upb_fieldtype_t;
+
+/* The repeated-ness of each field; this matches descriptor.proto.
+ * The enumerators are numbered 1 through 3; none has the value 0. */
+typedef enum {
+  UPB_LABEL_OPTIONAL = 1,
+  UPB_LABEL_REQUIRED = 2,
+  UPB_LABEL_REPEATED = 3
+} upb_label_t;
+
+/* How integers should be encoded in serializations that offer multiple
+ * integer encoding methods.  Together with upb_fieldtype_t this carries the
+ * distinction that descriptor.proto expresses as separate types (e.g. INT32
+ * vs. SINT32). */
+typedef enum {
+  UPB_INTFMT_VARIABLE = 1,
+  UPB_INTFMT_FIXED = 2,
+  UPB_INTFMT_ZIGZAG = 3   /* Only for signed types (INT32/INT64). */
+} upb_intfmt_t;
+
+/* Descriptor types, as defined in descriptor.proto.  The enumerators are
+ * numbered 1 through 18 with no gaps.  Unlike upb_fieldtype_t, this enum has
+ * separate entries for the fixed-width and zigzag integer encodings
+ * (FIXED32/64, SFIXED32/64, SINT32/64) and for GROUP. */
+typedef enum {
+  UPB_DESCRIPTOR_TYPE_DOUBLE   = 1,
+  UPB_DESCRIPTOR_TYPE_FLOAT    = 2,
+  UPB_DESCRIPTOR_TYPE_INT64    = 3,
+  UPB_DESCRIPTOR_TYPE_UINT64   = 4,
+  UPB_DESCRIPTOR_TYPE_INT32    = 5,
+  UPB_DESCRIPTOR_TYPE_FIXED64  = 6,
+  UPB_DESCRIPTOR_TYPE_FIXED32  = 7,
+  UPB_DESCRIPTOR_TYPE_BOOL     = 8,
+  UPB_DESCRIPTOR_TYPE_STRING   = 9,
+  UPB_DESCRIPTOR_TYPE_GROUP    = 10,
+  UPB_DESCRIPTOR_TYPE_MESSAGE  = 11,
+  UPB_DESCRIPTOR_TYPE_BYTES    = 12,
+  UPB_DESCRIPTOR_TYPE_UINT32   = 13,
+  UPB_DESCRIPTOR_TYPE_ENUM     = 14,
+  UPB_DESCRIPTOR_TYPE_SFIXED32 = 15,
+  UPB_DESCRIPTOR_TYPE_SFIXED64 = 16,
+  UPB_DESCRIPTOR_TYPE_SINT32   = 17,
+  UPB_DESCRIPTOR_TYPE_SINT64   = 18
+} upb_descriptortype_t;
+
+/* Maximum field number allowed for FieldDefs.  This is an inherent limit of the
+ * protobuf wire format. */
+#define UPB_MAX_FIELDNUMBER ((1 << 29) - 1)
+
+#ifdef __cplusplus
+
+/* A upb_fielddef describes a single field in a message.  It is most often
+ * found as a part of a upb_msgdef, but can also stand alone to represent
+ * an extension.
+ *
+ * Its base class is upb::Def (use upb::upcast() to convert). */
+class upb::FieldDef {
+ public:
+  typedef upb_fieldtype_t Type;
+  typedef upb_label_t Label;
+  typedef upb_intfmt_t IntegerFormat;
+  typedef upb_descriptortype_t DescriptorType;
+
+  /* These return true if the given value is a valid member of the enumeration. */
+  static bool CheckType(int32_t val);
+  static bool CheckLabel(int32_t val);
+  static bool CheckDescriptorType(int32_t val);
+  static bool CheckIntegerFormat(int32_t val);
+
+  /* These convert to the given enumeration; they require that the value is
+   * valid. */
+  static Type ConvertType(int32_t val);
+  static Label ConvertLabel(int32_t val);
+  static DescriptorType ConvertDescriptorType(int32_t val);
+  static IntegerFormat ConvertIntegerFormat(int32_t val);
+
+  /* Returns NULL if memory allocation failed. */
+  static reffed_ptr<FieldDef> New();
+
+  /* Duplicates the given field, returning NULL if memory allocation failed.
+   * When a fielddef is duplicated, the subdef (if any) is made symbolic if it
+   * wasn't already.  If the subdef is set but has no name (which is possible
+   * since msgdefs are not required to have a name) the new fielddef's subdef
+   * will be unset. */
+  FieldDef* Dup(const void* owner) const;
+
+  /* upb::RefCounted methods like Ref()/Unref(). */
+  UPB_REFCOUNTED_CPPMETHODS
+
+  /* Functionality from upb::Def. */
+  const char* full_name() const;
+
+  bool type_is_set() const;  /* set_[descriptor_]type() has been called? */
+  Type type() const;         /* Requires that type_is_set() == true. */
+  Label label() const;       /* Defaults to UPB_LABEL_OPTIONAL. */
+  const char* name() const;  /* NULL if uninitialized. */
+  uint32_t number() const;   /* Returns 0 if uninitialized. */
+  bool is_extension() const;
+
+  /* For UPB_TYPE_MESSAGE fields only where is_tag_delimited() == false,
+   * indicates whether this field should have lazy parsing handlers that yield
+   * the unparsed string for the submessage.
+   *
+   * TODO(haberman): I think we want to move this into a FieldOptions container
+   * when we add support for custom options (the FieldOptions struct will
+   * contain both regular FieldOptions like "lazy" *and* custom options). */
+  bool lazy() const;
+
+  /* For non-string, non-submessage fields, this indicates whether binary
+   * protobufs are encoded in packed or non-packed format.
+   *
+   * TODO(haberman): see note above about putting options like this into a
+   * FieldOptions container. */
+  bool packed() const;
+
+  /* An integer that can be used as an index into an array of fields for
+   * whatever message this field belongs to.  Guaranteed to be less than
+   * f->containing_type()->field_count().  May only be accessed once the def has
+   * been finalized. */
+  int index() const;
+
+  /* The MessageDef to which this field belongs.
+   *
+   * If this field has been added to a MessageDef, that message can be retrieved
+   * directly (this is always the case for frozen FieldDefs).
+   *
+   * If the field has not yet been added to a MessageDef, you can set the name
+   * of the containing type symbolically instead.  This is mostly useful for
+   * extensions, where the extension is declared separately from the message. */
+  const MessageDef* containing_type() const;
+  const char* containing_type_name();
+
+  /* The OneofDef to which this field belongs, or NULL if this field is not part
+   * of a oneof. */
+  const OneofDef* containing_oneof() const;
+
+  /* The field's type according to the enum in descriptor.proto.  This is not
+   * the same as UPB_TYPE_*, because it distinguishes between (for example)
+   * INT32 and SINT32, whereas our "type" enum does not.  This return of
+   * descriptor_type() is a function of type(), integer_format(), and
+   * is_tag_delimited().  Likewise set_descriptor_type() sets all three
+   * appropriately. */
+  DescriptorType descriptor_type() const;
+
+  /* Convenient field type tests. */
+  bool IsSubMessage() const;
+  bool IsString() const;
+  bool IsSequence() const;
+  bool IsPrimitive() const;
+  bool IsMap() const;
+
+  /* How integers are encoded.  Only meaningful for integer types.
+   * Defaults to UPB_INTFMT_VARIABLE, and is reset when "type" changes. */
+  IntegerFormat integer_format() const;
+
+  /* Whether a submessage field is tag-delimited or not (if false, then
+   * length-delimited).  May only be set when type() == UPB_TYPE_MESSAGE. */
+  bool is_tag_delimited() const;
+
+  /* Returns the non-string default value for this fielddef, which may either
+   * be something the client set explicitly or the "default default" (0 for
+   * numbers, empty for strings).  The field's type indicates the type of the
+   * returned value, except for enum fields that are still mutable.
+   *
+   * Requires that the given function matches the field's current type. */
+  int64_t default_int64() const;
+  int32_t default_int32() const;
+  uint64_t default_uint64() const;
+  uint32_t default_uint32() const;
+  bool default_bool() const;
+  float default_float() const;
+  double default_double() const;
+
+  /* The resulting string is always NULL-terminated.  If non-NULL, the length
+   * will be stored in *len. */
+  const char *default_string(size_t* len) const;
+
+  /* For frozen UPB_TYPE_ENUM fields, enum defaults can always be read as either
+   * string or int32, and both of these methods will always return true.
+   *
+   * For mutable UPB_TYPE_ENUM fields, the story is a bit more complicated.
+   * Enum defaults are unusual. They can be specified either as string or int32,
+   * but to be valid the enum must have that value as a member.  And if no
+   * default is specified, the "default default" comes from the EnumDef.
+   *
+   * We allow reading the default as either an int32 or a string, but only if
+   * we have a meaningful value to report.  We have a meaningful value if it was
+   * set explicitly, or if we could get the "default default" from the EnumDef.
+   * Also if you explicitly set the name and we find the number in the EnumDef */
+  bool EnumHasStringDefault() const;
+  bool EnumHasInt32Default() const;
+
+  /* Submessage and enum fields must reference a "subdef", which is the
+   * upb::MessageDef or upb::EnumDef that defines their type.  Note that when
+   * the FieldDef is mutable it may not have a subdef *yet*, but this function
+   * still returns true to indicate that the field's type requires a subdef. */
+  bool HasSubDef() const;
+
+  /* Returns the enum or submessage def for this field, if any.  The field's
+   * type must match (ie. you may only call enum_subdef() for fields where
+   * type() == UPB_TYPE_ENUM).  Returns NULL if the subdef has not been set or
+   * is currently set symbolically. */
+  const EnumDef* enum_subdef() const;
+  const MessageDef* message_subdef() const;
+
+  /* Returns the generic subdef for this field.  Requires that HasSubDef() (ie.
+   * only works for UPB_TYPE_ENUM and UPB_TYPE_MESSAGE fields). */
+  const Def* subdef() const;
+
+  /* Returns the symbolic name of the subdef.  If the subdef is currently set
+   * unresolved (ie. set symbolically) returns the symbolic name.  If it has
+   * been resolved to a specific subdef, returns the name from that subdef. */
+  const char* subdef_name() const;
+
+  /* Setters (non-const methods), only valid for mutable FieldDefs! ***********/
+
+  bool set_full_name(const char* fullname, upb::Status* s);
+  bool set_full_name(const std::string& fullname, upb::Status* s);
+
+  /* This may only be called if containing_type() == NULL (ie. the field has not
+   * been added to a message yet). */
+  bool set_containing_type_name(const char *name, Status* status);
+  bool set_containing_type_name(const std::string& name, Status* status);
+
+  /* Defaults to false.  When we freeze, we ensure that this can only be true
+   * for length-delimited message fields.  Prior to freezing this can be true or
+   * false with no restrictions. */
+  void set_lazy(bool lazy);
+
+  /* Defaults to true.  Sets whether this field is encoded in packed format. */
+  void set_packed(bool packed);
+
+  /* "type" or "descriptor_type" MUST be set explicitly before the fielddef is
+   * finalized.  These setters require that the enum value is valid; if the
+   * value did not come directly from an enum constant, the caller should
+   * validate it first with the functions above (CheckFieldType(), etc). */
+  void set_type(Type type);
+  void set_label(Label label);
+  void set_descriptor_type(DescriptorType type);
+  void set_is_extension(bool is_extension);
+
+  /* "number" and "name" must be set before the FieldDef is added to a
+   * MessageDef, and may not be set after that.
+   *
+   * "name" is the same as full_name()/set_full_name(), but since fielddefs
+   * most often use simple, non-qualified names, we provide this accessor
+   * also.  Generally only extensions will want to think of this name as
+   * fully-qualified. */
+  bool set_number(uint32_t number, upb::Status* s);
+  bool set_name(const char* name, upb::Status* s);
+  bool set_name(const std::string& name, upb::Status* s);
+
+  void set_integer_format(IntegerFormat format);
+  bool set_tag_delimited(bool tag_delimited, upb::Status* s);
+
+  /* Sets default value for the field.  The call must exactly match the type
+   * of the field.  Enum fields may use either setint32 or setstring to set
+   * the default numerically or symbolically, respectively, but symbolic
+   * defaults must be resolved before finalizing (see ResolveEnumDefault()).
+   *
+   * Changing the type of a field will reset its default. */
+  void set_default_int64(int64_t val);
+  void set_default_int32(int32_t val);
+  void set_default_uint64(uint64_t val);
+  void set_default_uint32(uint32_t val);
+  void set_default_bool(bool val);
+  void set_default_float(float val);
+  void set_default_double(double val);
+  bool set_default_string(const void *str, size_t len, Status *s);
+  bool set_default_string(const std::string &str, Status *s);
+  void set_default_cstr(const char *str, Status *s);
+
+  /* Before a fielddef is frozen, its subdef may be set either directly (with a
+   * upb::Def*) or symbolically.  Symbolic refs must be resolved before the
+   * containing msgdef can be frozen (see upb_resolve() above).  upb always
+   * guarantees that any def reachable from a live def will also be kept alive.
+   *
+   * Both methods require that upb_fielddef_hassubdef(f) is true (so the type
+   * must be set prior to calling these methods).  Returns false if this is not
+   * the case, or if the given subdef is not of the correct type.  The subdef is
+   * reset if the field's type is changed.  The subdef can be set to NULL to
+   * clear it. */
+  bool set_subdef(const Def* subdef, Status* s);
+  bool set_enum_subdef(const EnumDef* subdef, Status* s);
+  bool set_message_subdef(const MessageDef* subdef, Status* s);
+  bool set_subdef_name(const char* name, Status* s);
+  bool set_subdef_name(const std::string &name, Status* s);
+
+ private:
+  UPB_DISALLOW_POD_OPS(FieldDef, upb::FieldDef)
+};
+
+# endif  /* defined(__cplusplus) */
+
+UPB_BEGIN_EXTERN_C
+
+/* Native C API. */
+upb_fielddef *upb_fielddef_new(const void *owner);
+upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner);
+
+/* Include upb_refcounted methods like upb_fielddef_ref(). */
+UPB_REFCOUNTED_CMETHODS(upb_fielddef, upb_fielddef_upcast2)
+
+/* Methods from upb_def. */
+const char *upb_fielddef_fullname(const upb_fielddef *f);
+bool upb_fielddef_setfullname(upb_fielddef *f, const char *fullname,
+                              upb_status *s);
+
+bool upb_fielddef_typeisset(const upb_fielddef *f);
+upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f);
+upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f);
+upb_label_t upb_fielddef_label(const upb_fielddef *f);
+uint32_t upb_fielddef_number(const upb_fielddef *f);
+const char *upb_fielddef_name(const upb_fielddef *f);
+bool upb_fielddef_isextension(const upb_fielddef *f);
+bool upb_fielddef_lazy(const upb_fielddef *f);
+bool upb_fielddef_packed(const upb_fielddef *f);
+const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f);
+const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f);
+upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f);
+const char *upb_fielddef_containingtypename(upb_fielddef *f);
+upb_intfmt_t upb_fielddef_intfmt(const upb_fielddef *f);
+uint32_t upb_fielddef_index(const upb_fielddef *f);
+bool upb_fielddef_istagdelim(const upb_fielddef *f);
+bool upb_fielddef_issubmsg(const upb_fielddef *f);
+bool upb_fielddef_isstring(const upb_fielddef *f);
+bool upb_fielddef_isseq(const upb_fielddef *f);
+bool upb_fielddef_isprimitive(const upb_fielddef *f);
+bool upb_fielddef_ismap(const upb_fielddef *f);
+int64_t upb_fielddef_defaultint64(const upb_fielddef *f);
+int32_t upb_fielddef_defaultint32(const upb_fielddef *f);
+uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f);
+uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f);
+bool upb_fielddef_defaultbool(const upb_fielddef *f);
+float upb_fielddef_defaultfloat(const upb_fielddef *f);
+double upb_fielddef_defaultdouble(const upb_fielddef *f);
+const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len);
+bool upb_fielddef_enumhasdefaultint32(const upb_fielddef *f);
+bool upb_fielddef_enumhasdefaultstr(const upb_fielddef *f);
+bool upb_fielddef_hassubdef(const upb_fielddef *f);
+const upb_def *upb_fielddef_subdef(const upb_fielddef *f);
+const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f);
+const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f);
+const char *upb_fielddef_subdefname(const upb_fielddef *f);
+
+void upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type);
+void upb_fielddef_setdescriptortype(upb_fielddef *f, int type);
+void upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label);
+bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number, upb_status *s);
+bool upb_fielddef_setname(upb_fielddef *f, const char *name, upb_status *s);
+bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name,
+                                        upb_status *s);
+void upb_fielddef_setisextension(upb_fielddef *f, bool is_extension);
+void upb_fielddef_setlazy(upb_fielddef *f, bool lazy);
+void upb_fielddef_setpacked(upb_fielddef *f, bool packed);
+void upb_fielddef_setintfmt(upb_fielddef *f, upb_intfmt_t fmt);
+void upb_fielddef_settagdelim(upb_fielddef *f, bool tag_delim);
+void upb_fielddef_setdefaultint64(upb_fielddef *f, int64_t val);
+void upb_fielddef_setdefaultint32(upb_fielddef *f, int32_t val);
+void upb_fielddef_setdefaultuint64(upb_fielddef *f, uint64_t val);
+void upb_fielddef_setdefaultuint32(upb_fielddef *f, uint32_t val);
+void upb_fielddef_setdefaultbool(upb_fielddef *f, bool val);
+void upb_fielddef_setdefaultfloat(upb_fielddef *f, float val);
+void upb_fielddef_setdefaultdouble(upb_fielddef *f, double val);
+bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len,
+                                upb_status *s);
+void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str,
+                                 upb_status *s);
+bool upb_fielddef_setsubdef(upb_fielddef *f, const upb_def *subdef,
+                            upb_status *s);
+bool upb_fielddef_setmsgsubdef(upb_fielddef *f, const upb_msgdef *subdef,
+                               upb_status *s);
+bool upb_fielddef_setenumsubdef(upb_fielddef *f, const upb_enumdef *subdef,
+                                upb_status *s);
+bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name,
+                                upb_status *s);
+
+/* Validity checks for raw integers that are about to be used as one of the
+ * fielddef enums (label, type, descriptor type, integer format).  The C++
+ * FieldDef::Check*() wrappers forward to these, and FieldDef::Convert*()
+ * asserts on them before casting, so "true" means the value may be cast to
+ * the corresponding enum type. */
+bool upb_fielddef_checklabel(int32_t label);
+bool upb_fielddef_checktype(int32_t type);
+bool upb_fielddef_checkdescriptortype(int32_t type);
+bool upb_fielddef_checkintfmt(int32_t fmt);
+
+UPB_END_EXTERN_C
+
+
+/* upb::MessageDef ************************************************************/
+
+typedef upb_inttable_iter upb_msg_field_iter;
+typedef upb_strtable_iter upb_msg_oneof_iter;
+
+#ifdef __cplusplus
+
+/* Structure that describes a single .proto message type.
+ *
+ * Its base class is upb::Def (use upb::upcast() to convert). */
+class upb::MessageDef {
+ public:
+  /* Returns NULL if memory allocation failed. */
+  static reffed_ptr<MessageDef> New();
+
+  /* upb::RefCounted methods like Ref()/Unref(). */
+  UPB_REFCOUNTED_CPPMETHODS
+
+  /* Functionality from upb::Def. */
+  const char* full_name() const;
+  bool set_full_name(const char* fullname, Status* s);
+  bool set_full_name(const std::string& fullname, Status* s);
+
+  /* Call to freeze this MessageDef.
+   * WARNING: this will fail if this message has any unfrozen submessages!
+   * Messages with cycles must be frozen as a batch using upb::Def::Freeze(). */
+  bool Freeze(Status* s);
+
+  /* The number of fields that belong to the MessageDef. */
+  int field_count() const;
+
+  /* The number of oneofs that belong to the MessageDef. */
+  int oneof_count() const;
+
+  /* Adds a field (upb_fielddef object) to a msgdef.  Requires that the msgdef
+   * and the fielddefs are mutable.  The fielddef's name and number must be
+   * set, and the message may not already contain any field with this name or
+   * number, and this fielddef may not be part of another message.  In error
+   * cases false is returned and the msgdef is unchanged.
+   *
+   * If the given field is part of a oneof, this call succeeds if and only if
+   * that oneof is already part of this msgdef. (Note that adding a oneof to a
+   * msgdef automatically adds all of its fields to the msgdef at the time that
+   * the oneof is added, so it is usually more idiomatic to add the oneof's
+   * fields first then add the oneof to the msgdef. This case is supported for
+   * convenience.)
+   *
+   * If |f| is already part of this MessageDef, this method performs no action
+   * and returns true (success). Thus, this method is idempotent. */
+  bool AddField(FieldDef* f, Status* s);
+  bool AddField(const reffed_ptr<FieldDef>& f, Status* s);
+
+  /* Adds a oneof (upb_oneofdef object) to a msgdef. Requires that the msgdef,
+   * oneof, and any fielddefs are mutable, that the fielddefs contained in the
+   * oneof do not have any name or number conflicts with existing fields in the
+   * msgdef, and that the oneof's name is unique among all oneofs in the msgdef.
+   * If the oneof is added successfully, all of its fields will be added
+   * directly to the msgdef as well. In error cases, false is returned and the
+   * msgdef is unchanged. */
+  bool AddOneof(OneofDef* o, Status* s);
+  bool AddOneof(const reffed_ptr<OneofDef>& o, Status* s);
+
+  /* These return NULL if the field is not found. */
+  FieldDef* FindFieldByNumber(uint32_t number);
+  FieldDef* FindFieldByName(const char *name, size_t len);
+  const FieldDef* FindFieldByNumber(uint32_t number) const;
+  const FieldDef* FindFieldByName(const char* name, size_t len) const;
+
+
+  FieldDef* FindFieldByName(const char *name) {
+    return FindFieldByName(name, strlen(name));
+  }
+  const FieldDef* FindFieldByName(const char *name) const {
+    return FindFieldByName(name, strlen(name));
+  }
+
+  template <class T>
+  FieldDef* FindFieldByName(const T& str) {
+    return FindFieldByName(str.c_str(), str.size());
+  }
+  template <class T>
+  const FieldDef* FindFieldByName(const T& str) const {
+    return FindFieldByName(str.c_str(), str.size());
+  }
+
+  OneofDef* FindOneofByName(const char* name, size_t len);
+  const OneofDef* FindOneofByName(const char* name, size_t len) const;
+
+  OneofDef* FindOneofByName(const char* name) {
+    return FindOneofByName(name, strlen(name));
+  }
+  const OneofDef* FindOneofByName(const char* name) const {
+    return FindOneofByName(name, strlen(name));
+  }
+
+  template<class T>
+  OneofDef* FindOneofByName(const T& str) {
+    return FindOneofByName(str.c_str(), str.size());
+  }
+  template<class T>
+  const OneofDef* FindOneofByName(const T& str) const {
+    return FindOneofByName(str.c_str(), str.size());
+  }
+
+  /* Returns a new msgdef that is a copy of the given msgdef (and a copy of all
+   * the fields) but with any references to submessages broken and replaced
+   * with just the name of the submessage.  Returns NULL if memory allocation
+   * failed.
+   *
+   * TODO(haberman): which is more useful, keeping fields resolved or
+   * unresolving them?  If there's no obvious answer, should this functionality
+   * just be moved into symtab.c? */
+  MessageDef* Dup(const void* owner) const;
+
+  /* Is this message a map entry? */
+  void setmapentry(bool map_entry);
+  bool mapentry() const;
+
+  /* Iteration over fields.  The order is undefined. */
+  class field_iterator
+      : public std::iterator<std::forward_iterator_tag, FieldDef*> {
+   public:
+    explicit field_iterator(MessageDef* md);
+    static field_iterator end(MessageDef* md);
+
+    void operator++();
+    FieldDef* operator*() const;
+    bool operator!=(const field_iterator& other) const;
+    bool operator==(const field_iterator& other) const;
+
+   private:
+    upb_msg_field_iter iter_;
+  };
+
+  class const_field_iterator
+      : public std::iterator<std::forward_iterator_tag, const FieldDef*> {
+   public:
+    explicit const_field_iterator(const MessageDef* md);
+    static const_field_iterator end(const MessageDef* md);
+
+    void operator++();
+    const FieldDef* operator*() const;
+    bool operator!=(const const_field_iterator& other) const;
+    bool operator==(const const_field_iterator& other) const;
+
+   private:
+    upb_msg_field_iter iter_;
+  };
+
+  /* Iteration over oneofs. The order is undefined. */
+  class oneof_iterator
+      : public std::iterator<std::forward_iterator_tag, OneofDef*> {
+    /* The value type advertised through std::iterator must be OneofDef*,
+     * matching what operator*() returns, so that std::iterator_traits and
+     * generic algorithms see the correct element type. */
+   public:
+    explicit oneof_iterator(MessageDef* md);
+    static oneof_iterator end(MessageDef* md);
+
+    void operator++();
+    OneofDef* operator*() const;
+    bool operator!=(const oneof_iterator& other) const;
+    bool operator==(const oneof_iterator& other) const;
+
+   private:
+    /* Underlying C iterator state (upb_msg_oneof_iter). */
+    upb_msg_oneof_iter iter_;
+  };
+
+  class const_oneof_iterator
+      : public std::iterator<std::forward_iterator_tag, const OneofDef*> {
+    /* The value type advertised through std::iterator must be
+     * const OneofDef*, matching what operator*() returns, so that
+     * std::iterator_traits and generic algorithms see the correct element
+     * type. */
+   public:
+    explicit const_oneof_iterator(const MessageDef* md);
+    static const_oneof_iterator end(const MessageDef* md);
+
+    void operator++();
+    const OneofDef* operator*() const;
+    bool operator!=(const const_oneof_iterator& other) const;
+    bool operator==(const const_oneof_iterator& other) const;
+
+   private:
+    /* Underlying C iterator state (upb_msg_oneof_iter). */
+    upb_msg_oneof_iter iter_;
+  };
+
+  /* Lightweight range object returned by MessageDef::fields(); exposes
+   * begin()/end() so the fields can be walked with a range-based for loop. */
+  class FieldAccessor {
+   public:
+    explicit FieldAccessor(MessageDef* msg) : msg_(msg) {}
+    /* begin()/end() only read msg_, so they are const: this lets a const
+     * FieldAccessor (or one bound to a const reference) be iterated too. */
+    field_iterator begin() const { return msg_->field_begin(); }
+    field_iterator end() const { return msg_->field_end(); }
+   private:
+    MessageDef* msg_;
+  };
+
+  /* Lightweight range object returned by MessageDef::fields() const; exposes
+   * begin()/end() so the fields can be walked with a range-based for loop. */
+  class ConstFieldAccessor {
+   public:
+    explicit ConstFieldAccessor(const MessageDef* msg) : msg_(msg) {}
+    /* begin()/end() only read msg_, so they are const: this lets a const
+     * ConstFieldAccessor (or one bound to a const reference) be iterated. */
+    const_field_iterator begin() const { return msg_->field_begin(); }
+    const_field_iterator end() const { return msg_->field_end(); }
+   private:
+    const MessageDef* msg_;
+  };
+
+  /* Lightweight range object returned by MessageDef::oneofs(); exposes
+   * begin()/end() so the oneofs can be walked with a range-based for loop. */
+  class OneofAccessor {
+   public:
+    explicit OneofAccessor(MessageDef* msg) : msg_(msg) {}
+    /* begin()/end() only read msg_, so they are const: this lets a const
+     * OneofAccessor (or one bound to a const reference) be iterated too. */
+    oneof_iterator begin() const { return msg_->oneof_begin(); }
+    oneof_iterator end() const { return msg_->oneof_end(); }
+   private:
+    MessageDef* msg_;
+  };
+
+  /* Lightweight range object returned by MessageDef::oneofs() const; exposes
+   * begin()/end() so the oneofs can be walked with a range-based for loop. */
+  class ConstOneofAccessor {
+   public:
+    explicit ConstOneofAccessor(const MessageDef* msg) : msg_(msg) {}
+    /* begin()/end() only read msg_, so they are const: this lets a const
+     * ConstOneofAccessor (or one bound to a const reference) be iterated. */
+    const_oneof_iterator begin() const { return msg_->oneof_begin(); }
+    const_oneof_iterator end() const { return msg_->oneof_end(); }
+   private:
+    const MessageDef* msg_;
+  };
+
+  field_iterator field_begin();
+  field_iterator field_end();
+  const_field_iterator field_begin() const;
+  const_field_iterator field_end() const;
+
+  oneof_iterator oneof_begin();
+  oneof_iterator oneof_end();
+  const_oneof_iterator oneof_begin() const;
+  const_oneof_iterator oneof_end() const;
+
+  FieldAccessor fields() { return FieldAccessor(this); }
+  ConstFieldAccessor fields() const { return ConstFieldAccessor(this); }
+  OneofAccessor oneofs() { return OneofAccessor(this); }
+  ConstOneofAccessor oneofs() const { return ConstOneofAccessor(this); }
+
+ private:
+  UPB_DISALLOW_POD_OPS(MessageDef, upb::MessageDef)
+};
+
+#endif  /* __cplusplus */
+
+UPB_BEGIN_EXTERN_C
+
+/* Returns NULL if memory allocation failed. */
+upb_msgdef *upb_msgdef_new(const void *owner);
+
+/* Include upb_refcounted methods like upb_msgdef_ref(). */
+UPB_REFCOUNTED_CMETHODS(upb_msgdef, upb_msgdef_upcast2)
+
+bool upb_msgdef_freeze(upb_msgdef *m, upb_status *status);
+
+const char *upb_msgdef_fullname(const upb_msgdef *m);
+bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname, upb_status *s);
+
+upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner);
+bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor,
+                         upb_status *s);
+bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor,
+                         upb_status *s);
+
+/* Field lookup in a couple of different variations:
+ *   - itof = int to field
+ *   - ntof = name to field
+ *   - ntofz = name to field, null-terminated string. */
+const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i);
+const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
+                                    size_t len);
+int upb_msgdef_numfields(const upb_msgdef *m);
+
+/* Convenience form of upb_msgdef_ntof() for NUL-terminated names: the length
+ * is computed with strlen(), so |name| must be a valid, non-NULL C string. */
+UPB_INLINE const upb_fielddef *upb_msgdef_ntofz(const upb_msgdef *m,
+                                                const char *name) {
+  return upb_msgdef_ntof(m, name, strlen(name));
+}
+
+/* Same lookup as upb_msgdef_itof(), but takes a non-const msgdef and casts
+ * the const qualifier away from the result, returning a mutable pointer. */
+UPB_INLINE upb_fielddef *upb_msgdef_itof_mutable(upb_msgdef *m, uint32_t i) {
+  return (upb_fielddef*)upb_msgdef_itof(m, i);
+}
+
+/* Same lookup as upb_msgdef_ntof(), but takes a non-const msgdef and casts
+ * the const qualifier away from the result, returning a mutable pointer. */
+UPB_INLINE upb_fielddef *upb_msgdef_ntof_mutable(upb_msgdef *m,
+                                                 const char *name, size_t len) {
+  return (upb_fielddef *)upb_msgdef_ntof(m, name, len);
+}
+
+/* Oneof lookup:
+ *   - ntoo = name to oneof
+ *   - ntooz = name to oneof, null-terminated string. */
+const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
+                                    size_t len);
+int upb_msgdef_numoneofs(const upb_msgdef *m);
+
+/* Convenience form of upb_msgdef_ntoo() for NUL-terminated names: the length
+ * is computed with strlen(), so |name| must be a valid, non-NULL C string. */
+UPB_INLINE const upb_oneofdef *upb_msgdef_ntooz(const upb_msgdef *m,
+                                               const char *name) {
+  return upb_msgdef_ntoo(m, name, strlen(name));
+}
+
+/* Same lookup as upb_msgdef_ntoo(), but takes a non-const msgdef and casts
+ * the const qualifier away from the result, returning a mutable pointer. */
+UPB_INLINE upb_oneofdef *upb_msgdef_ntoo_mutable(upb_msgdef *m,
+                                                 const char *name, size_t len) {
+  return (upb_oneofdef *)upb_msgdef_ntoo(m, name, len);
+}
+
+void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry);
+bool upb_msgdef_mapentry(const upb_msgdef *m);
+
+/* Well-known field tag numbers for map-entry messages. */
+#define UPB_MAPENTRY_KEY   1
+#define UPB_MAPENTRY_VALUE 2
+
+const upb_oneofdef *upb_msgdef_findoneof(const upb_msgdef *m,
+                                          const char *name);
+int upb_msgdef_numoneofs(const upb_msgdef *m);
+
+/* upb_msg_field_iter i;
+ * for(upb_msg_field_begin(&i, m);
+ *     !upb_msg_field_done(&i);
+ *     upb_msg_field_next(&i)) {
+ *   upb_fielddef *f = upb_msg_iter_field(&i);
+ *   // ...
+ * }
+ *
+ * For C we don't have separate iterators for const and non-const.
+ * It is the caller's responsibility to cast the upb_fielddef* to
+ * const if the upb_msgdef* is const. */
+void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m);
+void upb_msg_field_next(upb_msg_field_iter *iter);
+bool upb_msg_field_done(const upb_msg_field_iter *iter);
+upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter);
+void upb_msg_field_iter_setdone(upb_msg_field_iter *iter);
+
+/* Similar to above, we also support iterating through the oneofs in a
+ * msgdef. */
+void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m);
+void upb_msg_oneof_next(upb_msg_oneof_iter *iter);
+bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter);
+upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter);
+void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter);
+
+UPB_END_EXTERN_C
+
+
+/* upb::EnumDef ***************************************************************/
+
+typedef upb_strtable_iter upb_enum_iter;
+
+#ifdef __cplusplus
+
+/* Class that represents an enum.  Its base class is upb::Def (convert with
+ * upb::upcast()). */
+class upb::EnumDef {
+ public:
+  /* Returns NULL if memory allocation failed. */
+  static reffed_ptr<EnumDef> New();
+
+  /* upb::RefCounted methods like Ref()/Unref(). */
+  UPB_REFCOUNTED_CPPMETHODS
+
+  /* Functionality from upb::Def. */
+  const char* full_name() const;
+  bool set_full_name(const char* fullname, Status* s);
+  bool set_full_name(const std::string& fullname, Status* s);
+
+  /* Call to freeze this EnumDef. */
+  bool Freeze(Status* s);
+
+  /* The value that is used as the default when no field default is specified.
+   * If not set explicitly, the first value that was added will be used.
+   * The default value must be a member of the enum.
+   * Requires that value_count() > 0. */
+  int32_t default_value() const;
+
+  /* Sets the default value.  If this value is not valid, returns false and an
+   * error message in status. */
+  bool set_default_value(int32_t val, Status* status);
+
+  /* Returns the number of values currently defined in the enum.  Note that
+   * multiple names can refer to the same number, so this may be greater than
+   * the total number of unique numbers. */
+  int value_count() const;
+
+  /* Adds a single name/number pair to the enum.  Fails if this name has
+   * already been used by another value. */
+  bool AddValue(const char* name, int32_t num, Status* status);
+  bool AddValue(const std::string& name, int32_t num, Status* status);
+
+  /* Lookups from name to integer, returning true if found. */
+  bool FindValueByName(const char* name, int32_t* num) const;
+
+  /* Finds the name corresponding to the given number, or NULL if none was
+   * found.  If more than one name corresponds to this number, returns the
+   * first one that was added. */
+  const char* FindValueByNumber(int32_t num) const;
+
+  /* Returns a new EnumDef with all the same values.  The new EnumDef will be
+   * owned by the given owner. */
+  EnumDef* Dup(const void* owner) const;
+
+  /* Iteration over name/value pairs.  The order is undefined.
+   * Adding an enum val invalidates any iterators.
+   *
+   * TODO: make compatible with range-for, with elements as pairs? */
+  class Iterator {
+   public:
+    explicit Iterator(const EnumDef*);
+
+    int32_t number();
+    const char *name();
+    bool Done();
+    void Next();
+
+   private:
+    upb_enum_iter iter_;
+  };
+
+ private:
+  UPB_DISALLOW_POD_OPS(EnumDef, upb::EnumDef)
+};
+
+#endif  /* __cplusplus */
+
+UPB_BEGIN_EXTERN_C
+
+/* Native C API. */
+upb_enumdef *upb_enumdef_new(const void *owner);
+upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner);
+
+/* Include upb_refcounted methods like upb_enumdef_ref(). */
+UPB_REFCOUNTED_CMETHODS(upb_enumdef, upb_enumdef_upcast2)
+
+bool upb_enumdef_freeze(upb_enumdef *e, upb_status *status);
+
+/* From upb_def. */
+const char *upb_enumdef_fullname(const upb_enumdef *e);
+bool upb_enumdef_setfullname(upb_enumdef *e, const char *fullname,
+                             upb_status *s);
+
+int32_t upb_enumdef_default(const upb_enumdef *e);
+bool upb_enumdef_setdefault(upb_enumdef *e, int32_t val, upb_status *s);
+int upb_enumdef_numvals(const upb_enumdef *e);
+bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num,
+                        upb_status *status);
+
+/* Enum lookups:
+ * - ntoi:  look up a name with specified length.
+ * - ntoiz: look up a name provided as a null-terminated string.
+ * - iton:  look up an integer, returning the name as a null-terminated
+ *          string. */
+bool upb_enumdef_ntoi(const upb_enumdef *e, const char *name, size_t len,
+                      int32_t *num);
+/* NUL-terminated variant of upb_enumdef_ntoi(): the name length is computed
+ * with strlen(), so |name| must be a valid, non-NULL C string. */
+UPB_INLINE bool upb_enumdef_ntoiz(const upb_enumdef *e,
+                                  const char *name, int32_t *num) {
+  return upb_enumdef_ntoi(e, name, strlen(name), num);
+}
+const char *upb_enumdef_iton(const upb_enumdef *e, int32_t num);
+
+/*  upb_enum_iter i;
+ *  for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) {
+ *    // ...
+ *  }
+ */
+void upb_enum_begin(upb_enum_iter *iter, const upb_enumdef *e);
+void upb_enum_next(upb_enum_iter *iter);
+bool upb_enum_done(upb_enum_iter *iter);
+const char *upb_enum_iter_name(upb_enum_iter *iter);
+int32_t upb_enum_iter_number(upb_enum_iter *iter);
+
+UPB_END_EXTERN_C
+
+/* upb::OneofDef **************************************************************/
+
+typedef upb_inttable_iter upb_oneof_iter;
+
+#ifdef __cplusplus
+
+/* Class that represents a oneof.  Its base class is upb::Def (convert with
+ * upb::upcast()). */
+class upb::OneofDef {
+ public:
+  /* Returns NULL if memory allocation failed. */
+  static reffed_ptr<OneofDef> New();
+
+  /* upb::RefCounted methods like Ref()/Unref(). */
+  UPB_REFCOUNTED_CPPMETHODS
+
+  /* Functionality from upb::Def. */
+  const char* full_name() const;
+
+  /* Returns the MessageDef that owns this OneofDef. */
+  const MessageDef* containing_type() const;
+
+  /* Returns the name of this oneof. This is the name used to look up the oneof
+   * by name once added to a message def. */
+  const char* name() const;
+  bool set_name(const char* name, Status* s);
+
+  /* Returns the number of fields currently defined in the oneof. */
+  int field_count() const;
+
+  /* Adds a field to the oneof. The field must not have been added to any other
+   * oneof or msgdef. If the oneof is not yet part of a msgdef, then when the
+   * oneof is eventually added to a msgdef, all fields added to the oneof will
+   * also be added to the msgdef at that time. If the oneof is already part of a
+   * msgdef, the field must either be a part of that msgdef already, or must not
+   * be a part of any msgdef; in the latter case, the field is added to the
+   * msgdef as a part of this operation.
+   *
+   * The field may only have an OPTIONAL label, never REQUIRED or REPEATED.
+   *
+   * If |field| is already part of this OneofDef, this method performs no
+   * action and returns true (success). Thus, this method is idempotent. */
+  bool AddField(FieldDef* field, Status* s);
+  bool AddField(const reffed_ptr<FieldDef>& field, Status* s);
+
+  /* Looks up by name. */
+  const FieldDef* FindFieldByName(const char* name, size_t len) const;
+  FieldDef* FindFieldByName(const char* name, size_t len);
+  const FieldDef* FindFieldByName(const char* name) const {
+    return FindFieldByName(name, strlen(name));
+  }
+  FieldDef* FindFieldByName(const char* name) {
+    return FindFieldByName(name, strlen(name));
+  }
+
+  template <class T>
+  FieldDef* FindFieldByName(const T& str) {
+    return FindFieldByName(str.c_str(), str.size());
+  }
+  template <class T>
+  const FieldDef* FindFieldByName(const T& str) const {
+    return FindFieldByName(str.c_str(), str.size());
+  }
+
+  /* Looks up by tag number. */
+  const FieldDef* FindFieldByNumber(uint32_t num) const;
+
+  /* Returns a new OneofDef with all the same fields. The OneofDef will be owned
+   * by the given owner. */
+  OneofDef* Dup(const void* owner) const;
+
+  /* Iteration over fields.  The order is undefined. */
+  class iterator : public std::iterator<std::forward_iterator_tag, FieldDef*> {
+   public:
+    explicit iterator(OneofDef* md);
+    static iterator end(OneofDef* md);
+
+    void operator++();
+    FieldDef* operator*() const;
+    bool operator!=(const iterator& other) const;
+    bool operator==(const iterator& other) const;
+
+   private:
+    upb_oneof_iter iter_;
+  };
+
+  class const_iterator
+      : public std::iterator<std::forward_iterator_tag, const FieldDef*> {
+   public:
+    explicit const_iterator(const OneofDef* md);
+    static const_iterator end(const OneofDef* md);
+
+    void operator++();
+    const FieldDef* operator*() const;
+    bool operator!=(const const_iterator& other) const;
+    bool operator==(const const_iterator& other) const;
+
+   private:
+    upb_oneof_iter iter_;
+  };
+
+  iterator begin();
+  iterator end();
+  const_iterator begin() const;
+  const_iterator end() const;
+
+ private:
+  UPB_DISALLOW_POD_OPS(OneofDef, upb::OneofDef)
+};
+
+#endif  /* __cplusplus */
+
+UPB_BEGIN_EXTERN_C
+
+/* Native C API. */
+upb_oneofdef *upb_oneofdef_new(const void *owner);
+upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner);
+
+/* Include upb_refcounted methods like upb_oneofdef_ref(). */
+UPB_REFCOUNTED_CMETHODS(upb_oneofdef, upb_oneofdef_upcast2)
+
+const char *upb_oneofdef_name(const upb_oneofdef *o);
+bool upb_oneofdef_setname(upb_oneofdef *o, const char *name, upb_status *s);
+
+const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o);
+int upb_oneofdef_numfields(const upb_oneofdef *o);
+bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f,
+                           const void *ref_donor,
+                           upb_status *s);
+
+/* Oneof lookups:
+ * - ntof:  look up a field by name.
+ * - ntofz: look up a field by name (as a null-terminated string).
+ * - itof:  look up a field by number. */
+const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
+                                      const char *name, size_t length);
+/* NUL-terminated variant of upb_oneofdef_ntof(): the name length is computed
+ * with strlen(), so |name| must be a valid, non-NULL C string. */
+UPB_INLINE const upb_fielddef *upb_oneofdef_ntofz(const upb_oneofdef *o,
+                                                  const char *name) {
+  return upb_oneofdef_ntof(o, name, strlen(name));
+}
+const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num);
+
+/*  upb_oneof_iter i;
+ *  for(upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) {
+ *    // ...
+ *  }
+ */
+void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o);
+void upb_oneof_next(upb_oneof_iter *iter);
+bool upb_oneof_done(upb_oneof_iter *iter);
+upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter);
+void upb_oneof_iter_setdone(upb_oneof_iter *iter);
+
+UPB_END_EXTERN_C
+
+#ifdef __cplusplus
+
+/* Returns str.c_str() for passing a std::string to the C API.  Asserts (only
+ * in builds where assert() is active) that the string has no embedded NUL
+ * bytes, i.e. that the C string the callee sees has the same length as the
+ * std::string; with assertions disabled a string containing NULs is silently
+ * truncated from the callee's point of view. */
+UPB_INLINE const char* upb_safecstr(const std::string& str) {
+  assert(str.size() == std::strlen(str.c_str()));
+  return str.c_str();
+}
+
+/* Inline C++ wrappers. */
+namespace upb {
+
+inline Def* Def::Dup(const void* owner) const {
+  return upb_def_dup(this, owner);
+}
+inline Def::Type Def::def_type() const { return upb_def_type(this); }
+inline const char* Def::full_name() const { return upb_def_fullname(this); }
+inline bool Def::set_full_name(const char* fullname, Status* s) {
+  return upb_def_setfullname(this, fullname, s);
+}
+inline bool Def::set_full_name(const std::string& fullname, Status* s) {
+  return upb_def_setfullname(this, upb_safecstr(fullname), s);
+}
+inline bool Def::Freeze(Def* const* defs, int n, Status* status) {
+  return upb_def_freeze(defs, n, status);
+}
+/* Freezes all defs in |defs| as a batch by forwarding to upb_def_freeze().
+ * Returns whatever upb_def_freeze() returns; errors are reported in |status|.
+ *
+ * An empty vector is handled explicitly: indexing defs[0] on an empty vector
+ * is undefined behavior, so a NULL array with a count of zero is passed
+ * instead. */
+inline bool Def::Freeze(const std::vector<Def*>& defs, Status* status) {
+  Def* const* first = defs.empty() ? NULL : &defs[0];
+  return upb_def_freeze(first, defs.size(), status);
+}
+
+inline bool FieldDef::CheckType(int32_t val) {
+  return upb_fielddef_checktype(val);
+}
+inline bool FieldDef::CheckLabel(int32_t val) {
+  return upb_fielddef_checklabel(val);
+}
+inline bool FieldDef::CheckDescriptorType(int32_t val) {
+  return upb_fielddef_checkdescriptortype(val);
+}
+inline bool FieldDef::CheckIntegerFormat(int32_t val) {
+  return upb_fielddef_checkintfmt(val);
+}
+/* Casts a raw integer to FieldDef::Type.  Validity is only checked through
+ * assert(CheckType(val)), so when assertions are compiled out the value is
+ * cast unchecked; callers handling untrusted input should call CheckType()
+ * themselves first. */
+inline FieldDef::Type FieldDef::ConvertType(int32_t val) {
+  assert(CheckType(val));
+  return static_cast<FieldDef::Type>(val);
+}
+/* Casts a raw integer to FieldDef::Label.  Validity is only checked through
+ * assert(CheckLabel(val)), so when assertions are compiled out the value is
+ * cast unchecked; callers handling untrusted input should call CheckLabel()
+ * themselves first. */
+inline FieldDef::Label FieldDef::ConvertLabel(int32_t val) {
+  assert(CheckLabel(val));
+  return static_cast<FieldDef::Label>(val);
+}
+/* Casts a raw integer to FieldDef::DescriptorType.  Validity is only checked
+ * through assert(CheckDescriptorType(val)), so when assertions are compiled
+ * out the value is cast unchecked; callers handling untrusted input should
+ * call CheckDescriptorType() themselves first. */
+inline FieldDef::DescriptorType FieldDef::ConvertDescriptorType(int32_t val) {
+  assert(CheckDescriptorType(val));
+  return static_cast<FieldDef::DescriptorType>(val);
+}
+/* Casts a raw integer to FieldDef::IntegerFormat.  Validity is only checked
+ * through assert(CheckIntegerFormat(val)), so when assertions are compiled
+ * out the value is cast unchecked; callers handling untrusted input should
+ * call CheckIntegerFormat() themselves first. */
+inline FieldDef::IntegerFormat FieldDef::ConvertIntegerFormat(int32_t val) {
+  assert(CheckIntegerFormat(val));
+  return static_cast<FieldDef::IntegerFormat>(val);
+}
+
+/* Creates a new fielddef through the C API and wraps it in a reffed_ptr.
+ * The address of the local variable |f| is used as the "owner" argument of
+ * upb_fielddef_new() and is then passed, together with the new pointer, to
+ * the reffed_ptr constructor.
+ * NOTE(review): presumably reffed_ptr takes over the reference held by &f,
+ * and |f| may be NULL if allocation failed -- confirm against reffed_ptr and
+ * upb_fielddef_new(). */
+inline reffed_ptr<FieldDef> FieldDef::New() {
+  upb_fielddef *f = upb_fielddef_new(&f);
+  return reffed_ptr<FieldDef>(f, &f);
+}
+inline FieldDef* FieldDef::Dup(const void* owner) const {
+  return upb_fielddef_dup(this, owner);
+}
+inline const char* FieldDef::full_name() const {
+  return upb_fielddef_fullname(this);
+}
+inline bool FieldDef::set_full_name(const char* fullname, Status* s) {
+  return upb_fielddef_setfullname(this, fullname, s);
+}
+inline bool FieldDef::set_full_name(const std::string& fullname, Status* s) {
+  return upb_fielddef_setfullname(this, upb_safecstr(fullname), s);
+}
+inline bool FieldDef::type_is_set() const {
+  return upb_fielddef_typeisset(this);
+}
+inline FieldDef::Type FieldDef::type() const { return upb_fielddef_type(this); }
+inline FieldDef::DescriptorType FieldDef::descriptor_type() const {
+  return upb_fielddef_descriptortype(this);
+}
+inline FieldDef::Label FieldDef::label() const {
+  return upb_fielddef_label(this);
+}
+inline uint32_t FieldDef::number() const { return upb_fielddef_number(this); }
+inline const char* FieldDef::name() const { return upb_fielddef_name(this); }
+inline bool FieldDef::is_extension() const {
+  return upb_fielddef_isextension(this);
+}
+inline bool FieldDef::lazy() const {
+  return upb_fielddef_lazy(this);
+}
+inline void FieldDef::set_lazy(bool lazy) {
+  upb_fielddef_setlazy(this, lazy);
+}
+inline bool FieldDef::packed() const {
+  return upb_fielddef_packed(this);
+}
+inline void FieldDef::set_packed(bool packed) {
+  upb_fielddef_setpacked(this, packed);
+}
+/* Accessors for what contains this field; each forwards to the matching
+ * upb_fielddef_containing*() C function. */
+inline const MessageDef* FieldDef::containing_type() const {
+  return upb_fielddef_containingtype(this);
+}
+inline const OneofDef* FieldDef::containing_oneof() const {
+  return upb_fielddef_containingoneof(this);
+}
+/* Note: unlike the two accessors above, this one is not const-qualified. */
+inline const char* FieldDef::containing_type_name() {
+  return upb_fielddef_containingtypename(this);
+}
+/* Setters that report success as bool.  Each passes its arguments and the
+ * Status pointer to the matching upb_fielddef_set*() C function and returns
+ * that function's result.  The std::string overloads convert the string with
+ * upb_safecstr() first. */
+inline bool FieldDef::set_number(uint32_t number, Status* s) {
+  return upb_fielddef_setnumber(this, number, s);
+}
+inline bool FieldDef::set_name(const char *name, Status* s) {
+  return upb_fielddef_setname(this, name, s);
+}
+inline bool FieldDef::set_name(const std::string& name, Status* s) {
+  return upb_fielddef_setname(this, upb_safecstr(name), s);
+}
+inline bool FieldDef::set_containing_type_name(const char *name, Status* s) {
+  return upb_fielddef_setcontainingtypename(this, name, s);
+}
+inline bool FieldDef::set_containing_type_name(const std::string &name,
+                                               Status *s) {
+  return upb_fielddef_setcontainingtypename(this, upb_safecstr(name), s);
+}
+/* Setters with no return value and no Status argument; each forwards its
+ * single argument to the matching upb_fielddef_set*() C function. */
+inline void FieldDef::set_type(upb_fieldtype_t type) {
+  upb_fielddef_settype(this, type);
+}
+inline void FieldDef::set_is_extension(bool is_extension) {
+  upb_fielddef_setisextension(this, is_extension);
+}
+inline void FieldDef::set_descriptor_type(FieldDef::DescriptorType type) {
+  upb_fielddef_setdescriptortype(this, type);
+}
+inline void FieldDef::set_label(upb_label_t label) {
+  upb_fielddef_setlabel(this, label);
+}
+/* Kind predicates; each returns the result of the corresponding
+ * upb_fielddef_is*() C function. */
+inline bool FieldDef::IsSubMessage() const {
+  return upb_fielddef_issubmsg(this);
+}
+inline bool FieldDef::IsString() const { return upb_fielddef_isstring(this); }
+inline bool FieldDef::IsSequence() const { return upb_fielddef_isseq(this); }
+inline bool FieldDef::IsMap() const { return upb_fielddef_ismap(this); }
+/* Typed getters for the field's default value.  Each forwards to the
+ * upb_fielddef_default*() function for that C type and returns its result. */
+inline int64_t FieldDef::default_int64() const {
+  return upb_fielddef_defaultint64(this);
+}
+inline int32_t FieldDef::default_int32() const {
+  return upb_fielddef_defaultint32(this);
+}
+inline uint64_t FieldDef::default_uint64() const {
+  return upb_fielddef_defaultuint64(this);
+}
+inline uint32_t FieldDef::default_uint32() const {
+  return upb_fielddef_defaultuint32(this);
+}
+inline bool FieldDef::default_bool() const {
+  return upb_fielddef_defaultbool(this);
+}
+inline float FieldDef::default_float() const {
+  return upb_fielddef_defaultfloat(this);
+}
+inline double FieldDef::default_double() const {
+  return upb_fielddef_defaultdouble(this);
+}
+/* `len` is passed straight to upb_fielddef_defaultstr(); presumably it
+ * receives the string length -- confirm against the C declaration. */
+inline const char* FieldDef::default_string(size_t* len) const {
+  return upb_fielddef_defaultstr(this, len);
+}
+/* Typed setters for the field's default value.  The numeric/bool setters
+ * forward to the matching upb_fielddef_setdefault*() function and return
+ * nothing. */
+inline void FieldDef::set_default_int64(int64_t value) {
+  upb_fielddef_setdefaultint64(this, value);
+}
+inline void FieldDef::set_default_int32(int32_t value) {
+  upb_fielddef_setdefaultint32(this, value);
+}
+inline void FieldDef::set_default_uint64(uint64_t value) {
+  upb_fielddef_setdefaultuint64(this, value);
+}
+inline void FieldDef::set_default_uint32(uint32_t value) {
+  upb_fielddef_setdefaultuint32(this, value);
+}
+inline void FieldDef::set_default_bool(bool value) {
+  upb_fielddef_setdefaultbool(this, value);
+}
+inline void FieldDef::set_default_float(float value) {
+  upb_fielddef_setdefaultfloat(this, value);
+}
+inline void FieldDef::set_default_double(double value) {
+  upb_fielddef_setdefaultdouble(this, value);
+}
+/* String defaults: pointer + explicit length are passed to
+ * upb_fielddef_setdefaultstr(), whose bool result is returned. */
+inline bool FieldDef::set_default_string(const void *str, size_t len,
+                                         Status *s) {
+  return upb_fielddef_setdefaultstr(this, str, len, s);
+}
+/* std::string overload: passes str.c_str() together with str.size(). */
+inline bool FieldDef::set_default_string(const std::string& str, Status* s) {
+  return upb_fielddef_setdefaultstr(this, str.c_str(), str.size(), s);
+}
+/* Sets the default from a C string by delegating to
+ * upb_fielddef_setdefaultcstr().  Nothing is returned to the caller; any
+ * outcome is communicated only through what the C call does with `s`. */
+inline void FieldDef::set_default_cstr(const char* str, Status* s) {
+  upb_fielddef_setdefaultcstr(this, str, s);
+}
+/* Sub-definition accessors; each forwards to the matching
+ * upb_fielddef_*subdef*() C function and returns its result. */
+inline bool FieldDef::HasSubDef() const { return upb_fielddef_hassubdef(this); }
+inline const Def* FieldDef::subdef() const { return upb_fielddef_subdef(this); }
+inline const MessageDef *FieldDef::message_subdef() const {
+  return upb_fielddef_msgsubdef(this);
+}
+inline const EnumDef *FieldDef::enum_subdef() const {
+  return upb_fielddef_enumsubdef(this);
+}
+inline const char* FieldDef::subdef_name() const {
+  return upb_fielddef_subdefname(this);
+}
+/* Sub-definition setters.  Each forwards its argument and the Status pointer
+ * to the matching upb_fielddef_set*subdef*() C function and returns that
+ * function's bool result.  The std::string overload converts with
+ * upb_safecstr() first. */
+inline bool FieldDef::set_subdef(const Def* subdef, Status* s) {
+  return upb_fielddef_setsubdef(this, subdef, s);
+}
+inline bool FieldDef::set_enum_subdef(const EnumDef* subdef, Status* s) {
+  return upb_fielddef_setenumsubdef(this, subdef, s);
+}
+inline bool FieldDef::set_message_subdef(const MessageDef* subdef, Status* s) {
+  return upb_fielddef_setmsgsubdef(this, subdef, s);
+}
+inline bool FieldDef::set_subdef_name(const char* name, Status* s) {
+  return upb_fielddef_setsubdefname(this, name, s);
+}
+inline bool FieldDef::set_subdef_name(const std::string& name, Status* s) {
+  return upb_fielddef_setsubdefname(this, upb_safecstr(name), s);
+}
+
+/* Creates a new message def with upb_msgdef_new() and wraps it in a
+ * reffed_ptr.  The address of the local pointer is the value handed to both
+ * upb_msgdef_new() and the reffed_ptr constructor. */
+inline reffed_ptr<MessageDef> MessageDef::New() {
+  upb_msgdef *fresh = upb_msgdef_new(&fresh);
+  return reffed_ptr<MessageDef>(fresh, &fresh);
+}
+/* Name, freeze and count wrappers.  Each forwards to the matching
+ * upb_msgdef_*() C function and returns its result unchanged; the
+ * std::string overload of set_full_name converts with upb_safecstr(). */
+inline const char *MessageDef::full_name() const {
+  return upb_msgdef_fullname(this);
+}
+inline bool MessageDef::set_full_name(const char* fullname, Status* s) {
+  return upb_msgdef_setfullname(this, fullname, s);
+}
+inline bool MessageDef::set_full_name(const std::string& fullname, Status* s) {
+  return upb_msgdef_setfullname(this, upb_safecstr(fullname), s);
+}
+inline bool MessageDef::Freeze(Status* status) {
+  return upb_msgdef_freeze(this, status);
+}
+inline int MessageDef::field_count() const {
+  return upb_msgdef_numfields(this);
+}
+inline int MessageDef::oneof_count() const {
+  return upb_msgdef_numoneofs(this);
+}
+/* Adds a field or oneof to this message through upb_msgdef_addfield() /
+ * upb_msgdef_addoneof().  All four overloads pass NULL as the third argument
+ * of the C call; the reffed_ptr overloads pass the raw pointer from get(). */
+inline bool MessageDef::AddField(upb_fielddef* f, Status* s) {
+  return upb_msgdef_addfield(this, f, NULL, s);
+}
+inline bool MessageDef::AddField(const reffed_ptr<FieldDef>& f, Status* s) {
+  return upb_msgdef_addfield(this, f.get(), NULL, s);
+}
+inline bool MessageDef::AddOneof(upb_oneofdef* o, Status* s) {
+  return upb_msgdef_addoneof(this, o, NULL, s);
+}
+inline bool MessageDef::AddOneof(const reffed_ptr<OneofDef>& o, Status* s) {
+  return upb_msgdef_addoneof(this, o.get(), NULL, s);
+}
+/* Lookup wrappers.  The non-const overloads call the *_mutable C lookups
+ * (upb_msgdef_itof_mutable / ntof_mutable / ntoo_mutable); the const
+ * overloads call the plain ones (upb_msgdef_itof / ntof / ntoo).  Name
+ * lookups take an explicit length alongside the name pointer. */
+inline FieldDef* MessageDef::FindFieldByNumber(uint32_t number) {
+  return upb_msgdef_itof_mutable(this, number);
+}
+inline FieldDef* MessageDef::FindFieldByName(const char* name, size_t len) {
+  return upb_msgdef_ntof_mutable(this, name, len);
+}
+inline const FieldDef* MessageDef::FindFieldByNumber(uint32_t number) const {
+  return upb_msgdef_itof(this, number);
+}
+inline const FieldDef *MessageDef::FindFieldByName(const char *name,
+                                                   size_t len) const {
+  return upb_msgdef_ntof(this, name, len);
+}
+inline OneofDef* MessageDef::FindOneofByName(const char* name, size_t len) {
+  return upb_msgdef_ntoo_mutable(this, name, len);
+}
+inline const OneofDef* MessageDef::FindOneofByName(const char* name,
+                                                   size_t len) const {
+  return upb_msgdef_ntoo(this, name, len);
+}
+/* Forwards to upb_msgdef_dup(), passing `owner` through unchanged. */
+inline MessageDef* MessageDef::Dup(const void *owner) const {
+  return upb_msgdef_dup(this, owner);
+}
+/* Setter/getter for the map-entry flag via upb_msgdef_setmapentry() and
+ * upb_msgdef_mapentry(). */
+inline void MessageDef::setmapentry(bool map_entry) {
+  upb_msgdef_setmapentry(this, map_entry);
+}
+inline bool MessageDef::mapentry() const {
+  return upb_msgdef_mapentry(this);
+}
+/* Iterator factories for fields.  *_begin() constructs the iterator directly
+ * from `this`; *_end() uses the iterator type's static end() helper.  The
+ * const overloads return the const_* iterator types. */
+inline MessageDef::field_iterator MessageDef::field_begin() {
+  return field_iterator(this);
+}
+inline MessageDef::field_iterator MessageDef::field_end() {
+  return field_iterator::end(this);
+}
+inline MessageDef::const_field_iterator MessageDef::field_begin() const {
+  return const_field_iterator(this);
+}
+inline MessageDef::const_field_iterator MessageDef::field_end() const {
+  return const_field_iterator::end(this);
+}
+
+/* Iterator factories for oneofs; same begin/end pattern as for fields. */
+inline MessageDef::oneof_iterator MessageDef::oneof_begin() {
+  return oneof_iterator(this);
+}
+inline MessageDef::oneof_iterator MessageDef::oneof_end() {
+  return oneof_iterator::end(this);
+}
+inline MessageDef::const_oneof_iterator MessageDef::oneof_begin() const {
+  return const_oneof_iterator(this);
+}
+inline MessageDef::const_oneof_iterator MessageDef::oneof_end() const {
+  return const_oneof_iterator::end(this);
+}
+
+/* Initializes iter_ over `md` with upb_msg_field_begin(). */
+inline MessageDef::field_iterator::field_iterator(MessageDef* md) {
+  upb_msg_field_begin(&iter_, md);
+}
+/* Builds the end iterator: a freshly begun iterator that is then marked done
+ * with upb_msg_field_iter_setdone(). */
+inline MessageDef::field_iterator MessageDef::field_iterator::end(
+    MessageDef* md) {
+  MessageDef::field_iterator past_last(md);
+  upb_msg_field_iter_setdone(&past_last.iter_);
+  return past_last;
+}
+/* Yields the field at the current position. */
+inline FieldDef* MessageDef::field_iterator::operator*() const {
+  return upb_msg_iter_field(&iter_);
+}
+/* Advances to the next field. */
+inline void MessageDef::field_iterator::operator++() {
+  upb_msg_field_next(&iter_);
+}
+/* Two iterators are equal when their underlying table iterators are. */
+inline bool MessageDef::field_iterator::operator==(
+    const field_iterator &other) const {
+  return upb_inttable_iter_isequal(&iter_, &other.iter_);
+}
+inline bool MessageDef::field_iterator::operator!=(
+    const field_iterator &other) const {
+  return !operator==(other);
+}
+
+/* Initializes iter_ over the const message `md` with upb_msg_field_begin(). */
+inline MessageDef::const_field_iterator::const_field_iterator(
+    const MessageDef* md) {
+  upb_msg_field_begin(&iter_, md);
+}
+/* Builds the end iterator: a freshly begun iterator that is then marked done
+ * with upb_msg_field_iter_setdone(). */
+inline MessageDef::const_field_iterator MessageDef::const_field_iterator::end(
+    const MessageDef *md) {
+  MessageDef::const_field_iterator past_last(md);
+  upb_msg_field_iter_setdone(&past_last.iter_);
+  return past_last;
+}
+/* Yields the field at the current position as a pointer-to-const. */
+inline const FieldDef* MessageDef::const_field_iterator::operator*() const {
+  return upb_msg_iter_field(&iter_);
+}
+/* Advances to the next field. */
+inline void MessageDef::const_field_iterator::operator++() {
+  upb_msg_field_next(&iter_);
+}
+/* Two iterators are equal when their underlying table iterators are. */
+inline bool MessageDef::const_field_iterator::operator==(
+    const const_field_iterator &other) const {
+  return upb_inttable_iter_isequal(&iter_, &other.iter_);
+}
+inline bool MessageDef::const_field_iterator::operator!=(
+    const const_field_iterator &other) const {
+  return !operator==(other);
+}
+
+/* Initializes iter_ over `md` with upb_msg_oneof_begin(). */
+inline MessageDef::oneof_iterator::oneof_iterator(MessageDef* md) {
+  upb_msg_oneof_begin(&iter_, md);
+}
+/* Builds the end iterator: a freshly begun iterator that is then marked done
+ * with upb_msg_oneof_iter_setdone(). */
+inline MessageDef::oneof_iterator MessageDef::oneof_iterator::end(
+    MessageDef* md) {
+  MessageDef::oneof_iterator past_last(md);
+  upb_msg_oneof_iter_setdone(&past_last.iter_);
+  return past_last;
+}
+/* Yields the oneof at the current position. */
+inline OneofDef* MessageDef::oneof_iterator::operator*() const {
+  return upb_msg_iter_oneof(&iter_);
+}
+/* Advances to the next oneof. */
+inline void MessageDef::oneof_iterator::operator++() {
+  upb_msg_oneof_next(&iter_);
+}
+/* Equality is delegated to upb_strtable_iter_isequal(). */
+inline bool MessageDef::oneof_iterator::operator==(
+    const oneof_iterator &other) const {
+  return upb_strtable_iter_isequal(&iter_, &other.iter_);
+}
+inline bool MessageDef::oneof_iterator::operator!=(
+    const oneof_iterator &other) const {
+  return !operator==(other);
+}
+
+/* Initializes iter_ over the const message `md` with upb_msg_oneof_begin(). */
+inline MessageDef::const_oneof_iterator::const_oneof_iterator(
+    const MessageDef* md) {
+  upb_msg_oneof_begin(&iter_, md);
+}
+/* Builds the end iterator: a freshly begun iterator that is then marked done
+ * with upb_msg_oneof_iter_setdone(). */
+inline MessageDef::const_oneof_iterator MessageDef::const_oneof_iterator::end(
+    const MessageDef *md) {
+  MessageDef::const_oneof_iterator past_last(md);
+  upb_msg_oneof_iter_setdone(&past_last.iter_);
+  return past_last;
+}
+/* Yields the oneof at the current position as a pointer-to-const. */
+inline const OneofDef* MessageDef::const_oneof_iterator::operator*() const {
+  return upb_msg_iter_oneof(&iter_);
+}
+/* Advances to the next oneof. */
+inline void MessageDef::const_oneof_iterator::operator++() {
+  upb_msg_oneof_next(&iter_);
+}
+/* Equality is delegated to upb_strtable_iter_isequal(). */
+inline bool MessageDef::const_oneof_iterator::operator==(
+    const const_oneof_iterator &other) const {
+  return upb_strtable_iter_isequal(&iter_, &other.iter_);
+}
+inline bool MessageDef::const_oneof_iterator::operator!=(
+    const const_oneof_iterator &other) const {
+  return !operator==(other);
+}
+
+/* Creates a new enum def with upb_enumdef_new() and wraps it in a
+ * reffed_ptr.  The address of the local pointer is the value handed to both
+ * upb_enumdef_new() and the reffed_ptr constructor. */
+inline reffed_ptr<EnumDef> EnumDef::New() {
+  upb_enumdef *fresh = upb_enumdef_new(&fresh);
+  return reffed_ptr<EnumDef>(fresh, &fresh);
+}
+/* EnumDef wrappers.  Each forwards to the matching upb_enumdef_*() C function
+ * and returns its result unchanged; std::string overloads convert with
+ * upb_safecstr() first. */
+inline const char* EnumDef::full_name() const {
+  return upb_enumdef_fullname(this);
+}
+inline bool EnumDef::set_full_name(const char* fullname, Status* s) {
+  return upb_enumdef_setfullname(this, fullname, s);
+}
+inline bool EnumDef::set_full_name(const std::string& fullname, Status* s) {
+  return upb_enumdef_setfullname(this, upb_safecstr(fullname), s);
+}
+inline bool EnumDef::Freeze(Status* status) {
+  return upb_enumdef_freeze(this, status);
+}
+inline int32_t EnumDef::default_value() const {
+  return upb_enumdef_default(this);
+}
+inline bool EnumDef::set_default_value(int32_t val, Status* status) {
+  return upb_enumdef_setdefault(this, val, status);
+}
+inline int EnumDef::value_count() const { return upb_enumdef_numvals(this); }
+/* Adds a (name, number) pair via upb_enumdef_addval(). */
+inline bool EnumDef::AddValue(const char* name, int32_t num, Status* status) {
+  return upb_enumdef_addval(this, name, num, status);
+}
+inline bool EnumDef::AddValue(const std::string& name, int32_t num,
+                              Status* status) {
+  return upb_enumdef_addval(this, upb_safecstr(name), num, status);
+}
+/* Name -> number lookup via upb_enumdef_ntoiz(); `num` is passed to the C
+ * call as the output location and the call's bool result is returned. */
+inline bool EnumDef::FindValueByName(const char* name, int32_t *num) const {
+  return upb_enumdef_ntoiz(this, name, num);
+}
+/* Number -> name lookup via upb_enumdef_iton(). */
+inline const char* EnumDef::FindValueByNumber(int32_t num) const {
+  return upb_enumdef_iton(this, num);
+}
+/* Forwards to upb_enumdef_dup(), passing `owner` through unchanged. */
+inline EnumDef* EnumDef::Dup(const void* owner) const {
+  return upb_enumdef_dup(this, owner);
+}
+
+/* Initializes iter_ over the enum `e` with upb_enum_begin(). */
+inline EnumDef::Iterator::Iterator(const EnumDef* e) {
+  upb_enum_begin(&iter_, e);
+}
+/* Number of the value at the current position. */
+inline int32_t EnumDef::Iterator::number() {
+  return upb_enum_iter_number(&iter_);
+}
+/* Name of the value at the current position. */
+inline const char* EnumDef::Iterator::name() {
+  return upb_enum_iter_name(&iter_);
+}
+/* Reports whatever upb_enum_done() returns for the current position. */
+inline bool EnumDef::Iterator::Done() {
+  return upb_enum_done(&iter_);
+}
+/* Advances to the next value. */
+inline void EnumDef::Iterator::Next() {
+  upb_enum_next(&iter_);
+}
+
+/* Creates a new oneof def with upb_oneofdef_new() and wraps it in a
+ * reffed_ptr.  The address of the local pointer is the value handed to both
+ * upb_oneofdef_new() and the reffed_ptr constructor. */
+inline reffed_ptr<OneofDef> OneofDef::New() {
+  upb_oneofdef *fresh = upb_oneofdef_new(&fresh);
+  return reffed_ptr<OneofDef>(fresh, &fresh);
+}
+/* Note: full_name() and name() both forward to upb_oneofdef_name(), so they
+ * return the same string. */
+inline const char* OneofDef::full_name() const {
+  return upb_oneofdef_name(this);
+}
+
+/* Returns the result of upb_oneofdef_containingtype(). */
+inline const MessageDef* OneofDef::containing_type() const {
+  return upb_oneofdef_containingtype(this);
+}
+inline const char* OneofDef::name() const {
+  return upb_oneofdef_name(this);
+}
+/* Sets the name via upb_oneofdef_setname() and returns its bool result. */
+inline bool OneofDef::set_name(const char* name, Status* s) {
+  return upb_oneofdef_setname(this, name, s);
+}
+inline int OneofDef::field_count() const {
+  return upb_oneofdef_numfields(this);
+}
+/* Adds a field through upb_oneofdef_addfield(), passing NULL as the third
+ * argument; the reffed_ptr overload passes the raw pointer from get(). */
+inline bool OneofDef::AddField(FieldDef* field, Status* s) {
+  return upb_oneofdef_addfield(this, field, NULL, s);
+}
+inline bool OneofDef::AddField(const reffed_ptr<FieldDef>& field, Status* s) {
+  return upb_oneofdef_addfield(this, field.get(), NULL, s);
+}
+/* Lookups by name (pointer + length) and by number, via upb_oneofdef_ntof()
+ * and upb_oneofdef_itof(). */
+inline const FieldDef* OneofDef::FindFieldByName(const char* name,
+                                                 size_t len) const {
+  return upb_oneofdef_ntof(this, name, len);
+}
+inline const FieldDef* OneofDef::FindFieldByNumber(uint32_t num) const {
+  return upb_oneofdef_itof(this, num);
+}
+/* Iterator factories.  begin() constructs the iterator directly from `this`;
+ * end() uses the iterator type's static end() helper.  The const overloads
+ * return const_iterator. */
+inline OneofDef::iterator OneofDef::begin() { return iterator(this); }
+inline OneofDef::iterator OneofDef::end() { return iterator::end(this); }
+inline OneofDef::const_iterator OneofDef::begin() const {
+  return const_iterator(this);
+}
+inline OneofDef::const_iterator OneofDef::end() const {
+  return const_iterator::end(this);
+}
+
+/* Initializes iter_ over the oneof `o` with upb_oneof_begin(). */
+inline OneofDef::iterator::iterator(OneofDef* o) {
+  upb_oneof_begin(&iter_, o);
+}
+/* Builds the end iterator: a freshly begun iterator that is then marked done
+ * with upb_oneof_iter_setdone(). */
+inline OneofDef::iterator OneofDef::iterator::end(OneofDef* o) {
+  OneofDef::iterator past_last(o);
+  upb_oneof_iter_setdone(&past_last.iter_);
+  return past_last;
+}
+/* Yields the field at the current position. */
+inline FieldDef* OneofDef::iterator::operator*() const {
+  return upb_oneof_iter_field(&iter_);
+}
+/* Advances to the next field. */
+inline void OneofDef::iterator::operator++() {
+  upb_oneof_next(&iter_);
+}
+/* Two iterators are equal when their underlying table iterators are. */
+inline bool OneofDef::iterator::operator==(const iterator &other) const {
+  return upb_inttable_iter_isequal(&iter_, &other.iter_);
+}
+inline bool OneofDef::iterator::operator!=(const iterator &other) const {
+  return !operator==(other);
+}
+
+/* Initializes iter_ over the const oneof `md` with upb_oneof_begin(). */
+inline OneofDef::const_iterator::const_iterator(const OneofDef* md) {
+  upb_oneof_begin(&iter_, md);
+}
+/* Builds the end iterator: a freshly begun iterator that is then marked done
+ * with upb_oneof_iter_setdone(). */
+inline OneofDef::const_iterator OneofDef::const_iterator::end(
+    const OneofDef *md) {
+  OneofDef::const_iterator past_last(md);
+  upb_oneof_iter_setdone(&past_last.iter_);
+  return past_last;
+}
+/* Yields the field at the current position as a pointer-to-const.  Uses the
+ * oneof accessor, matching OneofDef::iterator::operator*(): iter_ here is set
+ * up by upb_oneof_begin(), so it must not be read through the message-field
+ * accessor upb_msg_iter_field(). */
+inline const FieldDef* OneofDef::const_iterator::operator*() const {
+  return upb_oneof_iter_field(&iter_);
+}
+/* Advances to the next field. */
+inline void OneofDef::const_iterator::operator++() {
+  upb_oneof_next(&iter_);
+}
+/* Two iterators are equal when their underlying table iterators are. */
+inline bool OneofDef::const_iterator::operator==(
+    const const_iterator &other) const {
+  return upb_inttable_iter_isequal(&iter_, &other.iter_);
+}
+inline bool OneofDef::const_iterator::operator!=(
+    const const_iterator &other) const {
+  return !operator==(other);
+}
+
+}  /* namespace upb */
+#endif
+
+#endif /* UPB_DEF_H_ */
+/*
+** This file contains definitions of structs that should be considered private
+** and NOT stable across versions of upb.
+**
+** The only reason they are declared here and not in .c files is to allow upb
+** and the application (if desired) to embed statically-initialized instances
+** of structures like defs.
+**
+** If you include this file, all guarantees of ABI compatibility go out the
+** window!  Any code that includes this file needs to recompile against the
+** exact same version of upb that they are linking against.
+**
+** You also need to recompile if you change the value of the UPB_DEBUG_REFS
+** flag.
+*/
+
+
+#ifndef UPB_STATICINIT_H_
+#define UPB_STATICINIT_H_
+
+#ifdef __cplusplus
+/* Because of how we do our typedefs, this header can't be included from C++. */
+#error This file cannot be included from C++
+#endif
+
+/* upb_refcounted *************************************************************/
+
+
+/* upb_def ********************************************************************/
+
+/* Common header for all def types.  It is embedded as the first member
+ * (`base`) of upb_fielddef, upb_msgdef, upb_enumdef and upb_oneofdef. */
+struct upb_def {
+  upb_refcounted base;
+
+  const char *fullname;
+  char type;  /* A upb_deftype_t (char to save space) */
+
+  /* Used as a flag during the def's mutable stage.  Must be false unless
+   * it is currently being used by a function on the stack.  This allows
+   * us to easily determine which defs were passed into the function's
+   * current invocation. */
+  bool came_from_user;
+};
+
+/* Static initializer for a upb_def.  Values follow the member order of the
+ * struct: base, fullname, type, came_from_user (always false). */
+#define UPB_DEF_INIT(name, type, refs, ref2s) \
+    { UPB_REFCOUNT_INIT(refs, ref2s), name, type, false }
+
+
+/* upb_fielddef ***************************************************************/
+
+/* Private layout of a field definition.  `base` is the common upb_def
+ * header.  The two *_is_symbolic flags select which member of the `msg` and
+ * `sub` unions is currently valid (see the per-member comments).  Static
+ * initializers depend on this exact member order. */
+struct upb_fielddef {
+  upb_def base;
+
+  /* Storage for the default value; one member per representation. */
+  union {
+    int64_t sint;
+    uint64_t uint;
+    double dbl;
+    float flt;
+    void *bytes;
+  } defaultval;
+  union {
+    const upb_msgdef *def;  /* If !msg_is_symbolic. */
+    char *name;             /* If msg_is_symbolic. */
+  } msg;
+  union {
+    const upb_def *def;  /* If !subdef_is_symbolic. */
+    char *name;          /* If subdef_is_symbolic. */
+  } sub;  /* The msgdef or enumdef for this field, if upb_hassubdef(f). */
+  bool subdef_is_symbolic;
+  bool msg_is_symbolic;
+  const upb_oneofdef *oneof;
+  bool default_is_string;
+  bool type_is_set_;     /* False until type is explicitly set. */
+  bool is_extension_;
+  bool lazy_;
+  bool packed_;
+  upb_intfmt_t intfmt;
+  bool tagdelim;
+  upb_fieldtype_t type_;
+  upb_label_t label_;
+  uint32_t number_;
+  uint32_t selector_base;  /* Used to index into a upb::Handlers table. */
+  uint32_t index_;
+};
+
+/* Static initializer for a upb_fielddef.  Values are listed in the member
+ * order of struct upb_fielddef: base, defaultval, msg, sub,
+ * subdef_is_symbolic (false), msg_is_symbolic (false), oneof (NULL),
+ * default_is_string (true only for string/bytes types), type_is_set_ (true),
+ * then the remaining members taken straight from the macro arguments. */
+#define UPB_FIELDDEF_INIT(label, type, intfmt, tagdelim, is_extension, lazy,   \
+                          packed, name, num, msgdef, subdef, selector_base,    \
+                          index, defaultval, refs, ref2s)                      \
+  {                                                                            \
+    UPB_DEF_INIT(name, UPB_DEF_FIELD, refs, ref2s), defaultval, {msgdef},      \
+        {subdef}, false, false, NULL,                                          \
+        (type) == UPB_TYPE_STRING || (type) == UPB_TYPE_BYTES, true,           \
+        is_extension, lazy, packed, intfmt, tagdelim, type, label, num,        \
+        selector_base, index                                                   \
+  }
+
+
+/* upb_msgdef *****************************************************************/
+
+/* Private layout of a message definition.  `base` is the common upb_def
+ * header; the three tables index the message's fields (by number and by
+ * name) and its oneofs (by name). */
+struct upb_msgdef {
+  upb_def base;
+
+  size_t selector_count;
+  uint32_t submsg_field_count;
+
+  /* Tables for looking up fields by number and name. */
+  upb_inttable itof;  /* int to field */
+  upb_strtable ntof;  /* name to field */
+
+  /* Tables for looking up oneofs by name. */
+  upb_strtable ntoo;  /* name to oneof */
+
+  /* Is this a map-entry message?
+   * TODO: set this flag properly for static descriptors; regenerate
+   * descriptor.upb.c. */
+  bool map_entry;
+
+  /* TODO(haberman): proper extension ranges (there can be multiple). */
+};
+
+/* TODO: also support static initialization of the oneofs table. This will be
+ * needed if we compile in descriptors that contain oneofs. */
+/* Static initializer for a upb_msgdef, in struct member order.  The oneof
+ * table (ntoo) is always initialized empty and map_entry is always false. */
+#define UPB_MSGDEF_INIT(name, selector_count, submsg_field_count, itof, ntof, \
+                        refs, ref2s)                                          \
+  {                                                                           \
+    UPB_DEF_INIT(name, UPB_DEF_MSG, refs, ref2s), selector_count,             \
+        submsg_field_count, itof, ntof,                                       \
+        UPB_EMPTY_STRTABLE_INIT(UPB_CTYPE_PTR), false                         \
+  }
+
+
+/* upb_enumdef ****************************************************************/
+
+/* Private layout of an enum definition.  `base` is the common upb_def
+ * header. */
+struct upb_enumdef {
+  upb_def base;
+
+  upb_strtable ntoi;   /* name to int */
+  upb_inttable iton;   /* int to name */
+  int32_t defaultval;
+};
+
+/* Static initializer for a upb_enumdef, in struct member order. */
+#define UPB_ENUMDEF_INIT(name, ntoi, iton, defaultval, refs, ref2s) \
+  { UPB_DEF_INIT(name, UPB_DEF_ENUM, refs, ref2s), ntoi, iton, defaultval }
+
+
+/* upb_oneofdef ***************************************************************/
+
+/* Private layout of a oneof definition.  `base` is the common upb_def
+ * header. */
+struct upb_oneofdef {
+  upb_def base;
+
+  upb_strtable ntof;  /* name to field */
+  upb_inttable itof;  /* int to field */
+  const upb_msgdef *parent;
+};
+
+/* Static initializer for a upb_oneofdef.  `parent` is not listed, so C
+ * aggregate initialization leaves it NULL.
+ * NOTE(review): the def type tag used here is UPB_DEF_ENUM, the same tag as
+ * UPB_ENUMDEF_INIT; this looks like a copy/paste leftover -- confirm whether
+ * a oneof-specific upb_deftype_t value should be used instead. */
+#define UPB_ONEOFDEF_INIT(name, ntof, itof, refs, ref2s) \
+  { UPB_DEF_INIT(name, UPB_DEF_ENUM, refs, ref2s), ntof, itof }
+
+
+/* upb_symtab *****************************************************************/
+
+/* Private layout of a symbol table: a refcounted header followed by one
+ * string-keyed table. */
+struct upb_symtab {
+  upb_refcounted base;
+
+  upb_strtable symtab;
+};
+
+/* Static initializer for a upb_symtab, in struct member order. */
+#define UPB_SYMTAB_INIT(symtab, refs, ref2s) \
+  { UPB_REFCOUNT_INIT(refs, ref2s), symtab }
+
+
+#endif  /* UPB_STATICINIT_H_ */
+/*
+** upb::Handlers (upb_handlers)
+**
+** A upb_handlers is like a virtual table for a upb_msgdef.  Each field of the
+** message can have associated functions that will be called when we are
+** parsing or visiting a stream of data.  This is similar to how handlers work
+** in SAX (the Simple API for XML).
+**
+** The handlers have no idea where the data is coming from, so a single set of
+** handlers could be used with two completely different data sources (for
+** example, a parser and a visitor over in-memory objects).  This decoupling is
+** the most important feature of upb, because it allows parsers and serializers
+** to be highly reusable.
+**
+** This is a mixed C/C++ interface that offers a full API to both languages.
+** See the top-level README for more information.
+*/
+
+#ifndef UPB_HANDLERS_H
+#define UPB_HANDLERS_H
+
+
+#ifdef __cplusplus
+namespace upb {
+class BufferHandle;
+class BytesHandler;
+class HandlerAttributes;
+class Handlers;
+template <class T> class Handler;
+template <class T> struct CanonicalType;
+}  /* namespace upb */
+#endif
+
+UPB_DECLARE_TYPE(upb::BufferHandle, upb_bufhandle)
+UPB_DECLARE_TYPE(upb::BytesHandler, upb_byteshandler)
+UPB_DECLARE_TYPE(upb::HandlerAttributes, upb_handlerattr)
+UPB_DECLARE_DERIVED_TYPE(upb::Handlers, upb::RefCounted,
+                         upb_handlers, upb_refcounted)
+
+/* The maximum depth that the handler graph can have.  This is a resource limit
+ * for the C stack since we sometimes need to recursively traverse the graph.
+ * Cycles are ok; the traversal will stop when it detects a cycle, but we must
+ * hit the cycle before the maximum depth is reached.
+ *
+ * If having a single static limit is too inflexible, we can add another variant
+ * of Handlers::Freeze that allows specifying this as a parameter. */
+#define UPB_MAX_HANDLER_DEPTH 64
+
+/* All the different types of handlers that can be registered.
+ * Only needed for the advanced functions in upb::Handlers. */
+typedef enum {
+  UPB_HANDLER_INT32,
+  UPB_HANDLER_INT64,
+  UPB_HANDLER_UINT32,
+  UPB_HANDLER_UINT64,
+  UPB_HANDLER_FLOAT,
+  UPB_HANDLER_DOUBLE,
+  UPB_HANDLER_BOOL,
+  UPB_HANDLER_STARTSTR,
+  UPB_HANDLER_STRING,
+  UPB_HANDLER_ENDSTR,
+  UPB_HANDLER_STARTSUBMSG,
+  UPB_HANDLER_ENDSUBMSG,
+  UPB_HANDLER_STARTSEQ,
+  UPB_HANDLER_ENDSEQ
+} upb_handlertype_t;
+
+/* One more than the last enumerator above (UPB_HANDLER_ENDSEQ); keep it in
+ * sync if enumerators are appended. */
+#define UPB_HANDLER_MAX (UPB_HANDLER_ENDSEQ+1)
+
+#define UPB_BREAK NULL
+
+/* A convenient definition for when no closure is needed. */
+extern char _upb_noclosure;
+#define UPB_NO_CLOSURE &_upb_noclosure
+
+/* A selector refers to a specific field handler in the Handlers object
+ * (for example: the STARTSUBMSG handler for field "field15"). */
+typedef int32_t upb_selector_t;
+
+UPB_BEGIN_EXTERN_C
+
+/* Forward-declares for C inline accessors.  We need to declare these here
+ * so we can "friend" them in the class declarations in C++. */
+UPB_INLINE upb_func *upb_handlers_gethandler(const upb_handlers *h,
+                                             upb_selector_t s);
+UPB_INLINE const void *upb_handlerattr_handlerdata(const upb_handlerattr *attr);
+UPB_INLINE const void *upb_handlers_gethandlerdata(const upb_handlers *h,
+                                                   upb_selector_t s);
+
+UPB_INLINE void upb_bufhandle_init(upb_bufhandle *h);
+UPB_INLINE void upb_bufhandle_setobj(upb_bufhandle *h, const void *obj,
+                                     const void *type);
+UPB_INLINE void upb_bufhandle_setbuf(upb_bufhandle *h, const char *buf,
+                                     size_t ofs);
+UPB_INLINE const void *upb_bufhandle_obj(const upb_bufhandle *h);
+UPB_INLINE const void *upb_bufhandle_objtype(const upb_bufhandle *h);
+UPB_INLINE const char *upb_bufhandle_buf(const upb_bufhandle *h);
+
+UPB_END_EXTERN_C
+
+
+/* Static selectors for upb::Handlers. */
+#define UPB_STARTMSG_SELECTOR 0
+#define UPB_ENDMSG_SELECTOR 1
+#define UPB_STATIC_SELECTOR_COUNT 2
+
+/* Static selectors for upb::BytesHandler. */
+#define UPB_STARTSTR_SELECTOR 0
+#define UPB_STRING_SELECTOR 1
+#define UPB_ENDSTR_SELECTOR 2
+
+typedef void upb_handlerfree(void *d);
+
+#ifdef __cplusplus
+
+/* A set of attributes that accompanies a handler's function pointer. */
+class upb::HandlerAttributes {
+ public:
+  HandlerAttributes();
+  ~HandlerAttributes();
+
+  /* Sets the handler data that will be passed as the second parameter of the
+   * handler.  To free this pointer when the handlers are freed, call
+   * Handlers::AddCleanup(). */
+  bool SetHandlerData(const void *handler_data);
+  const void* handler_data() const;
+
+  /* Use this to specify the type of the closure.  This will be checked against
+   * all other closure types for handlers that use the same closure.
+   * Registration will fail if this does not match all other non-NULL closure
+   * types. */
+  bool SetClosureType(const void *closure_type);
+  const void* closure_type() const;
+
+  /* Use this to specify the type of the returned closure.  Only used for
+   * Start*{String,SubMessage,Sequence} handlers.  This must match the closure
+   * type of any handlers that use it (for example, the StringBuf handler must
+   * match the closure returned from StartString). */
+  bool SetReturnClosureType(const void *return_closure_type);
+  const void* return_closure_type() const;
+
+  /* Set to indicate that the handler always returns "ok" (either "true" or a
+   * non-NULL closure).  This is a hint that can allow code generators to
+   * generate more efficient code. */
+  bool SetAlwaysOk(bool always_ok);
+  bool always_ok() const;
+
+ private:
+  friend UPB_INLINE const void * ::upb_handlerattr_handlerdata(
+      const upb_handlerattr *attr);
+#else
+struct upb_handlerattr {
+#endif
+  /* Data members shared by both views: they are private members of
+   * upb::HandlerAttributes when compiled as C++ and the members of
+   * struct upb_handlerattr when compiled as C. */
+  const void *handler_data_;
+  const void *closure_type_;
+  const void *return_closure_type_;
+  bool alwaysok_;
+};
+
+/* Initializer for a upb_handlerattr, in member order: handler_data_,
+ * closure_type_, return_closure_type_ (all NULL) and alwaysok_ (false). */
+#define UPB_HANDLERATTR_INITIALIZER {NULL, NULL, NULL, false}
+
+/* One handler entry: the handler's function pointer together with a full
+ * copy of its attributes. */
+typedef struct {
+  upb_func *func;
+
+  /* It is wasteful to include the entire attributes here:
+   *
+   * * Some of the information is redundant (like storing the closure type
+   *   separately for each handler that must match).
+   * * Some of the info is only needed prior to freeze() (like closure types).
+   * * alignment padding wastes a lot of space for alwaysok_.
+   *
+   * If/when the size and locality of handlers is an issue, we can optimize this
+   * not to store the entire attr like this.  We do not expose the table's
+   * layout to allow this optimization in the future. */
+  upb_handlerattr attr;
+} upb_handlers_tabent;
+
+#ifdef __cplusplus
+
+/* Extra information about a buffer that is passed to a StringBuf handler.
+ * TODO(haberman): allow the handle to be pinned so that it will outlive
+ * the handler invocation. */
+class upb::BufferHandle {
+ public:
+  BufferHandle();
+  ~BufferHandle();
+
+  /* The beginning of the buffer.  This may be different than the pointer
+   * passed to a StringBuf handler because the handler may receive data
+   * that is from the middle or end of a larger buffer. */
+  const char* buffer() const;
+
+  /* The offset within the attached object where this buffer begins.  Only
+   * meaningful if there is an attached object. */
+  size_t object_offset() const;
+
+  /* Note that object_offset is the offset of "buf" within the attached
+   * object. */
+  void SetBuffer(const char* buf, size_t object_offset);
+
+  /* The BufferHandle can have an "attached object", which can be used to
+   * tunnel through a pointer to the buffer's underlying representation. */
+  template <class T>
+  void SetAttachedObject(const T* obj);
+
+  /* Returns NULL if the attached object is not of this type. */
+  template <class T>
+  const T* GetAttachedObject() const;
+
+ private:
+  /* The C inline accessors are friends so they can reach the private data
+   * members below. */
+  friend UPB_INLINE void ::upb_bufhandle_init(upb_bufhandle *h);
+  friend UPB_INLINE void ::upb_bufhandle_setobj(upb_bufhandle *h,
+                                                const void *obj,
+                                                const void *type);
+  friend UPB_INLINE void ::upb_bufhandle_setbuf(upb_bufhandle *h,
+                                                const char *buf, size_t ofs);
+  friend UPB_INLINE const void* ::upb_bufhandle_obj(const upb_bufhandle *h);
+  friend UPB_INLINE const void* ::upb_bufhandle_objtype(
+      const upb_bufhandle *h);
+  friend UPB_INLINE const char* ::upb_bufhandle_buf(const upb_bufhandle *h);
+#else
+struct upb_bufhandle {
+#endif
+  /* Data members shared by both views: private members of upb::BufferHandle
+   * when compiled as C++, members of struct upb_bufhandle when compiled as
+   * C. */
+  const char *buf_;
+  const void *obj_;
+  const void *objtype_;
+  size_t objofs_;
+};
+
+#ifdef __cplusplus
+
+/* A upb::Handlers object represents the set of handlers associated with a
+ * message in the graph of messages.  You can think of it as a big virtual
+ * table with functions corresponding to all the events that can fire while
+ * parsing or visiting a message of a specific type.
+ *
+ * Any handlers that are not set behave as if they had successfully consumed
+ * the value.  Any unset Start* handlers will propagate their closure to the
+ * inner frame.
+ *
+ * The easiest way to create the *Handler objects needed by the Set* methods is
+ * with the UpbBind() and UpbMakeHandler() macros; see below. */
+class upb::Handlers {
+ public:
+  typedef upb_selector_t Selector;
+  typedef upb_handlertype_t Type;
+
+  typedef Handler<void *(*)(void *, const void *)> StartFieldHandler;
+  typedef Handler<bool (*)(void *, const void *)> EndFieldHandler;
+  typedef Handler<bool (*)(void *, const void *)> StartMessageHandler;
+  typedef Handler<bool (*)(void *, const void *, Status*)> EndMessageHandler;
+  typedef Handler<void *(*)(void *, const void *, size_t)> StartStringHandler;
+  typedef Handler<size_t (*)(void *, const void *, const char *, size_t,
+                             const BufferHandle *)> StringHandler;
+
+  template <class T> struct ValueHandler {
+    typedef Handler<bool(*)(void *, const void *, T)> H;
+  };
+
+  typedef ValueHandler<int32_t>::H     Int32Handler;
+  typedef ValueHandler<int64_t>::H     Int64Handler;
+  typedef ValueHandler<uint32_t>::H    UInt32Handler;
+  typedef ValueHandler<uint64_t>::H    UInt64Handler;
+  typedef ValueHandler<float>::H       FloatHandler;
+  typedef ValueHandler<double>::H      DoubleHandler;
+  typedef ValueHandler<bool>::H        BoolHandler;
+
+  /* Any function pointer can be converted to this and converted back to its
+   * correct type. */
+  typedef void GenericFunction();
+
+  typedef void HandlersCallback(const void *closure, upb_handlers *h);
+
+  /* Returns a new handlers object for the given frozen msgdef.
+   * Returns NULL if memory allocation failed. */
+  static reffed_ptr<Handlers> New(const MessageDef *m);
+
+  /* Convenience function for registering a graph of handlers that mirrors the
+   * graph of msgdefs for some message.  For "m" and all its children a new set
+   * of handlers will be created and the given callback will be invoked,
+   * allowing the client to register handlers for this message.  Note that any
+   * subhandlers set by the callback will be overwritten. */
+  static reffed_ptr<const Handlers> NewFrozen(const MessageDef *m,
+                                              HandlersCallback *callback,
+                                              const void *closure);
+
+  /* Functionality from upb::RefCounted. */
+  UPB_REFCOUNTED_CPPMETHODS
+
+  /* All handler registration functions return bool to indicate success or
+   * failure; details about failures are stored in this status object.  If a
+   * failure does occur, it must be cleared before the Handlers are frozen,
+   * otherwise the freeze() operation will fail.  The functions may *only* be
+   * used while the Handlers are mutable. */
+  const Status* status();
+  void ClearError();
+
+  /* Call to freeze these Handlers.  Requires that any SubHandlers are already
+   * frozen.  For cycles, you must use the static version below and freeze the
+   * whole graph at once. */
+  bool Freeze(Status* s);
+
+  /* Freezes the given set of handlers.  You may not freeze a handler without
+   * also freezing any handlers it points to.  Two overloads are provided: one
+   * takes a pointer to the handlers plus a count "n", the other takes the
+   * handlers as a std::vector. */
+  static bool Freeze(Handlers*const* handlers, int n, Status* s);
+  static bool Freeze(const std::vector<Handlers*>& handlers, Status* s);
+
+  /* Returns the msgdef associated with this handlers object. */
+  const MessageDef* message_def() const;
+
+  /* Adds the given pointer and function to the list of cleanup functions that
+   * will be run when these handlers are freed.  If this pointer has previously
+   * been registered, the function returns false and does nothing. */
+  bool AddCleanup(void *ptr, upb_handlerfree *cleanup);
+
+  /* Sets the startmsg handler for the message, which is defined as follows:
+   *
+   *   bool startmsg(MyType* closure) {
+   *     // Called when the message begins.  Returns true if processing should
+   *     // continue.
+   *     return true;
+   *   }
+   */
+  bool SetStartMessageHandler(const StartMessageHandler& handler);
+
+  /* Sets the endmsg handler for the message, which is defined as follows:
+   *
+   *   bool endmsg(MyType* closure, upb_status *status) {
+   *     // Called when processing of this message ends, whether in success or
+   *     // failure.  "status" indicates the final status of processing, and
+   *     // can also be modified in-place to update the final status.
+   *   }
+   */
+  bool SetEndMessageHandler(const EndMessageHandler& handler);
+
+  /* Sets the value handler for the given field, which is defined as follows
+   * (this is for an int32 field; other field types will pass their native
+   * C/C++ type for "val"):
+   *
+   *   bool OnValue(MyClosure* c, const MyHandlerData* d, int32_t val) {
+   *     // Called when the field's value is encountered.  "d" contains
+   *     // whatever data was bound to this field when it was registered.
+   *     // Returns true if processing should continue.
+   *     return true;
+   *   }
+   *
+   *   handlers->SetInt32Handler(f, UpbBind(OnValue, new MyHandlerData(...)));
+   *
+   * The value type must exactly match f->type().
+   * For example, a handler that takes an int32_t parameter may only be used for
+   * fields of type UPB_TYPE_INT32 and UPB_TYPE_ENUM.
+   *
+   * Returns false if the handler failed to register; in this case the cleanup
+   * handler (if any) will be called immediately.
+   */
+  bool SetInt32Handler (const FieldDef* f,  const Int32Handler& h);
+  bool SetInt64Handler (const FieldDef* f,  const Int64Handler& h);
+  bool SetUInt32Handler(const FieldDef* f, const UInt32Handler& h);
+  bool SetUInt64Handler(const FieldDef* f, const UInt64Handler& h);
+  bool SetFloatHandler (const FieldDef* f,  const FloatHandler& h);
+  bool SetDoubleHandler(const FieldDef* f, const DoubleHandler& h);
+  bool SetBoolHandler  (const FieldDef* f,   const BoolHandler& h);
+
+  /* Like the previous, but templated on the type of the value (i.e. int32).
+   * This is mostly useful to call from other templates.  To call this you must
+   * specify the template parameter explicitly, i.e.:
+   *   h->SetValueHandler<T>(f, UpbBind(MyHandler<T>, MyData)); */
+  template <class T>
+  bool SetValueHandler(
+      const FieldDef *f,
+      const typename ValueHandler<typename CanonicalType<T>::Type>::H& handler);
+
+  /* Sets handlers for a string field, which are defined as follows:
+   *
+   *   MySubClosure* startstr(MyClosure* c, const MyHandlerData* d,
+   *                          size_t size_hint) {
+   *     // Called when a string value begins.  The return value indicates the
+   *     // closure for the string.  "size_hint" indicates the size of the
+   *     // string if it is known, however if the string is length-delimited
+   *     // and the end-of-string is not available size_hint will be zero.
+   *     // This case is indistinguishable from the case where the size is
+   *     // known to be zero.
+   *     //
+   *     // TODO(haberman): is it important to distinguish these cases?
+   *     // If we had ssize_t as a type we could make -1 "unknown", but
+   *     // ssize_t is POSIX (not ANSI) and therefore less portable.
+   *     // In practice I suspect it won't be important to distinguish.
+   *     return closure;
+   *   }
+   *
+   *   size_t str(MyClosure* closure, const MyHandlerData* d,
+   *              const char *str, size_t len) {
+   *     // Called for each buffer of string data; the multiple physical buffers
+   *     // are all part of the same logical string.  The return value indicates
+   *     // how many bytes were consumed.  If this number is less than "len",
+   *     // this will also indicate that processing should be halted for now,
+   *     // like returning false or UPB_BREAK from any other callback.  If
+   *     // this number is greater than "len", the excess bytes will be
+   *     // skipped over and not passed to the callback.
+   *     return len;
+   *   }
+   *
+   *   NOTE(review): the StringHandler typedef in this class also takes a
+   *   trailing "const BufferHandle *" argument that the sample above omits;
+   *   presumably the UpbMakeHandler/UpbBind wrappers make it optional --
+   *   confirm.
+   *
+   *   bool endstr(MyClosure* c, const MyHandlerData* d) {
+   *     // Called when a string value ends.  Return value indicates whether
+   *     // processing should continue.
+   *     return true;
+   *   }
+   */
+  bool SetStartStringHandler(const FieldDef* f, const StartStringHandler& h);
+  bool SetStringHandler(const FieldDef* f, const StringHandler& h);
+  bool SetEndStringHandler(const FieldDef* f, const EndFieldHandler& h);
+
+  /* Sets the startseq handler, which is defined as follows:
+   *
+   *   MySubClosure *startseq(MyClosure* c, const MyHandlerData* d) {
+   *     // Called when a sequence (repeated field) begins.  The returned
+   *     // pointer indicates the closure for the sequence (or UPB_BREAK
+   *     // to interrupt processing).
+   *     return closure;
+   *   }
+   *
+   *   h->SetStartSequenceHandler(f, UpbBind(startseq, new MyHandlerData(...)));
+   *
+   * Returns "false" if "f" does not belong to this message or is not a
+   * repeated field.
+   */
+  bool SetStartSequenceHandler(const FieldDef* f, const StartFieldHandler& h);
+
+  /* Sets the startsubmsg handler for the given field, which is defined as
+   * follows:
+   *
+   *   MySubClosure* startsubmsg(MyClosure* c, const MyHandlerData* d) {
+   *     // Called when a submessage begins.  The returned pointer indicates the
+   *     // closure for the submessage (or UPB_BREAK to interrupt processing).
+   *     return closure;
+   *   }
+   *
+   *   h->SetStartSubMessageHandler(f, UpbBind(startsubmsg,
+   *                                           new MyHandlerData(...)));
+   *
+   * Returns "false" if "f" does not belong to this message or is not a
+   * submessage/group field.
+   */
+  bool SetStartSubMessageHandler(const FieldDef* f, const StartFieldHandler& h);
+
+  /* Sets the endsubmsg handler for the given field, which is defined as
+   * follows:
+   *
+   *   bool endsubmsg(MyClosure* c, const MyHandlerData* d) {
+   *     // Called when a submessage ends.  Returns true to continue processing.
+   *     return true;
+   *   }
+   *
+   * Returns "false" if "f" does not belong to this message or is not a
+   * submessage/group field.
+   */
+  bool SetEndSubMessageHandler(const FieldDef *f, const EndFieldHandler &h);
+
+  /* Sets the endseq handler for the given field, which is defined as
+   * follows:
+   *
+   *   bool endseq(MyClosure* c, const MyHandlerData* d) {
+   *     // Called when a sequence ends.  Returns true to continue processing.
+   *     return true;
+   *   }
+   *
+   * Returns "false" if "f" does not belong to this message or is not a
+   * repeated field.
+   */
+  bool SetEndSequenceHandler(const FieldDef* f, const EndFieldHandler& h);
+
+  /* Sets or gets the object that specifies handlers for the given field, which
+   * must be a submessage or group.  Returns NULL if no handlers are set. */
+  bool SetSubHandlers(const FieldDef* f, const Handlers* sub);
+  const Handlers* GetSubHandlers(const FieldDef* f) const;
+
+  /* Equivalent to GetSubHandlers, but takes the STARTSUBMSG selector for the
+   * field. */
+  const Handlers* GetSubHandlers(Selector startsubmsg) const;
+
+  /* A selector refers to a specific field handler in the Handlers object
+   * (for example: the STARTSUBMSG handler for field "field15").
+   * On success, returns true and stores the selector in "s".
+   * If the FieldDef or Type are invalid, returns false.
+   * The returned selector is ONLY valid for Handlers whose MessageDef
+   * contains this FieldDef. */
+  static bool GetSelector(const FieldDef* f, Type type, Selector* s);
+
+  /* Given a START selector of any kind, returns the corresponding END selector. */
+  static Selector GetEndSelector(Selector start_selector);
+
+  /* Returns the function pointer for this handler.  It is the client's
+   * responsibility to cast to the correct function type before calling it. */
+  GenericFunction* GetHandler(Selector selector);
+
+  /* Retrieves the attributes for this selector, storing them in the
+   * caller-provided "attr". */
+  bool GetAttributes(Selector selector, HandlerAttributes* attr);
+
+  /* Returns the handler data that was registered with this handler. */
+  const void* GetHandlerData(Selector selector);
+
+  /* Could add any of the following functions as-needed, with some minor
+   * implementation changes:
+   *
+   * const FieldDef* GetFieldDef(Selector selector);
+   * static bool IsSequence(Selector selector); */
+
+ private:
+  UPB_DISALLOW_POD_OPS(Handlers, upb::Handlers)
+
+  friend UPB_INLINE GenericFunction *::upb_handlers_gethandler(
+      const upb_handlers *h, upb_selector_t s);
+  friend UPB_INLINE const void *::upb_handlers_gethandlerdata(
+      const upb_handlers *h, upb_selector_t s);
+#else
+struct upb_handlers {
+#endif
+  upb_refcounted base;
+
+  const upb_msgdef *msg;
+  const upb_handlers **sub;
+  const void *top_closure_type;
+  upb_inttable cleanup_;
+  upb_status status_;  /* Used only when mutable. */
+  upb_handlers_tabent table[1];  /* Dynamically-sized field handler array. */
+};
+
+#ifdef __cplusplus
+
+namespace upb {
+
+/* Convenience macros for creating a Handler object that is wrapped with a
+ * type-safe wrapper function that converts the "void*" parameters/returns
+ * of the underlying C API into a nice C++ function.
+ *
+ * Sample usage:
+ *   void OnValue1(MyClosure* c, const MyHandlerData* d, int32_t val) {
+ *     // do stuff ...
+ *   }
+ *
+ *   // Handler that doesn't need any data bound to it.
+ *   void OnValue2(MyClosure* c, int32_t val) {
+ *     // do stuff ...
+ *   }
+ *
+ *   // Handler that returns bool so it can return failure if necessary.
+ *   bool OnValue3(MyClosure* c, int32_t val) {
+ *     // do stuff ...
+ *     return ok;
+ *   }
+ *
+ *   // Member function handler.
+ *   class MyClosure {
+ *    public:
+ *     void OnValue(int32_t val) {
+ *       // do stuff ...
+ *     }
+ *   };
+ *
+ *   // Takes ownership of the MyHandlerData.
+ *   handlers->SetInt32Handler(f1, UpbBind(OnValue1, new MyHandlerData(...)));
+ *   handlers->SetInt32Handler(f2, UpbMakeHandler(OnValue2));
+ *   handlers->SetInt32Handler(f1, UpbMakeHandler(OnValue3));
+ *   handlers->SetInt32Handler(f2, UpbMakeHandler(&MyClosure::OnValue));
+ */
+
+#ifdef UPB_CXX11
+
+/* In C++11, the "template" disambiguator can appear even outside templates,
+ * so all calls can safely use this pair of macros. */
+
+#define UpbMakeHandler(f) upb::MatchFunc(f).template GetFunc<f>()
+
+/* We have to be careful to only evaluate "d" once. */
+#define UpbBind(f, d) upb::MatchFunc(f).template GetFunc<f>((d))
+
+#else
+
+/* Prior to C++11, the "template" disambiguator may only appear inside a
+ * template, so the regular macro must not use "template" */
+
+#define UpbMakeHandler(f) upb::MatchFunc(f).GetFunc<f>()
+
+#define UpbBind(f, d) upb::MatchFunc(f).GetFunc<f>((d))
+
+#endif  /* UPB_CXX11 */
+
+/* This macro must be used in C++98 for calls from inside a template.  But we
+ * define this variant in all cases; code that wants to be compatible with both
+ * C++98 and C++11 should always use this macro when calling from a template. */
+#define UpbMakeHandlerT(f) upb::MatchFunc(f).template GetFunc<f>()
+
+/* We have to be careful to only evaluate "d" once. */
+#define UpbBindT(f, d) upb::MatchFunc(f).template GetFunc<f>((d))
+
+/* Handler: a struct that contains the (handler, data, deleter) tuple that is
+ * used to register all handlers.  Users can Make() these directly but it's
+ * more convenient to use the UpbMakeHandler/UpbBind macros above. */
+template <class T> class Handler {
+ public:
+  /* The underlying handler function signature that upb uses internally. */
+  typedef T FuncPtr;
+
+  /* Intentionally implicit. */
+  template <class F> Handler(F func);
+  ~Handler();
+
+ private:
+  /* If a cleanup function is set, registers it (together with the cleanup
+   * data) on "h" through Handlers::AddCleanup().  The registration result is
+   * handed to UPB_ASSERT_VAR.  Does nothing when no cleanup function is set. */
+  void AddCleanup(Handlers* h) const {
+    if (cleanup_func_) {
+      bool ok = h->AddCleanup(cleanup_data_, cleanup_func_);
+      UPB_ASSERT_VAR(ok, ok);
+    }
+  }
+
+  UPB_DISALLOW_COPY_AND_ASSIGN(Handler)
+  friend class Handlers;
+  FuncPtr handler_;
+  mutable HandlerAttributes attr_;
+  mutable bool registered_;
+  /* Pointer passed to Handlers::AddCleanup() alongside cleanup_func_. */
+  void *cleanup_data_;
+  /* Cleanup function; AddCleanup() is a no-op while this is null. */
+  upb_handlerfree *cleanup_func_;
+};
+
+}  /* namespace upb */
+
+#endif  /* __cplusplus */
+
+UPB_BEGIN_EXTERN_C
+
+/* Native C API. */
+
+/* Handler function typedefs. */
+typedef bool upb_startmsg_handlerfunc(void *c, const void*);
+typedef bool upb_endmsg_handlerfunc(void *c, const void *, upb_status *status);
+typedef void* upb_startfield_handlerfunc(void *c, const void *hd);
+typedef bool upb_endfield_handlerfunc(void *c, const void *hd);
+typedef bool upb_int32_handlerfunc(void *c, const void *hd, int32_t val);
+typedef bool upb_int64_handlerfunc(void *c, const void *hd, int64_t val);
+typedef bool upb_uint32_handlerfunc(void *c, const void *hd, uint32_t val);
+typedef bool upb_uint64_handlerfunc(void *c, const void *hd, uint64_t val);
+typedef bool upb_float_handlerfunc(void *c, const void *hd, float val);
+typedef bool upb_double_handlerfunc(void *c, const void *hd, double val);
+typedef bool upb_bool_handlerfunc(void *c, const void *hd, bool val);
+typedef void *upb_startstr_handlerfunc(void *c, const void *hd,
+                                       size_t size_hint);
+typedef size_t upb_string_handlerfunc(void *c, const void *hd, const char *buf,
+                                      size_t n, const upb_bufhandle* handle);
+
+/* upb_bufhandle */
+size_t upb_bufhandle_objofs(const upb_bufhandle *h);
+
+/* upb_handlerattr */
+void upb_handlerattr_init(upb_handlerattr *attr);
+void upb_handlerattr_uninit(upb_handlerattr *attr);
+
+bool upb_handlerattr_sethandlerdata(upb_handlerattr *attr, const void *hd);
+bool upb_handlerattr_setclosuretype(upb_handlerattr *attr, const void *type);
+const void *upb_handlerattr_closuretype(const upb_handlerattr *attr);
+bool upb_handlerattr_setreturnclosuretype(upb_handlerattr *attr,
+                                          const void *type);
+const void *upb_handlerattr_returnclosuretype(const upb_handlerattr *attr);
+bool upb_handlerattr_setalwaysok(upb_handlerattr *attr, bool alwaysok);
+bool upb_handlerattr_alwaysok(const upb_handlerattr *attr);
+
+/* Returns the handler-data pointer stored in "attr". */
+UPB_INLINE const void *upb_handlerattr_handlerdata(
+    const upb_handlerattr *attr) {
+  return attr->handler_data_;
+}
+
+/* upb_handlers */
+typedef void upb_handlers_callback(const void *closure, upb_handlers *h);
+upb_handlers *upb_handlers_new(const upb_msgdef *m,
+                               const void *owner);
+const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m,
+                                           const void *owner,
+                                           upb_handlers_callback *callback,
+                                           const void *closure);
+
+/* Include refcounted methods like upb_handlers_ref(). */
+UPB_REFCOUNTED_CMETHODS(upb_handlers, upb_handlers_upcast)
+
+const upb_status *upb_handlers_status(upb_handlers *h);
+void upb_handlers_clearerr(upb_handlers *h);
+const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h);
+bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *hfree);
+
+bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func,
+                              upb_handlerattr *attr);
+bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func,
+                            upb_handlerattr *attr);
+bool upb_handlers_setint32(upb_handlers *h, const upb_fielddef *f,
+                           upb_int32_handlerfunc *func, upb_handlerattr *attr);
+bool upb_handlers_setint64(upb_handlers *h, const upb_fielddef *f,
+                           upb_int64_handlerfunc *func, upb_handlerattr *attr);
+bool upb_handlers_setuint32(upb_handlers *h, const upb_fielddef *f,
+                            upb_uint32_handlerfunc *func,
+                            upb_handlerattr *attr);
+bool upb_handlers_setuint64(upb_handlers *h, const upb_fielddef *f,
+                            upb_uint64_handlerfunc *func,
+                            upb_handlerattr *attr);
+bool upb_handlers_setfloat(upb_handlers *h, const upb_fielddef *f,
+                           upb_float_handlerfunc *func, upb_handlerattr *attr);
+bool upb_handlers_setdouble(upb_handlers *h, const upb_fielddef *f,
+                            upb_double_handlerfunc *func,
+                            upb_handlerattr *attr);
+bool upb_handlers_setbool(upb_handlers *h, const upb_fielddef *f,
+                          upb_bool_handlerfunc *func,
+                          upb_handlerattr *attr);
+bool upb_handlers_setstartstr(upb_handlers *h, const upb_fielddef *f,
+                              upb_startstr_handlerfunc *func,
+                              upb_handlerattr *attr);
+bool upb_handlers_setstring(upb_handlers *h, const upb_fielddef *f,
+                            upb_string_handlerfunc *func,
+                            upb_handlerattr *attr);
+bool upb_handlers_setendstr(upb_handlers *h, const upb_fielddef *f,
+                            upb_endfield_handlerfunc *func,
+                            upb_handlerattr *attr);
+bool upb_handlers_setstartseq(upb_handlers *h, const upb_fielddef *f,
+                              upb_startfield_handlerfunc *func,
+                              upb_handlerattr *attr);
+bool upb_handlers_setstartsubmsg(upb_handlers *h, const upb_fielddef *f,
+                                 upb_startfield_handlerfunc *func,
+                                 upb_handlerattr *attr);
+bool upb_handlers_setendsubmsg(upb_handlers *h, const upb_fielddef *f,
+                               upb_endfield_handlerfunc *func,
+                               upb_handlerattr *attr);
+bool upb_handlers_setendseq(upb_handlers *h, const upb_fielddef *f,
+                            upb_endfield_handlerfunc *func,
+                            upb_handlerattr *attr);
+
+bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f,
+                                 const upb_handlers *sub);
+const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h,
+                                                const upb_fielddef *f);
+const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h,
+                                                    upb_selector_t sel);
+
+/* Returns the function stored in the table entry of "h" indexed by selector
+ * "s", cast to the generic upb_func pointer type.  "s" is used as an index
+ * without any range check. */
+UPB_INLINE upb_func *upb_handlers_gethandler(const upb_handlers *h,
+                                             upb_selector_t s) {
+  const upb_handlers_tabent *entry = &h->table[s];
+  return (upb_func *)entry->func;
+}
+
+bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t s,
+                          upb_handlerattr *attr);
+
+/* Returns the handler data held in the attributes of the table entry of "h"
+ * indexed by selector "s".  "s" is used as an index without any range
+ * check. */
+UPB_INLINE const void *upb_handlers_gethandlerdata(const upb_handlers *h,
+                                                   upb_selector_t s) {
+  const upb_handlerattr *entry_attr = &h->table[s].attr;
+  return upb_handlerattr_handlerdata(entry_attr);
+}
+
+#ifdef __cplusplus
+
+/* Handler types for single fields.
+ * Right now we only have one for TYPE_BYTES but ones for other types
+ * should follow.
+ *
+ * These follow the same handlers protocol for fields of a message. */
+class upb::BytesHandler {
+ public:
+  BytesHandler();
+  ~BytesHandler();
+#else
+struct upb_byteshandler {
+#endif
+  /* Fixed three-entry handler table; this member is shared by the C struct
+   * and the C++ class.
+   * NOTE(review): presumably one entry each for the startstr, string and
+   * endstr handlers installed by the upb_byteshandler_set*() functions --
+   * confirm against the implementation. */
+  upb_handlers_tabent table[3];
+};
+
+void upb_byteshandler_init(upb_byteshandler *h);
+
+/* Caller must ensure that "d" outlives the handlers.
+ * TODO(haberman): should this have a "freeze" operation?  It's not necessary
+ * for memory management, but could be useful to force immutability and provide
+ * a convenient moment to verify that all registration succeeded. */
+bool upb_byteshandler_setstartstr(upb_byteshandler *h,
+                                  upb_startstr_handlerfunc *func, void *d);
+bool upb_byteshandler_setstring(upb_byteshandler *h,
+                                upb_string_handlerfunc *func, void *d);
+bool upb_byteshandler_setendstr(upb_byteshandler *h,
+                                upb_endfield_handlerfunc *func, void *d);
+
+/* "Static" methods */
+bool upb_handlers_freeze(upb_handlers *const *handlers, int n, upb_status *s);
+upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f);
+bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
+                              upb_selector_t *s);
+/* Maps a START selector to its END selector, which is the next selector
+ * value (start + 1). */
+UPB_INLINE upb_selector_t upb_handlers_getendselector(upb_selector_t start) {
+  return start + 1;
+}
+
+/* Internal-only. */
+uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f);
+uint32_t upb_handlers_selectorcount(const upb_fielddef *f);
+
+UPB_END_EXTERN_C
+
+/*
+** Inline definitions for handlers.h, which are particularly long and a bit
+** tricky.
+*/
+
+#ifndef UPB_HANDLERS_INL_H_
+#define UPB_HANDLERS_INL_H_
+
+#include <limits.h>
+
+/* C inline methods. */
+
+/* upb_bufhandle */
+/* Resets every field of the handle: the buffer, object and object-type
+ * pointers become NULL and the object offset becomes zero. */
+UPB_INLINE void upb_bufhandle_init(upb_bufhandle *h) {
+  h->buf_ = NULL;
+  h->obj_ = NULL;
+  h->objtype_ = NULL;
+  h->objofs_ = 0;
+}
+UPB_INLINE void upb_bufhandle_uninit(upb_bufhandle *h) {
+  UPB_UNUSED(h);
+}
+/* Stores "obj" and "type" in the handle's object and object-type fields. */
+UPB_INLINE void upb_bufhandle_setobj(upb_bufhandle *h, const void *obj,
+                                     const void *type) {
+  h->objtype_ = type;
+  h->obj_ = obj;
+}
+/* Stores "buf" and "ofs" in the handle's buffer and object-offset fields. */
+UPB_INLINE void upb_bufhandle_setbuf(upb_bufhandle *h, const char *buf,
+                                     size_t ofs) {
+  h->objofs_ = ofs;
+  h->buf_ = buf;
+}
+UPB_INLINE const void *upb_bufhandle_obj(const upb_bufhandle *h) {
+  return h->obj_;
+}
+UPB_INLINE const void *upb_bufhandle_objtype(const upb_bufhandle *h) {
+  return h->objtype_;
+}
+UPB_INLINE const char *upb_bufhandle_buf(const upb_bufhandle *h) {
+  return h->buf_;
+}
+
+
+#ifdef __cplusplus
+
+/* Type detection and typedefs for integer types.
+ * For platforms where there are multiple 32-bit or 64-bit types, we need to be
+ * able to enumerate them so we can properly create overloads for all variants.
+ *
+ * If any platform existed where there were three integer types with the same
+ * size, this would have to become more complicated.  For example, short, int,
+ * and long could all be 32-bits.  Even more diabolically, short, int, long,
+ * and long long could all be 64 bits and still be standard-compliant.
+ * However, few platforms are this strange, and it's unlikely that upb will be
+ * used on the strangest ones. */
+
+/* Can't count on stdint.h limits like INT32_MAX, because in C++ these are
+ * only defined when __STDC_LIMIT_MACROS is defined before the *first* include
+ * of stdint.h.  We can't guarantee that someone else didn't include it first
+ * without defining __STDC_LIMIT_MACROS. */
+#define UPB_INT32_MAX 0x7fffffffLL
+#define UPB_INT32_MIN (-UPB_INT32_MAX - 1)
+#define UPB_INT64_MAX 0x7fffffffffffffffLL
+#define UPB_INT64_MIN (-UPB_INT64_MAX - 1)
+
+#if INT_MAX == UPB_INT32_MAX && INT_MIN == UPB_INT32_MIN
+#define UPB_INT_IS_32BITS 1
+#endif
+
+#if LONG_MAX == UPB_INT32_MAX && LONG_MIN == UPB_INT32_MIN
+#define UPB_LONG_IS_32BITS 1
+#endif
+
+#if LONG_MAX == UPB_INT64_MAX && LONG_MIN == UPB_INT64_MIN
+#define UPB_LONG_IS_64BITS 1
+#endif
+
+#if LLONG_MAX == UPB_INT64_MAX && LLONG_MIN == UPB_INT64_MIN
+#define UPB_LLONG_IS_64BITS 1
+#endif
+
+/* We use macros instead of typedefs so we can undefine them later and avoid
+ * leaking them outside this header file. */
+#if UPB_INT_IS_32BITS
+#define UPB_INT32_T int
+#define UPB_UINT32_T unsigned int
+
+#if UPB_LONG_IS_32BITS
+#define UPB_TWO_32BIT_TYPES 1
+#define UPB_INT32ALT_T long
+#define UPB_UINT32ALT_T unsigned long
+#endif  /* UPB_LONG_IS_32BITS */
+
+#elif UPB_LONG_IS_32BITS  /* && !UPB_INT_IS_32BITS */
+#define UPB_INT32_T long
+#define UPB_UINT32_T unsigned long
+#endif  /* UPB_INT_IS_32BITS */
+
+
+#if UPB_LONG_IS_64BITS
+#define UPB_INT64_T long
+#define UPB_UINT64_T unsigned long
+
+#if UPB_LLONG_IS_64BITS
+#define UPB_TWO_64BIT_TYPES 1
+#define UPB_INT64ALT_T long long
+#define UPB_UINT64ALT_T unsigned long long
+#endif  /* UPB_LLONG_IS_64BITS */
+
+#elif UPB_LLONG_IS_64BITS  /* && !UPB_LONG_IS_64BITS */
+#define UPB_INT64_T long long
+#define UPB_UINT64_T unsigned long long
+#endif  /* UPB_LONG_IS_64BITS */
+
+#undef UPB_INT32_MAX
+#undef UPB_INT32_MIN
+#undef UPB_INT64_MAX
+#undef UPB_INT64_MIN
+#undef UPB_INT_IS_32BITS
+#undef UPB_LONG_IS_32BITS
+#undef UPB_LONG_IS_64BITS
+#undef UPB_LLONG_IS_64BITS
+
+
+namespace upb {
+
+typedef void CleanupFunc(void *ptr);
+
+/* Template to remove "const" from "const T*" and just return "T*".
+ *
+ * We define a nonsense default because otherwise it will fail to instantiate as
+ * a function parameter type even in cases where we don't expect any caller to
+ * actually match the overload. */
+class CouldntRemoveConst {};
+template <class T> struct remove_constptr { typedef CouldntRemoveConst type; };
+template <class T> struct remove_constptr<const T *> { typedef T *type; };
+
+/* Template that we use below to remove a template specialization from
+ * consideration if it matches a specific type. */
+template <class T, class U> struct disable_if_same { typedef void Type; };
+template <class T> struct disable_if_same<T, T> {};
+
+/* Deletes "p" after casting it to T.  T is the full pointer type (e.g.
+ * "Foo*"), not the pointee type. */
+template <class T> void DeletePointer(void *p) { delete static_cast<T>(p); }
+
+/* Type selector: the nested type "value" is T1, unless T1 is void or bool, in
+ * which case it is T2.  Note that "value" names a type here, not a
+ * constant. */
+template <class T1, class T2>
+struct FirstUnlessVoidOrBool {
+  typedef T1 value;
+};
+
+/* T1 == void: fall back to T2. */
+template <class T2>
+struct FirstUnlessVoidOrBool<void, T2> {
+  typedef T2 value;
+};
+
+/* T1 == bool: fall back to T2. */
+template <class T2>
+struct FirstUnlessVoidOrBool<bool, T2> {
+  typedef T2 value;
+};
+
+/* Type-equality trait: is_same<T, U>::value is false in general and true for
+ * the is_same<T, T> specialization.  Note that "value" is a non-const static
+ * data member defined out of class, so it is an ordinary variable and cannot
+ * be used where a compile-time constant expression is required. */
+template<class T, class U>
+struct is_same {
+  static bool value;
+};
+
+template<class T>
+struct is_same<T, T> {
+  static bool value;
+};
+
+/* Definition for the general (different types) case. */
+template<class T, class U>
+bool is_same<T, U>::value = false;
+
+/* Definition for the same-type specialization. */
+template<class T>
+bool is_same<T, T>::value = true;
+
+/* FuncInfo *******************************************************************/
+
+/* Info about the user's original, pre-wrapped function. */
+template <class C, class R = void>
+struct FuncInfo {
+  /* The type of the closure that the function takes (its first param). */
+  typedef C Closure;
+
+  /* The return type. */
+  typedef R Return;
+};
+
+/* Func ***********************************************************************/
+
+/* Func1 through Func5: Template classes representing a function and its
+ * signature.
+ *
+ * Since the function is a template parameter, calling the function can be
+ * inlined at compile-time and does not require a function pointer at runtime.
+ * These functions are not bound to any handler data, so they have no data or
+ * cleanup handler. */
+
+/* Common base of the Func classes: reports a NULL cleanup function and NULL
+ * data. */
+struct UnboundFunc {
+  CleanupFunc *GetCleanup() { return NULL; }
+  void *GetData() { return NULL; }
+};
+
+template <class R, class P1, R F(P1), class I>
+struct Func1 : public UnboundFunc {
+  typedef R Return;
+  typedef I FuncInfo;
+  static R Call(P1 p1) { return F(p1); }
+};
+
+template <class R, class P1, class P2, R F(P1, P2), class I>
+struct Func2 : public UnboundFunc {
+  typedef R Return;
+  typedef I FuncInfo;
+  static R Call(P1 p1, P2 p2) { return F(p1, p2); }
+};
+
+template <class R, class P1, class P2, class P3, R F(P1, P2, P3), class I>
+struct Func3 : public UnboundFunc {
+  typedef R Return;
+  typedef I FuncInfo;
+  static R Call(P1 p1, P2 p2, P3 p3) { return F(p1, p2, p3); }
+};
+
+template <class R, class P1, class P2, class P3, class P4, R F(P1, P2, P3, P4),
+          class I>
+struct Func4 : public UnboundFunc {
+  typedef R Return;
+  typedef I FuncInfo;
+  static R Call(P1 p1, P2 p2, P3 p3, P4 p4) { return F(p1, p2, p3, p4); }
+};
+
+template <class R, class P1, class P2, class P3, class P4, class P5,
+          R F(P1, P2, P3, P4, P5), class I>
+struct Func5 : public UnboundFunc {
+  typedef R Return;
+  typedef I FuncInfo;
+  static R Call(P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) {
+    return F(p1, p2, p3, p4, p5);
+  }
+};
+
+/* BoundFunc ******************************************************************/
+
+/* BoundFunc2 through BoundFunc5: Like Func2 through Func5 except each also
+ * contains a value that shall be bound to the function's second parameter.
+ *
+ * Note that the second parameter is a const pointer, but our stored bound value
+ * is non-const so we can free it when the handlers are destroyed. */
+template <class T>
+struct BoundFunc {
+  /* T with "const" stripped from the pointee (see remove_constptr). */
+  typedef typename remove_constptr<T>::type MutableP2;
+  explicit BoundFunc(MutableP2 data_) : data(data_) {}
+  /* Returns the DeletePointer instantiation for MutableP2, i.e. a function
+   * that deletes the bound data. */
+  CleanupFunc *GetCleanup() { return &DeletePointer<MutableP2>; }
+  MutableP2 GetData() { return data; }
+  MutableP2 data;
+};
+
+template <class R, class P1, class P2, R F(P1, P2), class I>
+struct BoundFunc2 : public BoundFunc<P2> {
+  typedef BoundFunc<P2> Base;
+  typedef I FuncInfo;
+  explicit BoundFunc2(typename Base::MutableP2 arg) : Base(arg) {}
+};
+
+template <class R, class P1, class P2, class P3, R F(P1, P2, P3), class I>
+struct BoundFunc3 : public BoundFunc<P2> {
+  typedef BoundFunc<P2> Base;
+  typedef I FuncInfo;
+  explicit BoundFunc3(typename Base::MutableP2 arg) : Base(arg) {}
+};
+
+template <class R, class P1, class P2, class P3, class P4, R F(P1, P2, P3, P4),
+          class I>
+struct BoundFunc4 : public BoundFunc<P2> {  /* 4-arg F plus a bound value. */
+  typedef BoundFunc<P2> Base;  /* Base class that stores the bound value. */
+  typedef I FuncInfo;          /* Info type I supplied by the instantiator. */
+  explicit BoundFunc4(typename Base::MutableP2 arg) : Base(arg) {}
+};
+
+template <class R, class P1, class P2, class P3, class P4, class P5,
+          R F(P1, P2, P3, P4, P5), class I>
+struct BoundFunc5 : public BoundFunc<P2> {  /* 5-arg F plus a bound value. */
+  typedef BoundFunc<P2> Base;  /* Base class that stores the bound value. */
+  typedef I FuncInfo;          /* Info type I supplied by the instantiator. */
+  explicit BoundFunc5(typename Base::MutableP2 arg) : Base(arg) {}
+};
+
+/* FuncSig ********************************************************************/
+
+/* FuncSig1, FuncSig2, FuncSig3: template classes reflecting a function
+ * *signature*, but without a specific function attached.
+ *
+ * These classes contain member functions that can be invoked with a
+ * specific function to return a Func/BoundFunc class.
+ *
+ * FuncSig1 covers one-parameter functions R(P1).  GetFunc<F>() returns a
+ * default-constructed Func1 for F, tagged with FuncInfo<P1, R>.  It has no
+ * bound overload: a one-parameter function has no second parameter to bind. */
+template <class R, class P1>
+struct FuncSig1 {
+  template <R F(P1)>
+  Func1<R, P1, F, FuncInfo<P1, R> > GetFunc() {
+    return Func1<R, P1, F, FuncInfo<P1, R> >();
+  }
+};
+
+template <class R, class P1, class P2>
+struct FuncSig2 {  /* Signature R(P1, P2); GetFunc() attaches a function. */
+  template <R F(P1, P2)>  /* Unbound form: yields Func2 for F. */
+  Func2<R, P1, P2, F, FuncInfo<P1, R> > GetFunc() {
+    return Func2<R, P1, P2, F, FuncInfo<P1, R> >();
+  }
+
+  template <R F(P1, P2)>  /* Bound form: yields BoundFunc2 holding param2. */
+  BoundFunc2<R, P1, P2, F, FuncInfo<P1, R> > GetFunc(
+      typename remove_constptr<P2>::type param2) {
+    return BoundFunc2<R, P1, P2, F, FuncInfo<P1, R> >(param2);
+  }
+};
+
+template <class R, class P1, class P2, class P3>
+struct FuncSig3 {  /* Signature R(P1, P2, P3); GetFunc() attaches a function. */
+  template <R F(P1, P2, P3)>  /* Unbound form: yields Func3 for F. */
+  Func3<R, P1, P2, P3, F, FuncInfo<P1, R> > GetFunc() {
+    return Func3<R, P1, P2, P3, F, FuncInfo<P1, R> >();
+  }
+
+  template <R F(P1, P2, P3)>  /* Bound form: BoundFunc3 holding param2. */
+  BoundFunc3<R, P1, P2, P3, F, FuncInfo<P1, R> > GetFunc(
+      typename remove_constptr<P2>::type param2) {
+    return BoundFunc3<R, P1, P2, P3, F, FuncInfo<P1, R> >(param2);
+  }
+};
+
+template <class R, class P1, class P2, class P3, class P4>
+struct FuncSig4 {  /* Signature R(P1, P2, P3, P4). */
+  template <R F(P1, P2, P3, P4)>  /* Unbound form: yields Func4 for F. */
+  Func4<R, P1, P2, P3, P4, F, FuncInfo<P1, R> > GetFunc() {
+    return Func4<R, P1, P2, P3, P4, F, FuncInfo<P1, R> >();
+  }
+
+  template <R F(P1, P2, P3, P4)>  /* Bound form: BoundFunc4 holding param2. */
+  BoundFunc4<R, P1, P2, P3, P4, F, FuncInfo<P1, R> > GetFunc(
+      typename remove_constptr<P2>::type param2) {
+    return BoundFunc4<R, P1, P2, P3, P4, F, FuncInfo<P1, R> >(param2);
+  }
+};
+
+template <class R, class P1, class P2, class P3, class P4, class P5>
+struct FuncSig5 {  /* Signature R(P1, P2, P3, P4, P5). */
+  template <R F(P1, P2, P3, P4, P5)>  /* Unbound form: yields Func5 for F. */
+  Func5<R, P1, P2, P3, P4, P5, F, FuncInfo<P1, R> > GetFunc() {
+    return Func5<R, P1, P2, P3, P4, P5, F, FuncInfo<P1, R> >();
+  }
+
+  template <R F(P1, P2, P3, P4, P5)>  /* Bound form: BoundFunc5 with param2. */
+  BoundFunc5<R, P1, P2, P3, P4, P5, F, FuncInfo<P1, R> > GetFunc(
+      typename remove_constptr<P2>::type param2) {
+    return BoundFunc5<R, P1, P2, P3, P4, P5, F, FuncInfo<P1, R> >(param2);
+  }
+};
+
+/* Overloaded template function that can construct the appropriate FuncSig*
+ * class given a function pointer by deducing the template parameters.
+ *
+ * There is one overload per arity, from one to five parameters.  The pointer
+ * argument f is never called or stored; each overload just returns a
+ * default-constructed FuncSigN<R, P1, ...> for the deduced types. */
+template <class R, class P1>
+inline FuncSig1<R, P1> MatchFunc(R (*f)(P1)) {
+  UPB_UNUSED(f);  /* Only used for template parameter deduction. */
+  return FuncSig1<R, P1>();
+}
+
+template <class R, class P1, class P2>
+inline FuncSig2<R, P1, P2> MatchFunc(R (*f)(P1, P2)) {
+  UPB_UNUSED(f);  /* Only used for template parameter deduction. */
+  return FuncSig2<R, P1, P2>();
+}
+
+template <class R, class P1, class P2, class P3>
+inline FuncSig3<R, P1, P2, P3> MatchFunc(R (*f)(P1, P2, P3)) {
+  UPB_UNUSED(f);  /* Only used for template parameter deduction. */
+  return FuncSig3<R, P1, P2, P3>();
+}
+
+template <class R, class P1, class P2, class P3, class P4>
+inline FuncSig4<R, P1, P2, P3, P4> MatchFunc(R (*f)(P1, P2, P3, P4)) {
+  UPB_UNUSED(f);  /* Only used for template parameter deduction. */
+  return FuncSig4<R, P1, P2, P3, P4>();
+}
+
+template <class R, class P1, class P2, class P3, class P4, class P5>
+inline FuncSig5<R, P1, P2, P3, P4, P5> MatchFunc(R (*f)(P1, P2, P3, P4, P5)) {
+  UPB_UNUSED(f);  /* Only used for template parameter deduction. */
+  return FuncSig5<R, P1, P2, P3, P4, P5>();
+}
+
+/* MethodSig ******************************************************************/
+
+/* CallMethod*: free function templates that invoke the member function F on
+ * the object obj points to and return whatever F returns.  CallMethod0 is the
+ * variant for methods that take no arguments. */
+template <class R, class C, R (C::*F)()>
+R CallMethod0(C *obj) {
+  return (obj->*F)();
+}
+
+template <class R, class C, class P1, R (C::*F)(P1)>
+R CallMethod1(C *obj, P1 arg1) {
+  /* Invoke the one-argument method F on *obj, forwarding arg1. */
+  return (obj->*F)(arg1);
+}
+
+template <class R, class C, class P1, class P2, R (C::*F)(P1, P2)>
+R CallMethod2(C *obj, P1 arg1, P2 arg2) {
+  /* Invoke the two-argument method F on *obj, forwarding both arguments. */
+  return (obj->*F)(arg1, arg2);
+}
+
+template <class R, class C, class P1, class P2, class P3, R (C::*F)(P1, P2, P3)>
+R CallMethod3(C *obj, P1 arg1, P2 arg2, P3 arg3) {
+  /* Invoke the three-argument method F on *obj, forwarding all arguments. */
+  return (obj->*F)(arg1, arg2, arg3);
+}
+
+template <class R, class C, class P1, class P2, class P3, class P4,
+          R (C::*F)(P1, P2, P3, P4)>
+R CallMethod4(C *obj, P1 arg1, P2 arg2, P3 arg3, P4 arg4) {
+  /* Invoke the four-argument method F on *obj, forwarding all arguments. */
+  return (obj->*F)(arg1, arg2, arg3, arg4);
+}
+
+/* MethodSig: like FuncSig, but for member functions.
+ *
+ * GetFunc() returns a normal FuncN object, so after calling GetFunc() no
+ * more logic is required to special-case methods.
+ *
+ * This works by wrapping the method F in the free function template
+ * CallMethodN, whose first parameter is the object pointer (C *).  MethodSig0
+ * handles methods with no arguments and therefore yields a Func1 whose only
+ * parameter is C *. */
+template <class R, class C>
+struct MethodSig0 {
+  template <R (C::*F)()>
+  Func1<R, C *, CallMethod0<R, C, F>, FuncInfo<C *, R> > GetFunc() {
+    return Func1<R, C *, CallMethod0<R, C, F>, FuncInfo<C *, R> >();
+  }
+};
+
+template <class R, class C, class P1>
+struct MethodSig1 {  /* Method R (C::*)(P1), exposed as function R(C *, P1). */
+  template <R (C::*F)(P1)>  /* Unbound form: Func2 over CallMethod1. */
+  Func2<R, C *, P1, CallMethod1<R, C, P1, F>, FuncInfo<C *, R> > GetFunc() {
+    return Func2<R, C *, P1, CallMethod1<R, C, P1, F>, FuncInfo<C *, R> >();
+  }
+
+  template <R (C::*F)(P1)>  /* Bound form: BoundFunc2 holding param1. */
+  BoundFunc2<R, C *, P1, CallMethod1<R, C, P1, F>, FuncInfo<C *, R> > GetFunc(
+      typename remove_constptr<P1>::type param1) {
+    return BoundFunc2<R, C *, P1, CallMethod1<R, C, P1, F>, FuncInfo<C *, R> >(
+        param1);
+  }
+};
+
+template <class R, class C, class P1, class P2>
+struct MethodSig2 {  /* Method R (C::*)(P1, P2), exposed as R(C *, P1, P2). */
+  template <R (C::*F)(P1, P2)>  /* Unbound form: Func3 over CallMethod2. */
+  Func3<R, C *, P1, P2, CallMethod2<R, C, P1, P2, F>, FuncInfo<C *, R> >
+  GetFunc() {
+    return Func3<R, C *, P1, P2, CallMethod2<R, C, P1, P2, F>,
+                 FuncInfo<C *, R> >();
+  }
+
+  template <R (C::*F)(P1, P2)>  /* Bound form: BoundFunc3 holding param1. */
+  BoundFunc3<R, C *, P1, P2, CallMethod2<R, C, P1, P2, F>, FuncInfo<C *, R> >
+  GetFunc(typename remove_constptr<P1>::type param1) {
+    return BoundFunc3<R, C *, P1, P2, CallMethod2<R, C, P1, P2, F>,
+                      FuncInfo<C *, R> >(param1);
+  }
+};
+
+template <class R, class C, class P1, class P2, class P3>
+struct MethodSig3 {  /* Method R (C::*)(P1, P2, P3), exposed with C * first. */
+  template <R (C::*F)(P1, P2, P3)>  /* Unbound form: Func4 over CallMethod3. */
+  Func4<R, C *, P1, P2, P3, CallMethod3<R, C, P1, P2, P3, F>, FuncInfo<C *, R> >
+  GetFunc() {
+    return Func4<R, C *, P1, P2, P3, CallMethod3<R, C, P1, P2, P3, F>,
+                 FuncInfo<C *, R> >();
+  }
+
+  template <R (C::*F)(P1, P2, P3)>  /* Bound form: BoundFunc4 with param1. */
+  BoundFunc4<R, C *, P1, P2, P3, CallMethod3<R, C, P1, P2, P3, F>,
+             FuncInfo<C *, R> >
+  GetFunc(typename remove_constptr<P1>::type param1) {
+    return BoundFunc4<R, C *, P1, P2, P3, CallMethod3<R, C, P1, P2, P3, F>,
+                      FuncInfo<C *, R> >(param1);
+  }
+};
+
+template <class R, class C, class P1, class P2, class P3, class P4>
+struct MethodSig4 {  /* Method R (C::*)(P1..P4), exposed with C * first. */
+  template <R (C::*F)(P1, P2, P3, P4)>  /* Unbound: Func5 over CallMethod4. */
+  Func5<R, C *, P1, P2, P3, P4, CallMethod4<R, C, P1, P2, P3, P4, F>,
+        FuncInfo<C *, R> >
+  GetFunc() {
+    return Func5<R, C *, P1, P2, P3, P4, CallMethod4<R, C, P1, P2, P3, P4, F>,
+                 FuncInfo<C *, R> >();
+  }
+
+  template <R (C::*F)(P1, P2, P3, P4)>  /* Bound: BoundFunc5 with param1. */
+  BoundFunc5<R, C *, P1, P2, P3, P4, CallMethod4<R, C, P1, P2, P3, P4, F>,
+             FuncInfo<C *, R> >
+  GetFunc(typename remove_constptr<P1>::type param1) {
+    return BoundFunc5<R, C *, P1, P2, P3, P4,
+                      CallMethod4<R, C, P1, P2, P3, P4, F>, FuncInfo<C *, R> >(
+        param1);
+  }
+};
+
+template <class R, class C>  /* MatchFunc overloads for member functions: */
+inline MethodSig0<R, C> MatchFunc(R (C::*f)()) {  /* method with no args */
+  UPB_UNUSED(f);  /* Only used for template parameter deduction. */
+  return MethodSig0<R, C>();
+}
+
+template <class R, class C, class P1>
+inline MethodSig1<R, C, P1> MatchFunc(R (C::*f)(P1)) {  /* one argument */
+  UPB_UNUSED(f);  /* Only used for template parameter deduction. */
+  return MethodSig1<R, C, P1>();
+}
+
+template <class R, class C, class P1, class P2>
+inline MethodSig2<R, C, P1, P2> MatchFunc(R (C::*f)(P1, P2)) {  /* two args */
+  UPB_UNUSED(f);  /* Only used for template parameter deduction. */
+  return MethodSig2<R, C, P1, P2>();
+}
+
+template <class R, class C, class P1, class P2, class P3>  /* three args */
+inline MethodSig3<R, C, P1, P2, P3> MatchFunc(R (C::*f)(P1, P2, P3)) {
+  UPB_UNUSED(f);  /* Only used for template parameter deduction. */
+  return MethodSig3<R, C, P1, P2, P3>();
+}
+
+template <class R, class C, class P1, class P2, class P3, class P4>  /* four */
+inline MethodSig4<R, C, P1, P2, P3, P4> MatchFunc(R (C::*f)(P1, P2, P3, P4)) {
+  UPB_UNUSED(f);  /* Only used for template parameter deduction. */
+  return MethodSig4<R, C, P1, P2, P3, P4>();
+}
+
+/* MaybeWrapReturn ************************************************************/
+
+/* Template class that attempts to wrap the return value of the function so it
+ * matches the expected type.  There are two main adjustments it may make:
+ *
+ *   1. If the function returns void, make it return the expected type and with
+ *      a value that always indicates success.
+ *   2. If the function returns bool, make it return the expected type with a
+ *      value that indicates success or failure.
+ *
+ * The "expected type" for return is:
+ *   1. void* for start handlers.  If the closure parameter has a different type
+ *      we will cast it to void* for the return in the success case.
+ *   2. size_t for string buffer handlers.
+ *   3. bool for everything else. */
+
+/* Template parameters are FuncN type and desired return type.
+ *
+ * The third parameter, Enable, defaults to void.  Most specializations leave
+ * it at the default; the R* -> void* specializations fill it in with
+ * disable_if_same<R *, void *>::Type.  The primary template is only declared,
+ * so a combination with no matching specialization is an incomplete type. */
+template <class F, class R, class Enable = void>
+struct MaybeWrapReturn;
+
+/* If the return type matches, return the given function unwrapped.
+ *
+ * This specialization applies when the requested return type is exactly
+ * F::Return; its Func member is then F itself. */
+template <class F>
+struct MaybeWrapReturn<F, typename F::Return> {
+  typedef F Func;
+};
+
+/* Function wrapper that munges the return value from void to (bool)true.
+ *
+ * ReturnTrue2 and ReturnTrue3 wrap void functions of two and three
+ * parameters respectively: each calls F with its arguments unchanged and then
+ * unconditionally returns true. */
+template <class P1, class P2, void F(P1, P2)>
+bool ReturnTrue2(P1 p1, P2 p2) {
+  F(p1, p2);
+  return true;
+}
+
+template <class P1, class P2, class P3, void F(P1, P2, P3)>
+bool ReturnTrue3(P1 p1, P2 p2, P3 p3) {
+  F(p1, p2, p3);
+  return true;
+}
+
+/* Function wrapper that munges the return value from void to (void*)arg1.
+ *
+ * ReturnClosure2 and ReturnClosure3 call the void function F with their
+ * arguments unchanged and then return the first argument, p1.  The return
+ * statement relies on P1 being implicitly convertible to void *. */
+template <class P1, class P2, void F(P1, P2)>
+void *ReturnClosure2(P1 p1, P2 p2) {
+  F(p1, p2);
+  return p1;
+}
+
+template <class P1, class P2, class P3, void F(P1, P2, P3)>
+void *ReturnClosure3(P1 p1, P2 p2, P3 p3) {
+  F(p1, p2, p3);
+  return p1;
+}
+
+/* Function wrapper that munges the return value from R to void*.
+ *
+ * CastReturnToVoidPtr2 and CastReturnToVoidPtr3 call F with their arguments
+ * unchanged and return its result as void *.  No explicit cast is written, so
+ * R must be implicitly convertible to void *. */
+template <class R, class P1, class P2, R F(P1, P2)>
+void *CastReturnToVoidPtr2(P1 p1, P2 p2) {
+  return F(p1, p2);
+}
+
+template <class R, class P1, class P2, class P3, R F(P1, P2, P3)>
+void *CastReturnToVoidPtr3(P1 p1, P2 p2, P3 p3) {
+  return F(p1, p2, p3);
+}
+
+/* Function wrapper that munges the return value from bool to void*.
+ *
+ * ReturnClosureOrBreak2 and ReturnClosureOrBreak3 call F with their
+ * arguments unchanged.  If F returns true the first argument (p1) is
+ * returned; otherwise UPB_BREAK is returned. */
+template <class P1, class P2, bool F(P1, P2)>
+void *ReturnClosureOrBreak2(P1 p1, P2 p2) {
+  return F(p1, p2) ? p1 : UPB_BREAK;
+}
+
+template <class P1, class P2, class P3, bool F(P1, P2, P3)>
+void *ReturnClosureOrBreak3(P1 p1, P2 p2, P3 p3) {
+  return F(p1, p2, p3) ? p1 : UPB_BREAK;
+}
+
+/* For the string callback, which takes five params, returns the size param.
+ *
+ * Calls the void function F with all five arguments unchanged, then returns
+ * p4 (the size_t argument) regardless of what F did. */
+template <class P1, class P2,
+          void F(P1, P2, const char *, size_t, const BufferHandle *)>
+size_t ReturnStringLen(P1 p1, P2 p2, const char *p3, size_t p4,
+                       const BufferHandle *p5) {
+  F(p1, p2, p3, p4, p5);
+  return p4;
+}
+
+/* For the string callback, which takes five params, returns the size param or
+ * zero.
+ *
+ * Calls the bool function F with all five arguments unchanged.  Returns p4
+ * (the size_t argument) when F returns true and 0 when it returns false. */
+template <class P1, class P2,
+          bool F(P1, P2, const char *, size_t, const BufferHandle *)>
+size_t ReturnNOr0(P1 p1, P2 p2, const char *p3, size_t p4,
+                  const BufferHandle *p5) {
+  return F(p1, p2, p3, p4, p5) ? p4 : 0;
+}
+
+/* If we have a function returning void but want a function returning bool, wrap
+ * it in a function that returns true.
+ *
+ * Covers Func2 and Func3: the resulting Func is the same FuncN shape with
+ * Return = bool and F replaced by ReturnTrue2/ReturnTrue3 around F.  The info
+ * type I is carried over unchanged. */
+template <class P1, class P2, void F(P1, P2), class I>
+struct MaybeWrapReturn<Func2<void, P1, P2, F, I>, bool> {
+  typedef Func2<bool, P1, P2, ReturnTrue2<P1, P2, F>, I> Func;
+};
+
+template <class P1, class P2, class P3, void F(P1, P2, P3), class I>
+struct MaybeWrapReturn<Func3<void, P1, P2, P3, F, I>, bool> {
+  typedef Func3<bool, P1, P2, P3, ReturnTrue3<P1, P2, P3, F>, I> Func;
+};
+
+/* If our function returns void but we want one returning void*, wrap it in a
+ * function that returns the first argument.
+ *
+ * Covers Func2 and Func3: F is replaced by ReturnClosure2/ReturnClosure3
+ * around F and the info type I is carried over unchanged. */
+template <class P1, class P2, void F(P1, P2), class I>
+struct MaybeWrapReturn<Func2<void, P1, P2, F, I>, void *> {
+  typedef Func2<void *, P1, P2, ReturnClosure2<P1, P2, F>, I> Func;
+};
+
+template <class P1, class P2, class P3, void F(P1, P2, P3), class I>
+struct MaybeWrapReturn<Func3<void, P1, P2, P3, F, I>, void *> {
+  typedef Func3<void *, P1, P2, P3, ReturnClosure3<P1, P2, P3, F>, I> Func;
+};
+
+/* If our function returns R* but we want one returning void*, wrap it in a
+ * function that casts to void*.
+ *
+ * Covers Func2 and Func3 via CastReturnToVoidPtr2/CastReturnToVoidPtr3.  The
+ * third template argument is disable_if_same<R *, void *>::Type; presumably
+ * this removes the specialization when R* already is void*, so it does not
+ * compete with the "return type matches" specialization -- confirm against
+ * the definition of disable_if_same. */
+template <class R, class P1, class P2, R *F(P1, P2), class I>
+struct MaybeWrapReturn<Func2<R *, P1, P2, F, I>, void *,
+                       typename disable_if_same<R *, void *>::Type> {
+  typedef Func2<void *, P1, P2, CastReturnToVoidPtr2<R *, P1, P2, F>, I> Func;
+};
+
+template <class R, class P1, class P2, class P3, R *F(P1, P2, P3), class I>
+struct MaybeWrapReturn<Func3<R *, P1, P2, P3, F, I>, void *,
+                       typename disable_if_same<R *, void *>::Type> {
+  typedef Func3<void *, P1, P2, P3, CastReturnToVoidPtr3<R *, P1, P2, P3, F>, I>
+      Func;
+};
+
+/* If our function returns bool but we want one returning void*, wrap it in a
+ * function that returns either the first param or UPB_BREAK.
+ *
+ * Covers Func2 and Func3: F is replaced by ReturnClosureOrBreak2 or
+ * ReturnClosureOrBreak3 around F and the info type I is carried over. */
+template <class P1, class P2, bool F(P1, P2), class I>
+struct MaybeWrapReturn<Func2<bool, P1, P2, F, I>, void *> {
+  typedef Func2<void *, P1, P2, ReturnClosureOrBreak2<P1, P2, F>, I> Func;
+};
+
+template <class P1, class P2, class P3, bool F(P1, P2, P3), class I>
+struct MaybeWrapReturn<Func3<bool, P1, P2, P3, F, I>, void *> {
+  typedef Func3<void *, P1, P2, P3, ReturnClosureOrBreak3<P1, P2, P3, F>, I>
+      Func;
+};
+
+/* If our function returns void but we want one returning size_t, wrap it in a
+ * function that returns the size argument.
+ *
+ * Only matches a Func5 whose last three parameters are exactly
+ * (const char *, size_t, const BufferHandle *); F is replaced by
+ * ReturnStringLen around F. */
+template <class P1, class P2,
+          void F(P1, P2, const char *, size_t, const BufferHandle *), class I>
+struct MaybeWrapReturn<
+    Func5<void, P1, P2, const char *, size_t, const BufferHandle *, F, I>,
+          size_t> {
+  typedef Func5<size_t, P1, P2, const char *, size_t, const BufferHandle *,
+                ReturnStringLen<P1, P2, F>, I> Func;
+};
+
+/* If our function returns bool but we want one returning size_t, wrap it in a
+ * function that returns either 0 or the buf size.
+ *
+ * Only matches a Func5 whose last three parameters are exactly
+ * (const char *, size_t, const BufferHandle *); F is replaced by ReturnNOr0
+ * around F. */
+template <class P1, class P2,
+          bool F(P1, P2, const char *, size_t, const BufferHandle *), class I>
+struct MaybeWrapReturn<
+    Func5<bool, P1, P2, const char *, size_t, const BufferHandle *, F, I>,
+    size_t> {
+  typedef Func5<size_t, P1, P2, const char *, size_t, const BufferHandle *,
+                ReturnNOr0<P1, P2, F>, I> Func;
+};
+
+/* ConvertParams **************************************************************/
+
+/* Template class that converts the function parameters if necessary, and
+ * ignores the HandlerData parameter if appropriate.
+ *
+ * Template parameters are the FuncN/BoundFuncN function type (F) and the
+ * function-pointer type of the handler being built (T).  Only the
+ * specializations are defined; the primary template is left incomplete. */
+template <class F, class T>
+struct ConvertParams;
+
+/* Function that discards the handler data parameter.
+ *
+ * Each IgnoreHandlerDataN adapts a user function F that has no handler-data
+ * parameter to the (void *closure, const void *hd, ...) calling convention:
+ * the closure is static_cast to F's first parameter type P1, hd is dropped,
+ * and the remaining arguments are passed through.  The digit in the name is
+ * the parameter count of the adapter, not of F.
+ *
+ * IgnoreHandlerData3 receives its value argument as P2Wrapper while F takes
+ * P2Wrapped, so the call relies on an implicit conversion between the two.
+ * IgnoreHandlerDataIgnoreHandle additionally drops the BufferHandle. */
+template <class R, class P1, R F(P1)>
+R IgnoreHandlerData2(void *p1, const void *hd) {
+  UPB_UNUSED(hd);
+  return F(static_cast<P1>(p1));
+}
+
+template <class R, class P1, class P2Wrapper, class P2Wrapped,
+          R F(P1, P2Wrapped)>
+R IgnoreHandlerData3(void *p1, const void *hd, P2Wrapper p2) {
+  UPB_UNUSED(hd);
+  return F(static_cast<P1>(p1), p2);
+}
+
+template <class R, class P1, class P2, class P3, R F(P1, P2, P3)>
+R IgnoreHandlerData4(void *p1, const void *hd, P2 p2, P3 p3) {
+  UPB_UNUSED(hd);
+  return F(static_cast<P1>(p1), p2, p3);
+}
+
+template <class R, class P1, class P2, class P3, class P4, R F(P1, P2, P3, P4)>
+R IgnoreHandlerData5(void *p1, const void *hd, P2 p2, P3 p3, P4 p4) {
+  UPB_UNUSED(hd);
+  return F(static_cast<P1>(p1), p2, p3, p4);
+}
+
+template <class R, class P1, R F(P1, const char*, size_t)>
+R IgnoreHandlerDataIgnoreHandle(void *p1, const void *hd, const char *p2,
+                                size_t p3, const BufferHandle *handle) {
+  UPB_UNUSED(hd);
+  UPB_UNUSED(handle);
+  return F(static_cast<P1>(p1), p2, p3);
+}
+
+/* Function that casts the handler data parameter.
+ *
+ * Each CastHandlerDataN adapts a user function F whose second parameter
+ * receives the handler data: the closure is static_cast to P1, hd is
+ * static_cast to P2, and the remaining arguments are passed through.
+ *
+ * CastHandlerData3 takes its value argument as P3Wrapper and hands it to an
+ * F that expects P3Wrapped (implicit conversion).
+ * CastHandlerDataIgnoreHandle drops the trailing BufferHandle argument. */
+template <class R, class P1, class P2, R F(P1, P2)>
+R CastHandlerData2(void *c, const void *hd) {
+  return F(static_cast<P1>(c), static_cast<P2>(hd));
+}
+
+template <class R, class P1, class P2, class P3Wrapper, class P3Wrapped,
+          R F(P1, P2, P3Wrapped)>
+R CastHandlerData3(void *c, const void *hd, P3Wrapper p3) {
+  return F(static_cast<P1>(c), static_cast<P2>(hd), p3);
+}
+
+template <class R, class P1, class P2, class P3, class P4, class P5,
+          R F(P1, P2, P3, P4, P5)>
+R CastHandlerData5(void *c, const void *hd, P3 p3, P4 p4, P5 p5) {
+  return F(static_cast<P1>(c), static_cast<P2>(hd), p3, p4, p5);
+}
+
+template <class R, class P1, class P2, R F(P1, P2, const char *, size_t)>
+R CastHandlerDataIgnoreHandle(void *c, const void *hd, const char *p3,
+                              size_t p4, const BufferHandle *handle) {
+  UPB_UNUSED(handle);
+  return F(static_cast<P1>(c), static_cast<P2>(hd), p3, p4);
+}
+
+/* For unbound functions, ignore the handler data.
+ *
+ * Func1 (closure only) becomes a Func2 taking (void *, const void *).
+ * Func2 (closure + value) becomes a Func3; this specialization only matches
+ * when T is a three-parameter function pointer, and the value parameter of
+ * the resulting Func3 uses T's third parameter type (P3_2) rather than P2. */
+template <class R, class P1, R F(P1), class I, class T>
+struct ConvertParams<Func1<R, P1, F, I>, T> {
+  typedef Func2<R, void *, const void *, IgnoreHandlerData2<R, P1, F>, I> Func;
+};
+
+template <class R, class P1, class P2, R F(P1, P2), class I,
+          class R2, class P1_2, class P2_2, class P3_2>
+struct ConvertParams<Func2<R, P1, P2, F, I>,
+                     R2 (*)(P1_2, P2_2, P3_2)> {
+  typedef Func3<R, void *, const void *, P3_2,
+                IgnoreHandlerData3<R, P1, P3_2, P2, F>, I> Func;
+};
+
+/* For StringBuffer only; this ignores both the handler data and the
+ * BufferHandle.
+ *
+ * The first specialization matches an unbound Func3 of the exact shape
+ * (P1, const char *, size_t) and turns it into a five-parameter Func5.
+ * The second specialization handles an unbound Func4: only the handler data
+ * is inserted and ignored, and P2..P4 are passed through as-is. */
+template <class R, class P1, R F(P1, const char *, size_t), class I, class T>
+struct ConvertParams<Func3<R, P1, const char *, size_t, F, I>, T> {
+  typedef Func5<R, void *, const void *, const char *, size_t,
+                const BufferHandle *, IgnoreHandlerDataIgnoreHandle<R, P1, F>,
+                I> Func;
+};
+
+template <class R, class P1, class P2, class P3, class P4, R F(P1, P2, P3, P4),
+          class I, class T>
+struct ConvertParams<Func4<R, P1, P2, P3, P4, F, I>, T> {
+  typedef Func5<R, void *, const void *, P2, P3, P4,
+                IgnoreHandlerData5<R, P1, P2, P3, P4, F>, I> Func;
+};
+
+/* For bound functions, cast the handler data.
+ *
+ * BoundFunc2 becomes a Func2 taking (void *, const void *) through
+ * CastHandlerData2.  BoundFunc3 becomes a Func3 through CastHandlerData3;
+ * that specialization only matches when T is a three-parameter function
+ * pointer, and the value parameter of the result uses T's third parameter
+ * type (P3_2) rather than F's own P3. */
+template <class R, class P1, class P2, R F(P1, P2), class I, class T>
+struct ConvertParams<BoundFunc2<R, P1, P2, F, I>, T> {
+  typedef Func2<R, void *, const void *, CastHandlerData2<R, P1, P2, F>, I>
+      Func;
+};
+
+template <class R, class P1, class P2, class P3, R F(P1, P2, P3), class I,
+          class R2, class P1_2, class P2_2, class P3_2>
+struct ConvertParams<BoundFunc3<R, P1, P2, P3, F, I>,
+                     R2 (*)(P1_2, P2_2, P3_2)> {
+  typedef Func3<R, void *, const void *, P3_2,
+                CastHandlerData3<R, P1, P2, P3_2, P3, F>, I> Func;
+};
+
+/* For StringBuffer only; this ignores the BufferHandle.
+ *
+ * The first specialization matches a BoundFunc4 of the exact shape
+ * (P1, P2, const char *, size_t) and produces a Func5 that casts the handler
+ * data and drops the BufferHandle.  The second handles a BoundFunc5, where
+ * the handler data is cast and P3..P5 are passed through unchanged. */
+template <class R, class P1, class P2, R F(P1, P2, const char *, size_t),
+          class I, class T>
+struct ConvertParams<BoundFunc4<R, P1, P2, const char *, size_t, F, I>, T> {
+  typedef Func5<R, void *, const void *, const char *, size_t,
+                const BufferHandle *, CastHandlerDataIgnoreHandle<R, P1, P2, F>,
+                I> Func;
+};
+
+template <class R, class P1, class P2, class P3, class P4, class P5,
+          R F(P1, P2, P3, P4, P5), class I, class T>
+struct ConvertParams<BoundFunc5<R, P1, P2, P3, P4, P5, F, I>, T> {
+  typedef Func5<R, void *, const void *, P3, P4, P5,
+                CastHandlerData5<R, P1, P2, P3, P4, P5, F>, I> Func;
+};
+
+/* utype/ltype are upper/lower-case, ctype is canonical C type, vtype is
+ * variant C type.
+ *
+ * Each expansion defines two things for vtype:
+ *   1. CanonicalType<vtype>, whose Type member is ctype.
+ *   2. The Handlers::SetValueHandler<vtype> specialization, which asserts the
+ *      handler has not been registered yet, calls handler.AddCleanup(this),
+ *      marks the handler registered and forwards to upb_handlers_set<ltype>.
+ *
+ * The *ALT_T variants are only expanded when UPB_TWO_32BIT_TYPES or
+ * UPB_TWO_64BIT_TYPES is defined.  The macro is #undef'd after use. */
+#define TYPE_METHODS(utype, ltype, ctype, vtype)                               \
+  template <> struct CanonicalType<vtype> {                                    \
+    typedef ctype Type;                                                        \
+  };                                                                           \
+  template <>                                                                  \
+  inline bool Handlers::SetValueHandler<vtype>(                                \
+      const FieldDef *f,                                                       \
+      const Handlers::utype ## Handler& handler) {                             \
+    assert(!handler.registered_);                                              \
+    handler.AddCleanup(this);                                                  \
+    handler.registered_ = true;                                                \
+    return upb_handlers_set##ltype(this, f, handler.handler_, &handler.attr_); \
+  }                                                                            \
+
+TYPE_METHODS(Double, double, double,   double)
+TYPE_METHODS(Float,  float,  float,    float)
+TYPE_METHODS(UInt64, uint64, uint64_t, UPB_UINT64_T)
+TYPE_METHODS(UInt32, uint32, uint32_t, UPB_UINT32_T)
+TYPE_METHODS(Int64,  int64,  int64_t,  UPB_INT64_T)
+TYPE_METHODS(Int32,  int32,  int32_t,  UPB_INT32_T)
+TYPE_METHODS(Bool,   bool,   bool,     bool)
+
+#ifdef UPB_TWO_32BIT_TYPES
+TYPE_METHODS(Int32,  int32,  int32_t,  UPB_INT32ALT_T)
+TYPE_METHODS(UInt32, uint32, uint32_t, UPB_UINT32ALT_T)
+#endif
+
+#ifdef UPB_TWO_64BIT_TYPES
+TYPE_METHODS(Int64,  int64,  int64_t,  UPB_INT64ALT_T)
+TYPE_METHODS(UInt64, uint64, uint64_t, UPB_UINT64ALT_T)
+#endif
+#undef TYPE_METHODS
+
+template <> struct CanonicalType<Status*> {
+  typedef Status* Type;  /* Status* maps to itself. */
+};
+
+/* Type methods that are only one-per-canonical-type and not
+ * one-per-cvariant.
+ *
+ * Each expansion defines Handlers::Set<utype>Handler(f, h), which simply
+ * forwards to SetValueHandler<ctype>(f, h).  The macro reuses the name
+ * TYPE_METHODS with a different (two-argument) parameter list and is
+ * #undef'd again after the expansions. */
+
+#define TYPE_METHODS(utype, ctype) \
+    inline bool Handlers::Set##utype##Handler(const FieldDef *f, \
+                                              const utype##Handler &h) { \
+      return SetValueHandler<ctype>(f, h); \
+    } \
+
+TYPE_METHODS(Double, double)
+TYPE_METHODS(Float,  float)
+TYPE_METHODS(UInt64, uint64_t)
+TYPE_METHODS(UInt32, uint32_t)
+TYPE_METHODS(Int64,  int64_t)
+TYPE_METHODS(Int32,  int32_t)
+TYPE_METHODS(Bool,   bool)
+#undef TYPE_METHODS
+
+template <class F> struct ReturnOf;  /* Primary template: declared only. */
+
+template <class R, class P1, class P2>
+struct ReturnOf<R (*)(P1, P2)> {  /* Two-parameter function pointer. */
+  typedef R Return;  /* The pointed-to function's return type. */
+};
+
+template <class R, class P1, class P2, class P3>
+struct ReturnOf<R (*)(P1, P2, P3)> {  /* Three-parameter function pointer. */
+  typedef R Return;
+};
+
+template <class R, class P1, class P2, class P3, class P4>
+struct ReturnOf<R (*)(P1, P2, P3, P4)> {  /* Four-parameter function pointer. */
+  typedef R Return;
+};
+
+template <class R, class P1, class P2, class P3, class P4, class P5>
+struct ReturnOf<R (*)(P1, P2, P3, P4, P5)> {  /* Five-parameter pointer. */
+  typedef R Return;
+};
+
+template<class T> const void *UniquePtrForType() {
+  static const char ch = 0;  /* One static object per instantiation (per T). */
+  return &ch;                /* Its address is used as an identifier for T. */
+}
+
+template <class T>
+template <class F>
+inline Handler<T>::Handler(F func)  /* F is a FuncN/BoundFuncN wrapper type. */
+    : registered_(false),
+      cleanup_data_(func.GetData()),
+      cleanup_func_(func.GetCleanup()) {
+  upb_handlerattr_sethandlerdata(&attr_, func.GetData());
+  typedef typename ReturnOf<T>::Return Return;  /* Return type T requires. */
+  typedef typename ConvertParams<F, T>::Func ConvertedParamsFunc;
+  typedef typename MaybeWrapReturn<ConvertedParamsFunc, Return>::Func
+      ReturnWrappedFunc;
+  handler_ = ReturnWrappedFunc().Call;  /* The fully adapted static function. */
+
+  /* Set attributes based on what templates can statically tell us about the
+   * user's function. */
+
+  /* If the original function returns void, then we know that we wrapped it to
+   * always return ok. */
+  bool always_ok = is_same<typename F::FuncInfo::Return, void>::value;
+  attr_.SetAlwaysOk(always_ok);
+
+  /* Closure parameter and return type.  The closure type is identified by the
+   * address that UniquePtrForType returns for it. */
+  attr_.SetClosureType(UniquePtrForType<typename F::FuncInfo::Closure>());
+
+  /* We use the closure type (from the first parameter) if the return type is
+   * void or bool, since these are the two cases we wrap to return the closure's
+   * type anyway.
+   *
+   * This is all nonsense for non START* handlers, but it doesn't matter because
+   * in that case the value will be ignored. */
+  typedef typename FirstUnlessVoidOrBool<typename F::FuncInfo::Return,
+                                         typename F::FuncInfo::Closure>::value
+      EffectiveReturn;
+  attr_.SetReturnClosureType(UniquePtrForType<EffectiveReturn>());
+}
+
+template <class T>
+inline Handler<T>::~Handler() {
+  assert(registered_);  /* A Handler is expected to be registered before it dies. */
+}
+
+inline HandlerAttributes::HandlerAttributes() { upb_handlerattr_init(this); }
+inline HandlerAttributes::~HandlerAttributes() { upb_handlerattr_uninit(this); }
+inline bool HandlerAttributes::SetHandlerData(const void *hd) {
+  return upb_handlerattr_sethandlerdata(this, hd);  /* Forwards to the C API. */
+}
+inline const void* HandlerAttributes::handler_data() const {
+  return upb_handlerattr_handlerdata(this);  /* Forwards to the C API. */
+}
+inline bool HandlerAttributes::SetClosureType(const void *type) {
+  return upb_handlerattr_setclosuretype(this, type);  /* Forwards to C API. */
+}
+inline const void* HandlerAttributes::closure_type() const {
+  return upb_handlerattr_closuretype(this);  /* Forwards to the C API. */
+}
+inline bool HandlerAttributes::SetReturnClosureType(const void *type) {
+  return upb_handlerattr_setreturnclosuretype(this, type);  /* C API call. */
+}
+inline const void* HandlerAttributes::return_closure_type() const {
+  return upb_handlerattr_returnclosuretype(this);  /* Forwards to the C API. */
+}
+inline bool HandlerAttributes::SetAlwaysOk(bool always_ok) {
+  return upb_handlerattr_setalwaysok(this, always_ok);  /* Forwards to C API. */
+}
+inline bool HandlerAttributes::always_ok() const {
+  return upb_handlerattr_alwaysok(this);  /* Forwards to the C API. */
+}
+
+inline BufferHandle::BufferHandle() { upb_bufhandle_init(this); }
+inline BufferHandle::~BufferHandle() { upb_bufhandle_uninit(this); }
+inline const char* BufferHandle::buffer() const {
+  return upb_bufhandle_buf(this);  /* Forwards to the C API. */
+}
+inline size_t BufferHandle::object_offset() const {
+  return upb_bufhandle_objofs(this);  /* Forwards to the C API. */
+}
+inline void BufferHandle::SetBuffer(const char* buf, size_t ofs) {
+  upb_bufhandle_setbuf(this, buf, ofs);  /* Forwards buf and ofs to the C API. */
+}
+template <class T>
+void BufferHandle::SetAttachedObject(const T* obj) {
+  /* Stores obj together with the per-type identifier for T. */
+  upb_bufhandle_setobj(this, obj, UniquePtrForType<T>());
+}
+template <class T>
+const T* BufferHandle::GetAttachedObject() const {
+  /* The attached object is only handed back when its recorded type identifier
+   * is the one belonging to T; any other type yields NULL. */
+  if (upb_bufhandle_objtype(this) != UniquePtrForType<T>()) {
+    return NULL;
+  }
+  return static_cast<const T *>(upb_bufhandle_obj(this));
+}
+
+inline reffed_ptr<Handlers> Handlers::New(const MessageDef *m) {
+  upb_handlers *h = upb_handlers_new(m, &h);  /* &h is the temporary owner. */
+  return reffed_ptr<Handlers>(h, &h);  /* reffed_ptr is given that same owner. */
+}
+inline reffed_ptr<const Handlers> Handlers::NewFrozen(
+    const MessageDef *m, upb_handlers_callback *callback,
+    const void *closure) {
+  /* callback and closure are passed straight through to the C API; &h is used
+   * as the owner both for the creation call and for the reffed_ptr. */
+  const upb_handlers *h = upb_handlers_newfrozen(m, &h, callback, closure);
+  return reffed_ptr<const Handlers>(h, &h);
+}
+inline const Status* Handlers::status() {
+  return upb_handlers_status(this);  /* Forwards to the C API. */
+}
+inline void Handlers::ClearError() {
+  return upb_handlers_clearerr(this);  /* Forwards to the C API. */
+}
+inline bool Handlers::Freeze(Status *s) {
+  upb::Handlers* h = this;  /* Single-element "array" holding just this. */
+  return upb_handlers_freeze(&h, 1, s);
+}
+inline bool Handlers::Freeze(Handlers *const *handlers, int n, Status *s) {
+  return upb_handlers_freeze(handlers, n, s);  /* n entries in handlers. */
+}
+inline bool Handlers::Freeze(const std::vector<Handlers*>& h, Status* status) {
+  /* Freezes every Handlers object in h through upb_handlers_freeze(),
+   * reporting through status.  Returns whatever the C function returns.
+   *
+   * h[0] must not be evaluated for an empty vector (operator[] on an empty
+   * vector is undefined behavior), so an empty vector is passed as a NULL
+   * array with a count of zero.  This assumes upb_handlers_freeze() does not
+   * dereference the array when the count is zero. */
+  Handlers* const* handlers = h.empty() ? NULL : &h[0];
+  return upb_handlers_freeze(handlers, h.size(), status);
+}
+inline const MessageDef *Handlers::message_def() const {
+  return upb_handlers_msgdef(this);  /* Forwards to the C API. */
+}
+inline bool Handlers::AddCleanup(void *p, upb_handlerfree *func) {
+  return upb_handlers_addcleanup(this, p, func);  /* Forwards p and func. */
+}
+inline bool Handlers::SetStartMessageHandler(
+    const Handlers::StartMessageHandler &handler) {
+  assert(!handler.registered_);  /* A handler object may be registered once. */
+  handler.registered_ = true;
+  handler.AddCleanup(this);  /* Presumably hands its cleanup data to *this. */
+  return upb_handlers_setstartmsg(this, handler.handler_, &handler.attr_);
+}
+inline bool Handlers::SetEndMessageHandler(
+    const Handlers::EndMessageHandler &handler) {
+  assert(!handler.registered_);  /* A handler object may be registered once. */
+  handler.registered_ = true;
+  handler.AddCleanup(this);  /* Presumably hands its cleanup data to *this. */
+  return upb_handlers_setendmsg(this, handler.handler_, &handler.attr_);
+}
+inline bool Handlers::SetStartStringHandler(const FieldDef *f,
+                                            const StartStringHandler &handler) {
+  assert(!handler.registered_);  /* A handler object may be registered once. */
+  handler.registered_ = true;
+  handler.AddCleanup(this);  /* Presumably hands its cleanup data to *this. */
+  return upb_handlers_setstartstr(this, f, handler.handler_, &handler.attr_);
+}
+inline bool Handlers::SetEndStringHandler(const FieldDef *f,
+                                          const EndFieldHandler &handler) {
+  assert(!handler.registered_);  /* A handler object may be registered once. */
+  handler.registered_ = true;
+  handler.AddCleanup(this);  /* Presumably hands its cleanup data to *this. */
+  return upb_handlers_setendstr(this, f, handler.handler_, &handler.attr_);
+}
+inline bool Handlers::SetStringHandler(const FieldDef *f,
+                                       const StringHandler& handler) {
+  assert(!handler.registered_);  /* A handler object may be registered once. */
+  handler.registered_ = true;
+  handler.AddCleanup(this);  /* Presumably hands its cleanup data to *this. */
+  return upb_handlers_setstring(this, f, handler.handler_, &handler.attr_);
+}
+inline bool Handlers::SetStartSequenceHandler(
+    const FieldDef *f, const StartFieldHandler &handler) {
+  assert(!handler.registered_);  /* A handler object may be registered once. */
+  handler.registered_ = true;
+  handler.AddCleanup(this);  /* Presumably hands its cleanup data to *this. */
+  return upb_handlers_setstartseq(this, f, handler.handler_, &handler.attr_);
+}
+inline bool Handlers::SetStartSubMessageHandler(
+    const FieldDef *f, const StartFieldHandler &handler) {
+  assert(!handler.registered_);  /* A handler object may be registered once. */
+  handler.registered_ = true;
+  handler.AddCleanup(this);  /* Presumably hands its cleanup data to *this. */
+  return upb_handlers_setstartsubmsg(this, f, handler.handler_, &handler.attr_);
+}
+inline bool Handlers::SetEndSubMessageHandler(const FieldDef *f,
+                                              const EndFieldHandler &handler) {
+  assert(!handler.registered_);  /* A handler object may be registered once. */
+  handler.registered_ = true;
+  handler.AddCleanup(this);  /* Presumably hands its cleanup data to *this. */
+  return upb_handlers_setendsubmsg(this, f, handler.handler_, &handler.attr_);
+}
+inline bool Handlers::SetEndSequenceHandler(const FieldDef *f,
+                                            const EndFieldHandler &handler) {
+  assert(!handler.registered_);  /* A handler object may be registered once. */
+  handler.registered_ = true;
+  handler.AddCleanup(this);  /* Presumably hands its cleanup data to *this. */
+  return upb_handlers_setendseq(this, f, handler.handler_, &handler.attr_);
+}
+inline bool Handlers::SetSubHandlers(const FieldDef *f, const Handlers *sub) {
+  return upb_handlers_setsubhandlers(this, f, sub);  /* Forwards to C API. */
+}
+inline const Handlers *Handlers::GetSubHandlers(const FieldDef *f) const {
+  return upb_handlers_getsubhandlers(this, f);  /* Lookup by field. */
+}
+inline const Handlers *Handlers::GetSubHandlers(Handlers::Selector sel) const {
+  return upb_handlers_getsubhandlers_sel(this, sel);  /* Lookup by selector. */
+}
+inline bool Handlers::GetSelector(const FieldDef *f, Handlers::Type type,
+                                  Handlers::Selector *s) {
+  return upb_handlers_getselector(f, type, s);  /* Does not use this. */
+}
+inline Handlers::Selector Handlers::GetEndSelector(Handlers::Selector start) {
+  return upb_handlers_getendselector(start);  /* Does not use this. */
+}
+inline Handlers::GenericFunction *Handlers::GetHandler(
+    Handlers::Selector selector) {
+  return upb_handlers_gethandler(this, selector);  /* Forwards to the C API. */
+}
+inline const void *Handlers::GetHandlerData(Handlers::Selector selector) {
+  return upb_handlers_gethandlerdata(this, selector);  /* Forwards to C API. */
+}
+
+/* Constructs the handler by running upb_byteshandler_init() on this object. */
+inline BytesHandler::BytesHandler() {
+  upb_byteshandler_init(this);
+}
+
+/* Intentionally empty: no teardown call is made for a BytesHandler. */
+inline BytesHandler::~BytesHandler() {}
+
+}  /* namespace upb */
+
+#endif  /* __cplusplus */
+
+
+#undef UPB_TWO_32BIT_TYPES
+#undef UPB_TWO_64BIT_TYPES
+#undef UPB_INT32_T
+#undef UPB_UINT32_T
+#undef UPB_INT32ALT_T
+#undef UPB_UINT32ALT_T
+#undef UPB_INT64_T
+#undef UPB_UINT64_T
+#undef UPB_INT64ALT_T
+#undef UPB_UINT64ALT_T
+
+#endif  /* UPB_HANDLERS_INL_H_ */
+
+#endif  /* UPB_HANDLERS_H */
+/*
+** upb::Environment (upb_env)
+**
+** A upb::Environment provides a means for injecting malloc and an
+** error-reporting callback into encoders/decoders.  This allows them to be
+** independent of nearly all assumptions about their actual environment.
+**
+** It is also a container for allocating the encoders/decoders themselves that
+** insulates clients from knowing their actual size.  This provides ABI
+** compatibility even if the size of the objects changes.  And this allows the
+** structure definitions to be in the .c files instead of the .h files, making
+** the .h files smaller and more readable.
+*/
+
+
+#ifndef UPB_ENV_H_
+#define UPB_ENV_H_
+
+#ifdef __cplusplus
+namespace upb {
+class Environment;
+class SeededAllocator;
+}
+#endif
+
+UPB_DECLARE_TYPE(upb::Environment, upb_env)
+UPB_DECLARE_TYPE(upb::SeededAllocator, upb_seededalloc)
+
+/* Callback signatures used by upb_env and upb_seededalloc.  In each case "ud"
+ * is the user-data pointer that was supplied together with the callback.
+ *
+ * upb_alloc_func:   realloc-style allocation hook (pointer, old size, new
+ *                   size); see Environment::SetAllocationFunction().
+ * upb_cleanup_func: cleanup hook; see Environment::AddCleanup().
+ * upb_error_func:   error-reporting hook; see Environment::SetErrorFunction()
+ *                   and Environment::ReportError() for the meaning of the
+ *                   bool result. */
+typedef void *upb_alloc_func(void *ud, void *ptr, size_t oldsize, size_t size);
+typedef void upb_cleanup_func(void *ud);
+typedef bool upb_error_func(void *ud, const upb_status *status);
+
+#ifdef __cplusplus
+
+/* An environment is *not* thread-safe. */
+class upb::Environment {
+ public:
+  Environment();
+  ~Environment();
+
+  /* Set a custom memory allocation function for the environment.  May ONLY
+   * be called before any calls to Malloc()/Realloc()/AddCleanup() below.
+   * If this is not called, the system realloc() function will be used.
+   * The given user pointer "ud" will be passed to the allocation function.
+   *
+   * The allocation function will not receive corresponding "free" calls.  It
+   * must ensure that the memory is valid for the lifetime of the Environment,
+   * but it may be reclaimed any time thereafter.  The likely usage is that
+   * "ud" points to a stateful allocator, and that the allocator frees all
+   * memory, arena-style, when it is destroyed.  In this case the allocator must
+   * outlive the Environment.  Another possibility is that the allocation
+   * function returns GC-able memory that is guaranteed to be GC-rooted for the
+   * life of the Environment. */
+  void SetAllocationFunction(upb_alloc_func* alloc, void* ud);
+
+  /* Convenience overload: installs allocator->GetAllocationFunction() as the
+   * allocation function, with the allocator object itself as the user
+   * pointer.  T only needs to provide GetAllocationFunction() (as
+   * upb::SeededAllocator does). */
+  template<class T>
+  void SetAllocator(T* allocator) {
+    SetAllocationFunction(allocator->GetAllocationFunction(), allocator);
+  }
+
+  /* Set a custom error reporting function. */
+  void SetErrorFunction(upb_error_func* func, void* ud);
+
+  /* Set the error reporting function to simply copy the status to the given
+   * status and abort. */
+  void ReportErrorsTo(Status* status);
+
+  /* Returns true if all allocations and AddCleanup() calls have succeeded,
+   * and no errors were reported with ReportError() (except ones that recovered
+   * successfully). */
+  bool ok() const;
+
+  /* Functions for use by encoders/decoders. **********************************/
+
+  /* Reports an error to this environment's callback, returning true if
+   * the caller should try to recover. */
+  bool ReportError(const Status* status);
+
+  /* Allocate memory.  Uses the environment's allocation function.
+   *
+   * There is no need to free(). All memory will be freed automatically, but is
+   * guaranteed to outlive the Environment.
+   *
+   * NOTE(review): "outlive" presumably means the memory stays valid for at
+   * least the lifetime of the Environment (cf. SetAllocationFunction()
+   * above) -- confirm the intended wording. */
+  void* Malloc(size_t size);
+
+  /* Reallocate memory.  Preserves "oldsize" bytes from the existing buffer.
+   * Requires: oldsize <= existing_size.
+   *
+   * TODO(haberman): should we also enforce that oldsize <= size? */
+  void* Realloc(void* ptr, size_t oldsize, size_t size);
+
+  /* Add a cleanup function to run when the environment is destroyed.
+   * Returns false on out-of-memory.
+   *
+   * The first call to AddCleanup() after SetAllocationFunction() is guaranteed
+   * to return true -- this makes it possible to robustly set a cleanup handler
+   * for a custom allocation function. */
+  bool AddCleanup(upb_cleanup_func* func, void* ud);
+
+  /* Total number of bytes that have been allocated.  It is undefined what
+   * Realloc() does to this counter. */
+  size_t BytesAllocated() const;
+
+ private:
+  UPB_DISALLOW_COPY_AND_ASSIGN(Environment)
+
+#else
+struct upb_env {
+#endif  /* __cplusplus */
+
+  /* The members below are compiled in both modes: they are the private data
+   * of the C++ class and the fields of the C struct. */
+
+  /* Presumably the state behind ok() and BytesAllocated(); the code that
+   * maintains them is not in this header -- confirm in env.c. */
+  bool ok_;
+  size_t bytes_allocated;
+
+  /* Alloc function. */
+  upb_alloc_func *alloc;
+  void *alloc_ud;
+
+  /* Error-reporting function. */
+  upb_error_func *err;
+  void *err_ud;
+
+  /* Userdata for default alloc func. */
+  void *default_alloc_ud;
+
+  /* Cleanup entries.  Pointer to a cleanup_ent, defined in env.c */
+  void *cleanup_head;
+
+  /* For future expansion, since the size of this struct is exposed to users. */
+  void *future1;
+  void *future2;
+};
+
+UPB_BEGIN_EXTERN_C
+
+/* C API for upb_env.  Each function is the C counterpart of the
+ * upb::Environment member documented above (the inline C++ methods simply
+ * forward to these): init/uninit correspond to the constructor and
+ * destructor. */
+void upb_env_init(upb_env *e);
+void upb_env_uninit(upb_env *e);
+void upb_env_setallocfunc(upb_env *e, upb_alloc_func *func, void *ud);
+void upb_env_seterrorfunc(upb_env *e, upb_error_func *func, void *ud);
+void upb_env_reporterrorsto(upb_env *e, upb_status *status);
+bool upb_env_ok(const upb_env *e);
+bool upb_env_reporterror(upb_env *e, const upb_status *status);
+void *upb_env_malloc(upb_env *e, size_t size);
+void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size);
+bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud);
+size_t upb_env_bytesallocated(const upb_env *e);
+
+UPB_END_EXTERN_C
+
+#ifdef __cplusplus
+
+/* An allocator that allocates from an initial memory region (likely the stack)
+ * before falling back to another allocator. */
+class upb::SeededAllocator {
+ public:
+  /* "mem" and "len" describe the initial memory region; they are passed
+   * straight to upb_seededalloc_init(). */
+  SeededAllocator(void *mem, size_t len);
+  ~SeededAllocator();
+
+  /* Set a custom fallback memory allocation function for the allocator, to use
+   * once the initial region runs out.
+   *
+   * May ONLY be called before GetAllocationFunction().  If this is not
+   * called, the system realloc() will be the fallback allocator. */
+  void SetFallbackAllocator(upb_alloc_func *alloc, void *ud);
+
+  /* Gets the allocation function for this allocator. */
+  upb_alloc_func* GetAllocationFunction();
+
+ private:
+  UPB_DISALLOW_COPY_AND_ASSIGN(SeededAllocator)
+
+#else
+struct upb_seededalloc {
+#endif  /* __cplusplus */
+
+  /* The members below are compiled in both modes: they are the private data
+   * of the C++ class and the fields of the C struct. */
+
+  /* Fallback alloc function.  */
+  upb_alloc_func *alloc;
+  upb_cleanup_func *alloc_cleanup;
+  void *alloc_ud;
+  /* Bookkeeping flags.  Presumably "returned_allocfunc" records that
+   * GetAllocationFunction() has been called (after which the fallback may no
+   * longer be changed) -- confirm in env.c. */
+  bool need_cleanup;
+  bool returned_allocfunc;
+
+  /* Userdata for default alloc func. */
+  void *default_alloc_ud;
+
+  /* Pointers for the initial memory region. */
+  char *mem_base;
+  char *mem_ptr;
+  char *mem_limit;
+
+  /* For future expansion, since the size of this struct is exposed to users. */
+  void *future1;
+  void *future2;
+};
+
+UPB_BEGIN_EXTERN_C
+
+/* C API for upb_seededalloc.  Each function is the C counterpart of the
+ * upb::SeededAllocator member documented above (the inline C++ methods simply
+ * forward to these): init/uninit correspond to the constructor and
+ * destructor. */
+void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len);
+void upb_seededalloc_uninit(upb_seededalloc *a);
+void upb_seededalloc_setfallbackalloc(upb_seededalloc *a, upb_alloc_func *func,
+                                      void *ud);
+upb_alloc_func *upb_seededalloc_getallocfunc(upb_seededalloc *a);
+
+UPB_END_EXTERN_C
+
+#ifdef __cplusplus
+
+namespace upb {
+
+/* Inline definitions of the upb::Environment members.  Every method is a
+ * one-line forwarder to the upb_env_* C function of the same purpose, passing
+ * "this" as the upb_env. */
+
+inline Environment::Environment() {
+  upb_env_init(this);
+}
+inline Environment::~Environment() {
+  upb_env_uninit(this);
+}
+inline void Environment::SetAllocationFunction(upb_alloc_func *alloc,
+                                               void *ud) {
+  upb_env_setallocfunc(this, alloc, ud);
+}
+inline void Environment::SetErrorFunction(upb_error_func *func, void *ud) {
+  upb_env_seterrorfunc(this, func, ud);
+}
+inline void Environment::ReportErrorsTo(Status* status) {
+  upb_env_reporterrorsto(this, status);
+}
+inline bool Environment::ok() const {
+  return upb_env_ok(this);
+}
+inline bool Environment::ReportError(const Status* status) {
+  return upb_env_reporterror(this, status);
+}
+inline void *Environment::Malloc(size_t size) {
+  return upb_env_malloc(this, size);
+}
+inline void *Environment::Realloc(void *ptr, size_t oldsize, size_t size) {
+  return upb_env_realloc(this, ptr, oldsize, size);
+}
+inline bool Environment::AddCleanup(upb_cleanup_func *func, void *ud) {
+  return upb_env_addcleanup(this, func, ud);
+}
+inline size_t Environment::BytesAllocated() const {
+  return upb_env_bytesallocated(this);
+}
+
+/* Inline definitions of the upb::SeededAllocator members; likewise one-line
+ * forwarders to the upb_seededalloc_* C functions. */
+
+inline SeededAllocator::SeededAllocator(void *mem, size_t len) {
+  upb_seededalloc_init(this, mem, len);
+}
+inline SeededAllocator::~SeededAllocator() {
+  upb_seededalloc_uninit(this);
+}
+inline void SeededAllocator::SetFallbackAllocator(upb_alloc_func *alloc,
+                                                  void *ud) {
+  upb_seededalloc_setfallbackalloc(this, alloc, ud);
+}
+inline upb_alloc_func *SeededAllocator::GetAllocationFunction() {
+  return upb_seededalloc_getallocfunc(this);
+}
+
+}  /* namespace upb */
+
+#endif  /* __cplusplus */
+
+#endif  /* UPB_ENV_H_ */
+/*
+** upb::Sink (upb_sink)
+** upb::BytesSink (upb_bytessink)
+**
+** A upb_sink is an object that binds a upb_handlers object to some runtime
+** state.  It is the object that can actually receive data via the upb_handlers
+** interface.
+**
+** Unlike upb_def and upb_handlers, upb_sink is never frozen, immutable, or
+** thread-safe.  You can create as many of them as you want, but each one may
+** only be used in a single thread at a time.
+**
+** If we compare with class-based OOP, you can think of a upb_def as an
+** abstract base class, a upb_handlers as a concrete derived class, and a
+** upb_sink as an object (class instance).
+*/
+
+#ifndef UPB_SINK_H
+#define UPB_SINK_H
+
+
+#ifdef __cplusplus
+namespace upb {
+class BufferSource;
+class BytesSink;
+class Sink;
+}
+#endif
+
+UPB_DECLARE_TYPE(upb::BufferSource, upb_bufsrc)
+UPB_DECLARE_TYPE(upb::BytesSink, upb_bytessink)
+UPB_DECLARE_TYPE(upb::Sink, upb_sink)
+
+#ifdef __cplusplus
+
+/* A upb::Sink is an object that binds a upb::Handlers object to some runtime
+ * state.  It represents an endpoint to which data can be sent.
+ *
+ * TODO(haberman): right now all of these functions take selectors.  Should they
+ * take selectorbase instead?
+ *
+ * ie. instead of calling:
+ *   sink->StartString(FOO_FIELD_START_STRING, ...)
+ * a selector base would let you say:
+ *   sink->StartString(FOO_FIELD, ...)
+ *
+ * This would make call sites a little nicer and require emitting fewer selector
+ * definitions in .h files.
+ *
+ * But the current scheme has the benefit that you can retrieve a function
+ * pointer for any handler with handlers->GetHandler(selector), without having
+ * to have a separate GetHandler() function for each handler type.  The JIT
+ * compiler uses this.  To accommodate we'd have to expose a separate
+ * GetHandler() for every handler type.
+ *
+ * Also to ponder: selectors right now are independent of a specific Handlers
+ * instance.  In other words, they allocate a number to every possible handler
+ * that *could* be registered, without knowing anything about what handlers
+ * *are* registered.  That means that using selectors as table offsets prohibits
+ * us from compacting the handler table at Freeze() time.  If the table is very
+ * sparse, this could be wasteful.
+ *
+ * Having another selector-like thing that is specific to a Handlers instance
+ * would allow this compacting, but then it would be impossible to write code
+ * ahead-of-time that can be bound to any Handlers instance at runtime.  For
+ * example, a .proto file parser written as straight C will not know what
+ * Handlers it will be bound to, so when it calls sink->StartString() what
+ * selector will it pass?  It needs a selector like we have today, that is
+ * independent of any particular upb::Handlers.
+ *
+ * Is there a way then to allow Handlers table compaction? */
+class upb::Sink {
+ public:
+  /* Constructor with no initialization; must be Reset() before use. */
+  Sink() {}
+
+  /* Constructs a new sink for the given frozen handlers and closure.
+   *
+   * TODO: once the Handlers know the expected closure type, verify that T
+   * matches it. */
+  template <class T> Sink(const Handlers* handlers, T* closure);
+
+  /* Resets the value of the sink. */
+  template <class T> void Reset(const Handlers* handlers, T* closure);
+
+  /* Returns the top-level object that is bound to this sink.
+   *
+   * TODO: once the Handlers know the expected closure type, verify that T
+   * matches it. */
+  template <class T> T* GetObject() const;
+
+  /* Functions for pushing data into the sink.
+   *
+   * These return false if processing should stop (either due to error or just
+   * to suspend).
+   *
+   * These may not be called from within one of the same sink's handlers (in
+   * other words, handlers are not re-entrant). */
+
+  /* Should be called at the start and end of every message; both the top-level
+   * message and submessages.  This means that submessages should use the
+   * following sequence:
+   *   sink->StartSubMessage(startsubmsg_selector);
+   *   sink->StartMessage();
+   *   // ...
+   *   sink->EndMessage(&status);
+   *   sink->EndSubMessage(endsubmsg_selector); */
+  bool StartMessage();
+  bool EndMessage(Status* status);
+
+  /* Putting of individual values.  These work for both repeated and
+   * non-repeated fields, but for repeated fields you must wrap them in
+   * calls to StartSequence()/EndSequence(). */
+  bool PutInt32(Handlers::Selector s, int32_t val);
+  bool PutInt64(Handlers::Selector s, int64_t val);
+  bool PutUInt32(Handlers::Selector s, uint32_t val);
+  bool PutUInt64(Handlers::Selector s, uint64_t val);
+  bool PutFloat(Handlers::Selector s, float val);
+  bool PutDouble(Handlers::Selector s, double val);
+  bool PutBool(Handlers::Selector s, bool val);
+
+  /* Putting of string/bytes values.  Each string can consist of zero or more
+   * non-contiguous buffers of data.
+   *
+   * For StartString(), the function will write a sink for the string to "sub."
+   * The sub-sink must be used for any/all PutStringBuffer() calls. */
+  bool StartString(Handlers::Selector s, size_t size_hint, Sink* sub);
+  size_t PutStringBuffer(Handlers::Selector s, const char *buf, size_t len,
+                         const BufferHandle *handle);
+  bool EndString(Handlers::Selector s);
+
+  /* For submessage fields.
+   *
+   * For StartSubMessage(), the function will write a sink for the submessage
+   * to "sub." The sub-sink must be used for any/all handlers called within the
+   * submessage. */
+  bool StartSubMessage(Handlers::Selector s, Sink* sub);
+  bool EndSubMessage(Handlers::Selector s);
+
+  /* For repeated fields of any type, the sequence of values must be wrapped in
+   * these calls.
+   *
+   * For StartSequence(), the function will write a sink for the sequence to
+   * "sub." The sub-sink must be used for any/all handlers called within the
+   * sequence. */
+  bool StartSequence(Handlers::Selector s, Sink* sub);
+  bool EndSequence(Handlers::Selector s);
+
+  /* Copy and assign specifically allowed.
+   * We don't even bother making these members private because so many
+   * functions need them and this is mainly just a dumb data container anyway.
+   */
+#else
+struct upb_sink {
+#endif
+  /* Handler table this sink dispatches through.  The upb_sink_* inline
+   * functions treat a NULL table as "no handlers": they do nothing and report
+   * success. */
+  const upb_handlers *handlers;
+  /* Passed as the first argument to every handler that is invoked. */
+  void *closure;
+};
+
+#ifdef __cplusplus
+/* A sink for a single stream of bytes: binds a BytesHandler to a closure. */
+class upb::BytesSink {
+ public:
+  /* Constructor with no initialization of the members below. */
+  BytesSink() {}
+
+  /* Constructs a new sink for the given frozen handlers and closure.
+   *
+   * TODO(haberman): once the Handlers know the expected closure type, verify
+   * that T matches it. */
+  template <class T> BytesSink(const BytesHandler* handler, T* closure);
+
+  /* Resets the value of the sink. */
+  template <class T> void Reset(const BytesHandler* handler, T* closure);
+
+  /* Start() begins a stream and stores in *subc the closure that must be
+   * passed to PutBuffer(); it returns false if the start handler returned
+   * NULL.  PutBuffer() hands one buffer to the string handler and returns
+   * that handler's result.  End() invokes the end handler, if any. */
+  bool Start(size_t size_hint, void **subc);
+  size_t PutBuffer(void *subc, const char *buf, size_t len,
+                   const BufferHandle *handle);
+  bool End();
+#else
+struct upb_bytessink {
+#endif
+  /* Handler table; may be NULL, in which case no handler is ever invoked. */
+  const upb_byteshandler *handler;
+  /* Passed to the start and end handlers as their closure. */
+  void *closure;
+};
+
+#ifdef __cplusplus
+
+/* A class for pushing a flat buffer of data to a BytesSink.
+ * You can construct an instance of this to get a resumable source,
+ * or just call the static PutBuffer() to do a non-resumable push all in one
+ * go. */
+class upb::BufferSource {
+ public:
+  BufferSource();
+  BufferSource(const char* buf, size_t len, BytesSink* sink);
+
+  /* Returns true if the entire buffer was pushed successfully.  Otherwise the
+   * next call to PutNext() will resume where the previous one left off.
+   * TODO(haberman): implement this. */
+  bool PutNext();
+
+  /* A static version; with this version it is not possible to resume in the
+   * case of failure or a partially-consumed buffer. */
+  static bool PutBuffer(const char* buf, size_t len, BytesSink* sink);
+
+  /* Convenience overload for string-like objects: pushes str.c_str() with
+   * length str.size(). */
+  template <class T> static bool PutBuffer(const T& str, BytesSink* sink) {
+    return PutBuffer(str.c_str(), str.size(), sink);
+  }
+#else
+struct upb_bufsrc {
+  /* Placeholder member so that the C struct is not empty. */
+  char dummy;
+#endif
+};
+
+UPB_BEGIN_EXTERN_C
+
+/* Inline definitions. */
+
+/* Rebinds bytes sink "s" to handler table "h" and closure "closure". */
+UPB_INLINE void upb_bytessink_reset(upb_bytessink *s, const upb_byteshandler *h,
+                                    void *closure) {
+  s->closure = closure;
+  s->handler = h;
+}
+
+/* Begins a byte stream on sink "s".
+ *
+ * *subc is always initialised to the sink's own closure.  If a start handler
+ * is registered it is invoked with the closure, its handler data and
+ * "size_hint", and *subc is replaced by whatever it returns; a NULL result
+ * makes this function return false.  With no handler table, or no start
+ * handler, the call succeeds without invoking anything. */
+UPB_INLINE bool upb_bytessink_start(upb_bytessink *s, size_t size_hint,
+                                    void **subc) {
+  typedef upb_startstr_handlerfunc startfn;
+  startfn *cb = NULL;
+  const void *hd;
+
+  *subc = s->closure;
+  if (s->handler) {
+    cb = (startfn *)s->handler->table[UPB_STARTSTR_SELECTOR].func;
+  }
+  if (!cb) return true;
+
+  hd = upb_handlerattr_handlerdata(
+      &s->handler->table[UPB_STARTSTR_SELECTOR].attr);
+  *subc = cb(s->closure, hd, size_hint);
+  return *subc != NULL;
+}
+
+/* Pushes "size" bytes starting at "buf" to the sink's string handler, passing
+ * "subc" (the sub-closure produced by upb_bytessink_start()) as the closure.
+ *
+ * Returns the number of bytes consumed, as reported by the handler.  When the
+ * sink has no handler table, or no string handler is registered, the data is
+ * treated as fully consumed and "size" is returned -- the same convention as
+ * upb_sink_putstring().  (Returning "true" here would report a single byte
+ * consumed and make upb_bufsrc_putbuf() fail for any buffer longer than one
+ * byte.) */
+UPB_INLINE size_t upb_bytessink_putbuf(upb_bytessink *s, void *subc,
+                                       const char *buf, size_t size,
+                                       const upb_bufhandle* handle) {
+  typedef upb_string_handlerfunc func;
+  func *putbuf;
+  if (!s->handler) return size;
+  putbuf = (func *)s->handler->table[UPB_STRING_SELECTOR].func;
+
+  if (!putbuf) return size;
+  return putbuf(subc, upb_handlerattr_handlerdata(
+                          &s->handler->table[UPB_STRING_SELECTOR].attr),
+                buf, size, handle);
+}
+
+/* Ends the byte stream on sink "s" by invoking the end handler with the
+ * sink's closure and the handler's data, returning the handler's result.
+ * Succeeds without invoking anything when there is no handler table or no
+ * end handler. */
+UPB_INLINE bool upb_bytessink_end(upb_bytessink *s) {
+  typedef upb_endfield_handlerfunc endfn;
+  endfn *cb = NULL;
+  const void *hd;
+
+  if (s->handler) {
+    cb = (endfn *)s->handler->table[UPB_ENDSTR_SELECTOR].func;
+  }
+  if (!cb) return true;
+
+  hd = upb_handlerattr_handlerdata(
+      &s->handler->table[UPB_ENDSTR_SELECTOR].attr);
+  return cb(s->closure, hd);
+}
+
+/* Pushes the flat buffer [buf, buf+len) to "sink" in one go:
+ * start -> (putbuf, skipped for an empty buffer) -> end.
+ *
+ * Returns true only if the start succeeded, the put (when performed)
+ * reported at least "len" bytes consumed, and the end succeeded.  The end
+ * handler is not invoked once an earlier step has failed. */
+UPB_INLINE bool upb_bufsrc_putbuf(const char *buf, size_t len,
+                                  upb_bytessink *sink) {
+  upb_bufhandle h;
+  void *subc;
+  bool ok;
+
+  upb_bufhandle_init(&h);
+  upb_bufhandle_setbuf(&h, buf, 0);
+
+  ok = upb_bytessink_start(sink, len, &subc);
+  if (ok && len > 0) {
+    size_t consumed = upb_bytessink_putbuf(sink, subc, buf, len, &h);
+    ok = (consumed >= len);
+  }
+  /* Short-circuit keeps the end call conditional on success so far. */
+  ok = ok && upb_bytessink_end(sink);
+
+  upb_bufhandle_uninit(&h);
+  return ok;
+}
+
+/* PUTVAL(type, ctype) generates the inline function upb_sink_put<type>(),
+ * which delivers one scalar of C type "ctype" to the handler registered for
+ * selector "sel".  The generated function returns true without invoking
+ * anything when the sink has no handler table or no handler is registered for
+ * the selector; otherwise it returns the handler's result.  The macro is
+ * undefined again once all scalar variants have been instantiated. */
+#define PUTVAL(type, ctype)                                                    \
+  UPB_INLINE bool upb_sink_put##type(upb_sink *s, upb_selector_t sel,          \
+                                     ctype val) {                              \
+    typedef upb_##type##_handlerfunc functype;                                 \
+    functype *func;                                                            \
+    const void *hd;                                                            \
+    if (!s->handlers) return true;                                             \
+    func = (functype *)upb_handlers_gethandler(s->handlers, sel);              \
+    if (!func) return true;                                                    \
+    hd = upb_handlers_gethandlerdata(s->handlers, sel);                        \
+    return func(s->closure, hd, val);                                          \
+  }
+
+PUTVAL(int32,  int32_t)
+PUTVAL(int64,  int64_t)
+PUTVAL(uint32, uint32_t)
+PUTVAL(uint64, uint64_t)
+PUTVAL(float,  float)
+PUTVAL(double, double)
+PUTVAL(bool,   bool)
+#undef PUTVAL
+
+/* Rebinds sink "s" to handler table "h" and closure "c". */
+UPB_INLINE void upb_sink_reset(upb_sink *s, const upb_handlers *h, void *c) {
+  s->closure = c;
+  s->handlers = h;
+}
+
+/* Delivers "n" bytes at "buf" to the string handler registered for "sel" and
+ * returns the handler's result.  When the sink has no handler table, or no
+ * handler is registered for the selector, nothing is invoked and "n" is
+ * returned (everything counts as consumed). */
+UPB_INLINE size_t upb_sink_putstring(upb_sink *s, upb_selector_t sel,
+                                     const char *buf, size_t n,
+                                     const upb_bufhandle *handle) {
+  typedef upb_string_handlerfunc strfn;
+  strfn *cb = NULL;
+  const void *hd;
+
+  if (s->handlers) {
+    cb = (strfn *)upb_handlers_gethandler(s->handlers, sel);
+  }
+  if (!cb) return n;
+
+  hd = upb_handlers_gethandlerdata(s->handlers, sel);
+  return cb(s->closure, hd, buf, n, handle);
+}
+
+/* Invokes the start-of-message handler (UPB_STARTMSG_SELECTOR) with the
+ * sink's closure and returns its result.  Returns true without invoking
+ * anything when there is no handler table or no such handler. */
+UPB_INLINE bool upb_sink_startmsg(upb_sink *s) {
+  typedef upb_startmsg_handlerfunc startfn;
+  startfn *cb = NULL;
+  const void *hd;
+
+  if (s->handlers) {
+    cb = (startfn *)upb_handlers_gethandler(s->handlers,
+                                            UPB_STARTMSG_SELECTOR);
+  }
+  if (!cb) return true;
+
+  hd = upb_handlers_gethandlerdata(s->handlers, UPB_STARTMSG_SELECTOR);
+  return cb(s->closure, hd);
+}
+
+/* Invokes the end-of-message handler (UPB_ENDMSG_SELECTOR) with the sink's
+ * closure and "status", and returns its result.  Returns true without
+ * invoking anything when there is no handler table or no such handler. */
+UPB_INLINE bool upb_sink_endmsg(upb_sink *s, upb_status *status) {
+  typedef upb_endmsg_handlerfunc endfn;
+  endfn *cb = NULL;
+  const void *hd;
+
+  if (s->handlers) {
+    cb = (endfn *)upb_handlers_gethandler(s->handlers, UPB_ENDMSG_SELECTOR);
+  }
+  if (!cb) return true;
+
+  hd = upb_handlers_gethandlerdata(s->handlers, UPB_ENDMSG_SELECTOR);
+  return cb(s->closure, hd, status);
+}
+
+/* Starts a sequence for selector "sel" and fills in the sub-sink "sub".
+ *
+ * "sub" first inherits the parent's handlers and closure.  If a start
+ * handler is registered, its return value becomes the sub-sink's closure and
+ * a NULL return makes this function report failure.  Without a handler table
+ * or handler the call succeeds and "sub" keeps the inherited values. */
+UPB_INLINE bool upb_sink_startseq(upb_sink *s, upb_selector_t sel,
+                                  upb_sink *sub) {
+  typedef upb_startfield_handlerfunc startfn;
+  startfn *cb = NULL;
+  const void *hd;
+
+  sub->handlers = s->handlers;
+  sub->closure = s->closure;
+  if (s->handlers) {
+    cb = (startfn *)upb_handlers_gethandler(s->handlers, sel);
+  }
+  if (!cb) return true;
+
+  hd = upb_handlers_gethandlerdata(s->handlers, sel);
+  sub->closure = cb(s->closure, hd);
+  return sub->closure != NULL;
+}
+
+/* Invokes the end-of-sequence handler registered for "sel" with the sink's
+ * closure and returns its result.  Returns true without invoking anything
+ * when there is no handler table or no handler for the selector. */
+UPB_INLINE bool upb_sink_endseq(upb_sink *s, upb_selector_t sel) {
+  typedef upb_endfield_handlerfunc endfn;
+  endfn *cb = NULL;
+  const void *hd;
+
+  if (s->handlers) {
+    cb = (endfn *)upb_handlers_gethandler(s->handlers, sel);
+  }
+  if (!cb) return true;
+
+  hd = upb_handlers_gethandlerdata(s->handlers, sel);
+  return cb(s->closure, hd);
+}
+
+/* Starts a string for selector "sel" and fills in the sub-sink "sub".
+ *
+ * "sub" first inherits the parent's handlers and closure.  If a start-string
+ * handler is registered it receives "size_hint"; its return value becomes
+ * the sub-sink's closure and a NULL return makes this function report
+ * failure.  Without a handler table or handler the call succeeds and "sub"
+ * keeps the inherited values. */
+UPB_INLINE bool upb_sink_startstr(upb_sink *s, upb_selector_t sel,
+                                  size_t size_hint, upb_sink *sub) {
+  typedef upb_startstr_handlerfunc startfn;
+  startfn *cb = NULL;
+  const void *hd;
+
+  sub->handlers = s->handlers;
+  sub->closure = s->closure;
+  if (s->handlers) {
+    cb = (startfn *)upb_handlers_gethandler(s->handlers, sel);
+  }
+  if (!cb) return true;
+
+  hd = upb_handlers_gethandlerdata(s->handlers, sel);
+  sub->closure = cb(s->closure, hd, size_hint);
+  return sub->closure != NULL;
+}
+
+/* Invokes the end-of-string handler registered for "sel" with the sink's
+ * closure and returns its result.  Returns true without invoking anything
+ * when there is no handler table or no handler for the selector. */
+UPB_INLINE bool upb_sink_endstr(upb_sink *s, upb_selector_t sel) {
+  typedef upb_endfield_handlerfunc endfn;
+  endfn *cb = NULL;
+  const void *hd;
+
+  if (s->handlers) {
+    cb = (endfn *)upb_handlers_gethandler(s->handlers, sel);
+  }
+  if (!cb) return true;
+
+  hd = upb_handlers_gethandlerdata(s->handlers, sel);
+  return cb(s->closure, hd);
+}
+
+/* Starts a submessage for selector "sel" and fills in the sub-sink "sub".
+ *
+ * "sub" inherits the parent's closure.  Its handlers are the sub-handlers
+ * looked up for "sel" (or NULL when the parent has no handler table).  If a
+ * start-submessage handler is registered, its return value becomes the
+ * sub-sink's closure and a NULL return makes this function report failure;
+ * otherwise the call succeeds. */
+UPB_INLINE bool upb_sink_startsubmsg(upb_sink *s, upb_selector_t sel,
+                                     upb_sink *sub) {
+  typedef upb_startfield_handlerfunc startfn;
+  startfn *cb;
+  const void *hd;
+
+  sub->closure = s->closure;
+  if (s->handlers == NULL) {
+    sub->handlers = NULL;
+    return true;
+  }
+
+  /* Keep this order: sub-handlers are resolved before the handler lookup. */
+  sub->handlers = upb_handlers_getsubhandlers_sel(s->handlers, sel);
+  cb = (startfn *)upb_handlers_gethandler(s->handlers, sel);
+  if (cb == NULL) return true;
+
+  hd = upb_handlers_gethandlerdata(s->handlers, sel);
+  sub->closure = cb(s->closure, hd);
+  return sub->closure != NULL;
+}
+
+/* Invokes the end-of-submessage handler registered for "sel" with the sink's
+ * closure and returns its result.
+ *
+ * Returns true without invoking anything when there is no handler table or
+ * no handler for the selector, matching upb_sink_endseq()/upb_sink_endstr().
+ * (Previously the missing-handler case returned the closure pointer converted
+ * to bool, so a sink with a NULL closure reported a spurious failure.) */
+UPB_INLINE bool upb_sink_endsubmsg(upb_sink *s, upb_selector_t sel) {
+  typedef upb_endfield_handlerfunc func;
+  func *endsubmsg;
+  const void *hd;
+  if (!s->handlers) return true;
+  endsubmsg = (func*)upb_handlers_gethandler(s->handlers, sel);
+
+  if (!endsubmsg) return true;
+  hd = upb_handlers_gethandlerdata(s->handlers, sel);
+  return endsubmsg(s->closure, hd);
+}
+
+UPB_END_EXTERN_C
+
+#ifdef __cplusplus
+
+namespace upb {
+
+/* Inline definitions of the upb::Sink members.  Each one forwards to the
+ * upb_sink_* C function of the same purpose, passing "this" as the sink. */
+
+template <class T> Sink::Sink(const Handlers* handlers, T* closure) {
+  upb_sink_reset(this, handlers, closure);
+}
+template <class T>
+inline void Sink::Reset(const Handlers* handlers, T* closure) {
+  upb_sink_reset(this, handlers, closure);
+}
+inline bool Sink::StartMessage() {
+  return upb_sink_startmsg(this);
+}
+inline bool Sink::EndMessage(Status* status) {
+  return upb_sink_endmsg(this, status);
+}
+inline bool Sink::PutInt32(Handlers::Selector sel, int32_t val) {
+  return upb_sink_putint32(this, sel, val);
+}
+inline bool Sink::PutInt64(Handlers::Selector sel, int64_t val) {
+  return upb_sink_putint64(this, sel, val);
+}
+inline bool Sink::PutUInt32(Handlers::Selector sel, uint32_t val) {
+  return upb_sink_putuint32(this, sel, val);
+}
+inline bool Sink::PutUInt64(Handlers::Selector sel, uint64_t val) {
+  return upb_sink_putuint64(this, sel, val);
+}
+inline bool Sink::PutFloat(Handlers::Selector sel, float val) {
+  return upb_sink_putfloat(this, sel, val);
+}
+inline bool Sink::PutDouble(Handlers::Selector sel, double val) {
+  return upb_sink_putdouble(this, sel, val);
+}
+inline bool Sink::PutBool(Handlers::Selector sel, bool val) {
+  return upb_sink_putbool(this, sel, val);
+}
+inline bool Sink::StartString(Handlers::Selector sel, size_t size_hint,
+                              Sink *sub) {
+  return upb_sink_startstr(this, sel, size_hint, sub);
+}
+inline size_t Sink::PutStringBuffer(Handlers::Selector sel, const char *buf,
+                                    size_t len, const BufferHandle* handle) {
+  return upb_sink_putstring(this, sel, buf, len, handle);
+}
+inline bool Sink::EndString(Handlers::Selector sel) {
+  return upb_sink_endstr(this, sel);
+}
+inline bool Sink::StartSubMessage(Handlers::Selector sel, Sink* sub) {
+  return upb_sink_startsubmsg(this, sel, sub);
+}
+inline bool Sink::EndSubMessage(Handlers::Selector sel) {
+  return upb_sink_endsubmsg(this, sel);
+}
+inline bool Sink::StartSequence(Handlers::Selector sel, Sink* sub) {
+  return upb_sink_startseq(this, sel, sub);
+}
+inline bool Sink::EndSequence(Handlers::Selector sel) {
+  return upb_sink_endseq(this, sel);
+}
+
+/* upb::BytesSink members: forwarders to the upb_bytessink_* C functions.
+ * The templated constructor goes through Reset(). */
+
+template <class T>
+BytesSink::BytesSink(const BytesHandler* handler, T* closure) {
+  Reset(handler, closure);
+}
+
+template <class T>
+void BytesSink::Reset(const BytesHandler *handler, T *closure) {
+  upb_bytessink_reset(this, handler, closure);
+}
+inline bool BytesSink::Start(size_t size_hint, void **subc) {
+  return upb_bytessink_start(this, size_hint, subc);
+}
+inline size_t BytesSink::PutBuffer(void *subc, const char *buf, size_t len,
+                                   const BufferHandle *handle) {
+  return upb_bytessink_putbuf(this, subc, buf, len, handle);
+}
+inline bool BytesSink::End() {
+  return upb_bytessink_end(this);
+}
+
+/* Static one-shot push; forwards to upb_bufsrc_putbuf(). */
+inline bool BufferSource::PutBuffer(const char *buf, size_t len,
+                                    BytesSink *sink) {
+  return upb_bufsrc_putbuf(buf, len, sink);
+}
+
+}  /* namespace upb */
+#endif
+
+#endif
+/*
+** For handlers that do very tiny, very simple operations, the function call
+** overhead of calling a handler can be significant.  This file allows the
+** user to define handlers that do something very simple like store the value
+** to memory and/or set a hasbit.  JIT compilers can then special-case these
+** handlers and emit specialized code for them instead of actually calling the
+** handler.
+**
+** The functionality is very simple/limited right now but may expand to be able
+** to call another function.
+*/
+
+#ifndef UPB_SHIM_H
+#define UPB_SHIM_H
+
+
+/* Data describing one shim handler.
+ * Presumably "offset" is the offset the value is written to and "hasbit" the
+ * bit offset of the has-bit (negative meaning "no has-bit"), mirroring the
+ * "ofs"/"hasbit" arguments of upb_shim_set() -- confirm in shim.c. */
+typedef struct {
+  size_t offset;
+  int32_t hasbit;
+} upb_shim_data;
+
+#ifdef __cplusplus
+
+namespace upb {
+
+/* C++ facade for the shim API: only static functions, each of which forwards
+ * to the corresponding upb_shim_* C function. */
+struct Shim {
+  typedef upb_shim_data Data;
+
+  /* Sets a handler for the given field that writes the value to the given
+   * offset and, if hasbit >= 0, sets a bit at the given bit offset.  Returns
+   * true if the handler was set successfully. */
+  static bool Set(Handlers *h, const FieldDef *f, size_t ofs, int32_t hasbit);
+
+  /* If this handler is a shim, returns the corresponding upb::Shim::Data and
+   * stores the type in "type".  Otherwise returns NULL. */
+  static const Data* GetData(const Handlers* h, Handlers::Selector s,
+                             FieldDef::Type* type);
+};
+
+}  /* namespace upb */
+
+#endif
+
+UPB_BEGIN_EXTERN_C
+
+/* C API.  upb_shim_set() and upb_shim_getdata() are the functions that
+ * upb::Shim::Set() and upb::Shim::GetData() forward to; see the comments on
+ * those members for their contracts. */
+bool upb_shim_set(upb_handlers *h, const upb_fielddef *f, size_t offset,
+                  int32_t hasbit);
+const upb_shim_data *upb_shim_getdata(const upb_handlers *h, upb_selector_t s,
+                                      upb_fieldtype_t *type);
+
+UPB_END_EXTERN_C
+
+#ifdef __cplusplus
+/* C++ Wrappers: inline definitions of the upb::Shim static members, each a
+ * one-line forwarder to the C function declared above. */
+namespace upb {
+inline bool Shim::Set(Handlers* h, const FieldDef* f, size_t ofs,
+                      int32_t hasbit) {
+  return upb_shim_set(h, f, ofs, hasbit);
+}
+inline const Shim::Data* Shim::GetData(const Handlers* h, Handlers::Selector s,
+                                       FieldDef::Type* type) {
+  return upb_shim_getdata(h, s, type);
+}
+}  /* namespace upb */
+#endif
+
+#endif  /* UPB_SHIM_H */
+/*
+** upb::SymbolTable (upb_symtab)
+**
+** A symtab (symbol table) stores a name->def map of upb_defs.  Clients could
+** always create such tables themselves, but upb_symtab has logic for resolving
+** symbolic references, and in particular, for keeping a whole set of consistent
+** defs when replacing some subset of those defs.  This logic is nontrivial.
+**
+** This is a mixed C/C++ interface that offers a full API to both languages.
+** See the top-level README for more information.
+*/
+
+#ifndef UPB_SYMTAB_H_
+#define UPB_SYMTAB_H_
+
+
+#ifdef __cplusplus
+#include <vector>
+namespace upb { class SymbolTable; }
+#endif
+
+UPB_DECLARE_DERIVED_TYPE(upb::SymbolTable, upb::RefCounted,
+                         upb_symtab, upb_refcounted)
+
+/* Iterator state for walking a symtab: a string-table iterator plus a def
+ * type.  Presumably "type" selects which kind of def the iteration yields --
+ * confirm against the upb_symtab iteration functions.  The fields are marked
+ * UPB_PRIVATE_FOR_CPP, i.e. not meant to be touched directly from C++. */
+typedef struct {
+ UPB_PRIVATE_FOR_CPP
+  upb_strtable_iter iter;
+  upb_deftype_t type;
+} upb_symtab_iter;
+
+#ifdef __cplusplus
+
+/* Non-const methods in upb::SymbolTable are NOT thread-safe. */
+class upb::SymbolTable {
+ public:
+  /* Returns a new symbol table wrapped in a reffed_ptr.
+   * Returns NULL if memory allocation failed. */
+  static reffed_ptr<SymbolTable> New();
+
+  /* Include RefCounted base methods. */
+  UPB_REFCOUNTED_CPPMETHODS
+
+  /* For all lookup functions, the returned pointer is not owned by the
+   * caller; it may be invalidated by any non-const call or unref of the
+   * SymbolTable!  To protect against this, take a ref if desired. */
+
+  /* Freezes the symbol table: prevents further modification of it.
+   * After the Freeze() operation is successful, the SymbolTable must only be
+   * accessed via a const pointer.
+   *
+   * Unlike with upb::MessageDef/upb::EnumDef/etc, freezing a SymbolTable is not
+   * a necessary step in using a SymbolTable.  If you have no need for it to be
+   * immutable, there is no need to freeze it ever.  However sometimes it is
+   * useful, and SymbolTables that are statically compiled into the binary are
+   * always frozen by nature. */
+  void Freeze();
+
+  /* Resolves the given symbol using the rules described in descriptor.proto,
+   * namely:
+   *
+   *    If the name starts with a '.', it is fully-qualified.  Otherwise,
+   *    C++-like scoping rules are used to find the type (i.e. first the nested
+   *    types within this message are searched, then within the parent, on up
+   *    to the root namespace).
+   *
+   * If not found, returns NULL. */
+  const Def* Resolve(const char* base, const char* sym) const;
+
+  /* Finds an entry in the symbol table with this exact name.  If not found,
+   * returns NULL. */
+  const Def* Lookup(const char *sym) const;
+  const MessageDef* LookupMessage(const char *sym) const;
+  const EnumDef* LookupEnum(const char *sym) const;
+
+  /* TODO: introduce a C++ iterator, but make it nice and templated so that if
+   * you ask for an iterator of MessageDef the iterated elements are strongly
+   * typed as MessageDef*. */
+
+  /* Adds the given mutable defs to the symtab, resolving all symbols
+   * (including enum default values) and finalizing the defs.  Only one def per
+   * name may be in the list, but defs can replace existing defs in the symtab.
+   * All defs must have a name -- anonymous defs are not allowed.  Anonymous
+   * defs can still be frozen by calling upb_def_freeze() directly.
+   *
+   * Any existing defs that can reach defs that are being replaced will
+   * themselves be replaced also, so that the resulting set of defs is fully
+   * consistent.
+   *
+   * The logic implemented in this method is a convenience; ultimately it
+   * calls some combination of upb_fielddef_setsubdef(), upb_def_dup(), and
+   * upb_freeze(), any of which the client could call themselves.  However,
+   * since the logic for doing so is nontrivial, we provide it here.
+   *
+   * The entire operation either succeeds or fails.  If the operation fails,
+   * the symtab is unchanged, false is returned, and status indicates the
+   * error.  The caller passes a ref on all defs to the symtab (even if the
+   * operation fails).
+   *
+   * Parameters:
+   *   defs       array of n mutable defs to add.
+   *   n          number of entries in defs.
+   *   ref_donor  owner whose refs on the defs are passed to the symtab.
+   *   status     receives the error if the operation fails.
+   *
+   * TODO(haberman): currently failure will leave the symtab unchanged, but may
+   * leave the defs themselves partially resolved.  Does this matter?  If so we
+   * could do a prepass that ensures that all symbols are resolvable and bail
+   * if not, so we don't mutate anything until we know the operation will
+   * succeed.
+   *
+   * TODO(haberman): since the defs must be mutable, refining a frozen def
+   * requires making mutable copies of the entire tree.  This is wasteful if
+   * only a few messages are changing.  We may want to add a way of adding a
+   * tree of frozen defs to the symtab (perhaps an alternate constructor where
+   * you pass the root of the tree?) */
+  bool Add(Def*const* defs, int n, void* ref_donor, upb_status* status);
+
+  /* Convenience overload of Add() that takes the defs as a std::vector.
+   *
+   * An empty vector is forwarded as a NULL array with n == 0: evaluating
+   * defs[0] on an empty vector is undefined behavior, so the element address
+   * is only taken when at least one element exists.
+   * NOTE(review): this relies on upb_symtab_add() not dereferencing the array
+   * when n == 0 -- confirm in the implementation. */
+  bool Add(const std::vector<Def*>& defs, void *owner, Status* status) {
+    Def* const* array = defs.empty() ? NULL : &defs[0];
+    return Add(array, static_cast<int>(defs.size()), owner, status);
+  }
+
+ private:
+  UPB_DISALLOW_POD_OPS(SymbolTable, upb::SymbolTable)
+};
+
+#endif  /* __cplusplus */
+
+UPB_BEGIN_EXTERN_C
+
+/* Native C API. */
+
+/* Include refcounted methods like upb_symtab_ref(). */
+UPB_REFCOUNTED_CMETHODS(upb_symtab, upb_symtab_upcast)
+
+/* C entry points for the symbol table.  The inline upb::SymbolTable wrappers
+ * in this header forward to them: New() -> upb_symtab_new(),
+ * Freeze() -> upb_symtab_freeze(), Resolve() -> upb_symtab_resolve(),
+ * Lookup() -> upb_symtab_lookup(), LookupMessage() -> upb_symtab_lookupmsg(),
+ * Add() -> upb_symtab_add().  upb_symtab_lookupenum() has the same shape as
+ * the declared SymbolTable::LookupEnum().  See the comments on those methods
+ * for the documented behavior. */
+upb_symtab *upb_symtab_new(const void *owner);
+void upb_symtab_freeze(upb_symtab *s);
+const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
+                                  const char *sym);
+const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym);
+const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym);
+const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym);
+bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
+                    upb_status *status);
+
+/* Iteration over the defs in a symtab:
+ *
+ * upb_symtab_iter i;
+ * for(upb_symtab_begin(&i, s, type); !upb_symtab_done(&i);
+ *     upb_symtab_next(&i)) {
+ *   const upb_def *def = upb_symtab_iter_def(&i);
+ *    // ...
+ * }
+ *
+ * For C we don't have separate iterators for const and non-const.
+ * It is the caller's responsibility to cast the upb_fielddef* to
+ * const if the upb_msgdef* is const.
+ *
+ * NOTE(review): the sentence above mentions upb_fielddef/upb_msgdef and looks
+ * carried over from a different iterator; the functions below take a const
+ * upb_symtab* and upb_symtab_iter_def() already returns a const upb_def*. */
+void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s,
+                      upb_deftype_t type);
+void upb_symtab_next(upb_symtab_iter *iter);
+bool upb_symtab_done(const upb_symtab_iter *iter);
+const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter);
+
+UPB_END_EXTERN_C
+
+#ifdef __cplusplus
+/* C++ inline wrappers. */
+namespace upb {
+/* Creates a symtab through the C API, using the address of the local pointer
+ * as the owner, and passes the same owner to reffed_ptr (presumably so that
+ * the reffed_ptr adopts that ref -- see reffed_ptr's constructor). */
+inline reffed_ptr<SymbolTable> SymbolTable::New() {
+  upb_symtab *s = upb_symtab_new(&s);
+  return reffed_ptr<SymbolTable>(s, &s);
+}
+
+/* Forwards to upb_symtab_freeze(). */
+inline void SymbolTable::Freeze() {
+  upb_symtab_freeze(this);
+}
+/* Forwards to upb_symtab_resolve(). */
+inline const Def *SymbolTable::Resolve(const char *base,
+                                       const char *sym) const {
+  return upb_symtab_resolve(this, base, sym);
+}
+/* Forwards to upb_symtab_lookup(). */
+inline const Def* SymbolTable::Lookup(const char *sym) const {
+  return upb_symtab_lookup(this, sym);
+}
+/* Forwards to upb_symtab_lookupmsg(). */
+inline const MessageDef *SymbolTable::LookupMessage(const char *sym) const {
+  return upb_symtab_lookupmsg(this, sym);
+}
+/* Forwards to upb_symtab_lookupenum().  This wrapper was missing although the
+ * method is declared in the class, so any use of LookupEnum() would have
+ * failed to link. */
+inline const EnumDef *SymbolTable::LookupEnum(const char *sym) const {
+  return upb_symtab_lookupenum(this, sym);
+}
+/* Forwards to upb_symtab_add(). */
+inline bool SymbolTable::Add(
+    Def*const* defs, int n, void* ref_donor, upb_status* status) {
+  return upb_symtab_add(this, (upb_def*const*)defs, n, ref_donor, status);
+}
+}  /* namespace upb */
+#endif
+
+#endif  /* UPB_SYMTAB_H_ */
+/*
+** upb::descriptor::Reader (upb_descreader)
+**
+** Provides a way of building upb::Defs from data in descriptor.proto format.
+*/
+
+#ifndef UPB_DESCRIPTOR_H
+#define UPB_DESCRIPTOR_H
+
+
+#ifdef __cplusplus
+namespace upb {
+namespace descriptor {
+class Reader;
+}  /* namespace descriptor */
+}  /* namespace upb */
+#endif
+
+/* Associates the C++ name upb::descriptor::Reader with the C name
+ * upb_descreader.  The macro is defined elsewhere; NOTE(review): confirm its
+ * exact expansion there. */
+UPB_DECLARE_TYPE(upb::descriptor::Reader, upb_descreader)
+
+#ifdef __cplusplus
+
+/* Class that receives descriptor data according to the descriptor.proto schema
+ * and uses it to build upb::Defs corresponding to that schema. */
+class upb::descriptor::Reader {
+ public:
+  /* These handlers must have come from NewHandlers() and must outlive the
+   * Reader.
+   *
+   * TODO: generate the handlers statically (like we do with the
+   * descriptor.proto defs) so that there is no need to pass this parameter (or
+   * to build/memory-manage the handlers at runtime at all).  Unfortunately this
+   * is a bit tricky to implement for Handlers, but necessary to simplify this
+   * interface. */
+  static Reader* Create(Environment* env, const Handlers* handlers);
+
+  /* The reader's input; this is where descriptor.proto data should be sent. */
+  Sink* input();
+
+  /* Returns an array of all defs that have been parsed, and transfers ownership
+   * of them to "owner".  The number of defs is stored in *n.  Ownership of the
+   * returned array is retained and is invalidated by any other call into
+   * Reader.
+   *
+   * These defs are not frozen or resolved; they are ready to be added to a
+   * symtab. */
+  upb::Def** GetDefs(void* owner, int* n);
+
+  /* Builds and returns handlers for the reader, owned by "owner."
+   *
+   * NOTE(review): the C function upb_descreader_newhandlers() is declared to
+   * return a const upb_handlers*, while this method returns a non-const
+   * Handlers*.  No inline definition of NewHandlers() appears next to those
+   * of Create(), input() and GetDefs() in this header -- confirm that it is
+   * defined elsewhere and that the constness is intended. */
+  static Handlers* NewHandlers(const void* owner);
+
+ private:
+  UPB_DISALLOW_POD_OPS(Reader, upb::descriptor::Reader)
+};
+
+#endif
+
+UPB_BEGIN_EXTERN_C
+
+/* C API.  The inline upb::descriptor::Reader wrappers in this header forward
+ * to these: Create() -> upb_descreader_create(), input() ->
+ * upb_descreader_input(), GetDefs() -> upb_descreader_getdefs().  See the
+ * comments on those methods for the documented behavior. */
+upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h);
+upb_sink *upb_descreader_input(upb_descreader *r);
+upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n);
+/* Same parameter list as Reader::NewHandlers(); note the const return type. */
+const upb_handlers *upb_descreader_newhandlers(const void *owner);
+
+UPB_END_EXTERN_C
+
+#ifdef __cplusplus
+/* C++ implementation details. ************************************************/
+namespace upb {
+namespace descriptor {
+
+/* Thin wrapper over upb_descreader_create(). */
+inline Reader* Reader::Create(Environment* env, const Handlers* handlers) {
+  return upb_descreader_create(env, handlers);
+}
+
+/* Thin wrapper over upb_descreader_input(). */
+inline Sink* Reader::input() {
+  return upb_descreader_input(this);
+}
+
+/* Thin wrapper over upb_descreader_getdefs(). */
+inline upb::Def** Reader::GetDefs(void* owner, int* n) {
+  return upb_descreader_getdefs(this, owner, n);
+}
+
+}  /* namespace descriptor */
+}  /* namespace upb */
+#endif
+
+#endif  /* UPB_DESCRIPTOR_H */
+/* This file contains accessors for a set of compiled-in defs.
+ * Note that unlike Google's protobuf, it does *not* define
+ * generated classes or any other kind of data structure for
+ * actually storing protobufs.  It only contains *defs* which
+ * let you reflect over a protobuf *schema*.
+ */
+/* This file was generated by upbc (the upb compiler).
+ * Do not edit -- your changes will be discarded when the file is
+ * regenerated. */
+
+#ifndef GOOGLE_PROTOBUF_DESCRIPTOR_UPB_H_
+#define GOOGLE_PROTOBUF_DESCRIPTOR_UPB_H_
+
+
+#ifdef __cplusplus
+UPB_BEGIN_EXTERN_C
+#endif
+
+/* Enums */
+
+/* Generated constants for google.protobuf.FieldDescriptorProto.Label. */
+typedef enum {
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_OPTIONAL = 1,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REQUIRED = 2,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_REPEATED = 3
+} google_protobuf_FieldDescriptorProto_Label;
+
+/* Generated constants for google.protobuf.FieldDescriptorProto.Type. */
+typedef enum {
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_DOUBLE = 1,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FLOAT = 2,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT64 = 3,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT64 = 4,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32 = 5,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED64 = 6,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_FIXED32 = 7,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BOOL = 8,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_STRING = 9,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_GROUP = 10,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_MESSAGE = 11,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_BYTES = 12,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_UINT32 = 13,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_ENUM = 14,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED32 = 15,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SFIXED64 = 16,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT32 = 17,
+  GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_SINT64 = 18
+} google_protobuf_FieldDescriptorProto_Type;
+
+/* Generated constants for google.protobuf.FieldOptions.CType.  Unlike the
+ * other enums in this file, the first value here is 0. */
+typedef enum {
+  GOOGLE_PROTOBUF_FIELDOPTIONS_STRING = 0,
+  GOOGLE_PROTOBUF_FIELDOPTIONS_CORD = 1,
+  GOOGLE_PROTOBUF_FIELDOPTIONS_STRING_PIECE = 2
+} google_protobuf_FieldOptions_CType;
+
+/* Generated constants for google.protobuf.FileOptions.OptimizeMode. */
+typedef enum {
+  GOOGLE_PROTOBUF_FILEOPTIONS_SPEED = 1,
+  GOOGLE_PROTOBUF_FILEOPTIONS_CODE_SIZE = 2,
+  GOOGLE_PROTOBUF_FILEOPTIONS_LITE_RUNTIME = 3
+} google_protobuf_FileOptions_OptimizeMode;
+
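+/* Selectors: one integer constant per message field and kind, named
+ * SEL_<MESSAGE>_<FIELD>_<KIND>, where <KIND> is a suffix such as STARTSUBMSG,
+ * ENDSUBMSG, STARTSEQ, ENDSEQ, STARTSTR, ENDSTR, STRING, INT32 or BOOL.
+ * Values are numbered separately for each message, starting at 2. */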
+
+/* google.protobuf.DescriptorProto */
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_STARTSUBMSG 2
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_STARTSUBMSG 3
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_STARTSUBMSG 4
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE_STARTSUBMSG 5
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_STARTSUBMSG 6
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS_STARTSUBMSG 7
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_STARTSEQ 8
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_ENDSEQ 9
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_FIELD_ENDSUBMSG 10
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_STARTSEQ 11
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_ENDSEQ 12
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_NESTED_TYPE_ENDSUBMSG 13
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_STARTSEQ 14
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_ENDSEQ 15
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_ENUM_TYPE_ENDSUBMSG 16
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE_STARTSEQ 17
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE_ENDSEQ 18
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_RANGE_ENDSUBMSG 19
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_STARTSEQ 20
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_ENDSEQ 21
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSION_ENDSUBMSG 22
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_OPTIONS_ENDSUBMSG 23
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_STRING 24
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_STARTSTR 25
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_NAME_ENDSTR 26
+
+/* google.protobuf.DescriptorProto.ExtensionRange */
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_START_INT32 2
+#define SEL_GOOGLE_PROTOBUF_DESCRIPTORPROTO_EXTENSIONRANGE_END_INT32 3
+
+/* google.protobuf.EnumDescriptorProto */
+#define SEL_GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_STARTSUBMSG 2
+#define SEL_GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS_STARTSUBMSG 3
+#define SEL_GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_STARTSEQ 4
+#define SEL_GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_ENDSEQ 5
+#define SEL_GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_VALUE_ENDSUBMSG 6
+#define SEL_GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_OPTIONS_ENDSUBMSG 7
+#define SEL_GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_STRING 8
+#define SEL_GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_STARTSTR 9
+#define SEL_GOOGLE_PROTOBUF_ENUMDESCRIPTORPROTO_NAME_ENDSTR 10
+
+/* google.protobuf.EnumOptions */
+#define SEL_GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION_STARTSUBMSG 2
+#define SEL_GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION_STARTSEQ 3
+#define SEL_GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION_ENDSEQ 4
+#define SEL_GOOGLE_PROTOBUF_ENUMOPTIONS_UNINTERPRETED_OPTION_ENDSUBMSG 5
+#define SEL_GOOGLE_PROTOBUF_ENUMOPTIONS_ALLOW_ALIAS_BOOL 6
+
+/* google.protobuf.EnumValueDescriptorProto */
+#define SEL_GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS_STARTSUBMSG 2
+#define SEL_GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_OPTIONS_ENDSUBMSG 3
+#define SEL_GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_STRING 4
+#define SEL_GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_STARTSTR 5
+#define SEL_GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NAME_ENDSTR 6
+#define SEL_GOOGLE_PROTOBUF_ENUMVALUEDESCRIPTORPROTO_NUMBER_INT32 7
+
+/* google.protobuf.EnumValueOptions */
+#define SEL_GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION_STARTSUBMSG 2
+#define SEL_GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION_STARTSEQ 3
+#define SEL_GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION_ENDSEQ 4
+#define SEL_GOOGLE_PROTOBUF_ENUMVALUEOPTIONS_UNINTERPRETED_OPTION_ENDSUBMSG 5
+
+/* google.protobuf.FieldDescriptorProto */
+#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS_STARTSUBMSG 2
+#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_OPTIONS_ENDSUBMSG 3
+#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_STRING 4
+#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_STARTSTR 5
+#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NAME_ENDSTR 6
+#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE_STRING 7
+#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE_STARTSTR 8
+#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_EXTENDEE_ENDSTR 9
+#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_NUMBER_INT32 10
+#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_LABEL_INT32 11
+#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_INT32 12
+#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_STRING 13
+#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_STARTSTR 14
+#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_TYPE_NAME_ENDSTR 15
+#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE_STRING 16
+#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE_STARTSTR 17
+#define SEL_GOOGLE_PROTOBUF_FIELDDESCRIPTORPROTO_DEFAULT_VALUE_ENDSTR 18
+
+/* google.protobuf.FieldOptions */
+#define SEL_GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION_STARTSUBMSG 2
+#define SEL_GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION_STARTSEQ 3
+#define SEL_GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION_ENDSEQ 4
+#define SEL_GOOGLE_PROTOBUF_FIELDOPTIONS_UNINTERPRETED_OPTION_ENDSUBMSG 5
+#define SEL_GOOGLE_PROTOBUF_FIELDOPTIONS_CTYPE_INT32 6
+#define SEL_GOOGLE_PROTOBUF_FIELDOPTIONS_PACKED_BOOL 7
+#define SEL_GOOGLE_PROTOBUF_FIELDOPTIONS_DEPRECATED_BOOL 8
+#define SEL_GOOGLE_PROTOBUF_FIELDOPTIONS_LAZY_BOOL 9
+#define SEL_GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY_STRING 10
+#define SEL_GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY_STARTSTR 11
+#define SEL_GOOGLE_PROTOBUF_FIELDOPTIONS_EXPERIMENTAL_MAP_KEY_ENDSTR 12
+#define SEL_GOOGLE_PROTOBUF_FIELDOPTIONS_WEAK_BOOL 13
+
+/* google.protobuf.FileDescriptorProto */
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_STARTSUBMSG 2
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_STARTSUBMSG 3
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE_STARTSUBMSG 4
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION_STARTSUBMSG 5
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS_STARTSUBMSG 6
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO_STARTSUBMSG 7
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_STARTSEQ 8
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_ENDSEQ 9
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_MESSAGE_TYPE_ENDSUBMSG 10
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_STARTSEQ 11
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_ENDSEQ 12
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_ENUM_TYPE_ENDSUBMSG 13
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE_STARTSEQ 14
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE_ENDSEQ 15
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SERVICE_ENDSUBMSG 16
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION_STARTSEQ 17
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION_ENDSEQ 18
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_EXTENSION_ENDSUBMSG 19
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_OPTIONS_ENDSUBMSG 20
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_SOURCE_CODE_INFO_ENDSUBMSG 21
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_STRING 22
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_STARTSTR 23
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_NAME_ENDSTR 24
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_STRING 25
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_STARTSTR 26
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PACKAGE_ENDSTR 27
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY_STARTSEQ 28
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY_ENDSEQ 29
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY_STRING 30
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY_STARTSTR 31
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_DEPENDENCY_ENDSTR 32
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PUBLIC_DEPENDENCY_STARTSEQ 33
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PUBLIC_DEPENDENCY_ENDSEQ 34
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_PUBLIC_DEPENDENCY_INT32 35
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_WEAK_DEPENDENCY_STARTSEQ 36
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_WEAK_DEPENDENCY_ENDSEQ 37
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORPROTO_WEAK_DEPENDENCY_INT32 38
+
+/* google.protobuf.FileDescriptorSet */
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_STARTSUBMSG 2
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_STARTSEQ 3
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_ENDSEQ 4
+#define SEL_GOOGLE_PROTOBUF_FILEDESCRIPTORSET_FILE_ENDSUBMSG 5
+
+/* google.protobuf.FileOptions */
+#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION_STARTSUBMSG 2
+#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION_STARTSEQ 3
+#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION_ENDSEQ 4
+#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_UNINTERPRETED_OPTION_ENDSUBMSG 5
+#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE_STRING 6
+#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE_STARTSTR 7
+#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_PACKAGE_ENDSTR 8
+#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME_STRING 9
+#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME_STARTSTR 10
+#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_OUTER_CLASSNAME_ENDSTR 11
+#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_OPTIMIZE_FOR_INT32 12
+#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_MULTIPLE_FILES_BOOL 13
+#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_GO_PACKAGE_STRING 14
+#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_GO_PACKAGE_STARTSTR 15
+#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_GO_PACKAGE_ENDSTR 16
+#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_CC_GENERIC_SERVICES_BOOL 17
+#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERIC_SERVICES_BOOL 18
+#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_PY_GENERIC_SERVICES_BOOL 19
+#define SEL_GOOGLE_PROTOBUF_FILEOPTIONS_JAVA_GENERATE_EQUALS_AND_HASH_BOOL 20
+
+/* google.protobuf.MessageOptions */
+#define SEL_GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION_STARTSUBMSG 2
+#define SEL_GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION_STARTSEQ 3
+#define SEL_GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION_ENDSEQ 4
+#define SEL_GOOGLE_PROTOBUF_MESSAGEOPTIONS_UNINTERPRETED_OPTION_ENDSUBMSG 5
+#define SEL_GOOGLE_PROTOBUF_MESSAGEOPTIONS_MESSAGE_SET_WIRE_FORMAT_BOOL 6
+#define SEL_GOOGLE_PROTOBUF_MESSAGEOPTIONS_NO_STANDARD_DESCRIPTOR_ACCESSOR_BOOL 7
+
+/* google.protobuf.MethodDescriptorProto */
+#define SEL_GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS_STARTSUBMSG 2
+#define SEL_GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OPTIONS_ENDSUBMSG 3
+#define SEL_GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME_STRING 4
+#define SEL_GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME_STARTSTR 5
+#define SEL_GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_NAME_ENDSTR 6
+#define SEL_GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE_STRING 7
+#define SEL_GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE_STARTSTR 8
+#define SEL_GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_INPUT_TYPE_ENDSTR 9
+#define SEL_GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE_STRING 10
+#define SEL_GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE_STARTSTR 11
+#define SEL_GOOGLE_PROTOBUF_METHODDESCRIPTORPROTO_OUTPUT_TYPE_ENDSTR 12
+
+/* google.protobuf.MethodOptions */
+#define SEL_GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION_STARTSUBMSG 2
+#define SEL_GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION_STARTSEQ 3
+#define SEL_GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION_ENDSEQ 4
+#define SEL_GOOGLE_PROTOBUF_METHODOPTIONS_UNINTERPRETED_OPTION_ENDSUBMSG 5
+
+/* google.protobuf.ServiceDescriptorProto */
+#define SEL_GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD_STARTSUBMSG 2
+#define SEL_GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS_STARTSUBMSG 3
+#define SEL_GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD_STARTSEQ 4
+#define SEL_GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD_ENDSEQ 5
+#define SEL_GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_METHOD_ENDSUBMSG 6
+#define SEL_GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_OPTIONS_ENDSUBMSG 7
+#define SEL_GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME_STRING 8
+#define SEL_GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME_STARTSTR 9
+#define SEL_GOOGLE_PROTOBUF_SERVICEDESCRIPTORPROTO_NAME_ENDSTR 10
+
+/* google.protobuf.ServiceOptions */
+#define SEL_GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION_STARTSUBMSG 2
+#define SEL_GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION_STARTSEQ 3
+#define SEL_GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION_ENDSEQ 4
+#define SEL_GOOGLE_PROTOBUF_SERVICEOPTIONS_UNINTERPRETED_OPTION_ENDSUBMSG 5
+
+/* google.protobuf.SourceCodeInfo */
+#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_STARTSUBMSG 2
+#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_STARTSEQ 3
+#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_ENDSEQ 4
+#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_ENDSUBMSG 5
+
+/* google.protobuf.SourceCodeInfo.Location */
+#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH_STARTSEQ 2
+#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH_ENDSEQ 3
+#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_PATH_INT32 4
+#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_SPAN_STARTSEQ 5
+#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_SPAN_ENDSEQ 6
+#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_SPAN_INT32 7
+#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_LEADING_COMMENTS_STRING 8
+#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_LEADING_COMMENTS_STARTSTR 9
+#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_LEADING_COMMENTS_ENDSTR 10
+#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_TRAILING_COMMENTS_STRING 11
+#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_TRAILING_COMMENTS_STARTSTR 12
+#define SEL_GOOGLE_PROTOBUF_SOURCECODEINFO_LOCATION_TRAILING_COMMENTS_ENDSTR 13
+
+/* google.protobuf.UninterpretedOption */
+#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME_STARTSUBMSG 2
+#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME_STARTSEQ 3
+#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME_ENDSEQ 4
+#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAME_ENDSUBMSG 5
+#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE_STRING 6
+#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE_STARTSTR 7
+#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_IDENTIFIER_VALUE_ENDSTR 8
+#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_POSITIVE_INT_VALUE_UINT64 9
+#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NEGATIVE_INT_VALUE_INT64 10
+#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_DOUBLE_VALUE_DOUBLE 11
+#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE_STRING 12
+#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE_STARTSTR 13
+#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_STRING_VALUE_ENDSTR 14
+#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE_STRING 15
+#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE_STARTSTR 16
+#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_AGGREGATE_VALUE_ENDSTR 17
+
+/* google.protobuf.UninterpretedOption.NamePart */
+#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART_STRING 2
+#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART_STARTSTR 3
+#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_NAME_PART_ENDSTR 4
+#define SEL_GOOGLE_PROTOBUF_UNINTERPRETEDOPTION_NAMEPART_IS_EXTENSION_BOOL 5
+
+/* Returns a const symtab; the definition is not in this header.  Presumably
+ * this is the symtab holding the descriptor.proto defs that the accessors in
+ * this file look up, with a ref owned by "owner" -- TODO confirm against the
+ * generated .c file. */
+const upb_symtab *upbdefs_google_protobuf_descriptor(const void *owner);
+
+/* MessageDefs */
+/* Each accessor below looks up one message by its fully-qualified name in the
+ * symtab |s| via upb_symtab_lookupmsg(), asserts that the lookup returned a
+ * non-NULL msgdef, and returns it. */
+UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_DescriptorProto(
+    const upb_symtab *s) {
+  const upb_msgdef *m;
+  m = upb_symtab_lookupmsg(s, "google.protobuf.DescriptorProto");
+  assert(m);
+  return m;
+}
+UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_DescriptorProto_ExtensionRange(
+    const upb_symtab *s) {
+  const upb_msgdef *m;
+  m = upb_symtab_lookupmsg(s, "google.protobuf.DescriptorProto.ExtensionRange");
+  assert(m);
+  return m;
+}
+UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_EnumDescriptorProto(
+    const upb_symtab *s) {
+  const upb_msgdef *m;
+  m = upb_symtab_lookupmsg(s, "google.protobuf.EnumDescriptorProto");
+  assert(m);
+  return m;
+}
+UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_EnumOptions(
+    const upb_symtab *s) {
+  const upb_msgdef *m;
+  m = upb_symtab_lookupmsg(s, "google.protobuf.EnumOptions");
+  assert(m);
+  return m;
+}
+UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_EnumValueDescriptorProto(
+    const upb_symtab *s) {
+  const upb_msgdef *m;
+  m = upb_symtab_lookupmsg(s, "google.protobuf.EnumValueDescriptorProto");
+  assert(m);
+  return m;
+}
+UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_EnumValueOptions(
+    const upb_symtab *s) {
+  const upb_msgdef *m;
+  m = upb_symtab_lookupmsg(s, "google.protobuf.EnumValueOptions");
+  assert(m);
+  return m;
+}
+UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_FieldDescriptorProto(
+    const upb_symtab *s) {
+  const upb_msgdef *m;
+  m = upb_symtab_lookupmsg(s, "google.protobuf.FieldDescriptorProto");
+  assert(m);
+  return m;
+}
+UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_FieldOptions(
+    const upb_symtab *s) {
+  const upb_msgdef *m;
+  m = upb_symtab_lookupmsg(s, "google.protobuf.FieldOptions");
+  assert(m);
+  return m;
+}
+UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_FileDescriptorProto(
+    const upb_symtab *s) {
+  const upb_msgdef *m;
+  m = upb_symtab_lookupmsg(s, "google.protobuf.FileDescriptorProto");
+  assert(m);
+  return m;
+}
+UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_FileDescriptorSet(
+    const upb_symtab *s) {
+  const upb_msgdef *m;
+  m = upb_symtab_lookupmsg(s, "google.protobuf.FileDescriptorSet");
+  assert(m);
+  return m;
+}
+UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_FileOptions(
+    const upb_symtab *s) {
+  const upb_msgdef *m;
+  m = upb_symtab_lookupmsg(s, "google.protobuf.FileOptions");
+  assert(m);
+  return m;
+}
+UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_MessageOptions(
+    const upb_symtab *s) {
+  const upb_msgdef *m;
+  m = upb_symtab_lookupmsg(s, "google.protobuf.MessageOptions");
+  assert(m);
+  return m;
+}
+UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_MethodDescriptorProto(
+    const upb_symtab *s) {
+  const upb_msgdef *m;
+  m = upb_symtab_lookupmsg(s, "google.protobuf.MethodDescriptorProto");
+  assert(m);
+  return m;
+}
+UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_MethodOptions(
+    const upb_symtab *s) {
+  const upb_msgdef *m;
+  m = upb_symtab_lookupmsg(s, "google.protobuf.MethodOptions");
+  assert(m);
+  return m;
+}
+UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_ServiceDescriptorProto(
+    const upb_symtab *s) {
+  const upb_msgdef *m;
+  m = upb_symtab_lookupmsg(s, "google.protobuf.ServiceDescriptorProto");
+  assert(m);
+  return m;
+}
+UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_ServiceOptions(
+    const upb_symtab *s) {
+  const upb_msgdef *m;
+  m = upb_symtab_lookupmsg(s, "google.protobuf.ServiceOptions");
+  assert(m);
+  return m;
+}
+UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_SourceCodeInfo(
+    const upb_symtab *s) {
+  const upb_msgdef *m;
+  m = upb_symtab_lookupmsg(s, "google.protobuf.SourceCodeInfo");
+  assert(m);
+  return m;
+}
+UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_SourceCodeInfo_Location(
+    const upb_symtab *s) {
+  const upb_msgdef *m;
+  m = upb_symtab_lookupmsg(s, "google.protobuf.SourceCodeInfo.Location");
+  assert(m);
+  return m;
+}
+UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_UninterpretedOption(
+    const upb_symtab *s) {
+  const upb_msgdef *m;
+  m = upb_symtab_lookupmsg(s, "google.protobuf.UninterpretedOption");
+  assert(m);
+  return m;
+}
+UPB_INLINE const upb_msgdef *upbdefs_google_protobuf_UninterpretedOption_NamePart(
+    const upb_symtab *s) {
+  const upb_msgdef *m;
+  m = upb_symtab_lookupmsg(s, "google.protobuf.UninterpretedOption.NamePart");
+  assert(m);
+  return m;
+}
+
+
+/* EnumDefs */
+/* Each accessor below looks up one enum by its fully-qualified name in the
+ * symtab |s| via upb_symtab_lookupenum(), asserts that the lookup returned a
+ * non-NULL enumdef, and returns it. */
+UPB_INLINE const upb_enumdef *upbdefs_google_protobuf_FieldDescriptorProto_Label(
+    const upb_symtab *s) {
+  const upb_enumdef *e;
+  e = upb_symtab_lookupenum(s, "google.protobuf.FieldDescriptorProto.Label");
+  assert(e);
+  return e;
+}
+UPB_INLINE const upb_enumdef *upbdefs_google_protobuf_FieldDescriptorProto_Type(
+    const upb_symtab *s) {
+  const upb_enumdef *e;
+  e = upb_symtab_lookupenum(s, "google.protobuf.FieldDescriptorProto.Type");
+  assert(e);
+  return e;
+}
+UPB_INLINE const upb_enumdef *upbdefs_google_protobuf_FieldOptions_CType(
+    const upb_symtab *s) {
+  const upb_enumdef *e;
+  e = upb_symtab_lookupenum(s, "google.protobuf.FieldOptions.CType");
+  assert(e);
+  return e;
+}
+UPB_INLINE const upb_enumdef *upbdefs_google_protobuf_FileOptions_OptimizeMode(
+    const upb_symtab *s) {
+  const upb_enumdef *e;
+  e = upb_symtab_lookupenum(s, "google.protobuf.FileOptions.OptimizeMode");
+  assert(e);
+  return e;
+}
+
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_DescriptorProto_ExtensionRange_end(const upb_symtab *s) { return upb_msgdef_itof(upbdefs_google_protobuf_DescriptorProto_ExtensionRange(s), 2); }
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_DescriptorProto_ExtensionRange_start(const upb_symtab *s) { return upb_msgdef_itof(upbdefs_google_protobuf_DescriptorProto_ExtensionRange(s), 1); }
+/* Fielddef accessors for the DescriptorProto msgdef: each one passes that
+ * msgdef and a fixed field number to upb_msgdef_itof(). */
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_DescriptorProto_enum_type(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_DescriptorProto(s), 4);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_DescriptorProto_extension(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_DescriptorProto(s), 6);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_DescriptorProto_extension_range(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_DescriptorProto(s), 5);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_DescriptorProto_field(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_DescriptorProto(s), 2);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_DescriptorProto_name(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_DescriptorProto(s), 1);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_DescriptorProto_nested_type(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_DescriptorProto(s), 3);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_DescriptorProto_options(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_DescriptorProto(s), 7);
+}
+/* Fielddef accessors for the EnumDescriptorProto msgdef: each one passes that
+ * msgdef and a fixed field number to upb_msgdef_itof(). */
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_EnumDescriptorProto_name(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_EnumDescriptorProto(s), 1);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_EnumDescriptorProto_options(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_EnumDescriptorProto(s), 3);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_EnumDescriptorProto_value(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_EnumDescriptorProto(s), 2);
+}
+/* Fielddef accessors for the EnumOptions msgdef: each one passes that msgdef
+ * and a fixed field number to upb_msgdef_itof(). */
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_EnumOptions_allow_alias(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_EnumOptions(s), 2);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_EnumOptions_uninterpreted_option(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_EnumOptions(s), 999);
+}
+/* Fielddef accessors for the EnumValueDescriptorProto msgdef: each one passes
+ * that msgdef and a fixed field number to upb_msgdef_itof(). */
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_EnumValueDescriptorProto_name(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_EnumValueDescriptorProto(s), 1);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_EnumValueDescriptorProto_number(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_EnumValueDescriptorProto(s), 2);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_EnumValueDescriptorProto_options(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_EnumValueDescriptorProto(s), 3);
+}
+/* Fielddef accessor for the EnumValueOptions msgdef: passes that msgdef and
+ * field number 999 to upb_msgdef_itof(). */
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_EnumValueOptions_uninterpreted_option(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_EnumValueOptions(s), 999);
+}
+/* Fielddef accessors for the FieldDescriptorProto msgdef: each one passes that
+ * msgdef and a fixed field number to upb_msgdef_itof(). */
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FieldDescriptorProto_default_value(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FieldDescriptorProto(s), 7);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FieldDescriptorProto_extendee(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FieldDescriptorProto(s), 2);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FieldDescriptorProto_label(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FieldDescriptorProto(s), 4);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FieldDescriptorProto_name(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FieldDescriptorProto(s), 1);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FieldDescriptorProto_number(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FieldDescriptorProto(s), 3);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FieldDescriptorProto_options(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FieldDescriptorProto(s), 8);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FieldDescriptorProto_type(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FieldDescriptorProto(s), 5);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FieldDescriptorProto_type_name(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FieldDescriptorProto(s), 6);
+}
+/* Fielddef accessors for the FieldOptions msgdef: each one passes that msgdef
+ * and a fixed field number to upb_msgdef_itof(). */
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FieldOptions_ctype(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FieldOptions(s), 1);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FieldOptions_deprecated(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FieldOptions(s), 3);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FieldOptions_experimental_map_key(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FieldOptions(s), 9);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FieldOptions_lazy(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FieldOptions(s), 5);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FieldOptions_packed(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FieldOptions(s), 2);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FieldOptions_uninterpreted_option(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FieldOptions(s), 999);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FieldOptions_weak(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FieldOptions(s), 10);
+}
+/* Fielddef accessors for the FileDescriptorProto msgdef: each one passes that
+ * msgdef and a fixed field number to upb_msgdef_itof(). */
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileDescriptorProto_dependency(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileDescriptorProto(s), 3);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileDescriptorProto_enum_type(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileDescriptorProto(s), 5);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileDescriptorProto_extension(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileDescriptorProto(s), 7);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileDescriptorProto_message_type(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileDescriptorProto(s), 4);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileDescriptorProto_name(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileDescriptorProto(s), 1);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileDescriptorProto_options(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileDescriptorProto(s), 8);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileDescriptorProto_package(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileDescriptorProto(s), 2);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileDescriptorProto_public_dependency(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileDescriptorProto(s), 10);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileDescriptorProto_service(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileDescriptorProto(s), 6);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileDescriptorProto_source_code_info(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileDescriptorProto(s), 9);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileDescriptorProto_weak_dependency(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileDescriptorProto(s), 11);
+}
+/* Fielddef accessor for the FileDescriptorSet msgdef: passes that msgdef and
+ * field number 1 to upb_msgdef_itof(). */
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileDescriptorSet_file(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileDescriptorSet(s), 1);
+}
+/* Fielddef accessors for the FileOptions msgdef: each one passes that msgdef
+ * and a fixed field number to upb_msgdef_itof(). */
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileOptions_cc_generic_services(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileOptions(s), 16);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileOptions_go_package(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileOptions(s), 11);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileOptions_java_generate_equals_and_hash(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileOptions(s), 20);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileOptions_java_generic_services(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileOptions(s), 17);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileOptions_java_multiple_files(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileOptions(s), 10);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileOptions_java_outer_classname(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileOptions(s), 8);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileOptions_java_package(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileOptions(s), 1);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileOptions_optimize_for(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileOptions(s), 9);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileOptions_py_generic_services(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileOptions(s), 18);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_FileOptions_uninterpreted_option(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_FileOptions(s), 999);
+}
+/* Fielddef accessors for the MessageOptions msgdef: each one passes that
+ * msgdef and a fixed field number to upb_msgdef_itof(). */
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_MessageOptions_message_set_wire_format(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_MessageOptions(s), 1);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_MessageOptions_no_standard_descriptor_accessor(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_MessageOptions(s), 2);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_MessageOptions_uninterpreted_option(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_MessageOptions(s), 999);
+}
+/* Fielddef accessors for the MethodDescriptorProto msgdef: each one passes
+ * that msgdef and a fixed field number to upb_msgdef_itof(). */
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_MethodDescriptorProto_input_type(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_MethodDescriptorProto(s), 2);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_MethodDescriptorProto_name(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_MethodDescriptorProto(s), 1);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_MethodDescriptorProto_options(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_MethodDescriptorProto(s), 4);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_MethodDescriptorProto_output_type(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_MethodDescriptorProto(s), 3);
+}
+/* Fielddef accessor for the MethodOptions msgdef: passes that msgdef and
+ * field number 999 to upb_msgdef_itof(). */
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_MethodOptions_uninterpreted_option(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_MethodOptions(s), 999);
+}
+/* Fielddef accessors for the ServiceDescriptorProto msgdef: each one passes
+ * that msgdef and a fixed field number to upb_msgdef_itof(). */
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_ServiceDescriptorProto_method(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_ServiceDescriptorProto(s), 2);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_ServiceDescriptorProto_name(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_ServiceDescriptorProto(s), 1);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_ServiceDescriptorProto_options(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_ServiceDescriptorProto(s), 3);
+}
+/* Fielddef accessor for the ServiceOptions msgdef: passes that msgdef and
+ * field number 999 to upb_msgdef_itof(). */
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_ServiceOptions_uninterpreted_option(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_ServiceOptions(s), 999);
+}
+/* Fielddef accessors for the SourceCodeInfo.Location msgdef: each one passes
+ * that msgdef and a fixed field number to upb_msgdef_itof(). */
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_SourceCodeInfo_Location_leading_comments(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_SourceCodeInfo_Location(s), 3);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_SourceCodeInfo_Location_path(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_SourceCodeInfo_Location(s), 1);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_SourceCodeInfo_Location_span(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_SourceCodeInfo_Location(s), 2);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_SourceCodeInfo_Location_trailing_comments(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_SourceCodeInfo_Location(s), 4);
+}
+/* Fielddef accessor for the SourceCodeInfo msgdef: passes that msgdef and
+ * field number 1 to upb_msgdef_itof(). */
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_SourceCodeInfo_location(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_SourceCodeInfo(s), 1);
+}
+/* Fielddef accessors for the UninterpretedOption.NamePart msgdef: each one
+ * passes that msgdef and a fixed field number to upb_msgdef_itof(). */
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_UninterpretedOption_NamePart_is_extension(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_UninterpretedOption_NamePart(s), 2);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_UninterpretedOption_NamePart_name_part(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_UninterpretedOption_NamePart(s), 1);
+}
+/* Fielddef accessors for the UninterpretedOption msgdef: each one passes that
+ * msgdef and a fixed field number to upb_msgdef_itof(). */
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_UninterpretedOption_aggregate_value(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_UninterpretedOption(s), 8);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_UninterpretedOption_double_value(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_UninterpretedOption(s), 6);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_UninterpretedOption_identifier_value(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_UninterpretedOption(s), 3);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_UninterpretedOption_name(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_UninterpretedOption(s), 2);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_UninterpretedOption_negative_int_value(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_UninterpretedOption(s), 5);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_UninterpretedOption_positive_int_value(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_UninterpretedOption(s), 4);
+}
+UPB_INLINE const upb_fielddef *upbdefs_google_protobuf_UninterpretedOption_string_value(const upb_symtab *s) {
+  return upb_msgdef_itof(upbdefs_google_protobuf_UninterpretedOption(s), 7);
+}
+
+UPB_END_EXTERN_C
+
+#ifdef __cplusplus
+
+namespace upbdefs {
+namespace google { namespace protobuf { namespace descriptor {
+/* Returns the symbol table obtained from upbdefs_google_protobuf_descriptor(),
+ * wrapped in a upb::reffed_ptr.  The address of the local pointer is handed to
+ * both that call and the reffed_ptr constructor.
+ * NOTE(review): presumably that address acts as the ref owner being passed
+ * from the C call to the reffed_ptr -- confirm against upb's refcounting. */
+inline upb::reffed_ptr<const upb::SymbolTable> SymbolTable() {
+  typedef upb::reffed_ptr<const upb::SymbolTable> SymtabPtr;
+  const upb::SymbolTable* symtab = upbdefs_google_protobuf_descriptor(&symtab);
+  return SymtabPtr(symtab, &symtab);
+}
+} } }  /* namespace google::protobuf::descriptor */
+
+/* Function-body helper for the C++ accessors that follow: calls |func| with
+ * the descriptor symbol table and returns the result wrapped in a
+ * upb::reffed_ptr<const type>.
+ * NOTE(review): the reffed_ptr returned by SymbolTable() is a temporary that
+ * is destroyed at the end of the first statement, before |obj| is wrapped;
+ * presumably the defs stay alive independently of it -- confirm. */
+#define RETURN_REFFED(type, func)                                         \
+    const type* obj =                                                     \
+        func(upbdefs::google::protobuf::descriptor::SymbolTable().get()); \
+    return upb::reffed_ptr<const type>(obj);
+
+/* C++ wrappers returning reffed_ptrs to the DescriptorProto msgdef and its
+ * fielddefs; each forwards to the matching C accessor via RETURN_REFFED. */
+namespace google { namespace protobuf { namespace DescriptorProto {
+inline upb::reffed_ptr<const upb::MessageDef> MessageDef() {
+  RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_DescriptorProto)
+}
+inline upb::reffed_ptr<const upb::FieldDef> enum_type() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_DescriptorProto_enum_type)
+}
+inline upb::reffed_ptr<const upb::FieldDef> extension() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_DescriptorProto_extension)
+}
+inline upb::reffed_ptr<const upb::FieldDef> extension_range() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_DescriptorProto_extension_range)
+}
+inline upb::reffed_ptr<const upb::FieldDef> field() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_DescriptorProto_field)
+}
+inline upb::reffed_ptr<const upb::FieldDef> name() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_DescriptorProto_name)
+}
+inline upb::reffed_ptr<const upb::FieldDef> nested_type() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_DescriptorProto_nested_type)
+}
+inline upb::reffed_ptr<const upb::FieldDef> options() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_DescriptorProto_options)
+}
+} } }  /* namespace google::protobuf::DescriptorProto */
+
+/* C++ wrappers returning reffed_ptrs to the DescriptorProto.ExtensionRange
+ * msgdef and its fielddefs; each forwards to the matching C accessor via
+ * RETURN_REFFED. */
+namespace google { namespace protobuf { namespace DescriptorProto { namespace ExtensionRange {
+inline upb::reffed_ptr<const upb::MessageDef> MessageDef() {
+  RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_DescriptorProto_ExtensionRange)
+}
+inline upb::reffed_ptr<const upb::FieldDef> end() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_DescriptorProto_ExtensionRange_end)
+}
+inline upb::reffed_ptr<const upb::FieldDef> start() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_DescriptorProto_ExtensionRange_start)
+}
+} } } }  /* namespace google::protobuf::DescriptorProto::ExtensionRange */
+
+/* C++ wrappers returning reffed_ptrs to the EnumDescriptorProto msgdef and its
+ * fielddefs; each forwards to the matching C accessor via RETURN_REFFED. */
+namespace google { namespace protobuf { namespace EnumDescriptorProto {
+inline upb::reffed_ptr<const upb::MessageDef> MessageDef() {
+  RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_EnumDescriptorProto)
+}
+inline upb::reffed_ptr<const upb::FieldDef> name() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_EnumDescriptorProto_name)
+}
+inline upb::reffed_ptr<const upb::FieldDef> options() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_EnumDescriptorProto_options)
+}
+inline upb::reffed_ptr<const upb::FieldDef> value() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_EnumDescriptorProto_value)
+}
+} } }  /* namespace google::protobuf::EnumDescriptorProto */
+
+/* C++ wrappers returning reffed_ptrs to the EnumOptions msgdef and its
+ * fielddefs; each forwards to the matching C accessor via RETURN_REFFED. */
+namespace google { namespace protobuf { namespace EnumOptions {
+inline upb::reffed_ptr<const upb::MessageDef> MessageDef() {
+  RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_EnumOptions)
+}
+inline upb::reffed_ptr<const upb::FieldDef> allow_alias() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_EnumOptions_allow_alias)
+}
+inline upb::reffed_ptr<const upb::FieldDef> uninterpreted_option() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_EnumOptions_uninterpreted_option)
+}
+} } }  /* namespace google::protobuf::EnumOptions */
+
+/* C++ wrappers returning reffed_ptrs to the EnumValueDescriptorProto msgdef
+ * and its fielddefs; each forwards to the matching C accessor via
+ * RETURN_REFFED. */
+namespace google { namespace protobuf { namespace EnumValueDescriptorProto {
+inline upb::reffed_ptr<const upb::MessageDef> MessageDef() {
+  RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_EnumValueDescriptorProto)
+}
+inline upb::reffed_ptr<const upb::FieldDef> name() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_EnumValueDescriptorProto_name)
+}
+inline upb::reffed_ptr<const upb::FieldDef> number() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_EnumValueDescriptorProto_number)
+}
+inline upb::reffed_ptr<const upb::FieldDef> options() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_EnumValueDescriptorProto_options)
+}
+} } }  /* namespace google::protobuf::EnumValueDescriptorProto */
+
+/* C++ wrappers returning reffed_ptrs to the EnumValueOptions msgdef and its
+ * fielddef; each forwards to the matching C accessor via RETURN_REFFED. */
+namespace google { namespace protobuf { namespace EnumValueOptions {
+inline upb::reffed_ptr<const upb::MessageDef> MessageDef() {
+  RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_EnumValueOptions)
+}
+inline upb::reffed_ptr<const upb::FieldDef> uninterpreted_option() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_EnumValueOptions_uninterpreted_option)
+}
+} } }  /* namespace google::protobuf::EnumValueOptions */
+
+/* C++ wrappers returning reffed_ptrs to the FieldDescriptorProto msgdef, its
+ * fielddefs and its Label/Type enumdefs; each forwards to the matching C
+ * accessor via RETURN_REFFED. */
+namespace google { namespace protobuf { namespace FieldDescriptorProto {
+inline upb::reffed_ptr<const upb::MessageDef> MessageDef() {
+  RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_FieldDescriptorProto)
+}
+inline upb::reffed_ptr<const upb::FieldDef> default_value() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FieldDescriptorProto_default_value)
+}
+inline upb::reffed_ptr<const upb::FieldDef> extendee() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FieldDescriptorProto_extendee)
+}
+inline upb::reffed_ptr<const upb::FieldDef> label() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FieldDescriptorProto_label)
+}
+inline upb::reffed_ptr<const upb::FieldDef> name() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FieldDescriptorProto_name)
+}
+inline upb::reffed_ptr<const upb::FieldDef> number() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FieldDescriptorProto_number)
+}
+inline upb::reffed_ptr<const upb::FieldDef> options() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FieldDescriptorProto_options)
+}
+inline upb::reffed_ptr<const upb::FieldDef> type() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FieldDescriptorProto_type)
+}
+inline upb::reffed_ptr<const upb::FieldDef> type_name() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FieldDescriptorProto_type_name)
+}
+inline upb::reffed_ptr<const upb::EnumDef> Label() {
+  RETURN_REFFED(upb::EnumDef, upbdefs_google_protobuf_FieldDescriptorProto_Label)
+}
+inline upb::reffed_ptr<const upb::EnumDef> Type() {
+  RETURN_REFFED(upb::EnumDef, upbdefs_google_protobuf_FieldDescriptorProto_Type)
+}
+} } }  /* namespace google::protobuf::FieldDescriptorProto */
+
+/* C++ wrappers returning reffed_ptrs to the FieldOptions msgdef, its fielddefs
+ * and its CType enumdef; each forwards to the matching C accessor via
+ * RETURN_REFFED. */
+namespace google { namespace protobuf { namespace FieldOptions {
+inline upb::reffed_ptr<const upb::MessageDef> MessageDef() {
+  RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_FieldOptions)
+}
+inline upb::reffed_ptr<const upb::FieldDef> ctype() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FieldOptions_ctype)
+}
+inline upb::reffed_ptr<const upb::FieldDef> deprecated() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FieldOptions_deprecated)
+}
+inline upb::reffed_ptr<const upb::FieldDef> experimental_map_key() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FieldOptions_experimental_map_key)
+}
+inline upb::reffed_ptr<const upb::FieldDef> lazy() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FieldOptions_lazy)
+}
+inline upb::reffed_ptr<const upb::FieldDef> packed() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FieldOptions_packed)
+}
+inline upb::reffed_ptr<const upb::FieldDef> uninterpreted_option() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FieldOptions_uninterpreted_option)
+}
+inline upb::reffed_ptr<const upb::FieldDef> weak() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FieldOptions_weak)
+}
+inline upb::reffed_ptr<const upb::EnumDef> CType() {
+  RETURN_REFFED(upb::EnumDef, upbdefs_google_protobuf_FieldOptions_CType)
+}
+} } }  /* namespace google::protobuf::FieldOptions */
+
+/* C++ wrappers returning reffed_ptrs to the FileDescriptorProto msgdef and its
+ * fielddefs; each forwards to the matching C accessor via RETURN_REFFED. */
+namespace google { namespace protobuf { namespace FileDescriptorProto {
+inline upb::reffed_ptr<const upb::MessageDef> MessageDef() {
+  RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_FileDescriptorProto)
+}
+inline upb::reffed_ptr<const upb::FieldDef> dependency() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileDescriptorProto_dependency)
+}
+inline upb::reffed_ptr<const upb::FieldDef> enum_type() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileDescriptorProto_enum_type)
+}
+inline upb::reffed_ptr<const upb::FieldDef> extension() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileDescriptorProto_extension)
+}
+inline upb::reffed_ptr<const upb::FieldDef> message_type() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileDescriptorProto_message_type)
+}
+inline upb::reffed_ptr<const upb::FieldDef> name() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileDescriptorProto_name)
+}
+inline upb::reffed_ptr<const upb::FieldDef> options() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileDescriptorProto_options)
+}
+inline upb::reffed_ptr<const upb::FieldDef> package() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileDescriptorProto_package)
+}
+inline upb::reffed_ptr<const upb::FieldDef> public_dependency() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileDescriptorProto_public_dependency)
+}
+inline upb::reffed_ptr<const upb::FieldDef> service() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileDescriptorProto_service)
+}
+inline upb::reffed_ptr<const upb::FieldDef> source_code_info() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileDescriptorProto_source_code_info)
+}
+inline upb::reffed_ptr<const upb::FieldDef> weak_dependency() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileDescriptorProto_weak_dependency)
+}
+} } }  /* namespace google::protobuf::FileDescriptorProto */
+
+/* C++ wrappers returning reffed_ptrs to the FileDescriptorSet msgdef and its
+ * fielddef; each forwards to the matching C accessor via RETURN_REFFED. */
+namespace google { namespace protobuf { namespace FileDescriptorSet {
+inline upb::reffed_ptr<const upb::MessageDef> MessageDef() {
+  RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_FileDescriptorSet)
+}
+inline upb::reffed_ptr<const upb::FieldDef> file() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileDescriptorSet_file)
+}
+} } }  /* namespace google::protobuf::FileDescriptorSet */
+
+/* C++ wrappers returning reffed_ptrs to the FileOptions msgdef, its fielddefs
+ * and its OptimizeMode enumdef; each forwards to the matching C accessor via
+ * RETURN_REFFED. */
+namespace google { namespace protobuf { namespace FileOptions {
+inline upb::reffed_ptr<const upb::MessageDef> MessageDef() {
+  RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_FileOptions)
+}
+inline upb::reffed_ptr<const upb::FieldDef> cc_generic_services() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileOptions_cc_generic_services)
+}
+inline upb::reffed_ptr<const upb::FieldDef> go_package() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileOptions_go_package)
+}
+inline upb::reffed_ptr<const upb::FieldDef> java_generate_equals_and_hash() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileOptions_java_generate_equals_and_hash)
+}
+inline upb::reffed_ptr<const upb::FieldDef> java_generic_services() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileOptions_java_generic_services)
+}
+inline upb::reffed_ptr<const upb::FieldDef> java_multiple_files() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileOptions_java_multiple_files)
+}
+inline upb::reffed_ptr<const upb::FieldDef> java_outer_classname() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileOptions_java_outer_classname)
+}
+inline upb::reffed_ptr<const upb::FieldDef> java_package() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileOptions_java_package)
+}
+inline upb::reffed_ptr<const upb::FieldDef> optimize_for() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileOptions_optimize_for)
+}
+inline upb::reffed_ptr<const upb::FieldDef> py_generic_services() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileOptions_py_generic_services)
+}
+inline upb::reffed_ptr<const upb::FieldDef> uninterpreted_option() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_FileOptions_uninterpreted_option)
+}
+inline upb::reffed_ptr<const upb::EnumDef> OptimizeMode() {
+  RETURN_REFFED(upb::EnumDef, upbdefs_google_protobuf_FileOptions_OptimizeMode)
+}
+} } }  /* namespace google::protobuf::FileOptions */
+
+/* C++ wrappers returning reffed_ptrs to the MessageOptions msgdef and its
+ * fielddefs; each forwards to the matching C accessor via RETURN_REFFED. */
+namespace google { namespace protobuf { namespace MessageOptions {
+inline upb::reffed_ptr<const upb::MessageDef> MessageDef() {
+  RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_MessageOptions)
+}
+inline upb::reffed_ptr<const upb::FieldDef> message_set_wire_format() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_MessageOptions_message_set_wire_format)
+}
+inline upb::reffed_ptr<const upb::FieldDef> no_standard_descriptor_accessor() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_MessageOptions_no_standard_descriptor_accessor)
+}
+inline upb::reffed_ptr<const upb::FieldDef> uninterpreted_option() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_MessageOptions_uninterpreted_option)
+}
+} } }  /* namespace google::protobuf::MessageOptions */
+
+/* C++ wrappers returning reffed_ptrs to the MethodDescriptorProto msgdef and
+ * its fielddefs; each forwards to the matching C accessor via RETURN_REFFED. */
+namespace google { namespace protobuf { namespace MethodDescriptorProto {
+inline upb::reffed_ptr<const upb::MessageDef> MessageDef() {
+  RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_MethodDescriptorProto)
+}
+inline upb::reffed_ptr<const upb::FieldDef> input_type() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_MethodDescriptorProto_input_type)
+}
+inline upb::reffed_ptr<const upb::FieldDef> name() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_MethodDescriptorProto_name)
+}
+inline upb::reffed_ptr<const upb::FieldDef> options() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_MethodDescriptorProto_options)
+}
+inline upb::reffed_ptr<const upb::FieldDef> output_type() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_MethodDescriptorProto_output_type)
+}
+} } }  /* namespace google::protobuf::MethodDescriptorProto */
+
+/* C++ wrappers returning reffed_ptrs to the MethodOptions msgdef and its
+ * fielddef; each forwards to the matching C accessor via RETURN_REFFED. */
+namespace google { namespace protobuf { namespace MethodOptions {
+inline upb::reffed_ptr<const upb::MessageDef> MessageDef() {
+  RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_MethodOptions)
+}
+inline upb::reffed_ptr<const upb::FieldDef> uninterpreted_option() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_MethodOptions_uninterpreted_option)
+}
+} } }  /* namespace google::protobuf::MethodOptions */
+
+/* C++ wrappers returning reffed_ptrs to the ServiceDescriptorProto msgdef and
+ * its fielddefs; each forwards to the matching C accessor via RETURN_REFFED. */
+namespace google { namespace protobuf { namespace ServiceDescriptorProto {
+inline upb::reffed_ptr<const upb::MessageDef> MessageDef() {
+  RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_ServiceDescriptorProto)
+}
+inline upb::reffed_ptr<const upb::FieldDef> method() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_ServiceDescriptorProto_method)
+}
+inline upb::reffed_ptr<const upb::FieldDef> name() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_ServiceDescriptorProto_name)
+}
+inline upb::reffed_ptr<const upb::FieldDef> options() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_ServiceDescriptorProto_options)
+}
+} } }  /* namespace google::protobuf::ServiceDescriptorProto */
+
+/* C++ wrappers returning reffed_ptrs to the ServiceOptions msgdef and its
+ * fielddef; each forwards to the matching C accessor via RETURN_REFFED. */
+namespace google { namespace protobuf { namespace ServiceOptions {
+inline upb::reffed_ptr<const upb::MessageDef> MessageDef() {
+  RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_ServiceOptions)
+}
+inline upb::reffed_ptr<const upb::FieldDef> uninterpreted_option() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_ServiceOptions_uninterpreted_option)
+}
+} } }  /* namespace google::protobuf::ServiceOptions */
+
+/* C++ wrappers returning reffed_ptrs to the SourceCodeInfo msgdef and its
+ * fielddef; each forwards to the matching C accessor via RETURN_REFFED. */
+namespace google { namespace protobuf { namespace SourceCodeInfo {
+inline upb::reffed_ptr<const upb::MessageDef> MessageDef() {
+  RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_SourceCodeInfo)
+}
+inline upb::reffed_ptr<const upb::FieldDef> location() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_SourceCodeInfo_location)
+}
+} } }  /* namespace google::protobuf::SourceCodeInfo */
+
+/* C++ wrappers returning reffed_ptrs to the SourceCodeInfo.Location msgdef and
+ * its fielddefs; each forwards to the matching C accessor via RETURN_REFFED. */
+namespace google { namespace protobuf { namespace SourceCodeInfo { namespace Location {
+inline upb::reffed_ptr<const upb::MessageDef> MessageDef() {
+  RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_SourceCodeInfo_Location)
+}
+inline upb::reffed_ptr<const upb::FieldDef> leading_comments() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_SourceCodeInfo_Location_leading_comments)
+}
+inline upb::reffed_ptr<const upb::FieldDef> path() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_SourceCodeInfo_Location_path)
+}
+inline upb::reffed_ptr<const upb::FieldDef> span() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_SourceCodeInfo_Location_span)
+}
+inline upb::reffed_ptr<const upb::FieldDef> trailing_comments() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_SourceCodeInfo_Location_trailing_comments)
+}
+} } } }  /* namespace google::protobuf::SourceCodeInfo::Location */
+
+/* C++ wrappers returning reffed_ptrs to the UninterpretedOption msgdef and its
+ * fielddefs; each forwards to the matching C accessor via RETURN_REFFED. */
+namespace google { namespace protobuf { namespace UninterpretedOption {
+inline upb::reffed_ptr<const upb::MessageDef> MessageDef() {
+  RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_UninterpretedOption)
+}
+inline upb::reffed_ptr<const upb::FieldDef> aggregate_value() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_UninterpretedOption_aggregate_value)
+}
+inline upb::reffed_ptr<const upb::FieldDef> double_value() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_UninterpretedOption_double_value)
+}
+inline upb::reffed_ptr<const upb::FieldDef> identifier_value() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_UninterpretedOption_identifier_value)
+}
+inline upb::reffed_ptr<const upb::FieldDef> name() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_UninterpretedOption_name)
+}
+inline upb::reffed_ptr<const upb::FieldDef> negative_int_value() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_UninterpretedOption_negative_int_value)
+}
+inline upb::reffed_ptr<const upb::FieldDef> positive_int_value() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_UninterpretedOption_positive_int_value)
+}
+inline upb::reffed_ptr<const upb::FieldDef> string_value() {
+  RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_UninterpretedOption_string_value)
+}
+} } }  /* namespace google::protobuf::UninterpretedOption */
+
+namespace google {
+namespace protobuf {
+namespace UninterpretedOption {
+namespace NamePart {
+inline upb::reffed_ptr<const upb::MessageDef> MessageDef() { RETURN_REFFED(upb::MessageDef, upbdefs_google_protobuf_UninterpretedOption_NamePart) }
+inline upb::reffed_ptr<const upb::FieldDef> is_extension() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_UninterpretedOption_NamePart_is_extension) }
+inline upb::reffed_ptr<const upb::FieldDef> name_part() { RETURN_REFFED(upb::FieldDef, upbdefs_google_protobuf_UninterpretedOption_NamePart_name_part) }
+}  /* namespace NamePart */
+}  /* namespace UninterpretedOption */
+}  /* namespace protobuf */
+}  /* namespace google */
+
+}  /* namespace upbdefs */
+
+
+#undef RETURN_REFFED
+#endif /* __cplusplus */
+
+#endif  /* GOOGLE_PROTOBUF_DESCRIPTOR_UPB_H_ */
+/*
+** Internal-only definitions for the decoder.
+*/
+
+#ifndef UPB_DECODER_INT_H_
+#define UPB_DECODER_INT_H_
+
+#include <stdlib.h>
+/*
+** upb::pb::Decoder
+**
+** A high performance, streaming, resumable decoder for the binary protobuf
+** format.
+**
+** This interface works the same regardless of what decoder backend is being
+** used.  A client of this class does not need to know whether decoding is using
+** a JITted decoder (DynASM, LLVM, etc) or an interpreted decoder.  By default,
+** it will always use the fastest available decoder.  However, you can call
+** set_allow_jit(false) to disable any JIT decoder that might be available.
+** This is primarily useful for testing purposes.
+*/
+
+#ifndef UPB_DECODER_H_
+#define UPB_DECODER_H_
+
+
+#ifdef __cplusplus
+namespace upb {
+namespace pb {
+class CodeCache;
+class Decoder;
+class DecoderMethod;
+class DecoderMethodOptions;
+}  /* namespace pb */
+}  /* namespace upb */
+#endif
+
+UPB_DECLARE_TYPE(upb::pb::CodeCache, upb_pbcodecache)
+UPB_DECLARE_TYPE(upb::pb::Decoder, upb_pbdecoder)
+UPB_DECLARE_TYPE(upb::pb::DecoderMethodOptions, upb_pbdecodermethodopts)
+
+UPB_DECLARE_DERIVED_TYPE(upb::pb::DecoderMethod, upb::RefCounted,
+                         upb_pbdecodermethod, upb_refcounted)
+
+#ifdef __cplusplus
+
+/* The parameters one uses to construct a DecoderMethod.
+ * TODO(haberman): move allowjit here?  Seems more convenient for users.
+ * TODO(haberman): move this to be heap allocated for ABI stability. */
+class upb::pb::DecoderMethodOptions {
+ public:
+  /* Parameter represents the destination handlers that this method will push
+   * to. */
+  explicit DecoderMethodOptions(const Handlers* dest_handlers);
+
+  /* Should the decoder push submessages to lazy handlers for fields that have
+   * them?  The caller should set this iff the lazy handlers expect data that is
+   * in protobuf binary format and the caller wishes to lazy parse it. */
+  void set_lazy(bool lazy);
+#else
+struct upb_pbdecodermethodopts {
+#endif
+  const upb_handlers *handlers;
+  bool lazy;
+};
+
+#ifdef __cplusplus
+
+/* Represents the code to parse a protobuf according to a destination
+ * Handlers. */
+class upb::pb::DecoderMethod {
+ public:
+  /* Include base methods from upb::RefCounted. */
+  UPB_REFCOUNTED_CPPMETHODS
+
+  /* The destination handlers that are statically bound to this method.
+   * This method is only capable of outputting to a sink that uses these
+   * handlers. */
+  const Handlers* dest_handlers() const;
+
+  /* The input handlers for this decoder method. */
+  const BytesHandler* input_handler() const;
+
+  /* Whether this method is native. */
+  bool is_native() const;
+
+  /* Convenience method for generating a DecoderMethod without explicitly
+   * creating a CodeCache. */
+  static reffed_ptr<const DecoderMethod> New(const DecoderMethodOptions& opts);
+
+ private:
+  UPB_DISALLOW_POD_OPS(DecoderMethod, upb::pb::DecoderMethod)
+};
+
+#endif
+
+/* Preallocation hint: decoder won't allocate more bytes than this when first
+ * constructed.  This hint may be an overestimate for some build configurations.
+ * But if the decoder library is upgraded without recompiling the application,
+ * it may be an underestimate. */
+#define UPB_PB_DECODER_SIZE 4408
+
+#ifdef __cplusplus
+
+/* A Decoder receives binary protobuf data on its input sink and pushes the
+ * decoded data to its output sink. */
+class upb::pb::Decoder {
+ public:
+  /* Constructs a decoder instance for the given method, which must outlive this
+   * decoder.  Any errors during parsing will be set on the given status, which
+   * must also outlive this decoder.
+   *
+   * The sink must match the given method. */
+  static Decoder* Create(Environment* env, const DecoderMethod* method,
+                         Sink* output);
+
+  /* Returns the DecoderMethod this decoder is parsing from. */
+  const DecoderMethod* method() const;
+
+  /* The sink on which this decoder receives input. */
+  BytesSink* input();
+
+  /* Returns number of bytes successfully parsed.
+   *
+   * This can be useful for determining the stream position where an error
+   * occurred.
+   *
+   * This value may not be up-to-date when called from inside a parsing
+   * callback. */
+  uint64_t BytesParsed() const;
+
+  /* Gets/sets the parsing nesting limit.  If the total number of nested
+   * submessages and repeated fields hits this limit, parsing will fail.  This
+   * is a resource limit that controls the amount of memory used by the parsing
+   * stack.
+   *
+   * Setting the limit will fail if the parser is currently suspended at a depth
+   * greater than this, or if memory allocation of the stack fails. */
+  size_t max_nesting() const;
+  bool set_max_nesting(size_t max);
+
+  void Reset();
+
+  static const size_t kSize = UPB_PB_DECODER_SIZE;
+
+ private:
+  UPB_DISALLOW_POD_OPS(Decoder, upb::pb::Decoder)
+};
+
+#endif  /* __cplusplus */
+
+#ifdef __cplusplus
+
+/* A class for caching protobuf processing code, whether bytecode for the
+ * interpreted decoder or machine code for the JIT.
+ *
+ * This class is not thread-safe.
+ *
+ * TODO(haberman): move this to be heap allocated for ABI stability. */
+class upb::pb::CodeCache {
+ public:
+  CodeCache();
+  ~CodeCache();
+
+  /* Whether the cache is allowed to generate machine code.  Defaults to true.
+   * There is no real reason to turn it off except for testing or if you are
+   * having a specific problem with the JIT.
+   *
+   * Note that allow_jit = true does not *guarantee* that the code will be JIT
+   * compiled.  If this platform is not supported or the JIT was not compiled
+   * in, the code may still be interpreted. */
+  bool allow_jit() const;
+
+  /* This may only be called when the object is first constructed, and prior to
+   * any code generation, otherwise returns false and does nothing. */
+  bool set_allow_jit(bool allow);
+
+  /* Returns a DecoderMethod that can push data to the given handlers.
+   * If a suitable method already exists, it will be returned from the cache.
+   *
+   * Specifying the destination handlers here allows the DecoderMethod to be
+   * statically bound to the destination handlers if possible, which can allow
+   * more efficient decoding.  However the returned method may or may not
+   * actually be statically bound.  But in all cases, the returned method can
+   * push data to the given handlers. */
+  const DecoderMethod *GetDecoderMethod(const DecoderMethodOptions& opts);
+
+  /* If/when someone needs to explicitly create a dynamically-bound
+   * DecoderMethod*, we can add a method to get it here. */
+
+ private:
+  UPB_DISALLOW_COPY_AND_ASSIGN(CodeCache)
+#else
+struct upb_pbcodecache {
+#endif
+  bool allow_jit_;
+
+  /* Array of mgroups. */
+  upb_inttable groups;
+};
+
+UPB_BEGIN_EXTERN_C
+
+upb_pbdecoder *upb_pbdecoder_create(upb_env *e,
+                                    const upb_pbdecodermethod *method,
+                                    upb_sink *output);
+const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d);
+upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d);
+uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d);
+size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d);
+bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max);
+void upb_pbdecoder_reset(upb_pbdecoder *d);
+
+void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts,
+                                  const upb_handlers *h);
+void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy);
+
+
+/* Include refcounted methods like upb_pbdecodermethod_ref(). */
+UPB_REFCOUNTED_CMETHODS(upb_pbdecodermethod, upb_pbdecodermethod_upcast)
+
+const upb_handlers *upb_pbdecodermethod_desthandlers(
+    const upb_pbdecodermethod *m);
+const upb_byteshandler *upb_pbdecodermethod_inputhandler(
+    const upb_pbdecodermethod *m);
+bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m);
+const upb_pbdecodermethod *upb_pbdecodermethod_new(
+    const upb_pbdecodermethodopts *opts, const void *owner);
+
+void upb_pbcodecache_init(upb_pbcodecache *c);
+void upb_pbcodecache_uninit(upb_pbcodecache *c);
+bool upb_pbcodecache_allowjit(const upb_pbcodecache *c);
+bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow);
+const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod(
+    upb_pbcodecache *c, const upb_pbdecodermethodopts *opts);
+
+UPB_END_EXTERN_C
+
+#ifdef __cplusplus
+
+namespace upb {
+
+namespace pb {
+
+/* static */
+inline Decoder* Decoder::Create(Environment* env, const DecoderMethod* m,
+                                Sink* sink) {
+  return upb_pbdecoder_create(env, m, sink);
+}
+inline const DecoderMethod* Decoder::method() const {
+  return upb_pbdecoder_method(this);
+}
+inline BytesSink* Decoder::input() {
+  return upb_pbdecoder_input(this);
+}
+inline uint64_t Decoder::BytesParsed() const {
+  return upb_pbdecoder_bytesparsed(this);
+}
+inline size_t Decoder::max_nesting() const {
+  return upb_pbdecoder_maxnesting(this);
+}
+inline bool Decoder::set_max_nesting(size_t max) {
+  return upb_pbdecoder_setmaxnesting(this, max);
+}
+inline void Decoder::Reset() { upb_pbdecoder_reset(this); }
+
+inline DecoderMethodOptions::DecoderMethodOptions(const Handlers* h) {
+  upb_pbdecodermethodopts_init(this, h);
+}
+inline void DecoderMethodOptions::set_lazy(bool lazy) {
+  upb_pbdecodermethodopts_setlazy(this, lazy);
+}
+
+inline const Handlers* DecoderMethod::dest_handlers() const {
+  return upb_pbdecodermethod_desthandlers(this);
+}
+inline const BytesHandler* DecoderMethod::input_handler() const {
+  return upb_pbdecodermethod_inputhandler(this);
+}
+inline bool DecoderMethod::is_native() const {
+  return upb_pbdecodermethod_isnative(this);
+}
+/* static */
+inline reffed_ptr<const DecoderMethod> DecoderMethod::New(
+    const DecoderMethodOptions &opts) {
+  const upb_pbdecodermethod *m = upb_pbdecodermethod_new(&opts, &m);
+  return reffed_ptr<const DecoderMethod>(m, &m);
+}
+
+inline CodeCache::CodeCache() {
+  upb_pbcodecache_init(this);
+}
+inline CodeCache::~CodeCache() {
+  upb_pbcodecache_uninit(this);
+}
+inline bool CodeCache::allow_jit() const {
+  return upb_pbcodecache_allowjit(this);
+}
+inline bool CodeCache::set_allow_jit(bool allow) {
+  return upb_pbcodecache_setallowjit(this, allow);
+}
+inline const DecoderMethod *CodeCache::GetDecoderMethod(
+    const DecoderMethodOptions& opts) {
+  return upb_pbcodecache_getdecodermethod(this, &opts);
+}
+
+}  /* namespace pb */
+}  /* namespace upb */
+
+#endif  /* __cplusplus */
+
+#endif  /* UPB_DECODER_H_ */
+
+/* C++ names are not actually used since this type isn't exposed to users. */
+#ifdef __cplusplus
+namespace upb {
+namespace pb {
+class MessageGroup;
+}  /* namespace pb */
+}  /* namespace upb */
+#endif
+UPB_DECLARE_DERIVED_TYPE(upb::pb::MessageGroup, upb::RefCounted,
+                         mgroup, upb_refcounted)
+
+/* Opcode definitions.  The canonical meaning of each opcode is its
+ * implementation in the interpreter (the JIT is written to match this).
+ *
+ * All instructions have the opcode in the low byte.
+ * Instruction format for most instructions is:
+ *
+ * +-------------------+--------+
+ * |     arg (24)      | op (8) |
+ * +-------------------+--------+
+ *
+ * Exceptions are indicated below.  A few opcodes are multi-word. */
+typedef enum {
+  /* Opcodes 1-8, 13, 15-18 parse their respective descriptor types.
+   * Arg for all of these is the upb selector for this field. */
+#define T(type) OP_PARSE_ ## type = UPB_DESCRIPTOR_TYPE_ ## type
+  T(DOUBLE), T(FLOAT), T(INT64), T(UINT64), T(INT32), T(FIXED64), T(FIXED32),
+  T(BOOL), T(UINT32), T(SFIXED32), T(SFIXED64), T(SINT32), T(SINT64),
+#undef T
+  OP_STARTMSG       = 9,   /* No arg. */
+  OP_ENDMSG         = 10,  /* No arg. */
+  OP_STARTSEQ       = 11,
+  OP_ENDSEQ         = 12,
+  OP_STARTSUBMSG    = 14,
+  OP_ENDSUBMSG      = 19,
+  OP_STARTSTR       = 20,
+  OP_STRING         = 21,
+  OP_ENDSTR         = 22,
+
+  OP_PUSHTAGDELIM   = 23,  /* No arg. */
+  OP_PUSHLENDELIM   = 24,  /* No arg. */
+  OP_POP            = 25,  /* No arg. */
+  OP_SETDELIM       = 26,  /* No arg. */
+  OP_SETBIGGROUPNUM = 27,  /* two words:
+                            *   | unused (24)     | opc (8) |
+                            *   |        groupnum (32)      | */
+  OP_CHECKDELIM     = 28,
+  OP_CALL           = 29,
+  OP_RET            = 30,
+  OP_BRANCH         = 31,
+
+  /* Different opcodes depending on how many bytes expected. */
+  OP_TAG1           = 32,  /* | match tag (16) | jump target (8) | opc (8) | */
+  OP_TAG2           = 33,  /* | match tag (16) | jump target (8) | opc (8) | */
+  OP_TAGN           = 34,  /* three words: */
+                           /*   | unused (16) | jump target(8) | opc (8) | */
+                           /*   |           match tag 1 (32)             | */
+                           /*   |           match tag 2 (32)             | */
+
+  OP_SETDISPATCH    = 35,  /* N words: */
+                           /*   | unused (24)         | opc | */
+                           /*   | upb_inttable* (32 or 64)  | */
+
+  OP_DISPATCH       = 36,  /* No arg. */
+
+  OP_HALT           = 37   /* No arg. */
+} opcode;
+
+#define OP_MAX OP_HALT
+
+/* Extracts the opcode, which every instruction stores in its low byte.  The
+ * explicit cast keeps this valid when the header is compiled as C++, where an
+ * integer does not implicitly convert to an enum type. */
+UPB_INLINE opcode getop(uint32_t instr) { return (opcode)(instr & 0xff); }
+
+/* Method group; represents a set of decoder methods that had their code
+ * emitted together, and must therefore be freed together.  Immutable once
+ * created.  It is possible we may want to expose this to users at some point.
+ *
+ * Overall ownership of Decoder objects looks like this:
+ *
+ *                +----------+
+ *                |          | <---> DecoderMethod
+ *                | method   |
+ * CodeCache ---> |  group   | <---> DecoderMethod
+ *                |          |
+ *                | (mgroup) | <---> DecoderMethod
+ *                +----------+
+ */
+struct mgroup {
+  upb_refcounted base;
+
+  /* Maps upb_msgdef/upb_handlers -> upb_pbdecodermethod.  We own refs on the
+   * methods. */
+  upb_inttable methods;
+
+  /* When we add the ability to link to previously existing mgroups, we'll
+   * need an array of mgroups we reference here, and own refs on them. */
+
+  /* The bytecode for our methods, if any exists.  Owned by us. */
+  uint32_t *bytecode;
+  uint32_t *bytecode_end;
+
+#ifdef UPB_USE_JIT_X64
+  /* JIT-generated machine code, if any. */
+  upb_string_handlerfunc *jit_code;
+  /* The size of the jit_code (required to munmap()). */
+  size_t jit_size;
+  char *debug_info;
+  void *dl;
+#endif
+};
+
+/* The maximum that any submessages can be nested.  Matches proto2's limit.
+ * This specifies the size of the decoder's statically-sized array and therefore
+ * setting it high will cause the upb::pb::Decoder object to be larger.
+ *
+ * If necessary we can add a runtime-settable property to Decoder that allows
+ * this to be larger than the compile-time setting, but this would add
+ * complexity, particularly since we would have to decide how/if to give users
+ * the ability to set a custom memory allocation function. */
+#define UPB_DECODER_MAX_NESTING 64
+
+/* Internal-only struct used by the decoder. */
+typedef struct {
+  /* Space optimization note: we store two pointers here that the JIT
+   * doesn't need at all; the upb_handlers* inside the sink and
+   * the dispatch table pointer.  We can optimize so that the JIT uses
+   * smaller stack frames than the interpreter.  The only thing we need
+   * to guarantee is that the fallback routines can find end_ofs. */
+  upb_sink sink;
+
+  /* The absolute stream offset of the end-of-frame delimiter.
+   * Non-delimited frames (groups and non-packed repeated fields) reuse the
+   * delimiter of their parent, even though the frame may not end there.
+   *
+   * NOTE: the JIT stores a slightly different value here for non-top frames.
+   * It stores the value relative to the end of the enclosed message.  But the
+   * top frame is still stored the same way, which is important for ensuring
+   * that calls from the JIT into C work correctly. */
+  uint64_t end_ofs;
+  const uint32_t *base;
+
+  /* 0 indicates a length-delimited field.
+   * A positive number indicates a known group.
+   * A negative number indicates an unknown group. */
+  int32_t groupnum;
+  upb_inttable *dispatch;  /* Not used by the JIT. */
+} upb_pbdecoder_frame;
+
+struct upb_pbdecodermethod {
+  upb_refcounted base;
+
+  /* While compiling, the base is relative in "ofs", after compiling it is
+   * absolute in "ptr". */
+  union {
+    uint32_t ofs;     /* PC offset of method. */
+    void *ptr;        /* Pointer to bytecode or machine code for this method. */
+  } code_base;
+
+  /* The decoder method group to which this method belongs.  We own a ref.
+   * Owning a ref on the entire group is more coarse-grained than is strictly
+   * necessary; all we truly require is that methods we directly reference
+   * outlive us, while the group could contain many other messages we don't
+   * require.  But the group represents the messages that were
+   * allocated+compiled together, so it makes the most sense to free them
+   * together also. */
+  const upb_refcounted *group;
+
+  /* Whether this method is native code or bytecode. */
+  bool is_native_;
+
+  /* The handler one calls to invoke this method. */
+  upb_byteshandler input_handler_;
+
+  /* The destination handlers this method is bound to.  We own a ref. */
+  const upb_handlers *dest_handlers_;
+
+  /* Dispatch table -- used by both bytecode decoder and JIT when encountering a
+   * field number that wasn't the one we were expecting to see.  See
+   * decoder.int.h for the layout of this table. */
+  upb_inttable dispatch;
+};
+
+struct upb_pbdecoder {
+  upb_env *env;
+
+  /* Our input sink. */
+  upb_bytessink input_;
+
+  /* The decoder method we are parsing with (owned). */
+  const upb_pbdecodermethod *method_;
+
+  size_t call_len;
+  const uint32_t *pc, *last;
+
+  /* Current input buffer and its stream offset. */
+  const char *buf, *ptr, *end, *checkpoint;
+
+  /* End of the delimited region, relative to ptr, NULL if not in this buf. */
+  const char *delim_end;
+
+  /* End of the delimited region, relative to ptr, end if not in this buf. */
+  const char *data_end;
+
+  /* Overall stream offset of "buf." */
+  uint64_t bufstart_ofs;
+
+  /* Buffer for residual bytes not parsed from the previous buffer.
+   * The maximum number of residual bytes we require is 12; a five-byte
+   * unknown tag plus an eight-byte value, less one because the value
+   * is only a partial value. */
+  char residual[12];
+  char *residual_end;
+
+  /* Bytes of data that should be discarded from the input before we start
+   * parsing again.  We set this when we internally determine that we can
+   * safely skip the next N bytes, but this region extends past the current
+   * user buffer. */
+  size_t skip;
+
+  /* Stores the user buffer passed to our decode function. */
+  const char *buf_param;
+  size_t size_param;
+  const upb_bufhandle *handle;
+
+  /* Our internal stack. */
+  upb_pbdecoder_frame *stack, *top, *limit;
+  const uint32_t **callstack;
+  size_t stack_size;
+
+  upb_status *status;
+
+#ifdef UPB_USE_JIT_X64
+  /* Used momentarily by the generated code to store a value while a user
+   * function is called. */
+  uint32_t tmp_len;
+
+  const void *saved_rsp;
+#endif
+};
+
+/* Decoder entry points; used as handlers. */
+void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint);
+void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint);
+size_t upb_pbdecoder_decode(void *closure, const void *hd, const char *buf,
+                            size_t size, const upb_bufhandle *handle);
+bool upb_pbdecoder_end(void *closure, const void *handler_data);
+
+/* Decoder-internal functions that the JIT calls to handle fallback paths. */
+int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
+                             size_t size, const upb_bufhandle *handle);
+size_t upb_pbdecoder_suspend(upb_pbdecoder *d);
+int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
+                                  uint8_t wire_type);
+int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, uint64_t expected);
+int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, uint64_t *u64);
+int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32);
+int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64);
+void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg);
+
+/* Error messages that are shared between the bytecode and JIT decoders. */
+extern const char *kPbDecoderStackOverflow;
+extern const char *kPbDecoderSubmessageTooLong;
+
+/* Access to decoderplan members needed by the decoder. */
+const char *upb_pbdecoder_getopname(unsigned int op);
+
+/* JIT codegen entry point. */
+void upb_pbdecoder_jit(mgroup *group);
+void upb_pbdecoder_freejit(mgroup *group);
+UPB_REFCOUNTED_CMETHODS(mgroup, mgroup_upcast)
+
+/* A special label that means "do field dispatch for this message and branch to
+ * wherever that takes you." */
+#define LABEL_DISPATCH 0
+
+/* A special slot in the dispatch table that stores the epilogue (ENDMSG and/or
+ * RET) for branching to when we find an appropriate ENDGROUP tag. */
+#define DISPATCH_ENDMSG 0
+
+/* It's important to use this invalid wire type instead of 0 (which is a valid
+ * wire type). */
+#define NO_WIRE_TYPE 0xff
+
+/* The dispatch table layout is:
+ *   [field number] -> [ 48-bit offset ][ 8-bit wt2 ][ 8-bit wt1 ]
+ *
+ * If wt1 matches, jump to the 48-bit offset.  If wt2 matches, lookup
+ * (UPB_MAX_FIELDNUMBER + fieldnum) and jump there.
+ *
+ * We need two wire types because of packed/non-packed compatibility.  A
+ * primitive repeated field can use either wire type and be valid.  While we
+ * could key the table on fieldnum+wiretype, the table would be 8x sparser.
+ *
+ * Storing two wire types in the primary value allows us to quickly rule out
+ * the second wire type without needing to do a separate lookup (this case is
+ * less common than an unknown field). */
+UPB_INLINE uint64_t upb_pbdecoder_packdispatch(uint64_t ofs, uint8_t wt1,
+                                               uint8_t wt2) {
+  /* Build the value field by field: offset above bit 16, then the secondary
+   * wire type in bits 8-15, then the primary wire type in the low byte. */
+  uint64_t packed = ofs << 16;
+  packed |= (uint64_t)wt2 << 8;
+  packed |= (uint64_t)wt1;
+  return packed;
+}
+
+UPB_INLINE void upb_pbdecoder_unpackdispatch(uint64_t dispatch, uint64_t *ofs,
+                                             uint8_t *wt1, uint8_t *wt2) {
+  /* Splits a packed dispatch value back into its parts: the low byte is the
+   * primary wire type, the next byte the secondary one, the rest the offset. */
+  *wt1 = (uint8_t)(dispatch & 0xff);
+  *wt2 = (uint8_t)((dispatch >> 8) & 0xff);
+  *ofs = dispatch >> 16;
+}
+
+/* All of the functions in decoder.c that return int32_t return values according
+ * to the following scheme:
+ *   1. negative values indicate a return code from the following list.
+ *   2. non-negative values indicate that error or end of buffer was hit, and
+ *      that the decode function should immediately return the given value
+ *      (the decoder state has already been suspended and is ready to be
+ *      resumed). */
+#define DECODE_OK -1
+#define DECODE_MISMATCH -2  /* Used only from checktag_slow(). */
+#define DECODE_ENDGROUP -3  /* Used only from checkunknown(). */
+
+#define CHECK_RETURN(x) { int32_t ret = x; if (ret >= 0) return ret; }
+
+#endif  /* UPB_DECODER_INT_H_ */
+/*
+** A number of routines for varint manipulation (we keep them all around to
+** have multiple approaches available for benchmarking).
+*/
+
+#ifndef UPB_VARINT_DECODER_H_
+#define UPB_VARINT_DECODER_H_
+
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* A list of types as they are encoded on-the-wire. */
+typedef enum {
+  UPB_WIRE_TYPE_VARINT      = 0,
+  UPB_WIRE_TYPE_64BIT       = 1,
+  UPB_WIRE_TYPE_DELIMITED   = 2,
+  UPB_WIRE_TYPE_START_GROUP = 3,
+  UPB_WIRE_TYPE_END_GROUP   = 4,
+  UPB_WIRE_TYPE_32BIT       = 5
+} upb_wiretype_t;
+
+#define UPB_MAX_WIRE_TYPE 5
+
+/* The maximum number of bytes that it takes to encode a 64-bit varint.
+ * Note that with a better encoding this could be 9 (TODO: write up a
+ * wiki document about this). */
+#define UPB_PB_VARINT_MAX_LEN 10
+
+/* Array of the "native" (ie. non-packed-repeated) wire type for a given
+ * descriptor type (upb_descriptortype_t). */
+extern const uint8_t upb_pb_native_wire_types[];
+
+/* Zig-zag encoding/decoding **************************************************/
+
+/* Zig-zag decodes n back to a signed 32-bit value (0 -> 0, 1 -> -1, 2 -> 1).
+ * The low bit carries the sign: when it is set, the remaining bits (n >> 1)
+ * are inverted by XOR-ing with all ones. */
+UPB_INLINE int32_t upb_zzdec_32(uint32_t n) {
+  return (n >> 1) ^ -(int32_t)(n & 1);
+}
+/* Zig-zag decodes n back to a signed 64-bit value (0 -> 0, 1 -> -1, 2 -> 1).
+ * The low bit carries the sign: when it is set, the remaining bits (n >> 1)
+ * are inverted by XOR-ing with all ones. */
+UPB_INLINE int64_t upb_zzdec_64(uint64_t n) {
+  return (n >> 1) ^ -(int64_t)(n & 1);
+}
+/* Zig-zag encodes a signed 32-bit value (0 -> 0, -1 -> 1, 1 -> 2, ...).
+ * The left shift is performed on the unsigned representation because shifting
+ * a negative signed value left (or shifting a set bit into the sign position)
+ * is undefined behavior in C.  The right shift is left on the signed value so
+ * that it still yields the all-ones/all-zeros sign mask. */
+UPB_INLINE uint32_t upb_zzenc_32(int32_t n) {
+  return ((uint32_t)n << 1) ^ (uint32_t)(n >> 31);
+}
+/* Zig-zag encodes a signed 64-bit value (0 -> 0, -1 -> 1, 1 -> 2, ...).
+ * The left shift is performed on the unsigned representation because shifting
+ * a negative signed value left (or shifting a set bit into the sign position)
+ * is undefined behavior in C.  The right shift is left on the signed value so
+ * that it still yields the all-ones/all-zeros sign mask. */
+UPB_INLINE uint64_t upb_zzenc_64(int64_t n) {
+  return ((uint64_t)n << 1) ^ (uint64_t)(n >> 63);
+}
+
+/* Decoding *******************************************************************/
+
+/* All decoding functions return this struct by value. */
+typedef struct {
+  const char *p;  /* NULL if the varint was unterminated. */
+  uint64_t val;
+} upb_decoderet;
+
+/* Convenience constructor: returns a upb_decoderet whose p and val members are
+ * set to the given arguments. */
+UPB_INLINE upb_decoderet upb_decoderet_make(const char *p, uint64_t val) {
+  upb_decoderet ret;
+  ret.p = p;
+  ret.val = val;
+  return ret;
+}
+
+/* Four functions for decoding a varint of at most eight bytes.  They are all
+ * functionally identical, but are implemented in different ways and likely have
+ * different performance profiles.  We keep them around for performance testing.
+ *
+ * Note that these functions may not read byte-by-byte, so they must not be used
+ * unless there are at least eight bytes left in the buffer! */
+upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r);
+upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r);
+upb_decoderet upb_vdecode_max8_wright(upb_decoderet r);
+upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r);
+
+/* Template for a function that checks the first two bytes with branching
+ * and dispatches 2-10 bytes with a separate function.  Note that this may read
+ * up to 10 bytes, so it must not be used unless there are at least ten bytes
+ * left in the buffer! */
+#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function)                  \
+UPB_INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) {         \
+  uint8_t *p = (uint8_t*)_p;                                                   \
+  upb_decoderet r;                                                             \
+  if ((*p & 0x80) == 0) {                                                      \
+  /* Common case: one-byte varint. */                                          \
+    return upb_decoderet_make(_p + 1, *p & 0x7fU);                             \
+  }                                                                            \
+  r = upb_decoderet_make(_p + 2, (*p & 0x7fU) | ((*(p + 1) & 0x7fU) << 7));    \
+  if ((*(p + 1) & 0x80) == 0) {                                                \
+    /* Two-byte varint. */                                                     \
+    return r;                                                                  \
+  }                                                                            \
+  /* Longer varint, fallback to out-of-line function. */                       \
+  return decode_max8_function(r);                                              \
+}
+
+UPB_VARINT_DECODER_CHECK2(branch32, upb_vdecode_max8_branch32)
+UPB_VARINT_DECODER_CHECK2(branch64, upb_vdecode_max8_branch64)
+UPB_VARINT_DECODER_CHECK2(wright, upb_vdecode_max8_wright)
+UPB_VARINT_DECODER_CHECK2(massimino, upb_vdecode_max8_massimino)
+#undef UPB_VARINT_DECODER_CHECK2
+
+/* Our canonical functions for decoding varints, based on the currently
+ * favored best-performing implementations. */
+UPB_INLINE upb_decoderet upb_vdecode_fast(const char *p) {
+  /* Use the 64-bit branching decoder when long is eight bytes wide, and the
+   * 32-bit branching decoder otherwise. */
+  return sizeof(long) == 8 ? upb_vdecode_check2_branch64(p)
+                           : upb_vdecode_check2_branch32(p);
+}
+
+UPB_INLINE upb_decoderet upb_vdecode_max8_fast(upb_decoderet r) {
+  return upb_vdecode_max8_massimino(r);
+}
+
+
+/* Encoding *******************************************************************/
+
+/* Returns the number of whole bytes needed to hold the significant bits of
+ * val; zero still occupies one byte. */
+UPB_INLINE int upb_value_size(uint64_t val) {
+  int high_bit = 0;  /* 0-based index of the highest set bit. */
+  /* Handle zero before touching the builtin: __builtin_clzll(0) is
+   * undefined, so it must never be evaluated for this input. */
+  if (val == 0) return 1;
+#ifdef __GNUC__
+  high_bit = 63 - __builtin_clzll(val);
+#else
+  {
+    uint64_t tmp = val;
+    while (tmp >>= 1) high_bit++;
+  }
+#endif
+  return high_bit / 8 + 1;
+}
+
+/* Writes val to buf as a varint, seven bits per byte starting with the
+ * least-significant group, and returns the number of bytes written.  buf must
+ * be at least UPB_PB_VARINT_MAX_LEN bytes long.
+ *
+ * TODO: benchmark and optimize if necessary. */
+UPB_INLINE size_t upb_vencode64(uint64_t val, char *buf) {
+  size_t len = 0;
+  /* A do/while always emits at least one byte, so zero needs no special
+   * case: it is written as a single 0 byte. */
+  do {
+    uint8_t group = (uint8_t)(val & 0x7fU);
+    val >>= 7;
+    if (val != 0) group |= 0x80U;  /* Continuation bit: more groups follow. */
+    buf[len++] = (char)group;
+  } while (val != 0);
+  return len;
+}
+
+UPB_INLINE size_t upb_varint_size(uint64_t val) {
+  char buf[UPB_PB_VARINT_MAX_LEN];
+  return upb_vencode64(val, buf);
+}
+
+/* Encodes a 32-bit varint, *not* sign-extended.
+ *
+ * The encoded bytes (asserted to be at most five) are returned packed into a
+ * uint64_t by copying them over the start of the integer's storage; the
+ * remaining bytes stay zero.
+ * NOTE(review): the second assert only holds if the first encoded byte lands
+ * in the low-order byte of the result, i.e. on a little-endian host -- confirm
+ * that big-endian targets are not expected to call this. */
+UPB_INLINE uint64_t upb_vencode32(uint32_t val) {
+  char buf[UPB_PB_VARINT_MAX_LEN];
+  size_t bytes = upb_vencode64(val, buf);
+  uint64_t ret = 0;
+  assert(bytes <= 5);
+  memcpy(&ret, buf, bytes);
+  assert(ret <= 0xffffffffffU);
+  return ret;
+}
+
+#ifdef __cplusplus
+}  /* extern "C" */
+#endif
+
+#endif  /* UPB_VARINT_DECODER_H_ */
+/*
+** upb::pb::Encoder (upb_pb_encoder)
+**
+** Implements a set of upb_handlers that write protobuf data to the binary wire
+** format.
+**
+** This encoder implementation does not have any access to any out-of-band or
+** precomputed lengths for submessages, so it must buffer submessages internally
+** before it can emit the first byte.
+*/
+
+#ifndef UPB_ENCODER_H_
+#define UPB_ENCODER_H_
+
+
+#ifdef __cplusplus
+namespace upb {
+namespace pb {
+class Encoder;
+}  /* namespace pb */
+}  /* namespace upb */
+#endif
+
+UPB_DECLARE_TYPE(upb::pb::Encoder, upb_pb_encoder)
+
+#define UPB_PBENCODER_MAX_NESTING 100
+
+/* upb::pb::Encoder ***********************************************************/
+
+/* Preallocation hint: encoder won't allocate more bytes than this when first
+ * constructed.  This hint may be an overestimate for some build configurations.
+ * But if the encoder library is upgraded without recompiling the application,
+ * it may be an underestimate. */
+#define UPB_PB_ENCODER_SIZE 768
+
+#ifdef __cplusplus
+
+class upb::pb::Encoder {  /* C++ face of the upb_pb_encoder C type. */
+ public:
+  /* Creates a new encoder in the given environment.  The Handlers must have
+   * come from NewHandlers() below.  Encoded output is written to |output|
+   * (presumably as protobuf binary wire format -- see the file comment). */
+  static Encoder* Create(Environment* env, const Handlers* handlers,
+                         BytesSink* output);
+
+  /* The input to the encoder. */
+  Sink* input();
+
+  /* Creates a new set of handlers for this MessageDef. */
+  static reffed_ptr<const Handlers> NewHandlers(const MessageDef* msg);
+
+  static const size_t kSize = UPB_PB_ENCODER_SIZE;  /* Preallocation hint. */
+
+ private:
+  UPB_DISALLOW_POD_OPS(Encoder, upb::pb::Encoder)  /* Macro defined elsewhere;
+      * presumably forbids construct/copy/assign of this type -- confirm. */
+};
+
+#endif
+
+UPB_BEGIN_EXTERN_C
+
+const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
+                                               const void *owner);
+upb_sink *upb_pb_encoder_input(upb_pb_encoder *p);
+upb_pb_encoder* upb_pb_encoder_create(upb_env* e, const upb_handlers* h,
+                                      upb_bytessink* output);
+
+UPB_END_EXTERN_C
+
+#ifdef __cplusplus
+
+namespace upb {
+namespace pb {  /* Inline C++ methods; each forwards to the upb_pb_encoder_*
+                 * C function declared above. */
+inline Encoder* Encoder::Create(Environment* env, const Handlers* handlers,
+                                BytesSink* output) {
+  return upb_pb_encoder_create(env, handlers, output);
+}
+inline Sink* Encoder::input() {
+  return upb_pb_encoder_input(this);
+}
+inline reffed_ptr<const Handlers> Encoder::NewHandlers(
+    const upb::MessageDef *md) {  /* The address of the local h is the owner
+                                   * token given to the C call and then to
+                                   * reffed_ptr -- presumably handing the
+                                   * reference over; confirm in reffed_ptr. */
+  const Handlers* h = upb_pb_encoder_newhandlers(md, &h);
+  return reffed_ptr<const Handlers>(h, &h);
+}
+}  /* namespace pb */
+}  /* namespace upb */
+
+#endif
+
+#endif  /* UPB_ENCODER_H_ */
+/*
+** upb's core components like upb_decoder and upb_msg are carefully designed to
+** avoid depending on each other for maximum orthogonality.  In other words,
+** you can use a upb_decoder to decode into *any* kind of structure; upb_msg is
+** just one such structure.  A upb_msg can be serialized/deserialized into any
+** format, protobuf binary format is just one such format.
+**
+** However, for convenience we provide functions here for doing common
+** operations like deserializing protobuf binary format into a upb_msg.  The
+** compromise is that this file drags in almost all of upb as a dependency,
+** which could be undesirable if you're trying to use a trimmed-down build of
+** upb.
+**
+** While these routines are convenient, they do not reuse any encoding/decoding
+** state.  For example, if a decoder is JIT-based, it will be re-JITted every
+** time these functions are called.  For this reason, if you are parsing lots
+** of data and efficiency is an issue, these may not be the best functions to
+** use (though they are useful for prototyping, before optimizing).
+*/
+
+#ifndef UPB_GLUE_H
+#define UPB_GLUE_H
+
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Loads all defs from the given protobuf binary descriptor, setting default
+ * accessors and a default layout on all messages.  The caller owns the
+ * returned array of defs, which will be of length *n.  On error NULL is
+ * returned and status is set (if non-NULL). */
+upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
+                                        void *owner, upb_status *status);
+
+/* Like the previous but also adds the loaded defs to the given symtab. */
+bool upb_load_descriptor_into_symtab(upb_symtab *symtab, const char *str,
+                                     size_t len, upb_status *status);
+
+/* Like the previous but also reads the descriptor from the given filename. */
+bool upb_load_descriptor_file_into_symtab(upb_symtab *symtab, const char *fname,
+                                          upb_status *status);
+
+/* Reads the given filename into a character string, returning NULL if there
+ * was an error. */
+char *upb_readfile(const char *filename, size_t *len);
+
+#ifdef __cplusplus
+}  /* extern "C" */
+
+namespace upb {
+
+/* All routines that load descriptors expect the descriptor to be a
+ * FileDescriptorSet.  Each C++ function below forwards its arguments to the
+ * upb_load_descriptor_* C function declared above and returns its result. */
+inline bool LoadDescriptorFileIntoSymtab(SymbolTable* s, const char *fname,
+                                         Status* status) {
+  return upb_load_descriptor_file_into_symtab(s, fname, status);
+}
+
+inline bool LoadDescriptorIntoSymtab(SymbolTable* s, const char* str,
+                                     size_t len, Status* status) {
+  return upb_load_descriptor_into_symtab(s, str, len, status);
+}
+
+/* Templated so it can accept both string and std::string.  T only needs to
+ * provide c_str() and size(). */
+template <typename T>
+bool LoadDescriptorIntoSymtab(SymbolTable* s, const T& desc, Status* status) {
+  return upb_load_descriptor_into_symtab(s, desc.c_str(), desc.size(), status);
+}
+
+}  /* namespace upb */
+
+#endif
+
+#endif  /* UPB_GLUE_H */
+/*
+** upb::pb::TextPrinter (upb_textprinter)
+**
+** Handlers for writing to protobuf text format.
+*/
+
+#ifndef UPB_TEXT_H_
+#define UPB_TEXT_H_
+
+
+#ifdef __cplusplus
+namespace upb {
+namespace pb {
+class TextPrinter;
+}  /* namespace pb */
+}  /* namespace upb */
+#endif
+
+UPB_DECLARE_TYPE(upb::pb::TextPrinter, upb_textprinter)
+
+#ifdef __cplusplus
+
+class upb::pb::TextPrinter {  /* C++ face of the upb_textprinter C type. */
+ public:
+  /* The given handlers must have come from NewHandlers().  It must outlive the
+   * TextPrinter. */
+  static TextPrinter *Create(Environment *env, const upb::Handlers *handlers,
+                             BytesSink *output);
+
+  void SetSingleLineMode(bool single_line);  /* upb_textprinter_setsingleline */
+
+  Sink* input();  /* The input to the printer (upb_textprinter_input). */
+
+  /* If handler caching becomes a requirement we can add a code cache as in
+   * decoder.h */
+  static reffed_ptr<const Handlers> NewHandlers(const MessageDef* md);
+};
+
+#endif
+
+UPB_BEGIN_EXTERN_C
+
+/* C API. */
+upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
+                                        upb_bytessink *output);
+void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line);
+upb_sink *upb_textprinter_input(upb_textprinter *p);
+
+const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
+                                                const void *owner);
+
+UPB_END_EXTERN_C
+
+#ifdef __cplusplus
+
+namespace upb {
+namespace pb {  /* Inline C++ methods; each forwards to the upb_textprinter_*
+                 * C function declared above. */
+inline TextPrinter *TextPrinter::Create(Environment *env,
+                                        const upb::Handlers *handlers,
+                                        BytesSink *output) {
+  return upb_textprinter_create(env, handlers, output);
+}
+inline void TextPrinter::SetSingleLineMode(bool single_line) {
+  upb_textprinter_setsingleline(this, single_line);
+}
+inline Sink* TextPrinter::input() {
+  return upb_textprinter_input(this);
+}
+inline reffed_ptr<const Handlers> TextPrinter::NewHandlers(
+    const MessageDef *md) {  /* The address of the local h is the owner token
+                              * given to the C call and then to reffed_ptr --
+                              * presumably handing the reference over;
+                              * confirm in reffed_ptr. */
+  const Handlers* h = upb_textprinter_newhandlers(md, &h);
+  return reffed_ptr<const Handlers>(h, &h);
+}
+}  /* namespace pb */
+}  /* namespace upb */
+
+#endif
+
+#endif  /* UPB_TEXT_H_ */
+/*
+** upb::json::Parser (upb_json_parser)
+**
+** Parses JSON according to a specific schema.
+** Support for parsing arbitrary JSON (schema-less) will be added later.
+*/
+
+#ifndef UPB_JSON_PARSER_H_
+#define UPB_JSON_PARSER_H_
+
+
+#ifdef __cplusplus
+namespace upb {
+namespace json {
+class Parser;
+}  /* namespace json */
+}  /* namespace upb */
+#endif
+
+UPB_DECLARE_TYPE(upb::json::Parser, upb_json_parser)
+
+/* upb::json::Parser **********************************************************/
+
+/* Preallocation hint: parser won't allocate more bytes than this when first
+ * constructed.  This hint may be an overestimate for some build configurations.
+ * But if the parser library is upgraded without recompiling the application,
+ * it may be an underestimate. */
+#define UPB_JSON_PARSER_SIZE 3704
+
+#ifdef __cplusplus
+
+/* Parses an incoming BytesStream, pushing the results to the destination
+ * sink.  This class is the C++ face of the upb_json_parser C type. */
+class upb::json::Parser {
+ public:
+  static Parser* Create(Environment* env, Sink* output);  /* |output| is the
+      * destination sink; wraps upb_json_parser_create(). */
+
+  BytesSink* input();  /* Wraps upb_json_parser_input(). */
+
+ private:
+  UPB_DISALLOW_POD_OPS(Parser, upb::json::Parser)  /* Macro defined elsewhere;
+      * presumably forbids construct/copy/assign of this type -- confirm. */
+};
+
+#endif
+
+UPB_BEGIN_EXTERN_C
+
+upb_json_parser *upb_json_parser_create(upb_env *e, upb_sink *output);
+upb_bytessink *upb_json_parser_input(upb_json_parser *p);
+
+UPB_END_EXTERN_C
+
+#ifdef __cplusplus
+
+namespace upb {
+namespace json {  /* Inline C++ methods; each forwards to the
+                   * upb_json_parser_* C function declared above. */
+inline Parser* Parser::Create(Environment* env, Sink* output) {
+  return upb_json_parser_create(env, output);
+}
+inline BytesSink* Parser::input() {
+  return upb_json_parser_input(this);
+}
+}  /* namespace json */
+}  /* namespace upb */
+
+#endif
+
+
+#endif  /* UPB_JSON_PARSER_H_ */
+/*
+** upb::json::Printer
+**
+** Handlers that emit JSON according to a specific protobuf schema.
+*/
+
+#ifndef UPB_JSON_TYPED_PRINTER_H_
+#define UPB_JSON_TYPED_PRINTER_H_
+
+
+#ifdef __cplusplus
+namespace upb {
+namespace json {
+class Printer;
+}  /* namespace json */
+}  /* namespace upb */
+#endif
+
+UPB_DECLARE_TYPE(upb::json::Printer, upb_json_printer)
+
+
+/* upb::json::Printer *********************************************************/
+
+#define UPB_JSON_PRINTER_SIZE 168
+
+#ifdef __cplusplus
+
+/* Prints an incoming stream of data to a BytesSink in JSON format.  This
+ * class is the C++ face of the upb_json_printer C type. */
+class upb::json::Printer {
+ public:
+  static Printer* Create(Environment* env, const upb::Handlers* handlers,
+                         BytesSink* output);  /* upb_json_printer_create() */
+
+  /* The input to the printer. */
+  Sink* input();
+
+  /* Returns handlers for printing according to the specified schema. */
+  static reffed_ptr<const Handlers> NewHandlers(const upb::MessageDef* md);
+
+  static const size_t kSize = UPB_JSON_PRINTER_SIZE;  /* Same value as macro. */
+
+ private:
+  UPB_DISALLOW_POD_OPS(Printer, upb::json::Printer)  /* Macro defined elsewhere;
+      * presumably forbids construct/copy/assign of this type -- confirm. */
+};
+
+#endif
+
+UPB_BEGIN_EXTERN_C
+
+/* Native C API. */
+upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
+                                          upb_bytessink *output);
+upb_sink *upb_json_printer_input(upb_json_printer *p);
+const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md,
+                                                 const void *owner);
+
+UPB_END_EXTERN_C
+
+#ifdef __cplusplus
+
+namespace upb {
+namespace json {  /* Inline C++ methods; each forwards to the
+                   * upb_json_printer_* C function declared above. */
+inline Printer* Printer::Create(Environment* env, const upb::Handlers* handlers,
+                                BytesSink* output) {
+  return upb_json_printer_create(env, handlers, output);
+}
+inline Sink* Printer::input() { return upb_json_printer_input(this); }
+inline reffed_ptr<const Handlers> Printer::NewHandlers(
+    const upb::MessageDef *md) {  /* The address of the local h is the owner
+                                   * token given to the C call and then to
+                                   * reffed_ptr -- presumably handing the
+                                   * reference over; confirm in reffed_ptr. */
+  const Handlers* h = upb_json_printer_newhandlers(md, &h);
+  return reffed_ptr<const Handlers>(h, &h);
+}
+}  /* namespace json */
+}  /* namespace upb */
+
+#endif
+
+#endif  /* UPB_JSON_TYPED_PRINTER_H_ */

+ 2 - 1
python/google/protobuf/internal/descriptor_database_test.py

@@ -35,9 +35,10 @@
 __author__ = 'matthewtoia@google.com (Matt Toia)'
 
 try:
-  import unittest2 as unittest
+  import unittest2 as unittest  #PY26
 except ImportError:
   import unittest
+
 from google.protobuf import descriptor_pb2
 from google.protobuf.internal import factory_test2_pb2
 from google.protobuf import descriptor_database

+ 7 - 6
python/google/protobuf/internal/descriptor_pool_test.py

@@ -35,11 +35,13 @@
 __author__ = 'matthewtoia@google.com (Matt Toia)'
 
 import os
+import sys
 
 try:
-  import unittest2 as unittest
+  import unittest2 as unittest  #PY26
 except ImportError:
   import unittest
+
 from google.protobuf import unittest_import_pb2
 from google.protobuf import unittest_import_public_pb2
 from google.protobuf import unittest_pb2
@@ -49,7 +51,6 @@ from google.protobuf.internal import descriptor_pool_test1_pb2
 from google.protobuf.internal import descriptor_pool_test2_pb2
 from google.protobuf.internal import factory_test1_pb2
 from google.protobuf.internal import factory_test2_pb2
-from google.protobuf.internal import test_util
 from google.protobuf import descriptor
 from google.protobuf import descriptor_database
 from google.protobuf import descriptor_pool
@@ -350,7 +351,7 @@ class DescriptorPoolTest(unittest.TestCase):
 
 
 @unittest.skipIf(api_implementation.Type() != 'cpp',
-                            'explicit tests of the C++ implementation')
+                 'explicit tests of the C++ implementation')
 class CppDescriptorPoolTest(DescriptorPoolTest):
   # TODO(amauryfa): remove when descriptor_pool.DescriptorPool() creates true
   # C++ descriptor pool object for C++ implementation.
@@ -555,7 +556,7 @@ class AddDescriptorTest(unittest.TestCase):
             prefix + 'protobuf_unittest.TestAllTypes.NestedMessage').name)
 
   @unittest.skipIf(api_implementation.Type() == 'cpp',
-                    'With the cpp implementation, Add() must be called first')
+                   'With the cpp implementation, Add() must be called first')
   def testMessage(self):
     self._TestMessage('')
     self._TestMessage('.')
@@ -591,13 +592,13 @@ class AddDescriptorTest(unittest.TestCase):
             prefix + 'protobuf_unittest.TestAllTypes.NestedEnum').name)
 
   @unittest.skipIf(api_implementation.Type() == 'cpp',
-                    'With the cpp implementation, Add() must be called first')
+                   'With the cpp implementation, Add() must be called first')
   def testEnum(self):
     self._TestEnum('')
     self._TestEnum('.')
 
   @unittest.skipIf(api_implementation.Type() == 'cpp',
-                    'With the cpp implementation, Add() must be called first')
+                   'With the cpp implementation, Add() must be called first')
   def testFile(self):
     pool = descriptor_pool.DescriptorPool()
     pool.AddFileDescriptor(unittest_pb2.DESCRIPTOR)

Některé soubory nejsou zobrazeny, neboť je v těchto rozdílových datech změněno mnoho souborů